Style improvements (#1957)

2025-01-26 12:22:08 +01:00 · 2023-05-09 22:49:39 -03:00 · 2023-05-09 22:49:39 -03:00 · 3913155c1f
commit 3913155c1f
parent 334486f527
23 changed files with 64 additions and 50 deletions
--- a/api-example-stream.py
+++ b/api-example-stream.py
@ -5,7 +5,7 @@ import sys
 try:
    import websockets
 except ImportError:
-    print("Websockets package not found. Make sure it's installed.") 
+    print("Websockets package not found. Make sure it's installed.")

 # For local streaming, the websockets are hosted without ssl - ws://
 HOST = 'localhost:5005'
@ -14,6 +14,7 @@ URI = f'ws://{HOST}/api/v1/stream'
 # For reverse-proxied streaming, the remote will likely host with ssl - wss://
 # URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'

+
 async def run(context):
    # Note: the selected defaults change from time to time.
    request = {
@ -42,7 +43,7 @@ async def run(context):
    async with websockets.connect(URI, ping_interval=None) as websocket:
        await websocket.send(json.dumps(request))

-        yield context # Remove this if you just want to see the reply
+        yield context  # Remove this if you just want to see the reply

        while True:
            incoming_data = await websocket.recv()
@ -58,7 +59,7 @@ async def run(context):
 async def print_response_stream(prompt):
    async for response in run(prompt):
        print(response, end='')
-        sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
+        sys.stdout.flush()  # If we don't flush, we won't see tokens in realtime.


 if __name__ == '__main__':
--- a/api-example.py
+++ b/api-example.py
@ -7,6 +7,7 @@ URI = f'http://{HOST}/api/v1/generate'
 # For reverse-proxied streaming, the remote will likely host with ssl - https://
 # URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'

+
 def run(prompt):
    request = {
        'prompt': prompt,
@ -37,6 +38,7 @@ def run(prompt):
        result = response.json()['results'][0]['text']
        print(prompt + result)

+
 if __name__ == '__main__':
    prompt = "In order to make homemade bread, follow these steps:\n1)"
    run(prompt)
--- a/extensions/api/blocking_api.py
+++ b/extensions/api/blocking_api.py
@ -2,11 +2,10 @@ import json
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from threading import Thread

+from extensions.api.util import build_parameters, try_start_cloudflared
 from modules import shared
 from modules.text_generation import encode, generate_reply

-from extensions.api.util import build_parameters, try_start_cloudflared
-

 class Handler(BaseHTTPRequestHandler):
    def do_GET(self):
--- a/extensions/api/script.py
+++ b/extensions/api/script.py
@ -5,6 +5,7 @@ from modules import shared
 BLOCKING_PORT = 5000
 STREAMING_PORT = 5005

+
 def setup():
    blocking_api.start_server(BLOCKING_PORT, share=shared.args.public_api)
    streaming_api.start_server(STREAMING_PORT, share=shared.args.public_api)
--- a/extensions/api/streaming_api.py
+++ b/extensions/api/streaming_api.py
@ -1,12 +1,12 @@
-import json
 import asyncio
-from websockets.server import serve
+import json
 from threading import Thread

-from modules import shared
-from modules.text_generation import generate_reply
+from websockets.server import serve

 from extensions.api.util import build_parameters, try_start_cloudflared
+from modules import shared
+from modules.text_generation import generate_reply

 PATH = '/api/v1/stream'

--- a/extensions/character_bias/script.py
+++ b/extensions/character_bias/script.py
@ -1,6 +1,7 @@
-import gradio as gr
 import os

+import gradio as gr
+
 # get the current directory of the script
 current_dir = os.path.dirname(os.path.abspath(__file__))

--- a/extensions/llava/script.py
+++ b/extensions/llava/script.py
@ -1,6 +1,8 @@
-import gradio as gr
 import logging

+import gradio as gr
+
+
 def ui():
    gr.Markdown("### This extension is deprecated, use \"multimodal\" extension instead")
    logging.error("LLaVA extension is deprecated, use \"multimodal\" extension instead")
--- a/extensions/multimodal/multimodal_embedder.py
+++ b/extensions/multimodal/multimodal_embedder.py
@ -6,10 +6,11 @@ from io import BytesIO
 from typing import Any, List, Optional

 import torch
+from PIL import Image
+
 from extensions.multimodal.pipeline_loader import load_pipeline
 from modules import shared
 from modules.text_generation import encode, get_max_prompt_length
-from PIL import Image


@dataclass
--- a/extensions/multimodal/script.py
+++ b/extensions/multimodal/script.py
@ -7,6 +7,7 @@ from io import BytesIO

 import gradio as gr
 import torch
+
 from extensions.multimodal.multimodal_embedder import MultimodalEmbedder
 from modules import shared

--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@ -1,11 +1,12 @@
 import base64
 import json
-import numpy as np
 import os
 import time
 from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
 from threading import Thread

+import numpy as np
+
 from modules import shared
 from modules.text_generation import encode, generate_reply

@ -61,6 +62,7 @@ def float_list_to_base64(float_list):
    ascii_string = encoded_bytes.decode('ascii')
    return ascii_string

+
 class Handler(BaseHTTPRequestHandler):
    def do_GET(self):
        if self.path.startswith('/v1/models'):
@ -387,8 +389,8 @@ class Handler(BaseHTTPRequestHandler):
                    "created": created_time,
                    "model": model,  # TODO: add Lora info?
                    resp_list: [{
-                            "index": 0,
-                            "finish_reason": "stop",
+                        "index": 0,
+                        "finish_reason": "stop",
                    }],
                    "usage": {
                        "prompt_tokens": token_count,
--- a/extensions/sd_api_pictures/script.py
+++ b/extensions/sd_api_pictures/script.py
@ -6,12 +6,13 @@ from datetime import date
 from pathlib import Path

 import gradio as gr
-import modules.shared as shared
 import requests
 import torch
-from modules.models import reload_model, unload_model
 from PIL import Image

+import modules.shared as shared
+from modules.models import reload_model, unload_model
+
 torch._C._jit_set_profiling_mode(False)

 # parameters which can be customized in settings.json of webui
@ -77,6 +78,7 @@ SD_models = ['NeverEndingDream']  # TODO: get with http://{address}}/sdapi/v1/sd

 picture_response = False  # specifies if the next model response should appear as a picture

+
 def remove_surrounded_chars(string):
    # this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
    # 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
@ -122,7 +124,6 @@ def input_modifier(string):

 # Get and save the Stable Diffusion-generated picture
 def get_SD_pictures(description):
-
    global params

    if params['manage_VRAM']:
@ -259,6 +260,7 @@ def SD_api_address_update(address):

    return gr.Textbox.update(label=msg)

+
 def ui():

    # Gradio elements
@ -290,12 +292,11 @@ def ui():
                cfg_scale = gr.Number(label="CFG Scale", value=params['cfg_scale'], elem_id="cfg_box")
                with gr.Column() as hr_options:
                    restore_faces = gr.Checkbox(value=params['restore_faces'], label='Restore faces')
-                    enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')                    
+                    enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')
            with gr.Row(visible=params['enable_hr'], elem_classes="hires_opts") as hr_options:
-                    hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
-                    denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
-                    hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')                    
-
+                hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
+                denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
+                hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')

    # Event functions to update the parameters in the backend
    address.change(lambda x: params.update({"address": filter_address(x)}), address, None)
--- a/extensions/silero_tts/script.py
+++ b/extensions/silero_tts/script.py
@ -4,6 +4,7 @@ from pathlib import Path

 import gradio as gr
 import torch
+
 from extensions.silero_tts import tts_preprocessor
 from modules import chat, shared
 from modules.html_generator import chat_html_wrapper
@ -216,4 +217,4 @@ def ui():

    # Play preview
    preview_text.submit(voice_preview, preview_text, preview_audio)
-    preview_play.click(voice_preview, preview_text, preview_audio)
+    preview_play.click(voice_preview, preview_text, preview_audio)
--- a/extensions/silero_tts/test_tts.py
+++ b/extensions/silero_tts/test_tts.py
@ -2,7 +2,6 @@ import time
 from pathlib import Path

 import torch
-
 import tts_preprocessor

 torch._C._jit_set_profiling_mode(False)
--- a/extensions/silero_tts/tts_preprocessor.py
+++ b/extensions/silero_tts/tts_preprocessor.py
@ -69,7 +69,7 @@ def remove_surrounded_chars(string):
    # first this expression will check if there is a string nested exclusively between a alt=
    # and a style= string. This would correspond to only the alt text of an embedded image
    # If it matches it will only keep that part as the string, and send it for further processing
-    # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any 
+    # Afterwards this expression matches to 'as few symbols as possible (0 upwards) between any
    # asterisks' OR 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
    if re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL):
        m = re.search(r'(?<=alt=)(.*)(?=style=)', string, re.DOTALL)
--- a/extensions/superbooga/script.py
+++ b/extensions/superbooga/script.py
@ -59,7 +59,7 @@ class ChromaCollector(Collecter):
    def get_ids(self, search_strings: list[str], n_results: int) -> list[str]:
        n_results = min(len(self.ids), n_results)
        result = self.collection.query(query_texts=search_strings, n_results=n_results, include=['documents'])['ids'][0]
-        return list(map(lambda x : int(x[2:]), result))
+        return list(map(lambda x: int(x[2:]), result))

    def clear(self):
        self.collection.delete(ids=self.ids)
@ -162,13 +162,13 @@ def input_modifier(string):
 def custom_generate_chat_prompt(user_input, state, **kwargs):
    if len(shared.history['internal']) > 2 and user_input != '':
        chunks = []
-        for i in range(len(shared.history['internal'])-1):
+        for i in range(len(shared.history['internal']) - 1):
            chunks.append('\n'.join(shared.history['internal'][i]))

        add_chunks_to_collector(chunks)
        query = '\n'.join(shared.history['internal'][-1] + [user_input])
        try:
-            best_ids = collector.get_ids(query, n_results=len(shared.history['internal'])-1)
+            best_ids = collector.get_ids(query, n_results=len(shared.history['internal']) - 1)

            # Sort the history by relevance instead of by chronological order,
            # except for the latest message
@ -226,7 +226,7 @@ def ui():

        ## Chat mode

-        In chat mode, the extension automatically sorts the history by relevance instead of chronologically, except for the very latest input/reply pair. 
+        In chat mode, the extension automatically sorts the history by relevance instead of chronologically, except for the very latest input/reply pair.

        That is, the prompt will include (starting from the end):

--- a/extensions/whisper_stt/script.py
+++ b/extensions/whisper_stt/script.py
@ -1,5 +1,6 @@
 import gradio as gr
 import speech_recognition as sr
+
 from modules import shared

 input_hijack = {
--- a/modules/RWKV.py
+++ b/modules/RWKV.py
@ -24,13 +24,12 @@ class RWKVModel:
    @classmethod
    def from_pretrained(self, path, dtype="fp16", device="cuda"):
        tokenizer_path = Path(f"{path.parent}/20B_tokenizer.json")
-
        if shared.args.rwkv_strategy is None:
            model = RWKV(model=str(path), strategy=f'{device} {dtype}')
        else:
            model = RWKV(model=str(path), strategy=shared.args.rwkv_strategy)
-        pipeline = PIPELINE(model, str(tokenizer_path))

+        pipeline = PIPELINE(model, str(tokenizer_path))
        result = self()
        result.pipeline = pipeline
        result.model = model
@ -83,7 +82,6 @@ class RWKVModel:
            out = self.cached_output_logits

        for i in range(token_count):
-
            # forward
            tokens = self.pipeline.encode(ctx) if i == 0 else [token]
            while len(tokens) > 0:
@ -91,35 +89,38 @@ class RWKVModel:
                tokens = tokens[args.chunk_len:]

            # cache the model state after scanning the context
-            # we don't cache the state after processing our own generated tokens because 
-            # the output string might be post-processed arbitrarily. Therefore, what's fed into the model 
+            # we don't cache the state after processing our own generated tokens because
+            # the output string might be post-processed arbitrarily. Therefore, what's fed into the model
            # on the next round of chat might be slightly different from what it output on the previous round
            if i == 0:
                self.cached_context += ctx
                self.cached_model_state = copy.deepcopy(state)
                self.cached_output_logits = copy.deepcopy(out)
-            
+
            # adjust probabilities
            for n in args.token_ban:
                out[n] = -float('inf')
+
            for n in occurrence:
                out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)
-            
+
            # sampler
            token = self.pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k)
            if token in args.token_stop:
                break
+
            all_tokens += [token]
            if token not in occurrence:
                occurrence[token] = 1
            else:
                occurrence[token] += 1
-            
+
            # output
            tmp = self.pipeline.decode([token])
-            if '\ufffd' not in tmp: # is valid utf-8 string?
+            if '\ufffd' not in tmp:  # is valid utf-8 string?
                if callback:
                    callback(tmp)
+
                out_str += tmp

        return out_str
@ -133,7 +134,6 @@ class RWKVTokenizer:
    def from_pretrained(self, path):
        tokenizer_path = path / "20B_tokenizer.json"
        tokenizer = Tokenizer.from_file(str(tokenizer_path))
-
        result = self()
        result.tokenizer = tokenizer
        return result
--- a/modules/deepspeed_parameters.py
+++ b/modules/deepspeed_parameters.py
@ -1,5 +1,4 @@
 def generate_ds_config(ds_bf16, train_batch_size, nvme_offload_dir):
-
    '''
    DeepSpeed configuration
    https://huggingface.co/docs/transformers/main_classes/deepspeed
--- a/modules/evaluate.py
+++ b/modules/evaluate.py
@ -20,6 +20,8 @@ def load_past_evaluations():
        return df
    else:
        return pd.DataFrame(columns=['Model', 'LoRAs', 'Dataset', 'Perplexity', 'stride', 'max_length', 'Date', 'Comment'])
+
+
 past_evaluations = load_past_evaluations()


--- a/modules/extensions.py
+++ b/modules/extensions.py
@ -7,7 +7,6 @@ import gradio as gr
 import extensions
 import modules.shared as shared

-
 state = {}
 available_extensions = []
 setup_called = set()
@ -91,7 +90,7 @@ def _apply_state_modifier_extensions(state):
            state = getattr(extension, "state_modifier")(state)

    return state
- 
+

 # Extension functions that override the default tokenizer output - currently only the first one will work
 def _apply_tokenizer_extensions(function_name, state, prompt, input_ids, input_embeds):
@ -108,7 +107,7 @@ def _apply_custom_tokenized_length(prompt):
    for extension, _ in iterator():
        if hasattr(extension, 'custom_tokenized_length'):
            return getattr(extension, 'custom_tokenized_length')(prompt)
-    
+
    return None


--- a/modules/logging_colors.py
+++ b/modules/logging_colors.py
@ -1,6 +1,8 @@
 # Copied from https://stackoverflow.com/a/1336640

 import logging
+import platform
+

 def add_coloring_to_emit_windows(fn):
    # add methods we need to the class
@ -11,6 +13,7 @@ def add_coloring_to_emit_windows(fn):

    def _set_color(self, code):
        import ctypes
+
        # Constants from the Windows API
        self.STD_OUTPUT_HANDLE = -11
        hdl = ctypes.windll.kernel32.GetStdHandle(self.STD_OUTPUT_HANDLE)
@ -94,7 +97,6 @@ def add_coloring_to_emit_ansi(fn):
    return new


-import platform
 if platform.system() == 'Windows':
    # Windows does not support ANSI escapes and we are using API calls to set the console color
    logging.StreamHandler.emit = add_coloring_to_emit_windows(logging.StreamHandler.emit)
--- a/modules/models.py
+++ b/modules/models.py
@ -161,10 +161,10 @@ def load_model(model_name):
    # Custom
    else:
        params = {
-          "low_cpu_mem_usage": True,
-          "trust_remote_code": trust_remote_code
+            "low_cpu_mem_usage": True,
+            "trust_remote_code": trust_remote_code
        }
-        
+
        if not any((shared.args.cpu, torch.cuda.is_available(), torch.has_mps)):
            logging.warning("torch.cuda.is_available() returned False. This means that no GPU has been detected. Falling back to CPU mode.")
            shared.args.cpu = True
@ -288,7 +288,7 @@ def load_soft_prompt(name):
                        logging.info(f"{field}: {', '.join(j[field])}")
                    else:
                        logging.info(f"{field}: {j[field]}")
-                        
+
            logging.info()
            tensor = np.load('tensor.npy')
            Path('tensor.npy').unlink()
--- a/server.py
+++ b/server.py
@ -377,7 +377,7 @@ def create_model_menus():

    shared.gradio['lora_menu_apply'].click(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['model_status'], show_progress=False)
    shared.gradio['download_model_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['model_status'], show_progress=False)
-    shared.gradio['autoload_model'].change(lambda x : gr.update(visible=not x), shared.gradio['autoload_model'], load)
+    shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), shared.gradio['autoload_model'], load)


 def create_settings_menus(default_preset):