diff --git a/.gitignore b/.gitignore index 3cfbbb22..36852916 100644 --- a/.gitignore +++ b/.gitignore @@ -1,26 +1,21 @@ -cache/* -characters/* -extensions/silero_tts/outputs/* -extensions/elevenlabs_tts/outputs/* -extensions/sd_api_pictures/outputs/* -logs/* -loras/* -models/* -softprompts/* -torch-dumps/* +cache +characters +training/datasets +extensions/silero_tts/outputs +extensions/elevenlabs_tts/outputs +extensions/sd_api_pictures/outputs +logs +loras +models +softprompts +torch-dumps *pycache* */*pycache* */*/pycache* venv/ .venv/ +repositories settings.json img_bot* img_me* - -!characters/Example.json -!characters/Example.png -!loras/place-your-loras-here.txt -!models/place-your-models-here.txt -!softprompts/place-your-softprompts-here.txt -!torch-dumps/place-your-pt-models-here.txt diff --git a/README.md b/README.md index 60444401..3bfbc72f 100644 --- a/README.md +++ b/README.md @@ -176,10 +176,10 @@ Optionally, you can use the following command-line flags: | `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. | | `--cpu` | Use the CPU to generate text.| | `--load-in-8bit` | Load the model with 8-bit precision.| -| `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. | -| `--gptq-bits GPTQ_BITS` | GPTQ: Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. | -| `--gptq-model-type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMa and OPT are supported. | -| `--gptq-pre-layer GPTQ_PRE_LAYER` | GPTQ: The number of layers to preload. | +| `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. 
| +| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported. | +| `--groupsize GROUPSIZE` | GPTQ: Group size. | +| `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. | | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. | | `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.| | `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. | diff --git a/css/chat.css b/css/chat.css index 8d9d88a6..1e703530 100644 --- a/css/chat.css +++ b/css/chat.css @@ -23,3 +23,9 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .pending.svelte-1ed2p3z { opacity: 1; } + +#extensions { + padding: 0; + padding: 0; +} + diff --git a/css/main.css b/css/main.css index 09f3b6a8..97879f01 100644 --- a/css/main.css +++ b/css/main.css @@ -54,3 +54,13 @@ ol li p, ul li p { .gradio-container-3-18-0 .prose * h1, h2, h3, h4 { color: white; } + +.gradio-container { + max-width: 100% !important; + padding-top: 0 !important; +} + +#extensions { + padding: 15px; + padding: 15px; +} diff --git a/css/main.js b/css/main.js index 9db3fe8b..029ecb62 100644 --- a/css/main.js +++ b/css/main.js @@ -11,7 +11,7 @@ let extensions = document.getElementById('extensions'); main_parent.addEventListener('click', function(e) { // Check if the main element is visible if (main.offsetHeight > 0 && main.offsetWidth > 0) { - extensions.style.display = 'block'; + extensions.style.display = 'flex'; } else { extensions.style.display = 'none'; } diff --git a/download-model.py b/download-model.py index 7ca33b7d..25386e5f 100644 --- a/download-model.py +++ b/download-model.py @@ -118,7 +118,7 @@ def get_download_links_from_huggingface(model, branch): is_safetensors = re.match("model.*\.safetensors", fname) is_pt = re.match(".*\.pt", fname) is_tokenizer = re.match("tokenizer.*\.model", fname) - is_text = re.match(".*\.(txt|json|py)", fname) or 
is_tokenizer + is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer if any((is_pytorch, is_safetensors, is_pt, is_tokenizer, is_text)): if is_text: diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index a81a5da1..1352993a 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -26,6 +26,7 @@ current_params = params.copy() voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115'] voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high'] voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast'] +streaming_state = shared.args.no_stream # remember if chat streaming was enabled # Used for making text xml compatible, needed for voice pitch and speed control table = str.maketrans({ @@ -77,6 +78,7 @@ def input_modifier(string): shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')] 
shared.processing_message = "*Is recording a voice message...*" + shared.args.no_stream = True # Disable streaming cause otherwise the audio output will stutter and begin anew every time the message is being updated return string def output_modifier(string): @@ -84,7 +86,7 @@ def output_modifier(string): This function is applied to the model outputs. """ - global model, current_params + global model, current_params, streaming_state for i in params: if params[i] != current_params[i]: @@ -116,6 +118,7 @@ def output_modifier(string): string += f'\n\n{original_string}' shared.processing_message = "*Is typing...*" + shared.args.no_stream = streaming_state # restore the streaming option to the previous value return string def bot_prefix_modifier(string): diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index bec6c66f..afb5695f 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -14,18 +14,21 @@ import opt def load_quantized(model_name): - if not shared.args.gptq_model_type: + if not shared.args.model_type: # Try to determine model type from model name - model_type = model_name.split('-')[0].lower() - if model_type not in ('llama', 'opt'): - print("Can't determine model type from model name. Please specify it manually using --gptq-model-type " + if model_name.lower().startswith(('llama', 'alpaca')): + model_type = 'llama' + elif model_name.lower().startswith(('opt', 'galactica')): + model_type = 'opt' + else: + print("Can't determine model type from model name. Please specify it manually using --model_type " "argument") exit() else: - model_type = shared.args.gptq_model_type.lower() + model_type = shared.args.model_type.lower() if model_type == 'llama': - if not shared.args.gptq_pre_layer: + if not shared.args.pre_layer: load_quant = llama.load_quant else: load_quant = llama_inference_offload.load_quant @@ -35,35 +38,44 @@ def load_quantized(model_name): print("Unknown pre-quantized model type specified. 
Only 'llama' and 'opt' are supported") exit() + # Now we are going to try to locate the quantized model file. path_to_model = Path(f'models/{model_name}') - if path_to_model.name.lower().startswith('llama-7b'): - pt_model = f'llama-7b-{shared.args.gptq_bits}bit' - elif path_to_model.name.lower().startswith('llama-13b'): - pt_model = f'llama-13b-{shared.args.gptq_bits}bit' - elif path_to_model.name.lower().startswith('llama-30b'): - pt_model = f'llama-30b-{shared.args.gptq_bits}bit' - elif path_to_model.name.lower().startswith('llama-65b'): - pt_model = f'llama-65b-{shared.args.gptq_bits}bit' - else: - pt_model = f'{model_name}-{shared.args.gptq_bits}bit' - - # Try to find the .safetensors or .pt both in models/ and in the subfolder + found_pts = list(path_to_model.glob("*.pt")) + found_safetensors = list(path_to_model.glob("*.safetensors")) pt_path = None - for path in [Path(p+ext) for ext in ['.safetensors', '.pt'] for p in [f"models/{pt_model}", f"{path_to_model}/{pt_model}"]]: - if path.exists(): - print(f"Found {path}") - pt_path = path - break + + if len(found_pts) == 1: + pt_path = found_pts[0] + elif len(found_safetensors) == 1: + pt_path = found_safetensors[0] + else: + if path_to_model.name.lower().startswith('llama-7b'): + pt_model = f'llama-7b-{shared.args.wbits}bit' + elif path_to_model.name.lower().startswith('llama-13b'): + pt_model = f'llama-13b-{shared.args.wbits}bit' + elif path_to_model.name.lower().startswith('llama-30b'): + pt_model = f'llama-30b-{shared.args.wbits}bit' + elif path_to_model.name.lower().startswith('llama-65b'): + pt_model = f'llama-65b-{shared.args.wbits}bit' + else: + pt_model = f'{model_name}-{shared.args.wbits}bit' + + # Try to find the .safetensors or .pt both in models/ and in the subfolder + for path in [Path(p+ext) for ext in ['.safetensors', '.pt'] for p in [f"models/{pt_model}", f"{path_to_model}/{pt_model}"]]: + if path.exists(): + print(f"Found {path}") + pt_path = path + break if not pt_path: - print(f"Could not find 
{pt_model}, exiting...") + print("Could not find the quantized model in .pt or .safetensors format, exiting...") exit() # qwopqwop200's offload - if shared.args.gptq_pre_layer: - model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits, shared.args.gptq_pre_layer) + if shared.args.pre_layer: + model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, shared.args.pre_layer) else: - model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits) + model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize) # accelerate offload (doesn't work properly) if shared.args.gpu_memory: diff --git a/modules/LoRA.py b/modules/LoRA.py index 283fcf4c..f5dfe4ed 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -18,11 +18,11 @@ def add_lora_to_model(lora_name): # If a LoRA had been previously loaded, or if we want # to unload a LoRA, reload the model - if shared.lora_name != "None" or lora_name == "None": + if shared.lora_name not in ['None', ''] or lora_name in ['None', '']: reload_model() shared.lora_name = lora_name - if lora_name != "None": + if lora_name not in ['None', '']: print(f"Adding the LoRA {lora_name} to the model...") params = {} if not shared.args.cpu: diff --git a/modules/extensions.py b/modules/extensions.py index c55dc978..c3cf4de4 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -63,8 +63,8 @@ def create_extensions_block(): # Creating the extension ui elements if should_display_ui: - with gr.Box(elem_id="extensions"): - gr.Markdown("Extensions") + with gr.Column(elem_id="extensions"): for extension, name in iterator(): + gr.Markdown(f"\n### {name}") if hasattr(extension, "ui"): extension.ui() diff --git a/modules/models.py b/modules/models.py index ccb97da3..c9f03588 100644 --- a/modules/models.py +++ b/modules/models.py @@ -44,7 +44,7 @@ def load_model(model_name): shared.is_RWKV = model_name.lower().startswith('rwkv-') # Default settings - 
if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): + if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.wbits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]): if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')): model = AutoModelForCausalLM.from_pretrained(Path(f"models/{shared.model_name}"), device_map='auto', load_in_8bit=True) else: @@ -95,7 +95,7 @@ def load_model(model_name): return model, tokenizer # Quantized model - elif shared.args.gptq_bits > 0: + elif shared.args.wbits > 0: from modules.GPTQ_loader import load_quantized model = load_quantized(model_name) diff --git a/modules/shared.py b/modules/shared.py index 720c697e..87896faf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -52,7 +52,8 @@ settings = { 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n', '(rosey|chip|joi)_.*_instruct.*': 'User: \n', - 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>' + 'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>', + 'alpaca-*': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. 
\nMention the word \"large language models\" in that poem.\n### Response:\n", }, 'lora_prompts': { 'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:', @@ -78,10 +79,15 @@ parser.add_argument('--chat', action='store_true', help='Launch the web UI in ch parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.') parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.') parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.') -parser.add_argument('--load-in-4bit', action='store_true', help='DEPRECATED: use --gptq-bits 4 instead.') -parser.add_argument('--gptq-bits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision. 2, 3, 4 and 8bit are supported. Currently only works with LLaMA and OPT.') -parser.add_argument('--gptq-model-type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMa and OPT are supported.') -parser.add_argument('--gptq-pre-layer', type=int, default=0, help='GPTQ: The number of layers to preload.') + +parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.') +parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.') +parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.') +parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') +parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. 
Currently only LLaMA and OPT are supported.') +parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.') +parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.') + parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.') parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.') parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.') @@ -109,6 +115,8 @@ parser.add_argument('--verbose', action='store_true', help='Print the prompts to args = parser.parse_args() # Provisional, this will be deleted later -if args.load_in_4bit: - print("Warning: --load-in-4bit is deprecated and will be removed. Use --gptq-bits 4 instead.\n") - args.gptq_bits = 4 +deprecated_dict = {'gptq_bits': ['wbits', 0], 'gptq_model_type': ['model_type', None], 'gptq_pre_layer': ['pre_layer', 0]} +for k in deprecated_dict: + if eval(f"args.{k}") != deprecated_dict[k][1]: + print(f"Warning: --{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.") + exec(f"args.{deprecated_dict[k][0]} = args.{k}") diff --git a/prompts/Alpaca.txt b/prompts/Alpaca.txt new file mode 100644 index 00000000..8434a80c --- /dev/null +++ b/prompts/Alpaca.txt @@ -0,0 +1,6 @@ +Below is an instruction that describes a task. Write a response that appropriately completes the request. +### Instruction: +Write a poem about the transformers Python library. +Mention the word "large language models" in that poem. 
+### Response: + diff --git a/prompts/Open Assistant.txt b/prompts/Open Assistant.txt new file mode 100644 index 00000000..cf1ae4a2 --- /dev/null +++ b/prompts/Open Assistant.txt @@ -0,0 +1 @@ +<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|> diff --git a/prompts/QA.txt b/prompts/QA.txt new file mode 100644 index 00000000..32b0e235 --- /dev/null +++ b/prompts/QA.txt @@ -0,0 +1,4 @@ +Common sense questions and answers + +Question: +Factual answer: diff --git a/requirements.txt b/requirements.txt index c93ce671..4c385065 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ accelerate==0.17.1 bitsandbytes==0.37.1 flexgen==0.1.7 -gradio==3.18.0 +gradio==3.23.0 markdown numpy peft==0.2.0 diff --git a/server.py b/server.py index cd95d5ef..03158ac6 100644 --- a/server.py +++ b/server.py @@ -4,6 +4,7 @@ import re import sys import time import zipfile +from datetime import datetime from pathlib import Path import gradio as gr @@ -36,6 +37,13 @@ def get_available_models(): def get_available_presets(): return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower) +def get_available_prompts(): + prompts = [] + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('[0-9]*.txt'))), key=str.lower, reverse=True) + prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('*.txt'))), key=str.lower) + prompts += ['None'] + return prompts + def get_available_characters(): return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower) @@ -48,12 +56,17 @@ def get_available_softprompts(): def get_available_loras(): return ['None'] + sorted([item.name for item in list(Path('loras/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower) +def unload_model(): + shared.model = shared.tokenizer = None + 
clear_torch_cache() + def load_model_wrapper(selected_model): if selected_model != shared.model_name: shared.model_name = selected_model - shared.model = shared.tokenizer = None - clear_torch_cache() - shared.model, shared.tokenizer = load_model(shared.model_name) + + unload_model() + if selected_model != '': + shared.model, shared.tokenizer = load_model(shared.model_name) return selected_model @@ -91,7 +104,7 @@ def load_preset_values(preset_menu, return_dict=False): if return_dict: return generate_params else: - return preset_menu, generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] + return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping'] def upload_soft_prompt(file): with zipfile.ZipFile(io.BytesIO(file)) as zf: @@ -116,9 +129,43 @@ def create_model_and_preset_menus(): shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button') +def save_prompt(text): + fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt" + with open(Path(f'prompts/{fname}'), 'w', 
encoding='utf-8') as f: + f.write(text) + return f"Saved prompt to prompts/{fname}" + +def load_prompt(fname): + if fname in ['None', '']: + return '' + else: + with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f: + return f.read() + +def create_prompt_menus(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['prompt_menu'] = gr.Dropdown(choices=get_available_prompts(), value='None', label='Prompt') + ui.create_refresh_button(shared.gradio['prompt_menu'], lambda : None, lambda : {'choices': get_available_prompts()}, 'refresh-button') + + with gr.Column(): + with gr.Column(): + shared.gradio['save_prompt'] = gr.Button('Save prompt') + shared.gradio['status'] = gr.Markdown('Ready') + + shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=True) + shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False) + def create_settings_menus(default_preset): generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True) + with gr.Row(): + with gr.Column(): + create_model_and_preset_menus() + with gr.Column(): + shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') + with gr.Row(): with gr.Column(): with gr.Box(): @@ -149,12 +196,6 @@ def create_settings_menus(default_preset): shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)') - - with gr.Row(): - shared.gradio['preset_menu_mirror'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset') - ui.create_refresh_button(shared.gradio['preset_menu_mirror'], lambda : 
None, lambda : {'choices': get_available_presets()}, 'refresh-button') - with gr.Row(): shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA') ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button') @@ -169,8 +210,7 @@ def create_settings_menus(default_preset): shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip']) shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True) - shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) - shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) + shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']]) shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True) shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True) 
shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']]) @@ -235,8 +275,9 @@ if shared.args.lora: # Default UI settings default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')] -default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] -if default_text == '': +if shared.lora_name != "None": + default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')] +else: default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')] title ='Text generation web UI' description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n' @@ -257,8 +298,8 @@ def create_interface(): shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528")) shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop") with gr.Row(): shared.gradio['Impersonate'] = gr.Button('Impersonate') shared.gradio['Regenerate'] = gr.Button('Regenerate') @@ -271,8 +312,6 @@ def create_interface(): shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False) shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - create_model_and_preset_menus() - with gr.Tab("Character", elem_id="chat-settings"): shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name') shared.gradio['name2'] = 
gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name') @@ -366,19 +405,25 @@ def create_interface(): elif shared.args.notebook: with gr.Tab("Text generation", elem_id="main"): - with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25) - with gr.Tab('Markdown'): - shared.gradio['markdown'] = gr.Markdown() - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop') - shared.gradio['Generate'] = gr.Button('Generate') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Column(scale=4): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_id="textbox", lines=25) + with gr.Tab('Markdown'): + shared.gradio['markdown'] = gr.Markdown() + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + with gr.Row(): + shared.gradio['Generate'] = gr.Button('Generate') + shared.gradio['Stop'] = gr.Button('Stop') + + with gr.Column(scale=1): + gr.Markdown("\n") + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + + create_prompt_menus() - create_model_and_preset_menus() with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset) @@ -402,7 +447,7 @@ def create_interface(): with gr.Column(): shared.gradio['Stop'] = gr.Button('Stop') - create_model_and_preset_menus() + create_prompt_menus() with gr.Column(): with gr.Tab('Raw'): @@ -411,6 +456,7 @@ def create_interface(): shared.gradio['markdown'] = gr.Markdown() with gr.Tab('HTML'): shared.gradio['html'] = gr.HTML() + with gr.Tab("Parameters", elem_id="parameters"): create_settings_menus(default_preset)