From 0d9932815c593fd425e25c4c9dff69005d1df95e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 08:45:14 -0700 Subject: [PATCH 001/169] Improve TheEncrypted777 on mobile devices --- css/chat.css | 10 +++++++ css/chat_style-TheEncrypted777.css | 45 ++++++++++++++++++++++++------ css/main.css | 6 ++++ server.py | 2 +- 4 files changed, 54 insertions(+), 9 deletions(-) diff --git a/css/chat.css b/css/chat.css index 17b8d142..ad76f5cc 100644 --- a/css/chat.css +++ b/css/chat.css @@ -46,6 +46,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { min-width: 0 !important; } +@media screen and (max-width: 688px) { + #main { + padding: 0px; + } + + .chat { + height: calc(100vh - 274px) !important; + } +} + /*****************************************************/ /*************** Chat box declarations ***************/ /*****************************************************/ diff --git a/css/chat_style-TheEncrypted777.css b/css/chat_style-TheEncrypted777.css index 7682011d..d92e982d 100644 --- a/css/chat_style-TheEncrypted777.css +++ b/css/chat_style-TheEncrypted777.css @@ -10,17 +10,10 @@ line-height: 1.428571429; } -.circle-you { - background-color: gray; - border-radius: 1rem; - /*Change color to any you like to be the border of your image*/ - border: 2px solid white; -} - +.circle-you, .circle-bot { background-color: gray; border-radius: 1rem; - /*Change color to any you like to be the border of the bot's image*/ border: 2px solid white; } @@ -105,3 +98,39 @@ .message-body p em { color: rgb(110, 110, 110) !important; } + +@media screen and (max-width: 688px) { + .message { + display: grid; + grid-template-columns: 60px minmax(0, 1fr); + padding-bottom: 25px; + font-size: 15px; + font-family: Helvetica, Arial, sans-serif; + line-height: 1.428571429; + } + + .circle-you, .circle-bot { + width: 50px; + height: 73px; + border-radius: 0.5rem; + } + + .circle-bot img, + .circle-you img { + width: 100%; + height: 100%; + object-fit: cover; + } + + .text { + padding-left: 0px; + } + + .message-body p { + font-size: 16px !important; + } + + .username { + font-size: 20px; + } +} diff --git a/css/main.css b/css/main.css index 5c17a179..b4066c91 100644 --- a/css/main.css +++ b/css/main.css @@ -26,6 +26,10 @@ max-width: 2.2em; } +.button_nowrap { + white-space: nowrap; +} + #slim-column { flex: none !important; min-width: 0 !important; @@ -90,6 +94,8 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { .header_bar { background-color: #f7f7f7; margin-bottom: 20px; + display: inline !important; + overflow-x: scroll; } .dark .header_bar { diff --git a/server.py b/server.py index ecb8ddc9..0f1b9332 100644 --- a/server.py +++ b/server.py @@ -643,7 +643,7 @@ def create_interface(): with gr.Row(): shared.gradio['Impersonate'] = gr.Button('Impersonate') shared.gradio['Regenerate'] = gr.Button('Regenerate') - shared.gradio['Remove last'] = gr.Button('Remove last') + shared.gradio['Remove last'] = gr.Button('Remove last', elem_classes=['button_nowrap']) with gr.Row(): shared.gradio['Copy last reply'] = gr.Button('Copy last reply') From e931844fe25620e8ccc2e7c4ed9ab06fc6644471 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 14:52:20 -0300 Subject: [PATCH 002/169] Add auto_max_new_tokens parameter (#3419) --- api-examples/api-example-chat-stream.py | 1 + api-examples/api-example-chat.py | 1 + api-examples/api-example-stream.py | 1 + api-examples/api-example.py | 1 + extensions/api/util.py | 1 + 
extensions/openai/defaults.py | 1 + modules/loaders.py | 5 +++++ modules/shared.py | 1 + modules/text_generation.py | 2 ++ modules/ui.py | 1 + server.py | 1 + settings-template.yaml | 1 + 12 files changed, 17 insertions(+) diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py index 14f6f9d6..493661c2 100644 --- a/api-examples/api-example-chat-stream.py +++ b/api-examples/api-example-chat-stream.py @@ -20,6 +20,7 @@ async def run(user_input, history): request = { 'user_input': user_input, 'max_new_tokens': 250, + 'auto_max_new_tokens': False, 'history': history, 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct' 'character': 'Example', diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py index 0e155c63..31641815 100644 --- a/api-examples/api-example-chat.py +++ b/api-examples/api-example-chat.py @@ -14,6 +14,7 @@ def run(user_input, history): request = { 'user_input': user_input, 'max_new_tokens': 250, + 'auto_max_new_tokens': False, 'history': history, 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct' 'character': 'Example', diff --git a/api-examples/api-example-stream.py b/api-examples/api-example-stream.py index 1ae5a91c..175275f9 100644 --- a/api-examples/api-example-stream.py +++ b/api-examples/api-example-stream.py @@ -20,6 +20,7 @@ async def run(context): request = { 'prompt': context, 'max_new_tokens': 250, + 'auto_max_new_tokens': False, # Generation params. If 'preset' is set to different than 'None', the values # in presets/preset-name.yaml are used instead of the individual numbers. diff --git a/api-examples/api-example.py b/api-examples/api-example.py index 4e45de9e..7f8bc1d2 100644 --- a/api-examples/api-example.py +++ b/api-examples/api-example.py @@ -12,6 +12,7 @@ def run(prompt): request = { 'prompt': prompt, 'max_new_tokens': 250, + 'auto_max_new_tokens': False, # Generation params. If 'preset' is set to different than 'None', the values # in presets/preset-name.yaml are used instead of the individual numbers. diff --git a/extensions/api/util.py b/extensions/api/util.py index 2358b7d2..5cc259db 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -21,6 +21,7 @@ def build_parameters(body, chat=False): generate_params = { 'max_new_tokens': int(body.get('max_new_tokens', body.get('max_length', 200))), + 'auto_max_new_tokens': bool(body.get('auto_max_new_tokens', False)), 'do_sample': bool(body.get('do_sample', True)), 'temperature': float(body.get('temperature', 0.5)), 'top_p': float(body.get('top_p', 1)), diff --git a/extensions/openai/defaults.py b/extensions/openai/defaults.py index 52f0d641..cb8308e7 100644 --- a/extensions/openai/defaults.py +++ b/extensions/openai/defaults.py @@ -4,6 +4,7 @@ import copy # Data type is important, Ex. 
use 0.0 for a float 0 default_req_params = { 'max_new_tokens': 16, # 'Inf' for chat + 'auto_max_new_tokens': False, 'temperature': 1.0, 'top_p': 1.0, 'top_k': 1, # choose 20 for chat in absence of another default diff --git a/modules/loaders.py b/modules/loaders.py index 6d0291bf..838ecc86 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -116,6 +116,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', + 'auto_max_new_tokens', }, 'ExLlama_HF': { 'temperature', @@ -139,6 +140,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', + 'auto_max_new_tokens', }, 'ExLlama': { 'temperature', @@ -176,6 +178,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', + 'auto_max_new_tokens', }, 'GPTQ-for-LLaMa': { 'temperature', @@ -203,6 +206,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', + 'auto_max_new_tokens', }, 'llama.cpp': { 'temperature', @@ -237,6 +241,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', + 'auto_max_new_tokens', }, } diff --git a/modules/shared.py b/modules/shared.py index 59d49ab6..a2782e65 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -36,6 +36,7 @@ settings = { 'max_new_tokens': 200, 'max_new_tokens_min': 1, 'max_new_tokens_max': 4096, + 'auto_max_new_tokens': False, 'seed': -1, 'character': 'None', 'name1': 'You', diff --git a/modules/text_generation.py b/modules/text_generation.py index e1be6aa3..f6f71990 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -247,6 +247,8 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings input_ids = encode(question, add_bos_token=state['add_bos_token'], truncation_length=get_max_prompt_length(state)) output = input_ids[0] cuda = not any((shared.args.cpu, shared.args.deepspeed)) + if state['auto_max_new_tokens']: + generate_params['max_new_tokens'] = state['truncation_length'] - input_ids.shape[-1] # Add the encoded tokens to generate_params question, input_ids, inputs_embeds = apply_extensions('tokenizer', state, question, input_ids, None) diff --git a/modules/ui.py b/modules/ui.py index d9b3a131..fe3482d2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -79,6 +79,7 @@ def list_model_elements(): def list_interface_input_elements(): elements = [ 'max_new_tokens', + 'auto_max_new_tokens', 'seed', 'temperature', 'top_p', diff --git a/server.py b/server.py index 0f1b9332..d622cdbe 100644 --- a/server.py +++ b/server.py @@ -425,6 +425,7 @@ def create_settings_menus(default_preset): shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. 
For instance: "\\nYour Assistant:", "\\nThe assistant:"') with gr.Column(): + shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') diff --git a/settings-template.yaml b/settings-template.yaml index 3d6585d3..62e86371 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -3,6 +3,7 @@ autoload_model: false max_new_tokens: 200 max_new_tokens_min: 1 max_new_tokens_max: 4096 +auto_max_new_tokens: false seed: -1 character: None name1: You From 32a2bbee4ae9e9bcf26c6b10d0386168a42d9f14 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 11:01:29 -0700 Subject: [PATCH 003/169] Implement auto_max_new_tokens for ExLlama --- modules/exllama.py | 6 +++++- modules/loaders.py | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/exllama.py b/modules/exllama.py index ecfb10a4..00b37b9c 100644 --- a/modules/exllama.py +++ b/modules/exllama.py @@ -94,11 +94,15 @@ class ExllamaModel: # Tokenizing the input ids = self.generator.tokenizer.encode(prompt) ids = ids[:, -get_max_prompt_length(state):] + if state['auto_max_new_tokens']: + max_new_tokens = state['truncation_length'] - ids.shape[-1] + else: + max_new_tokens = state['max_new_tokens'] self.generator.gen_begin_reuse(ids) initial_len = self.generator.sequence[0].shape[0] has_leading_space = False - for i in range(state['max_new_tokens']): + for i in range(max_new_tokens): token = self.generator.gen_single_token() if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): has_leading_space = True diff --git a/modules/loaders.py b/modules/loaders.py index 838ecc86..68b48204 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -151,6 +151,7 @@ loaders_samplers = { 'repetition_penalty_range', 'seed', 'ban_eos_token', + 'auto_max_new_tokens', }, 'AutoGPTQ': { 'temperature', From 0e8f9354b5c841f90db4c7f74a84a2582d3cfa66 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 18:50:13 -0700 Subject: [PATCH 004/169] Add direct download for session/chat history JSONs --- css/main.js | 1 + css/save_files.js | 40 ++++++++++++++++++++++++++++ modules/chat.py | 17 ------------ modules/ui.py | 5 ++-- server.py | 66 +++++++++++++++++------------------------------ 5 files changed, 66 insertions(+), 63 deletions(-) create mode 100644 css/save_files.js diff --git a/css/main.js b/css/main.js index f3b3c05f..9663d464 100644 --- a/css/main.js +++ b/css/main.js @@ -17,6 +17,7 @@ main_parent.addEventListener('click', function(e) { } }); +// Add some scrollbars const textareaElements = document.querySelectorAll('.add_scrollbar textarea'); for(i = 0; i < textareaElements.length; i++) { textareaElements[i].classList.remove('scroll-hide'); diff --git a/css/save_files.js b/css/save_files.js new file mode 100644 index 00000000..7dfbcfda --- /dev/null +++ b/css/save_files.js @@ -0,0 +1,40 @@ +// Functions for downloading JSON files +function getCurrentTimestamp() { + const now = new Date(); + const 
timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds + const localTime = new Date(now.getTime() - timezoneOffset); + const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, '').slice(0, 15); + return formattedTimestamp; +} + +function saveFile(contents, filename) { + const element = document.createElement('a'); + element.setAttribute('href', 'data:text/plain;charset=utf-8,' + encodeURIComponent(contents)); + element.setAttribute('download', filename); + element.style.display = 'none'; + document.body.appendChild(element); + element.click(); + document.body.removeChild(element); +} + +function saveHistory(history, character, mode) { + let path = null; + + if (['chat', 'chat-instruct'].includes(mode) && character && character.trim() !== '') { + path = `history_${character}_${getCurrentTimestamp()}.json`; + } else { + try { + path = `history_${mode}_${getCurrentTimestamp()}.json`; + } catch (error) { + path = `history_${getCurrentTimestamp()}.json`; + } + } + saveFile(history, path); +} + +function saveSession(session, mode) { + let path = null; + + path = `session_${mode}_${getCurrentTimestamp()}.json`; + saveFile(session, path); +} diff --git a/modules/chat.py b/modules/chat.py index 070f45a4..57a04606 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -412,23 +412,6 @@ def load_history(file, history): return history -def save_history_at_user_request(history, character, mode): - def make_timestamp_path(character=None): - return f"logs/{character or ''}{'_' if character else ''}{datetime.now().strftime('%Y%m%d-%H%M%S')}.json" - - path = None - if mode in ['chat', 'chat-instruct'] and character not in ['', 'None', None]: - path = make_timestamp_path(character) - else: - # Try to use mode as the file name, otherwise just use the timestamp - try: - path = make_timestamp_path(mode.capitalize()) - except: - path = make_timestamp_path() - - return save_history(history, path) - - def save_persistent_history(history, character, mode): if mode in ['chat', 'chat-instruct'] and character not in ['', 'None', None] and not shared.args.multi_user: save_history(history, path=Path(f'logs/{character}_persistent.json')) diff --git a/modules/ui.py b/modules/ui.py index fe3482d2..df36a331 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -15,6 +15,8 @@ with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f: main_js = f.read() with open(Path(__file__).resolve().parent / '../css/chat.js', 'r') as f: chat_js = f.read() +with open(Path(__file__).resolve().parent / '../css/save_files.js', 'r') as f: + save_files_js = f.read() refresh_symbol = '🔄' delete_symbol = '🗑️' @@ -145,9 +147,6 @@ def gather_interface_values(*args): if not shared.args.multi_user: shared.persistent_interface_state = output - Path('logs').mkdir(exist_ok=True) - with open(Path(f'logs/session_{shared.get_mode()}_autosave.json'), 'w') as f: - f.write(json.dumps(output, indent=4)) return output diff --git a/server.py b/server.py index d622cdbe..b53d6a12 100644 --- a/server.py +++ b/server.py @@ -508,44 +508,24 @@ def create_file_saving_event_handlers(): if not shared.args.multi_user: - def load_session(session, state): - with open(Path(f'logs/{session}.json'), 'r') as f: - state.update(json.loads(f.read())) + def load_session(file, state): + decoded_file = file if type(file) == str else file.decode('utf-8') + data = json.loads(decoded_file) + state.update(data) if shared.is_chat(): chat.save_persistent_history(state['history'], state['character_menu'], state['mode']) return state - if 
shared.is_chat(): - shared.gradio['save_session'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('save_contents')).then( - lambda: 'logs/', None, gradio('save_root')).then( - lambda x: f'session_{shared.get_mode()}_{x + "_" if x not in ["None", None, ""] else ""}{utils.current_time()}.json', gradio('character_menu'), gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) + shared.gradio['save_session'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( + None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") - shared.gradio['session_menu'].change( - load_session, gradio('session_menu', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) - - else: - shared.gradio['save_session'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('save_contents')).then( - lambda: 'logs/', None, gradio('save_root')).then( - lambda: f'session_{shared.get_mode()}_{utils.current_time()}.json', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['session_menu'].change( - load_session, gradio('session_menu', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) - - shared.gradio['delete_session'].click( - lambda x: f'{x}.json', gradio('session_menu'), gradio('delete_filename')).then( - lambda: 'logs/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) + shared.gradio['load_session'].upload( + load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) def set_interface_arguments(interface_mode, extensions, bool_active): @@ -558,7 +538,6 @@ def set_interface_arguments(interface_mode, extensions, bool_active): setattr(shared.args, k, False) if interface_mode != "default": setattr(shared.args, interface_mode, True) - for k in bool_list: setattr(shared.args, k, False) for k in bool_active: @@ -622,6 +601,9 @@ def create_interface(): # Floating menus for saving/deleting files create_file_saving_menus() + # Used for saving files using javascript + shared.gradio['temporary_text'] = gr.Textbox(visible=False) + # Create chat mode interface if shared.is_chat(): shared.input_elements = ui.list_interface_input_elements() @@ -702,11 +684,10 @@ def create_interface(): with gr.Tab('Chat history'): with gr.Row(): with gr.Column(): - shared.gradio['download'] = gr.File(label="Download") - shared.gradio['download_button'] = gr.Button(value='Refresh') + shared.gradio['save_chat_history'] = gr.Button(value='Save history') with gr.Column(): - shared.gradio['upload_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label="Upload") + 
shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label="Upload History JSON") with gr.Tab('Upload character'): with gr.Tab('YAML or JSON'): @@ -845,11 +826,8 @@ def create_interface(): with gr.Column(): if not shared.args.multi_user: - with gr.Row(): - shared.gradio['session_menu'] = gr.Dropdown(choices=utils.get_available_sessions(), value='None', label='Session', elem_classes='slim-dropdown', info='When saving a session, make sure to keep the initial part of the filename (session_chat, session_notebook, or session_default), otherwise it will not appear on this list afterwards.') - ui.create_refresh_button(shared.gradio['session_menu'], lambda: None, lambda: {'choices': utils.get_available_sessions()}, ['refresh-button']) - shared.gradio['save_session'] = gr.Button('💾', elem_classes=['refresh-button']) - shared.gradio['delete_session'] = gr.Button('🗑️', elem_classes=['refresh-button']) + shared.gradio['save_session'] = gr.Button('Save session') + shared.gradio['load_session'] = gr.File(type='binary', file_types=['.json'], label="Upload Session JSON") extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.') extension_status = gr.Markdown() @@ -967,8 +945,8 @@ def create_interface(): shared.gradio['instruction_template'].change( partial(chat.load_character, instruct=True), gradio('instruction_template', 'name1_instruct', 'name2_instruct'), gradio('name1_instruct', 'name2_instruct', 'dummy', 'dummy', 'context_instruct', 'turn_template')) - shared.gradio['upload_chat_history'].upload( - chat.load_history, gradio('upload_chat_history', 'history'), gradio('history')).then( + shared.gradio['load_chat_history'].upload( + chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( chat.redraw_html, shared.reload_inputs, gradio('display')) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) @@ -991,7 +969,9 @@ def create_interface(): lambda: 'characters/instruction-following/', None, gradio('delete_root')).then( lambda: gr.update(visible=True), None, gradio('file_deleter')) - shared.gradio['download_button'].click(chat.save_history_at_user_request, gradio('history', 'character_menu', 'mode'), gradio('download')) + shared.gradio['save_chat_history'].click(lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( + None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f"(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}") + shared.gradio['Submit character'].click(chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')) shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) From 4b6c1d3f080bb36b96ffb25bbc8e843bfe3bf945 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 20:20:23 -0700 Subject: [PATCH 005/169] CSS change --- css/main.css | 4 ++++ server.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index b4066c91..d37e3f63 
100644 --- a/css/main.css +++ b/css/main.css @@ -45,6 +45,10 @@ min-height: 0 } +#save_session { + margin-top: 32px; +} + #accordion { } diff --git a/server.py b/server.py index b53d6a12..a0229995 100644 --- a/server.py +++ b/server.py @@ -826,7 +826,7 @@ def create_interface(): with gr.Column(): if not shared.args.multi_user: - shared.gradio['save_session'] = gr.Button('Save session') + shared.gradio['save_session'] = gr.Button('Save session', elem_id="save_session") shared.gradio['load_session'] = gr.File(type='binary', file_types=['.json'], label="Upload Session JSON") extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.') From 32c564509ed615a9627c7dc71fc55d5246fcfd04 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 21:13:16 -0700 Subject: [PATCH 006/169] Fix loading session in chat mode --- modules/chat.py | 4 ++++ modules/shared.py | 4 ++++ server.py | 22 ++++++++++++++-------- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 57a04606..5e4eb245 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -418,6 +418,10 @@ def save_persistent_history(history, character, mode): def load_persistent_history(state): + if shared.session_is_loading: + shared.session_is_loading = False + return state['history'] + if state['mode'] == 'instruct': return state['history'] diff --git a/modules/shared.py b/modules/shared.py index a2782e65..bac3fa8c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -30,6 +30,10 @@ reload_inputs = [] # Parameters for reloading the chat interface # For restarting the interface need_restart = False +# To prevent the persistent chat history from being loaded when +# a session JSON file is being loaded in chat mode +session_is_loading = False + settings = { 'dark_theme': True, 'autoload_model': False, diff --git a/server.py b/server.py index a0229995..6715cc4d 100644 --- a/server.py +++ b/server.py @@ -511,21 +511,27 @@ def create_file_saving_event_handlers(): def load_session(file, state): decoded_file = file if type(file) == str else file.decode('utf-8') data = json.loads(decoded_file) + + if shared.is_chat() and 'character_menu' in data and state.get('character_menu') != data.get('character_menu'): + shared.session_is_loading = True + state.update(data) - - if shared.is_chat(): - chat.save_persistent_history(state['history'], state['character_menu'], state['mode']) - return state shared.gradio['save_session'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( - None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") + None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") - shared.gradio['load_session'].upload( - load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) + if shared.is_chat(): + shared.gradio['load_session'].upload( + 
load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')) + else: + shared.gradio['load_session'].upload( + load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) def set_interface_arguments(interface_mode, extensions, bool_active): From 6bf9e855f85854b6585e518f33c1420e0f718524 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 21:39:56 -0700 Subject: [PATCH 007/169] Minor change --- server.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server.py b/server.py index 6715cc4d..6f0c5b67 100644 --- a/server.py +++ b/server.py @@ -525,11 +525,13 @@ def create_file_saving_event_handlers(): if shared.is_chat(): shared.gradio['load_session'].upload( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( chat.redraw_html, shared.reload_inputs, gradio('display')) else: shared.gradio['load_session'].upload( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) From e074538b5886c78ef546fb85f1104e5e61295088 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 21:45:10 -0700 Subject: [PATCH 008/169] Revert "Make long_replies ban the eos token as well" This reverts commit 6c521ce96787552a9604c344b9949945ef359a59. 
--- extensions/long_replies/script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/long_replies/script.py b/extensions/long_replies/script.py index a30b05a7..035e8c9e 100644 --- a/extensions/long_replies/script.py +++ b/extensions/long_replies/script.py @@ -28,7 +28,7 @@ class MyLogits(LogitsProcessor): def __call__(self, input_ids, scores): if input_ids.shape[-1] - initial_size < params["min_length"]: scores[...,self.newline_id] = -1000 - scores[...,shared.tokenizer.eos_token_id] = -1000 + # scores[...,shared.tokenizer.eos_token_id] = -1000 # probs = torch.softmax(scores, dim=-1, dtype=torch.float) # probs[0] /= probs[0].sum() From 3390196a1421fe66dd946d848b33936b8a4a42e9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 2 Aug 2023 22:13:57 -0700 Subject: [PATCH 009/169] Add some javascript alerts for confirmations --- server.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/server.py b/server.py index 6f0c5b67..679c9e93 100644 --- a/server.py +++ b/server.py @@ -528,12 +528,14 @@ def create_file_saving_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + None, None, None, _js='() => {alert("The session has been loaded.")}') else: shared.gradio['load_session'].upload( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False) + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + None, None, None, _js='() => {alert("The session has been loaded.")}') def set_interface_arguments(interface_mode, extensions, bool_active): @@ -955,7 +957,8 @@ def create_interface(): shared.gradio['load_chat_history'].upload( chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + None, None, None, _js='() => {alert("The history has been loaded.")}') shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) @@ -977,14 +980,20 @@ def create_interface(): lambda: 'characters/instruction-following/', None, gradio('delete_root')).then( lambda: gr.update(visible=True), None, gradio('file_deleter')) - shared.gradio['save_chat_history'].click(lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( + shared.gradio['save_chat_history'].click( + lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f"(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}") - shared.gradio['Submit character'].click(chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')) + shared.gradio['Submit 
character'].click( + chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( + None, None, None, _js='() => {alert("The character has been loaded.")}') + + shared.gradio['Submit tavern character'].click( + chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( + None, None, None, _js='() => {alert("The character has been loaded.")}') + shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) - - shared.gradio['Submit tavern character'].click(chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')) shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['your_picture'].change( From 3e70bce576926b6c9e1a9b2fcefeab79749af1a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 06:57:21 -0700 Subject: [PATCH 010/169] Properly format exceptions in the UI --- modules/training.py | 4 ++-- server.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/training.py b/modules/training.py index c98fded2..ef833679 100644 --- a/modules/training.py +++ b/modules/training.py @@ -483,7 +483,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch exc = traceback.format_exc() logger.error('Failed to reload the model.') print(exc) - return exc + return exc.replace('\n', '\n\n') # == Start prepping the model itself == if not hasattr(shared.model, 'lm_head') or hasattr(shared.model.lm_head, 'weight'): @@ -518,7 +518,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin") set_peft_model_state_dict(lora_model, state_dict_peft) except: - yield traceback.format_exc() + yield traceback.format_exc().replace('\n', '\n\n') return if shared.args.monkey_patch: diff --git a/server.py b/server.py index 679c9e93..601ae33f 100644 --- a/server.py +++ b/server.py @@ -75,7 +75,7 @@ def load_model_wrapper(selected_model, loader, autoload=False): exc = traceback.format_exc() logger.error('Failed to load the model.') print(exc) - yield exc + yield exc.replace('\n', '\n\n') def load_lora_wrapper(selected_loras): @@ -159,7 +159,7 @@ def download_model_wrapper(repo_id, progress=gr.Progress()): yield ("Done!") except: progress(1.0) - yield traceback.format_exc() + yield traceback.format_exc().replace('\n', '\n\n') def create_model_menus(): From 87dab03dc02eb48b8fd7c8b9a2acb8281678798e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 11:00:36 -0300 Subject: [PATCH 011/169] Add the --cpu option for llama.cpp to prevent CUDA from being used (#3432) --- README.md | 5 +++-- modules/llamacpp_hf.py | 17 ++++++++++++++--- modules/llamacpp_model.py | 22 +++++++++++++++++++--- modules/loaders.py | 2 ++ modules/shared.py | 4 ++-- 5 files changed, 40 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 
073a841d..6ec84ba2 100644 --- a/README.md +++ b/README.md @@ -249,8 +249,9 @@ Optionally, you can use the following command-line flags: | `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. | | `--n_ctx N_CTX` | Size of the prompt context. | | `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). | -| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama2 70b. | -| `--rms_norm_eps RMS_NORM_EPS` | Must be 1e-5 for llama2 70b. | +| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama-2 70b. | +| `--rms_norm_eps RMS_NORM_EPS` | 5e-6 is a good value for llama-2 models. | +| `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. | #### AutoGPTQ diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index 349a5782..e9f4ade6 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -10,13 +10,22 @@ from transformers.modeling_outputs import CausalLMOutputWithPast from modules import shared from modules.logging_colors import logger +import llama_cpp + if torch.cuda.is_available() and not torch.version.hip: try: - from llama_cpp_cuda import Llama + import llama_cpp_cuda except: - from llama_cpp import Llama + llama_cpp_cuda = None else: - from llama_cpp import Llama + llama_cpp_cuda = None + + +def llama_cpp_lib(): + if shared.args.cpu or llama_cpp_cuda is None: + return llama_cpp + else: + return llama_cpp_cuda class LlamacppHF(PreTrainedModel): @@ -111,5 +120,7 @@ class LlamacppHF(PreTrainedModel): 'logits_all': True, } + Llama = llama_cpp_lib().Llama model = Llama(**params) + return LlamacppHF(model) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 0f9c3470..53177f4f 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -7,13 +7,22 @@ from modules import shared from modules.callbacks import Iteratorize from modules.logging_colors import logger +import llama_cpp + if torch.cuda.is_available() and not torch.version.hip: try: - from llama_cpp_cuda import Llama, LlamaCache, LogitsProcessorList + import llama_cpp_cuda except: - from llama_cpp import Llama, LlamaCache, LogitsProcessorList + llama_cpp_cuda = None else: - from llama_cpp import Llama, LlamaCache, LogitsProcessorList + llama_cpp_cuda = None + + +def llama_cpp_lib(): + if shared.args.cpu or llama_cpp_cuda is None: + return llama_cpp + else: + return llama_cpp_cuda def ban_eos_logits_processor(eos_token, input_ids, logits): @@ -30,6 +39,10 @@ class LlamaCppModel: @classmethod def from_pretrained(self, path): + + Llama = llama_cpp_lib().Llama + LlamaCache = llama_cpp_lib().LlamaCache + result = self() cache_capacity = 0 if shared.args.cache_capacity is not None: @@ -74,6 +87,9 @@ class LlamaCppModel: return self.model.detokenize(tokens) def generate(self, prompt, state, callback=None): + + LogitsProcessorList = llama_cpp_lib().LogitsProcessorList + prompt = prompt if type(prompt) is str else prompt.decode() completion_chunks = self.model.create_completion( prompt=prompt, diff --git a/modules/loaders.py b/modules/loaders.py index 68b48204..aa1afcb8 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -41,6 +41,7 @@ loaders_and_params = { 'llama_cpp_seed', 'compress_pos_emb', 'alpha_value', + 'cpu', ], 'llamacpp_HF': [ 'n_ctx', @@ -55,6 +56,7 @@ loaders_and_params = { 'llama_cpp_seed', 'compress_pos_emb', 'alpha_value', + 'cpu', 'llamacpp_HF_info', ], 'Transformers': [ diff 
--git a/modules/shared.py b/modules/shared.py index bac3fa8c..fc9ba3cf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -132,8 +132,8 @@ parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity. parser.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.') parser.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') parser.add_argument('--llama_cpp_seed', type=int, default=0, help='Seed for llama-cpp models. Default 0 (random)') -parser.add_argument('--n_gqa', type=int, default=0, help='grouped-query attention. Must be 8 for llama2 70b.') -parser.add_argument('--rms_norm_eps', type=float, default=0, help='Must be 1e-5 for llama2 70b.') +parser.add_argument('--n_gqa', type=int, default=0, help='grouped-query attention. Must be 8 for llama-2 70b.') +parser.add_argument('--rms_norm_eps', type=float, default=0, help='5e-6 is a good value for llama-2 models.') # GPTQ parser.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.') From 1839dff7639ff03ebfb6a5d8984070f7fac9d4e0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 08:13:17 -0700 Subject: [PATCH 012/169] Use Esc to Stop the generation --- css/main.js | 11 +++++++++++ server.py | 4 ++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/css/main.js b/css/main.js index 9663d464..7a2368fe 100644 --- a/css/main.js +++ b/css/main.js @@ -24,3 +24,14 @@ for(i = 0; i < textareaElements.length; i++) { textareaElements[i].classList.add('pretty_scrollbar'); textareaElements[i].style.resize = "none"; } + +// Stop generation on Esc pressed +document.addEventListener("keydown", function(event) { + if (event.key === "Escape") { + // Find the element with id 'stop' and click it + var stopButton = document.getElementById("stop"); + if (stopButton) { + stopButton.click(); + } + } +}); diff --git a/server.py b/server.py index 601ae33f..10ea4ece 100644 --- a/server.py +++ b/server.py @@ -741,7 +741,7 @@ def create_interface(): with gr.Row(): shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes="small-button") - shared.gradio['Stop'] = gr.Button('Stop', elem_classes="small-button") + shared.gradio['Stop'] = gr.Button('Stop', elem_classes="small-button", elem_id='stop') shared.gradio['Undo'] = gr.Button('Undo', elem_classes="small-button") shared.gradio['Regenerate'] = gr.Button('Regenerate', elem_classes="small-button") @@ -772,7 +772,7 @@ def create_interface(): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) with gr.Row(): shared.gradio['Generate'] = gr.Button('Generate', variant='primary') - shared.gradio['Stop'] = gr.Button('Stop') + shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') shared.gradio['Continue'] = gr.Button('Continue') shared.gradio['count_tokens'] = gr.Button('Count tokens') From d578baeb2c2b4363fbbdb9446b332d5d062af0c6 Mon Sep 17 00:00:00 2001 From: rafa-9 <92696534+rafa-9@users.noreply.github.com> Date: Thu, 3 Aug 2023 14:56:40 -0400 Subject: [PATCH 013/169] Use character settings from API properties if present (#3428) --- api-examples/api-example-chat-stream.py | 10 ++++++++-- api-examples/api-example-chat.py | 10 ++++++++-- extensions/api/util.py | 16 ++++++++-------- 3 files 
changed, 24 insertions(+), 12 deletions(-) diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py index 493661c2..2914d451 100644 --- a/api-examples/api-example-chat-stream.py +++ b/api-examples/api-example-chat-stream.py @@ -25,9 +25,15 @@ async def run(user_input, history): 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct' 'character': 'Example', 'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset - # 'context_instruct': '', # Optional 'your_name': 'You', - + # 'name1': 'name of user', # Optional + # 'name2': 'name of character', # Optional + # 'context': 'character context', # Optional + # 'greeting': 'greeting', # Optional + # 'name1_instruct': 'You', # Optional + # 'name2_instruct': 'Assistant', # Optional + # 'context_instruct': 'context_instruct', # Optional + # 'turn_template': 'turn_template', # Optional 'regenerate': False, '_continue': False, 'stop_at_newline': False, diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py index 31641815..e2797f1e 100644 --- a/api-examples/api-example-chat.py +++ b/api-examples/api-example-chat.py @@ -19,9 +19,15 @@ def run(user_input, history): 'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct' 'character': 'Example', 'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset - # 'context_instruct': '', # Optional 'your_name': 'You', - + # 'name1': 'name of user', # Optional + # 'name2': 'name of character', # Optional + # 'context': 'character context', # Optional + # 'greeting': 'greeting', # Optional + # 'name1_instruct': 'You', # Optional + # 'name2_instruct': 'Assistant', # Optional + # 'context_instruct': 'context_instruct', # Optional + # 'turn_template': 'turn_template', # Optional 'regenerate': False, '_continue': False, 'stop_at_newline': False, diff --git a/extensions/api/util.py b/extensions/api/util.py index 5cc259db..ef58a70f 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -69,14 +69,14 @@ def build_parameters(body, chat=False): 'stop_at_newline': bool(body.get('stop_at_newline', shared.settings['stop_at_newline'])), 'chat_generation_attempts': int(body.get('chat_generation_attempts', shared.settings['chat_generation_attempts'])), 'mode': str(body.get('mode', 'chat')), - 'name1': name1, - 'name2': name2, - 'context': context, - 'greeting': greeting, - 'name1_instruct': name1_instruct, - 'name2_instruct': name2_instruct, - 'context_instruct': body.get('context_instruct', context_instruct), - 'turn_template': turn_template, + 'name1': str(body.get('name1', name1)), + 'name2': str(body.get('name2', name2)), + 'context': str(body.get('context', context)), + 'greeting': str(body.get('greeting', greeting)), + 'name1_instruct': str(body.get('name1_instruct', name1_instruct)), + 'name2_instruct': str(body.get('name2_instruct', name2_instruct)), + 'context_instruct': str(body.get('context_instruct', context_instruct)), + 'turn_template': str(body.get('turn_template', turn_template)), 'chat-instruct_command': str(body.get('chat-instruct_command', shared.settings['chat-instruct_command'])), 'history': body.get('history', {'internal': [], 'visible': []}) }) From f61573bbde6531b5ac574e5111960d8a9b843f66 Mon Sep 17 00:00:00 2001 From: Paul DeCarlo Date: Thu, 3 Aug 2023 21:57:33 +0300 Subject: [PATCH 014/169] Add standalone Dockerfile for NVIDIA Jetson (#3336) --- docker/Dockerfile.jetson | 51 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 
docker/Dockerfile.jetson diff --git a/docker/Dockerfile.jetson b/docker/Dockerfile.jetson new file mode 100644 index 00000000..cefbc3c2 --- /dev/null +++ b/docker/Dockerfile.jetson @@ -0,0 +1,51 @@ +#Standalone Dockerfile for text-generation-webui on NVIDIA Jetson Embedded devices + +FROM nvcr.io/nvidia/l4t-pytorch:r35.2.1-pth2.0-py3 as builder +ENV TORCH_CUDA_ARCH_LIST Turing +RUN apt-get update && \ + apt-get install -y python3 python3-pip git build-essential python3-dev + +RUN pip3 install --upgrade pip setuptools +RUN git clone https://github.com/g588928812/bitsandbytes_jetsonX.git /build +WORKDIR /build +RUN CUDA_VERSION=118 make cuda11x +RUN mkdir /wheels +RUN python3 setup.py bdist_wheel -d /wheels +RUN rm -rf /build +RUN git clone https://github.com/oobabooga/GPTQ-for-LLaMa /build +WORKDIR /build +RUN pip3 install -r requirements.txt +RUN python3 setup_cuda.py bdist_wheel -d /wheels + +FROM nvcr.io/nvidia/l4t-pytorch:r35.2.1-pth2.0-py3 +COPY --from=builder /wheels /wheels +COPY --from=builder /build /build +RUN apt-get update && \ + apt-get install --no-install-recommends -y git python3-dev python3 python3-pip make g++ && \ + rm -rf /var/lib/apt/lists/* +RUN pip3 install /wheels/*.whl +RUN rm -rf /wheels +WORKDIR /build +RUN pip3 install -r requirements.txt +RUN git clone https://github.com/oobabooga/text-generation-webui /app + +WORKDIR /app +#ENV WEBUI_VERSION="2908a515877ffde2b1684b2353f6d72e6cb4d31b" +#RUN git reset --hard ${WEBUI_VERSION} +RUN pip3 install --upgrade pip setuptools +RUN pip3 install protobuf>=3.3.0 +RUN pip3 install -r requirements.txt +#Force to use bitsandbytes_jetsonX +RUN pip3 uninstall -y bitsandbytes +RUN mkdir /app/repositories +RUN mv /build /app/repositories/GPTQ-for-LLaMa + +#Remove Python 3.10 specific macros +RUN sed -i 's/@functools.cache/@functools.lru_cache(maxsize=None)/g' /app/modules/chat.py +RUN sed -i 's/@functools.cache/@functools.lru_cache(maxsize=None)/g' /app/modules/loaders.py +RUN sed -i 's/@functools.cache/@functools.lru_cache(maxsize=None)/g' /app/modules/presets.py + +EXPOSE 7860 + +ENV CLI_ARGS="--listen" +CMD python3 server.py ${CLI_ARGS} \ No newline at end of file From 32e7cbb635612be3aeff6e2598434e466429a26e Mon Sep 17 00:00:00 2001 From: matatonic <73265741+matatonic@users.noreply.github.com> Date: Thu, 3 Aug 2023 15:02:54 -0400 Subject: [PATCH 015/169] More models: +StableBeluga2 (#3415) --- characters/instruction-following/StableBeluga2.yaml | 4 ++++ models/config.yaml | 5 +++++ 2 files changed, 9 insertions(+) create mode 100644 characters/instruction-following/StableBeluga2.yaml diff --git a/characters/instruction-following/StableBeluga2.yaml b/characters/instruction-following/StableBeluga2.yaml new file mode 100644 index 00000000..cd5675f8 --- /dev/null +++ b/characters/instruction-following/StableBeluga2.yaml @@ -0,0 +1,4 @@ +user: "### User:" +bot: "### Assistant:" +turn_template: "<|user|>\n<|user-message|>\n\n<|bot|>\n<|bot-message|>\n\n" +context: "### System:\nThis is a system prompt, please behave and help the user.\n\n" diff --git a/models/config.yaml b/models/config.yaml index 0c1027c0..4d618de2 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -283,3 +283,8 @@ TheBloke_WizardLM-30B-GPTQ: .*newhope: mode: 'instruct' instruction_template: 'NewHope' +.*stablebeluga2: + mode: 'instruct' + instruction_template: 'StableBeluga2' + truncation_length: 4096 + rms_norm_eps: 5.0e-6 From 8f98268252ab6fe4b54609e7abac752eff268ea5 Mon Sep 17 00:00:00 2001 From: matatonic <73265741+matatonic@users.noreply.github.com> 
Date: Thu, 3 Aug 2023 15:10:49 -0400 Subject: [PATCH 016/169] extensions/openai: include content-length for json replies (#3416) --- extensions/openai/script.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index f95205a5..d1faa019 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -67,10 +67,13 @@ class Handler(BaseHTTPRequestHandler): self.send_response(code) self.send_access_control_headers() self.send_header('Content-Type', 'application/json') - self.end_headers() response = json.dumps(ret) r_utf8 = response.encode('utf-8') + + self.send_header('Content-Length', str(len(r_utf8))) + self.end_headers() + self.wfile.write(r_utf8) if not no_debug: debug_msg(r_utf8) From 4e6dc6d99d4b8288d632a966750b2b42e3c8d47e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 14:36:35 -0700 Subject: [PATCH 017/169] Add Contributing guidelines --- .github/pull_request_template.md | 3 +++ README.md | 5 +---- 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..51e26b13 --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,3 @@ +## Checklist: + +- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines). diff --git a/README.md b/README.md index 6ec84ba2..4756672e 100644 --- a/README.md +++ b/README.md @@ -341,10 +341,7 @@ The presets that are included by default are the result of a contest that receiv ## Contributing -* Pull requests, suggestions, and issue reports are welcome. -* Make sure to carefully [search](https://github.com/oobabooga/text-generation-webui/issues) existing issues before starting a new one. -* If you have some experience with git, testing an open pull request and leaving a comment on whether it works as expected or not is immensely helpful. -* A simple way to contribute, even if you are not a programmer, is to leave a 👍 on an issue or pull request that you find relevant. +If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines). 
## Community From f4005164f4318ce8ba728d0ed7de7b7d40315bf3 Mon Sep 17 00:00:00 2001 From: Pete <33569918+jparmstr@users.noreply.github.com> Date: Thu, 3 Aug 2023 19:01:15 -0400 Subject: [PATCH 018/169] Fix llama.cpp truncation (#3400) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- modules/llamacpp_model.py | 7 +++++++ modules/text_generation.py | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index 53177f4f..e5401378 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -6,6 +6,7 @@ import torch from modules import shared from modules.callbacks import Iteratorize from modules.logging_colors import logger +from modules.text_generation import get_max_prompt_length import llama_cpp @@ -91,6 +92,12 @@ class LlamaCppModel: LogitsProcessorList = llama_cpp_lib().LogitsProcessorList prompt = prompt if type(prompt) is str else prompt.decode() + + # Handle truncation + prompt = self.encode(prompt) + prompt = prompt[-get_max_prompt_length(state):] + prompt = self.decode(prompt).decode('utf-8') + completion_chunks = self.model.create_completion( prompt=prompt, max_tokens=state['max_new_tokens'], diff --git a/modules/text_generation.py b/modules/text_generation.py index f6f71990..7507a731 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -39,7 +39,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']: input_ids = shared.tokenizer.encode(str(prompt)) input_ids = np.array(input_ids).reshape(1, len(input_ids)) - return input_ids else: input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens) From 4b3384e353b9630bdc16efe946c02daa78b33f48 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 3 Aug 2023 17:10:57 -0700 Subject: [PATCH 019/169] Handle unfinished lists during markdown streaming --- download-model.py | 12 ++++++------ modules/html_generator.py | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/download-model.py b/download-model.py index 0f650516..e1afa9ef 100644 --- a/download-model.py +++ b/download-model.py @@ -75,12 +75,12 @@ class ModelDownloader: if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')): is_lora = True - is_pytorch = re.match("(pytorch|adapter|gptq)_model.*\.bin", fname) - is_safetensors = re.match(".*\.safetensors", fname) - is_pt = re.match(".*\.pt", fname) - is_ggml = re.match(".*ggml.*\.bin", fname) - is_tokenizer = re.match("(tokenizer|ice|spiece).*\.model", fname) - is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer + is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname) + is_safetensors = re.match(r".*\.safetensors", fname) + is_pt = re.match(r".*\.pt", fname) + is_ggml = re.match(r".*ggml.*\.bin", fname) + is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) + is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer if any((is_pytorch, is_safetensors, is_pt, is_ggml, is_tokenizer, is_text)): if 'lfs' in dict[i]: sha256.append([fname, dict[i]['lfs']['oid']]) diff --git a/modules/html_generator.py b/modules/html_generator.py index ab0aeab0..c6ca13b6 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -61,8 +61,26 @@ def convert_to_markdown(string): if is_code: result = result + '```' 
# Unfinished code block
-    string = result.strip()
-    return markdown.markdown(string, extensions=['fenced_code', 'tables'])
+    result = result.strip()
+
+    # Unfinished list, like "\n1.". A |delete| string is added and then
+    # removed to force a <ul> or <ol> to be generated instead of a <p>.

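A standalone illustration of the trick described in that comment: while a numbered list is still streaming, a throwaway |delete| item is appended before rendering so the markdown converter emits a list, and the placeholder is then cut back out of the resulting HTML. This is only a sketch; the real function also handles code blocks and tables:

```python
import re

import markdown


def render_partial_list(text: str) -> str:
    delete_str = '|delete|'
    if re.search(r'(\d+\.?)$', text):        # text currently ends in "2" or "2."
        if not text.endswith('.'):
            text += '.'
        text = re.sub(r'(\d+\.)$', r'\g<1> ' + delete_str, text)
        html = markdown.markdown(text, extensions=['fenced_code', 'tables'])
        pos = html.rfind(delete_str)
        if pos > -1:
            html = html[:pos] + html[pos + len(delete_str):]
        return html

    return markdown.markdown(text, extensions=['fenced_code', 'tables'])


print(render_partial_list("Steps:\n\n1. Boil water\n2."))  # renders an ordered list, not a bare paragraph
```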
    . + if re.search(r'(\d+\.?)$', result): + delete_str = '|delete|' + + if not result.endswith('.'): + result += '.' + + result = re.sub(r'(\d+\.)$', r'\g<1> ' + delete_str, result) + + html = markdown.markdown(result, extensions=['fenced_code', 'tables']) + pos = html.rfind(delete_str) + if pos > -1: + html = html[:pos] + html[pos + len(delete_str):] + else: + html = markdown.markdown(result, extensions=['fenced_code', 'tables']) + + return html def generate_basic_html(string): From 2336b75d925a94ff121073f6287bcdbba81f0e7b Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Thu, 3 Aug 2023 22:58:37 -0600 Subject: [PATCH 020/169] Remove unnecessary chat.js (#3445) --- css/chat.css | 7 ++++++- css/chat.js | 4 ---- modules/ui.py | 2 -- server.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) delete mode 100644 css/chat.js diff --git a/css/chat.css b/css/chat.css index ad76f5cc..67bbe512 100644 --- a/css/chat.css +++ b/css/chat.css @@ -26,7 +26,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #extensions { padding: 0; - padding: 0; } #gradio-chatbot { @@ -46,6 +45,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { min-width: 0 !important; } +#main > :first-child, #extensions { + max-width: 800px; + margin-left: auto; + margin-right: auto; +} + @media screen and (max-width: 688px) { #main { padding: 0px; diff --git a/css/chat.js b/css/chat.js deleted file mode 100644 index e304f125..00000000 --- a/css/chat.js +++ /dev/null @@ -1,4 +0,0 @@ -document.getElementById("main").childNodes[0].style = "max-width: 800px; margin-left: auto; margin-right: auto"; -document.getElementById("extensions").style.setProperty("max-width", "800px"); -document.getElementById("extensions").style.setProperty("margin-left", "auto"); -document.getElementById("extensions").style.setProperty("margin-right", "auto"); diff --git a/modules/ui.py b/modules/ui.py index df36a331..eed2ef66 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -13,8 +13,6 @@ with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f: chat_css = f.read() with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f: main_js = f.read() -with open(Path(__file__).resolve().parent / '../css/chat.js', 'r') as f: - chat_js = f.read() with open(Path(__file__).resolve().parent / '../css/save_files.js', 'r') as f: save_files_js = f.read() diff --git a/server.py b/server.py index 10ea4ece..d1a23bbe 100644 --- a/server.py +++ b/server.py @@ -597,7 +597,7 @@ def create_interface(): # css/js strings css = ui.css if not shared.is_chat() else ui.css + ui.chat_css - js = ui.main_js if not shared.is_chat() else ui.main_js + ui.chat_js + js = ui.main_js css += apply_extensions('css') js += apply_extensions('js') From ed57a79c6e44c1eafd4667ebd19e72a2143b7a4d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 4 Aug 2023 02:29:14 -0300 Subject: [PATCH 021/169] Add back silero preview by @missionfloyd (#3446) --- extensions/silero_tts/harvard_sentences.txt | 720 ++++++++++++++++++++ extensions/silero_tts/script.py | 42 +- extensions/silero_tts/style.css | 8 + 3 files changed, 768 insertions(+), 2 deletions(-) create mode 100644 extensions/silero_tts/harvard_sentences.txt create mode 100644 extensions/silero_tts/style.css diff --git a/extensions/silero_tts/harvard_sentences.txt b/extensions/silero_tts/harvard_sentences.txt new file mode 100644 index 00000000..958d7f3c --- /dev/null +++ b/extensions/silero_tts/harvard_sentences.txt @@ -0,0 +1,720 @@ +The birch canoe slid on the smooth 
planks. +Glue the sheet to the dark blue background. +It's easy to tell the depth of a well. +These days a chicken leg is a rare dish. +Rice is often served in round bowls. +The juice of lemons makes fine punch. +The box was thrown beside the parked truck. +The hogs were fed chopped corn and garbage. +Four hours of steady work faced us. +A large size in stockings is hard to sell. +The boy was there when the sun rose. +A rod is used to catch pink salmon. +The source of the huge river is the clear spring. +Kick the ball straight and follow through. +Help the woman get back to her feet. +A pot of tea helps to pass the evening. +Smoky fires lack flame and heat. +The soft cushion broke the man's fall. +The salt breeze came across from the sea. +The girl at the booth sold fifty bonds. +The small pup gnawed a hole in the sock. +The fish twisted and turned on the bent hook. +Press the pants and sew a button on the vest. +The swan dive was far short of perfect. +The beauty of the view stunned the young boy. +Two blue fish swam in the tank. +Her purse was full of useless trash. +The colt reared and threw the tall rider. +It snowed, rained, and hailed the same morning. +Read verse out loud for pleasure. +Hoist the load to your left shoulder. +Take the winding path to reach the lake. +Note closely the size of the gas tank. +Wipe the grease off his dirty face. +Mend the coat before you go out. +The wrist was badly strained and hung limp. +The stray cat gave birth to kittens. +The young girl gave no clear response. +The meal was cooked before the bell rang. +What joy there is in living. +A king ruled the state in the early days. +The ship was torn apart on the sharp reef. +Sickness kept him home the third week. +The wide road shimmered in the hot sun. +The lazy cow lay in the cool grass. +Lift the square stone over the fence. +The rope will bind the seven books at once. +Hop over the fence and plunge in. +The friendly gang left the drug store. +Mesh wire keeps chicks inside. +The frosty air passed through the coat. +The crooked maze failed to fool the mouse. +Adding fast leads to wrong sums. +The show was a flop from the very start. +A saw is a tool used for making boards. +The wagon moved on well oiled wheels. +March the soldiers past the next hill. +A cup of sugar makes sweet fudge. +Place a rosebush near the porch steps. +Both lost their lives in the raging storm. +We talked of the side show in the circus. +Use a pencil to write the first draft. +He ran half way to the hardware store. +The clock struck to mark the third period. +A small creek cut across the field. +Cars and busses stalled in snow drifts. +The set of china hit the floor with a crash. +This is a grand season for hikes on the road. +The dune rose from the edge of the water. +Those words were the cue for the actor to leave. +A yacht slid around the point into the bay. +The two met while playing on the sand. +The ink stain dried on the finished page. +The walled town was seized without a fight. +The lease ran out in sixteen weeks. +A tame squirrel makes a nice pet. +The horn of the car woke the sleeping cop. +The heart beat strongly and with firm strokes. +The pearl was worn in a thin silver ring. +The fruit peel was cut in thick slices. +The Navy attacked the big task force. +See the cat glaring at the scared mouse. +There are more than two factors here. +The hat brim was wide and too droopy. +The lawyer tried to lose his case. +The grass curled around the fence post. +Cut the pie into large parts. +Men strive but seldom get rich. 
+Always close the barn door tight. +He lay prone and hardly moved a limb. +The slush lay deep along the street. +A wisp of cloud hung in the blue air. +A pound of sugar costs more than eggs. +The fin was sharp and cut the clear water. +The play seems dull and quite stupid. +Bail the boat to stop it from sinking. +The term ended in late June that year. +A tusk is used to make costly gifts. +Ten pins were set in order. +The bill was paid every third week. +Oak is strong and also gives shade. +Cats and dogs each hate the other. +The pipe began to rust while new. +Open the crate but don't break the glass. +Add the sum to the product of these three. +Thieves who rob friends deserve jail. +The ripe taste of cheese improves with age. +Act on these orders with great speed. +The hog crawled under the high fence. +Move the vat over the hot fire. +The bark of the pine tree was shiny and dark. +Leaves turn brown and yellow in the fall. +The pennant waved when the wind blew. +Split the log with a quick, sharp blow. +Burn peat after the logs give out. +He ordered peach pie with ice cream. +Weave the carpet on the right hand side. +Hemp is a weed found in parts of the tropics. +A lame back kept his score low. +We find joy in the simplest things. +Type out three lists of orders. +The harder he tried the less he got done. +The boss ran the show with a watchful eye. +The cup cracked and spilled its contents. +Paste can cleanse the most dirty brass. +The slang word for raw whiskey is booze. +It caught its hind paw in a rusty trap. +The wharf could be seen at the farther shore. +Feel the heat of the weak dying flame. +The tiny girl took off her hat. +A cramp is no small danger on a swim. +He said the same phrase thirty times. +Pluck the bright rose without leaves. +Two plus seven is less than ten. +The glow deepened in the eyes of the sweet girl. +Bring your problems to the wise chief. +Write a fond note to the friend you cherish. +Clothes and lodging are free to new men. +We frown when events take a bad turn. +Port is a strong wine with a smoky taste. +The young kid jumped the rusty gate. +Guess the results from the first scores. +A salt pickle tastes fine with ham. +The just claim got the right verdict. +These thistles bend in a high wind. +Pure bred poodles have curls. +The tree top waved in a graceful way. +The spot on the blotter was made by green ink. +Mud was spattered on the front of his white shirt. +The cigar burned a hole in the desk top. +The empty flask stood on the tin tray. +A speedy man can beat this track mark. +He broke a new shoelace that day. +The coffee stand is too high for the couch. +The urge to write short stories is rare. +The pencils have all been used. +The pirates seized the crew of the lost ship. +We tried to replace the coin but failed. +She sewed the torn coat quite neatly. +The sofa cushion is red and of light weight. +The jacket hung on the back of the wide chair. +At that high level the air is pure. +Drop the two when you add the figures. +A filing case is now hard to buy. +An abrupt start does not win the prize. +Wood is best for making toys and blocks. +The office paint was a dull, sad tan. +He knew the skill of the great young actress. +A rag will soak up spilled water. +A shower of dirt fell from the hot pipes. +Steam hissed from the broken valve. +The child almost hurt the small dog. +There was a sound of dry leaves outside. +The sky that morning was clear and bright blue. +Torn scraps littered the stone floor. +Sunday is the best part of the week. 
+The doctor cured him with these pills. +The new girl was fired today at noon. +They felt gay when the ship arrived in port. +Add the store's account to the last cent. +Acid burns holes in wool cloth. +Fairy tales should be fun to write. +Eight miles of woodland burned to waste. +The third act was dull and tired the players. +A young child should not suffer fright. +Add the column and put the sum here. +We admire and love a good cook. +There the flood mark is ten inches. +He carved a head from the round block of marble. +She has a smart way of wearing clothes. +The fruit of a fig tree is apple-shaped. +Corn cobs can be used to kindle a fire. +Where were they when the noise started. +The paper box is full of thumb tacks. +Sell your gift to a buyer at a good gain. +The tongs lay beside the ice pail. +The petals fall with the next puff of wind. +Bring your best compass to the third class. +They could laugh although they were sad. +Farmers came in to thresh the oat crop. +The brown house was on fire to the attic. +The lure is used to catch trout and flounder. +Float the soap on top of the bath water. +A blue crane is a tall wading bird. +A fresh start will work such wonders. +The club rented the rink for the fifth night. +After the dance, they went straight home. +The hostess taught the new maid to serve. +He wrote his last novel there at the inn. +Even the worst will beat his low score. +The cement had dried when he moved it. +The loss of the second ship was hard to take. +The fly made its way along the wall. +Do that with a wooden stick. +Live wires should be kept covered. +The large house had hot water taps. +It is hard to erase blue or red ink. +Write at once or you may forget it. +The doorknob was made of bright clean brass. +The wreck occurred by the bank on Main Street. +A pencil with black lead writes best. +Coax a young calf to drink from a bucket. +Schools for ladies teach charm and grace. +The lamp shone with a steady green flame. +They took the axe and the saw to the forest. +The ancient coin was quite dull and worn. +The shaky barn fell with a loud crash. +Jazz and swing fans like fast music. +Rake the rubbish up and then burn it. +Slash the gold cloth into fine ribbons. +Try to have the court decide the case. +They are pushed back each time they attack. +He broke his ties with groups of former friends. +They floated on the raft to sun their white backs. +The map had an X that meant nothing. +Whitings are small fish caught in nets. +Some ads serve to cheat buyers. +Jerk the rope and the bell rings weakly. +A waxed floor makes us lose balance. +Madam, this is the best brand of corn. +On the islands the sea breeze is soft and mild. +The play began as soon as we sat down. +This will lead the world to more sound and fury. +Add salt before you fry the egg. +The rush for funds reached its peak Tuesday. +The birch looked stark white and lonesome. +The box is held by a bright red snapper. +To make pure ice, you freeze water. +The first worm gets snapped early. +Jump the fence and hurry up the bank. +Yell and clap as the curtain slides back. +They are men who walk the middle of the road. +Both brothers wear the same size. +In some form or other we need fun. +The prince ordered his head chopped off. +The houses are built of red clay bricks. +Ducks fly north but lack a compass. +Fruit flavors are used in fizz drinks. +These pills do less good than others. +Canned pears lack full flavor. +The dark pot hung in the front closet. +Carry the pail to the wall and spill it there. 
+The train brought our hero to the big town. +We are sure that one war is enough. +Gray paint stretched for miles around. +The rude laugh filled the empty room. +High seats are best for football fans. +Tea served from the brown jug is tasty. +A dash of pepper spoils beef stew. +A zestful food is the hot-cross bun. +The horse trotted around the field at a brisk pace. +Find the twin who stole the pearl necklace. +Cut the cord that binds the box tightly. +The red tape bound the smuggled food. +Look in the corner to find the tan shirt. +The cold drizzle will halt the bond drive. +Nine men were hired to dig the ruins. +The junk yard had a mouldy smell. +The flint sputtered and lit a pine torch. +Soak the cloth and drown the sharp odor. +The shelves were bare of both jam or crackers. +A joy to every child is the swan boat. +All sat frozen and watched the screen. +A cloud of dust stung his tender eyes. +To reach the end he needs much courage. +Shape the clay gently into block form. +A ridge on a smooth surface is a bump or flaw. +Hedge apples may stain your hands green. +Quench your thirst, then eat the crackers. +Tight curls get limp on rainy days. +The mute muffled the high tones of the horn. +The gold ring fits only a pierced ear. +The old pan was covered with hard fudge. +Watch the log float in the wide river. +The node on the stalk of wheat grew daily. +The heap of fallen leaves was set on fire. +Write fast if you want to finish early. +His shirt was clean but one button was gone. +The barrel of beer was a brew of malt and hops. +Tin cans are absent from store shelves. +Slide the box into that empty space. +The plant grew large and green in the window. +The beam dropped down on the workmen's head. +Pink clouds floated with the breeze. +She danced like a swan, tall and graceful. +The tube was blown and the tire flat and useless. +It is late morning on the old wall clock. +Let's all join as we sing the last chorus. +The last switch cannot be turned off. +The fight will end in just six minutes. +The store walls were lined with colored frocks. +The peace league met to discuss their plans. +The rise to fame of a person takes luck. +Paper is scarce, so write with much care. +The quick fox jumped on the sleeping cat. +The nozzle of the fire hose was bright brass. +Screw the round cap on as tight as needed. +Time brings us many changes. +The purple tie was ten years old. +Men think and plan and sometimes act. +Fill the ink jar with sticky glue. +He smoke a big pipe with strong contents. +We need grain to keep our mules healthy. +Pack the records in a neat thin case. +The crunch of feet in the snow was the only sound. +The copper bowl shone in the sun's rays. +Boards will warp unless kept dry. +The plush chair leaned against the wall. +Glass will clink when struck by metal. +Bathe and relax in the cool green grass. +Nine rows of soldiers stood in line. +The beach is dry and shallow at low tide. +The idea is to sew both edges straight. +The kitten chased the dog down the street. +Pages bound in cloth make a book. +Try to trace the fine lines of the painting. +Women form less than half of the group. +The zones merge in the central part of town. +A gem in the rough needs work to polish. +Code is used when secrets are sent. +Most of the news is easy for us to hear. +He used the lathe to make brass objects. +The vane on top of the pole revolved in the wind. +Mince pie is a dish served to children. +The clan gathered on each dull night. +Let it burn, it gives us warmth and comfort. 
+A castle built from sand fails to endure. +A child's wit saved the day for us. +Tack the strip of carpet to the worn floor. +Next Tuesday we must vote. +Pour the stew from the pot into the plate. +Each penny shone like new. +The man went to the woods to gather sticks. +The dirt piles were lines along the road. +The logs fell and tumbled into the clear stream. +Just hoist it up and take it away. +A ripe plum is fit for a king's palate. +Our plans right now are hazy. +Brass rings are sold by these natives. +It takes a good trap to capture a bear. +Feed the white mouse some flower seeds. +The thaw came early and freed the stream. +He took the lead and kept it the whole distance. +The key you designed will fit the lock. +Plead to the council to free the poor thief. +Better hash is made of rare beef. +This plank was made for walking on. +The lake sparkled in the red hot sun. +He crawled with care along the ledge. +Tend the sheep while the dog wanders. +It takes a lot of help to finish these. +Mark the spot with a sign painted red. +Take two shares as a fair profit. +The fur of cats goes by many names. +North winds bring colds and fevers. +He asks no person to vouch for him. +Go now and come here later. +A sash of gold silk will trim her dress. +Soap can wash most dirt away. +That move means the game is over. +He wrote down a long list of items. +A siege will crack the strong defense. +Grape juice and water mix well. +Roads are paved with sticky tar. +Fake stones shine but cost little. +The drip of the rain made a pleasant sound. +Smoke poured out of every crack. +Serve the hot rum to the tired heroes. +Much of the story makes good sense. +The sun came up to light the eastern sky. +Heave the line over the port side. +A lathe cuts and trims any wood. +It's a dense crowd in two distinct ways. +His hip struck the knee of the next player. +The stale smell of old beer lingers. +The desk was firm on the shaky floor. +It takes heat to bring out the odor. +Beef is scarcer than some lamb. +Raise the sail and steer the ship northward. +A cone costs five cents on Mondays. +A pod is what peas always grow in. +Jerk the dart from the cork target. +No cement will hold hard wood. +We now have a new base for shipping. +A list of names is carved around the base. +The sheep were led home by a dog. +Three for a dime, the young peddler cried. +The sense of smell is better than that of touch. +No hardship seemed to keep him sad. +Grace makes up for lack of beauty. +Nudge gently but wake her now. +The news struck doubt into restless minds. +Once we stood beside the shore. +A chink in the wall allowed a draft to blow. +Fasten two pins on each side. +A cold dip restores health and zest. +He takes the oath of office each March. +The sand drifts over the sill of the old house. +The point of the steel pen was bent and twisted. +There is a lag between thought and act. +Seed is needed to plant the spring corn. +Draw the chart with heavy black lines. +The boy owed his pal thirty cents. +The chap slipped into the crowd and was lost. +Hats are worn to tea and not to dinner. +The ramp led up to the wide highway. +Beat the dust from the rug onto the lawn. +Say it slowly but make it ring clear. +The straw nest housed five robins. +Screen the porch with woven straw mats. +This horse will nose his way to the finish. +The dry wax protects the deep scratch. +He picked up the dice for a second roll. +These coins will be needed to pay his debt. +The nag pulled the frail cart along. +Twist the valve and release hot steam. 
+The vamp of the shoe had a gold buckle. +The smell of burned rags itches my nose. +New pants lack cuffs and pockets. +The marsh will freeze when cold enough. +They slice the sausage thin with a knife. +The bloom of the rose lasts a few days. +A gray mare walked before the colt. +Breakfast buns are fine with a hot drink. +Bottles hold four kinds of rum. +The man wore a feather in his felt hat. +He wheeled the bike past the winding road. +Drop the ashes on the worn old rug. +The desk and both chairs were painted tan. +Throw out the used paper cup and plate. +A clean neck means a neat collar. +The couch cover and hall drapes were blue. +The stems of the tall glasses cracked and broke. +The wall phone rang loud and often. +The clothes dried on a thin wooden rack. +Turn on the lantern which gives us light. +The cleat sank deeply into the soft turf. +The bills were mailed promptly on the tenth of the month. +To have is better than to wait and hope. +The price is fair for a good antique clock. +The music played on while they talked. +Dispense with a vest on a day like this. +The bunch of grapes was pressed into wine. +He sent the figs, but kept the ripe cherries. +The hinge on the door creaked with old age. +The screen before the fire kept in the sparks. +Fly by night, and you waste little time. +Thick glasses helped him read the print. +Birth and death mark the limits of life. +The chair looked strong but had no bottom. +The kite flew wildly in the high wind. +A fur muff is stylish once more. +The tin box held priceless stones. +We need an end of all such matter. +The case was puzzling to the old and wise. +The bright lanterns were gay on the dark lawn. +We don't get much money but we have fun. +The youth drove with zest, but little skill. +Five years he lived with a shaggy dog. +A fence cuts through the corner lot. +The way to save money is not to spend much. +Shut the hatch before the waves push it in. +The odor of spring makes young hearts jump. +Crack the walnut with your sharp side teeth. +He offered proof in the form of a large chart. +Send the stuff in a thick paper bag. +A quart of milk is water for the most part. +They told wild tales to frighten him. +The three story house was built of stone. +In the rear of the ground floor was a large passage. +A man in a blue sweater sat at the desk. +Oats are a food eaten by horse and man. +Their eyelids droop for want of sleep. +A sip of tea revives his tired friend. +There are many ways to do these things. +Tuck the sheet under the edge of the mat. +A force equal to that would move the earth. +We like to see clear weather. +The work of the tailor is seen on each side. +Take a chance and win a china doll. +Shake the dust from your shoes, stranger. +She was kind to sick old people. +The square wooden crate was packed to be shipped. +The dusty bench stood by the stone wall. +We dress to suit the weather of most days. +Smile when you say nasty words. +A bowl of rice is free with chicken stew. +The water in this well is a source of good health. +Take shelter in this tent, but keep still. +That guy is the writer of a few banned books. +The little tales they tell are false. +The door was barred, locked, and bolted as well. +Ripe pears are fit for a queen's table. +A big wet stain was on the round carpet. +The kite dipped and swayed, but stayed aloft. +The pleasant hours fly by much too soon. +The room was crowded with a wild mob. +This strong arm shall shield your honor. +She blushed when he gave her a white orchid. 
+The beetle droned in the hot June sun. +Press the pedal with your left foot. +Neat plans fail without luck. +The black trunk fell from the landing. +The bank pressed for payment of the debt. +The theft of the pearl pin was kept secret. +Shake hands with this friendly child. +The vast space stretched into the far distance. +A rich farm is rare in this sandy waste. +His wide grin earned many friends. +Flax makes a fine brand of paper. +Hurdle the pit with the aid of a long pole. +A strong bid may scare your partner stiff. +Even a just cause needs power to win. +Peep under the tent and see the clowns. +The leaf drifts along with a slow spin. +Cheap clothes are flashy but don't last. +A thing of small note can cause despair. +Flood the mails with requests for this book. +A thick coat of black paint covered all. +The pencil was cut to be sharp at both ends. +Those last words were a strong statement. +He wrote his name boldly at the top of the sheet. +Dill pickles are sour but taste fine. +Down that road is the way to the grain farmer. +Either mud or dust are found at all times. +The best method is to fix it in place with clips. +If you mumble your speech will be lost. +At night the alarm roused him from a deep sleep. +Read just what the meter says. +Fill your pack with bright trinkets for the poor. +The small red neon lamp went out. +Clams are small, round, soft, and tasty. +The fan whirled its round blades softly. +The line where the edges join was clean. +Breathe deep and smell the piny air. +It matters not if he reads these words or those. +A brown leather bag hung from its strap. +A toad and a frog are hard to tell apart. +A white silk jacket goes with any shoes. +A break in the dam almost caused a flood. +Paint the sockets in the wall dull green. +The child crawled into the dense grass. +Bribes fail where honest men work. +Trample the spark, else the flames will spread. +The hilt of the sword was carved with fine designs. +A round hole was drilled through the thin board. +Footprints showed the path he took up the beach. +She was waiting at my front lawn. +A vent near the edge brought in fresh air. +Prod the old mule with a crooked stick. +It is a band of steel three inches wide. +The pipe ran almost the length of the ditch. +It was hidden from sight by a mass of leaves and shrubs. +The weight of the package was seen on the high scale. +Wake and rise, and step into the green outdoors. +The green light in the brown box flickered. +The brass tube circled the high wall. +The lobes of her ears were pierced to hold rings. +Hold the hammer near the end to drive the nail. +Next Sunday is the twelfth of the month. +Every word and phrase he speaks is true. +He put his last cartridge into the gun and fired. +They took their kids from the public school. +Drive the screw straight into the wood. +Keep the hatch tight and the watch constant. +Sever the twine with a quick snip of the knife. +Paper will dry out when wet. +Slide the catch back and open the desk. +Help the weak to preserve their strength. +A sullen smile gets few friends. +Stop whistling and watch the boys march. +Jerk the cord, and out tumbles the gold. +Slide the tray across the glass top. +The cloud moved in a stately way and was gone. +Light maple makes for a swell room. +Set the piece here and say nothing. +Dull stories make her laugh. +A stiff cord will do to fasten your shoe. +Get the trust fund to the bank early. +Choose between the high road and the low. +A plea for funds seems to come again. 
+He lent his coat to the tall gaunt stranger. +There is a strong chance it will happen once more. +The duke left the park in a silver coach. +Greet the new guests and leave quickly. +When the frost has come it is time for turkey. +Sweet words work better than fierce. +A thin stripe runs down the middle. +A six comes up more often than a ten. +Lush fern grow on the lofty rocks. +The ram scared the school children off. +The team with the best timing looks good. +The farmer swapped his horse for a brown ox. +Sit on the perch and tell the others what to do. +A steep trail is painful for our feet. +The early phase of life moves fast. +Green moss grows on the northern side. +Tea in thin china has a sweet taste. +Pitch the straw through the door of the stable. +The latch on the back gate needed a nail. +The goose was brought straight from the old market. +The sink is the thing in which we pile dishes. +A whiff of it will cure the most stubborn cold. +The facts don't always show who is right. +She flaps her cape as she parades the street. +The loss of the cruiser was a blow to the fleet. +Loop the braid to the left and then over. +Plead with the lawyer to drop the lost cause. +Calves thrive on tender spring grass. +Post no bills on this office wall. +Tear a thin sheet from the yellow pad. +A cruise in warm waters in a sleek yacht is fun. +A streak of color ran down the left edge. +It was done before the boy could see it. +Crouch before you jump or miss the mark. +Pack the kits and don't forget the salt. +The square peg will settle in the round hole. +Fine soap saves tender skin. +Poached eggs and tea must suffice. +Bad nerves are jangled by a door slam. +Ship maps are different from those for planes. +Dimes showered down from all sides. +They sang the same tunes at each party. +The sky in the west is tinged with orange red. +The pods of peas ferment in bare fields. +The horse balked and threw the tall rider. +The hitch between the horse and cart broke. +Pile the coal high in the shed corner. +A gold vase is both rare and costly. +The knife was hung inside its bright sheath. +The rarest spice comes from the far East. +The roof should be tilted at a sharp slant. +A smatter of French is worse than none. +The mule trod the treadmill day and night. +The aim of the contest is to raise a great fund. +To send it now in large amounts is bad. +There is a fine hard tang in salty air. +Cod is the main business of the north shore. +The slab was hewn from heavy blocks of slate. +Dunk the stale biscuits into strong drink. +Hang tinsel from both branches. +Cap the jar with a tight brass cover. +The poor boy missed the boat again. +Be sure to set the lamp firmly in the hole. +Pick a card and slip it under the pack. +A round mat will cover the dull spot. +The first part of the plan needs changing. +A good book informs of what we ought to know. +The mail comes in three batches per day. +You cannot brew tea in a cold pot. +Dots of light betrayed the black cat. +Put the chart on the mantel and tack it down. +The night shift men rate extra pay. +The red paper brightened the dim stage. +See the player scoot to third base. +Slide the bill between the two leaves. +Many hands help get the job done. +We don't like to admit our small faults. +No doubt about the way the wind blows. +Dig deep in the earth for pirate's gold. +The steady drip is worse than a drenching rain. +A flat pack takes less luggage space. +Green ice frosted the punch bowl. +A stuffed chair slipped from the moving van. 
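These sentences are the pool that the extension's new random_sentence() helper draws from when the preview textbox is left empty. A minimal self-contained sketch of that selection step, using the file path from this patch (the strip() is only for display):

```python
import random
from pathlib import Path


def random_sentence(path: str = "extensions/silero_tts/harvard_sentences.txt") -> str:
    # Pick one phonetically balanced sentence to use as TTS preview text.
    with open(Path(path), encoding="utf-8") as f:
        return random.choice(f.read().splitlines()).strip()


if __name__ == "__main__":
    print(random_sentence())
```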
+The stitch will serve but needs to be shortened. +A thin book fits in the side pocket. +The gloss on top made it unfit to read. +The hail pattered on the burnt brown grass. +Seven seals were stamped on great sheets. +Our troops are set to strike heavy blows. +The store was jammed before the sale could start. +It was a bad error on the part of the new judge. +One step more and the board will collapse. +Take the match and strike it against your shoe. +The pot boiled, but the contents failed to jell. +The baby puts his right foot in his mouth. +The bombs left most of the town in ruins. +Stop and stare at the hard working man. +The streets are narrow and full of sharp turns. +The pup jerked the leash as he saw a feline shape. +Open your book to the first page. +Fish evade the net and swim off. +Dip the pail once and let it settle. +Will you please answer that phone. +The big red apple fell to the ground. +The curtain rose and the show was on. +The young prince became heir to the throne. +He sent the boy on a short errand. +Leave now and you will arrive on time. +The corner store was robbed last night. +A gold ring will please most any girl. +The long journey home took a year. +She saw a cat in the neighbor's house. +A pink shell was found on the sandy beach. +Small children came to see him. +The grass and bushes were wet with dew. +The blind man counted his old coins. +A severe storm tore down the barn. +She called his name many times. +When you hear the bell, come quickly. \ No newline at end of file diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index 3ecd5bd9..b96a47fd 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -1,3 +1,4 @@ +import random import time from pathlib import Path @@ -106,6 +107,7 @@ def history_modifier(history): def output_modifier(string, state): global model, current_params, streaming_state + for i in params: if params[i] != current_params[i]: model = load_model() @@ -140,6 +142,35 @@ def setup(): model = load_model() +def random_sentence(): + with open(Path("extensions/silero_tts/harvard_sentences.txt")) as f: + return random.choice(list(f)) + + +def voice_preview(preview_text): + global model, current_params, streaming_state + + for i in params: + if params[i] != current_params[i]: + model = load_model() + current_params = params.copy() + break + + string = tts_preprocessor.preprocess(preview_text or random_sentence()) + + output_file = Path('extensions/silero_tts/outputs/voice_preview.wav') + prosody = f"" + silero_input = f'{prosody}{xmlesc(string)}' + model.save_wav(ssml_text=silero_input, speaker=params['speaker'], sample_rate=int(params['sample_rate']), audio_path=str(output_file)) + + return f'' + + +def custom_css(): + path_to_css = Path(__file__).parent.resolve() / 'style.css' + return open(path_to_css, 'r').read() + + def ui(): # Gradio elements with gr.Accordion("Silero TTS"): @@ -153,13 +184,16 @@ def ui(): v_pitch = gr.Dropdown(value=params['voice_pitch'], choices=voice_pitches, label='Voice pitch') v_speed = gr.Dropdown(value=params['voice_speed'], choices=voice_speeds, label='Voice speed') + with gr.Row(): + preview_text = gr.Text(show_label=False, placeholder="Preview text", elem_id="silero_preview_text") + preview_play = gr.Button("Preview") + preview_audio = gr.HTML(visible=False) + with gr.Row(): convert = gr.Button('Permanently replace audios with the message texts') convert_cancel = gr.Button('Cancel', visible=False) convert_confirm = gr.Button('Confirm (cannot be undone)', 
variant="stop", visible=False) - gr.Markdown('[Click here for Silero audio samples](https://oobabooga.github.io/silero-samples/index.html)') - if shared.is_chat(): # Convert history with confirmation convert_arr = [convert_confirm, convert, convert_cancel] @@ -185,3 +219,7 @@ def ui(): voice.change(lambda x: params.update({"speaker": x}), voice, None) v_pitch.change(lambda x: params.update({"voice_pitch": x}), v_pitch, None) v_speed.change(lambda x: params.update({"voice_speed": x}), v_speed, None) + + # Play preview + preview_text.submit(voice_preview, preview_text, preview_audio) + preview_play.click(voice_preview, preview_text, preview_audio) diff --git a/extensions/silero_tts/style.css b/extensions/silero_tts/style.css new file mode 100644 index 00000000..2ab7aefb --- /dev/null +++ b/extensions/silero_tts/style.css @@ -0,0 +1,8 @@ +.SDAP .hires_opts input[type="number"] { + width: 6em !important; +} + +/* silero_tts preview */ +.form:has(> #silero_preview_text) { + min-width: 75% +} From 8df3cdfd511f3857c5e09038559063ca442e59fc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 4 Aug 2023 13:57:31 -0300 Subject: [PATCH 022/169] Add SSL certificate support (#3453) --- README.md | 2 ++ modules/shared.py | 2 ++ server.py | 16 +++++++++++----- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 4756672e..218fa765 100644 --- a/README.md +++ b/README.md @@ -317,6 +317,8 @@ Optionally, you can use the following command-line flags: | `--auto-launch` | Open the web UI in the default browser upon launch. | | `--gradio-auth USER:PWD` | set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3" | | `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" | +| `--ssl-keyfile SSL_KEYFILE` | The path to the SSL certificate key file. | +| `--ssl-certfile SSL_CERTFILE` | The path to the SSL certificate cert file. | #### API diff --git a/modules/shared.py b/modules/shared.py index fc9ba3cf..51017a1b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -180,6 +180,8 @@ parser.add_argument('--share', action='store_true', help='Create a public URL. T parser.add_argument('--auto-launch', action='store_true', default=False, help='Open the web UI in the default browser upon launch.') parser.add_argument("--gradio-auth", type=str, help='set gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3"', default=None) parser.add_argument("--gradio-auth-path", type=str, help='Set the gradio authentication file path. 
The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3"', default=None) +parser.add_argument("--ssl-keyfile", type=str, help='The path to the SSL certificate key file.', default=None) +parser.add_argument("--ssl-certfile", type=str, help='The path to the SSL certificate cert file.', default=None) # API parser.add_argument('--api', action='store_true', help='Enable the API extension.') diff --git a/server.py b/server.py index d1a23bbe..0e1d199d 100644 --- a/server.py +++ b/server.py @@ -1081,11 +1081,17 @@ def create_interface(): # Launch the interface shared.gradio['interface'].queue() with OpenMonkeyPatch(): - if shared.args.listen: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_name=shared.args.listen_host or '0.0.0.0', server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) - else: - shared.gradio['interface'].launch(prevent_thread_lock=True, share=shared.args.share, server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, auth=auth) - + shared.gradio['interface'].launch( + prevent_thread_lock=True, + share=shared.args.share, + server_name = None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), + server_port=shared.args.listen_port, + inbrowser=shared.args.auto_launch, + auth=auth, + ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, + ssl_keyfile=shared.args.ssl_keyfile, + ssl_certfile=shared.args.ssl_certfile + ) if __name__ == "__main__": # Loading custom settings From 6e30f76ba555defce214e47839c8f4f303317931 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Fri, 4 Aug 2023 17:28:59 -0500 Subject: [PATCH 023/169] Bump bitsandbytes to 0.41.1 (#3457) --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9486f808..fd3b42c4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,8 +19,8 @@ transformers==4.31.* tqdm wandb git+https://github.com/huggingface/peft@96c0277a1b9a381b10ab34dbf84917f9b3b992e6 -bitsandbytes==0.41.0; platform_system != "Windows" -https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.0-py3-none-win_amd64.whl; platform_system == "Windows" +bitsandbytes==0.41.1; platform_system != "Windows" +https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.9/exllama-0.0.9+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" From 23055b21ee59eeea48a8b5696ad04c6fb8bc38f8 Mon Sep 17 00:00:00 2001 From: SodaPrettyCold <139355831+SodaPrettyCold@users.noreply.github.com> Date: Sat, 5 Aug 2023 07:20:28 +0800 Subject: [PATCH 024/169] [Bug fix] Remove html tags form the Prompt sent to Stable Diffusion (#3151) --- extensions/sd_api_pictures/script.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py index 88a0d940..e33367d4 100644 --- a/extensions/sd_api_pictures/script.py +++ 
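The two new --ssl-keyfile/--ssl-certfile flags are handed straight through to Gradio's launcher, as the server.py hunk above shows. A minimal standalone Gradio sketch of the same call, with placeholder file names; ssl_verify=False is what allows self-signed certificates:

```python
import gradio as gr


def echo(text):
    return text


demo = gr.Interface(echo, "text", "text")
# Serve over HTTPS when a key/cert pair is supplied (paths are placeholders).
demo.launch(
    ssl_keyfile="ssl/key.pem",
    ssl_certfile="ssl/cert.pem",
    ssl_verify=False,
)
```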
b/extensions/sd_api_pictures/script.py @@ -133,6 +133,9 @@ def get_SD_pictures(description, character): if params['manage_VRAM']: give_VRAM_priority('SD') + description = re.sub('', ' ', description) + description = f"({description}:1)" + payload = { "prompt": params['prompt_prefix'] + description, "seed": params['seed'], From 9dcb37e8d4fafb5c1b59f7a56e25fcb9c21e1398 Mon Sep 17 00:00:00 2001 From: Forkoz <59298527+Ph0rk0z@users.noreply.github.com> Date: Sat, 5 Aug 2023 16:45:47 +0000 Subject: [PATCH 025/169] Fix: Mirostat fails on models split across multiple GPUs --- modules/sampler_hijack.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 0a86b4fd..d5ebbb76 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -104,7 +104,7 @@ class MirostatLogitsWarper(LogitsWarper): break # Normalize the probabilities of the remaining words - prob_topk = torch.softmax(sorted_logits, dim=0) + prob_topk = torch.softmax(sorted_logits, dim=0).to('cuda') prev_i = torch.multinomial(prob_topk, num_samples=1, replacement=True).to('cuda') From 5ee95d126cac801fd773e4350a1be5b06979b799 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Sat, 5 Aug 2023 11:46:14 -0500 Subject: [PATCH 026/169] Bump exllama wheels to 0.0.10 (#3467) --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index fd3b42c4..5a46addd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,8 +23,8 @@ bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/exllama/releases/download/0.0.9/exllama-0.0.9+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/jllllll/exllama/releases/download/0.0.9/exllama-0.0.9+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" # llama-cpp-python without GPU support llama-cpp-python==0.1.77; platform_system != "Windows" https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.77/llama_cpp_python-0.1.77-cp310-cp310-win_amd64.whl; platform_system == "Windows" From 44f31731af75eae977cb098de070a62c0e362156 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Sat, 5 Aug 2023 11:47:16 -0500 Subject: [PATCH 027/169] Create logs dir if missing when saving history (#3462) --- modules/chat.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/chat.py b/modules/chat.py index 5e4eb245..8e562b98 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -394,6 +394,8 @@ def redraw_html(history, name1, name2, mode, style, reset_cache=False): def save_history(history, path=None): p = path or 
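The one-line Mirostat fix above is about device placement: when a model is split across several GPUs, the sorted logits can arrive on a device the sampler does not expect, so the softmax output is moved explicitly before torch.multinomial draws from it. A self-contained sketch of the pattern, with a CPU fallback so it runs anywhere:

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

sorted_logits = torch.tensor([2.0, 1.0, 0.5])               # may come from any GPU in a split model
prob_topk = torch.softmax(sorted_logits, dim=0).to(device)  # normalize, then pin to one device
prev_i = torch.multinomial(prob_topk, num_samples=1, replacement=True).to(device)
print(prev_i.item())
```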
Path('logs/exported_history.json') + if not p.parent.is_dir(): + p.parent.mkdir(parents=True) with open(p, 'w', encoding='utf-8') as f: f.write(json.dumps(history, indent=4)) From 5134878344ced8cfb42bd56ff6dbd357935370f3 Mon Sep 17 00:00:00 2001 From: missionfloyd Date: Sat, 5 Aug 2023 10:53:54 -0600 Subject: [PATCH 028/169] Fix chat message order (#3461) --- css/chat.css | 5 ++ modules/html_generator.py | 104 ++++++++++++++++++-------------------- 2 files changed, 54 insertions(+), 55 deletions(-) diff --git a/css/chat.css b/css/chat.css index 67bbe512..677d86db 100644 --- a/css/chat.css +++ b/css/chat.css @@ -79,6 +79,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding-top: 1px; } +.chat > .messages { + display: flex; + flex-direction: column; +} + .message-body li { margin-top: 0.5em !important; margin-bottom: 0.5em !important; diff --git a/modules/html_generator.py b/modules/html_generator.py index c6ca13b6..15c731c3 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -168,10 +168,21 @@ def get_image_cache(path): def generate_instruct_html(history): - output = f'
<style>{instruct_css}</style><div class="chat" id="chat">'
-    for i, _row in enumerate(history[::-1]):
+    output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
+    for i, _row in enumerate(history):
         row = [convert_to_markdown(entry) for entry in _row]
 
+        if row[0]:  # don't display empty user messages
+            output += f"""
+                  <div class="user-message">
+                    <div class="text">
+                      <div class="message-body">
+                        {row[0]}
+                      </div>
+                    </div>
+                  </div>
+                """
+
         output += f"""
               <div class="assistant-message">
@@ -182,34 +193,38 @@ def generate_instruct_html(history):
             """
 
-        if len(row[0]) == 0:  # don't display empty user messages
-            continue
-
-        output += f"""
-              <div class="user-message">
-                <div class="text">
-                  <div class="message-body">
-                    {row[0]}
-                  </div>
-                </div>
-              </div>
-            """
-
-    output += "</div>"
+    output += "</div></div>"
     return output
 
 
 def generate_cai_chat_html(history, name1, name2, style, reset_cache=False):
-    output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat">'
+    output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
 
     # We use ?name2 and ?time.time() to force the browser to reset caches
     img_bot = f'<img src="file/cache/pfp_character.png?{name2}">' if Path("cache/pfp_character.png").exists() else ''
     img_me = f'<img src="file/cache/pfp_me.png?{time.time() if reset_cache else ""}">' if Path("cache/pfp_me.png").exists() else ''
 
-    for i, _row in enumerate(history[::-1]):
+    for i, _row in enumerate(history):
         row = [convert_to_markdown(entry) for entry in _row]
 
+        if row[0]:  # don't display empty user messages
+            output += f"""
+                  <div class="message">
+                    <div class="circle-you">
+                      {img_me}
+                    </div>
+                    <div class="text">
+                      <div class="username">
+                        {name1}
+                      </div>
+                      <div class="message-body">
+                        {row[0]}
+                      </div>
+                    </div>
+                  </div>
+                """
+
         output += f"""
               <div class="message">
@@ -226,49 +241,18 @@ def generate_cai_chat_html(history, name1, name2, style, reset_cache=False):
             """
 
-        if len(row[0]) == 0:  # don't display empty user messages
-            continue
-
-        output += f"""
-              <div class="message">
-                <div class="circle-you">
-                  {img_me}
-                </div>
-                <div class="text">
-                  <div class="username">
-                    {name1}
-                  </div>
-                  <div class="message-body">
-                    {row[0]}
-                  </div>
-                </div>
-              </div>
-            """
-
-    output += "</div>"
+    output += "</div></div>"
     return output
 
 
 def generate_chat_html(history, name1, name2, reset_cache=False):
-    output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat">'
+    output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
 
-    for i, _row in enumerate(history[::-1]):
+    for i, _row in enumerate(history):
         row = [convert_to_markdown(entry) for entry in _row]
 
-        output += f"""
-              <div class="message">
-                <div class="text-bot">
-                  <div class="message-body">
-                    {row[1]}
-                  </div>
-                </div>
-              </div>
-            """
-
-        if len(row[0]) == 0:  # don't display empty user messages
-            continue
-
-        output += f"""
+        if row[0]:  # don't display empty user messages
+            output += f"""
               <div class="message">
@@ -278,7 +262,17 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
             """
 
-    output += "</div>"
+        output += f"""
+              <div class="message">
+                <div class="text-bot">
+                  <div class="message-body">
+                    {row[1]}
+                  </div>
+                </div>
+              </div>
+            """
+
+    output += "</div></div>
    " return output From 0af10ab49bfc1cab80d0126707321a58bd9e3485 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:22:48 -0300 Subject: [PATCH 029/169] Add Classifier Free Guidance (CFG) for Transformers/ExLlama (#3325) --- api-examples/api-example-chat-stream.py | 2 + api-examples/api-example-chat.py | 2 + api-examples/api-example-stream.py | 2 + api-examples/api-example.py | 2 + extensions/api/util.py | 2 + extensions/openai/defaults.py | 2 + modules/exllama.py | 97 ++++++++++++++++++++----- modules/exllama_hf.py | 9 +-- modules/llamacpp_hf.py | 9 +-- modules/loaders.py | 8 ++ modules/presets.py | 21 ++++-- modules/shared.py | 1 + modules/text_generation.py | 5 +- modules/ui.py | 2 + requirements.txt | 2 +- server.py | 6 +- settings-template.yaml | 1 + 17 files changed, 131 insertions(+), 42 deletions(-) diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py index 2914d451..a774f907 100644 --- a/api-examples/api-example-chat-stream.py +++ b/api-examples/api-example-chat-stream.py @@ -63,6 +63,8 @@ async def run(user_input, history): 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, + 'guidance_scale': 1, + 'negative_prompt': '', 'seed': -1, 'add_bos_token': True, diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py index e2797f1e..824bf3a0 100644 --- a/api-examples/api-example-chat.py +++ b/api-examples/api-example-chat.py @@ -57,6 +57,8 @@ def run(user_input, history): 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, + 'guidance_scale': 1, + 'negative_prompt': '', 'seed': -1, 'add_bos_token': True, diff --git a/api-examples/api-example-stream.py b/api-examples/api-example-stream.py index 175275f9..bf5eabac 100644 --- a/api-examples/api-example-stream.py +++ b/api-examples/api-example-stream.py @@ -45,6 +45,8 @@ async def run(context): 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, + 'guidance_scale': 1, + 'negative_prompt': '', 'seed': -1, 'add_bos_token': True, diff --git a/api-examples/api-example.py b/api-examples/api-example.py index 7f8bc1d2..16029807 100644 --- a/api-examples/api-example.py +++ b/api-examples/api-example.py @@ -37,6 +37,8 @@ def run(prompt): 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, + 'guidance_scale': 1, + 'negative_prompt': '', 'seed': -1, 'add_bos_token': True, diff --git a/extensions/api/util.py b/extensions/api/util.py index ef58a70f..2654d046 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -43,6 +43,8 @@ def build_parameters(body, chat=False): 'mirostat_mode': int(body.get('mirostat_mode', 0)), 'mirostat_tau': float(body.get('mirostat_tau', 5)), 'mirostat_eta': float(body.get('mirostat_eta', 0.1)), + 'guidance_scale': float(body.get('guidance_scale', 1)), + 'negative_prompt': str(body.get('negative_prompt', '')), 'seed': int(body.get('seed', -1)), 'add_bos_token': bool(body.get('add_bos_token', True)), 'truncation_length': int(body.get('truncation_length', body.get('max_context_length', 2048))), diff --git a/extensions/openai/defaults.py b/extensions/openai/defaults.py index cb8308e7..ffef12d0 100644 --- a/extensions/openai/defaults.py +++ b/extensions/openai/defaults.py @@ -33,6 +33,8 @@ default_req_params = { 'mirostat_mode': 0, 'mirostat_tau': 5.0, 'mirostat_eta': 0.1, + 'guidance_scale': 1, + 'negative_prompt': '', 'ban_eos_token': False, 'skip_special_tokens': True, 'custom_stopping_strings': '', diff --git a/modules/exllama.py b/modules/exllama.py 
index 00b37b9c..dc632a25 100644 --- a/modules/exllama.py +++ b/modules/exllama.py @@ -1,9 +1,11 @@ from pathlib import Path +import torch.nn.functional as F from torch import version as torch_version from modules import shared from modules.logging_colors import logger +from modules.models import clear_torch_cache from modules.text_generation import get_max_prompt_length try: @@ -78,6 +80,21 @@ class ExllamaModel: return result, result def generate_with_streaming(self, prompt, state): + + # The cache batch size must be 2 for CFG and 1 otherwise + if state['guidance_scale'] == 1: + if self.cache.batch_size == 2: + del self.cache + clear_torch_cache() + self.cache = ExLlamaCache(self.model) + self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache) + else: + if self.cache.batch_size == 1: + del self.cache + clear_torch_cache() + self.cache = ExLlamaCache(self.model, batch_size=2) + self.generator = ExLlamaGenerator(self.model, self.tokenizer, self.cache) + self.generator.settings.temperature = state['temperature'] self.generator.settings.top_p = state['top_p'] self.generator.settings.top_k = state['top_k'] @@ -89,31 +106,71 @@ class ExllamaModel: else: self.generator.disallow_tokens(None) - self.generator.end_beam_search() + # Case 1: no CFG + if state['guidance_scale'] == 1: + self.generator.end_beam_search() - # Tokenizing the input - ids = self.generator.tokenizer.encode(prompt) - ids = ids[:, -get_max_prompt_length(state):] - if state['auto_max_new_tokens']: - max_new_tokens = state['truncation_length'] - ids.shape[-1] + # Tokenizing the input + ids = self.generator.tokenizer.encode(prompt) + ids = ids[:, -get_max_prompt_length(state):] + if state['auto_max_new_tokens']: + max_new_tokens = state['truncation_length'] - ids.shape[-1] + else: + max_new_tokens = state['max_new_tokens'] + + self.generator.gen_begin_reuse(ids) + initial_len = self.generator.sequence[0].shape[0] + has_leading_space = False + + for i in range(max_new_tokens): + token = self.generator.gen_single_token() + if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): + has_leading_space = True + + decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:]) + if has_leading_space: + decoded_text = ' ' + decoded_text + + yield decoded_text + if token.item() == self.generator.tokenizer.eos_token_id or shared.stop_everything: + break + + # Case 2: CFG else: - max_new_tokens = state['max_new_tokens'] + alpha = state['guidance_scale'] + prompts = [prompt, state['negative_prompt'] or ''] - self.generator.gen_begin_reuse(ids) - initial_len = self.generator.sequence[0].shape[0] - has_leading_space = False - for i in range(max_new_tokens): - token = self.generator.gen_single_token() - if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): - has_leading_space = True + ids, mask = self.tokenizer.encode(prompts, return_mask=True) + if state['auto_max_new_tokens']: + max_new_tokens = state['truncation_length'] - ids[0].shape[-1] + else: + max_new_tokens = state['max_new_tokens'] - decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:]) - if has_leading_space: - decoded_text = ' ' + decoded_text + self.generator.gen_begin(ids, mask=mask) + initial_len = self.generator.sequence[0].shape[0] + has_leading_space = False - yield decoded_text - if token.item() == self.generator.tokenizer.eos_token_id or shared.stop_everything: - break + for i in range(max_new_tokens): + logits = 
self.model.forward(self.generator.sequence[:, -1:], self.cache, input_mask=mask) + self.generator.apply_rep_penalty(logits) + + logits = F.log_softmax(logits, dim=-1) + logits_mixed = alpha * logits[0] + (1 - alpha) * logits[1] + + token, _ = self.generator.sample_current(logits_mixed) + if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): + has_leading_space = True + + decoded_text = self.generator.tokenizer.decode(self.generator.sequence[0][initial_len:]) + if has_leading_space: + decoded_text = ' ' + decoded_text + + yield decoded_text + if token.item() == self.tokenizer.eos_token_id or shared.stop_everything: + break + + batch_token = token.repeat(2, 1) + self.generator.gen_accept_token(batch_token) def generate(self, prompt, state): output = '' diff --git a/modules/exllama_hf.py b/modules/exllama_hf.py index fd775b4a..ebafb4f7 100644 --- a/modules/exllama_hf.py +++ b/modules/exllama_hf.py @@ -47,12 +47,11 @@ class ExllamaHF(PreTrainedModel): return torch.device(0) def __call__(self, *args, **kwargs): - # TODO: Some decoding methods (such as Contrastive Search) may not work at this time - assert len(args) == 0, 'no *args should be passed to forward' + input_ids = args[0] if len(args) > 0 else kwargs['input_ids'] use_cache = kwargs.get('use_cache', True) labels = kwargs.get('labels', None) - seq = kwargs['input_ids'][0].tolist() - cache = kwargs['past_key_values'] if 'past_key_values' in kwargs else None + cache = kwargs.get('past_key_values', None) + seq = input_ids[0].tolist() if labels is None: if cache is None: @@ -60,7 +59,7 @@ class ExllamaHF(PreTrainedModel): cache = self.ex_cache self.ex_model.forward(torch.tensor([seq[:-1]], dtype=torch.long), cache, preprocess_only=True, lora=self.lora) - logits = self.ex_model.forward(torch.tensor([seq[-1:]], dtype=torch.long), cache, lora=self.lora).to(kwargs['input_ids'].device) + logits = self.ex_model.forward(torch.tensor([seq[-1:]], dtype=torch.long), cache, lora=self.lora).to(input_ids.device) else: if cache is None: self.ex_cache.current_seq_len = 0 diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index e9f4ade6..df9e0b2e 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -49,12 +49,11 @@ class LlamacppHF(PreTrainedModel): return torch.device(0) def __call__(self, *args, **kwargs): - # TODO: Some decoding methods (such as Contrastive Search) may not work at this time - assert len(args) == 0, 'no *args should be passed to forward' + input_ids = args[0] if len(args) > 0 else kwargs['input_ids'] use_cache = kwargs.get('use_cache', True) labels = kwargs.get('labels', None) - seq = kwargs['input_ids'][0].tolist() - cache = kwargs['past_key_values'] if 'past_key_values' in kwargs else None + cache = kwargs.get('past_key_values', None) + seq = input_ids[0].tolist() # Make the forward call seq_tensor = torch.tensor(seq) @@ -70,7 +69,7 @@ class LlamacppHF(PreTrainedModel): self.model.reset() self.model.eval(seq) logits = torch.tensor(self.model.eval_logits) - logits = logits.view(1, logits.shape[0], logits.shape[1]).to(kwargs['input_ids'].device) + logits = logits.view(1, logits.shape[0], logits.shape[1]).to(input_ids.device) self.cache = seq_tensor diff --git a/modules/loaders.py b/modules/loaders.py index aa1afcb8..519e47a7 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -115,6 +115,8 @@ loaders_samplers = { 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'guidance_scale', + 'negative_prompt', 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', @@ -152,6 
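The heart of the ExLlama CFG path above is a log-space interpolation between the logits produced by the guided prompt and by the negative prompt, with alpha playing the role of guidance_scale. A small standalone sketch of that mixing step:

```python
import torch
import torch.nn.functional as F


def cfg_mix(positive_logits: torch.Tensor, negative_logits: torch.Tensor, alpha: float) -> torch.Tensor:
    # alpha == 1 reproduces the positive distribution; alpha > 1 extrapolates
    # away from the negative one, mirroring alpha * logits[0] + (1 - alpha) * logits[1].
    logp_pos = F.log_softmax(positive_logits, dim=-1)
    logp_neg = F.log_softmax(negative_logits, dim=-1)
    return alpha * logp_pos + (1 - alpha) * logp_neg


pos = torch.tensor([1.0, 2.0, 0.5])
neg = torch.tensor([2.0, 0.5, 0.5])
print(cfg_mix(pos, neg, alpha=1.5))
```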
+154,8 @@ loaders_samplers = { 'repetition_penalty', 'repetition_penalty_range', 'seed', + 'guidance_scale', + 'negative_prompt', 'ban_eos_token', 'auto_max_new_tokens', }, @@ -178,6 +182,8 @@ loaders_samplers = { 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'guidance_scale', + 'negative_prompt', 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', @@ -206,6 +212,8 @@ loaders_samplers = { 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'guidance_scale', + 'negative_prompt', 'ban_eos_token', 'add_bos_token', 'skip_special_tokens', diff --git a/modules/presets.py b/modules/presets.py index 072b15fd..32b7f71c 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -9,6 +9,7 @@ def default_preset(): 'do_sample': True, 'temperature': 1, 'top_p': 1, + 'top_k': 0, 'typical_p': 1, 'epsilon_cutoff': 0, 'eta_cutoff': 0, @@ -17,19 +18,23 @@ def default_preset(): 'repetition_penalty': 1, 'repetition_penalty_range': 0, 'encoder_repetition_penalty': 1, - 'top_k': 0, - 'num_beams': 1, - 'penalty_alpha': 0, - 'min_length': 0, - 'length_penalty': 1, 'no_repeat_ngram_size': 0, - 'early_stopping': False, + 'min_length': 0, + 'guidance_scale': 1, 'mirostat_mode': 0, 'mirostat_tau': 5.0, 'mirostat_eta': 0.1, + 'penalty_alpha': 0, + 'num_beams': 1, + 'length_penalty': 1, + 'early_stopping': False, } +def presets_params(): + return [k for k in default_preset()] + + def load_preset(name): generate_params = default_preset() if name not in ['None', None, '']: @@ -51,12 +56,12 @@ def load_preset_memoized(name): def load_preset_for_ui(name, state): generate_params = load_preset(name) state.update(generate_params) - return state, *[generate_params[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'tfs', 'top_a']] + return state, *[generate_params[k] for k in presets_params()] def generate_preset_yaml(state): defaults = default_preset() - data = {k: state[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'tfs', 'top_a']} + data = {k: state[k] for k in presets_params()} # Remove entries that are identical to the defaults for k in list(data.keys()): diff --git a/modules/shared.py b/modules/shared.py index 51017a1b..be5be109 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -42,6 +42,7 @@ settings = { 'max_new_tokens_max': 4096, 'auto_max_new_tokens': False, 'seed': -1, + 'negative_prompt': '', 'character': 'None', 'name1': 'You', 'name2': 'Assistant', diff --git a/modules/text_generation.py b/modules/text_generation.py index 7507a731..df9d708b 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -226,9 +226,12 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 
'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta']: + for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']: generate_params[k] = state[k] + if state['negative_prompt'] != '': + generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) + for k in ['epsilon_cutoff', 'eta_cutoff']: if state[k] > 0: generate_params[k] = state[k] * 1e-4 diff --git a/modules/ui.py b/modules/ui.py index eed2ef66..8a7f9f47 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -100,6 +100,8 @@ def list_interface_input_elements(): 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'negative_prompt', + 'guidance_scale', 'add_bos_token', 'ban_eos_token', 'truncation_length', diff --git a/requirements.txt b/requirements.txt index 5a46addd..9deadd48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,10 +15,10 @@ safetensors==0.3.1 scipy sentencepiece tensorboard -transformers==4.31.* tqdm wandb git+https://github.com/huggingface/peft@96c0277a1b9a381b10ab34dbf84917f9b3b992e6 +git+https://github.com/huggingface/transformers@d533465150532b0c5de167b574e59f64c68b1154 bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" diff --git a/server.py b/server.py index 0e1d199d..adff9669 100644 --- a/server.py +++ b/server.py @@ -229,7 +229,7 @@ def create_model_menus(): shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0) shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') - shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=2048, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) + shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.', value=shared.args.compress_pos_emb) shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=32, step=1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) @@ -408,6 +408,8 @@ def create_settings_menus(default_preset): with gr.Box(): with gr.Row(): with gr.Column(): + shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 
1.5 is a good value.') + shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') @@ -433,7 +435,7 @@ def create_settings_menus(default_preset): shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') filter_by_loader.change(loaders.blacklist_samplers, filter_by_loader, gradio(loaders.list_all_samplers()), show_progress=False) - shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state', 'do_sample', 'temperature', 'top_p', 'typical_p', 'epsilon_cutoff', 'eta_cutoff', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'tfs', 'top_a')) + shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) def create_file_saving_menus(): diff --git a/settings-template.yaml b/settings-template.yaml index 62e86371..a0c53b33 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -5,6 +5,7 @@ max_new_tokens_min: 1 max_new_tokens_max: 4096 auto_max_new_tokens: false seed: -1 +negative_prompt: '' character: None name1: You name2: Assistant From d4b851bdc835669072df1b243cf2d6739df5d2b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 13:42:43 -0700 Subject: [PATCH 030/169] Credit turboderp --- modules/exllama.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/exllama.py b/modules/exllama.py index dc632a25..30c37634 100644 --- a/modules/exllama.py +++ b/modules/exllama.py @@ -136,6 +136,7 @@ class ExllamaModel: break # Case 2: CFG + # Copied from https://github.com/turboderp/exllama/blob/master/example_cfg.py else: alpha = state['guidance_scale'] prompts = [prompt, state['negative_prompt'] or ''] From 65aa11890ff947adf4e1d38ff174c3a17a512c2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 21:49:27 -0300 Subject: [PATCH 031/169] Refactor everything (#3481) --- {css => js}/main.js | 0 {css => js}/save_files.js | 0 modules/chat.py | 1 - modules/llamacpp_hf.py | 4 +- modules/llamacpp_model.py | 2 +- modules/models.py | 4 +- modules/models_settings.py | 4 +- modules/prompts.py | 51 ++ modules/text_generation.py | 173 +++--- modules/training.py | 163 +++--- modules/ui.py | 10 +- modules/ui_chat.py | 262 +++++++++ modules/ui_default.py | 94 ++++ modules/ui_file_saving.py | 108 ++++ modules/ui_model_menu.py | 229 ++++++++ modules/ui_notebook.py | 98 ++++ modules/ui_parameters.py | 143 +++++ modules/ui_session.py | 71 +++ server.py | 1067 +++--------------------------------- 19 files changed, 1306 insertions(+), 1178 deletions(-) rename {css => js}/main.js (100%) rename {css => js}/save_files.js (100%) create mode 100644 modules/prompts.py create mode 100644 modules/ui_chat.py create mode 100644 modules/ui_default.py 
create mode 100644 modules/ui_file_saving.py create mode 100644 modules/ui_model_menu.py create mode 100644 modules/ui_notebook.py create mode 100644 modules/ui_parameters.py create mode 100644 modules/ui_session.py diff --git a/css/main.js b/js/main.js similarity index 100% rename from css/main.js rename to js/main.js diff --git a/css/save_files.js b/js/save_files.js similarity index 100% rename from css/save_files.js rename to js/save_files.js diff --git a/modules/chat.py b/modules/chat.py index 8e562b98..8a86523c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -3,7 +3,6 @@ import copy import functools import json import re -from datetime import datetime from pathlib import Path import gradio as gr diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index df9e0b2e..fa0554cd 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -64,7 +64,7 @@ class LlamacppHF(PreTrainedModel): else: self.model.eval([seq[-1]]) - logits = torch.tensor(self.model.scores[self.model.n_tokens-1, :]).view(1, 1, -1).to(kwargs['input_ids'].device) + logits = torch.tensor(self.model.scores[self.model.n_tokens - 1, :]).view(1, 1, -1).to(kwargs['input_ids'].device) else: self.model.reset() self.model.eval(seq) @@ -112,7 +112,7 @@ class LlamacppHF(PreTrainedModel): 'use_mlock': shared.args.mlock, 'low_vram': shared.args.low_vram, 'n_gpu_layers': shared.args.n_gpu_layers, - 'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.), + 'rope_freq_base': 10000 * shared.args.alpha_value ** (64 / 63.), 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb, 'n_gqa': shared.args.n_gqa or None, 'rms_norm_eps': shared.args.rms_norm_eps or None, diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index e5401378..f7f4cc9b 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -65,7 +65,7 @@ class LlamaCppModel: 'use_mlock': shared.args.mlock, 'low_vram': shared.args.low_vram, 'n_gpu_layers': shared.args.n_gpu_layers, - 'rope_freq_base': 10000 * shared.args.alpha_value ** (64/63.), + 'rope_freq_base': 10000 * shared.args.alpha_value ** (64 / 63.), 'rope_freq_scale': 1.0 / shared.args.compress_pos_emb, 'n_gqa': shared.args.n_gqa or None, 'rms_norm_eps': shared.args.rms_norm_eps or None, diff --git a/modules/models.py b/modules/models.py index 4866893a..4f6a44c1 100644 --- a/modules/models.py +++ b/modules/models.py @@ -1,9 +1,9 @@ import gc +import hashlib import os import re import time from pathlib import Path -import hashlib import torch import transformers @@ -14,7 +14,7 @@ from transformers import ( AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokenizer, - BitsAndBytesConfig, + BitsAndBytesConfig ) import modules.shared as shared diff --git a/modules/models_settings.py b/modules/models_settings.py index 00a6b90f..06a41da4 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -26,9 +26,9 @@ def infer_loader(model_name): loader = 'AutoGPTQ' elif len(list(path_to_model.glob('*ggml*.bin'))) > 0: loader = 'llama.cpp' - elif re.match('.*ggml.*\.bin', model_name.lower()): + elif re.match(r'.*ggml.*\.bin', model_name.lower()): loader = 'llama.cpp' - elif re.match('.*rwkv.*\.pth', model_name.lower()): + elif re.match(r'.*rwkv.*\.pth', model_name.lower()): loader = 'RWKV' else: loader = 'Transformers' diff --git a/modules/prompts.py b/modules/prompts.py new file mode 100644 index 00000000..f68c83c4 --- /dev/null +++ b/modules/prompts.py @@ -0,0 +1,51 @@ +import re +from pathlib import Path + +import yaml + +from modules import utils +from 
modules.text_generation import get_encoded_length + + +def load_prompt(fname): + if fname in ['None', '']: + return '' + elif fname.startswith('Instruct-'): + fname = re.sub('^Instruct-', '', fname) + file_path = Path(f'characters/instruction-following/{fname}.yaml') + if not file_path.exists(): + return '' + + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + output = '' + if 'context' in data: + output += data['context'] + + replacements = { + '<|user|>': data['user'], + '<|bot|>': data['bot'], + '<|user-message|>': 'Input', + } + + output += utils.replace_all(data['turn_template'].split('<|bot-message|>')[0], replacements) + return output.rstrip(' ') + else: + file_path = Path(f'prompts/{fname}.txt') + if not file_path.exists(): + return '' + + with open(file_path, 'r', encoding='utf-8') as f: + text = f.read() + if text[-1] == '\n': + text = text[:-1] + + return text + + +def count_tokens(text): + try: + tokens = get_encoded_length(text) + return f'{tokens} tokens in the input.' + except: + return 'Couldn\'t count the number of tokens. Is a tokenizer loaded?' diff --git a/modules/text_generation.py b/modules/text_generation.py index df9d708b..6e95414b 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -31,8 +31,62 @@ def generate_reply(*args, **kwargs): shared.generation_lock.release() -def get_max_prompt_length(state): - return state['truncation_length'] - state['max_new_tokens'] +def _generate_reply(question, state, stopping_strings=None, is_chat=False): + + # Find the appropriate generation function + generate_func = apply_extensions('custom_generate_reply') + if generate_func is None: + if shared.model_name == 'None' or shared.model is None: + logger.error("No model is loaded! Select one in the Model tab.") + yield '' + return + + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel']: + generate_func = generate_reply_custom + else: + generate_func = generate_reply_HF + + # Prepare the input + original_question = question + if not is_chat: + state = apply_extensions('state', state) + question = apply_extensions('input', question, state) + + # Find the stopping strings + all_stop_strings = [] + for st in (stopping_strings, ast.literal_eval(f"[{state['custom_stopping_strings']}]")): + if type(st) is list and len(st) > 0: + all_stop_strings += st + + if shared.args.verbose: + print(f'\n\n{question}\n--------------------\n') + + shared.stop_everything = False + clear_torch_cache() + seed = set_manual_seed(state['seed']) + last_update = -1 + reply = '' + is_stream = state['stream'] + if len(all_stop_strings) > 0 and not state['stream']: + state = copy.deepcopy(state) + state['stream'] = True + + # Generate + for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): + reply, stop_found = apply_stopping_strings(reply, all_stop_strings) + if is_stream: + cur_time = time.time() + if cur_time - last_update > 0.041666666666666664: # Limit streaming to 24 fps + last_update = cur_time + yield reply + + if stop_found: + break + + if not is_chat: + reply = apply_extensions('output', reply, state) + + yield reply def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None): @@ -61,6 +115,10 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt return input_ids.cuda() +def decode(output_ids, skip_special_tokens=True): + return shared.tokenizer.decode(output_ids, skip_special_tokens) + + def 
get_encoded_length(prompt): length_after_extensions = apply_extensions('tokenized_length', prompt) if length_after_extensions is not None: @@ -69,12 +127,36 @@ def get_encoded_length(prompt): return len(encode(prompt)[0]) -def decode(output_ids, skip_special_tokens=True): - return shared.tokenizer.decode(output_ids, skip_special_tokens) +def get_max_prompt_length(state): + return state['truncation_length'] - state['max_new_tokens'] + + +def generate_reply_wrapper(question, state, stopping_strings=None): + """ + Returns formatted outputs for the UI + """ + reply = question if not shared.is_seq2seq else '' + yield formatted_outputs(reply, shared.model_name) + + for reply in generate_reply(question, state, stopping_strings, is_chat=False): + if not shared.is_seq2seq: + reply = question + reply + + yield formatted_outputs(reply, shared.model_name) + + +def formatted_outputs(reply, model_name): + if any(s in model_name for s in ['gpt-4chan', 'gpt4chan']): + reply = fix_gpt4chan(reply) + return reply, generate_4chan_html(reply) + else: + return reply, generate_basic_html(reply) -# Removes empty replies from gpt4chan outputs def fix_gpt4chan(s): + """ + Removes empty replies from gpt4chan outputs + """ for i in range(10): s = re.sub("--- [0-9]*\n>>[0-9]*\n---", "---", s) s = re.sub("--- [0-9]*\n *\n---", "---", s) @@ -83,8 +165,10 @@ def fix_gpt4chan(s): return s -# Fix the LaTeX equations in galactica def fix_galactica(s): + """ + Fix the LaTeX equations in GALACTICA + """ s = s.replace(r'\[', r'$') s = s.replace(r'\]', r'$') s = s.replace(r'\(', r'$') @@ -109,14 +193,6 @@ def get_reply_from_output_ids(output_ids, input_ids, original_question, state, i return reply -def formatted_outputs(reply, model_name): - if any(s in model_name for s in ['gpt-4chan', 'gpt4chan']): - reply = fix_gpt4chan(reply) - return reply, generate_4chan_html(reply) - else: - return reply, generate_basic_html(reply) - - def set_manual_seed(seed): seed = int(seed) if seed == -1: @@ -133,17 +209,6 @@ def stop_everything_event(): shared.stop_everything = True -def generate_reply_wrapper(question, state, stopping_strings=None): - reply = question if not shared.is_seq2seq else '' - yield formatted_outputs(reply, shared.model_name) - - for reply in generate_reply(question, state, stopping_strings, is_chat=False): - if not shared.is_seq2seq: - reply = question + reply - - yield formatted_outputs(reply, shared.model_name) - - def apply_stopping_strings(reply, all_stop_strings): stop_found = False for string in all_stop_strings: @@ -169,61 +234,6 @@ def apply_stopping_strings(reply, all_stop_strings): return reply, stop_found -def _generate_reply(question, state, stopping_strings=None, is_chat=False): - generate_func = apply_extensions('custom_generate_reply') - if generate_func is None: - if shared.model_name == 'None' or shared.model is None: - logger.error("No model is loaded! 
Select one in the Model tab.") - yield '' - return - - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel']: - generate_func = generate_reply_custom - else: - generate_func = generate_reply_HF - - # Preparing the input - original_question = question - if not is_chat: - state = apply_extensions('state', state) - question = apply_extensions('input', question, state) - - # Finding the stopping strings - all_stop_strings = [] - for st in (stopping_strings, ast.literal_eval(f"[{state['custom_stopping_strings']}]")): - if type(st) is list and len(st) > 0: - all_stop_strings += st - - if shared.args.verbose: - print(f'\n\n{question}\n--------------------\n') - - shared.stop_everything = False - clear_torch_cache() - seed = set_manual_seed(state['seed']) - last_update = -1 - reply = '' - is_stream = state['stream'] - if len(all_stop_strings) > 0 and not state['stream']: - state = copy.deepcopy(state) - state['stream'] = True - - for reply in generate_func(question, original_question, seed, state, stopping_strings, is_chat=is_chat): - reply, stop_found = apply_stopping_strings(reply, all_stop_strings) - if is_stream: - cur_time = time.time() - if cur_time - last_update > 0.041666666666666664: # Limit streaming to 24 fps - last_update = cur_time - yield reply - - if stop_found: - break - - if not is_chat: - reply = apply_extensions('output', reply, state) - - yield reply - - def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']: @@ -316,6 +326,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings def generate_reply_custom(question, original_question, seed, state, stopping_strings=None, is_chat=False): + """ + For models that do not use the transformers library for sampling + """ seed = set_manual_seed(state['seed']) t0 = time.time() diff --git a/modules/training.py b/modules/training.py index ef833679..7558cd5d 100644 --- a/modules/training.py +++ b/modules/training.py @@ -17,8 +17,6 @@ from pathlib import Path import gradio as gr import torch import transformers -from modules.models import load_model, unload_model - from datasets import Dataset, load_dataset from peft import ( LoraConfig, @@ -34,6 +32,7 @@ from modules.evaluate import ( save_past_evaluations ) from modules.logging_colors import logger +from modules.models import load_model, unload_model from modules.utils import natural_keys # This mapping is from a very recent commit, not yet released. @@ -65,100 +64,101 @@ WANT_INTERRUPT = False PARAMETERS = ["lora_name", "always_override", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "raw_text_file", "overlap_len", "newline_favor_len", "higher_rank_limit", "warmup_steps", "optimizer", "hard_cut_string", "train_only_after", "stop_at_loss", "add_eos_token", "min_chars", "report_to"] -def create_train_interface(): - with gr.Tab('Train LoRA', elem_id='lora-train-tab'): - gr.Markdown("Confused? 
[[Click here for a guide]](https://github.com/oobabooga/text-generation-webui/blob/main/docs/Training-LoRAs.md)") - - with gr.Row(): - lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file') - always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name given is the same as an existing file, checking this will replace that file. Leaving unchecked will load that file and continue from it (must use the same rank value as the original had).') - save_steps = gr.Number(label='Save every n steps', value=0, info='If above 0, a checkpoint of the LoRA will be saved every time this many steps pass.') - - with gr.Row(): - copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=utils.get_available_loras()) - ui.create_refresh_button(copy_from, lambda: None, lambda: {'choices': utils.get_available_loras()}, 'refresh-button') - - with gr.Row(): - # TODO: Implement multi-device support. - micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') - batch_size = gr.Slider(label='Batch Size', value=128, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') - - with gr.Row(): - epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') - learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') - lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.') - - # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. - lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, higher values like 128 or 256 are good for teaching content upgrades, extremely high values (1024+) are difficult to train but may improve fine-detail learning for large datasets. Higher ranks also require higher VRAM.') - lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') - - cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. 
Higher values require drastically more VRAM.') - - with gr.Tab(label='Formatted Dataset'): - with gr.Row(): - dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.') - ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button') - eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.') - ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button') - format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') - ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button') - - eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.') - - with gr.Tab(label="Raw text file"): - with gr.Row(): - raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.') - ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button') - hard_cut_string = gr.Textbox(label='Hard Cut String', value='\\n\\n\\n', info='String that indicates a hard cut between text parts. Helps prevent unwanted overlap.') - min_chars = gr.Number(label='Ignore small blocks', value=0, info='Ignore Hard Cut blocks that have less or equal characters than this number') +def create_ui(): + with gr.Tab("Training", elem_id="training-tab"): + tmp = gr.State('') + with gr.Tab('Train LoRA', elem_id='lora-train-tab'): + gr.Markdown("Confused? [[Click here for a guide]](https://github.com/oobabooga/text-generation-webui/blob/main/docs/Training-LoRAs.md)") with gr.Row(): - overlap_len = gr.Slider(label='Overlap Length', minimum=0, maximum=512, value=128, step=16, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length below). Setting overlap to exactly half the cutoff length may be ideal.') - newline_favor_len = gr.Slider(label='Prefer Newline Cut Length', minimum=0, maximum=512, value=128, step=16, info='Length (in characters, not tokens) of the maximum distance to shift an overlap cut by to ensure chunks cut at newlines. If too low, cuts may occur in the middle of lines.') - - with gr.Accordion(label='Advanced Options', open=False): - lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.') - warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate will be lower than normal. 
This helps the trainer prepare the model and precompute statistics to improve the quality of training after the start.') - optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.') - train_only_after = gr.Textbox(label='Train Only After', value='', info='Only consider text *after* this string in any given chunk for training. For Alpaca datasets, use "### Response:" to only train the response and ignore the input.') - stop_at_loss = gr.Slider(label='Stop at loss', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached. (reasonable numbers are 1.5-1.8)') - add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item. In case of raw text, the EOS will be added at the Hard Cut") + lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file') + always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name given is the same as an existing file, checking this will replace that file. Leaving unchecked will load that file and continue from it (must use the same rank value as the original had).') + save_steps = gr.Number(label='Save every n steps', value=0, info='If above 0, a checkpoint of the LoRA will be saved every time this many steps pass.') with gr.Row(): - higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.') + copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=utils.get_available_loras()) + ui.create_refresh_button(copy_from, lambda: None, lambda: {'choices': utils.get_available_loras()}, 'refresh-button') + with gr.Row(): - report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True) + # TODO: Implement multi-device support. + micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.') + batch_size = gr.Slider(label='Batch Size', value=128, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.') - with gr.Row(): - start_button = gr.Button("Start LoRA Training") - stop_button = gr.Button("Interrupt") + with gr.Row(): + epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') + learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='Learning rate, in scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.') + lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. 
"Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.') - output = gr.Markdown(value="Ready") + # TODO: What is the actual maximum rank? Likely distinct per model. This might be better to somehow be on a log scale. + lora_rank = gr.Slider(label='LoRA Rank', value=32, minimum=0, maximum=1024, step=4, info='LoRA Rank, or dimension count. Higher values produce a larger file with better control over the model\'s content. Smaller values produce a smaller file with less overall control. Small values like 4 or 8 are great for stylistic guidance, higher values like 128 or 256 are good for teaching content upgrades, extremely high values (1024+) are difficult to train but may improve fine-detail learning for large datasets. Higher ranks also require higher VRAM.') + lora_alpha = gr.Slider(label='LoRA Alpha', value=64, minimum=0, maximum=2048, step=4, info='LoRA Alpha. This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.') - with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'): - with gr.Row(): - with gr.Column(): - models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True) - evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.') + cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=2048, value=256, step=32, info='Cutoff length for text input. Essentially, how long of a line of text to feed in at a time. Higher values require drastically more VRAM.') + + with gr.Tab(label='Formatted Dataset'): with gr.Row(): - stride_length = gr.Slider(label='Stride', minimum=1, maximum=2048, value=512, step=1, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.') - max_length = gr.Slider(label='max_length', minimum=0, maximum=8096, value=0, step=1, info='The context for each evaluation. 
If set to 0, the maximum context length for the model will be used.') + dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.') + ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button') + eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.') + ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button') + format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.') + ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button') + + eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.') + + with gr.Tab(label="Raw text file"): + with gr.Row(): + raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.') + ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button') + hard_cut_string = gr.Textbox(label='Hard Cut String', value='\\n\\n\\n', info='String that indicates a hard cut between text parts. Helps prevent unwanted overlap.') + min_chars = gr.Number(label='Ignore small blocks', value=0, info='Ignore Hard Cut blocks that have less or equal characters than this number') with gr.Row(): - start_current_evaluation = gr.Button("Evaluate loaded model") - start_evaluation = gr.Button("Evaluate selected models") - stop_evaluation = gr.Button("Interrupt") + overlap_len = gr.Slider(label='Overlap Length', minimum=0, maximum=512, value=128, step=16, info='Overlap length - ie how many tokens from the prior chunk of text to include into the next chunk. (The chunks themselves will be of a size determined by Cutoff Length below). Setting overlap to exactly half the cutoff length may be ideal.') + newline_favor_len = gr.Slider(label='Prefer Newline Cut Length', minimum=0, maximum=512, value=128, step=16, info='Length (in characters, not tokens) of the maximum distance to shift an overlap cut by to ensure chunks cut at newlines. If too low, cuts may occur in the middle of lines.') - with gr.Column(): - evaluation_log = gr.Markdown(value='') + with gr.Accordion(label='Advanced Options', open=False): + lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.') + warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate will be lower than normal. 
This helps the trainer prepare the model and precompute statistics to improve the quality of training after the start.') + optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.') + train_only_after = gr.Textbox(label='Train Only After', value='', info='Only consider text *after* this string in any given chunk for training. For Alpaca datasets, use "### Response:" to only train the response and ignore the input.') + stop_at_loss = gr.Slider(label='Stop at loss', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached. (reasonable numbers are 1.5-1.8)') + add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item. In case of raw text, the EOS will be added at the Hard Cut") - evaluation_table = gr.Dataframe(value=generate_markdown_table(), interactive=True) - with gr.Row(): - save_comments = gr.Button('Save comments', elem_classes="small-button") - refresh_table = gr.Button('Refresh the table', elem_classes="small-button") + with gr.Row(): + higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.') + with gr.Row(): + report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True) + + with gr.Row(): + start_button = gr.Button("Start LoRA Training") + stop_button = gr.Button("Interrupt") + + output = gr.Markdown(value="Ready") + + with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'): + with gr.Row(): + with gr.Column(): + models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True) + evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.') + with gr.Row(): + stride_length = gr.Slider(label='Stride', minimum=1, maximum=2048, value=512, step=1, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.') + max_length = gr.Slider(label='max_length', minimum=0, maximum=8096, value=0, step=1, info='The context for each evaluation. 
If set to 0, the maximum context length for the model will be used.') + + with gr.Row(): + start_current_evaluation = gr.Button("Evaluate loaded model") + start_evaluation = gr.Button("Evaluate selected models") + stop_evaluation = gr.Button("Interrupt") + + with gr.Column(): + evaluation_log = gr.Markdown(value='') + + evaluation_table = gr.Dataframe(value=generate_markdown_table(), interactive=True) + with gr.Row(): + save_comments = gr.Button('Save comments', elem_classes="small-button") + refresh_table = gr.Button('Refresh the table', elem_classes="small-button") # Training events - all_params = [lora_name, always_override, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, raw_text_file, overlap_len, newline_favor_len, higher_rank_limit, warmup_steps, optimizer, hard_cut_string, train_only_after, stop_at_loss, add_eos_token, min_chars, report_to] copy_from.change(do_copy_params, [copy_from] + all_params, all_params) @@ -172,7 +172,6 @@ def create_train_interface(): ev = start_evaluation.click(calculate_perplexity, [models, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False) start_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False) - tmp = gr.State('') start_current_evaluation.click(lambda: ['current model'], None, tmp) ev_cur = start_current_evaluation.click(calculate_perplexity, [tmp, evaluate_text_file, stride_length, max_length], evaluation_log, show_progress=False) start_current_evaluation.click(generate_markdown_table, None, evaluation_table, show_progress=False) diff --git a/modules/ui.py b/modules/ui.py index 8a7f9f47..b58b7dd6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1,4 +1,3 @@ -import json from pathlib import Path import gradio as gr @@ -11,9 +10,9 @@ with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f: css = f.read() with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f: chat_css = f.read() -with open(Path(__file__).resolve().parent / '../css/main.js', 'r') as f: +with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f: main_js = f.read() -with open(Path(__file__).resolve().parent / '../css/save_files.js', 'r') as f: +with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f: save_files_js = f.read() refresh_symbol = '🔄' @@ -30,6 +29,11 @@ theme = gr.themes.Default( background_fill_secondary='#eaeaea' ) +if Path("notification.mp3").exists(): + audio_notification_js = "document.querySelector('#audio_notification audio')?.play();" +else: + audio_notification_js = "" + def list_model_elements(): elements = [ diff --git a/modules/ui_chat.py b/modules/ui_chat.py new file mode 100644 index 00000000..8a0c103b --- /dev/null +++ b/modules/ui_chat.py @@ -0,0 +1,262 @@ +import json +from functools import partial +from pathlib import Path + +import gradio as gr +from PIL import Image + +from modules import chat, shared, ui, utils +from modules.html_generator import chat_html_wrapper +from modules.text_generation import stop_everything_event +from modules.utils import gradio + + +def create_ui(): + + shared.gradio.update({ + 'interface_state': gr.State({k: None for k in shared.input_elements}), + 'Chat input': gr.State(), + 'dummy': gr.State(), + 'history': gr.State({'internal': [], 'visible': []}), + }) + + with gr.Tab('Text generation', elem_id='main'): + shared.gradio['display'] = 
gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) + shared.gradio['textbox'] = gr.Textbox(label='Input') + with gr.Row(): + shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') + shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary') + shared.gradio['Continue'] = gr.Button('Continue') + + with gr.Row(): + shared.gradio['Impersonate'] = gr.Button('Impersonate') + shared.gradio['Regenerate'] = gr.Button('Regenerate') + shared.gradio['Remove last'] = gr.Button('Remove last', elem_classes=['button_nowrap']) + + with gr.Row(): + shared.gradio['Copy last reply'] = gr.Button('Copy last reply') + shared.gradio['Replace last reply'] = gr.Button('Replace last reply') + shared.gradio['Send dummy message'] = gr.Button('Send dummy message') + shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') + + with gr.Row(): + shared.gradio['Clear history'] = gr.Button('Clear history') + shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant='stop', visible=False) + shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) + + with gr.Row(): + shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with']) + + with gr.Row(): + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under "Chat settings" must match the current model.') + shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') + + with gr.Tab('Chat settings', elem_id='chat-settings'): + with gr.Tab("Character"): + with gr.Row(): + with gr.Column(scale=8): + with gr.Row(): + shared.gradio['character_menu'] = gr.Dropdown(value='None', choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button') + shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button') + + shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=4, label='Context', elem_classes=['add_scrollbar']) + shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=4, label='Greeting', elem_classes=['add_scrollbar']) + + with gr.Column(scale=1): + shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') + shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None) + + with gr.Tab("Instruction template"): + with gr.Row(): + with gr.Row(): + shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), 
label='Instruction template', value='None', info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button') + shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button') + + shared.gradio['name1_instruct'] = gr.Textbox(value='', lines=2, label='User string') + shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') + shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') + shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') + with gr.Row(): + shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar']) + + with gr.Tab('Chat history'): + with gr.Row(): + with gr.Column(): + shared.gradio['save_chat_history'] = gr.Button(value='Save history') + + with gr.Column(): + shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label="Upload History JSON") + + with gr.Tab('Upload character'): + with gr.Tab('YAML or JSON'): + with gr.Row(): + shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File') + shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)') + + shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False) + + with gr.Tab('TavernAI PNG'): + with gr.Row(): + with gr.Column(): + shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id="upload_img_tavern") + shared.gradio['tavern_json'] = gr.State() + with gr.Column(): + shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) + shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) + + shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False) + + +def create_event_handlers(): + gen_events = [] + + shared.input_params = gradio('Chat input', 'start_with', 'interface_state') + clear_arr = gradio('Clear history-confirm', 'Clear history', 'Clear history-cancel') + shared.reload_inputs = gradio('history', 'name1', 'name2', 'mode', 'chat_style') + + gen_events.append(shared.gradio['Generate'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + ) + + gen_events.append(shared.gradio['textbox'].submit( + ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( + lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + ) + + gen_events.append(shared.gradio['Regenerate'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + partial(chat.generate_chat_reply_wrapper, regenerate=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + ) + + gen_events.append(shared.gradio['Continue'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + partial(chat.generate_chat_reply_wrapper, _continue=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + ) + + gen_events.append(shared.gradio['Impersonate'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( + chat.impersonate_wrapper, shared.input_params, gradio('textbox'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + ) + + shared.gradio['Replace last reply'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then( + lambda: '', None, gradio('textbox'), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) + + shared.gradio['Send dummy message'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then( + lambda: '', None, gradio('textbox'), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) + + shared.gradio['Send dummy reply'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then( + lambda: '', None, gradio('textbox'), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) + + shared.gradio['Clear history'].click(lambda: [gr.update(visible=True), 
gr.update(visible=False), gr.update(visible=True)], None, clear_arr) + shared.gradio['Clear history-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) + shared.gradio['Clear history-confirm'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr).then( + chat.clear_chat_log, gradio('interface_state'), gradio('history')).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) + + shared.gradio['Remove last'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) + + shared.gradio['character_menu'].change( + partial(chat.load_character, instruct=False), gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context', 'dummy')).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.load_persistent_history, gradio('interface_state'), gradio('history')).then( + chat.redraw_html, shared.reload_inputs, gradio('display')) + + shared.gradio['Stop'].click( + stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None).then( + chat.redraw_html, shared.reload_inputs, gradio('display')) + + shared.gradio['mode'].change( + lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')) + + shared.gradio['chat_style'].change(chat.redraw_html, shared.reload_inputs, gradio('display')) + shared.gradio['instruction_template'].change( + partial(chat.load_character, instruct=True), gradio('instruction_template', 'name1_instruct', 'name2_instruct'), gradio('name1_instruct', 'name2_instruct', 'dummy', 'dummy', 'context_instruct', 'turn_template')) + + shared.gradio['load_chat_history'].upload( + chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + None, None, None, _js='() => {alert("The history has been loaded.")}') + + shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) + + # Save/delete a character + shared.gradio['save_character'].click( + lambda x: x, gradio('name2'), gradio('save_character_filename')).then( + lambda: gr.update(visible=True), None, gradio('character_saver')) + + shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter')) + + shared.gradio['save_template'].click( + lambda: 'My Template.yaml', None, gradio('save_filename')).then( + lambda: 'characters/instruction-following/', None, gradio('save_root')).then( + chat.generate_instruction_template_yaml, gradio('name1_instruct', 'name2_instruct', 'context_instruct', 'turn_template'), gradio('save_contents')).then( + lambda: gr.update(visible=True), None, gradio('file_saver')) + + shared.gradio['delete_template'].click( + lambda x: f'{x}.yaml', 
gradio('instruction_template'), gradio('delete_filename')).then( + lambda: 'characters/instruction-following/', None, gradio('delete_root')).then( + lambda: gr.update(visible=True), None, gradio('file_deleter')) + + shared.gradio['save_chat_history'].click( + lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( + None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f"(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}") + + shared.gradio['Submit character'].click( + chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( + None, None, None, _js='() => {alert("The character has been loaded.")}') + + shared.gradio['Submit tavern character'].click( + chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( + None, None, None, _js='() => {alert("The character has been loaded.")}') + + shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) + shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) + shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) + shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) + shared.gradio['your_picture'].change( + chat.upload_your_profile_picture, gradio('your_picture'), None).then( + partial(chat.redraw_html, reset_cache=True), shared.reload_inputs, gradio('display')) diff --git a/modules/ui_default.py b/modules/ui_default.py new file mode 100644 index 00000000..f0ab74ef --- /dev/null +++ b/modules/ui_default.py @@ -0,0 +1,94 @@ +import gradio as gr + +from modules import shared, ui, utils +from modules.prompts import count_tokens, load_prompt +from modules.text_generation import ( + generate_reply_wrapper, + stop_everything_event +) +from modules.utils import gradio + + +def create_ui(): + default_text = load_prompt(shared.settings['prompt']) + + shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) + shared.gradio['last_input'] = gr.State('') + + with gr.Tab("Text generation", elem_id="main"): + with gr.Row(): + with gr.Column(): + shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Row(): + shared.gradio['Generate'] = gr.Button('Generate', variant='primary') + shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') + shared.gradio['Continue'] = gr.Button('Continue') + shared.gradio['count_tokens'] = gr.Button('Count tokens') + + with gr.Row(): + shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button') + shared.gradio['save_prompt'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_prompt'] = gr.Button('🗑️', 
elem_classes='refresh-button') + + shared.gradio['status'] = gr.Markdown('') + + with gr.Column(): + with gr.Tab('Raw'): + shared.gradio['output_textbox'] = gr.Textbox(lines=27, label='Output', elem_classes=['textbox_default_output', 'add_scrollbar']) + + with gr.Tab('Markdown'): + shared.gradio['markdown_render'] = gr.Button('Render') + shared.gradio['markdown'] = gr.Markdown() + + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + +def create_event_handlers(): + gen_events = [] + shared.input_params = gradio('textbox', 'interface_state') + output_params = gradio('output_textbox', 'html') + + gen_events.append(shared.gradio['Generate'].click( + lambda x: x, gradio('textbox'), gradio('last_input')).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") + ) + + gen_events.append(shared.gradio['textbox'].submit( + lambda x: x, gradio('textbox'), gradio('last_input')).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") + ) + + shared.gradio['markdown_render'].click(lambda x: x, gradio('output_textbox'), gradio('markdown'), queue=False) + gen_events.append(shared.gradio['Continue'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[1]; element.scrollTop = element.scrollHeight}") + ) + + shared.gradio['Stop'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['prompt_menu'].change(load_prompt, gradio('prompt_menu'), gradio('textbox'), show_progress=False) + shared.gradio['save_prompt'].click( + lambda x: x, gradio('textbox'), gradio('save_contents')).then( + lambda: 'prompts/', None, gradio('save_root')).then( + lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_saver')) + + shared.gradio['delete_prompt'].click( + lambda: 'prompts/', None, gradio('delete_root')).then( + lambda x: x + '.txt', gradio('prompt_menu'), gradio('delete_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_deleter')) + + shared.gradio['count_tokens'].click(count_tokens, gradio('textbox'), gradio('status'), show_progress=False) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py new file mode 100644 index 00000000..952d66c9 
--- /dev/null +++ b/modules/ui_file_saving.py @@ -0,0 +1,108 @@ +import json + +import gradio as gr + +from modules import chat, presets, shared, ui, utils +from modules.utils import gradio + + +def create_ui(): + + # Text file saver + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['file_saver']: + shared.gradio['save_filename'] = gr.Textbox(lines=1, label='File name') + shared.gradio['save_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False) + shared.gradio['save_contents'] = gr.Textbox(lines=10, label='File contents') + with gr.Row(): + shared.gradio['save_confirm'] = gr.Button('Save', elem_classes="small-button") + shared.gradio['save_cancel'] = gr.Button('Cancel', elem_classes="small-button") + + # Text file deleter + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['file_deleter']: + shared.gradio['delete_filename'] = gr.Textbox(lines=1, label='File name') + shared.gradio['delete_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False) + with gr.Row(): + shared.gradio['delete_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') + shared.gradio['delete_cancel'] = gr.Button('Cancel', elem_classes="small-button") + + # Character saver/deleter + if shared.is_chat(): + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']: + shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.') + with gr.Row(): + shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button") + shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") + + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']: + gr.Markdown('Confirm the character deletion?') + with gr.Row(): + shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') + shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") + + +def create_event_handlers(): + shared.gradio['save_confirm'].click( + lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then( + lambda: gr.update(visible=False), None, gradio('file_saver')) + + shared.gradio['delete_confirm'].click( + lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then( + lambda: gr.update(visible=False), None, gradio('file_deleter')) + + shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) + shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) + if shared.is_chat(): + shared.gradio['save_character_confirm'].click( + chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( + lambda: gr.update(visible=False), None, gradio('character_saver')) + + shared.gradio['delete_character_confirm'].click( + chat.delete_character, gradio('character_menu'), None).then( + lambda: gr.update(visible=False), None, gradio('character_deleter')).then( + lambda: gr.update(choices=utils.get_available_characters()), None, gradio('character_menu')) + + shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) + 
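+        # As with 'save_character_cancel' just above, the cancel handlers here only hide
+        # their confirmation boxes; no character file is written or removed until the
+        # matching 'confirm' button is clicked.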
shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter')) + + shared.gradio['save_preset'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + presets.generate_preset_yaml, gradio('interface_state'), gradio('save_contents')).then( + lambda: 'presets/', None, gradio('save_root')).then( + lambda: 'My Preset.yaml', None, gradio('save_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_saver')) + + shared.gradio['delete_preset'].click( + lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then( + lambda: 'presets/', None, gradio('delete_root')).then( + lambda: gr.update(visible=True), None, gradio('file_deleter')) + + if not shared.args.multi_user: + shared.gradio['save_session'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( + None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") + + if shared.is_chat(): + shared.gradio['load_session'].upload( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + chat.redraw_html, shared.reload_inputs, gradio('display')).then( + None, None, None, _js='() => {alert("The session has been loaded.")}') + else: + shared.gradio['load_session'].upload( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + None, None, None, _js='() => {alert("The session has been loaded.")}') + + +def load_session(file, state): + decoded_file = file if type(file) == str else file.decode('utf-8') + data = json.loads(decoded_file) + + if shared.is_chat() and 'character_menu' in data and state.get('character_menu') != data.get('character_menu'): + shared.session_is_loading = True + + state.update(data) + return state diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py new file mode 100644 index 00000000..c9d772b8 --- /dev/null +++ b/modules/ui_model_menu.py @@ -0,0 +1,229 @@ +import importlib +import math +import re +import traceback +from functools import partial + +import gradio as gr +import psutil +import torch + +from modules import loaders, shared, ui, utils +from modules.logging_colors import logger +from modules.LoRA import add_lora_to_model +from modules.models import load_model, unload_model +from modules.models_settings import ( + apply_model_settings_to_state, + save_model_settings, + update_model_parameters +) +from modules.utils import gradio + + +def create_ui(): + # Finding the default values for the GPU and CPU memories + total_mem = [] + for i in range(torch.cuda.device_count()): + total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024))) + + default_gpu_mem = [] + if shared.args.gpu_memory is not None and len(shared.args.gpu_memory) > 0: + for i in shared.args.gpu_memory: + if 'mib' in i.lower(): + 
default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i))) + else: + default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)) * 1000) + while len(default_gpu_mem) < len(total_mem): + default_gpu_mem.append(0) + + total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024 * 1024)) + if shared.args.cpu_memory is not None: + default_cpu_mem = re.sub('[a-zA-Z ]', '', shared.args.cpu_memory) + else: + default_cpu_mem = 0 + + with gr.Tab("Model", elem_id="model-tab"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=shared.model_name, label='Model', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button') + shared.gradio['load_model'] = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button') + shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button') + shared.gradio['reload_model'] = gr.Button("Reload", elem_classes='refresh-button') + shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button') + + with gr.Column(): + with gr.Row(): + shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button') + shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button') + + with gr.Row(): + with gr.Column(): + shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value=None) + with gr.Box(): + with gr.Row(): + with gr.Column(): + for i in range(len(total_mem)): + shared.gradio[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i]) + + shared.gradio['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem) + shared.gradio['transformers_info'] = gr.Markdown('load-in-4bit params:') + shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype) + shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type) + + shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers) + shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx) + shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads) + shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch) + shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. 
Must be 8 for llama-2 70b.') + shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='5e-6 is a good value for llama-2 models.') + + shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None") + shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None") + shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None") + shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0) + shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') + shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') + shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) + shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.', value=shared.args.compress_pos_emb) + shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=32, step=1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) + + with gr.Column(): + shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) + shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Disable if running low on VRAM.') + shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM.
Disable if running low on VRAM.') + shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') + shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') + shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu) + shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit) + shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16) + shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) + shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk) + shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit) + shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant) + shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap) + shared.gradio['low_vram'] = gr.Checkbox(label="low-vram", value=shared.args.low_vram) + shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock) + shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed) + shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Make sure to inspect the .py files inside the model folder before loading it with this option enabled.') + shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa is currently 2x faster than AutoGPTQ on some systems. It is installed by default with the one-click installers. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') + shared.gradio['exllama_info'] = gr.Markdown('For more information, consult the [docs](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama.md).') + shared.gradio['exllama_HF_info'] = gr.Markdown('ExLlama_HF is a wrapper that lets you use ExLlama like a Transformers model, which means it can use the Transformers samplers. It\'s a bit slower than the regular ExLlama.') + shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF is a wrapper that lets you use llama.cpp like a Transformers model, which means it can use the Transformers samplers. To use it, make sure to first download oobabooga/llama-tokenizer under "Download custom model or LoRA".') + + with gr.Column(): + with gr.Row(): + shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.') + + shared.gradio['custom_model_menu'] = gr.Textbox(label="Download custom model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. 
To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main") + shared.gradio['download_model_button'] = gr.Button("Download") + + with gr.Row(): + shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') + + +def create_event_handlers(): + shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) + + # In this event handler, the interface state is read and updated + # with the model defaults (if any), and then the model is loaded + # unless "autoload_model" is unchecked + shared.gradio['model_menu'].change( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + update_model_parameters, gradio('interface_state'), None).then( + load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False) + + shared.gradio['load_model'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + update_model_parameters, gradio('interface_state'), None).then( + partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False) + + shared.gradio['unload_model'].click( + unload_model, None, None).then( + lambda: "Model unloaded", None, gradio('model_status')) + + shared.gradio['reload_model'].click( + unload_model, None, None).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + update_model_parameters, gradio('interface_state'), None).then( + partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False) + + shared.gradio['save_model_settings'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False) + + shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False) + shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu'), gradio('model_status'), show_progress=True) + shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), gradio('load_model')) + + +def load_model_wrapper(selected_model, loader, autoload=False): + if not autoload: + yield f"The settings for {selected_model} have been updated.\nClick on \"Load\" to load it." + return + + if selected_model == 'None': + yield "No model selected" + else: + try: + yield f"Loading {selected_model}..." + shared.model_name = selected_model + unload_model() + if selected_model != '': + shared.model, shared.tokenizer = load_model(shared.model_name, loader) + + if shared.model is not None: + yield f"Successfully loaded {selected_model}" + else: + yield f"Failed to load {selected_model}." 
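+            # Any exception raised while loading falls through to the handler below, which
+            # logs it and streams the full traceback to the 'model_status' Markdown element
+            # instead of crashing the UI.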
+ except: + exc = traceback.format_exc() + logger.error('Failed to load the model.') + print(exc) + yield exc.replace('\n', '\n\n') + + +def load_lora_wrapper(selected_loras): + yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras))) + add_lora_to_model(selected_loras) + yield ("Successfuly applied the LoRAs") + + +def download_model_wrapper(repo_id, progress=gr.Progress()): + try: + downloader_module = importlib.import_module("download-model") + downloader = downloader_module.ModelDownloader() + repo_id_parts = repo_id.split(":") + model = repo_id_parts[0] if len(repo_id_parts) > 0 else repo_id + branch = repo_id_parts[1] if len(repo_id_parts) > 1 else "main" + check = False + + progress(0.0) + yield ("Cleaning up the model/branch names") + model, branch = downloader.sanitize_model_and_branch_names(model, branch) + + yield ("Getting the download links from Hugging Face") + links, sha256, is_lora = downloader.get_download_links_from_huggingface(model, branch, text_only=False) + + yield ("Getting the output folder") + base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir + output_folder = downloader.get_output_folder(model, branch, is_lora, base_folder=base_folder) + + if check: + progress(0.5) + yield ("Checking previously downloaded files") + downloader.check_model_files(model, branch, links, sha256, output_folder) + progress(1.0) + else: + yield (f"Downloading files to {output_folder}") + downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=1) + yield ("Done!") + except: + progress(1.0) + yield traceback.format_exc().replace('\n', '\n\n') diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py new file mode 100644 index 00000000..998a7cf7 --- /dev/null +++ b/modules/ui_notebook.py @@ -0,0 +1,98 @@ +import gradio as gr + +from modules import shared, ui, utils +from modules.prompts import count_tokens, load_prompt +from modules.text_generation import ( + generate_reply_wrapper, + stop_everything_event +) +from modules.utils import gradio + + +def create_ui(): + default_text = load_prompt(shared.settings['prompt']) + + shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) + shared.gradio['last_input'] = gr.State('') + + with gr.Tab("Text generation", elem_id="main"): + with gr.Row(): + with gr.Column(scale=4): + with gr.Tab('Raw'): + shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox', 'add_scrollbar'], lines=27) + + with gr.Tab('Markdown'): + shared.gradio['markdown_render'] = gr.Button('Render') + shared.gradio['markdown'] = gr.Markdown() + + with gr.Tab('HTML'): + shared.gradio['html'] = gr.HTML() + + with gr.Row(): + shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes="small-button") + shared.gradio['Stop'] = gr.Button('Stop', elem_classes="small-button", elem_id='stop') + shared.gradio['Undo'] = gr.Button('Undo', elem_classes="small-button") + shared.gradio['Regenerate'] = gr.Button('Regenerate', elem_classes="small-button") + + with gr.Column(scale=1): + gr.HTML('
    ') + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + with gr.Row(): + shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small']) + shared.gradio['save_prompt'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small']) + shared.gradio['delete_prompt'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small']) + + shared.gradio['count_tokens'] = gr.Button('Count tokens') + shared.gradio['status'] = gr.Markdown('') + + +def create_event_handlers(): + gen_events = [] + + shared.input_params = gradio('textbox', 'interface_state') + output_params = gradio('textbox', 'html') + + gen_events.append(shared.gradio['Generate'].click( + lambda x: x, gradio('textbox'), gradio('last_input')).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") + ) + + gen_events.append(shared.gradio['textbox'].submit( + lambda x: x, gradio('textbox'), gradio('last_input')).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") + ) + + shared.gradio['Undo'].click(lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False) + shared.gradio['markdown_render'].click(lambda x: x, gradio('textbox'), gradio('markdown'), queue=False) + gen_events.append(shared.gradio['Regenerate'].click( + lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") + ) + + shared.gradio['Stop'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['prompt_menu'].change(load_prompt, gradio('prompt_menu'), gradio('textbox'), show_progress=False) + shared.gradio['save_prompt'].click( + lambda x: x, gradio('textbox'), gradio('save_contents')).then( + lambda: 'prompts/', None, gradio('save_root')).then( + lambda: utils.current_time() + '.txt', None, 
gradio('save_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_saver')) + + shared.gradio['delete_prompt'].click( + lambda: 'prompts/', None, gradio('delete_root')).then( + lambda x: x + '.txt', gradio('prompt_menu'), gradio('delete_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_deleter')) + + shared.gradio['count_tokens'].click(count_tokens, gradio('textbox'), gradio('status'), show_progress=False) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py new file mode 100644 index 00000000..75bce9b1 --- /dev/null +++ b/modules/ui_parameters.py @@ -0,0 +1,143 @@ +import gradio as gr + +from modules import loaders, presets, shared, ui, utils +from modules.utils import gradio + + +def create_ui(default_preset): + generate_params = presets.load_preset(default_preset) + with gr.Tab("Parameters", elem_id="parameters"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Generation parameters preset', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button') + shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') + + with gr.Column(): + shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All", "Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value="All", elem_classes='slim-dropdown') + + with gr.Row(): + with gr.Column(): + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') + shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') + shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') + shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') + shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') + shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') + shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') + shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') + + with gr.Column(): + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') + shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') + shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length') + shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') + shared.gradio['do_sample'] = 
gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + + with gr.Accordion("Learn more", open=False): + gr.Markdown(""" + + For a technical description of the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference. + + The best presets, according to the [Preset Arena](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md) experiment, are: + + * Instruction following: + 1) Divine Intellect + 2) Big O + 3) simple-1 + 4) Space Alien + 5) StarChat + 6) Titanic + 7) tfs-with-top-a + 8) Asterism + 9) Contrastive Search + + * Chat: + 1) Midnight Enigma + 2) Yara + 3) Shortwave + + ### Temperature + Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness. + ### top_p + If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results. + ### top_k + Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results. + ### typical_p + If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text. + ### epsilon_cutoff + In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0. + ### eta_cutoff + In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0. + ### repetition_penalty + Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition. + ### repetition_penalty_range + The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used. + ### encoder_repetition_penalty + Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. + ### no_repeat_ngram_size + If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases. + ### min_length + Minimum generation length in tokens. + ### penalty_alpha + Contrastive Search is enabled by setting this to greater than zero and unchecking "do_sample". It should be used with a low value of top_k, for instance, top_k = 4. + + """, elem_classes="markdown") + + with gr.Column(): + create_chat_settings_menus() + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 
1.5 is a good value.') + shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') + shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') + shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') + shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') + + with gr.Column(): + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') + + shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.') + shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') + shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') + shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. 
For instance: "\\nYour Assistant:", "\\nThe assistant:"') + with gr.Column(): + shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') + shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') + shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') + + shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') + shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') + + +def create_event_handlers(): + shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader'), gradio(loaders.list_all_samplers()), show_progress=False) + shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) + + +def create_chat_settings_menus(): + if not shared.is_chat(): + return + + with gr.Box(): + gr.Markdown("Chat parameters") + with gr.Row(): + with gr.Column(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)', info='New generations will be called until either this number is reached or no new content is generated between two iterations.') + + with gr.Column(): + shared.gradio['stop_at_newline'] = gr.Checkbox(value=shared.settings['stop_at_newline'], label='Stop generating at new line character') diff --git a/modules/ui_session.py b/modules/ui_session.py new file mode 100644 index 00000000..7a1a32b0 --- /dev/null +++ b/modules/ui_session.py @@ -0,0 +1,71 @@ +import gradio as gr + +from modules import shared, ui, utils +from modules.github import clone_or_pull_repository +from modules.utils import gradio + + +def create_ui(): + with gr.Tab("Session", elem_id="session-tab"): + modes = ["default", "notebook", "chat"] + current_mode = "default" + for mode in modes[1:]: + if getattr(shared.args, mode): + current_mode = mode + break + + cmd_list = vars(shared.args) + bool_list = sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes + ui.list_model_elements()]) + bool_active = [k for k in bool_list if vars(shared.args)[k]] + + with gr.Row(): + + with gr.Column(): + with gr.Row(): + shared.gradio['interface_modes_menu'] = gr.Dropdown(choices=modes, value=current_mode, label="Mode", elem_classes='slim-dropdown') + shared.gradio['reset_interface'] = gr.Button("Apply and restart", elem_classes="small-button", variant="primary") + shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡', elem_classes="small-button") + + with gr.Row(): + with gr.Column(): + shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), 
value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table') + + with gr.Column(): + shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=bool_list, value=bool_active, label="Boolean command-line flags", elem_classes='checkboxgroup-table') + + with gr.Column(): + if not shared.args.multi_user: + shared.gradio['save_session'] = gr.Button('Save session', elem_id="save_session") + shared.gradio['load_session'] = gr.File(type='binary', file_types=['.json'], label="Upload Session JSON") + + extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.') + extension_status = gr.Markdown() + + extension_name.submit( + clone_or_pull_repository, extension_name, extension_status, show_progress=False).then( + lambda: gr.update(choices=utils.get_available_extensions(), value=shared.args.extensions), None, gradio('extensions_menu')) + + # Reset interface event + shared.gradio['reset_interface'].click( + set_interface_arguments, gradio('interface_modes_menu', 'extensions_menu', 'bool_menu'), None).then( + lambda: None, None, None, _js='() => {document.body.innerHTML=\'

    Reloading...

    \'; setTimeout(function(){location.reload()},2500); return []}') + + shared.gradio['toggle_dark_mode'].click(lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') + + +def set_interface_arguments(interface_mode, extensions, bool_active): + modes = ["default", "notebook", "chat", "cai_chat"] + cmd_list = vars(shared.args) + bool_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes] + + shared.args.extensions = extensions + for k in modes[1:]: + setattr(shared.args, k, False) + if interface_mode != "default": + setattr(shared.args, interface_mode, True) + for k in bool_list: + setattr(shared.args, k, False) + for k in bool_active: + setattr(shared.args, k, True) + + shared.need_restart = True diff --git a/server.py b/server.py index adff9669..0be1f4c4 100644 --- a/server.py +++ b/server.py @@ -14,579 +14,62 @@ with RequestBlocker(): import matplotlib matplotlib.use('Agg') # This fixes LaTeX rendering on some systems -import importlib import json -import math import os -import re import sys import time -import traceback from functools import partial from pathlib import Path from threading import Lock -import psutil -import torch import yaml -from PIL import Image import modules.extensions as extensions_module -from modules import chat, loaders, presets, shared, training, ui, utils -from modules.extensions import apply_extensions -from modules.github import clone_or_pull_repository -from modules.html_generator import chat_html_wrapper -from modules.LoRA import add_lora_to_model -from modules.models import load_model, unload_model -from modules.models_settings import ( - apply_model_settings_to_state, - get_model_settings_from_yamls, - save_model_settings, - update_model_parameters +from modules import ( + chat, + shared, + training, + ui, + ui_chat, + ui_default, + ui_file_saving, + ui_model_menu, + ui_notebook, + ui_parameters, + ui_session, + utils, ) -from modules.text_generation import ( - generate_reply_wrapper, - get_encoded_length, - stop_everything_event +from modules.extensions import apply_extensions +from modules.LoRA import add_lora_to_model +from modules.models import load_model +from modules.models_settings import ( + get_model_settings_from_yamls, + update_model_parameters ) from modules.utils import gradio -def load_model_wrapper(selected_model, loader, autoload=False): - if not autoload: - yield f"The settings for {selected_model} have been updated.\nClick on \"Load\" to load it." - return - - if selected_model == 'None': - yield "No model selected" - else: - try: - yield f"Loading {selected_model}..." - shared.model_name = selected_model - unload_model() - if selected_model != '': - shared.model, shared.tokenizer = load_model(shared.model_name, loader) - - if shared.model is not None: - yield f"Successfully loaded {selected_model}" - else: - yield f"Failed to load {selected_model}." 
- except: - exc = traceback.format_exc() - logger.error('Failed to load the model.') - print(exc) - yield exc.replace('\n', '\n\n') - - -def load_lora_wrapper(selected_loras): - yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras))) - add_lora_to_model(selected_loras) - yield ("Successfuly applied the LoRAs") - - -def load_prompt(fname): - if fname in ['None', '']: - return '' - elif fname.startswith('Instruct-'): - fname = re.sub('^Instruct-', '', fname) - file_path = Path(f'characters/instruction-following/{fname}.yaml') - if not file_path.exists(): - return '' - - with open(file_path, 'r', encoding='utf-8') as f: - data = yaml.safe_load(f) - output = '' - if 'context' in data: - output += data['context'] - - replacements = { - '<|user|>': data['user'], - '<|bot|>': data['bot'], - '<|user-message|>': 'Input', - } - - output += utils.replace_all(data['turn_template'].split('<|bot-message|>')[0], replacements) - return output.rstrip(' ') - else: - file_path = Path(f'prompts/{fname}.txt') - if not file_path.exists(): - return '' - - with open(file_path, 'r', encoding='utf-8') as f: - text = f.read() - if text[-1] == '\n': - text = text[:-1] - - return text - - -def count_tokens(text): - try: - tokens = get_encoded_length(text) - return f'{tokens} tokens in the input.' - except: - return 'Couldn\'t count the number of tokens. Is a tokenizer loaded?' - - -def download_model_wrapper(repo_id, progress=gr.Progress()): - try: - downloader_module = importlib.import_module("download-model") - downloader = downloader_module.ModelDownloader() - repo_id_parts = repo_id.split(":") - model = repo_id_parts[0] if len(repo_id_parts) > 0 else repo_id - branch = repo_id_parts[1] if len(repo_id_parts) > 1 else "main" - check = False - - progress(0.0) - yield ("Cleaning up the model/branch names") - model, branch = downloader.sanitize_model_and_branch_names(model, branch) - - yield ("Getting the download links from Hugging Face") - links, sha256, is_lora = downloader.get_download_links_from_huggingface(model, branch, text_only=False) - - yield ("Getting the output folder") - base_folder = shared.args.lora_dir if is_lora else shared.args.model_dir - output_folder = downloader.get_output_folder(model, branch, is_lora, base_folder=base_folder) - - if check: - progress(0.5) - yield ("Checking previously downloaded files") - downloader.check_model_files(model, branch, links, sha256, output_folder) - progress(1.0) - else: - yield (f"Downloading files to {output_folder}") - downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=1) - yield ("Done!") - except: - progress(1.0) - yield traceback.format_exc().replace('\n', '\n\n') - - -def create_model_menus(): - # Finding the default values for the GPU and CPU memories - total_mem = [] - for i in range(torch.cuda.device_count()): - total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024 * 1024))) - - default_gpu_mem = [] - if shared.args.gpu_memory is not None and len(shared.args.gpu_memory) > 0: - for i in shared.args.gpu_memory: - if 'mib' in i.lower(): - default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i))) - else: - default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)) * 1000) - while len(default_gpu_mem) < len(total_mem): - default_gpu_mem.append(0) - - total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024 * 1024)) - if shared.args.cpu_memory is not None: - default_cpu_mem = re.sub('[a-zA-Z ]', '', shared.args.cpu_memory) 
- else: - default_cpu_mem = 0 - - with gr.Row(): - with gr.Column(): - with gr.Row(): - with gr.Column(): - with gr.Row(): - shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=shared.model_name, label='Model', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button') - load = gr.Button("Load", visible=not shared.settings['autoload_model'], elem_classes='refresh-button') - unload = gr.Button("Unload", elem_classes='refresh-button') - reload = gr.Button("Reload", elem_classes='refresh-button') - save_settings = gr.Button("Save settings", elem_classes='refresh-button') - - with gr.Column(): - with gr.Row(): - shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button') - shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button') - - with gr.Row(): - with gr.Column(): - shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value=None) - with gr.Box(): - with gr.Row(): - with gr.Column(): - for i in range(len(total_mem)): - shared.gradio[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i]) - - shared.gradio['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem) - shared.gradio['transformers_info'] = gr.Markdown('load-in-4bit params:') - shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype) - shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type) - - shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers) - shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx) - shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads) - shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch) - shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. 
Must be 8 for llama-2 70b.') - shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.n_gqa, info='5e-6 is a good value for llama-2 models.') - - shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None") - shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None") - shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None") - shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0) - shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') - shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') - shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) - shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.', value=shared.args.compress_pos_emb) - shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=32, step=1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) - - with gr.Column(): - shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) - shared.gradio['no_inject_fused_attention'] = gr.Checkbox(label="no_inject_fused_attention", value=shared.args.no_inject_fused_attention, info='Disable fused attention. Fused attention improves inference performance but uses more VRAM. Disable if running low on VRAM.') - shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. 
Disable if running low on VRAM.') - shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') - shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') - shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu) - shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit) - shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16) - shared.gradio['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices) - shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk) - shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit) - shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant) - shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap) - shared.gradio['low_vram'] = gr.Checkbox(label="low-vram", value=shared.args.low_vram) - shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock) - shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed) - shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Make sure to inspect the .py files inside the model folder before loading it with this option enabled.') - shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa is currently 2x faster than AutoGPTQ on some systems. It is installed by default with the one-click installers. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') - shared.gradio['exllama_info'] = gr.Markdown('For more information, consult the [docs](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama.md).') - shared.gradio['exllama_HF_info'] = gr.Markdown('ExLlama_HF is a wrapper that lets you use ExLlama like a Transformers model, which means it can use the Transformers samplers. It\'s a bit slower than the regular ExLlama.') - shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF is a wrapper that lets you use llama.cpp like a Transformers model, which means it can use the Transformers samplers. To use it, make sure to first download oobabooga/llama-tokenizer under "Download custom model or LoRA".') - - with gr.Column(): - with gr.Row(): - shared.gradio['autoload_model'] = gr.Checkbox(value=shared.settings['autoload_model'], label='Autoload the model', info='Whether to load the model as soon as it is selected in the Model dropdown.') - - shared.gradio['custom_model_menu'] = gr.Textbox(label="Download custom model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. 
To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main") - shared.gradio['download_model_button'] = gr.Button("Download") - - with gr.Row(): - shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready') - - shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) - - # In this event handler, the interface state is read and updated - # with the model defaults (if any), and then the model is loaded - # unless "autoload_model" is unchecked - shared.gradio['model_menu'].change( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - apply_model_settings_to_state, gradio('model_menu', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - update_model_parameters, gradio('interface_state'), None).then( - load_model_wrapper, gradio('model_menu', 'loader', 'autoload_model'), gradio('model_status'), show_progress=False) - - load.click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - update_model_parameters, gradio('interface_state'), None).then( - partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False) - - unload.click( - unload_model, None, None).then( - lambda: "Model unloaded", None, gradio('model_status')) - - reload.click( - unload_model, None, None).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - update_model_parameters, gradio('interface_state'), None).then( - partial(load_model_wrapper, autoload=True), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=False) - - save_settings.click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - save_model_settings, gradio('model_menu', 'interface_state'), gradio('model_status'), show_progress=False) - - shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False) - shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu'), gradio('model_status'), show_progress=True) - shared.gradio['autoload_model'].change(lambda x: gr.update(visible=not x), gradio('autoload_model'), load) - - -def create_chat_settings_menus(): - if not shared.is_chat(): - return - - with gr.Box(): - gr.Markdown("Chat parameters") - with gr.Row(): - with gr.Column(): - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)', info='New generations will be called until either this number is reached or no new content is generated between two iterations.') - - with gr.Column(): - shared.gradio['stop_at_newline'] = gr.Checkbox(value=shared.settings['stop_at_newline'], label='Stop generating at new line character') - - -def create_settings_menus(default_preset): - generate_params = presets.load_preset(default_preset) - 
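The removed create_settings_menus() seeds every sampler widget from the dict returned by presets.load_preset(). A rough sketch of that mechanism is shown below; the helper is a hypothetical stand-in rather than the repo's modules/presets.py, and it only assumes that presets live as flat YAML files under presets/ with keys such as temperature and top_p, which is how the save/delete preset handlers further down treat them:

```python
# Hypothetical stand-in for presets.load_preset(); for illustration only.
from pathlib import Path

import yaml


def load_preset(name):
    # Defaults used when the preset omits a key or when name == 'None'.
    generate_params = {
        'do_sample': True,
        'temperature': 1.0,
        'top_p': 1.0,
        'top_k': 0,
        'typical_p': 1.0,
        'repetition_penalty': 1.0,
    }

    path = Path('presets') / f'{name}.yaml'
    if name not in (None, 'None') and path.exists():
        with open(path, 'r', encoding='utf-8') as f:
            generate_params.update(yaml.safe_load(f) or {})

    return generate_params

# The sliders defined below are then initialized from this dict, e.g.
# gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature')
```

Saving a preset goes the other way: the save_preset handler serializes the current interface state with presets.generate_preset_yaml() before the file saver writes it back under presets/.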
with gr.Row(): - with gr.Column(): - with gr.Row(): - shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Generation parameters preset', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button') - shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') - - with gr.Column(): - filter_by_loader = gr.Dropdown(label="Filter by loader", choices=["All", "Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value="All", elem_classes='slim-dropdown') - - with gr.Row(): - with gr.Column(): - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') - shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') - shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') - shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') - shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') - shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') - shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') - shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') - - with gr.Column(): - shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') - shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') - shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') - shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') - shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length') - shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') - - with gr.Accordion("Learn more", open=False): - gr.Markdown(""" - - For a technical description of the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference. - - The best presets, according to the [Preset Arena](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md) experiment, are: - - * Instruction following: - 1) Divine Intellect - 2) Big O - 3) simple-1 - 4) Space Alien - 5) StarChat - 6) Titanic - 7) tfs-with-top-a - 8) Asterism - 9) Contrastive Search - - * Chat: - 1) Midnight Enigma - 2) Yara - 3) Shortwave - - ### Temperature - Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness. 
- ### top_p - If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results. - ### top_k - Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results. - ### typical_p - If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text. - ### epsilon_cutoff - In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0. - ### eta_cutoff - In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0. - ### repetition_penalty - Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition. - ### repetition_penalty_range - The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used. - ### encoder_repetition_penalty - Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. - ### no_repeat_ngram_size - If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases. - ### min_length - Minimum generation length in tokens. - ### penalty_alpha - Contrastive Search is enabled by setting this to greater than zero and unchecking "do_sample". It should be used with a low value of top_k, for instance, top_k = 4. - - """, elem_classes="markdown") - - with gr.Column(): - create_chat_settings_menus() - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') - shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') - shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') - shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') - shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') - - with gr.Column(): - shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. 
do_sample must be unchecked.') - - shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.') - shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') - shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') - - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') - shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. For instance: "\\nYour Assistant:", "\\nThe assistant:"') - with gr.Column(): - shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') - shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') - shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') - - shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') - shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') - - filter_by_loader.change(loaders.blacklist_samplers, filter_by_loader, gradio(loaders.list_all_samplers()), show_progress=False) - shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) - - -def create_file_saving_menus(): - - # Text file saver - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['file_saver']: - shared.gradio['save_filename'] = gr.Textbox(lines=1, label='File name') - shared.gradio['save_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. Unchangeable.', interactive=False) - shared.gradio['save_contents'] = gr.Textbox(lines=10, label='File contents') - with gr.Row(): - shared.gradio['save_confirm'] = gr.Button('Save', elem_classes="small-button") - shared.gradio['save_cancel'] = gr.Button('Cancel', elem_classes="small-button") - - # Text file deleter - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['file_deleter']: - shared.gradio['delete_filename'] = gr.Textbox(lines=1, label='File name') - shared.gradio['delete_root'] = gr.Textbox(lines=1, label='File folder', info='For reference. 
Unchangeable.', interactive=False) - with gr.Row(): - shared.gradio['delete_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') - shared.gradio['delete_cancel'] = gr.Button('Cancel', elem_classes="small-button") - - # Character saver/deleter - if shared.is_chat(): - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']: - shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.') - with gr.Row(): - shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button") - shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") - - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']: - gr.Markdown('Confirm the character deletion?') - with gr.Row(): - shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') - shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") - - -def create_file_saving_event_handlers(): - shared.gradio['save_confirm'].click( - lambda x, y, z: utils.save_file(x + y, z), gradio('save_root', 'save_filename', 'save_contents'), None).then( - lambda: gr.update(visible=False), None, gradio('file_saver')) - - shared.gradio['delete_confirm'].click( - lambda x, y: utils.delete_file(x + y), gradio('delete_root', 'delete_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('file_deleter')) - - shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) - shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) - if shared.is_chat(): - shared.gradio['save_character_confirm'].click( - chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('character_saver')) - - shared.gradio['delete_character_confirm'].click( - chat.delete_character, gradio('character_menu'), None).then( - lambda: gr.update(visible=False), None, gradio('character_deleter')).then( - lambda: gr.update(choices=utils.get_available_characters()), None, gradio('character_menu')) - - shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) - shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter')) - - shared.gradio['save_preset'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - presets.generate_preset_yaml, gradio('interface_state'), gradio('save_contents')).then( - lambda: 'presets/', None, gradio('save_root')).then( - lambda: 'My Preset.yaml', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_preset'].click( - lambda x: f'{x}.yaml', gradio('preset_menu'), gradio('delete_filename')).then( - lambda: 'presets/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) - - if not shared.args.multi_user: - - def load_session(file, state): - decoded_file = file if type(file) == str else file.decode('utf-8') - data = json.loads(decoded_file) - - if shared.is_chat() and 'character_menu' in data and state.get('character_menu') != data.get('character_menu'): - shared.session_is_loading = True 
- - state.update(data) - return state - - shared.gradio['save_session'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( - None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") - - if shared.is_chat(): - shared.gradio['load_session'].upload( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - None, None, None, _js='() => {alert("The session has been loaded.")}') - else: - shared.gradio['load_session'].upload( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - None, None, None, _js='() => {alert("The session has been loaded.")}') - - -def set_interface_arguments(interface_mode, extensions, bool_active): - modes = ["default", "notebook", "chat", "cai_chat"] - cmd_list = vars(shared.args) - bool_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes] - - shared.args.extensions = extensions - for k in modes[1:]: - setattr(shared.args, k, False) - if interface_mode != "default": - setattr(shared.args, interface_mode, True) - for k in bool_list: - setattr(shared.args, k, False) - for k in bool_active: - setattr(shared.args, k, True) - - shared.need_restart = True - - def create_interface(): - # Defining some variables - gen_events = [] - default_preset = shared.settings['preset'] - default_text = load_prompt(shared.settings['prompt']) title = 'Text generation web UI' - # Authentication variables - auth = None - gradio_auth_creds = [] + # Password authentication + auth = [] if shared.args.gradio_auth: - gradio_auth_creds += [x.strip() for x in shared.args.gradio_auth.strip('"').replace('\n', '').split(',') if x.strip()] - if shared.args.gradio_auth_path is not None: + auth.extend(x.strip() for x in shared.args.gradio_auth.strip('"').replace('\n', '').split(',') if x.strip()) + if shared.args.gradio_auth_path: with open(shared.args.gradio_auth_path, 'r', encoding="utf8") as file: - for line in file.readlines(): - gradio_auth_creds += [x.strip() for x in line.split(',') if x.strip()] - if gradio_auth_creds: - auth = [tuple(cred.split(':')) for cred in gradio_auth_creds] + auth.extend(x.strip() for line in file for x in line.split(',') if x.strip()) + auth = [tuple(cred.split(':')) for cred in auth] - # Importing the extension files and executing their setup() functions + # Import the extensions and execute their setup() functions if shared.args.extensions is not None and len(shared.args.extensions) > 0: extensions_module.load_extensions() - # Forcing some events to be triggered on page load + # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'loader': shared.args.loader or 'Transformers', }) - if shared.is_chat(): shared.persistent_interface_state.update({ 'mode': shared.settings['mode'], @@ -603,482 +86,55 @@ def create_interface(): 
css += apply_extensions('css') js += apply_extensions('js') + # The input elements for the generation functions + shared.input_elements = ui.list_interface_input_elements() + with gr.Blocks(css=css, analytics_enabled=False, title=title, theme=ui.theme) as shared.gradio['interface']: + + # Audio notification if Path("notification.mp3").exists(): shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="notification.mp3", elem_id="audio_notification", visible=False) - audio_notification_js = "document.querySelector('#audio_notification audio')?.play();" - else: - audio_notification_js = "" # Floating menus for saving/deleting files - create_file_saving_menus() + ui_file_saving.create_ui() - # Used for saving files using javascript + # Temporary clipboard for saving files shared.gradio['temporary_text'] = gr.Textbox(visible=False) - # Create chat mode interface + # Text Generation tab if shared.is_chat(): - shared.input_elements = ui.list_interface_input_elements() - - shared.gradio.update({ - 'interface_state': gr.State({k: None for k in shared.input_elements}), - 'Chat input': gr.State(), - 'dummy': gr.State(), - 'history': gr.State({'internal': [], 'visible': []}), - }) - - with gr.Tab('Text generation', elem_id='main'): - shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) - shared.gradio['textbox'] = gr.Textbox(label='Input') - with gr.Row(): - shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') - shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary') - shared.gradio['Continue'] = gr.Button('Continue') - - with gr.Row(): - shared.gradio['Impersonate'] = gr.Button('Impersonate') - shared.gradio['Regenerate'] = gr.Button('Regenerate') - shared.gradio['Remove last'] = gr.Button('Remove last', elem_classes=['button_nowrap']) - - with gr.Row(): - shared.gradio['Copy last reply'] = gr.Button('Copy last reply') - shared.gradio['Replace last reply'] = gr.Button('Replace last reply') - shared.gradio['Send dummy message'] = gr.Button('Send dummy message') - shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') - - with gr.Row(): - shared.gradio['Clear history'] = gr.Button('Clear history') - shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant='stop', visible=False) - shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False) - - with gr.Row(): - shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with']) - - with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. 
In instruct and chat-instruct modes, the instruction template selected under "Chat settings" must match the current model.') - shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') - - with gr.Tab('Chat settings', elem_id='chat-settings'): - - with gr.Tab("Character"): - with gr.Row(): - with gr.Column(scale=8): - with gr.Row(): - shared.gradio['character_menu'] = gr.Dropdown(value='None', choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button') - shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button') - - shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') - shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=4, label='Context', elem_classes=['add_scrollbar']) - shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=4, label='Greeting', elem_classes=['add_scrollbar']) - - with gr.Column(scale=1): - shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') - shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None) - - with gr.Tab("Instruction template"): - with gr.Row(): - with gr.Row(): - shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Instruction template', value='None', info='Change this according to the model/LoRA that you are using. 
Used in instruct and chat-instruct modes.', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button') - shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button') - - shared.gradio['name1_instruct'] = gr.Textbox(value='', lines=2, label='User string') - shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') - shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') - shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') - with gr.Row(): - shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar']) - - with gr.Tab('Chat history'): - with gr.Row(): - with gr.Column(): - shared.gradio['save_chat_history'] = gr.Button(value='Save history') - - with gr.Column(): - shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label="Upload History JSON") - - with gr.Tab('Upload character'): - with gr.Tab('YAML or JSON'): - with gr.Row(): - shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File') - shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)') - - shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False) - - with gr.Tab('TavernAI PNG'): - with gr.Row(): - with gr.Column(): - shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id="upload_img_tavern") - shared.gradio['tavern_json'] = gr.State() - with gr.Column(): - shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) - shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) - - shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False) - - with gr.Tab("Parameters", elem_id="parameters"): - create_settings_menus(default_preset) - - # Create notebook mode interface + ui_chat.create_ui() elif shared.args.notebook: - shared.input_elements = ui.list_interface_input_elements() - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) - shared.gradio['last_input'] = gr.State('') - with gr.Tab("Text generation", elem_id="main"): - with gr.Row(): - with gr.Column(scale=4): - with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox', 'add_scrollbar'], lines=27) - - with gr.Tab('Markdown'): - shared.gradio['markdown_render'] = gr.Button('Render') - shared.gradio['markdown'] = gr.Markdown() - - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - - with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes="small-button") - shared.gradio['Stop'] = gr.Button('Stop', elem_classes="small-button", elem_id='stop') - shared.gradio['Undo'] = gr.Button('Undo', elem_classes="small-button") - shared.gradio['Regenerate'] = gr.Button('Regenerate', 
elem_classes="small-button") - - with gr.Column(scale=1): - gr.HTML('
    ') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - with gr.Row(): - shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small']) - shared.gradio['save_prompt'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small']) - shared.gradio['delete_prompt'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small']) - - shared.gradio['count_tokens'] = gr.Button('Count tokens') - shared.gradio['status'] = gr.Markdown('') - - with gr.Tab("Parameters", elem_id="parameters"): - create_settings_menus(default_preset) - - # Create default mode interface + ui_notebook.create_ui() else: - shared.input_elements = ui.list_interface_input_elements() - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) - shared.gradio['last_input'] = gr.State('') - with gr.Tab("Text generation", elem_id="main"): - with gr.Row(): - with gr.Column(): - shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate', variant='primary') - shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') - shared.gradio['Continue'] = gr.Button('Continue') - shared.gradio['count_tokens'] = gr.Button('Count tokens') + ui_default.create_ui() - with gr.Row(): - shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button') - shared.gradio['save_prompt'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_prompt'] = gr.Button('🗑️', elem_classes='refresh-button') + ui_parameters.create_ui(shared.settings['preset']) # Parameters tab + ui_model_menu.create_ui() # Model tab + training.create_ui() # Training tab + ui_session.create_ui() # Session tab - shared.gradio['status'] = gr.Markdown('') - - with gr.Column(): - with gr.Tab('Raw'): - shared.gradio['output_textbox'] = gr.Textbox(lines=27, label='Output', elem_classes=['textbox_default_output', 'add_scrollbar']) - - with gr.Tab('Markdown'): - shared.gradio['markdown_render'] = gr.Button('Render') - shared.gradio['markdown'] = gr.Markdown() - - with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() - - with gr.Tab("Parameters", elem_id="parameters"): - create_settings_menus(default_preset) - - # Model tab - with gr.Tab("Model", elem_id="model-tab"): - create_model_menus() - - # Training tab - with gr.Tab("Training", elem_id="training-tab"): - training.create_train_interface() - - # Session tab - with gr.Tab("Session", elem_id="session-tab"): - modes = ["default", "notebook", "chat"] - current_mode = "default" - for mode in modes[1:]: - if getattr(shared.args, mode): - current_mode = mode - break - - cmd_list = vars(shared.args) - bool_list 
= sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes + ui.list_model_elements()]) - bool_active = [k for k in bool_list if vars(shared.args)[k]] - - with gr.Row(): - - with gr.Column(): - with gr.Row(): - shared.gradio['interface_modes_menu'] = gr.Dropdown(choices=modes, value=current_mode, label="Mode", elem_classes='slim-dropdown') - shared.gradio['reset_interface'] = gr.Button("Apply and restart", elem_classes="small-button", variant="primary") - shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡', elem_classes="small-button") - - with gr.Row(): - with gr.Column(): - shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table') - - with gr.Column(): - shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=bool_list, value=bool_active, label="Boolean command-line flags", elem_classes='checkboxgroup-table') - - with gr.Column(): - if not shared.args.multi_user: - shared.gradio['save_session'] = gr.Button('Save session', elem_id="save_session") - shared.gradio['load_session'] = gr.File(type='binary', file_types=['.json'], label="Upload Session JSON") - - extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.') - extension_status = gr.Markdown() - - extension_name.submit( - clone_or_pull_repository, extension_name, extension_status, show_progress=False).then( - lambda: gr.update(choices=utils.get_available_extensions(), value=shared.args.extensions), None, gradio('extensions_menu')) - - # Reset interface event - shared.gradio['reset_interface'].click( - set_interface_arguments, gradio('interface_modes_menu', 'extensions_menu', 'bool_menu'), None).then( - lambda: None, None, None, _js='() => {document.body.innerHTML=\'
    Reloading...
    \'; setTimeout(function(){location.reload()},2500); return []}') - - shared.gradio['toggle_dark_mode'].click(lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') - - # chat mode event handlers + # Generation events if shared.is_chat(): - shared.input_params = gradio('Chat input', 'start_with', 'interface_state') - clear_arr = gradio('Clear history-confirm', 'Clear history', 'Clear history-cancel') - shared.reload_inputs = gradio('history', 'name1', 'name2', 'mode', 'chat_style') - - gen_events.append(shared.gradio['Generate'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( - chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - ) - - gen_events.append(shared.gradio['textbox'].submit( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( - chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - ) - - gen_events.append(shared.gradio['Regenerate'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_reply_wrapper, regenerate=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - ) - - gen_events.append(shared.gradio['Continue'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_reply_wrapper, _continue=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - ) - - gen_events.append(shared.gradio['Impersonate'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( - chat.impersonate_wrapper, shared.input_params, gradio('textbox'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - ) - - shared.gradio['Replace last reply'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.replace_last_reply, 
gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - - shared.gradio['Send dummy message'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - - shared.gradio['Send dummy reply'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then( - lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - - shared.gradio['Clear history'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, clear_arr) - shared.gradio['Clear history-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) - shared.gradio['Clear history-confirm'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr).then( - chat.clear_chat_log, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - - shared.gradio['Remove last'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - - shared.gradio['character_menu'].change( - partial(chat.load_character, instruct=False), gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context', 'dummy')).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.load_persistent_history, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) - - shared.gradio['Stop'].click( - stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) - - shared.gradio['mode'].change( - lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) - - shared.gradio['chat_style'].change(chat.redraw_html, shared.reload_inputs, gradio('display')) - shared.gradio['instruction_template'].change( - partial(chat.load_character, instruct=True), gradio('instruction_template', 'name1_instruct', 'name2_instruct'), gradio('name1_instruct', 'name2_instruct', 'dummy', 'dummy', 'context_instruct', 
'turn_template')) - - shared.gradio['load_chat_history'].upload( - chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - None, None, None, _js='() => {alert("The history has been loaded.")}') - - shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) - - # Save/delete a character - shared.gradio['save_character'].click( - lambda x: x, gradio('name2'), gradio('save_character_filename')).then( - lambda: gr.update(visible=True), None, gradio('character_saver')) - - shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter')) - - shared.gradio['save_template'].click( - lambda: 'My Template.yaml', None, gradio('save_filename')).then( - lambda: 'characters/instruction-following/', None, gradio('save_root')).then( - chat.generate_instruction_template_yaml, gradio('name1_instruct', 'name2_instruct', 'context_instruct', 'turn_template'), gradio('save_contents')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_template'].click( - lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then( - lambda: 'characters/instruction-following/', None, gradio('delete_root')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) - - shared.gradio['save_chat_history'].click( - lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f"(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}") - - shared.gradio['Submit character'].click( - chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - None, None, None, _js='() => {alert("The character has been loaded.")}') - - shared.gradio['Submit tavern character'].click( - chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - None, None, None, _js='() => {alert("The character has been loaded.")}') - - shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) - shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) - shared.gradio['upload_img_tavern'].upload(chat.check_tavern_character, gradio('upload_img_tavern'), gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) - shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) - shared.gradio['your_picture'].change( - chat.upload_your_profile_picture, gradio('your_picture'), None).then( - partial(chat.redraw_html, reset_cache=True), shared.reload_inputs, gradio('display')) - - # notebook/default modes event handlers + ui_chat.create_event_handlers() + elif shared.args.notebook: + ui_notebook.create_event_handlers() else: - shared.input_params = gradio('textbox', 'interface_state') - if shared.args.notebook: - output_params = gradio('textbox', 'html') - else: - output_params = gradio('output_textbox', 'html') + ui_default.create_event_handlers() - gen_events.append(shared.gradio['Generate'].click( - lambda x: x, gradio('textbox'), gradio('last_input')).then( - ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") - ) - - gen_events.append(shared.gradio['textbox'].submit( - lambda x: x, gradio('textbox'), gradio('last_input')).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") - ) - - if shared.args.notebook: - shared.gradio['Undo'].click(lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False) - shared.gradio['markdown_render'].click(lambda x: x, gradio('textbox'), gradio('markdown'), queue=False) - gen_events.append(shared.gradio['Regenerate'].click( - lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") - ) - else: - shared.gradio['markdown_render'].click(lambda x: x, gradio('output_textbox'), gradio('markdown'), queue=False) - gen_events.append(shared.gradio['Continue'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=False).then( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{audio_notification_js}}}") - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[1]; element.scrollTop = element.scrollHeight}") - ) - - shared.gradio['Stop'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) - shared.gradio['prompt_menu'].change(load_prompt, gradio('prompt_menu'), gradio('textbox'), show_progress=False) - shared.gradio['save_prompt'].click( - lambda x: x, gradio('textbox'), gradio('save_contents')).then( - lambda: 'prompts/', None, gradio('save_root')).then( - lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_saver')) - - shared.gradio['delete_prompt'].click( - lambda: 'prompts/', None, gradio('delete_root')).then( - lambda x: x + '.txt', gradio('prompt_menu'), gradio('delete_filename')).then( - lambda: gr.update(visible=True), None, gradio('file_deleter')) - - shared.gradio['count_tokens'].click(count_tokens, gradio('textbox'), gradio('status'), show_progress=False) 
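All of the generation handlers above rely on the same Gradio idiom: an event such as .click() or .submit() returns a dependency whose .then() calls queue follow-up steps (gather the interface state, run the generation wrapper, fire the audio notification) in order. A stripped-down sketch of that chaining pattern, using illustrative component names rather than the web UI's real ones:

```python
import gradio as gr


def remember(text):
    # Snapshot the prompt so a later "Undo"/"Regenerate" step can restore it.
    return text


def generate(text):
    # Placeholder for a generation function such as generate_reply_wrapper().
    return f"Generated reply for: {text}"


with gr.Blocks() as demo:
    last_input = gr.State('')
    textbox = gr.Textbox(label='Input')
    output = gr.Textbox(label='Output')
    generate_btn = gr.Button('Generate')

    # Each .then() step runs only after the previous one finishes,
    # mirroring the .click(...).then(...).then(...) chains used above.
    generate_btn.click(remember, textbox, last_input).then(
        generate, textbox, output)

if __name__ == '__main__':
    demo.launch()
```

Because each .then() step starts only after the previous one completes, the state passed to the generation step already reflects the snapshot taken in the first step.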
- - create_file_saving_event_handlers() - - if shared.settings['dark_theme']: - shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") + # Other events + ui_file_saving.create_event_handlers() + ui_parameters.create_event_handlers() + ui_model_menu.create_event_handlers() + # Interface launch events shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) if shared.is_chat(): shared.gradio['interface'].load(chat.redraw_html, shared.reload_inputs, gradio('display')) - # Extensions tabs - extensions_module.create_extensions_tabs() - - # Extensions block - extensions_module.create_extensions_block() + extensions_module.create_extensions_tabs() # Extensions tabs + extensions_module.create_extensions_block() # Extensions block # Launch the interface shared.gradio['interface'].queue() @@ -1086,17 +142,19 @@ def create_interface(): shared.gradio['interface'].launch( prevent_thread_lock=True, share=shared.args.share, - server_name = None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), + server_name=None if not shared.args.listen else (shared.args.listen_host or '0.0.0.0'), server_port=shared.args.listen_port, inbrowser=shared.args.auto_launch, - auth=auth, + auth=auth or None, ssl_verify=False if (shared.args.ssl_keyfile or shared.args.ssl_certfile) else True, ssl_keyfile=shared.args.ssl_keyfile, ssl_certfile=shared.args.ssl_certfile ) + if __name__ == "__main__": - # Loading custom settings + + # Load custom settings settings_file = None if shared.args.settings is not None and Path(shared.args.settings).exists(): settings_file = Path(shared.args.settings) @@ -1109,10 +167,9 @@ if __name__ == "__main__": logger.info(f"Loading settings from {settings_file}...") file_contents = open(settings_file, 'r', encoding='utf-8').read() new_settings = json.loads(file_contents) if settings_file.suffix == "json" else yaml.safe_load(file_contents) - for item in new_settings: - shared.settings[item] = new_settings[item] + shared.settings.update(new_settings) - # Set default model settings based on settings file + # Fallback settings for models shared.model_config['.*'] = { 'wbits': 'None', 'model_type': 'None', @@ -1128,7 +185,7 @@ if __name__ == "__main__": shared.model_config.move_to_end('.*', last=False) # Move to the beginning - # Default extensions + # Activate the extensions listed on settings.yaml extensions_module.available_extensions = utils.get_available_extensions() if shared.is_chat(): for extension in shared.settings['chat_default_extensions']: From c237ce607e75b536f4807575aff846a6cad0da7b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 17:50:07 -0700 Subject: [PATCH 032/169] Move characters/instruction-following to instruction-templates --- extensions/openai/completions.py | 4 ++-- extensions/openai/edits.py | 4 ++-- .../Airoboros-v1.2.yaml | 0 .../Alpaca.yaml | 0 .../Bactrian.yaml | 0 .../Baichuan Chat.yaml | 0 .../Baize.yaml | 0 .../Bluemoon.yaml | 0 .../ChatGLM.yaml | 0 .../Chinese-Vicuna-Chat.yaml | 0 .../Galactica Cite.yaml | 0 .../Galactica Finetuned.yaml | 0 .../Galactica Q.yaml | 0 .../Galactica Summary.yaml | 0 .../Galactica Work.yaml | 0 .../Galactica v2.yaml | 0 .../Galactica.yaml | 0 .../Gorilla.yaml | 0 .../Guanaco non-chat.yaml | 0 
.../Guanaco-QLoRA.yaml | 0 .../Guanaco.yaml | 0 .../H2O-human_bot.yaml | 0 .../H2O-prompt_answer.yaml | 0 .../Hippogriff.yaml | 0 .../INCITE-Chat.yaml | 0 .../INCITE-Instruct.yaml | 0 .../KoAlpaca.yaml | 0 .../Koala.yaml | 0 .../LLaVA.yaml | 0 .../Llama-v2.yaml | 0 .../instruction-following => instruction-templates}/MOSS.yaml | 0 .../MPT-Chat.yaml | 0 .../Manticore Chat.yaml | 0 .../Metharme.yaml | 0 .../Minotaur.yaml | 0 .../NewHope.yaml | 0 .../Open Assistant.yaml | 0 .../OpenBuddy.yaml | 0 .../Orca Mini.yaml | 0 .../RWKV-Raven.yaml | 0 .../Samantha.yaml | 0 .../StableBeluga2.yaml | 0 .../StableLM.yaml | 0 .../StableVicuna.yaml | 0 .../Starchat-Beta.yaml | 0 .../instruction-following => instruction-templates}/Tulu.yaml | 0 .../Vicuna-v0.yaml | 0 .../Vicuna-v1.1.yaml | 0 .../Vigogne-Chat.yaml | 0 .../Vigogne-Instruct.yaml | 0 .../Wizard-Mega ShareGPT.yaml | 0 .../Wizard-Mega WizardLM.yaml | 0 .../Wizard-Mega.yaml | 0 .../WizardLM.yaml | 0 .../instruction-following => instruction-templates}/Ziya.yaml | 0 modules/chat.py | 2 +- modules/prompts.py | 2 +- modules/ui_chat.py | 4 ++-- modules/utils.py | 4 ++-- 59 files changed, 10 insertions(+), 10 deletions(-) rename {characters/instruction-following => instruction-templates}/Airoboros-v1.2.yaml (100%) rename {characters/instruction-following => instruction-templates}/Alpaca.yaml (100%) rename {characters/instruction-following => instruction-templates}/Bactrian.yaml (100%) rename {characters/instruction-following => instruction-templates}/Baichuan Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Baize.yaml (100%) rename {characters/instruction-following => instruction-templates}/Bluemoon.yaml (100%) rename {characters/instruction-following => instruction-templates}/ChatGLM.yaml (100%) rename {characters/instruction-following => instruction-templates}/Chinese-Vicuna-Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica Cite.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica Finetuned.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica Q.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica Summary.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica Work.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica v2.yaml (100%) rename {characters/instruction-following => instruction-templates}/Galactica.yaml (100%) rename {characters/instruction-following => instruction-templates}/Gorilla.yaml (100%) rename {characters/instruction-following => instruction-templates}/Guanaco non-chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Guanaco-QLoRA.yaml (100%) rename {characters/instruction-following => instruction-templates}/Guanaco.yaml (100%) rename {characters/instruction-following => instruction-templates}/H2O-human_bot.yaml (100%) rename {characters/instruction-following => instruction-templates}/H2O-prompt_answer.yaml (100%) rename {characters/instruction-following => instruction-templates}/Hippogriff.yaml (100%) rename {characters/instruction-following => instruction-templates}/INCITE-Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/INCITE-Instruct.yaml (100%) rename {characters/instruction-following => instruction-templates}/KoAlpaca.yaml (100%) rename {characters/instruction-following => instruction-templates}/Koala.yaml (100%) 
rename {characters/instruction-following => instruction-templates}/LLaVA.yaml (100%) rename {characters/instruction-following => instruction-templates}/Llama-v2.yaml (100%) rename {characters/instruction-following => instruction-templates}/MOSS.yaml (100%) rename {characters/instruction-following => instruction-templates}/MPT-Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Manticore Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Metharme.yaml (100%) rename {characters/instruction-following => instruction-templates}/Minotaur.yaml (100%) rename {characters/instruction-following => instruction-templates}/NewHope.yaml (100%) rename {characters/instruction-following => instruction-templates}/Open Assistant.yaml (100%) rename {characters/instruction-following => instruction-templates}/OpenBuddy.yaml (100%) rename {characters/instruction-following => instruction-templates}/Orca Mini.yaml (100%) rename {characters/instruction-following => instruction-templates}/RWKV-Raven.yaml (100%) rename {characters/instruction-following => instruction-templates}/Samantha.yaml (100%) rename {characters/instruction-following => instruction-templates}/StableBeluga2.yaml (100%) rename {characters/instruction-following => instruction-templates}/StableLM.yaml (100%) rename {characters/instruction-following => instruction-templates}/StableVicuna.yaml (100%) rename {characters/instruction-following => instruction-templates}/Starchat-Beta.yaml (100%) rename {characters/instruction-following => instruction-templates}/Tulu.yaml (100%) rename {characters/instruction-following => instruction-templates}/Vicuna-v0.yaml (100%) rename {characters/instruction-following => instruction-templates}/Vicuna-v1.1.yaml (100%) rename {characters/instruction-following => instruction-templates}/Vigogne-Chat.yaml (100%) rename {characters/instruction-following => instruction-templates}/Vigogne-Instruct.yaml (100%) rename {characters/instruction-following => instruction-templates}/Wizard-Mega ShareGPT.yaml (100%) rename {characters/instruction-following => instruction-templates}/Wizard-Mega WizardLM.yaml (100%) rename {characters/instruction-following => instruction-templates}/Wizard-Mega.yaml (100%) rename {characters/instruction-following => instruction-templates}/WizardLM.yaml (100%) rename {characters/instruction-following => instruction-templates}/Ziya.yaml (100%) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 646da958..3e277710 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -165,7 +165,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens): # Instruct models can be much better if shared.settings['instruction_template']: try: - instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r')) + instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r')) template = instruct['turn_template'] system_message_template = "{message}" @@ -193,7 +193,7 @@ def messages_to_prompt(body: dict, req_params: dict, max_tokens): except Exception as e: req_params['stopping_strings'].extend(['\nUser:', 'User:']) # XXX User: prompt here also - print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}") + print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}") 
print("Warning: Loaded default instruction-following template for model.") else: diff --git a/extensions/openai/edits.py b/extensions/openai/edits.py index f10f5779..2b527dc0 100644 --- a/extensions/openai/edits.py +++ b/extensions/openai/edits.py @@ -31,7 +31,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict: stopping_strings.extend(['\n###']) else: try: - instruct = yaml.safe_load(open(f"characters/instruction-following/{shared.settings['instruction_template']}.yaml", 'r')) + instruct = yaml.safe_load(open(f"instruction-templates/{shared.settings['instruction_template']}.yaml", 'r')) template = instruct['turn_template'] template = template\ @@ -45,7 +45,7 @@ def edits(instruction: str, input: str, temperature=1.0, top_p=1.0) -> dict: except Exception as e: instruction_template = default_template - print(f"Exception: When loading characters/instruction-following/{shared.settings['instruction_template']}.yaml: {repr(e)}") + print(f"Exception: When loading instruction-templates/{shared.settings['instruction_template']}.yaml: {repr(e)}") print("Warning: Loaded default instruction-following template (Alpaca) for model.") else: stopping_strings.extend(['\n###']) diff --git a/characters/instruction-following/Airoboros-v1.2.yaml b/instruction-templates/Airoboros-v1.2.yaml similarity index 100% rename from characters/instruction-following/Airoboros-v1.2.yaml rename to instruction-templates/Airoboros-v1.2.yaml diff --git a/characters/instruction-following/Alpaca.yaml b/instruction-templates/Alpaca.yaml similarity index 100% rename from characters/instruction-following/Alpaca.yaml rename to instruction-templates/Alpaca.yaml diff --git a/characters/instruction-following/Bactrian.yaml b/instruction-templates/Bactrian.yaml similarity index 100% rename from characters/instruction-following/Bactrian.yaml rename to instruction-templates/Bactrian.yaml diff --git a/characters/instruction-following/Baichuan Chat.yaml b/instruction-templates/Baichuan Chat.yaml similarity index 100% rename from characters/instruction-following/Baichuan Chat.yaml rename to instruction-templates/Baichuan Chat.yaml diff --git a/characters/instruction-following/Baize.yaml b/instruction-templates/Baize.yaml similarity index 100% rename from characters/instruction-following/Baize.yaml rename to instruction-templates/Baize.yaml diff --git a/characters/instruction-following/Bluemoon.yaml b/instruction-templates/Bluemoon.yaml similarity index 100% rename from characters/instruction-following/Bluemoon.yaml rename to instruction-templates/Bluemoon.yaml diff --git a/characters/instruction-following/ChatGLM.yaml b/instruction-templates/ChatGLM.yaml similarity index 100% rename from characters/instruction-following/ChatGLM.yaml rename to instruction-templates/ChatGLM.yaml diff --git a/characters/instruction-following/Chinese-Vicuna-Chat.yaml b/instruction-templates/Chinese-Vicuna-Chat.yaml similarity index 100% rename from characters/instruction-following/Chinese-Vicuna-Chat.yaml rename to instruction-templates/Chinese-Vicuna-Chat.yaml diff --git a/characters/instruction-following/Galactica Cite.yaml b/instruction-templates/Galactica Cite.yaml similarity index 100% rename from characters/instruction-following/Galactica Cite.yaml rename to instruction-templates/Galactica Cite.yaml diff --git a/characters/instruction-following/Galactica Finetuned.yaml b/instruction-templates/Galactica Finetuned.yaml similarity index 100% rename from characters/instruction-following/Galactica Finetuned.yaml rename to 
instruction-templates/Galactica Finetuned.yaml
diff --git a/characters/instruction-following/Galactica Q.yaml b/instruction-templates/Galactica Q.yaml
similarity index 100%
rename from characters/instruction-following/Galactica Q.yaml
rename to instruction-templates/Galactica Q.yaml
diff --git a/characters/instruction-following/Galactica Summary.yaml b/instruction-templates/Galactica Summary.yaml
similarity index 100%
rename from characters/instruction-following/Galactica Summary.yaml
rename to instruction-templates/Galactica Summary.yaml
diff --git a/characters/instruction-following/Galactica Work.yaml b/instruction-templates/Galactica Work.yaml
similarity index 100%
rename from characters/instruction-following/Galactica Work.yaml
rename to instruction-templates/Galactica Work.yaml
diff --git a/characters/instruction-following/Galactica v2.yaml b/instruction-templates/Galactica v2.yaml
similarity index 100%
rename from characters/instruction-following/Galactica v2.yaml
rename to instruction-templates/Galactica v2.yaml
diff --git a/characters/instruction-following/Galactica.yaml b/instruction-templates/Galactica.yaml
similarity index 100%
rename from characters/instruction-following/Galactica.yaml
rename to instruction-templates/Galactica.yaml
diff --git a/characters/instruction-following/Gorilla.yaml b/instruction-templates/Gorilla.yaml
similarity index 100%
rename from characters/instruction-following/Gorilla.yaml
rename to instruction-templates/Gorilla.yaml
diff --git a/characters/instruction-following/Guanaco non-chat.yaml b/instruction-templates/Guanaco non-chat.yaml
similarity index 100%
rename from characters/instruction-following/Guanaco non-chat.yaml
rename to instruction-templates/Guanaco non-chat.yaml
diff --git a/characters/instruction-following/Guanaco-QLoRA.yaml b/instruction-templates/Guanaco-QLoRA.yaml
similarity index 100%
rename from characters/instruction-following/Guanaco-QLoRA.yaml
rename to instruction-templates/Guanaco-QLoRA.yaml
diff --git a/characters/instruction-following/Guanaco.yaml b/instruction-templates/Guanaco.yaml
similarity index 100%
rename from characters/instruction-following/Guanaco.yaml
rename to instruction-templates/Guanaco.yaml
diff --git a/characters/instruction-following/H2O-human_bot.yaml b/instruction-templates/H2O-human_bot.yaml
similarity index 100%
rename from characters/instruction-following/H2O-human_bot.yaml
rename to instruction-templates/H2O-human_bot.yaml
diff --git a/characters/instruction-following/H2O-prompt_answer.yaml b/instruction-templates/H2O-prompt_answer.yaml
similarity index 100%
rename from characters/instruction-following/H2O-prompt_answer.yaml
rename to instruction-templates/H2O-prompt_answer.yaml
diff --git a/characters/instruction-following/Hippogriff.yaml b/instruction-templates/Hippogriff.yaml
similarity index 100%
rename from characters/instruction-following/Hippogriff.yaml
rename to instruction-templates/Hippogriff.yaml
diff --git a/characters/instruction-following/INCITE-Chat.yaml b/instruction-templates/INCITE-Chat.yaml
similarity index 100%
rename from characters/instruction-following/INCITE-Chat.yaml
rename to instruction-templates/INCITE-Chat.yaml
diff --git a/characters/instruction-following/INCITE-Instruct.yaml b/instruction-templates/INCITE-Instruct.yaml
similarity index 100%
rename from characters/instruction-following/INCITE-Instruct.yaml
rename to instruction-templates/INCITE-Instruct.yaml
diff --git a/characters/instruction-following/KoAlpaca.yaml b/instruction-templates/KoAlpaca.yaml
similarity index 100%
rename from characters/instruction-following/KoAlpaca.yaml
rename to instruction-templates/KoAlpaca.yaml
diff --git a/characters/instruction-following/Koala.yaml b/instruction-templates/Koala.yaml
similarity index 100%
rename from characters/instruction-following/Koala.yaml
rename to instruction-templates/Koala.yaml
diff --git a/characters/instruction-following/LLaVA.yaml b/instruction-templates/LLaVA.yaml
similarity index 100%
rename from characters/instruction-following/LLaVA.yaml
rename to instruction-templates/LLaVA.yaml
diff --git a/characters/instruction-following/Llama-v2.yaml b/instruction-templates/Llama-v2.yaml
similarity index 100%
rename from characters/instruction-following/Llama-v2.yaml
rename to instruction-templates/Llama-v2.yaml
diff --git a/characters/instruction-following/MOSS.yaml b/instruction-templates/MOSS.yaml
similarity index 100%
rename from characters/instruction-following/MOSS.yaml
rename to instruction-templates/MOSS.yaml
diff --git a/characters/instruction-following/MPT-Chat.yaml b/instruction-templates/MPT-Chat.yaml
similarity index 100%
rename from characters/instruction-following/MPT-Chat.yaml
rename to instruction-templates/MPT-Chat.yaml
diff --git a/characters/instruction-following/Manticore Chat.yaml b/instruction-templates/Manticore Chat.yaml
similarity index 100%
rename from characters/instruction-following/Manticore Chat.yaml
rename to instruction-templates/Manticore Chat.yaml
diff --git a/characters/instruction-following/Metharme.yaml b/instruction-templates/Metharme.yaml
similarity index 100%
rename from characters/instruction-following/Metharme.yaml
rename to instruction-templates/Metharme.yaml
diff --git a/characters/instruction-following/Minotaur.yaml b/instruction-templates/Minotaur.yaml
similarity index 100%
rename from characters/instruction-following/Minotaur.yaml
rename to instruction-templates/Minotaur.yaml
diff --git a/characters/instruction-following/NewHope.yaml b/instruction-templates/NewHope.yaml
similarity index 100%
rename from characters/instruction-following/NewHope.yaml
rename to instruction-templates/NewHope.yaml
diff --git a/characters/instruction-following/Open Assistant.yaml b/instruction-templates/Open Assistant.yaml
similarity index 100%
rename from characters/instruction-following/Open Assistant.yaml
rename to instruction-templates/Open Assistant.yaml
diff --git a/characters/instruction-following/OpenBuddy.yaml b/instruction-templates/OpenBuddy.yaml
similarity index 100%
rename from characters/instruction-following/OpenBuddy.yaml
rename to instruction-templates/OpenBuddy.yaml
diff --git a/characters/instruction-following/Orca Mini.yaml b/instruction-templates/Orca Mini.yaml
similarity index 100%
rename from characters/instruction-following/Orca Mini.yaml
rename to instruction-templates/Orca Mini.yaml
diff --git a/characters/instruction-following/RWKV-Raven.yaml b/instruction-templates/RWKV-Raven.yaml
similarity index 100%
rename from characters/instruction-following/RWKV-Raven.yaml
rename to instruction-templates/RWKV-Raven.yaml
diff --git a/characters/instruction-following/Samantha.yaml b/instruction-templates/Samantha.yaml
similarity index 100%
rename from characters/instruction-following/Samantha.yaml
rename to instruction-templates/Samantha.yaml
diff --git a/characters/instruction-following/StableBeluga2.yaml b/instruction-templates/StableBeluga2.yaml
similarity index 100%
rename from characters/instruction-following/StableBeluga2.yaml
rename to instruction-templates/StableBeluga2.yaml
diff --git
a/characters/instruction-following/StableLM.yaml b/instruction-templates/StableLM.yaml similarity index 100% rename from characters/instruction-following/StableLM.yaml rename to instruction-templates/StableLM.yaml diff --git a/characters/instruction-following/StableVicuna.yaml b/instruction-templates/StableVicuna.yaml similarity index 100% rename from characters/instruction-following/StableVicuna.yaml rename to instruction-templates/StableVicuna.yaml diff --git a/characters/instruction-following/Starchat-Beta.yaml b/instruction-templates/Starchat-Beta.yaml similarity index 100% rename from characters/instruction-following/Starchat-Beta.yaml rename to instruction-templates/Starchat-Beta.yaml diff --git a/characters/instruction-following/Tulu.yaml b/instruction-templates/Tulu.yaml similarity index 100% rename from characters/instruction-following/Tulu.yaml rename to instruction-templates/Tulu.yaml diff --git a/characters/instruction-following/Vicuna-v0.yaml b/instruction-templates/Vicuna-v0.yaml similarity index 100% rename from characters/instruction-following/Vicuna-v0.yaml rename to instruction-templates/Vicuna-v0.yaml diff --git a/characters/instruction-following/Vicuna-v1.1.yaml b/instruction-templates/Vicuna-v1.1.yaml similarity index 100% rename from characters/instruction-following/Vicuna-v1.1.yaml rename to instruction-templates/Vicuna-v1.1.yaml diff --git a/characters/instruction-following/Vigogne-Chat.yaml b/instruction-templates/Vigogne-Chat.yaml similarity index 100% rename from characters/instruction-following/Vigogne-Chat.yaml rename to instruction-templates/Vigogne-Chat.yaml diff --git a/characters/instruction-following/Vigogne-Instruct.yaml b/instruction-templates/Vigogne-Instruct.yaml similarity index 100% rename from characters/instruction-following/Vigogne-Instruct.yaml rename to instruction-templates/Vigogne-Instruct.yaml diff --git a/characters/instruction-following/Wizard-Mega ShareGPT.yaml b/instruction-templates/Wizard-Mega ShareGPT.yaml similarity index 100% rename from characters/instruction-following/Wizard-Mega ShareGPT.yaml rename to instruction-templates/Wizard-Mega ShareGPT.yaml diff --git a/characters/instruction-following/Wizard-Mega WizardLM.yaml b/instruction-templates/Wizard-Mega WizardLM.yaml similarity index 100% rename from characters/instruction-following/Wizard-Mega WizardLM.yaml rename to instruction-templates/Wizard-Mega WizardLM.yaml diff --git a/characters/instruction-following/Wizard-Mega.yaml b/instruction-templates/Wizard-Mega.yaml similarity index 100% rename from characters/instruction-following/Wizard-Mega.yaml rename to instruction-templates/Wizard-Mega.yaml diff --git a/characters/instruction-following/WizardLM.yaml b/instruction-templates/WizardLM.yaml similarity index 100% rename from characters/instruction-following/WizardLM.yaml rename to instruction-templates/WizardLM.yaml diff --git a/characters/instruction-following/Ziya.yaml b/instruction-templates/Ziya.yaml similarity index 100% rename from characters/instruction-following/Ziya.yaml rename to instruction-templates/Ziya.yaml diff --git a/modules/chat.py b/modules/chat.py index 8a86523c..a445c6a8 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -475,7 +475,7 @@ def load_character(character, name1, name2, instruct=False): Path("cache/pfp_character.png").unlink() if character not in ['None', '', None]: - folder = 'characters' if not instruct else 'characters/instruction-following' + folder = 'characters' if not instruct else 'instruction-templates' picture = 
generate_pfp_cache(character) filepath = None for extension in ["yml", "yaml", "json"]: diff --git a/modules/prompts.py b/modules/prompts.py index f68c83c4..8a3cf3e3 100644 --- a/modules/prompts.py +++ b/modules/prompts.py @@ -12,7 +12,7 @@ def load_prompt(fname): return '' elif fname.startswith('Instruct-'): fname = re.sub('^Instruct-', '', fname) - file_path = Path(f'characters/instruction-following/{fname}.yaml') + file_path = Path(f'instruction-templates/{fname}.yaml') if not file_path.exists(): return '' diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 8a0c103b..a858acaf 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -232,13 +232,13 @@ def create_event_handlers(): shared.gradio['save_template'].click( lambda: 'My Template.yaml', None, gradio('save_filename')).then( - lambda: 'characters/instruction-following/', None, gradio('save_root')).then( + lambda: 'instruction-templates/', None, gradio('save_root')).then( chat.generate_instruction_template_yaml, gradio('name1_instruct', 'name2_instruct', 'context_instruct', 'turn_template'), gradio('save_contents')).then( lambda: gr.update(visible=True), None, gradio('file_saver')) shared.gradio['delete_template'].click( lambda x: f'{x}.yaml', gradio('instruction_template'), gradio('delete_filename')).then( - lambda: 'characters/instruction-following/', None, gradio('delete_root')).then( + lambda: 'instruction-templates/', None, gradio('delete_root')).then( lambda: gr.update(visible=True), None, gradio('file_deleter')) shared.gradio['save_chat_history'].click( diff --git a/modules/utils.py b/modules/utils.py index 9ae5dc86..adaa15e8 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -90,11 +90,11 @@ def get_available_prompts(): def get_available_characters(): paths = (x for x in Path('characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml')) - return ['None'] + sorted(set((k.stem for k in paths if k.stem != "instruction-following")), key=natural_keys) + return ['None'] + sorted(set((k.stem for k in paths)), key=natural_keys) def get_available_instruction_templates(): - path = "characters/instruction-following" + path = "instruction-templates" paths = [] if os.path.exists(path): paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml')) From 3d48933f274ca571c88bb225b22950b6efb75324 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 18:58:59 -0700 Subject: [PATCH 033/169] Remove ancient deprecation warnings --- modules/shared.py | 62 +++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 40 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index be5be109..c2f5e0f2 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -6,34 +6,29 @@ import yaml from modules.logging_colors import logger -generation_lock = None + +# Model variables model = None tokenizer = None -is_seq2seq = False model_name = "None" -lora_names = [] +is_seq2seq = False model_dirty_from_training = False +lora_names = [] -# Chat variables +# Generation variables stop_everything = False +generation_lock = None processing_message = '*Is typing...*' +input_params = [] +reload_inputs = [] -# UI elements (buttons, sliders, HTML, etc) +# UI variables gradio = {} - -# For keeping the values of UI elements on page reload persistent_interface_state = {} - -input_params = [] # Generation input parameters -reload_inputs = [] # Parameters for reloading the chat interface - -# For restarting the interface need_restart = False - -# To 
prevent the persistent chat history from being loaded when -# a session JSON file is being loaded in chat mode session_is_loading = False +# UI defaults settings = { 'dark_theme': True, 'autoload_model': False, @@ -148,8 +143,6 @@ parser.add_argument('--warmup_autotune', action='store_true', help='(triton) Ena parser.add_argument('--fused_mlp', action='store_true', help='(triton) Enable fused mlp.') # AutoGPTQ -parser.add_argument('--gptq-for-llama', action='store_true', help='DEPRECATED') -parser.add_argument('--autogptq', action='store_true', help='DEPRECATED') parser.add_argument('--triton', action='store_true', help='Use triton.') parser.add_argument('--no_inject_fused_attention', action='store_true', help='Do not use fused attention (lowers VRAM requirements).') parser.add_argument('--no_inject_fused_mlp', action='store_true', help='Triton mode only: Do not use fused MLP (lowers VRAM requirements).') @@ -196,14 +189,6 @@ parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The m args = parser.parse_args() args_defaults = parser.parse_args([]) -# Deprecation warnings -if args.autogptq: - logger.warning('--autogptq has been deprecated and will be removed soon. Use --loader autogptq instead.') - args.loader = 'autogptq' -if args.gptq_for_llama: - logger.warning('--gptq-for-llama has been deprecated and will be removed soon. Use --loader gptq-for-llama instead.') - args.loader = 'gptq-for-llama' - # Security warnings if args.trust_remote_code: logger.warning("trust_remote_code is enabled. This is dangerous.") @@ -231,10 +216,6 @@ def fix_loader_name(name): return 'ExLlama_HF' -if args.loader is not None: - args.loader = fix_loader_name(args.loader) - - def add_extension(name): if args.extensions is None: args.extensions = [name] @@ -242,15 +223,6 @@ def add_extension(name): args.extensions.append(name) -# Activating the API extension -if args.api or args.public_api: - add_extension('api') - -# Activating the multimodal extension -if args.multimodal_pipeline is not None: - add_extension('multimodal') - - def is_chat(): return args.chat @@ -264,14 +236,24 @@ def get_mode(): return 'default' -# Loading model-specific settings +args.loader = fix_loader_name(args.loader) + +# Activate the API extension +if args.api or args.public_api: + add_extension('api') + +# Activate the multimodal extension +if args.multimodal_pipeline is not None: + add_extension('multimodal') + +# Load model-specific settings with Path(f'{args.model_dir}/config.yaml') as p: if p.exists(): model_config = yaml.safe_load(open(p, 'r').read()) else: model_config = {} -# Applying user-defined model settings +# Load custom model-specific settings with Path(f'{args.model_dir}/config-user.yaml') as p: if p.exists(): user_config = yaml.safe_load(open(p, 'r').read()) From 2cf64474f2319ec009a95a96ec7d1ea799217104 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Sun, 6 Aug 2023 21:46:25 -0500 Subject: [PATCH 034/169] Use chat_instruct_command in API (#3482) --- api-examples/api-example-chat-stream.py | 2 +- api-examples/api-example-chat.py | 2 +- extensions/api/util.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py index a774f907..055900bd 100644 --- a/api-examples/api-example-chat-stream.py +++ b/api-examples/api-example-chat-stream.py @@ -38,7 +38,7 @@ async def run(user_input, history): '_continue': False, 'stop_at_newline': False, 'chat_generation_attempts': 1, - 
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', # Generation params. If 'preset' is set to different than 'None', the values # in presets/preset-name.yaml are used instead of the individual numbers. diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py index 824bf3a0..c3d0c538 100644 --- a/api-examples/api-example-chat.py +++ b/api-examples/api-example-chat.py @@ -32,7 +32,7 @@ def run(user_input, history): '_continue': False, 'stop_at_newline': False, 'chat_generation_attempts': 1, - 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', # Generation params. If 'preset' is set to different than 'None', the values # in presets/preset-name.yaml are used instead of the individual numbers. diff --git a/extensions/api/util.py b/extensions/api/util.py index 2654d046..f36c070b 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -79,7 +79,7 @@ def build_parameters(body, chat=False): 'name2_instruct': str(body.get('name2_instruct', name2_instruct)), 'context_instruct': str(body.get('context_instruct', context_instruct)), 'turn_template': str(body.get('turn_template', turn_template)), - 'chat-instruct_command': str(body.get('chat-instruct_command', shared.settings['chat-instruct_command'])), + 'chat-instruct_command': str(body.get('chat_instruct_command', body.get('chat-instruct_command', shared.settings['chat-instruct_command']))), 'history': body.get('history', {'internal': [], 'visible': []}) }) From a373c96d5931ad73eb41ed3e045d9846fb7533d6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Aug 2023 20:36:35 -0700 Subject: [PATCH 035/169] Fix a bug in modules/shared.py --- modules/shared.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/shared.py b/modules/shared.py index c2f5e0f2..30f6512c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -199,6 +199,9 @@ if args.multi_user: def fix_loader_name(name): + if not name: + return name + name = name.lower() if name in ['llamacpp', 'llama.cpp', 'llama-cpp', 'llama cpp']: return 'llama.cpp' From 412f6ff9d3bb0bb9a30932ffc87ac1114c606ad5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Aug 2023 06:08:51 -0700 Subject: [PATCH 036/169] Change alpha_value maximum and step --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index c9d772b8..89b82c1f 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -90,7 +90,7 @@ def create_ui(): shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. 
Should typically be set to max_seq_len / 2048.', value=shared.args.compress_pos_emb) - shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=32, step=1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) + shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) with gr.Column(): shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) From 3b27404865c20193667cd614ec12f2f354a08798 Mon Sep 17 00:00:00 2001 From: Sam Date: Mon, 7 Aug 2023 23:19:16 +1000 Subject: [PATCH 037/169] Make dockerfile respect specified cuda version (#3474) --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 7cc0ff15..3c5108d8 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -16,7 +16,7 @@ RUN . /build/venv/bin/activate && \ # https://developer.nvidia.com/cuda-gpus # for a rtx 2060: ARG TORCH_CUDA_ARCH_LIST="7.5" -ARG TORCH_CUDA_ARCH_LIST="3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX" +ARG TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-3.5;5.0;6.0;6.1;7.0;7.5;8.0;8.6+PTX}" RUN . /build/venv/bin/activate && \ python3 setup_cuda.py bdist_wheel -d . From 2d0634cd0764a5d64c39a483e51dd587ea008917 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Aug 2023 08:57:19 -0700 Subject: [PATCH 038/169] Bump transformers commit for positive prompts --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 9deadd48..da6a5f20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,7 +18,7 @@ tensorboard tqdm wandb git+https://github.com/huggingface/peft@96c0277a1b9a381b10ab34dbf84917f9b3b992e6 -git+https://github.com/huggingface/transformers@d533465150532b0c5de167b574e59f64c68b1154 +git+https://github.com/huggingface/transformers@baf1daa58eb2960248fd9f7c3af0ed245b8ce4af bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" From bbe4a29a258d028c6369c0eda90b9607b86f0156 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:03:09 -0300 Subject: [PATCH 039/169] Add back dark theme code --- server.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server.py b/server.py index 0be1f4c4..414cd3ba 100644 --- a/server.py +++ b/server.py @@ -128,6 +128,9 @@ def create_interface(): ui_model_menu.create_event_handlers() # Interface launch events + if shared.settings['dark_theme']: + shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") + shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) if shared.is_chat(): From 
584dd334242df56cca4a53664c5d1e3b57094e74 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:44:59 -0300 Subject: [PATCH 040/169] Fix missing example_dialogue when uploading characters --- modules/chat.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index a445c6a8..5667d433 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -510,9 +510,6 @@ def load_character(character, name1, name2, instruct=False): context = build_pygmalion_style_context(data) greeting_field = 'char_greeting' - if 'example_dialogue' in data: - context += f"{data['example_dialogue'].strip()}\n" - if greeting_field in data: greeting = data[greeting_field] @@ -573,6 +570,9 @@ def build_pygmalion_style_context(data): if 'world_scenario' in data and data['world_scenario'] != '': context += f"Scenario: {data['world_scenario']}\n" + if 'example_dialogue' in data and data['example_dialogue'] != '': + context += f"{data['example_dialogue'].strip()}\n" + context = f"{context.strip()}\n" return context From 6d354bb50b62d6ded5431a9097e6ca0db748678d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Aug 2023 23:57:25 -0300 Subject: [PATCH 041/169] Allow the webui to do multiple tasks simultaneously --- server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server.py b/server.py index 414cd3ba..b477d4c1 100644 --- a/server.py +++ b/server.py @@ -140,7 +140,7 @@ def create_interface(): extensions_module.create_extensions_block() # Extensions block # Launch the interface - shared.gradio['interface'].queue() + shared.gradio['interface'].queue(concurrency_count=64) with OpenMonkeyPatch(): shared.gradio['interface'].launch( prevent_thread_lock=True, From 37fb7194521fe4196f2f15cfa93045fea8c2349a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Aug 2023 00:09:00 -0300 Subject: [PATCH 042/169] Increase the Context/Greeting boxes sizes --- modules/ui_chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index a858acaf..4471d2f4 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -63,8 +63,8 @@ def create_ui(): shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=4, label='Context', elem_classes=['add_scrollbar']) - shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=4, label='Greeting', elem_classes=['add_scrollbar']) + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) + shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar']) with gr.Column(scale=1): shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') From 0e78f3b4d4b188d902f9ccdeaebbf48f161b1d50 Mon Sep 17 00:00:00 2001 From: Gennadij Date: Tue, 8 Aug 2023 06:31:11 +0300 Subject: [PATCH 043/169] Fixed a typo in "rms_norm_eps", incorrectly set as n_gqa (#3494) --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 89b82c1f..7961c225 100644 --- 
a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -80,7 +80,7 @@ def create_ui(): shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads) shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch) shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. Must be 8 for llama-2 70b.') - shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.n_gqa, info='5e-6 is a good value for llama-2 models.') + shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='5e-6 is a good value for llama-2 models.') shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None") shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None") From bf08b16b32847cc813b55a2d93fc15b2cf3a53ea Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:09:01 -0700 Subject: [PATCH 044/169] Fix disappearing profile picture bug --- modules/chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index 5667d433..efb7ecb8 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -471,7 +471,7 @@ def load_character(character, name1, name2, instruct=False): picture = None # Deleting the profile picture cache, if any - if Path("cache/pfp_character.png").exists(): + if Path("cache/pfp_character.png").exists() and not instruct: Path("cache/pfp_character.png").unlink() if character not in ['None', '', None]: From 4ba30f676544600c5c9ffdddfc50bfb4682f8a36 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Aug 2023 14:10:04 -0700 Subject: [PATCH 045/169] Add OpenChat template --- instruction-templates/OpenChat.yaml | 4 ++++ models/config.yaml | 3 +++ 2 files changed, 7 insertions(+) create mode 100644 instruction-templates/OpenChat.yaml diff --git a/instruction-templates/OpenChat.yaml b/instruction-templates/OpenChat.yaml new file mode 100644 index 00000000..3b84c226 --- /dev/null +++ b/instruction-templates/OpenChat.yaml @@ -0,0 +1,4 @@ +user: "GPT4 User:" +bot: "GPT4 Assistant:" +turn_template: "<|user|> <|user-message|><|end_of_turn|><|bot|> <|bot-message|><|end_of_turn|>" +context: "" diff --git a/models/config.yaml b/models/config.yaml index 4d618de2..97ec9e92 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -288,3 +288,6 @@ TheBloke_WizardLM-30B-GPTQ: instruction_template: 'StableBeluga2' truncation_length: 4096 rms_norm_eps: 5.0e-6 +.*openchat: + mode: 'instruct' + instruction_template: 'OpenChat' From 901b028d551c85b4a19d8ceeb497efe2de7b32db Mon Sep 17 00:00:00 2001 From: Friedemann Lipphardt Date: Wed, 9 Aug 2023 03:20:27 +0200 Subject: [PATCH 046/169] Add option for named cloudflare tunnels (#3364) --- README.md | 1 + docker/docker-compose.yml | 1 + extensions/api/blocking_api.py | 8 ++++---- extensions/api/requirements.txt | 2 +- extensions/api/script.py | 4 ++-- extensions/api/streaming_api.py | 8 ++++---- extensions/api/util.py | 8 ++++---- modules/shared.py | 1 + 8 files changed, 18 insertions(+), 15 deletions(-) diff --git a/README.md 
b/README.md index 218fa765..3df9a16f 100644 --- a/README.md +++ b/README.md @@ -326,6 +326,7 @@ Optionally, you can use the following command-line flags: |---------------------------------------|-------------| | `--api` | Enable the API extension. | | `--public-api` | Create a public URL for the API using Cloudfare. | +| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. | | `--api-blocking-port BLOCKING_PORT` | The listening port for the blocking API. | | `--api-streaming-port STREAMING_PORT` | The listening port for the streaming API. | diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index 46b27580..ce29f33b 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -23,6 +23,7 @@ services: - ./prompts:/app/prompts - ./softprompts:/app/softprompts - ./training:/app/training + - ./cloudflared:/etc/cloudflared deploy: resources: reservations: diff --git a/extensions/api/blocking_api.py b/extensions/api/blocking_api.py index fbbc5ec1..6b28205a 100644 --- a/extensions/api/blocking_api.py +++ b/extensions/api/blocking_api.py @@ -200,7 +200,7 @@ class Handler(BaseHTTPRequestHandler): super().end_headers() -def _run_server(port: int, share: bool = False): +def _run_server(port: int, share: bool = False, tunnel_id=str): address = '0.0.0.0' if shared.args.listen else '127.0.0.1' server = ThreadingHTTPServer((address, port), Handler) @@ -210,7 +210,7 @@ def _run_server(port: int, share: bool = False): if share: try: - try_start_cloudflared(port, max_attempts=3, on_start=on_start) + try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start) except Exception: pass else: @@ -220,5 +220,5 @@ def _run_server(port: int, share: bool = False): server.serve_forever() -def start_server(port: int, share: bool = False): - Thread(target=_run_server, args=[port, share], daemon=True).start() +def start_server(port: int, share: bool = False, tunnel_id=str): + Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start() diff --git a/extensions/api/requirements.txt b/extensions/api/requirements.txt index 14e29d35..e4f26c3a 100644 --- a/extensions/api/requirements.txt +++ b/extensions/api/requirements.txt @@ -1,2 +1,2 @@ -flask_cloudflared==0.0.12 +flask_cloudflared==0.0.14 websockets==11.0.2 \ No newline at end of file diff --git a/extensions/api/script.py b/extensions/api/script.py index 5d1b1a68..80617b3e 100644 --- a/extensions/api/script.py +++ b/extensions/api/script.py @@ -4,5 +4,5 @@ from modules import shared def setup(): - blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api) - streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api) + blocking_api.start_server(shared.args.api_blocking_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id) + streaming_api.start_server(shared.args.api_streaming_port, share=shared.args.public_api, tunnel_id=shared.args.public_api_id) diff --git a/extensions/api/streaming_api.py b/extensions/api/streaming_api.py index 6afa827d..9175eeb0 100644 --- a/extensions/api/streaming_api.py +++ b/extensions/api/streaming_api.py @@ -102,7 +102,7 @@ async def _run(host: str, port: int): await asyncio.Future() # run forever -def _run_server(port: int, share: bool = False): +def _run_server(port: int, share: bool = False, tunnel_id=str): address = '0.0.0.0' if shared.args.listen else '127.0.0.1' def on_start(public_url: str): @@ -111,7 +111,7 @@ def _run_server(port: int, share: bool 
= False): if share: try: - try_start_cloudflared(port, max_attempts=3, on_start=on_start) + try_start_cloudflared(port, tunnel_id, max_attempts=3, on_start=on_start) except Exception as e: print(e) else: @@ -120,5 +120,5 @@ def _run_server(port: int, share: bool = False): asyncio.run(_run(host=address, port=port)) -def start_server(port: int, share: bool = False): - Thread(target=_run_server, args=[port, share], daemon=True).start() +def start_server(port: int, share: bool = False, tunnel_id=str): + Thread(target=_run_server, args=[port, share, tunnel_id], daemon=True).start() diff --git a/extensions/api/util.py b/extensions/api/util.py index f36c070b..7ebfaa32 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -86,12 +86,12 @@ def build_parameters(body, chat=False): return generate_params -def try_start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None): +def try_start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None): Thread(target=_start_cloudflared, args=[ - port, max_attempts, on_start], daemon=True).start() + port, tunnel_id, max_attempts, on_start], daemon=True).start() -def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None): +def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None): try: from flask_cloudflared import _run_cloudflared except ImportError: @@ -101,7 +101,7 @@ def _start_cloudflared(port: int, max_attempts: int = 3, on_start: Optional[Call for _ in range(max_attempts): try: - public_url = _run_cloudflared(port, port + 1) + public_url = _run_cloudflared(port, port + 1, tunnel_id=tunnel_id) if on_start: on_start(public_url) diff --git a/modules/shared.py b/modules/shared.py index 30f6512c..05c402c4 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -182,6 +182,7 @@ parser.add_argument('--api', action='store_true', help='Enable the API extension parser.add_argument('--api-blocking-port', type=int, default=5000, help='The listening port for the blocking API.') parser.add_argument('--api-streaming-port', type=int, default=5005, help='The listening port for the streaming API.') parser.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudfare.') +parser.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None) # Multimodal parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The multimodal pipeline to use. Examples: llava-7b, llava-13b.') From f4caaf337afda85236e3963c22042e2581597424 Mon Sep 17 00:00:00 2001 From: Hans Raaf Date: Wed, 9 Aug 2023 04:26:28 +0200 Subject: [PATCH 047/169] Fix superbooga when using regenerate (#3362) --- extensions/superbooga/script.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/extensions/superbooga/script.py b/extensions/superbooga/script.py index 5ef14d9d..475cf1e0 100644 --- a/extensions/superbooga/script.py +++ b/extensions/superbooga/script.py @@ -96,7 +96,8 @@ def apply_settings(chunk_count, chunk_count_initial, time_weight): def custom_generate_chat_prompt(user_input, state, **kwargs): global chat_collector - history = state['history'] + # get history as being modified when using regenerate. 
+ history = kwargs['history'] if state['mode'] == 'instruct': results = collector.get_sorted(user_input, n_results=params['chunk_count']) From d8fb506affda77dcc418fc25810de7254ce125bd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Aug 2023 21:24:28 -0700 Subject: [PATCH 048/169] Add RoPE scaling support for transformers (including dynamic NTK) https://github.com/huggingface/transformers/pull/24653 --- README.md | 4 ++-- modules/loaders.py | 10 ++++++---- modules/models.py | 7 ++++++- modules/shared.py | 2 +- modules/ui_model_menu.py | 2 +- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 3df9a16f..f7e18350 100644 --- a/README.md +++ b/README.md @@ -299,12 +299,12 @@ Optionally, you can use the following command-line flags: | `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". | | `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. | -#### RoPE (for llama.cpp and ExLlama only) +#### RoPE (for llama.cpp, ExLlama, and transformers) | Flag | Description | |------------------|-------------| |`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. | -|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both. | +|`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. | #### Gradio diff --git a/modules/loaders.py b/modules/loaders.py index 519e47a7..07bc455c 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -39,8 +39,8 @@ loaders_and_params = { 'low_vram', 'mlock', 'llama_cpp_seed', - 'compress_pos_emb', 'alpha_value', + 'compress_pos_emb', 'cpu', ], 'llamacpp_HF': [ @@ -54,8 +54,8 @@ loaders_and_params = { 'low_vram', 'mlock', 'llama_cpp_seed', - 'compress_pos_emb', 'alpha_value', + 'compress_pos_emb', 'cpu', 'llamacpp_HF_info', ], @@ -73,20 +73,22 @@ loaders_and_params = { 'quant_type', 'compute_dtype', 'trust_remote_code', + 'alpha_value', + 'compress_pos_emb', 'transformers_info' ], 'ExLlama': [ 'gpu_split', 'max_seq_len', - 'compress_pos_emb', 'alpha_value', + 'compress_pos_emb', 'exllama_info', ], 'ExLlama_HF': [ 'gpu_split', 'max_seq_len', - 'compress_pos_emb', 'alpha_value', + 'compress_pos_emb', 'exllama_HF_info', ] } diff --git a/modules/models.py b/modules/models.py index 4f6a44c1..aad142c1 100644 --- a/modules/models.py +++ b/modules/models.py @@ -144,7 +144,7 @@ def huggingface_loader(model_name): LoaderClass = AutoModelForCausalLM # Load the model in simple 16-bit mode by default - if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None]): + if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.compress_pos_emb > 1, shared.args.alpha_value > 1]): model = LoaderClass.from_pretrained(Path(f"{shared.args.model_dir}/{model_name}"), low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if shared.args.bf16 else torch.float16, trust_remote_code=shared.args.trust_remote_code) if 
torch.backends.mps.is_available(): device = torch.device('mps') @@ -215,6 +215,11 @@ def huggingface_loader(model_name): no_split_module_classes=model._no_split_modules ) + if shared.args.compress_pos_emb > 1: + params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb} + elif shared.args.alpha_value > 1: + params['rope_scaling'] = {'type': 'dynamic', 'factor': shared.args.alpha_value} + model = LoaderClass.from_pretrained(checkpoint, **params) return model diff --git a/modules/shared.py b/modules/shared.py index 05c402c4..951120c8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -164,7 +164,7 @@ parser.add_argument('--rwkv-cuda-on', action='store_true', help='RWKV: Compile t # RoPE parser.add_argument('--compress_pos_emb', type=int, default=1, help="Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.") -parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. Use either this or compress_pos_emb, not both.") +parser.add_argument('--alpha_value', type=int, default=1, help="Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.") # Gradio parser.add_argument('--listen', action='store_true', help='Make the web UI reachable from your local network.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 7961c225..55416a07 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -89,8 +89,8 @@ def create_ui(): shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=16384, step=256, info='Maximum sequence length.', value=shared.args.max_seq_len) + shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.1, info='Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) shared.gradio['compress_pos_emb'] = gr.Slider(label='compress_pos_emb', minimum=1, maximum=8, step=1, info='Positional embeddings compression factor. Should typically be set to max_seq_len / 2048.', value=shared.args.compress_pos_emb) - shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.1, info='Positional embeddings alpha factor for NTK RoPE scaling. Scaling is not identical to embedding compression. 
Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value) with gr.Column(): shared.gradio['triton'] = gr.Checkbox(label="triton", value=shared.args.triton) From 5bfcfcfc5ab7fe003c1f401949c467edbad6376a Mon Sep 17 00:00:00 2001 From: GiganticPrime Date: Wed, 9 Aug 2023 21:26:12 +0900 Subject: [PATCH 049/169] Added the logic for starchat model series (#3185) --- models/config.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/models/config.yaml b/models/config.yaml index 97ec9e92..7e3e8ca4 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -234,6 +234,7 @@ TheBloke_WizardLM-30B-GPTQ: .*starchat-beta: mode: 'instruct' instruction_template: 'Starchat-Beta' + custom_stopping_strings: '"<|end|>"' .*minotaur: mode: 'instruct' instruction_template: 'Minotaur' From 2255349f1904b1bfc1e3c6ef790777ad035363ea Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Aug 2023 05:46:25 -0700 Subject: [PATCH 050/169] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f7e18350..98de6c09 100644 --- a/README.md +++ b/README.md @@ -303,8 +303,8 @@ Optionally, you can use the following command-line flags: | Flag | Description | |------------------|-------------| -|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. | |`--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. | +|`--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should typically be set to max_seq_len / 2048. | #### Gradio From 6c6a52aaad8e5d3ed28878ce9c51848f00ec422a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Aug 2023 07:47:19 -0700 Subject: [PATCH 051/169] Change the filenames for caches and histories --- modules/chat.py | 14 +++++++++++--- modules/html_generator.py | 13 +++++++++++-- 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index efb7ecb8..c9af55db 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -395,6 +395,7 @@ def save_history(history, path=None): p = path or Path('logs/exported_history.json') if not p.parent.is_dir(): p.parent.mkdir(parents=True) + with open(p, 'w', encoding='utf-8') as f: f.write(json.dumps(history, indent=4)) @@ -415,7 +416,7 @@ def load_history(file, history): def save_persistent_history(history, character, mode): if mode in ['chat', 'chat-instruct'] and character not in ['', 'None', None] and not shared.args.multi_user: - save_history(history, path=Path(f'logs/{character}_persistent.json')) + save_history(history, path=Path(f'logs/persistent_{character}.json')) def load_persistent_history(state): @@ -428,8 +429,15 @@ def load_persistent_history(state): character = state['character_menu'] greeting = replace_character_names(state['greeting'], state['name1'], state['name2']) - p = Path(f'logs/{character}_persistent.json') - if not shared.args.multi_user and character not in ['None', '', None] and p.exists(): + + should_load_history = (not shared.args.multi_user and character not in ['None', '', None]) + old_p = Path(f'logs/{character}_persistent.json') + p = Path(f'logs/persistent_{character}.json') + if should_load_history and old_p.exists(): + logger.warning(f"Renaming {old_p} to {p}") + old_p.rename(p) + + if should_load_history and p.exists(): f = json.loads(open(p, 
'rb').read()) if 'internal' in f and 'visible' in f: history = f diff --git a/modules/html_generator.py b/modules/html_generator.py index 15c731c3..422beb30 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -6,6 +6,7 @@ from pathlib import Path import markdown from PIL import Image, ImageOps +from modules.logging_colors import logger from modules.utils import get_available_chat_styles # This is to store the paths to the thumbnails of the profile pictures @@ -120,6 +121,7 @@ def generate_4chan_html(f): post = line else: post += line + if post != '': src = process_post(post, c) posts.append(src) @@ -134,13 +136,14 @@ def generate_4chan_html(f): output += f'
    ' for post in posts: output += post + output += '
    ' output = output.split('\n') for i in range(len(output)): output[i] = re.sub(r'^(>(.*?)(
    |
    ))', r'\1', output[i]) output[i] = re.sub(r'^
    (>(.*?)(
    |
    ))', r'
    \1', output[i]) - output = '\n'.join(output) + output = '\n'.join(output) return output @@ -160,7 +163,13 @@ def get_image_cache(path): mtime = os.stat(path).st_mtime if (path in image_cache and mtime != image_cache[path][0]) or (path not in image_cache): img = make_thumbnail(Image.open(path)) - output_file = Path(f'cache/{path.name}_cache.png') + + old_p = Path(f'cache/{path.name}_cache.png') + p = Path(f'cache/cache_{path.name}.png') + if old_p.exists(): + old_p.rename(p) + + output_file = p img.convert('RGB').save(output_file, format='PNG') image_cache[path] = [mtime, output_file.as_posix()] From 7c1300fab55e39f80c1d6fa8f547b9a7c41fcbcd Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Aug 2023 08:07:55 -0700 Subject: [PATCH 052/169] Pin aiofiles version to fix statvfs issue --- requirements.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index da6a5f20..0cea4fe8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,12 @@ +aiofiles==23.1.0 +fastapi==0.95.2 +gradio_client==0.2.5 +gradio==3.33.1 + accelerate==0.21.0 colorama datasets einops -fastapi==0.95.2 -gradio_client==0.2.5 -gradio==3.33.1 markdown numpy pandas @@ -17,14 +19,17 @@ sentencepiece tensorboard tqdm wandb + git+https://github.com/huggingface/peft@96c0277a1b9a381b10ab34dbf84917f9b3b992e6 git+https://github.com/huggingface/transformers@baf1daa58eb2960248fd9f7c3af0ed245b8ce4af + bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" + # llama-cpp-python without GPU support llama-cpp-python==0.1.77; platform_system != "Windows" https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.77/llama_cpp_python-0.1.77-cp310-cp310-win_amd64.whl; platform_system == "Windows" From a4e48cbdb67b50cc174c8e09704e54472b059aa2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Aug 2023 08:31:17 -0700 Subject: [PATCH 053/169] Bump AutoGPTQ --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0cea4fe8..e65bed6b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,8 +25,8 @@ git+https://github.com/huggingface/transformers@baf1daa58eb2960248fd9f7c3af0ed24 bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; 
platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From a3295dd6667219da2458a1420b746008d71b18b4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Aug 2023 10:38:35 -0700 Subject: [PATCH 054/169] Detect n_gqa and prompt template for wizardlm-70b --- instruction-templates/WizardLM.yaml | 4 ---- models/config.yaml | 20 ++------------------ 2 files changed, 2 insertions(+), 22 deletions(-) delete mode 100644 instruction-templates/WizardLM.yaml diff --git a/instruction-templates/WizardLM.yaml b/instruction-templates/WizardLM.yaml deleted file mode 100644 index c65bb8f4..00000000 --- a/instruction-templates/WizardLM.yaml +++ /dev/null @@ -1,4 +0,0 @@ -user: "" -bot: "### Response:" -turn_template: "<|user-message|>\n\n<|bot|><|bot-message|>\n\n" -context: "" \ No newline at end of file diff --git a/models/config.yaml b/models/config.yaml index 7e3e8ca4..23862770 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -53,9 +53,6 @@ llama-65b-gptq-3bit: .*vicuna.*(1.1|1_1|1.3|1_3): mode: 'instruct' instruction_template: 'Vicuna-v1.1' -.*wizard.*vicuna: - mode: 'instruct' - instruction_template: 'Vicuna-v1.1' .*stable.*vicuna: mode: 'instruct' instruction_template: 'StableVicuna' @@ -108,10 +105,6 @@ llama-65b-gptq-3bit: truncation_length: 4096 .*stablelm-base: truncation_length: 4096 -.*wizardlm: - mode: 'instruct' - model_type: 'llama' - instruction_template: 'WizardLM' .*galactica.*finetuned: mode: 'instruct' instruction_template: 'Galactica Finetuned' @@ -189,21 +182,12 @@ llama-65b-gptq-3bit: .*airoboros.*1.2: mode: 'instruct' instruction_template: 'Airoboros-v1.2' -.*WizardLM-30B-V1.0: - mode: 'instruct' - instruction_template: 'Vicuna-v1.1' -TheBloke_WizardLM-30B-GPTQ: - mode: 'instruct' - instruction_template: 'Vicuna-v1.1' .*alpa(cino|sta): mode: 'instruct' instruction_template: 'Alpaca' .*hippogriff: mode: 'instruct' instruction_template: 'Hippogriff' -.*gpt4all-.*-snoozy: - mode: 'instruct' - instruction_template: 'WizardLM' .*lazarus: mode: 'instruct' instruction_template: 'Alpaca' @@ -267,7 +251,7 @@ TheBloke_WizardLM-30B-GPTQ: mode: 'instruct' instruction_template: 'Alpaca' truncation_length: 8192 -.*wizardlm-.*-v1.1: +.*wizardlm: mode: 'instruct' instruction_template: 'Vicuna-v1.1' .*godzilla: @@ -279,7 +263,7 @@ TheBloke_WizardLM-30B-GPTQ: .*llama-(2|v2).*chat: mode: 'instruct' instruction_template: 'Llama-v2' -.*llama.*70b.*ggml.*\.bin: +.*70b.*ggml.*\.bin: n_gqa: 8 .*newhope: mode: 'instruct' From bee73cedbd535d8a5392472c402c843b3ed10e27 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Wed, 9 Aug 2023 23:42:34 -0500 Subject: [PATCH 055/169] Streamline GPTQ-for-LLaMa support --- README.md | 3 -- modules/GPTQ_loader.py | 64 ++++++++++------------------------------ modules/shared.py | 3 -- modules/ui_model_menu.py | 2 +- requirements.txt | 4 +++ 5 files changed, 21 
insertions(+), 55 deletions(-) diff --git a/README.md b/README.md index 98de6c09..ad2ad1ed 100644 --- a/README.md +++ b/README.md @@ -280,9 +280,6 @@ Optionally, you can use the following command-line flags: | `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. | | `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. | | `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. -| `--quant_attn` | (triton) Enable quant attention. | -| `--warmup_autotune` | (triton) Enable warmup autotune. | -| `--fused_mlp` | (triton) Enable fused mlp. | #### DeepSpeed diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index ddc5f9a5..c0cef476 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -11,26 +11,9 @@ from transformers import AutoConfig, AutoModelForCausalLM import modules.shared as shared from modules.logging_colors import logger -sys.path.insert(0, str(Path("repositories/GPTQ-for-LLaMa"))) - -try: - import llama_inference_offload -except ImportError: - logger.error('Failed to load GPTQ-for-LLaMa') - logger.error('See https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md') - sys.exit(-1) - -try: - from modelutils import find_layers -except ImportError: - from utils import find_layers - -try: - from quant import make_quant - is_triton = False -except ImportError: - import quant - is_triton = True +from gptq_for_llama import llama_inference_offload +from gptq_for_llama.modelutils import find_layers +from gptq_for_llama.quant import make_quant # This function is a replacement for the load_quant function in the @@ -59,24 +42,21 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc if name in layers: del layers[name] - if not is_triton: - gptq_args = inspect.getfullargspec(make_quant).args + gptq_args = inspect.getfullargspec(make_quant).args - make_quant_kwargs = { - 'module': model, - 'names': layers, - 'bits': wbits, - } - if 'groupsize' in gptq_args: - make_quant_kwargs['groupsize'] = groupsize - if 'faster' in gptq_args: - make_quant_kwargs['faster'] = faster_kernel - if 'kernel_switch_threshold' in gptq_args: - make_quant_kwargs['kernel_switch_threshold'] = kernel_switch_threshold + make_quant_kwargs = { + 'module': model, + 'names': layers, + 'bits': wbits, + } + if 'groupsize' in gptq_args: + make_quant_kwargs['groupsize'] = groupsize + if 'faster' in gptq_args: + make_quant_kwargs['faster'] = faster_kernel + if 'kernel_switch_threshold' in gptq_args: + make_quant_kwargs['kernel_switch_threshold'] = kernel_switch_threshold - make_quant(**make_quant_kwargs) - else: - quant.make_quant_linear(model, layers, wbits, groupsize) + make_quant(**make_quant_kwargs) del layers if checkpoint.endswith('.safetensors'): @@ -85,18 +65,6 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc else: model.load_state_dict(torch.load(checkpoint), strict=False) - if is_triton: - if shared.args.quant_attn: - quant.make_quant_attn(model) - - if eval and shared.args.fused_mlp: - quant.make_fused_mlp(model) - - if shared.args.warmup_autotune: - quant.autotune_warmup_linear(model, transpose=not eval) - if eval and shared.args.fused_mlp: - quant.autotune_warmup_fused(model) - model.seqlen = 2048 return model diff --git 
a/modules/shared.py b/modules/shared.py index 951120c8..224fa6aa 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -138,9 +138,6 @@ parser.add_argument('--groupsize', type=int, default=-1, help='Group size.') parser.add_argument('--pre_layer', type=int, nargs="+", help='The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg --pre_layer 30 60.') parser.add_argument('--checkpoint', type=str, help='The path to the quantized checkpoint file. If not specified, it will be automatically detected.') parser.add_argument('--monkey-patch', action='store_true', help='Apply the monkey patch for using LoRAs with quantized models.') -parser.add_argument('--quant_attn', action='store_true', help='(triton) Enable quant attention.') -parser.add_argument('--warmup_autotune', action='store_true', help='(triton) Enable warmup autotune.') -parser.add_argument('--fused_mlp', action='store_true', help='(triton) Enable fused mlp.') # AutoGPTQ parser.add_argument('--triton', action='store_true', help='Use triton.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 55416a07..e98e237c 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -110,7 +110,7 @@ def create_ui(): shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock) shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed) shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Make sure to inspect the .py files inside the model folder before loading it with this option enabled.') - shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa is currently 2x faster than AutoGPTQ on some systems. It is installed by default with the one-click installers. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') + shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa support is currently only kept for compatibility with older GPUs. AutoGPTQ or ExLlama is preferred when compatible. GPTQ-for-LLaMa is installed by default with the one-click installers. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') shared.gradio['exllama_info'] = gr.Markdown('For more information, consult the [docs](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama.md).') shared.gradio['exllama_HF_info'] = gr.Markdown('ExLlama_HF is a wrapper that lets you use ExLlama like a Transformers model, which means it can use the Transformers samplers. It\'s a bit slower than the regular ExLlama.') shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF is a wrapper that lets you use llama.cpp like a Transformers model, which means it can use the Transformers samplers. 
To use it, make sure to first download oobabooga/llama-tokenizer under "Download custom model or LoRA".') diff --git a/requirements.txt b/requirements.txt index e65bed6b..b27e14c5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,3 +36,7 @@ https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.77/llama_cpp_ # llama-cpp-python with CUDA support https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.77+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.77+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" + +# GPTQ-for-LLaMa +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From e3d3565b2a538da8769fd0352067647529b2298c Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Wed, 9 Aug 2023 23:59:04 -0500 Subject: [PATCH 056/169] Remove GPTQ-for-LLaMa monkey patch support AutoGPTQ will be the preferred GPTQ LoRa loader in the future. --- README.md | 1 - docs/GPTQ-models-(4-bit-mode).md | 27 ------------------- docs/LoRA.md | 1 - docs/Training-LoRAs.md | 8 ------ modules/monkey_patch_gptq_lora.py | 43 ------------------------------- modules/training.py | 23 ----------------- 6 files changed, 103 deletions(-) delete mode 100644 modules/monkey_patch_gptq_lora.py diff --git a/README.md b/README.md index ad2ad1ed..5739d0ba 100644 --- a/README.md +++ b/README.md @@ -279,7 +279,6 @@ Optionally, you can use the following command-line flags: | `--groupsize GROUPSIZE` | Group size. | | `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. | | `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. | -| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. #### DeepSpeed diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md index 838595ef..d3869bb7 100644 --- a/docs/GPTQ-models-(4-bit-mode).md +++ b/docs/GPTQ-models-(4-bit-mode).md @@ -198,31 +198,4 @@ Output generated in 123.79 seconds (1.61 tokens/s, 199 tokens) You can also use multiple GPUs with `pre_layer` if using the oobabooga fork of GPTQ, eg `--pre_layer 30 60` will load a LLaMA-30B model half onto your first GPU and half onto your second, or `--pre_layer 20 40` will load 20 layers onto GPU-0, 20 layers onto GPU-1, and 20 layers offloaded to CPU. -### Using LoRAs with GPTQ-for-LLaMa - -This requires using a monkey patch that is supported by this web UI: https://github.com/johnsmith0031/alpaca_lora_4bit - -To use it: - -1. Clone `johnsmith0031/alpaca_lora_4bit` into the repositories folder: - -``` -cd text-generation-webui/repositories -git clone https://github.com/johnsmith0031/alpaca_lora_4bit -``` - -⚠️ I have tested it with the following commit specifically: `2f704b93c961bf202937b10aac9322b092afdce0` - -2. 
Install https://github.com/sterlind/GPTQ-for-LLaMa with this command: - -``` -pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit -``` - -3. Start the UI with the `--monkey-patch` flag: - -``` -python server.py --model llama-7b-4bit-128g --listen --lora tloen_alpaca-lora-7b --monkey-patch -``` - diff --git a/docs/LoRA.md b/docs/LoRA.md index f1504d10..02ce55be 100644 --- a/docs/LoRA.md +++ b/docs/LoRA.md @@ -11,7 +11,6 @@ This is the current state of LoRA integration in the web UI: | Transformers | Full support in 16-bit, `--load-in-8bit`, `--load-in-4bit`, and CPU modes. | | ExLlama | Single LoRA support. Fast to remove the LoRA afterwards. | | AutoGPTQ | Single LoRA support. Removing the LoRA requires reloading the entire model.| -| GPTQ-for-LLaMa | Full support with the [monkey patch](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#using-loras-with-gptq-for-llama). | ## Downloading a LoRA diff --git a/docs/Training-LoRAs.md b/docs/Training-LoRAs.md index 83e6d5a7..bdc79992 100644 --- a/docs/Training-LoRAs.md +++ b/docs/Training-LoRAs.md @@ -131,14 +131,6 @@ So, in effect, Loss is a balancing game: you want to get it low enough that it u Note: if you see Loss start at or suddenly jump to exactly `0`, it is likely something has gone wrong in your training process (eg model corruption). -## Note: 4-Bit Monkeypatch - -The [4-bit LoRA monkeypatch](GPTQ-models-(4-bit-mode).md#using-loras-in-4-bit-mode) works for training, but has side effects: -- VRAM usage is higher currently. You can reduce the `Micro Batch Size` to `1` to compensate. -- Models do funky things. LoRAs apply themselves, or refuse to apply, or spontaneously error out, or etc. It can be helpful to reload base model or restart the WebUI between training/usage to minimize chances of anything going haywire. -- Loading or working with multiple LoRAs at the same time doesn't currently work. -- Generally, recognize and treat the monkeypatch as the dirty temporary hack it is - it works, but isn't very stable. It will get better in time when everything is merged upstream for full official support. - ## Legacy notes LoRA training was contributed by [mcmonkey4eva](https://github.com/mcmonkey4eva) in PR [#570](https://github.com/oobabooga/text-generation-webui/pull/570). 
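The loss guidance above is what the `stop_at_loss` argument to `do_train()` in `modules/training.py` enforces (that parameter is visible in the training hunks further down). As a minimal, standalone sketch of the same idea using a `transformers` `TrainerCallback` — the class name and the example threshold are illustrative, not the web UI's actual implementation:

```python
from transformers import TrainerCallback


class LossThresholdCallback(TrainerCallback):
    # Illustrative sketch: stop training once the logged loss reaches a target,
    # and warn if it collapses to exactly 0 (usually a sign something broke).
    def __init__(self, stop_at_loss=1.8):
        self.stop_at_loss = stop_at_loss

    def on_log(self, args, state, control, logs=None, **kwargs):
        loss = (logs or {}).get("loss")
        if loss is None:
            return
        if loss == 0.0:
            print("Warning: loss is exactly 0 - check the model and dataset for corruption.")
        elif loss <= self.stop_at_loss:
            control.should_training_stop = True
            print(f"Loss {loss:.3f} reached the {self.stop_at_loss} target; stopping early.")
```

Attaching it to a `Trainer` is a one-liner, e.g. `trainer.add_callback(LossThresholdCallback(stop_at_loss=1.8))`; the threshold itself is whatever you choose per the balancing-game note above.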
diff --git a/modules/monkey_patch_gptq_lora.py b/modules/monkey_patch_gptq_lora.py deleted file mode 100644 index bf8d478d..00000000 --- a/modules/monkey_patch_gptq_lora.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copied from https://github.com/johnsmith0031/alpaca_lora_4bit - -import sys -from pathlib import Path - -sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit"))) - -import autograd_4bit -from amp_wrapper import AMPWrapper -from autograd_4bit import ( - Autograd4bitQuantLinear, - load_llama_model_4bit_low_ram -) -from monkeypatch.peft_tuners_lora_monkey_patch import ( - Linear4bitLt, - replace_peft_model_with_gptq_lora_model -) - -from modules import shared -from modules.GPTQ_loader import find_quantized_model_file - -replace_peft_model_with_gptq_lora_model() - - -def load_model_llama(model_name): - config_path = str(Path(f'{shared.args.model_dir}/{model_name}')) - model_path = str(find_quantized_model_file(model_name)) - model, tokenizer = load_llama_model_4bit_low_ram(config_path, model_path, groupsize=shared.args.groupsize, is_v1_model=False) - for n, m in model.named_modules(): - if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt): - if m.is_v1_model: - m.zeros = m.zeros.half() - m.scales = m.scales.half() - m.bias = m.bias.half() - - autograd_4bit.use_new = True - autograd_4bit.auto_switch = True - - model.half() - wrapper = AMPWrapper(model) - wrapper.apply_generate() - - return model, tokenizer diff --git a/modules/training.py b/modules/training.py index 7558cd5d..fa721ff0 100644 --- a/modules/training.py +++ b/modules/training.py @@ -270,12 +270,6 @@ def calc_trainable_parameters(model): def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str): - if shared.args.monkey_patch: - from monkeypatch.peft_tuners_lora_monkey_patch import ( - replace_peft_model_with_gptq_lora_model - ) - replace_peft_model_with_gptq_lora_model() - global WANT_INTERRUPT WANT_INTERRUPT = False @@ -307,15 +301,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch time.sleep(5) - if shared.args.wbits > 0 and not shared.args.monkey_patch: - yield "LoRA training with GPTQ models requires loading with `--monkey-patch`" - return - - elif not (shared.args.load_in_8bit or shared.args.load_in_4bit) and shared.args.wbits <= 0: - yield "It is highly recommended you use `--load-in-8bit` for LoRA training. *(Will continue anyway in 2 seconds, press `Interrupt` to stop.)*" - logger.warning("It is highly recommended you use `--load-in-8bit` for LoRA training.") - time.sleep(2) # Give it a moment for the message to show in UI before continuing - if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: yield "Cannot input zeroes." 
return @@ -520,14 +505,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch yield traceback.format_exc().replace('\n', '\n\n') return - if shared.args.monkey_patch: - for n, m in lora_model.named_modules(): - if '4bit' in str(type(m)): - if m.is_v1_model: - m.zeros = m.zeros.half() - - m.scales = m.scales.half() - class Tracked(): def __init__(self): self.current_steps = 0 From d7ee4c23862081f6b8dbbaac8f22e7bc519da172 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Thu, 10 Aug 2023 00:10:14 -0500 Subject: [PATCH 057/169] Remove unused import --- modules/GPTQ_loader.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py index c0cef476..bc528b18 100644 --- a/modules/GPTQ_loader.py +++ b/modules/GPTQ_loader.py @@ -1,6 +1,5 @@ import inspect import re -import sys from pathlib import Path import accelerate From d6765bebc4920827200ee5779e2441dec65763e1 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Thu, 10 Aug 2023 00:53:48 -0500 Subject: [PATCH 058/169] Update installation documentation --- docs/GPTQ-models-(4-bit-mode).md | 55 ++++---------------------------- modules/ui_model_menu.py | 2 +- 2 files changed, 8 insertions(+), 49 deletions(-) diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md index d3869bb7..e8d983eb 100644 --- a/docs/GPTQ-models-(4-bit-mode).md +++ b/docs/GPTQ-models-(4-bit-mode).md @@ -70,53 +70,13 @@ Not supported yet. GPTQ-for-LLaMa is the original adaptation of GPTQ for the LLaMA model. It was made possible by [@qwopqwop200](https://github.com/qwopqwop200/GPTQ-for-LLaMa): https://github.com/qwopqwop200/GPTQ-for-LLaMa -Different branches of GPTQ-for-LLaMa are currently available, including: - -| Branch | Comment | -|----|----| -| [Old CUDA branch (recommended)](https://github.com/oobabooga/GPTQ-for-LLaMa/) | The fastest branch, works on Windows and Linux. | -| [Up-to-date triton branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa) | Slightly more precise than the old CUDA branch from 13b upwards, significantly more precise for 7b. 2x slower for small context size and only works on Linux. | -| [Up-to-date CUDA branch](https://github.com/qwopqwop200/GPTQ-for-LLaMa/tree/cuda) | As precise as the up-to-date triton branch, 10x slower than the old cuda branch for small context size. | - -Overall, I recommend using the old CUDA branch. It is included by default in the one-click-installer for this web UI. - -### Installation - -Start by cloning GPTQ-for-LLaMa into your `text-generation-webui/repositories` folder: - -``` -mkdir repositories -cd repositories -git clone https://github.com/oobabooga/GPTQ-for-LLaMa.git -b cuda -``` - -If you want to you to use the up-to-date CUDA or triton branches instead of the old CUDA branch, use these commands: - -``` -git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b cuda -``` - -``` -git clone https://github.com/qwopqwop200/GPTQ-for-LLaMa.git -b triton -``` - -Next you need to install the CUDA extensions. You can do that either by installing the precompiled wheels, or by compiling the wheels yourself. 
+A Python package containing both major CUDA versions of GPTQ-for-LLaMa is used to simplify installation and compatibility: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA ### Precompiled wheels -Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-Wheels +Kindly provided by our friend jllllll: https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases -Windows: - -``` -pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/main/quant_cuda-0.0.0-cp310-cp310-win_amd64.whl -``` - -Linux: - -``` -pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant_cuda-0.0.0-cp310-cp310-linux_x86_64.whl -``` +Wheels are included in requirements.txt and are installed with the webui on supported systems. ### Manual installation @@ -124,20 +84,19 @@ pip install https://github.com/jllllll/GPTQ-for-LLaMa-Wheels/raw/Linux-x64/quant ``` conda activate textgen -conda install -c conda-forge cudatoolkit-dev +conda install cuda -c nvidia/label/cuda-11.7.1 ``` The command above takes some 10 minutes to run and shows no progress bar or updates along the way. -You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough. +You are also going to need to have a C++ compiler installed. On Linux, `sudo apt install build-essential` or equivalent is enough. On Windows, Visual Studio or Visual Studio Build Tools is required. -If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+), you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise. +If you're using an older version of CUDA toolkit (e.g. 11.7) but the latest version of `gcc` and `g++` (12.0+) on Linux, you should downgrade with: `conda install -c conda-forge gxx==11.3.0`. Kernel compilation will fail otherwise. #### Step 2: compile the CUDA extensions ``` -cd repositories/GPTQ-for-LLaMa -python setup_cuda.py install +python -m pip install git+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA -v ``` ### Getting pre-converted LLaMA weights diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index e98e237c..0c1042f6 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -110,7 +110,7 @@ def create_ui(): shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock) shared.gradio['llama_cpp_seed'] = gr.Number(label='Seed (0 for random)', value=shared.args.llama_cpp_seed) shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Make sure to inspect the .py files inside the model folder before loading it with this option enabled.') - shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa support is currently only kept for compatibility with older GPUs. AutoGPTQ or ExLlama is preferred when compatible. GPTQ-for-LLaMa is installed by default with the one-click installers. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') + shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa support is currently only kept for compatibility with older GPUs. AutoGPTQ or ExLlama is preferred when compatible. GPTQ-for-LLaMa is installed by default with the webui on supported systems. 
Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).') shared.gradio['exllama_info'] = gr.Markdown('For more information, consult the [docs](https://github.com/oobabooga/text-generation-webui/blob/main/docs/ExLlama.md).') shared.gradio['exllama_HF_info'] = gr.Markdown('ExLlama_HF is a wrapper that lets you use ExLlama like a Transformers model, which means it can use the Transformers samplers. It\'s a bit slower than the regular ExLlama.') shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF is a wrapper that lets you use llama.cpp like a Transformers model, which means it can use the Transformers samplers. To use it, make sure to first download oobabooga/llama-tokenizer under "Download custom model or LoRA".') From 16e2b117b415074afd2917a72496b776debfcd58 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 10 Aug 2023 08:38:10 -0700 Subject: [PATCH 059/169] Minor doc change --- docs/GPTQ-models-(4-bit-mode).md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md index e8d983eb..b42f4224 100644 --- a/docs/GPTQ-models-(4-bit-mode).md +++ b/docs/GPTQ-models-(4-bit-mode).md @@ -64,7 +64,7 @@ python server.py --autogptq --gpu-memory 3000MiB 6000MiB --model model_name ### Using LoRAs with AutoGPTQ -Not supported yet. +Works fine for a single LoRA. ## GPTQ-for-LLaMa From c7f52bbdc106896b8f839c442b0c82937f006fd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 10 Aug 2023 08:39:41 -0700 Subject: [PATCH 060/169] Revert "Remove GPTQ-for-LLaMa monkey patch support" This reverts commit e3d3565b2a538da8769fd0352067647529b2298c. --- README.md | 1 + docs/GPTQ-models-(4-bit-mode).md | 27 +++++++++++++++++++ docs/LoRA.md | 1 + docs/Training-LoRAs.md | 8 ++++++ modules/monkey_patch_gptq_lora.py | 43 +++++++++++++++++++++++++++++++ modules/training.py | 23 +++++++++++++++++ 6 files changed, 103 insertions(+) create mode 100644 modules/monkey_patch_gptq_lora.py diff --git a/README.md b/README.md index 5739d0ba..ad2ad1ed 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,7 @@ Optionally, you can use the following command-line flags: | `--groupsize GROUPSIZE` | Group size. | | `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, eg `--pre_layer 30 60`. | | `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. | +| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. #### DeepSpeed diff --git a/docs/GPTQ-models-(4-bit-mode).md b/docs/GPTQ-models-(4-bit-mode).md index b42f4224..428d7560 100644 --- a/docs/GPTQ-models-(4-bit-mode).md +++ b/docs/GPTQ-models-(4-bit-mode).md @@ -157,4 +157,31 @@ Output generated in 123.79 seconds (1.61 tokens/s, 199 tokens) You can also use multiple GPUs with `pre_layer` if using the oobabooga fork of GPTQ, eg `--pre_layer 30 60` will load a LLaMA-30B model half onto your first GPU and half onto your second, or `--pre_layer 20 40` will load 20 layers onto GPU-0, 20 layers onto GPU-1, and 20 layers offloaded to CPU. 
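The `pre_layer` values in that example are cumulative layer boundaries, not per-GPU counts. A minimal sketch of the mapping they describe, assuming a 60-layer model such as LLaMA-30B — this only illustrates the semantics of the paragraph above, not the actual GPTQ-for-LLaMa offloading code:

```python
def pre_layer_device_map(pre_layer, num_layers):
    # Illustrative sketch: pre_layer=[20, 40] with num_layers=60 puts layers
    # 0-19 on cuda:0, 20-39 on cuda:1, and 40-59 on the CPU.
    device_map = {}
    for layer in range(num_layers):
        for gpu_index, boundary in enumerate(pre_layer):
            if layer < boundary:
                device_map[layer] = f"cuda:{gpu_index}"
                break
        else:
            device_map[layer] = "cpu"
    return device_map


print(pre_layer_device_map([20, 40], 60))
```

Running it reproduces the `--pre_layer 20 40` example: 20 layers on GPU-0, 20 on GPU-1, and the remaining 20 offloaded to CPU.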
+### Using LoRAs with GPTQ-for-LLaMa + +This requires using a monkey patch that is supported by this web UI: https://github.com/johnsmith0031/alpaca_lora_4bit + +To use it: + +1. Clone `johnsmith0031/alpaca_lora_4bit` into the repositories folder: + +``` +cd text-generation-webui/repositories +git clone https://github.com/johnsmith0031/alpaca_lora_4bit +``` + +⚠️ I have tested it with the following commit specifically: `2f704b93c961bf202937b10aac9322b092afdce0` + +2. Install https://github.com/sterlind/GPTQ-for-LLaMa with this command: + +``` +pip install git+https://github.com/sterlind/GPTQ-for-LLaMa.git@lora_4bit +``` + +3. Start the UI with the `--monkey-patch` flag: + +``` +python server.py --model llama-7b-4bit-128g --listen --lora tloen_alpaca-lora-7b --monkey-patch +``` + diff --git a/docs/LoRA.md b/docs/LoRA.md index 02ce55be..f1504d10 100644 --- a/docs/LoRA.md +++ b/docs/LoRA.md @@ -11,6 +11,7 @@ This is the current state of LoRA integration in the web UI: | Transformers | Full support in 16-bit, `--load-in-8bit`, `--load-in-4bit`, and CPU modes. | | ExLlama | Single LoRA support. Fast to remove the LoRA afterwards. | | AutoGPTQ | Single LoRA support. Removing the LoRA requires reloading the entire model.| +| GPTQ-for-LLaMa | Full support with the [monkey patch](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#using-loras-with-gptq-for-llama). | ## Downloading a LoRA diff --git a/docs/Training-LoRAs.md b/docs/Training-LoRAs.md index bdc79992..83e6d5a7 100644 --- a/docs/Training-LoRAs.md +++ b/docs/Training-LoRAs.md @@ -131,6 +131,14 @@ So, in effect, Loss is a balancing game: you want to get it low enough that it u Note: if you see Loss start at or suddenly jump to exactly `0`, it is likely something has gone wrong in your training process (eg model corruption). +## Note: 4-Bit Monkeypatch + +The [4-bit LoRA monkeypatch](GPTQ-models-(4-bit-mode).md#using-loras-in-4-bit-mode) works for training, but has side effects: +- VRAM usage is higher currently. You can reduce the `Micro Batch Size` to `1` to compensate. +- Models do funky things. LoRAs apply themselves, or refuse to apply, or spontaneously error out, or etc. It can be helpful to reload base model or restart the WebUI between training/usage to minimize chances of anything going haywire. +- Loading or working with multiple LoRAs at the same time doesn't currently work. +- Generally, recognize and treat the monkeypatch as the dirty temporary hack it is - it works, but isn't very stable. It will get better in time when everything is merged upstream for full official support. + ## Legacy notes LoRA training was contributed by [mcmonkey4eva](https://github.com/mcmonkey4eva) in PR [#570](https://github.com/oobabooga/text-generation-webui/pull/570). 
diff --git a/modules/monkey_patch_gptq_lora.py b/modules/monkey_patch_gptq_lora.py new file mode 100644 index 00000000..bf8d478d --- /dev/null +++ b/modules/monkey_patch_gptq_lora.py @@ -0,0 +1,43 @@ +# Copied from https://github.com/johnsmith0031/alpaca_lora_4bit + +import sys +from pathlib import Path + +sys.path.insert(0, str(Path("repositories/alpaca_lora_4bit"))) + +import autograd_4bit +from amp_wrapper import AMPWrapper +from autograd_4bit import ( + Autograd4bitQuantLinear, + load_llama_model_4bit_low_ram +) +from monkeypatch.peft_tuners_lora_monkey_patch import ( + Linear4bitLt, + replace_peft_model_with_gptq_lora_model +) + +from modules import shared +from modules.GPTQ_loader import find_quantized_model_file + +replace_peft_model_with_gptq_lora_model() + + +def load_model_llama(model_name): + config_path = str(Path(f'{shared.args.model_dir}/{model_name}')) + model_path = str(find_quantized_model_file(model_name)) + model, tokenizer = load_llama_model_4bit_low_ram(config_path, model_path, groupsize=shared.args.groupsize, is_v1_model=False) + for n, m in model.named_modules(): + if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt): + if m.is_v1_model: + m.zeros = m.zeros.half() + m.scales = m.scales.half() + m.bias = m.bias.half() + + autograd_4bit.use_new = True + autograd_4bit.auto_switch = True + + model.half() + wrapper = AMPWrapper(model) + wrapper.apply_generate() + + return model, tokenizer diff --git a/modules/training.py b/modules/training.py index fa721ff0..7558cd5d 100644 --- a/modules/training.py +++ b/modules/training.py @@ -270,6 +270,12 @@ def calc_trainable_parameters(model): def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, overlap_len: int, newline_favor_len: int, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str): + if shared.args.monkey_patch: + from monkeypatch.peft_tuners_lora_monkey_patch import ( + replace_peft_model_with_gptq_lora_model + ) + replace_peft_model_with_gptq_lora_model() + global WANT_INTERRUPT WANT_INTERRUPT = False @@ -301,6 +307,15 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch time.sleep(5) + if shared.args.wbits > 0 and not shared.args.monkey_patch: + yield "LoRA training with GPTQ models requires loading with `--monkey-patch`" + return + + elif not (shared.args.load_in_8bit or shared.args.load_in_4bit) and shared.args.wbits <= 0: + yield "It is highly recommended you use `--load-in-8bit` for LoRA training. *(Will continue anyway in 2 seconds, press `Interrupt` to stop.)*" + logger.warning("It is highly recommended you use `--load-in-8bit` for LoRA training.") + time.sleep(2) # Give it a moment for the message to show in UI before continuing + if cutoff_len <= 0 or micro_batch_size <= 0 or batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: yield "Cannot input zeroes." 
return @@ -505,6 +520,14 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch yield traceback.format_exc().replace('\n', '\n\n') return + if shared.args.monkey_patch: + for n, m in lora_model.named_modules(): + if '4bit' in str(type(m)): + if m.is_v1_model: + m.zeros = m.zeros.half() + + m.scales = m.scales.half() + class Tracked(): def __init__(self): self.current_steps = 0 From e12a1852d9b617fc851caa5363527ea0e2ce8b4f Mon Sep 17 00:00:00 2001 From: Gennadij Date: Thu, 10 Aug 2023 19:42:24 +0300 Subject: [PATCH 061/169] Add Vicuna-v1.5 detection (#3524) --- models/config.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/models/config.yaml b/models/config.yaml index 23862770..3d5f48ff 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -53,6 +53,11 @@ llama-65b-gptq-3bit: .*vicuna.*(1.1|1_1|1.3|1_3): mode: 'instruct' instruction_template: 'Vicuna-v1.1' +.*vicuna.*(1.5|1_5): + mode: 'instruct' + instruction_template: 'Vicuna-v1.1' + truncation_length: 4096 + rms_norm_eps: 5.0e-6 .*stable.*vicuna: mode: 'instruct' instruction_template: 'StableVicuna' From 3929971b669e6807cce8bbcaebc0d9a0998464a5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 10 Aug 2023 10:01:12 -0700 Subject: [PATCH 062/169] Don't show oobabooga_llama-tokenizer in the model dropdown --- modules/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/utils.py b/modules/utils.py index adaa15e8..011c71f1 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -71,7 +71,12 @@ def natural_keys(text): def get_available_models(): - return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml'))], key=natural_keys) + model_list = [] + for item in list(Path(f'{shared.args.model_dir}/').glob('*')): + if not item.name.endswith(('.txt', '-np', '.pt', '.json', '.yaml', '.py')) and 'llama-tokenizer' not in item.name: + model_list.append(re.sub('.pth$', '', item.name)) + + return sorted(model_list, key=natural_keys) def get_available_presets(): @@ -120,8 +125,3 @@ def get_datasets(path: str, ext: str): def get_available_chat_styles(): return sorted(set(('-'.join(k.stem.split('-')[1:]) for k in Path('css').glob('chat_style*.css'))), key=natural_keys) - - -def get_available_sessions(): - items = sorted(set(k.stem for k in Path('logs').glob(f'session_{shared.get_mode()}*')), key=natural_keys, reverse=True) - return [item for item in items if 'autosave' in item] + [item for item in items if 'autosave' not in item] From 0789554f65e31a089e4a81e8a47daf2932b762d6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 10 Aug 2023 09:54:28 -0700 Subject: [PATCH 063/169] Allow --lora to use an absolute path --- modules/LoRA.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/LoRA.py b/modules/LoRA.py index 1350783f..10020552 100644 --- a/modules/LoRA.py +++ b/modules/LoRA.py @@ -17,6 +17,14 @@ def add_lora_to_model(lora_names): add_lora_transformers(lora_names) +def get_lora_path(lora_name): + p = Path(lora_name) + if p.exists(): + lora_name = p.parts[-1] + + return Path(f"{shared.args.lora_dir}/{lora_name}") + + def add_lora_exllama(lora_names): try: @@ -40,7 +48,7 @@ def add_lora_exllama(lora_names): if len(lora_names) > 1: logger.warning('ExLlama can only work with 1 LoRA at the moment. 
Only the first one in the list will be loaded.') - lora_path = Path(f"{shared.args.lora_dir}/{lora_names[0]}") + lora_path = get_lora_path(lora_names[0]) lora_config_path = lora_path / "adapter_config.json" lora_adapter_path = lora_path / "adapter_model.bin" @@ -81,7 +89,7 @@ def add_lora_autogptq(lora_names): inference_mode=True, ) - lora_path = Path(f"{shared.args.lora_dir}/{lora_names[0]}") + lora_path = get_lora_path(lora_names[0]) logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join([lora_names[0]]))) shared.model = get_gptq_peft_model(shared.model, peft_config, lora_path) shared.lora_names = [lora_names[0]] @@ -101,7 +109,7 @@ def add_lora_transformers(lora_names): if len(removed_set) == 0 and len(prior_set) > 0: logger.info(f"Adding the LoRA(s) named {added_set} to the model...") for lora in added_set: - shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora) + shared.model.load_adapter(get_lora_path(lora), lora) return @@ -123,9 +131,9 @@ def add_lora_transformers(lora_names): params['device_map'] = {"base_model.model." + k: v for k, v in shared.model.hf_device_map.items()} logger.info("Applying the following LoRAs to {}: {}".format(shared.model_name, ', '.join(lora_names))) - shared.model = PeftModel.from_pretrained(shared.model, Path(f"{shared.args.lora_dir}/{lora_names[0]}"), adapter_name=lora_names[0], **params) + shared.model = PeftModel.from_pretrained(shared.model, get_lora_path(lora_names[0]), adapter_name=lora_names[0], **params) for lora in lora_names[1:]: - shared.model.load_adapter(Path(f"{shared.args.lora_dir}/{lora}"), lora) + shared.model.load_adapter(get_lora_path(lora), lora) shared.lora_names = lora_names From 8dbaa20ca8104aa5ead76dec13af3faa25f5d7e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 10 Aug 2023 13:14:48 -0700 Subject: [PATCH 064/169] Don't replace last reply with an empty message --- modules/chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index c9af55db..c2a05d3f 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -347,7 +347,10 @@ def send_last_reply_to_input(history): def replace_last_reply(text, state): history = state['history'] - if len(history['visible']) > 0: + + if len(text.strip()) == 0: + return history + elif len(history['visible']) > 0: history['visible'][-1][1] = text history['internal'][-1][1] = apply_extensions('input', text, state) From 7a4fcee0697b35081e781ab67c70af0c459579eb Mon Sep 17 00:00:00 2001 From: cal066 <60696996+cal066@users.noreply.github.com> Date: Fri, 11 Aug 2023 17:41:33 +0000 Subject: [PATCH 065/169] Add ctransformers support (#3313) --------- Co-authored-by: cal066 Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> Co-authored-by: randoentity <137087500+randoentity@users.noreply.github.com> --- README.md | 2 +- modules/ctransformers_model.py | 76 +++++++++++++++++++++++ modules/loaders.py | 109 +++++++++++++++++++++++---------- modules/models.py | 23 ++++++- modules/shared.py | 2 + modules/text_generation.py | 6 +- modules/ui_model_menu.py | 8 ++- modules/ui_parameters.py | 2 +- requirements.txt | 3 + 9 files changed, 188 insertions(+), 43 deletions(-) create mode 100644 modules/ctransformers_model.py diff --git a/README.md b/README.md index ad2ad1ed..8ceb077c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. 
## Features * 3 interface modes: default, notebook, and chat -* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa +* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa, ctransformers * Dropdown menu for quickly switching between different models * LoRA: load and unload LoRAs on the fly, train a new LoRA * Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py new file mode 100644 index 00000000..b3d001d3 --- /dev/null +++ b/modules/ctransformers_model.py @@ -0,0 +1,76 @@ +from ctransformers import AutoConfig, AutoModelForCausalLM + +from modules import shared +from modules.callbacks import Iteratorize +from modules.logging_colors import logger + + +class CtransformersModel: + def __init__(self): + pass + + @classmethod + def from_pretrained(self, path): + result = self() + + # ctransformers uses -1 for random seed + config = AutoConfig.from_pretrained( + str(path), + threads=shared.args.threads, + gpu_layers=shared.args.n_gpu_layers, + batch_size=shared.args.n_batch, + stream=True, + seed=(-1 if shared.args.llama_cpp_seed == 0 else shared.args.llama_cpp_seed) + ) + + self.model = AutoModelForCausalLM.from_pretrained( + str(result.model_dir(path) if result.model_type_is_auto() else path), + model_type=(None if result.model_type_is_auto() else shared.args.model_type), + config=config + ) + + logger.info(f'Using ctransformers model_type: {self.model.model_type} for {self.model.model_path}') + return result, result + + def model_type_is_auto(self): + return shared.args.model_type == "Auto" or shared.args.model_type == "None" + + def model_dir(self, path): + if path.is_file(): + return path.parent + + return path + + def encode(self, string, **kwargs): + return self.model.tokenize(string) + + def decode(self, ids): + return self.model.detokenize(ids) + + def generate(self, prompt, state, callback=None): + prompt = prompt if type(prompt) is str else prompt.decode() + generator = self.model._stream( + prompt=prompt, + max_new_tokens=state['max_new_tokens'], + temperature=state['temperature'], + top_p=state['top_p'], + top_k=state['top_k'], + repetition_penalty=state['repetition_penalty'], + threads=shared.args.threads + ) + + output = "" + for token in generator: + if callback: + callback(token) + + output += token + + return output + + def generate_with_streaming(self, *args, **kwargs): + with Iteratorize(self.generate, args, kwargs, callback=None) as generator: + reply = '' + for token in generator: + reply += token + yield reply diff --git a/modules/loaders.py b/modules/loaders.py index 07bc455c..fa5e03c2 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -1,10 +1,43 @@ import functools +from collections import OrderedDict import gradio as gr from modules import shared -loaders_and_params = { +loaders_and_params = OrderedDict({ + 'Transformers': [ + 'cpu_memory', + 'gpu_memory', + 'trust_remote_code', + 'load_in_8bit', + 'bf16', + 'cpu', + 'disk', + 'auto_devices', + 'load_in_4bit', + 'use_double_quant', + 'quant_type', + 'compute_dtype', + 'trust_remote_code', + 'alpha_value', + 'compress_pos_emb', + 'transformers_info' + ], + 'ExLlama_HF': [ + 'gpu_split', + 'max_seq_len', + 'alpha_value', + 'compress_pos_emb', + 'exllama_HF_info', + ], + 'ExLlama': [ + 'gpu_split', + 'max_seq_len', + 'alpha_value', + 'compress_pos_emb', + 'exllama_info', + ], 'AutoGPTQ': [ 'triton', 
'no_inject_fused_attention', @@ -59,39 +92,15 @@ loaders_and_params = { 'cpu', 'llamacpp_HF_info', ], - 'Transformers': [ - 'cpu_memory', - 'gpu_memory', - 'trust_remote_code', - 'load_in_8bit', - 'bf16', - 'cpu', - 'disk', - 'auto_devices', - 'load_in_4bit', - 'use_double_quant', - 'quant_type', - 'compute_dtype', - 'trust_remote_code', - 'alpha_value', - 'compress_pos_emb', - 'transformers_info' - ], - 'ExLlama': [ - 'gpu_split', - 'max_seq_len', - 'alpha_value', - 'compress_pos_emb', - 'exllama_info', - ], - 'ExLlama_HF': [ - 'gpu_split', - 'max_seq_len', - 'alpha_value', - 'compress_pos_emb', - 'exllama_HF_info', + 'ctransformers': [ + 'n_ctx', + 'n_gpu_layers', + 'n_batch', + 'threads', + 'model_type', + 'llama_cpp_seed', ] -} +}) loaders_samplers = { 'Transformers': { @@ -256,6 +265,33 @@ loaders_samplers = { 'skip_special_tokens', 'auto_max_new_tokens', }, + 'ctransformers': { + 'temperature', + 'top_p', + 'top_k', + 'repetition_penalty', + } +} + +loaders_model_types = { + 'GPTQ-for-LLaMa': [ + "None", + "llama", + "opt", + "gptj" + ], + 'ctransformers': [ + "None", + "gpt2", + "gptj", + "gptneox", + "llama", + "mpt", + "dollyv2" + "replit", + "starcoder", + "falcon" + ], } @@ -277,6 +313,13 @@ def blacklist_samplers(loader): return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers] +def get_model_types(loader): + if loader in loaders_model_types: + return loaders_model_types[loader] + + return ["None"] + + def get_gpu_memory_keys(): return [k for k in shared.gradio if k.startswith('gpu_memory')] diff --git a/modules/models.py b/modules/models.py index aad142c1..d60aecd0 100644 --- a/modules/models.py +++ b/modules/models.py @@ -58,7 +58,8 @@ def load_model(model_name, loader=None): 'llamacpp_HF': llamacpp_HF_loader, 'RWKV': RWKV_loader, 'ExLlama': ExLlama_loader, - 'ExLlama_HF': ExLlama_HF_loader + 'ExLlama_HF': ExLlama_HF_loader, + 'ctransformers': ctransformers_loader, } p = Path(model_name) @@ -242,7 +243,7 @@ def llamacpp_loader(model_name): else: model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*ggml*.bin'))[0] - logger.info(f"llama.cpp weights detected: {model_file}\n") + logger.info(f"llama.cpp weights detected: {model_file}") model, tokenizer = LlamaCppModel.from_pretrained(model_file) return model, tokenizer @@ -268,6 +269,24 @@ def llamacpp_HF_loader(model_name): return model, tokenizer +def ctransformers_loader(model_name): + from modules.ctransformers_model import CtransformersModel + + path = Path(f'{shared.args.model_dir}/{model_name}') + ctrans = CtransformersModel() + if ctrans.model_type_is_auto(): + model_file = path + else: + if path.is_file(): + model_file = path + else: + model_file = list(Path(f'{shared.args.model_dir}/{model_name}').glob('*.bin'))[0] + + logger.info(f'ctransformers weights detected: {model_file}') + model, tokenizer = ctrans.from_pretrained(model_file) + return model, tokenizer + + def GPTQ_loader(model_name): # Monkey patch diff --git a/modules/shared.py b/modules/shared.py index 224fa6aa..cb6f0ae1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -215,6 +215,8 @@ def fix_loader_name(name): return 'ExLlama' elif name in ['exllama-hf', 'exllama_hf', 'exllama hf', 'ex-llama-hf', 'ex_llama_hf']: return 'ExLlama_HF' + elif name in ['ctransformers', 'ctranforemrs', 'ctransformer']: + return 'ctransformers' def add_extension(name): diff --git a/modules/text_generation.py b/modules/text_generation.py index 6e95414b..30e81355 100644 --- 
a/modules/text_generation.py +++ b/modules/text_generation.py @@ -41,7 +41,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): yield '' return - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel']: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'CtransformersModel']: generate_func = generate_reply_custom else: generate_func = generate_reply_HF @@ -90,7 +90,7 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False): def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None): - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel']: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'CtransformersModel']: input_ids = shared.tokenizer.encode(str(prompt)) input_ids = np.array(input_ids).reshape(1, len(input_ids)) else: @@ -104,7 +104,7 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt if truncation_length is not None: input_ids = input_ids[:, -truncation_length:] - if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel'] or shared.args.cpu: + if shared.model.__class__.__name__ in ['LlamaCppModel', 'RWKVModel', 'ExllamaModel', 'CtransformersModel'] or shared.args.cpu: return input_ids elif shared.args.deepspeed: return input_ids.to(device=local_rank) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 0c1042f6..7b852a44 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -63,7 +63,7 @@ def create_ui(): with gr.Row(): with gr.Column(): - shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=["Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value=None) + shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys(), value=None) with gr.Box(): with gr.Row(): with gr.Column(): @@ -84,7 +84,7 @@ def create_ui(): shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None") shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None") - shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gptj"], value=shared.args.model_type or "None") + shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None"], value=shared.args.model_type or "None") shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0) shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. 
Example: 20,7,7') @@ -127,7 +127,9 @@ def create_ui(): def create_event_handlers(): - shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())) + shared.gradio['loader'].change( + loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params())).then( + lambda value: gr.update(choices=loaders.get_model_types(value)), gradio('loader'), gradio('model_type')) # In this event handler, the interface state is read and updated # with the model defaults (if any), and then the model is loaded diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 75bce9b1..4b9fb918 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -16,7 +16,7 @@ def create_ui(default_preset): shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') with gr.Column(): - shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All", "Transformers", "ExLlama_HF", "ExLlama", "AutoGPTQ", "GPTQ-for-LLaMa", "llama.cpp", "llamacpp_HF"], value="All", elem_classes='slim-dropdown') + shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown') with gr.Row(): with gr.Column(): diff --git a/requirements.txt b/requirements.txt index b27e14c5..ec6a7e47 100644 --- a/requirements.txt +++ b/requirements.txt @@ -40,3 +40,6 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/text # GPTQ-for-LLaMa https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" + +# ctransformers +https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.20+cu117-py3-none-any.whl From 28c8df337bc14a752f66f8ee258b73ee621329b1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:02:56 -0700 Subject: [PATCH 066/169] Add repetition_penalty_range to ctransformers --- modules/ctransformers_model.py | 1 + modules/loaders.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index b3d001d3..c5bc701a 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -56,6 +56,7 @@ class CtransformersModel: top_p=state['top_p'], top_k=state['top_k'], repetition_penalty=state['repetition_penalty'], + last_n_tokens=state['repetition_penalty_range'], threads=shared.args.threads ) diff --git a/modules/loaders.py b/modules/loaders.py index fa5e03c2..21854de7 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -86,7 +86,6 @@ loaders_and_params = OrderedDict({ 'no_mmap', 'low_vram', 'mlock', - 'llama_cpp_seed', 'alpha_value', 'compress_pos_emb', 'cpu', @@ -270,6 +269,7 @@ loaders_samplers = { 'top_p', 'top_k', 'repetition_penalty', + 'repetition_penalty_range', } } From 2f918ccf7cc0b9b1901714c2f70e8d8f124cb6bf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 11:15:22 -0700 Subject: [PATCH 067/169] Remove unused parameter --- modules/loaders.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/loaders.py b/modules/loaders.py index 
21854de7..9a222a72 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -92,7 +92,6 @@ loaders_and_params = OrderedDict({ 'llamacpp_HF_info', ], 'ctransformers': [ - 'n_ctx', 'n_gpu_layers', 'n_batch', 'threads', From 4c450e6b7073fb04614adf1f90845107c44174ea Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 15:50:16 -0300 Subject: [PATCH 068/169] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8ceb077c..6694e500 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features * 3 interface modes: default, notebook, and chat -* Multiple model backends: transformers, llama.cpp, ExLlama, AutoGPTQ, GPTQ-for-LLaMa, ctransformers +* Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [ctransformers](https://github.com/marella/ctransformers) * Dropdown menu for quickly switching between different models * LoRA: load and unload LoRAs on the fly, train a new LoRA * Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others From 0e05818266570454c16d7c45b656338c99f22c46 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 11 Aug 2023 16:33:15 -0700 Subject: [PATCH 069/169] Style changes --- modules/ui_chat.py | 31 ++++++++++++++----------------- modules/ui_default.py | 8 ++++---- modules/ui_notebook.py | 17 ++++++++--------- 3 files changed, 26 insertions(+), 30 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 4471d2f4..1d73adf7 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -12,13 +12,10 @@ from modules.utils import gradio def create_ui(): - - shared.gradio.update({ - 'interface_state': gr.State({k: None for k in shared.input_elements}), - 'Chat input': gr.State(), - 'dummy': gr.State(), - 'history': gr.State({'internal': [], 'visible': []}), - }) + shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) + shared.gradio['Chat input'] = gr.State() + shared.gradio['dummy'] = gr.State() + shared.gradio['history'] = gr.State({'internal': [], 'visible': []}) with gr.Tab('Text generation', elem_id='main'): shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) @@ -52,7 +49,7 @@ def create_ui(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') with gr.Tab('Chat settings', elem_id='chat-settings'): - with gr.Tab("Character"): + with gr.Tab('Character'): with gr.Row(): with gr.Column(scale=8): with gr.Row(): @@ -70,7 +67,7 @@ def create_ui(): shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None) - with gr.Tab("Instruction template"): + with gr.Tab('Instruction template'): with gr.Row(): with gr.Row(): 
shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Instruction template', value='None', info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes='slim-dropdown') @@ -91,7 +88,7 @@ def create_ui(): shared.gradio['save_chat_history'] = gr.Button(value='Save history') with gr.Column(): - shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label="Upload History JSON") + shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON') with gr.Tab('Upload character'): with gr.Tab('YAML or JSON'): @@ -104,7 +101,7 @@ def create_ui(): with gr.Tab('TavernAI PNG'): with gr.Row(): with gr.Column(): - shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id="upload_img_tavern") + shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern') shared.gradio['tavern_json'] = gr.State() with gr.Column(): shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) @@ -126,7 +123,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) gen_events.append(shared.gradio['textbox'].submit( @@ -135,7 +132,7 @@ def create_event_handlers(): chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) gen_events.append(shared.gradio['Regenerate'].click( @@ -143,7 +140,7 @@ def create_event_handlers(): partial(chat.generate_chat_reply_wrapper, regenerate=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) gen_events.append(shared.gradio['Continue'].click( @@ -151,7 +148,7 @@ def create_event_handlers(): partial(chat.generate_chat_reply_wrapper, _continue=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) gen_events.append(shared.gradio['Impersonate'].click( @@ -159,7 +156,7 @@ def create_event_handlers(): lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( 
chat.impersonate_wrapper, shared.input_params, gradio('textbox'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) shared.gradio['Replace last reply'].click( @@ -243,7 +240,7 @@ def create_event_handlers(): shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( - None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f"(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}") + None, gradio('temporary_text', 'character_menu', 'mode'), None, _js=f'(hist, char, mode) => {{{ui.save_files_js}; saveHistory(hist, char, mode)}}') shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( diff --git a/modules/ui_default.py b/modules/ui_default.py index f0ab74ef..b879e1ef 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -15,7 +15,7 @@ def create_ui(): shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) shared.gradio['last_input'] = gr.State('') - with gr.Tab("Text generation", elem_id="main"): + with gr.Tab('Text generation', elem_id='main'): with gr.Row(): with gr.Column(): shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') @@ -56,7 +56,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) @@ -65,7 +65,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) @@ -74,7 +74,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[1]; element.scrollTop = element.scrollHeight}") ) diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 998a7cf7..9e8b3af6 100644 --- a/modules/ui_notebook.py +++ 
b/modules/ui_notebook.py @@ -11,11 +11,10 @@ from modules.utils import gradio def create_ui(): default_text = load_prompt(shared.settings['prompt']) - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) shared.gradio['last_input'] = gr.State('') - with gr.Tab("Text generation", elem_id="main"): + with gr.Tab('Text generation', elem_id='main'): with gr.Row(): with gr.Column(scale=4): with gr.Tab('Raw'): @@ -29,10 +28,10 @@ def create_ui(): shared.gradio['html'] = gr.HTML() with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes="small-button") - shared.gradio['Stop'] = gr.Button('Stop', elem_classes="small-button", elem_id='stop') - shared.gradio['Undo'] = gr.Button('Undo', elem_classes="small-button") - shared.gradio['Regenerate'] = gr.Button('Regenerate', elem_classes="small-button") + shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes='small-button') + shared.gradio['Stop'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop') + shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button') + shared.gradio['Regenerate'] = gr.Button('Regenerate', elem_classes='small-button') with gr.Column(scale=1): gr.HTML('
    ') @@ -58,7 +57,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) @@ -67,7 +66,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) @@ -78,7 +77,7 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: None, None, None, _js=f"() => {{{ui.audio_notification_js}}}") + lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) From 0230fa4e9c91b99e95f8b85385cc385fb1fdcbfe Mon Sep 17 00:00:00 2001 From: Chris Lefever Date: Sat, 12 Aug 2023 02:26:58 -0400 Subject: [PATCH 070/169] Add the --disable_exllama option for AutoGPTQ --- README.md | 1 + modules/AutoGPTQ_loader.py | 1 + modules/loaders.py | 1 + modules/shared.py | 1 + modules/ui.py | 1 + modules/ui_model_menu.py | 1 + 6 files changed, 6 insertions(+) diff --git a/README.md b/README.md index 6694e500..278e5e3a 100644 --- a/README.md +++ b/README.md @@ -262,6 +262,7 @@ Optionally, you can use the following command-line flags: | `--no_inject_fused_mlp` | Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. | | `--no_use_cuda_fp16` | This can make models faster on some systems. | | `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. | +| `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. 
| #### ExLlama diff --git a/modules/AutoGPTQ_loader.py b/modules/AutoGPTQ_loader.py index 0d41ac0a..987f5ba7 100644 --- a/modules/AutoGPTQ_loader.py +++ b/modules/AutoGPTQ_loader.py @@ -50,6 +50,7 @@ def load_quantized(model_name): 'max_memory': get_max_memory_dict(), 'quantize_config': quantize_config, 'use_cuda_fp16': not shared.args.no_use_cuda_fp16, + 'disable_exllama': shared.args.disable_exllama, } logger.info(f"The AutoGPTQ params are: {params}") diff --git a/modules/loaders.py b/modules/loaders.py index 9a222a72..a96c43ea 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -46,6 +46,7 @@ loaders_and_params = OrderedDict({ 'wbits', 'groupsize', 'desc_act', + 'disable_exllama', 'gpu_memory', 'cpu_memory', 'cpu', diff --git a/modules/shared.py b/modules/shared.py index cb6f0ae1..ba89fb52 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -145,6 +145,7 @@ parser.add_argument('--no_inject_fused_attention', action='store_true', help='Do parser.add_argument('--no_inject_fused_mlp', action='store_true', help='Triton mode only: Do not use fused MLP (lowers VRAM requirements).') parser.add_argument('--no_use_cuda_fp16', action='store_true', help='This can make models faster on some systems.') parser.add_argument('--desc_act', action='store_true', help='For models that don\'t have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.') +parser.add_argument('--disable_exllama', action='store_true', help='Disable ExLlama kernel, which can improve inference speed on some systems.') # ExLlama parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7") diff --git a/modules/ui.py b/modules/ui.py index b58b7dd6..37284d25 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -58,6 +58,7 @@ def list_model_elements(): 'no_inject_fused_attention', 'no_inject_fused_mlp', 'no_use_cuda_fp16', + 'disable_exllama', 'threads', 'n_batch', 'no_mmap', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 7b852a44..3059f616 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -98,6 +98,7 @@ def create_ui(): shared.gradio['no_inject_fused_mlp'] = gr.Checkbox(label="no_inject_fused_mlp", value=shared.args.no_inject_fused_mlp, info='Affects Triton only. Disable fused MLP. Fused MLP improves performance but uses more VRAM. 
Disable if running low on VRAM.') shared.gradio['no_use_cuda_fp16'] = gr.Checkbox(label="no_use_cuda_fp16", value=shared.args.no_use_cuda_fp16, info='This can make models faster on some systems.') shared.gradio['desc_act'] = gr.Checkbox(label="desc_act", value=shared.args.desc_act, info='\'desc_act\', \'wbits\', and \'groupsize\' are used for old models without a quantize_config.json.') + shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel, which can improve inference speed on some systems.') shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu) shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit) shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16) From 73421b1fedc46e013d35ceaf8f2adba01c5c20a8 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Sat, 12 Aug 2023 21:02:47 -0500 Subject: [PATCH 071/169] Bump ctransformers wheel version (#3558) --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index ec6a7e47..c4595d48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -42,4 +42,4 @@ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_ https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" # ctransformers -https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.20+cu117-py3-none-any.whl +https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.22+cu117-py3-none-any.whl From bf70c19603627a5bbf6b5a2370bddc9ac45d81db Mon Sep 17 00:00:00 2001 From: cal066 <60696996+cal066@users.noreply.github.com> Date: Sun, 13 Aug 2023 03:04:03 +0000 Subject: [PATCH 072/169] ctransformers: move thread and seed parameters (#3543) --- modules/ctransformers_model.py | 7 +++---- modules/loaders.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index c5bc701a..f5641616 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -13,14 +13,12 @@ class CtransformersModel: def from_pretrained(self, path): result = self() - # ctransformers uses -1 for random seed config = AutoConfig.from_pretrained( str(path), threads=shared.args.threads, gpu_layers=shared.args.n_gpu_layers, batch_size=shared.args.n_batch, - stream=True, - seed=(-1 if shared.args.llama_cpp_seed == 0 else shared.args.llama_cpp_seed) + stream=True ) self.model = AutoModelForCausalLM.from_pretrained( @@ -49,6 +47,7 @@ class CtransformersModel: def generate(self, prompt, state, callback=None): prompt = prompt if type(prompt) is str else prompt.decode() + # ctransformers uses -1 for random seed generator = self.model._stream( prompt=prompt, max_new_tokens=state['max_new_tokens'], @@ -57,7 +56,7 @@ class CtransformersModel: top_k=state['top_k'], repetition_penalty=state['repetition_penalty'], last_n_tokens=state['repetition_penalty_range'], - threads=shared.args.threads + seed=state['seed'] ) output = "" diff --git a/modules/loaders.py b/modules/loaders.py index 9a222a72..2b3a50b3 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -95,8 +95,7 @@ loaders_and_params = OrderedDict({ 'n_gpu_layers', 'n_batch', 'threads', - 'model_type', 
- 'llama_cpp_seed', + 'model_type' ] }) From a1a9ec895d96b27f2c03cf5df4bce679f3abaf91 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 01:12:15 -0300 Subject: [PATCH 073/169] Unify the 3 interface modes (#3554) --- README.md | 2 - api-examples/api-example-chat-stream.py | 2 - api-examples/api-example-chat.py | 2 - css/chat.css | 146 ---------------- css/main.css | 156 +++++++++++++++-- docs/Extensions.md | 8 +- extensions/api/util.py | 2 - extensions/elevenlabs_tts/script.py | 35 ++-- extensions/example/script.py | 4 +- extensions/gallery/script.js | 14 ++ extensions/gallery/script.py | 7 +- extensions/send_pictures/script.py | 4 +- extensions/silero_tts/script.py | 33 ++-- extensions/superbooga/script.py | 6 +- js/main.js | 31 +++- js/save_files.js | 4 +- modules/chat.py | 99 ++++------- modules/extensions.py | 30 +++- modules/shared.py | 29 +--- modules/ui.py | 54 +++--- modules/ui_chat.py | 155 ++++++++--------- modules/ui_default.py | 70 ++++---- modules/ui_file_saving.py | 66 ++++--- modules/ui_notebook.py | 77 ++++----- modules/ui_parameters.py | 220 +++++++++++------------- modules/ui_session.py | 50 +++--- modules/utils.py | 2 +- server.py | 59 +++---- settings-template.yaml | 7 +- 29 files changed, 660 insertions(+), 714 deletions(-) delete mode 100644 css/chat.css create mode 100644 extensions/gallery/script.js diff --git a/README.md b/README.md index 6694e500..73ae33bd 100644 --- a/README.md +++ b/README.md @@ -189,8 +189,6 @@ Optionally, you can use the following command-line flags: | Flag | Description | |--------------------------------------------|-------------| | `-h`, `--help` | Show this help message and exit. | -| `--notebook` | Launch the web UI in notebook mode, where the output is written to the same text box as the input. | -| `--chat` | Launch the web UI in chat mode. | | `--multi-user` | Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental. | | `--character CHARACTER` | The name of the character to load in chat mode by default. | | `--model MODEL` | Name of the model to load by default. | diff --git a/api-examples/api-example-chat-stream.py b/api-examples/api-example-chat-stream.py index 055900bd..cccd5b26 100644 --- a/api-examples/api-example-chat-stream.py +++ b/api-examples/api-example-chat-stream.py @@ -36,8 +36,6 @@ async def run(user_input, history): # 'turn_template': 'turn_template', # Optional 'regenerate': False, '_continue': False, - 'stop_at_newline': False, - 'chat_generation_attempts': 1, 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', # Generation params. If 'preset' is set to different than 'None', the values diff --git a/api-examples/api-example-chat.py b/api-examples/api-example-chat.py index c3d0c538..c197a584 100644 --- a/api-examples/api-example-chat.py +++ b/api-examples/api-example-chat.py @@ -30,8 +30,6 @@ def run(user_input, history): # 'turn_template': 'turn_template', # Optional 'regenerate': False, '_continue': False, - 'stop_at_newline': False, - 'chat_generation_attempts': 1, 'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', # Generation params. 
If 'preset' is set to different than 'None', the values diff --git a/css/chat.css b/css/chat.css deleted file mode 100644 index 677d86db..00000000 --- a/css/chat.css +++ /dev/null @@ -1,146 +0,0 @@ -.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { - height: 66.67vh -} - -.gradio-container { - margin-left: auto !important; - margin-right: auto !important; -} - -.w-screen { - width: unset -} - -div.svelte-362y77>*, div.svelte-362y77>.form>* { - flex-wrap: nowrap -} - -/* fixes the API documentation in chat mode */ -.api-docs.svelte-1iguv9h.svelte-1iguv9h.svelte-1iguv9h { - display: grid; -} - -.pending.svelte-1ed2p3z { - opacity: 1; -} - -#extensions { - padding: 0; -} - -#gradio-chatbot { - height: 66.67vh; -} - -.wrap.svelte-6roggh.svelte-6roggh { - max-height: 92.5%; -} - -/* This is for the microphone button in the whisper extension */ -.sm.svelte-1ipelgc { - width: 100%; -} - -#main button { - min-width: 0 !important; -} - -#main > :first-child, #extensions { - max-width: 800px; - margin-left: auto; - margin-right: auto; -} - -@media screen and (max-width: 688px) { - #main { - padding: 0px; - } - - .chat { - height: calc(100vh - 274px) !important; - } -} - -/*****************************************************/ -/*************** Chat box declarations ***************/ -/*****************************************************/ - -.chat { - margin-left: auto; - margin-right: auto; - max-width: 800px; - height: calc(100vh - 286px); - overflow-y: auto; - padding-right: 20px; - display: flex; - flex-direction: column-reverse; - word-break: break-word; - overflow-wrap: anywhere; - padding-top: 1px; -} - -.chat > .messages { - display: flex; - flex-direction: column; -} - -.message-body li { - margin-top: 0.5em !important; - margin-bottom: 0.5em !important; -} - -.message-body li > p { - display: inline !important; -} - -.message-body ul, .message-body ol { - font-size: 15px !important; -} - -.message-body ul { - list-style-type: disc !important; -} - -.message-body pre { - margin-bottom: 1.25em !important; -} - -.message-body code { - white-space: pre-wrap !important; - word-wrap: break-word !important; -} - -.message-body :not(pre) > code { - white-space: normal !important; -} - -@media print { - body { - visibility: hidden; - } - - .chat { - visibility: visible; - position: absolute; - left: 0; - top: 0; - max-width: none; - max-height: none; - width: 100%; - height: fit-content; - display: flex; - flex-direction: column-reverse; - } - - .message { - break-inside: avoid; - } - - .gradio-container { - overflow: visible; - } - - .tab-nav { - display: none !important; - } -} diff --git a/css/main.css b/css/main.css index d37e3f63..5f293921 100644 --- a/css/main.css +++ b/css/main.css @@ -45,13 +45,6 @@ min-height: 0 } -#save_session { - margin-top: 32px; -} - -#accordion { -} - .dark svg { fill: white; } @@ -64,7 +57,7 @@ ol li p, ul li p { display: inline-block; } -#main, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab { +#chat-tab, #default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab { border: 0; } @@ -78,7 +71,6 @@ ol li p, ul li p { } #extensions { - padding: 15px; margin-bottom: 35px; } @@ -108,7 +100,7 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { } .textbox_default textarea { - height: calc(100vh - 380px); + height: calc(100vh - 310px); } .textbox_default_output textarea { @@ -128,6 +120,12 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { color: #efefef !important; } 
+@media screen and (max-width: 711px) { + .textbox_default textarea { + height: calc(100vh - 275px); + } +} + /* Hide the gradio footer*/ footer { display: none !important; @@ -193,3 +191,141 @@ button { .dark .pretty_scrollbar::-webkit-resizer { background: #374151; } + +/*****************************************************/ +/*************** Chat UI declarations ****************/ +/*****************************************************/ + +.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx { + height: 66.67vh +} + +.gradio-container { + margin-left: auto !important; + margin-right: auto !important; +} + +.w-screen { + width: unset +} + +div.svelte-362y77>*, div.svelte-362y77>.form>* { + flex-wrap: nowrap +} + +.pending.svelte-1ed2p3z { + opacity: 1; +} + +#gradio-chatbot { + height: 66.67vh; +} + +.wrap.svelte-6roggh.svelte-6roggh { + max-height: 92.5%; +} + +/* This is for the microphone button in the whisper extension */ +.sm.svelte-1ipelgc { + width: 100%; +} + +#chat-tab button, #notebook-tab button, #default-tab button { + min-width: 0 !important; +} + +#chat-tab > :first-child, #extensions { + max-width: 800px; + margin-left: auto; + margin-right: auto; +} + +@media screen and (max-width: 688px) { + #chat-tab { + padding: 0px; + } + + .chat { + height: calc(100vh - 274px) !important; + } +} + +.chat { + margin-left: auto; + margin-right: auto; + max-width: 800px; + height: calc(100vh - 286px); + overflow-y: auto; + padding-right: 20px; + display: flex; + flex-direction: column-reverse; + word-break: break-word; + overflow-wrap: anywhere; + padding-top: 1px; +} + +.chat > .messages { + display: flex; + flex-direction: column; +} + +.message-body li { + margin-top: 0.5em !important; + margin-bottom: 0.5em !important; +} + +.message-body li > p { + display: inline !important; +} + +.message-body ul, .message-body ol { + font-size: 15px !important; +} + +.message-body ul { + list-style-type: disc !important; +} + +.message-body pre { + margin-bottom: 1.25em !important; +} + +.message-body code { + white-space: pre-wrap !important; + word-wrap: break-word !important; +} + +.message-body :not(pre) > code { + white-space: normal !important; +} + +@media print { + body { + visibility: hidden; + } + + .chat { + visibility: visible; + position: absolute; + left: 0; + top: 0; + max-width: none; + max-height: none; + width: 100%; + height: fit-content; + display: flex; + flex-direction: column-reverse; + } + + .message { + break-inside: avoid; + } + + .gradio-container { + overflow: visible; + } + + .tab-nav { + display: none !important; + } +} diff --git a/docs/Extensions.md b/docs/Extensions.md index 4e59e855..53acce59 100644 --- a/docs/Extensions.md +++ b/docs/Extensions.md @@ -39,8 +39,8 @@ The extensions framework is based on special functions and variables that you ca | `def ui()` | Creates custom gradio elements when the UI is launched. | | `def custom_css()` | Returns custom CSS as a string. It is applied whenever the web UI is loaded. | | `def custom_js()` | Same as above but for javascript. | -| `def input_modifier(string, state)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. | -| `def output_modifier(string, state)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. 
| +| `def input_modifier(string, state, is_chat=False)` | Modifies the input string before it enters the model. In chat mode, it is applied to the user message. Otherwise, it is applied to the entire prompt. | +| `def output_modifier(string, state, is_chat=False)` | Modifies the output string before it is presented in the UI. In chat mode, it is applied to the bot's reply. Otherwise, it is applied to the entire output. | | `def chat_input_modifier(text, visible_text, state)` | Modifies both the visible and internal inputs in chat mode. Can be used to hijack the chat input with custom content. | | `def bot_prefix_modifier(string, state)` | Applied in chat mode to the prefix for the bot's reply. | | `def state_modifier(state)` | Modifies the dictionary containing the UI input parameters before it is used by the text generation functions. | @@ -163,7 +163,7 @@ def chat_input_modifier(text, visible_text, state): """ return text, visible_text -def input_modifier(string, state): +def input_modifier(string, state, is_chat=False): """ In default/notebook modes, modifies the whole prompt. @@ -196,7 +196,7 @@ def logits_processor_modifier(processor_list, input_ids): processor_list.append(MyLogits()) return processor_list -def output_modifier(string, state): +def output_modifier(string, state, is_chat=False): """ Modifies the LLM output before it gets presented. diff --git a/extensions/api/util.py b/extensions/api/util.py index 7ebfaa32..0db1c46c 100644 --- a/extensions/api/util.py +++ b/extensions/api/util.py @@ -68,8 +68,6 @@ def build_parameters(body, chat=False): name1, name2, _, greeting, context, _ = load_character_memoized(character, str(body.get('your_name', shared.settings['name1'])), shared.settings['name2'], instruct=False) name1_instruct, name2_instruct, _, _, context_instruct, turn_template = load_character_memoized(instruction_template, '', '', instruct=True) generate_params.update({ - 'stop_at_newline': bool(body.get('stop_at_newline', shared.settings['stop_at_newline'])), - 'chat_generation_attempts': int(body.get('chat_generation_attempts', shared.settings['chat_generation_attempts'])), 'mode': str(body.get('mode', 'chat')), 'name1': str(body.get('name1', name1)), 'name2': str(body.get('name2', name2)), diff --git a/extensions/elevenlabs_tts/script.py b/extensions/elevenlabs_tts/script.py index f74e1047..2324d782 100644 --- a/extensions/elevenlabs_tts/script.py +++ b/extensions/elevenlabs_tts/script.py @@ -4,9 +4,9 @@ from pathlib import Path import elevenlabs import gradio as gr -from modules import chat, shared -from modules.utils import gradio +from modules import chat, shared, ui_chat from modules.logging_colors import logger +from modules.utils import gradio params = { 'activate': True, @@ -167,24 +167,23 @@ def ui(): convert_cancel = gr.Button('Cancel', visible=False) convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False) - if shared.is_chat(): - # Convert history with confirmation - convert_arr = [convert_confirm, convert, convert_cancel] - convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) - convert_confirm.click( - lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then( - remove_tts_from_history, gradio('history'), gradio('history')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + # Convert history with 
confirmation + convert_arr = [convert_confirm, convert, convert_cancel] + convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) + convert_confirm.click( + lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then( + remove_tts_from_history, gradio('history'), gradio('history')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) - convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) - # Toggle message text in history - show_text.change( - lambda x: params.update({"show_text": x}), show_text, None).then( - toggle_text_in_history, gradio('history'), gradio('history')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + # Toggle message text in history + show_text.change( + lambda x: params.update({"show_text": x}), show_text, None).then( + toggle_text_in_history, gradio('history'), gradio('history')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) # Event functions to update the parameters in the backend activate.change(lambda x: params.update({'activate': x}), activate, None) diff --git a/extensions/example/script.py b/extensions/example/script.py index b4db7102..44f0cb3c 100644 --- a/extensions/example/script.py +++ b/extensions/example/script.py @@ -59,7 +59,7 @@ def chat_input_modifier(text, visible_text, state): """ return text, visible_text -def input_modifier(string, state): +def input_modifier(string, state, is_chat=False): """ In default/notebook modes, modifies the whole prompt. @@ -92,7 +92,7 @@ def logits_processor_modifier(processor_list, input_ids): processor_list.append(MyLogits()) return processor_list -def output_modifier(string, state): +def output_modifier(string, state, is_chat=False): """ Modifies the LLM output before it gets presented. 
diff --git a/extensions/gallery/script.js b/extensions/gallery/script.js new file mode 100644 index 00000000..878401ec --- /dev/null +++ b/extensions/gallery/script.js @@ -0,0 +1,14 @@ +let gallery_element = document.getElementById('gallery-extension'); + +main_parent.addEventListener('click', function(e) { + let chat_visible = (chat_tab.offsetHeight > 0 && chat_tab.offsetWidth > 0); + let notebook_visible = (notebook_tab.offsetHeight > 0 && notebook_tab.offsetWidth > 0); + let default_visible = (default_tab.offsetHeight > 0 && default_tab.offsetWidth > 0); + + // Only show this extension in the Chat tab + if (chat_visible) { + gallery_element.style.display = 'flex'; + } else { + gallery_element.style.display = 'none'; + } +}); diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py index 993ef273..611a11f4 100644 --- a/extensions/gallery/script.py +++ b/extensions/gallery/script.py @@ -82,8 +82,13 @@ def select_character(evt: gr.SelectData): return (evt.value[1]) +def custom_js(): + path_to_js = Path(__file__).parent.resolve() / 'script.js' + return open(path_to_js, 'r').read() + + def ui(): - with gr.Accordion("Character gallery", open=False): + with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'): update = gr.Button("Refresh") gr.HTML(value="") gallery = gr.Dataset(components=[gr.HTML(visible=False)], diff --git a/extensions/send_pictures/script.py b/extensions/send_pictures/script.py index 39c9362a..f8e6c969 100644 --- a/extensions/send_pictures/script.py +++ b/extensions/send_pictures/script.py @@ -5,7 +5,7 @@ import gradio as gr import torch from transformers import BlipForConditionalGeneration, BlipProcessor -from modules import chat, shared +from modules import chat, shared, ui_chat from modules.ui import gather_interface_values from modules.utils import gradio @@ -54,5 +54,5 @@ def ui(): "value": generate_chat_picture(picture, name1, name2) }), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None).then( gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( + chat.generate_chat_reply_wrapper, gradio(ui_chat.inputs), gradio('display', 'history'), show_progress=False).then( lambda: None, None, picture_select, show_progress=False) diff --git a/extensions/silero_tts/script.py b/extensions/silero_tts/script.py index b96a47fd..707d919b 100644 --- a/extensions/silero_tts/script.py +++ b/extensions/silero_tts/script.py @@ -6,7 +6,7 @@ import gradio as gr import torch from extensions.silero_tts import tts_preprocessor -from modules import chat, shared +from modules import chat, shared, ui_chat from modules.utils import gradio torch._C._jit_set_profiling_mode(False) @@ -194,24 +194,23 @@ def ui(): convert_cancel = gr.Button('Cancel', visible=False) convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False) - if shared.is_chat(): - # Convert history with confirmation - convert_arr = [convert_confirm, convert, convert_cancel] - convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) - convert_confirm.click( - lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then( - remove_tts_from_history, gradio('history'), gradio('history')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - chat.redraw_html, 
shared.reload_inputs, gradio('display')) + # Convert history with confirmation + convert_arr = [convert_confirm, convert, convert_cancel] + convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr) + convert_confirm.click( + lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then( + remove_tts_from_history, gradio('history'), gradio('history')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) - convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) + convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr) - # Toggle message text in history - show_text.change( - lambda x: params.update({"show_text": x}), show_text, None).then( - toggle_text_in_history, gradio('history'), gradio('history')).then( - chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + # Toggle message text in history + show_text.change( + lambda x: params.update({"show_text": x}), show_text, None).then( + toggle_text_in_history, gradio('history'), gradio('history')).then( + chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) # Event functions to update the parameters in the backend activate.change(lambda x: params.update({"activate": x}), activate, None) diff --git a/extensions/superbooga/script.py b/extensions/superbooga/script.py index 475cf1e0..06fe8ad3 100644 --- a/extensions/superbooga/script.py +++ b/extensions/superbooga/script.py @@ -4,7 +4,7 @@ import textwrap import gradio as gr from bs4 import BeautifulSoup -from modules import chat, shared +from modules import chat from modules.logging_colors import logger from .chromadb import add_chunks_to_collector, make_collector @@ -143,8 +143,8 @@ def remove_special_tokens(string): return re.sub(pattern, '', string) -def input_modifier(string): - if shared.is_chat(): +def input_modifier(string, state, is_chat=False): + if is_chat: return string # Find the user input diff --git a/js/main.js b/js/main.js index 7a2368fe..40197869 100644 --- a/js/main.js +++ b/js/main.js @@ -1,17 +1,30 @@ -document.getElementById("main").parentNode.childNodes[0].classList.add("header_bar"); -document.getElementById("main").parentNode.style = "padding: 0; margin: 0"; -document.getElementById("main").parentNode.parentNode.parentNode.style = "padding: 0"; +let chat_tab = document.getElementById('chat-tab'); +let notebook_tab = document.getElementById('notebook-tab'); +let default_tab = document.getElementById('default-tab'); -// Get references to the elements -let main = document.getElementById('main'); -let main_parent = main.parentNode; +let main_parent = chat_tab.parentNode; let extensions = document.getElementById('extensions'); -// Add an event listener to the main element +main_parent.childNodes[0].classList.add("header_bar"); +main_parent.style = "padding: 0; margin: 0"; +main_parent.parentNode.parentNode.style = "padding: 0"; + +// Add an event listener to the generation tabs main_parent.addEventListener('click', function(e) { - // Check if the main element is visible - if (main.offsetHeight > 0 && main.offsetWidth > 0) { + let 
chat_visible = (chat_tab.offsetHeight > 0 && chat_tab.offsetWidth > 0); + let notebook_visible = (notebook_tab.offsetHeight > 0 && notebook_tab.offsetWidth > 0); + let default_visible = (default_tab.offsetHeight > 0 && default_tab.offsetWidth > 0); + + // Check if one of the generation tabs is visible + if (chat_visible || notebook_visible || default_visible) { extensions.style.display = 'flex'; + if (chat_visible) { + extensions.style.maxWidth = "800px"; + extensions.style.padding = "0px"; + } else { + extensions.style.maxWidth = "none"; + extensions.style.padding = "15px"; + } } else { extensions.style.display = 'none'; } diff --git a/js/save_files.js b/js/save_files.js index 7dfbcfda..d5b22c4b 100644 --- a/js/save_files.js +++ b/js/save_files.js @@ -32,9 +32,9 @@ function saveHistory(history, character, mode) { saveFile(history, path); } -function saveSession(session, mode) { +function saveSession(session) { let path = null; - path = `session_${mode}_${getCurrentTimestamp()}.json`; + path = `session_${getCurrentTimestamp()}.json`; saveFile(session, path); } diff --git a/modules/chat.py b/modules/chat.py index c2a05d3f..e2bba18f 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -175,9 +175,6 @@ def get_stopping_strings(state): f"\n{state['name2']}:" ] - if state['stop_at_newline']: - stopping_strings.append("\n") - return stopping_strings @@ -201,7 +198,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if not any((regenerate, _continue)): visible_text = text text, visible_text = apply_extensions('chat_input', text, visible_text, state) - text = apply_extensions('input', text, state) + text = apply_extensions('input', text, state, is_chat=True) # *Is typing...* if loading_message: @@ -230,45 +227,37 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess prompt = generate_chat_prompt(text, state, **kwargs) # Generate - cumulative_reply = '' - for i in range(state['chat_generation_attempts']): - reply = None - for j, reply in enumerate(generate_reply(prompt + cumulative_reply, state, stopping_strings=stopping_strings, is_chat=True)): - reply = cumulative_reply + reply + reply = None + for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True)): - # Extract the reply - visible_reply = re.sub("(||{{user}})", state['name1'], reply) + # Extract the reply + visible_reply = re.sub("(||{{user}})", state['name1'], reply) - # We need this global variable to handle the Stop event, - # otherwise gradio gets confused - if shared.stop_everything: - output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state) + # We need this global variable to handle the Stop event, + # otherwise gradio gets confused + if shared.stop_everything: + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) + yield output + return + + if just_started: + just_started = False + if not _continue: + output['internal'].append(['', '']) + output['visible'].append(['', '']) + + if _continue: + output['internal'][-1] = [text, last_reply[0] + reply] + output['visible'][-1] = [visible_text, last_reply[1] + visible_reply] + if is_stream: + yield output + elif not (j == 0 and visible_reply.strip() == ''): + output['internal'][-1] = [text, reply.lstrip(' ')] + output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')] + if is_stream: yield output - return - if just_started: - just_started = False - if not _continue: - output['internal'].append(['', 
'']) - output['visible'].append(['', '']) - - if _continue: - output['internal'][-1] = [text, last_reply[0] + reply] - output['visible'][-1] = [visible_text, last_reply[1] + visible_reply] - if is_stream: - yield output - elif not (j == 0 and visible_reply.strip() == ''): - output['internal'][-1] = [text, reply.lstrip(' ')] - output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')] - if is_stream: - yield output - - if reply in [None, cumulative_reply]: - break - else: - cumulative_reply = reply - - output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state) + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output @@ -278,27 +267,15 @@ def impersonate_wrapper(text, start_with, state): yield '' return - # Defining some variables - cumulative_reply = '' prompt = generate_chat_prompt('', state, impersonate=True) stopping_strings = get_stopping_strings(state) yield text + '...' - cumulative_reply = text - for i in range(state['chat_generation_attempts']): - reply = None - for reply in generate_reply(prompt + cumulative_reply, state, stopping_strings=stopping_strings, is_chat=True): - reply = cumulative_reply + reply - yield reply.lstrip(' ') - if shared.stop_everything: - return - - if reply in [None, cumulative_reply]: - break - else: - cumulative_reply = reply - - yield cumulative_reply.lstrip(' ') + reply = None + for reply in generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True): + yield reply.lstrip(' ') + if shared.stop_everything: + return def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_message=True): @@ -352,7 +329,7 @@ def replace_last_reply(text, state): return history elif len(history['visible']) > 0: history['visible'][-1][1] = text - history['internal'][-1][1] = apply_extensions('input', text, state) + history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) return history @@ -360,7 +337,7 @@ def replace_last_reply(text, state): def send_dummy_message(text, state): history = state['history'] history['visible'].append([text, '']) - history['internal'].append([apply_extensions('input', text, state), '']) + history['internal'].append([apply_extensions('input', text, state, is_chat=True), '']) return history @@ -371,7 +348,7 @@ def send_dummy_reply(text, state): history['internal'].append(['', '']) history['visible'][-1][1] = text - history['internal'][-1][1] = apply_extensions('input', text, state) + history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) return history @@ -385,7 +362,7 @@ def clear_chat_log(state): if mode != 'instruct': if greeting != '': history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]] - history['visible'] += [['', apply_extensions('output', greeting, state)]] + history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]] return history @@ -452,7 +429,7 @@ def load_persistent_history(state): history = {'internal': [], 'visible': []} if greeting != "": history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]] - history['visible'] += [['', apply_extensions('output', greeting, state)]] + history['visible'] += [['', apply_extensions('output', greeting, state, is_chat=True)]] return history diff --git a/modules/extensions.py b/modules/extensions.py index 76b6be8b..796ff072 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -53,14 +53,32 @@ def iterator(): # Extension functions that map string -> string -def 
_apply_string_extensions(function_name, text, state): +def _apply_string_extensions(function_name, text, state, is_chat=False): for extension, _ in iterator(): if hasattr(extension, function_name): func = getattr(extension, function_name) - if len(signature(func).parameters) == 2: - text = func(text, state) + + # Handle old extensions without the 'state' arg or + # the 'is_chat' kwarg + count = 0 + has_chat = False + for k in signature(func).parameters: + if k == 'is_chat': + has_chat = True + else: + count += 1 + + if count == 2: + args = [text, state] else: - text = func(text) + args = [text] + + if has_chat: + kwargs = {'is_chat': is_chat} + else: + kwargs = {} + + text = func(*args, **kwargs) return text @@ -169,9 +187,7 @@ def create_extensions_block(): if len(to_display) > 0: with gr.Column(elem_id="extensions"): for row in to_display: - extension, name = row - display_name = getattr(extension, 'params', {}).get('display_name', name) - gr.Markdown(f"\n### {display_name}") + extension, _ = row extension.ui() diff --git a/modules/shared.py b/modules/shared.py index cb6f0ae1..89b5f0cb 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -19,8 +19,6 @@ lora_names = [] stop_everything = False generation_lock = None processing_message = '*Is typing...*' -input_params = [] -reload_inputs = [] # UI variables gradio = {} @@ -45,7 +43,6 @@ settings = { 'greeting': '', 'turn_template': '', 'custom_stopping_strings': '', - 'stop_at_newline': False, 'add_bos_token': True, 'ban_eos_token': False, 'skip_special_tokens': True, @@ -57,11 +54,7 @@ settings = { 'chat_style': 'TheEncrypted777', 'instruction_template': 'None', 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', - 'chat_generation_attempts': 1, - 'chat_generation_attempts_min': 1, - 'chat_generation_attempts_max': 10, - 'default_extensions': [], - 'chat_default_extensions': ['gallery'], + 'default_extensions': ['gallery'], 'preset': 'simple-1', 'prompt': 'QA', } @@ -81,8 +74,8 @@ def str2bool(v): parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54)) # Basic settings -parser.add_argument('--notebook', action='store_true', help='Launch the web UI in notebook mode, where the output is written to the same text box as the input.') -parser.add_argument('--chat', action='store_true', help='Launch the web UI in chat mode with a style similar to the Character.AI website.') +parser.add_argument('--notebook', action='store_true', help='DEPRECATED') +parser.add_argument('--chat', action='store_true', help='DEPRECATED') parser.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental.') parser.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.') parser.add_argument('--model', type=str, help='Name of the model to load by default.') @@ -187,6 +180,11 @@ parser.add_argument('--multimodal-pipeline', type=str, default=None, help='The m args = parser.parse_args() args_defaults = parser.parse_args([]) +# Deprecation warnings +for k in ['chat', 'notebook']: + if getattr(args, k): + logger.warning(f'--{k} has been deprecated and will be removed soon. Please remove that flag.') + # Security warnings if args.trust_remote_code: logger.warning("trust_remote_code is enabled. 
This is dangerous.") @@ -227,16 +225,7 @@ def add_extension(name): def is_chat(): - return args.chat - - -def get_mode(): - if args.chat: - return 'chat' - elif args.notebook: - return 'notebook' - else: - return 'default' + return True args.loader = fix_loader_name(args.loader) diff --git a/modules/ui.py b/modules/ui.py index b58b7dd6..e7817f73 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -8,10 +8,8 @@ from modules import shared with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f: css = f.read() -with open(Path(__file__).resolve().parent / '../css/chat.css', 'r') as f: - chat_css = f.read() with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f: - main_js = f.read() + js = f.read() with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f: save_files_js = f.read() @@ -116,31 +114,35 @@ def list_interface_input_elements(): 'top_a', ] - if shared.args.chat: - elements += [ - 'character_menu', - 'history', - 'name1', - 'name2', - 'greeting', - 'context', - 'chat_generation_attempts', - 'stop_at_newline', - 'mode', - 'instruction_template', - 'name1_instruct', - 'name2_instruct', - 'context_instruct', - 'turn_template', - 'chat_style', - 'chat-instruct_command', - ] - else: - elements.append('textbox') - if not shared.args.notebook: - elements.append('output_textbox') + # Chat elements + elements += [ + 'textbox', + 'character_menu', + 'history', + 'name1', + 'name2', + 'greeting', + 'context', + 'mode', + 'instruction_template', + 'name1_instruct', + 'name2_instruct', + 'context_instruct', + 'turn_template', + 'chat_style', + 'chat-instruct_command', + ] + # Notebook/default elements + elements += [ + 'textbox-notebook', + 'textbox-default', + 'output_textbox' + ] + + # Model elements elements += list_model_elements() + return elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 1d73adf7..76e70ed0 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -10,14 +10,17 @@ from modules.html_generator import chat_html_wrapper from modules.text_generation import stop_everything_event from modules.utils import gradio +inputs = ('Chat input', 'start_with', 'interface_state') +reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style') +clear_arr = ('Clear history-confirm', 'Clear history', 'Clear history-cancel') + def create_ui(): - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) shared.gradio['Chat input'] = gr.State() shared.gradio['dummy'] = gr.State() shared.gradio['history'] = gr.State({'internal': [], 'visible': []}) - with gr.Tab('Text generation', elem_id='main'): + with gr.Tab('Chat', elem_id='chat-tab'): shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) shared.gradio['textbox'] = gr.Textbox(label='Input') with gr.Row(): @@ -45,82 +48,80 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with']) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. 
In instruct and chat-instruct modes, the instruction template selected under "Chat settings" must match the current model.') + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.') shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') - with gr.Tab('Chat settings', elem_id='chat-settings'): - with gr.Tab('Character'): - with gr.Row(): - with gr.Column(scale=8): - with gr.Row(): - shared.gradio['character_menu'] = gr.Dropdown(value='None', choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button') - shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button') - shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') - shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') - shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) - shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar']) - - with gr.Column(scale=1): - shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') - shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None) - - with gr.Tab('Instruction template'): - with gr.Row(): +def create_chat_settings_ui(): + with gr.Tab('Character'): + with gr.Row(): + with gr.Column(scale=8): with gr.Row(): - shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Instruction template', value='None', info='Change this according to the model/LoRA that you are using. 
Used in instruct and chat-instruct modes.', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button') - shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button') + shared.gradio['character_menu'] = gr.Dropdown(value='None', choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button') + shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button') - shared.gradio['name1_instruct'] = gr.Textbox(value='', lines=2, label='User string') - shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') - shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') - shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') + shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name') + shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) + shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar']) + + with gr.Column(scale=1): + shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil') + shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None) + + with gr.Tab('Instruction template'): + with gr.Row(): with gr.Row(): - shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar']) + shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Instruction template', value='None', info='Change this according to the model/LoRA that you are using. 
Used in instruct and chat-instruct modes.', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button') + shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button') - with gr.Tab('Chat history'): + shared.gradio['name1_instruct'] = gr.Textbox(value='', lines=2, label='User string') + shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') + shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') + shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') + with gr.Row(): + shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar']) + + with gr.Tab('Chat history'): + with gr.Row(): + with gr.Column(): + shared.gradio['save_chat_history'] = gr.Button(value='Save history') + + with gr.Column(): + shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON') + + with gr.Tab('Upload character'): + with gr.Tab('YAML or JSON'): + with gr.Row(): + shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File') + shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)') + + shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False) + + with gr.Tab('TavernAI PNG'): with gr.Row(): with gr.Column(): - shared.gradio['save_chat_history'] = gr.Button(value='Save history') - + shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern') + shared.gradio['tavern_json'] = gr.State() with gr.Column(): - shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON') + shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) + shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) - with gr.Tab('Upload character'): - with gr.Tab('YAML or JSON'): - with gr.Row(): - shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File') - shared.gradio['upload_img_bot'] = gr.Image(type='pil', label='Profile Picture (optional)') - - shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False) - - with gr.Tab('TavernAI PNG'): - with gr.Row(): - with gr.Column(): - shared.gradio['upload_img_tavern'] = gr.Image(type='pil', label='TavernAI PNG File', elem_id='upload_img_tavern') - shared.gradio['tavern_json'] = gr.State() - with gr.Column(): - shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False) - shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=4, max_lines=4, label='Description', interactive=False) - - shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False) + shared.gradio['Submit tavern character'] = gr.Button(value='Submit', 
interactive=False) def create_event_handlers(): gen_events = [] - - shared.input_params = gradio('Chat input', 'start_with', 'interface_state') - clear_arr = gradio('Clear history-confirm', 'Clear history', 'Clear history-cancel') - shared.reload_inputs = gradio('history', 'name1', 'name2', 'mode', 'chat_style') + shared.input_params = gradio(inputs) # Obsolete, kept for compatibility with old extensions gen_events.append(shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( - chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( + chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') @@ -129,7 +130,7 @@ def create_event_handlers(): gen_events.append(shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( - chat.generate_chat_reply_wrapper, shared.input_params, gradio('display', 'history'), show_progress=False).then( + chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') @@ -137,7 +138,7 @@ def create_event_handlers(): gen_events.append(shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_reply_wrapper, regenerate=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( + partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') @@ -145,7 +146,7 @@ def create_event_handlers(): gen_events.append(shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - partial(chat.generate_chat_reply_wrapper, _continue=True), shared.input_params, gradio('display', 'history'), show_progress=False).then( + partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') @@ -154,7 +155,7 @@ def create_event_handlers(): gen_events.append(shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), 
gradio('Chat input'), show_progress=False).then( - chat.impersonate_wrapper, shared.input_params, gradio('textbox'), show_progress=False).then( + chat.impersonate_wrapper, gradio(inputs), gradio('textbox'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') ) @@ -163,59 +164,59 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.replace_last_reply, gradio('textbox', 'interface_state'), gradio('history')).then( lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) shared.gradio['Send dummy message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.send_dummy_message, gradio('textbox', 'interface_state'), gradio('history')).then( lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) shared.gradio['Send dummy reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.send_dummy_reply, gradio('textbox', 'interface_state'), gradio('history')).then( lambda: '', None, gradio('textbox'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) - shared.gradio['Clear history'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, clear_arr) - shared.gradio['Clear history-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr) + shared.gradio['Clear history'].click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, gradio(clear_arr)) + shared.gradio['Clear history-cancel'].click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)) shared.gradio['Clear history-confirm'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, clear_arr).then( + lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, gradio(clear_arr)).then( chat.clear_chat_log, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None) shared.gradio['Remove last'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.remove_last_message, gradio('history'), gradio('textbox', 'history'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( chat.save_persistent_history, 
gradio('history', 'character_menu', 'mode'), None) shared.gradio['character_menu'].change( partial(chat.load_character, instruct=False), gradio('character_menu', 'name1', 'name2'), gradio('name1', 'name2', 'character_picture', 'greeting', 'context', 'dummy')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.load_persistent_history, gradio('interface_state'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['Stop'].click( stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['mode'].change( lambda x: gr.update(visible=x != 'instruct'), gradio('mode'), gradio('chat_style'), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')) + chat.redraw_html, gradio(reload_arr), gradio('display')) - shared.gradio['chat_style'].change(chat.redraw_html, shared.reload_inputs, gradio('display')) + shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['instruction_template'].change( partial(chat.load_character, instruct=True), gradio('instruction_template', 'name1_instruct', 'name2_instruct'), gradio('name1_instruct', 'name2_instruct', 'dummy', 'dummy', 'context_instruct', 'turn_template')) shared.gradio['load_chat_history'].upload( chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( + chat.redraw_html, gradio(reload_arr), gradio('display')).then( None, None, None, _js='() => {alert("The history has been loaded.")}') shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) @@ -256,4 +257,4 @@ def create_event_handlers(): shared.gradio['upload_img_tavern'].clear(lambda: (None, None, None, gr.update(interactive=False)), None, gradio('tavern_name', 'tavern_desc', 'tavern_json', 'Submit tavern character'), show_progress=False) shared.gradio['your_picture'].change( chat.upload_your_profile_picture, gradio('your_picture'), None).then( - partial(chat.redraw_html, reset_cache=True), shared.reload_inputs, gradio('display')) + partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display')) diff --git a/modules/ui_default.py b/modules/ui_default.py index b879e1ef..d26863bc 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -8,87 +8,85 @@ from modules.text_generation import ( ) from modules.utils import gradio +inputs = ('textbox-default', 'interface_state') +outputs = ('output_textbox', 'html-default') + def create_ui(): default_text = load_prompt(shared.settings['prompt']) - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) - shared.gradio['last_input'] = gr.State('') - - with gr.Tab('Text generation', elem_id='main'): + with gr.Tab('Default', elem_id='default-tab'): + shared.gradio['last_input-default'] = gr.State('') with gr.Row(): with gr.Column(): - shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, 
label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['textbox-default'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate', variant='primary') - shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') - shared.gradio['Continue'] = gr.Button('Continue') - shared.gradio['count_tokens'] = gr.Button('Count tokens') + shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary') + shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop') + shared.gradio['Continue-default'] = gr.Button('Continue') + shared.gradio['count_tokens-default'] = gr.Button('Count tokens') with gr.Row(): - shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button') - shared.gradio['save_prompt'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_prompt'] = gr.Button('🗑️', elem_classes='refresh-button') + shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button') + shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button') - shared.gradio['status'] = gr.Markdown('') + shared.gradio['status-default'] = gr.Markdown('') with gr.Column(): with gr.Tab('Raw'): shared.gradio['output_textbox'] = gr.Textbox(lines=27, label='Output', elem_classes=['textbox_default_output', 'add_scrollbar']) with gr.Tab('Markdown'): - shared.gradio['markdown_render'] = gr.Button('Render') - shared.gradio['markdown'] = gr.Markdown() + shared.gradio['markdown_render-default'] = gr.Button('Render') + shared.gradio['markdown-default'] = gr.Markdown() with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() + shared.gradio['html-default'] = gr.HTML() def create_event_handlers(): gen_events = [] - shared.input_params = gradio('textbox', 'interface_state') - output_params = gradio('output_textbox', 'html') - gen_events.append(shared.gradio['Generate'].click( - lambda x: x, gradio('textbox'), gradio('last_input')).then( + gen_events.append(shared.gradio['Generate-default'].click( + lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) - gen_events.append(shared.gradio['textbox'].submit( - lambda x: x, gradio('textbox'), gradio('last_input')).then( + gen_events.append(shared.gradio['textbox-default'].submit( + lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, 
gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) - shared.gradio['markdown_render'].click(lambda x: x, gradio('output_textbox'), gradio('markdown'), queue=False) - gen_events.append(shared.gradio['Continue'].click( + shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) + gen_events.append(shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=False).then( + generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[1]; element.scrollTop = element.scrollHeight}") ) - shared.gradio['Stop'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) - shared.gradio['prompt_menu'].change(load_prompt, gradio('prompt_menu'), gradio('textbox'), show_progress=False) - shared.gradio['save_prompt'].click( - lambda x: x, gradio('textbox'), gradio('save_contents')).then( + shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) + shared.gradio['save_prompt-default'].click( + lambda x: x, gradio('textbox-default'), gradio('save_contents')).then( lambda: 'prompts/', None, gradio('save_root')).then( lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( lambda: gr.update(visible=True), None, gradio('file_saver')) - shared.gradio['delete_prompt'].click( + shared.gradio['delete_prompt-default'].click( lambda: 'prompts/', None, gradio('delete_root')).then( - lambda x: x + '.txt', gradio('prompt_menu'), gradio('delete_filename')).then( + lambda x: x + '.txt', gradio('prompt_menu-default'), gradio('delete_filename')).then( lambda: gr.update(visible=True), None, gradio('file_deleter')) - shared.gradio['count_tokens'].click(count_tokens, gradio('textbox'), gradio('status'), show_progress=False) + shared.gradio['count_tokens-default'].click(count_tokens, gradio('textbox-default'), gradio('status-default'), show_progress=False) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 952d66c9..98165d67 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -2,7 +2,7 @@ import json import gradio as gr -from modules import chat, presets, shared, ui, utils +from modules import chat, presets, shared, ui, ui_chat, utils from modules.utils import gradio @@ -26,18 +26,17 @@ def create_ui(): shared.gradio['delete_cancel'] = 
gr.Button('Cancel', elem_classes="small-button") # Character saver/deleter - if shared.is_chat(): - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']: - shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.') - with gr.Row(): - shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button") - shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']: + shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.') + with gr.Row(): + shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button") + shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") - with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']: - gr.Markdown('Confirm the character deletion?') - with gr.Row(): - shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') - shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") + with gr.Box(visible=False, elem_classes='file-saver') as shared.gradio['character_deleter']: + gr.Markdown('Confirm the character deletion?') + with gr.Row(): + shared.gradio['delete_character_confirm'] = gr.Button('Delete', elem_classes="small-button", variant='stop') + shared.gradio['delete_character_cancel'] = gr.Button('Cancel', elem_classes="small-button") def create_event_handlers(): @@ -51,18 +50,18 @@ def create_event_handlers(): shared.gradio['delete_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_deleter')) shared.gradio['save_cancel'].click(lambda: gr.update(visible=False), None, gradio('file_saver')) - if shared.is_chat(): - shared.gradio['save_character_confirm'].click( - chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('character_saver')) - shared.gradio['delete_character_confirm'].click( - chat.delete_character, gradio('character_menu'), None).then( - lambda: gr.update(visible=False), None, gradio('character_deleter')).then( - lambda: gr.update(choices=utils.get_available_characters()), None, gradio('character_menu')) + shared.gradio['save_character_confirm'].click( + chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( + lambda: gr.update(visible=False), None, gradio('character_saver')) - shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) - shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter')) + shared.gradio['delete_character_confirm'].click( + chat.delete_character, gradio('character_menu'), None).then( + lambda: gr.update(visible=False), None, gradio('character_deleter')).then( + lambda: gr.update(choices=utils.get_available_characters()), None, gradio('character_menu')) + + shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) + shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, 
gradio('character_deleter')) shared.gradio['save_preset'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -80,28 +79,21 @@ def create_event_handlers(): shared.gradio['save_session'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( - None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents, \"{shared.get_mode()}\")}}") + None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents)}}") - if shared.is_chat(): - shared.gradio['load_session'].upload( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - chat.redraw_html, shared.reload_inputs, gradio('display')).then( - None, None, None, _js='() => {alert("The session has been loaded.")}') - else: - shared.gradio['load_session'].upload( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( - ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( - None, None, None, _js='() => {alert("The session has been loaded.")}') + shared.gradio['load_session'].upload( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + load_session, gradio('load_session', 'interface_state'), gradio('interface_state')).then( + ui.apply_interface_values, gradio('interface_state'), gradio(ui.list_interface_input_elements()), show_progress=False).then( + chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')).then( + None, None, None, _js='() => {alert("The session has been loaded.")}') def load_session(file, state): decoded_file = file if type(file) == str else file.decode('utf-8') data = json.loads(decoded_file) - if shared.is_chat() and 'character_menu' in data and state.get('character_menu') != data.get('character_menu'): + if 'character_menu' in data and state.get('character_menu') != data.get('character_menu'): shared.session_is_loading = True state.update(data) diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 9e8b3af6..7d6648d2 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -8,90 +8,85 @@ from modules.text_generation import ( ) from modules.utils import gradio +inputs = ('textbox-notebook', 'interface_state') +outputs = ('textbox-notebook', 'html-notebook') + def create_ui(): default_text = load_prompt(shared.settings['prompt']) - shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) - shared.gradio['last_input'] = gr.State('') - with gr.Tab('Text generation', elem_id='main'): + with gr.Tab('Notebook', elem_id='notebook-tab'): + shared.gradio['last_input-notebook'] = gr.State('') with gr.Row(): with gr.Column(scale=4): with gr.Tab('Raw'): - shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_classes=['textbox', 'add_scrollbar'], lines=27) + shared.gradio['textbox-notebook'] = gr.Textbox(value=default_text, elem_classes=['textbox', 'add_scrollbar'], lines=27) with gr.Tab('Markdown'): - 
shared.gradio['markdown_render'] = gr.Button('Render') - shared.gradio['markdown'] = gr.Markdown() + shared.gradio['markdown_render-notebook'] = gr.Button('Render') + shared.gradio['markdown-notebook'] = gr.Markdown() with gr.Tab('HTML'): - shared.gradio['html'] = gr.HTML() + shared.gradio['html-notebook'] = gr.HTML() with gr.Row(): - shared.gradio['Generate'] = gr.Button('Generate', variant='primary', elem_classes='small-button') - shared.gradio['Stop'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop') + shared.gradio['Generate-notebook'] = gr.Button('Generate', variant='primary', elem_classes='small-button') + shared.gradio['Stop-notebook'] = gr.Button('Stop', elem_classes='small-button', elem_id='stop') shared.gradio['Undo'] = gr.Button('Undo', elem_classes='small-button') - shared.gradio['Regenerate'] = gr.Button('Regenerate', elem_classes='small-button') + shared.gradio['Regenerate-notebook'] = gr.Button('Regenerate', elem_classes='small-button') with gr.Column(scale=1): gr.HTML('
    ') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) with gr.Row(): - shared.gradio['prompt_menu'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['prompt_menu'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small']) - shared.gradio['save_prompt'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small']) - shared.gradio['delete_prompt'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small']) + shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small']) + shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small']) + shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small']) - shared.gradio['count_tokens'] = gr.Button('Count tokens') - shared.gradio['status'] = gr.Markdown('') + shared.gradio['count_tokens-notebook'] = gr.Button('Count tokens') + shared.gradio['status-notebook'] = gr.Markdown('') def create_event_handlers(): gen_events = [] - shared.input_params = gradio('textbox', 'interface_state') - output_params = gradio('textbox', 'html') - - gen_events.append(shared.gradio['Generate'].click( - lambda x: x, gradio('textbox'), gradio('last_input')).then( + gen_events.append(shared.gradio['Generate-notebook'].click( + lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) - gen_events.append(shared.gradio['textbox'].submit( - lambda x: x, gradio('textbox'), gradio('last_input')).then( + gen_events.append(shared.gradio['textbox-notebook'].submit( + lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) - shared.gradio['Undo'].click(lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False) - shared.gradio['markdown_render'].click(lambda x: x, gradio('textbox'), 
gradio('markdown'), queue=False) - gen_events.append(shared.gradio['Regenerate'].click( - lambda x: x, gradio('last_input'), gradio('textbox'), show_progress=False).then( + shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) + shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) + gen_events.append(shared.gradio['Regenerate-notebook'].click( + lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - generate_reply_wrapper, shared.input_params, output_params, show_progress=False).then( + generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") ) - shared.gradio['Stop'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) - shared.gradio['prompt_menu'].change(load_prompt, gradio('prompt_menu'), gradio('textbox'), show_progress=False) - shared.gradio['save_prompt'].click( - lambda x: x, gradio('textbox'), gradio('save_contents')).then( + shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) + shared.gradio['save_prompt-notebook'].click( + lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then( lambda: 'prompts/', None, gradio('save_root')).then( lambda: utils.current_time() + '.txt', None, gradio('save_filename')).then( lambda: gr.update(visible=True), None, gradio('file_saver')) - shared.gradio['delete_prompt'].click( + shared.gradio['delete_prompt-notebook'].click( lambda: 'prompts/', None, gradio('delete_root')).then( - lambda x: x + '.txt', gradio('prompt_menu'), gradio('delete_filename')).then( + lambda x: x + '.txt', gradio('prompt_menu-notebook'), gradio('delete_filename')).then( lambda: gr.update(visible=True), None, gradio('file_deleter')) - shared.gradio['count_tokens'].click(count_tokens, gradio('textbox'), gradio('status'), show_progress=False) + shared.gradio['count_tokens-notebook'].click(count_tokens, gradio('textbox-notebook'), gradio('status-notebook'), show_progress=False) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 4b9fb918..2f0c2efd 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -1,143 +1,131 @@ import gradio as gr -from modules import loaders, presets, shared, ui, utils +from modules import loaders, presets, shared, ui, ui_chat, utils from modules.utils import gradio def create_ui(default_preset): generate_params = presets.load_preset(default_preset) with gr.Tab("Parameters", elem_id="parameters"): - with gr.Row(): - with gr.Column(): - with gr.Row(): - shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Generation parameters preset', elem_classes='slim-dropdown') - ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': 
utils.get_available_presets()}, 'refresh-button') - shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button') - shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') - - with gr.Column(): - shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown') - - with gr.Row(): - with gr.Column(): - with gr.Box(): + with gr.Tab("Generation"): + with gr.Row(): + with gr.Column(): with gr.Row(): - with gr.Column(): - shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature') - shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') - shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') - shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') - shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') - shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') - shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') - shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') + shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown') + ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button') + shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button') + shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button') - with gr.Column(): - shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') - shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') - shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') - shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') - shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length') - shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') + with gr.Column(): + shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()), value="All", elem_classes='slim-dropdown') - with gr.Accordion("Learn more", open=False): - gr.Markdown(""" + with gr.Row(): + with gr.Column(): + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) + shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, 
label='temperature') + shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') + shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') + shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') + shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') + shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') + shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') + shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') - For a technical description of the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference. + with gr.Column(): + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') + shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') + shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length') + shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') + shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') - The best presets, according to the [Preset Arena](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md) experiment, are: + with gr.Accordion("Learn more", open=False): + gr.Markdown(""" - * Instruction following: - 1) Divine Intellect - 2) Big O - 3) simple-1 - 4) Space Alien - 5) StarChat - 6) Titanic - 7) tfs-with-top-a - 8) Asterism - 9) Contrastive Search + For a technical description of the parameters, the [transformers documentation](https://huggingface.co/docs/transformers/main_classes/text_generation#transformers.GenerationConfig) is a good reference. - * Chat: - 1) Midnight Enigma - 2) Yara - 3) Shortwave + The best presets, according to the [Preset Arena](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md) experiment, are: - ### Temperature - Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness. - ### top_p - If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results. - ### top_k - Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results. - ### typical_p - If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text. - ### epsilon_cutoff - In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0. 
- ### eta_cutoff - In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0. - ### repetition_penalty - Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition. - ### repetition_penalty_range - The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used. - ### encoder_repetition_penalty - Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. - ### no_repeat_ngram_size - If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases. - ### min_length - Minimum generation length in tokens. - ### penalty_alpha - Contrastive Search is enabled by setting this to greater than zero and unchecking "do_sample". It should be used with a low value of top_k, for instance, top_k = 4. + * Instruction following: + 1) Divine Intellect + 2) Big O + 3) simple-1 + 4) Space Alien + 5) StarChat + 6) Titanic + 7) tfs-with-top-a + 8) Asterism + 9) Contrastive Search - """, elem_classes="markdown") + * Chat: + 1) Midnight Enigma + 2) Yara + 3) Shortwave - with gr.Column(): - create_chat_settings_menus() - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') - shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') - shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') - shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') - shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') + ### Temperature + Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness. + ### top_p + If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results. + ### top_k + Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results. + ### typical_p + If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text. + ### epsilon_cutoff + In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0. + ### eta_cutoff + In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0. + ### repetition_penalty + Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition. + ### repetition_penalty_range + The number of most recent tokens to consider for repetition penalty. 0 makes all tokens be used. + ### encoder_repetition_penalty + Also known as the "Hallucinations filter". 
Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge. + ### no_repeat_ngram_size + If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases. + ### min_length + Minimum generation length in tokens. + ### penalty_alpha + Contrastive Search is enabled by setting this to greater than zero and unchecking "do_sample". It should be used with a low value of top_k, for instance, top_k = 4. - with gr.Column(): - shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') + """, elem_classes="markdown") - shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.') - shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') - shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + with gr.Column(): + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') + shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') + shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') + shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') + shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') - with gr.Box(): - with gr.Row(): - with gr.Column(): - shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') - shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. For instance: "\\nYour Assistant:", "\\nThe assistant:"') - with gr.Column(): - shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') - shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') - shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') + with gr.Column(): + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. 
do_sample must be unchecked.') - shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') - shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') + shared.gradio['num_beams'] = gr.Slider(1, 20, step=1, value=generate_params['num_beams'], label='num_beams', info='For Beam Search, along with length_penalty and early_stopping.') + shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty') + shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping') + + with gr.Box(): + with gr.Row(): + with gr.Column(): + shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') + shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. For instance: "\\nYour Assistant:", "\\nThe assistant:"') + with gr.Column(): + shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') + shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') + shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') + + shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') + shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') + + ui_chat.create_chat_settings_ui() def create_event_handlers(): shared.gradio['filter_by_loader'].change(loaders.blacklist_samplers, gradio('filter_by_loader'), gradio(loaders.list_all_samplers()), show_progress=False) shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params())) - - -def create_chat_settings_menus(): - if not shared.is_chat(): - return - - with gr.Box(): - gr.Markdown("Chat parameters") - with gr.Row(): - with gr.Column(): - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens']) - shared.gradio['chat_generation_attempts'] = gr.Slider(minimum=shared.settings['chat_generation_attempts_min'], maximum=shared.settings['chat_generation_attempts_max'], value=shared.settings['chat_generation_attempts'], step=1, label='Generation attempts (for longer replies)', info='New generations will be called until either this number is reached or no new content is generated between two iterations.') - - with gr.Column(): - shared.gradio['stop_at_newline'] = 
gr.Checkbox(value=shared.settings['stop_at_newline'], label='Stop generating at new line character') diff --git a/modules/ui_session.py b/modules/ui_session.py index 7a1a32b0..3d0fdac6 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -7,35 +7,21 @@ from modules.utils import gradio def create_ui(): with gr.Tab("Session", elem_id="session-tab"): - modes = ["default", "notebook", "chat"] - current_mode = "default" - for mode in modes[1:]: - if getattr(shared.args, mode): - current_mode = mode - break - - cmd_list = vars(shared.args) - bool_list = sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes + ui.list_model_elements()]) - bool_active = [k for k in bool_list if vars(shared.args)[k]] - with gr.Row(): - with gr.Column(): - with gr.Row(): - shared.gradio['interface_modes_menu'] = gr.Dropdown(choices=modes, value=current_mode, label="Mode", elem_classes='slim-dropdown') - shared.gradio['reset_interface'] = gr.Button("Apply and restart", elem_classes="small-button", variant="primary") - shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡', elem_classes="small-button") + shared.gradio['reset_interface'] = gr.Button("Apply and restart") + shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') with gr.Row(): with gr.Column(): shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table') with gr.Column(): - shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=bool_list, value=bool_active, label="Boolean command-line flags", elem_classes='checkboxgroup-table') + shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table') with gr.Column(): if not shared.args.multi_user: - shared.gradio['save_session'] = gr.Button('Save session', elem_id="save_session") + shared.gradio['save_session'] = gr.Button('Save session') shared.gradio['load_session'] = gr.File(type='binary', file_types=['.json'], label="Upload Session JSON") extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.') @@ -47,25 +33,33 @@ def create_ui(): # Reset interface event shared.gradio['reset_interface'].click( - set_interface_arguments, gradio('interface_modes_menu', 'extensions_menu', 'bool_menu'), None).then( + set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( lambda: None, None, None, _js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace">Reloading...</h1>
    \'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click(lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') -def set_interface_arguments(interface_mode, extensions, bool_active): - modes = ["default", "notebook", "chat", "cai_chat"] - cmd_list = vars(shared.args) - bool_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes] - +def set_interface_arguments(extensions, bool_active): shared.args.extensions = extensions - for k in modes[1:]: - setattr(shared.args, k, False) - if interface_mode != "default": - setattr(shared.args, interface_mode, True) + + bool_list = get_boolean_arguments() + for k in bool_list: setattr(shared.args, k, False) for k in bool_active: setattr(shared.args, k, True) shared.need_restart = True + + +def get_boolean_arguments(active=False): + exclude = ["default", "notebook", "chat"] + + cmd_list = vars(shared.args) + bool_list = sorted([k for k in cmd_list if type(cmd_list[k]) is bool and k not in exclude + ui.list_model_elements()]) + bool_active = [k for k in bool_list if vars(shared.args)[k]] + + if active: + return bool_active + else: + return bool_list diff --git a/modules/utils.py b/modules/utils.py index 011c71f1..6fa94730 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -9,7 +9,7 @@ from modules.logging_colors import logger # Helper function to get multiple values from shared.gradio def gradio(*keys): - if len(keys) == 1 and type(keys[0]) is list: + if len(keys) == 1 and type(keys[0]) in [list, tuple]: keys = keys[0] return [shared.gradio[k] for k in keys] diff --git a/server.py b/server.py index b477d4c1..e86e3338 100644 --- a/server.py +++ b/server.py @@ -69,28 +69,28 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'loader': shared.args.loader or 'Transformers', + 'mode': shared.settings['mode'], + 'character_menu': shared.args.character or shared.settings['character'], + 'instruction_template': shared.settings['instruction_template'] }) - if shared.is_chat(): - shared.persistent_interface_state.update({ - 'mode': shared.settings['mode'], - 'character_menu': shared.args.character or shared.settings['character'], - 'instruction_template': shared.settings['instruction_template'] - }) - if Path("cache/pfp_character.png").exists(): - Path("cache/pfp_character.png").unlink() + if Path("cache/pfp_character.png").exists(): + Path("cache/pfp_character.png").unlink() # css/js strings - css = ui.css if not shared.is_chat() else ui.css + ui.chat_css - js = ui.main_js + css = ui.css + js = ui.js css += apply_extensions('css') js += apply_extensions('js') - # The input elements for the generation functions + # Interface state elements shared.input_elements = ui.list_interface_input_elements() with gr.Blocks(css=css, analytics_enabled=False, title=title, theme=ui.theme) as shared.gradio['interface']: + # Interface state + shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements}) + # Audio notification if Path("notification.mp3").exists(): shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="notification.mp3", elem_id="audio_notification", visible=False) @@ -102,12 +102,9 @@ def create_interface(): shared.gradio['temporary_text'] = gr.Textbox(visible=False) # Text Generation tab - if shared.is_chat(): - ui_chat.create_ui() - elif shared.args.notebook: - ui_notebook.create_ui() - else: - ui_default.create_ui() + 
ui_chat.create_ui() + ui_default.create_ui() + ui_notebook.create_ui() ui_parameters.create_ui(shared.settings['preset']) # Parameters tab ui_model_menu.create_ui() # Model tab @@ -115,12 +112,9 @@ def create_interface(): ui_session.create_ui() # Session tab # Generation events - if shared.is_chat(): - ui_chat.create_event_handlers() - elif shared.args.notebook: - ui_notebook.create_event_handlers() - else: - ui_default.create_event_handlers() + ui_chat.create_event_handlers() + ui_default.create_event_handlers() + ui_notebook.create_event_handlers() # Other events ui_file_saving.create_event_handlers() @@ -130,11 +124,10 @@ def create_interface(): # Interface launch events if shared.settings['dark_theme']: shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") - + shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) - if shared.is_chat(): - shared.gradio['interface'].load(chat.redraw_html, shared.reload_inputs, gradio('display')) + shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) extensions_module.create_extensions_tabs() # Extensions tabs extensions_module.create_extensions_block() # Extensions block @@ -190,16 +183,10 @@ if __name__ == "__main__": # Activate the extensions listed on settings.yaml extensions_module.available_extensions = utils.get_available_extensions() - if shared.is_chat(): - for extension in shared.settings['chat_default_extensions']: - shared.args.extensions = shared.args.extensions or [] - if extension not in shared.args.extensions: - shared.args.extensions.append(extension) - else: - for extension in shared.settings['default_extensions']: - shared.args.extensions = shared.args.extensions or [] - if extension not in shared.args.extensions: - shared.args.extensions.append(extension) + for extension in shared.settings['default_extensions']: + shared.args.extensions = shared.args.extensions or [] + if extension not in shared.args.extensions: + shared.args.extensions.append(extension) available_models = utils.get_available_models() diff --git a/settings-template.yaml b/settings-template.yaml index a0c53b33..b1d63c71 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -13,7 +13,6 @@ context: This is a conversation with your Assistant. It is a computer program de greeting: '' turn_template: '' custom_stopping_strings: '' -stop_at_newline: false add_bos_token: true ban_eos_token: false skip_special_tokens: true @@ -28,11 +27,7 @@ chat-instruct_command: |- Continue the chat dialogue below. Write a single reply for the character "<|character|>". 
<|prompt|> -chat_generation_attempts: 1 -chat_generation_attempts_min: 1 -chat_generation_attempts_max: 10 -default_extensions: [] -chat_default_extensions: +default_extensions: - gallery preset: simple-1 prompt: QA From 919a3cf9d093fe0abd0657a4375ddc3c0b37376e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 05:43:09 -0700 Subject: [PATCH 074/169] Fix the gallery --- extensions/gallery/script.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/extensions/gallery/script.js b/extensions/gallery/script.js index 878401ec..0f25b4ea 100644 --- a/extensions/gallery/script.js +++ b/extensions/gallery/script.js @@ -7,7 +7,7 @@ main_parent.addEventListener('click', function(e) { // Only show this extension in the Chat tab if (chat_visible) { - gallery_element.style.display = 'flex'; + gallery_element.style.display = 'block'; } else { gallery_element.style.display = 'none'; } From f6db2c78d11704ffe04ec21f91259e28edc0cd56 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 05:48:53 -0700 Subject: [PATCH 075/169] Fix ctransformers seed --- modules/ctransformers_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index f5641616..74c4018a 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -56,7 +56,7 @@ class CtransformersModel: top_k=state['top_k'], repetition_penalty=state['repetition_penalty'], last_n_tokens=state['repetition_penalty_range'], - seed=state['seed'] + seed=int(state['seed']) ) output = "" From 3ae2cee446b075a20937a60d4e49d42bebf4a744 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 06:09:27 -0700 Subject: [PATCH 076/169] Fix empty space when the gallery is hidden --- extensions/gallery/script.js | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/extensions/gallery/script.js b/extensions/gallery/script.js index 0f25b4ea..23acfbf6 100644 --- a/extensions/gallery/script.js +++ b/extensions/gallery/script.js @@ -1,4 +1,7 @@ let gallery_element = document.getElementById('gallery-extension'); +let extensions_block = gallery_element.parentElement; +let extensions_block_size = extensions_block.childNodes.length; +let gallery_only = (extensions_block_size == 5); main_parent.addEventListener('click', function(e) { let chat_visible = (chat_tab.offsetHeight > 0 && chat_tab.offsetWidth > 0); @@ -8,7 +11,13 @@ main_parent.addEventListener('click', function(e) { // Only show this extension in the Chat tab if (chat_visible) { gallery_element.style.display = 'block'; + if (gallery_only) { + extensions_block.style.display = ''; + } } else { gallery_element.style.display = 'none'; + if (gallery_only) { + extensions_block.style.display = 'none'; + } } }); From 4a05aa92cb60d82623a5484fef4328e8d77fc1b6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 18:14:09 -0700 Subject: [PATCH 077/169] Add "send to" buttons for instruction templates - Remove instruction templates from prompt dropdowns (default/notebook) - Add 3 buttons to Parameters > Instruction template as a replacement - Increase the number of lines of 'negative prompt' field to 3, and add a scrollbar - When uploading a character, switch to the Character tab - When uploading chat history, switch to the Chat tab --- js/switch_tabs.js | 31 +++++++++++++++++++++++++++++ 
modules/prompts.py | 42 ++++++++++++++++++++-------------------- modules/ui.py | 2 ++ modules/ui_chat.py | 25 ++++++++++++++++++++---- modules/ui_parameters.py | 2 +- modules/utils.py | 1 - 6 files changed, 76 insertions(+), 27 deletions(-) create mode 100644 js/switch_tabs.js diff --git a/js/switch_tabs.js b/js/switch_tabs.js new file mode 100644 index 00000000..ed6c653d --- /dev/null +++ b/js/switch_tabs.js @@ -0,0 +1,31 @@ +let chat_tab = document.getElementById('chat-tab'); +let main_parent = chat_tab.parentNode; + +function switch_to_chat() { + let chat_tab_button = main_parent.childNodes[0].childNodes[1]; + chat_tab_button.click(); +} + +function switch_to_default() { + let default_tab_button = main_parent.childNodes[0].childNodes[4]; + default_tab_button.click(); +} + +function switch_to_notebook() { + let notebook_tab_button = main_parent.childNodes[0].childNodes[7]; + notebook_tab_button.click(); +} + +function switch_to_generation_parameters() { + let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let generation_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[1]; + parameters_tab_button.click(); + generation_tab_button.click(); +} + +function switch_to_character() { + let parameters_tab_button = main_parent.childNodes[0].childNodes[10]; + let character_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[4]; + parameters_tab_button.click(); + character_tab_button.click(); +} diff --git a/modules/prompts.py b/modules/prompts.py index 8a3cf3e3..e7654fbf 100644 --- a/modules/prompts.py +++ b/modules/prompts.py @@ -1,4 +1,3 @@ -import re from pathlib import Path import yaml @@ -10,26 +9,6 @@ from modules.text_generation import get_encoded_length def load_prompt(fname): if fname in ['None', '']: return '' - elif fname.startswith('Instruct-'): - fname = re.sub('^Instruct-', '', fname) - file_path = Path(f'instruction-templates/{fname}.yaml') - if not file_path.exists(): - return '' - - with open(file_path, 'r', encoding='utf-8') as f: - data = yaml.safe_load(f) - output = '' - if 'context' in data: - output += data['context'] - - replacements = { - '<|user|>': data['user'], - '<|bot|>': data['bot'], - '<|user-message|>': 'Input', - } - - output += utils.replace_all(data['turn_template'].split('<|bot-message|>')[0], replacements) - return output.rstrip(' ') else: file_path = Path(f'prompts/{fname}.txt') if not file_path.exists(): @@ -43,6 +22,27 @@ def load_prompt(fname): return text +def load_instruction_prompt_simple(fname): + file_path = Path(f'instruction-templates/{fname}.yaml') + if not file_path.exists(): + return '' + + with open(file_path, 'r', encoding='utf-8') as f: + data = yaml.safe_load(f) + output = '' + if 'context' in data: + output += data['context'] + + replacements = { + '<|user|>': data['user'], + '<|bot|>': data['bot'], + '<|user-message|>': 'Input', + } + + output += utils.replace_all(data['turn_template'].split('<|bot-message|>')[0], replacements) + return output.rstrip(' ') + + def count_tokens(text): try: tokens = get_encoded_length(text) diff --git a/modules/ui.py b/modules/ui.py index e7817f73..a7d7811e 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -12,6 +12,8 @@ with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f: js = f.read() with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f: save_files_js = 
f.read() +with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f: + switch_tabs_js = f.read() refresh_symbol = '🔄' delete_symbol = '🗑️' diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 76e70ed0..461cf811 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -5,7 +5,7 @@ from pathlib import Path import gradio as gr from PIL import Image -from modules import chat, shared, ui, utils +from modules import chat, prompts, shared, ui, utils from modules.html_generator import chat_html_wrapper from modules.text_generation import stop_everything_event from modules.utils import gradio @@ -83,6 +83,11 @@ def create_chat_settings_ui(): shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') + with gr.Row(): + shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button']) + shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button']) + shared.gradio['send_instruction_to_negative_prompt'] = gr.Button('Send to negative prompt', elem_classes=['small-button']) + with gr.Row(): shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=4, label='Command for chat-instruct mode', info='<|character|> gets replaced by the bot name, and <|prompt|> gets replaced by the regular chat prompt.', elem_classes=['add_scrollbar']) @@ -217,7 +222,7 @@ def create_event_handlers(): shared.gradio['load_chat_history'].upload( chat.load_history, gradio('load_chat_history', 'history'), gradio('history')).then( chat.redraw_html, gradio(reload_arr), gradio('display')).then( - None, None, None, _js='() => {alert("The history has been loaded.")}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_chat()}}') shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) @@ -245,11 +250,11 @@ def create_event_handlers(): shared.gradio['Submit character'].click( chat.upload_character, gradio('upload_json', 'upload_img_bot'), gradio('character_menu')).then( - None, None, None, _js='() => {alert("The character has been loaded.")}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['Submit tavern character'].click( chat.upload_tavern_character, gradio('upload_img_tavern', 'tavern_json'), gradio('character_menu')).then( - None, None, None, _js='() => {alert("The character has been loaded.")}') + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_character()}}') shared.gradio['upload_json'].upload(lambda: gr.update(interactive=True), None, gradio('Submit character')) shared.gradio['upload_json'].clear(lambda: gr.update(interactive=False), None, gradio('Submit character')) @@ -258,3 +263,15 @@ def create_event_handlers(): shared.gradio['your_picture'].change( chat.upload_your_profile_picture, gradio('your_picture'), None).then( partial(chat.redraw_html, reset_cache=True), gradio(reload_arr), gradio('display')) + + shared.gradio['send_instruction_to_default'].click( + prompts.load_instruction_prompt_simple, gradio('instruction_template'), gradio('textbox-default')).then( + lambda: 
None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_default()}}') + + shared.gradio['send_instruction_to_notebook'].click( + prompts.load_instruction_prompt_simple, gradio('instruction_template'), gradio('textbox-notebook')).then( + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') + + shared.gradio['send_instruction_to_negative_prompt'].click( + prompts.load_instruction_prompt_simple, gradio('instruction_template'), gradio('negative_prompt')).then( + lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 2f0c2efd..c6d38804 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -98,7 +98,7 @@ def create_ui(default_preset): with gr.Row(): with gr.Column(): shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') - shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt') + shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', lines=3, elem_classes=['add_scrollbar']) shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') diff --git a/modules/utils.py b/modules/utils.py index 6fa94730..0a7edffa 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -88,7 +88,6 @@ def get_available_prompts(): files = set((k.stem for k in Path('prompts').glob('*.txt'))) prompts += sorted([k for k in files if re.match('^[0-9]', k)], key=natural_keys, reverse=True) prompts += sorted([k for k in files if re.match('^[^0-9]', k)], key=natural_keys) - prompts += ['Instruct-' + k for k in get_available_instruction_templates() if k != 'None'] prompts += ['None'] return prompts From c2692142199e7a15324db46e1ecec7a24543964a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 18:45:13 -0700 Subject: [PATCH 078/169] CSS change to make buttons smaller --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index 5f293921..5432a9db 100644 --- a/css/main.css +++ b/css/main.css @@ -7,6 +7,7 @@ } .small-button { + min-width: 0 !important; max-width: 171px; height: 39.594px; align-self: end; From b8df4a436eebe741a0ab7852e4df317f862e947b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 18:48:15 -0700 Subject: [PATCH 079/169] Scroll up when switching tabs --- js/switch_tabs.js | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index ed6c653d..56279193 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -1,19 +1,29 @@ let chat_tab = document.getElementById('chat-tab'); let main_parent = chat_tab.parentNode; +function scrollToTop() { + window.scrollTo({ + top: 0, + // behavior: 'smooth' + }); +} + function switch_to_chat() { let chat_tab_button = main_parent.childNodes[0].childNodes[1]; chat_tab_button.click(); + scrollToTop(); } function switch_to_default() { let default_tab_button = 
main_parent.childNodes[0].childNodes[4]; default_tab_button.click(); + scrollToTop(); } function switch_to_notebook() { let notebook_tab_button = main_parent.childNodes[0].childNodes[7]; notebook_tab_button.click(); + scrollToTop(); } function switch_to_generation_parameters() { @@ -21,6 +31,7 @@ function switch_to_generation_parameters() { let generation_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[1]; parameters_tab_button.click(); generation_tab_button.click(); + scrollToTop(); } function switch_to_character() { @@ -28,4 +39,5 @@ function switch_to_character() { let character_tab_button = document.getElementById('character-menu').parentNode.parentNode.parentNode.parentNode.parentNode.parentNode.childNodes[0].childNodes[4]; parameters_tab_button.click(); character_tab_button.click(); + scrollToTop(); } From 66c04c304deb89ecb8286e3dbcfda5d0c31b6a32 Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Sun, 13 Aug 2023 22:09:03 -0400 Subject: [PATCH 080/169] Various ctransformers fixes (#3556) --------- Co-authored-by: cal066 --- README.md | 19 +++++++++++++++---- models/config.yaml | 14 ++++++++++++++ modules/ctransformers_model.py | 5 +++-- modules/loaders.py | 1 + 4 files changed, 33 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 73ae33bd..9201df13 100644 --- a/README.md +++ b/README.md @@ -205,7 +205,7 @@ Optionally, you can use the following command-line flags: | Flag | Description | |--------------------------------------------|-------------| -| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv | +| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, ctransformers | #### Accelerate/transformers @@ -235,22 +235,33 @@ Optionally, you can use the following command-line flags: | `--quant_type QUANT_TYPE` | quant_type for 4-bit. Valid options: nf4, fp4. | | `--use_double_quant` | use_double_quant for 4-bit. | -#### llama.cpp +#### GGML (for llama.cpp and ctransformers) | Flag | Description | |-------------|-------------| | `--threads` | Number of threads to use. | | `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. | +| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. | +| `--n_ctx N_CTX` | Size of the prompt context. | + +#### llama.cpp + +| Flag | Description | +|-------------|-------------| | `--no-mmap` | Prevent mmap from being used. | | `--mlock` | Force the system to keep the model in RAM. | | `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. | -| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. | -| `--n_ctx N_CTX` | Size of the prompt context. | | `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). | | `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama-2 70b. | | `--rms_norm_eps RMS_NORM_EPS` | 5e-6 is a good value for llama-2 models. 
| | `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. | +#### ctransformers + +| Flag | Description | +|-------------|-------------| +| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gpt_neox, falcon, llama, mpt, gpt_bigcode, dolly-v2, and replit are supported. | + #### AutoGPTQ | Flag | Description | diff --git a/models/config.yaml b/models/config.yaml index 3d5f48ff..ba12e8bc 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -10,6 +10,18 @@ model_type: 'llama' .*bloom: model_type: 'bloom' +.*gpt2: + model_type: 'gpt2' +.*falcon: + model_type: 'falcon' +.*mpt: + model_type: 'mpt' +.*(starcoder|starchat): + model_type: 'gpt_bigcode' +.*dolly-v2: + model_type: 'dolly-v2' +.*replit: + model_type: 'replit' llama-65b-gptq-3bit: groupsize: 'None' .*(4bit|int4): @@ -281,3 +293,5 @@ llama-65b-gptq-3bit: .*openchat: mode: 'instruct' instruction_template: 'OpenChat' +.*falcon.*-instruct: + mode: 'instruct' diff --git a/modules/ctransformers_model.py b/modules/ctransformers_model.py index 74c4018a..5e0f347c 100644 --- a/modules/ctransformers_model.py +++ b/modules/ctransformers_model.py @@ -18,6 +18,7 @@ class CtransformersModel: threads=shared.args.threads, gpu_layers=shared.args.n_gpu_layers, batch_size=shared.args.n_batch, + context_length=shared.args.n_ctx, stream=True ) @@ -31,7 +32,7 @@ class CtransformersModel: return result, result def model_type_is_auto(self): - return shared.args.model_type == "Auto" or shared.args.model_type == "None" + return shared.args.model_type is None or shared.args.model_type == "Auto" or shared.args.model_type == "None" def model_dir(self, path): if path.is_file(): @@ -48,7 +49,7 @@ class CtransformersModel: def generate(self, prompt, state, callback=None): prompt = prompt if type(prompt) is str else prompt.decode() # ctransformers uses -1 for random seed - generator = self.model._stream( + generator = self.model( prompt=prompt, max_new_tokens=state['max_new_tokens'], temperature=state['temperature'], diff --git a/modules/loaders.py b/modules/loaders.py index 2b3a50b3..f7288f90 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -92,6 +92,7 @@ loaders_and_params = OrderedDict({ 'llamacpp_HF_info', ], 'ctransformers': [ + 'n_ctx', 'n_gpu_layers', 'n_batch', 'threads', From cc7e6ef645186219b92865c6cc98f6eb59dd3abf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 19:24:09 -0700 Subject: [PATCH 081/169] Fix a CSS conflict --- css/html_4chan_style.css | 2 +- modules/html_generator.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/css/html_4chan_style.css b/css/html_4chan_style.css index 99ac6845..cef9f6eb 100644 --- a/css/html_4chan_style.css +++ b/css/html_4chan_style.css @@ -98,7 +98,7 @@ margin-right: 40px !important; } -#parent #container .message { +#parent #container .message_4chan { color: black; border: none; } \ No newline at end of file diff --git a/modules/html_generator.py b/modules/html_generator.py index 422beb30..eb1da374 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -100,7 +100,7 @@ def process_post(post, c): src = re.sub('>', '>', src) src = re.sub('(>>[0-9]*)', '\\1', src) src = re.sub('\n', '
<br>\n', src) - src = f'<blockquote class="message">{src}\n</blockquote>' + src = f'<blockquote class="message_4chan">{src}\n</blockquote>' src = f'<span class="name">Anonymous </span> <span class="number">No.{number}</span>\n{src}' return src @@ -141,7 +141,7 @@ def generate_4chan_html(f): output = output.split('\n') for i in range(len(output)): output[i] = re.sub(r'^(&gt;(.*?)(<br>|</div>))', r'<span class="greentext">\1</span>', output[i]) - output[i] = re.sub(r'^<blockquote class="message">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message"><span class="greentext">\1</span>', output[i]) + output[i] = re.sub(r'^<blockquote class="message_4chan">(&gt;(.*?)(<br>|</div>))', r'<blockquote class="message_4chan"><span class="greentext">
    \1', output[i]) output = '\n'.join(output) return output From ff9b5861c8c7595d5a2dab03610b13cbc2c760e4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 21:10:47 -0700 Subject: [PATCH 082/169] Fix impersonate when some text is present (closes #3564) --- modules/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index e2bba18f..ffddb700 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -272,8 +272,8 @@ def impersonate_wrapper(text, start_with, state): yield text + '...' reply = None - for reply in generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True): - yield reply.lstrip(' ') + for reply in generate_reply(prompt + text, state, stopping_strings=stopping_strings, is_chat=True): + yield (text + reply).lstrip(' ') if shared.stop_everything: return From a95e6f02cbcc88756777c2affa8b621c7fbbb525 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 13 Aug 2023 21:17:20 -0700 Subject: [PATCH 083/169] Add a placeholder for custom stopping strings --- modules/ui_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index c6d38804..235fd0bf 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -114,7 +114,7 @@ def create_ui(default_preset): with gr.Row(): with gr.Column(): shared.gradio['truncation_length'] = gr.Slider(value=shared.settings['truncation_length'], minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.') - shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas. For instance: "\\nYour Assistant:", "\\nThe assistant:"') + shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. 
Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') with gr.Column(): shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') From 619cb4e78b1c5db39cbd65d2d5c631ec50f6ab42 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 11:46:07 -0300 Subject: [PATCH 084/169] Add "save defaults to settings.yaml" button (#3574) --- README.md | 1 - modules/chat.py | 8 ++++---- modules/shared.py | 37 +++++++++++++++++++------------------ modules/ui.py | 25 ++++++++++++++++++++++++- modules/ui_chat.py | 4 ++-- modules/ui_default.py | 4 +--- modules/ui_notebook.py | 4 +--- modules/ui_parameters.py | 2 +- modules/ui_session.py | 12 ++++++++++-- server.py | 4 +++- settings-template.yaml | 33 +++++++++++++++++---------------- 11 files changed, 82 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 9201df13..3c9996aa 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,6 @@ Optionally, you can use the following command-line flags: | `--model-dir MODEL_DIR` | Path to directory with all the models. | | `--lora-dir LORA_DIR` | Path to directory with all the loras. | | `--model-menu` | Show a model menu in the terminal when the web UI is first launched. | -| `--no-stream` | Don't stream the text output in real time. | | `--settings SETTINGS_FILE` | Load the default interface settings from this yaml file. See `settings-template.yaml` for an example. If you create a file called `settings.yaml`, this file will be loaded by default without the need to use the `--settings` flag. | | `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. | | `--verbose` | Print the prompts to the terminal. | diff --git a/modules/chat.py b/modules/chat.py index ffddb700..dad3d8b3 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -261,7 +261,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess yield output -def impersonate_wrapper(text, start_with, state): +def impersonate_wrapper(text, state): if shared.model_name == 'None' or shared.model is None: logger.error("No model is loaded! 
Select one in the Model tab.") yield '' @@ -291,15 +291,15 @@ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_ # Same as above but returns HTML for the UI -def generate_chat_reply_wrapper(text, start_with, state, regenerate=False, _continue=False): - if start_with != '' and not _continue: +def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): + if state['start_with'] != '' and not _continue: if regenerate: text, state['history'] = remove_last_message(state['history']) regenerate = False _continue = True send_dummy_message(text, state) - send_dummy_reply(start_with, state) + send_dummy_reply(state['start_with'], state) for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True)): yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style']), history diff --git a/modules/shared.py b/modules/shared.py index 89b5f0cb..e36e2437 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -29,34 +29,35 @@ session_is_loading = False # UI defaults settings = { 'dark_theme': True, - 'autoload_model': False, + 'start_with': '', + 'mode': 'chat', + 'chat_style': 'TheEncrypted777', + 'character': 'None', + 'prompt-default': 'QA', + 'prompt-notebook': 'QA', + 'preset': 'simple-1', 'max_new_tokens': 200, 'max_new_tokens_min': 1, 'max_new_tokens_max': 4096, - 'auto_max_new_tokens': False, 'seed': -1, 'negative_prompt': '', - 'character': 'None', + 'truncation_length': 2048, + 'truncation_length_min': 0, + 'truncation_length_max': 16384, + 'custom_stopping_strings': '', + 'auto_max_new_tokens': False, + 'ban_eos_token': False, + 'add_bos_token': True, + 'skip_special_tokens': True, + 'stream': True, 'name1': 'You', 'name2': 'Assistant', 'context': 'This is a conversation with your Assistant. It is a computer program designed to help you with various tasks such as answering questions, providing recommendations, and helping with decision making. You can ask it anything you want and it will do its best to give you accurate and relevant information.', 'greeting': '', - 'turn_template': '', - 'custom_stopping_strings': '', - 'add_bos_token': True, - 'ban_eos_token': False, - 'skip_special_tokens': True, - 'truncation_length': 2048, - 'truncation_length_min': 0, - 'truncation_length_max': 16384, - 'mode': 'chat', - 'start_with': '', - 'chat_style': 'TheEncrypted777', 'instruction_template': 'None', 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'autoload_model': False, 'default_extensions': ['gallery'], - 'preset': 'simple-1', - 'prompt': 'QA', } @@ -83,7 +84,7 @@ parser.add_argument('--lora', type=str, nargs="+", help='The list of LoRAs to lo parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models") parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras") parser.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.') -parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.') +parser.add_argument('--no-stream', action='store_true', help='DEPRECATED') parser.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. 
If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.') parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.') parser.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') @@ -181,7 +182,7 @@ args = parser.parse_args() args_defaults = parser.parse_args([]) # Deprecation warnings -for k in ['chat', 'notebook']: +for k in ['chat', 'notebook', 'no_stream']: if getattr(args, k): logger.warning(f'--{k} has been deprecated and will be removed soon. Please remove that flag.') diff --git a/modules/ui.py b/modules/ui.py index a7d7811e..19b9997f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1,7 +1,9 @@ +import copy from pathlib import Path import gradio as gr import torch +import yaml from modules import shared @@ -119,6 +121,7 @@ def list_interface_input_elements(): # Chat elements elements += [ 'textbox', + 'start_with', 'character_menu', 'history', 'name1', @@ -139,7 +142,9 @@ def list_interface_input_elements(): elements += [ 'textbox-notebook', 'textbox-default', - 'output_textbox' + 'output_textbox', + 'prompt_menu-default', + 'prompt_menu-notebook', ] # Model elements @@ -170,6 +175,24 @@ def apply_interface_values(state, use_persistent=False): return [state[k] if k in state else gr.update() for k in elements] +def save_settings(state, preset, instruction_template, extensions): + output = copy.deepcopy(shared.settings) + exclude = ['name1', 'name2', 'greeting', 'context', 'turn_template'] + for k in state: + if k in shared.settings and k not in exclude: + output[k] = state[k] + + output['preset'] = preset + output['prompt-default'] = state['prompt_menu-default'] + output['prompt-notebook'] = state['prompt_menu-notebook'] + output['character'] = state['character_menu'] + output['instruction_template'] = instruction_template + output['default_extensions'] = extensions + output['seed'] = int(output['seed']) + + return yaml.dump(output, sort_keys=False, width=float("inf")) + + class ToolButton(gr.Button, gr.components.IOComponent): """ Small button with single emoji as text, fits inside gradio forms diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 461cf811..d8179867 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -10,7 +10,7 @@ from modules.html_generator import chat_html_wrapper from modules.text_generation import stop_everything_event from modules.utils import gradio -inputs = ('Chat input', 'start_with', 'interface_state') +inputs = ('Chat input', 'interface_state') reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style') clear_arr = ('Clear history-confirm', 'Clear history', 'Clear history-cancel') @@ -82,7 +82,7 @@ def create_chat_settings_ui(): shared.gradio['name1_instruct'] = gr.Textbox(value='', lines=2, label='User string') shared.gradio['name2_instruct'] = gr.Textbox(value='', lines=1, label='Bot string') shared.gradio['context_instruct'] = gr.Textbox(value='', lines=4, label='Context') - shared.gradio['turn_template'] = gr.Textbox(value=shared.settings['turn_template'], lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') + shared.gradio['turn_template'] = gr.Textbox(value='', lines=1, label='Turn template', info='Used to precisely define the placement of spaces and new line characters in instruction prompts.') with gr.Row(): 
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button']) shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button']) diff --git a/modules/ui_default.py b/modules/ui_default.py index d26863bc..e4771fb8 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -13,13 +13,11 @@ outputs = ('output_textbox', 'html-default') def create_ui(): - default_text = load_prompt(shared.settings['prompt']) - with gr.Tab('Default', elem_id='default-tab'): shared.gradio['last_input-default'] = gr.State('') with gr.Row(): with gr.Column(): - shared.gradio['textbox-default'] = gr.Textbox(value=default_text, elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') + shared.gradio['textbox-default'] = gr.Textbox(value='', elem_classes=['textbox_default', 'add_scrollbar'], lines=27, label='Input') with gr.Row(): shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary') shared.gradio['Stop-default'] = gr.Button('Stop', elem_id='stop') diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 7d6648d2..dba9039a 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -13,14 +13,12 @@ outputs = ('textbox-notebook', 'html-notebook') def create_ui(): - default_text = load_prompt(shared.settings['prompt']) - with gr.Tab('Notebook', elem_id='notebook-tab'): shared.gradio['last_input-notebook'] = gr.State('') with gr.Row(): with gr.Column(scale=4): with gr.Tab('Raw'): - shared.gradio['textbox-notebook'] = gr.Textbox(value=default_text, elem_classes=['textbox', 'add_scrollbar'], lines=27) + shared.gradio['textbox-notebook'] = gr.Textbox(value='', elem_classes=['textbox', 'add_scrollbar'], lines=27) with gr.Tab('Markdown'): shared.gradio['markdown_render-notebook'] = gr.Button('Render') diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 235fd0bf..a0f95158 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -121,7 +121,7 @@ def create_ui(default_preset): shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.') - shared.gradio['stream'] = gr.Checkbox(value=not shared.args.no_stream, label='Activate text streaming') + shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming') ui_chat.create_chat_settings_ui() diff --git a/modules/ui_session.py b/modules/ui_session.py index 3d0fdac6..b774a207 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -9,8 +9,10 @@ def create_ui(): with gr.Tab("Session", elem_id="session-tab"): with gr.Row(): with gr.Column(): - shared.gradio['reset_interface'] = gr.Button("Apply and restart") - shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') + shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart") + with gr.Row(): + shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') + shared.gradio['save_settings'] = gr.Button('Save UI defaults to settings.yaml') with gr.Row(): with gr.Column(): @@ -37,6 +39,12 @@ def create_ui(): lambda: None, None, None, _js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace">Reloading...</h1>
    \'; setTimeout(function(){location.reload()},2500); return []}') shared.gradio['toggle_dark_mode'].click(lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') + shared.gradio['save_settings'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + ui.save_settings, gradio('interface_state', 'preset_menu', 'instruction_template', 'extensions_menu'), gradio('save_contents')).then( + lambda: './', None, gradio('save_root')).then( + lambda: 'settings.yaml', None, gradio('save_filename')).then( + lambda: gr.update(visible=True), None, gradio('file_saver')) def set_interface_arguments(extensions, bool_active): diff --git a/server.py b/server.py index e86e3338..d90453a0 100644 --- a/server.py +++ b/server.py @@ -71,7 +71,9 @@ def create_interface(): 'loader': shared.args.loader or 'Transformers', 'mode': shared.settings['mode'], 'character_menu': shared.args.character or shared.settings['character'], - 'instruction_template': shared.settings['instruction_template'] + 'instruction_template': shared.settings['instruction_template'], + 'prompt_menu-default': shared.settings['prompt-default'], + 'prompt_menu-notebook': shared.settings['prompt-notebook'], }) if Path("cache/pfp_character.png").exists(): diff --git a/settings-template.yaml b/settings-template.yaml index b1d63c71..11cd1185 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -1,33 +1,34 @@ dark_theme: true -autoload_model: false +start_with: '' +mode: chat +chat_style: TheEncrypted777 +character: None +prompt-default: QA +prompt-notebook: QA +preset: simple-1 max_new_tokens: 200 max_new_tokens_min: 1 max_new_tokens_max: 4096 -auto_max_new_tokens: false seed: -1 negative_prompt: '' -character: None +truncation_length: 2048 +truncation_length_min: 0 +truncation_length_max: 16384 +custom_stopping_strings: '' +auto_max_new_tokens: false +ban_eos_token: false +add_bos_token: true +skip_special_tokens: true +stream: true name1: You name2: Assistant context: This is a conversation with your Assistant. It is a computer program designed to help you with various tasks such as answering questions, providing recommendations, and helping with decision making. You can ask it anything you want and it will do its best to give you accurate and relevant information. greeting: '' -turn_template: '' -custom_stopping_strings: '' -add_bos_token: true -ban_eos_token: false -skip_special_tokens: true -truncation_length: 2048 -truncation_length_min: 0 -truncation_length_max: 16384 -mode: chat -start_with: '' -chat_style: TheEncrypted777 instruction_template: None chat-instruct_command: |- Continue the chat dialogue below. Write a single reply for the character "<|character|>". 
<|prompt|> +autoload_model: false default_extensions: - gallery -preset: simple-1 -prompt: QA From 890b4abdad2a49279d645c0c5d0a6b891b12a9a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 07:55:52 -0700 Subject: [PATCH 085/169] Fix session saving --- modules/ui_file_saving.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 98165d67..4ccc3126 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -1,3 +1,4 @@ +import copy import json import gradio as gr @@ -78,7 +79,7 @@ def create_event_handlers(): if not shared.args.multi_user: shared.gradio['save_session'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: json.dumps(x, indent=4), gradio('interface_state'), gradio('temporary_text')).then( + save_session, gradio('interface_state'), gradio('temporary_text')).then( None, gradio('temporary_text'), None, _js=f"(contents) => {{{ui.save_files_js}; saveSession(contents)}}") shared.gradio['load_session'].upload( @@ -98,3 +99,11 @@ def load_session(file, state): state.update(data) return state + + +def save_session(state): + output = copy.deepcopy(state) + for key in ['prompt_menu-default', 'prompt_menu-notebook']: + del output[key] + + return json.dumps(output, indent=4) From 3e0a9f9cdb3b372c5f8bec314ec1a1a66aa0c43e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 08:18:21 -0700 Subject: [PATCH 086/169] Refresh the character dropdown when saving/deleting a character --- modules/chat.py | 1 - modules/ui_file_saving.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index dad3d8b3..d83e9490 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -508,7 +508,6 @@ def load_character(character, name1, name2, instruct=False): context = shared.settings['context'] name2 = shared.settings['name2'] greeting = shared.settings['greeting'] - turn_template = shared.settings['turn_template'] return name1, name2, picture, greeting, context, turn_template.replace("\n", r"\n") diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py index 4ccc3126..b4674426 100644 --- a/modules/ui_file_saving.py +++ b/modules/ui_file_saving.py @@ -54,12 +54,13 @@ def create_event_handlers(): shared.gradio['save_character_confirm'].click( chat.save_character, gradio('name2', 'greeting', 'context', 'character_picture', 'save_character_filename'), None).then( - lambda: gr.update(visible=False), None, gradio('character_saver')) + lambda: gr.update(visible=False), None, gradio('character_saver')).then( + lambda x: gr.update(choices=utils.get_available_characters(), value=x), gradio('save_character_filename'), gradio('character_menu')) shared.gradio['delete_character_confirm'].click( chat.delete_character, gradio('character_menu'), None).then( lambda: gr.update(visible=False), None, gradio('character_deleter')).then( - lambda: gr.update(choices=utils.get_available_characters()), None, gradio('character_menu')) + lambda: gr.update(choices=utils.get_available_characters(), value="None"), None, gradio('character_menu')) shared.gradio['save_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_saver')) shared.gradio['delete_character_cancel'].click(lambda: gr.update(visible=False), None, gradio('character_deleter')) From 
d8a82d34ed1cf63f4190500bbbf478f8dd74d8f7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 08:45:58 -0700 Subject: [PATCH 087/169] Improve a warning --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index e36e2437..a2ee0b91 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -184,7 +184,7 @@ args_defaults = parser.parse_args([]) # Deprecation warnings for k in ['chat', 'notebook', 'no_stream']: if getattr(args, k): - logger.warning(f'--{k} has been deprecated and will be removed soon. Please remove that flag.') + logger.warning(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.') # Security warnings if args.trust_remote_code: From 4d067e9b5213657e350f835056ef13028326de27 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 09:39:06 -0700 Subject: [PATCH 088/169] Add back a variable to keep old extensions working --- modules/ui_chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index d8179867..fc850ea3 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -121,7 +121,10 @@ def create_chat_settings_ui(): def create_event_handlers(): gen_events = [] - shared.input_params = gradio(inputs) # Obsolete, kept for compatibility with old extensions + + # Obsolete variables, kept for compatibility with old extensions + shared.input_params = gradio(inputs) + shared.reload_inputs = gradio(reload_arr) gen_events.append(shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( From 7e57b35b5e7c9a2cd20504633d2bcb5b4eaf600a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 10:10:39 -0700 Subject: [PATCH 089/169] Clean up old code --- modules/ui_chat.py | 18 ++++++------------ modules/ui_default.py | 16 ++++------------ modules/ui_notebook.py | 13 ++++--------- 3 files changed, 14 insertions(+), 33 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index fc850ea3..a3a4ccf0 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -120,53 +120,47 @@ def create_chat_settings_ui(): def create_event_handlers(): - gen_events = [] # Obsolete variables, kept for compatibility with old extensions shared.input_params = gradio(inputs) shared.reload_inputs = gradio(reload_arr) - gen_events.append(shared.gradio['Generate'].click( + shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - gen_events.append(shared.gradio['textbox'].submit( + shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), 
show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - gen_events.append(shared.gradio['Regenerate'].click( + shared.gradio['Regenerate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, regenerate=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - gen_events.append(shared.gradio['Continue'].click( + shared.gradio['Continue'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( partial(chat.generate_chat_reply_wrapper, _continue=True), gradio(inputs), gradio('display', 'history'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.save_persistent_history, gradio('history', 'character_menu', 'mode'), None).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - gen_events.append(shared.gradio['Impersonate'].click( + shared.gradio['Impersonate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: x, gradio('textbox'), gradio('Chat input'), show_progress=False).then( chat.impersonate_wrapper, gradio(inputs), gradio('textbox'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) shared.gradio['Replace last reply'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( @@ -211,7 +205,7 @@ def create_event_handlers(): chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['Stop'].click( - stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None).then( + stop_everything_event, None, None, queue=False).then( chat.redraw_html, gradio(reload_arr), gradio('display')) shared.gradio['mode'].change( diff --git a/modules/ui_default.py b/modules/ui_default.py index e4771fb8..99657227 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -45,36 +45,28 @@ def create_ui(): def create_event_handlers(): - gen_events = [] - - gen_events.append(shared.gradio['Generate-default'].click( + shared.gradio['Generate-default'].click( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") - ) - gen_events.append(shared.gradio['textbox-default'].submit( + shared.gradio['textbox-default'].submit( lambda x: x, gradio('textbox-default'), gradio('last_input-default')).then( 
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[0]; element.scrollTop = element.scrollHeight}") - ) shared.gradio['markdown_render-default'].click(lambda x: x, gradio('output_textbox'), gradio('markdown-default'), queue=False) - gen_events.append(shared.gradio['Continue-default'].click( + shared.gradio['Continue-default'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, [shared.gradio['output_textbox']] + gradio(inputs)[1:], gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - # lambda: None, None, None, _js="() => {element = document.getElementsByTagName('textarea')[1]; element.scrollTop = element.scrollHeight}") - ) - shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['Stop-default'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-default'].change(load_prompt, gradio('prompt_menu-default'), gradio('textbox-default'), show_progress=False) shared.gradio['save_prompt-default'].click( lambda x: x, gradio('textbox-default'), gradio('save_contents')).then( diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index dba9039a..6949ed78 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -46,35 +46,30 @@ def create_ui(): def create_event_handlers(): - gen_events = [] - - gen_events.append(shared.gradio['Generate-notebook'].click( + shared.gradio['Generate-notebook'].click( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - gen_events.append(shared.gradio['textbox-notebook'].submit( + shared.gradio['textbox-notebook'].submit( lambda x: x, gradio('textbox-notebook'), gradio('last_input-notebook')).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) shared.gradio['Undo'].click(lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['markdown_render-notebook'].click(lambda x: x, gradio('textbox-notebook'), gradio('markdown-notebook'), queue=False) - gen_events.append(shared.gradio['Regenerate-notebook'].click( + shared.gradio['Regenerate-notebook'].click( lambda x: x, gradio('last_input-notebook'), gradio('textbox-notebook'), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), 
gradio('interface_state')).then( generate_reply_wrapper, gradio(inputs), gradio(outputs), show_progress=False).then( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: None, None, None, _js=f'() => {{{ui.audio_notification_js}}}') - ) - shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False, cancels=gen_events if shared.args.no_stream else None) + shared.gradio['Stop-notebook'].click(stop_everything_event, None, None, queue=False) shared.gradio['prompt_menu-notebook'].change(load_prompt, gradio('prompt_menu-notebook'), gradio('textbox-notebook'), show_progress=False) shared.gradio['save_prompt-notebook'].click( lambda x: x, gradio('textbox-notebook'), gradio('save_contents')).then( From 8294eadd384e8d9543b169abd751406885e3f3c6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 14 Aug 2023 11:13:46 -0700 Subject: [PATCH 090/169] Bump AutoGPTQ wheel --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index ec6a7e47..5e750903 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,8 +25,8 @@ git+https://github.com/huggingface/transformers@baf1daa58eb2960248fd9f7c3af0ed24 bitsandbytes==0.41.1; platform_system != "Windows" https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.0/auto_gptq-0.4.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.1/auto_gptq-0.4.1+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.10/exllama-0.0.10+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 991bb57e439ccfbcd5a0f154957c98d2e3d66c35 Mon Sep 17 00:00:00 2001 From: cal066 <60696996+cal066@users.noreply.github.com> Date: Mon, 14 Aug 2023 18:17:24 +0000 Subject: [PATCH 091/169] ctransformers: Fix up model_type name consistency (#3567) --- README.md | 2 +- models/config.yaml | 6 +++--- modules/loaders.py | 1 + 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0cbe24cc..3a7e7eab 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,7 @@ Optionally, you can use the following command-line flags: | Flag | Description | |-------------|-------------| -| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gpt_neox, falcon, llama, mpt, gpt_bigcode, dolly-v2, and replit are supported. | +| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gptneox, falcon, llama, mpt, starcoder (gptbigcode), dollyv2, and replit are supported. 
| #### AutoGPTQ diff --git a/models/config.yaml b/models/config.yaml index ba12e8bc..624840df 100644 --- a/models/config.yaml +++ b/models/config.yaml @@ -5,7 +5,7 @@ .*(gpt-j|gptj|gpt4all-j|malion-6b|pygway|pygmalion-6b|dolly-v1): model_type: 'gptj' .*(gpt-neox|koalpaca-polyglot|polyglot.*koalpaca|polyglot-ko|polyglot_ko|pythia|stablelm|incite|dolly-v2|polycoder|h2ogpt-oig|h2ogpt-oasst1|h2ogpt-gm): - model_type: 'gpt_neox' + model_type: 'gptneox' .*llama: model_type: 'llama' .*bloom: @@ -17,9 +17,9 @@ .*mpt: model_type: 'mpt' .*(starcoder|starchat): - model_type: 'gpt_bigcode' + model_type: 'starcoder' .*dolly-v2: - model_type: 'dolly-v2' + model_type: 'dollyv2' .*replit: model_type: 'replit' llama-65b-gptq-3bit: diff --git a/modules/loaders.py b/modules/loaders.py index d7bd8d48..08a11ac0 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -290,6 +290,7 @@ loaders_model_types = { "dollyv2" "replit", "starcoder", + "gptbigcode", "falcon" ], } From 155862a4a0938a47e9792cd6a7e8dcebe1a969f1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 15 Aug 2023 11:40:37 -0700 Subject: [PATCH 092/169] Update README --- README.md | 79 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 3a7e7eab..f1af6519 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # Text generation web UI -A gradio web UI for running Large Language Models like LLaMA, llama.cpp, GPT-J, OPT, and GALACTICA. +A Gradio web UI for Large Language Models. Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation. @@ -10,20 +10,18 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. 
## Features -* 3 interface modes: default, notebook, and chat +* 3 interface modes: default (two columns), notebook, and chat * Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [ctransformers](https://github.com/marella/ctransformers) * Dropdown menu for quickly switching between different models -* LoRA: load and unload LoRAs on the fly, train a new LoRA -* Precise instruction templates for chat mode, including Llama 2, Alpaca, Vicuna, WizardLM, StableLM, and many others +* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA +* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others +* 4-bit, 8-bit, and CPU inference through the transformers library +* Use llama.cpp models with transformers samplers (`llamacpp_HF` loader) * [Multimodal pipelines, including LLaVA and MiniGPT-4](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal) -* 8-bit and 4-bit inference through bitsandbytes -* CPU mode for transformers models -* [DeepSpeed ZeRO-3 inference](docs/DeepSpeed.md) -* [Extensions](docs/Extensions.md) +* [Extensions framework](docs/Extensions.md) * [Custom chat characters](docs/Chat-mode.md) * Very efficient text streaming * Markdown output with LaTeX rendering, to use for instance with [GALACTICA](https://github.com/paperswithcode/galai) -* Nice HTML output for GPT-4chan * API, including endpoints for websocket streaming ([see the examples](https://github.com/oobabooga/text-generation-webui/blob/main/api-examples)) To learn how to use the various features, check out the Documentation: https://github.com/oobabooga/text-generation-webui/tree/main/docs @@ -38,26 +36,24 @@ To learn how to use the various features, check out the Documentation: https://g Just download the zip above, extract it, and double-click on "start". The web UI and all its dependencies will be installed in the same folder. -* The source codes are here: https://github.com/oobabooga/one-click-installers +* The source codes and more information can be found here: https://github.com/oobabooga/one-click-installers * There is no need to run the installers as admin. -* AMD doesn't work on Windows. * Huge thanks to [@jllllll](https://github.com/jllllll), [@ClayShoaf](https://github.com/ClayShoaf), and [@xNul](https://github.com/xNul) for their contributions to these installers. ### Manual installation using Conda -Recommended if you have some experience with the command line. +Recommended if you have some experience with the command-line. #### 0. Install Conda https://docs.conda.io/en/latest/miniconda.html -On Linux or WSL, it can be automatically installed with these two commands: +On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)): ``` curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh" bash Miniconda3.sh ``` -Source: https://educe-ubc.github.io/conda.html #### 1. Create a new conda environment @@ -92,9 +88,9 @@ cd text-generation-webui pip install -r requirements.txt ``` -#### bitsandbytes +#### Note about older NVIDIA GPUs -bitsandbytes >= 0.39 may not work on older NVIDIA GPUs. 
In that case, to use `--load-in-8bit`, you may have to downgrade like this: +bitsandbytes >= 0.39 may not work. In that case, to use `--load-in-8bit`, you may have to downgrade like this: * Linux: `pip install bitsandbytes==0.38.1` * Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl` @@ -113,37 +109,52 @@ docker compose up --build ### Updating the requirements -From time to time, the `requirements.txt` changes. To update, use this command: +From time to time, the `requirements.txt` changes. To update, use these commands: ``` conda activate textgen cd text-generation-webui pip install -r requirements.txt --upgrade ``` + ## Downloading models -Models should be placed inside the `models/` folder. +Models should be placed in the `text-generation-webui/models` folder. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads). -[Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads) is the main place to download models. These are some examples: +* Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example: -* [Pythia](https://huggingface.co/models?sort=downloads&search=eleutherai%2Fpythia+deduped) -* [OPT](https://huggingface.co/models?search=facebook/opt) -* [GALACTICA](https://huggingface.co/models?search=facebook/galactica) -* [GPT-J 6B](https://huggingface.co/EleutherAI/gpt-j-6B/tree/main) +``` +text-generation-webui/ +├── models +│   ├── lmsys_vicuna-33b-v1.3 +│   │   ├── config.json +│   │   ├── generation_config.json +│   │   ├── huggingface-metadata.txt +│   │   ├── pytorch_model-00001-of-00007.bin +│   │   ├── pytorch_model-00002-of-00007.bin +│   │   ├── pytorch_model-00003-of-00007.bin +│   │   ├── pytorch_model-00004-of-00007.bin +│   │   ├── pytorch_model-00005-of-00007.bin +│   │   ├── pytorch_model-00006-of-00007.bin +│   │   ├── pytorch_model-00007-of-00007.bin +│   │   ├── pytorch_model.bin.index.json +│   │   ├── README.md +│   │   ├── special_tokens_map.json +│   │   ├── tokenizer_config.json +│   │   └── tokenizer.model +``` -You can automatically download a model from HF using the script `download-model.py`: +In the "Model" tab of the UI, those models can be downloaded from Hugging Face. You can also download them from the command-line with `python download-model.py organization/model`. - python download-model.py organization/model +* GGML models are a single file and can be placed directly into `models`. Example: -For example: +``` +text-generation-webui/ +├── models +│   ├── llama-13b.ggmlv3.q4_K_M.bin +``` - python download-model.py facebook/opt-1.3b - -To download a protected model, set env vars `HF_USER` and `HF_PASS` to your Hugging Face username and password (or [User Access Token](https://huggingface.co/settings/tokens)). The model's terms must first be accepted on the HF website. - -#### GGML models - -You can drop these directly into the `models/` folder, making sure that the file name contains `ggml` somewhere and ends in `.bin`. +Those models have to be downloaded manually and placed into that folder. 
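A minimal sketch of the downloader workflow described above, assuming the `organization_model` folder naming shown in the directory tree (illustrative only, not part of the patch itself):

```
# Hypothetical example: download a Transformers model from Hugging Face
# into text-generation-webui/models/ using the script referenced above.
python download-model.py facebook/opt-1.3b

# Assumed result: a models/facebook_opt-1.3b/ subfolder containing config.json,
# the tokenizer files, and the pytorch_model-*.bin shards.
ls models/facebook_opt-1.3b
```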
#### GPT-4chan @@ -354,5 +365,5 @@ If you would like to contribute to the project, check out the [Contributing guid ## Community -* Subreddit: https://www.reddit.com/r/oobaboogazz/ +* Subreddit: https://www.reddit.com/r/oobabooga/ * Discord: https://discord.gg/jwZCF2dPQN From 7089b2a48f98266556b85bb756ffcefab233e97e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:16:21 -0700 Subject: [PATCH 093/169] Update README --- README.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f1af6519..8b6bd281 100644 --- a/README.md +++ b/README.md @@ -124,12 +124,11 @@ Models should be placed in the `text-generation-webui/models` folder. They are u * Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example: ``` -text-generation-webui/ +text-generation-webui ├── models │   ├── lmsys_vicuna-33b-v1.3 │   │   ├── config.json │   │   ├── generation_config.json -│   │   ├── huggingface-metadata.txt │   │   ├── pytorch_model-00001-of-00007.bin │   │   ├── pytorch_model-00002-of-00007.bin │   │   ├── pytorch_model-00003-of-00007.bin @@ -138,15 +137,14 @@ text-generation-webui/ │   │   ├── pytorch_model-00006-of-00007.bin │   │   ├── pytorch_model-00007-of-00007.bin │   │   ├── pytorch_model.bin.index.json -│   │   ├── README.md │   │   ├── special_tokens_map.json │   │   ├── tokenizer_config.json │   │   └── tokenizer.model ``` -In the "Model" tab of the UI, those models can be downloaded from Hugging Face. You can also download them from the command-line with `python download-model.py organization/model`. +In the "Model" tab of the UI, those models can be directly downloaded from Hugging Face. You can also download them from the command-line with `python download-model.py organization/model`. -* GGML models are a single file and can be placed directly into `models`. Example: +* GGML models are a single file and should be placed directly into `models`. Example: ``` text-generation-webui/ @@ -154,7 +152,7 @@ text-generation-webui/ │   ├── llama-13b.ggmlv3.q4_K_M.bin ``` -Those models have to be downloaded manually and placed into that folder. +These models have to be downloaded manually and are not supported by the automated downloaders yet. #### GPT-4chan From a03a70bed63758ee77fbbd10e4ccefaa247b8c88 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:20:59 -0700 Subject: [PATCH 094/169] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 8b6bd281..fcb30e95 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ text-generation-webui │   │   └── tokenizer.model ``` -In the "Model" tab of the UI, those models can be directly downloaded from Hugging Face. You can also download them from the command-line with `python download-model.py organization/model`. +In the "Model" tab of the UI, those models can be automatically downloaded from Hugging Face. You can also download them via the command-line with `python download-model.py organization/model`. * GGML models are a single file and should be placed directly into `models`. Example: @@ -152,7 +152,7 @@ text-generation-webui/ │   ├── llama-13b.ggmlv3.q4_K_M.bin ``` -These models have to be downloaded manually and are not supported by the automated downloaders yet. +Those models must be downloaded manually, as they are not currently supported by the automated downloader. 
#### GPT-4chan From 87dd85b7190cc26730bae3a86de5f5faaed6be7d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 15 Aug 2023 12:21:50 -0700 Subject: [PATCH 095/169] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index fcb30e95..1b862a7d 100644 --- a/README.md +++ b/README.md @@ -147,7 +147,7 @@ In the "Model" tab of the UI, those models can be automatically downloaded from * GGML models are a single file and should be placed directly into `models`. Example: ``` -text-generation-webui/ +text-generation-webui ├── models │   ├── llama-13b.ggmlv3.q4_K_M.bin ``` From 32ff3da941d3e9d7c603cff88db645cf76647a32 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 15 Aug 2023 17:16:24 -0300 Subject: [PATCH 096/169] Update ancient screenshots --- README.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1b862a7d..18f6d73f 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,9 @@ A Gradio web UI for Large Language Models. Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation. -|![Image1](https://github.com/oobabooga/screenshots/raw/main/qa.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/cai3.png) | +|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) | |:---:|:---:| -|![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) | ![Image4](https://github.com/oobabooga/screenshots/raw/main/galactica.png) | +|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) | ## Features @@ -178,7 +178,10 @@ After downloading the model, follow these steps: python download-model.py EleutherAI/gpt-j-6B --text-only ``` -When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format. 
+When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format: + +![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png) + ## Starting the web UI From 2a292082240527ba5c065e941811d82082b48b11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 16 Aug 2023 02:39:58 -0300 Subject: [PATCH 097/169] Add a "Show controls" button to chat UI (#3590) --- css/main.css | 37 +++++++++++++++++++++++++----------- extensions/gallery/script.js | 17 ++++++++++++++--- js/show_controls.js | 18 ++++++++++++++++++ modules/ui.py | 2 ++ modules/ui_chat.py | 8 ++++++-- 5 files changed, 66 insertions(+), 16 deletions(-) create mode 100644 js/show_controls.js diff --git a/css/main.css b/css/main.css index 5432a9db..e82a8c61 100644 --- a/css/main.css +++ b/css/main.css @@ -101,15 +101,15 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { } .textbox_default textarea { - height: calc(100vh - 310px); + height: calc(100dvh - 310px); } .textbox_default_output textarea { - height: calc(100vh - 190px); + height: calc(100dvh - 190px); } .textbox textarea { - height: calc(100vh - 241px); + height: calc(100dvh - 241px); } .textbox_default textarea, .textbox_default_output textarea, .textbox textarea { @@ -123,7 +123,7 @@ div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * { @media screen and (max-width: 711px) { .textbox_default textarea { - height: calc(100vh - 275px); + height: calc(100dvh - 295px); } } @@ -218,10 +218,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { opacity: 1; } -#gradio-chatbot { - height: 66.67vh; -} - .wrap.svelte-6roggh.svelte-6roggh { max-height: 92.5%; } @@ -246,8 +242,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding: 0px; } - .chat { - height: calc(100vh - 274px) !important; + #chat { + height: calc(100dvh - 272px) !important; + } + + .bigchat #chat { + height: calc(100dvh - 180px) !important; } } @@ -255,7 +255,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { margin-left: auto; margin-right: auto; max-width: 800px; - height: calc(100vh - 286px); + height: 100%; overflow-y: auto; padding-right: 20px; display: flex; @@ -265,6 +265,21 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding-top: 1px; } +#chat { + height: calc(100dvh - 286px); +} + +.bigchat #chat { + height: calc(100dvh - 200px); +} + + +#show-controls { + position: absolute; + background-color: transparent; + left: calc(100% - 140px); +} + .chat > .messages { display: flex; flex-direction: column; diff --git a/extensions/gallery/script.js b/extensions/gallery/script.js index 23acfbf6..4203f0dd 100644 --- a/extensions/gallery/script.js +++ b/extensions/gallery/script.js @@ -1,23 +1,34 @@ let gallery_element = document.getElementById('gallery-extension'); +let chat_mode_element = document.getElementById('chat-mode'); + let extensions_block = gallery_element.parentElement; let extensions_block_size = extensions_block.childNodes.length; let gallery_only = (extensions_block_size == 5); main_parent.addEventListener('click', function(e) { let chat_visible = (chat_tab.offsetHeight > 0 && chat_tab.offsetWidth > 0); + let chat_mode_visible = (chat_mode_element.offsetHeight > 0 && chat_mode_element.offsetWidth > 0); let notebook_visible = (notebook_tab.offsetHeight > 0 && notebook_tab.offsetWidth > 0); let default_visible = (default_tab.offsetHeight > 0 && default_tab.offsetWidth > 0); // Only show this extension in the Chat tab if (chat_visible) { - gallery_element.style.display = 
'block'; - if (gallery_only) { - extensions_block.style.display = ''; + if (chat_mode_visible) { + gallery_element.style.display = 'block'; + if (gallery_only) { + extensions_block.style.display = ''; + } + } else { + gallery_element.style.display = 'none'; + extensions_block.style.display = 'none'; } } else { gallery_element.style.display = 'none'; if (gallery_only) { extensions_block.style.display = 'none'; } + else { + extensions_block.style.display = ''; + } } }); diff --git a/js/show_controls.js b/js/show_controls.js new file mode 100644 index 00000000..83bb6c02 --- /dev/null +++ b/js/show_controls.js @@ -0,0 +1,18 @@ +const belowChatInput = document.querySelectorAll("#chat-tab > div > :nth-child(n+3), #extensions"); +const chatParent = document.getElementById("chat").parentNode; + +function toggle_controls(value) { + if (value) { + belowChatInput.forEach(element => { + element.style.display = "inherit"; + }); + + chatParent.classList.remove("bigchat"); + } else { + belowChatInput.forEach(element => { + element.style.display = "none"; + }); + + chatParent.classList.add("bigchat"); + } +} diff --git a/modules/ui.py b/modules/ui.py index 682d74f9..94bfe4a6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -16,6 +16,8 @@ with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f: save_files_js = f.read() with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f: switch_tabs_js = f.read() +with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f: + show_controls_js = f.read() refresh_symbol = '🔄' delete_symbol = '🗑️' diff --git a/modules/ui_chat.py b/modules/ui_chat.py index a3a4ccf0..aec7a2ad 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -22,7 +22,9 @@ def create_ui(): with gr.Tab('Chat', elem_id='chat-tab'): shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) - shared.gradio['textbox'] = gr.Textbox(label='Input') + + shared.gradio['textbox'] = gr.Textbox(label='Input', elem_id='chat-input') + shared.gradio['show-controls'] = gr.Checkbox(value=True, label='Show controls', elem_id='show-controls') with gr.Row(): shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary') @@ -48,7 +50,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with']) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.') + shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'instruct', 'chat-instruct'] else 'chat', label='Mode', info='Defines how the chat prompt is generated. 
In instruct and chat-instruct modes, the instruction template selected under Parameters > Instruction template must match the current model.', elem_id='chat-mode') shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') @@ -125,6 +127,8 @@ def create_event_handlers(): shared.input_params = gradio(inputs) shared.reload_inputs = gradio(reload_arr) + shared.gradio['show-controls'].change(None, gradio('show-controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( From 73d9befb65b26a016bc142794abbaf823b7e59fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 16 Aug 2023 07:03:53 -0700 Subject: [PATCH 098/169] Make "Show controls" customizable through settings.yaml --- css/main.css | 1 - modules/shared.py | 1 + modules/ui.py | 3 ++- modules/ui_chat.py | 8 ++++---- modules/ui_session.py | 2 +- server.py | 1 + settings-template.yaml | 1 + 7 files changed, 10 insertions(+), 7 deletions(-) diff --git a/css/main.css b/css/main.css index e82a8c61..0305efb4 100644 --- a/css/main.css +++ b/css/main.css @@ -273,7 +273,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { height: calc(100dvh - 200px); } - #show-controls { position: absolute; background-color: transparent; diff --git a/modules/shared.py b/modules/shared.py index fa1a0a3b..88aa8cf2 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -29,6 +29,7 @@ session_is_loading = False # UI defaults settings = { 'dark_theme': True, + 'show_controls': True, 'start_with': '', 'mode': 'chat', 'chat_style': 'TheEncrypted777', diff --git a/modules/ui.py b/modules/ui.py index 94bfe4a6..a99af375 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -178,7 +178,7 @@ def apply_interface_values(state, use_persistent=False): return [state[k] if k in state else gr.update() for k in elements] -def save_settings(state, preset, instruction_template, extensions): +def save_settings(state, preset, instruction_template, extensions, show_controls): output = copy.deepcopy(shared.settings) exclude = ['name1', 'name2', 'greeting', 'context', 'turn_template'] for k in state: @@ -192,6 +192,7 @@ def save_settings(state, preset, instruction_template, extensions): output['instruction_template'] = instruction_template output['default_extensions'] = extensions output['seed'] = int(output['seed']) + output['show_controls'] = show_controls return yaml.dump(output, sort_keys=False, width=float("inf")) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index aec7a2ad..b356f8c6 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -22,9 +22,9 @@ def create_ui(): with gr.Tab('Chat', elem_id='chat-tab'): shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, shared.settings['name1'], shared.settings['name2'], 'chat', 'cai-chat')) - shared.gradio['textbox'] = gr.Textbox(label='Input', elem_id='chat-input') - shared.gradio['show-controls'] = gr.Checkbox(value=True, label='Show controls', elem_id='show-controls') + shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls', elem_id='show-controls') + with gr.Row(): shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop') 
shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary') @@ -127,8 +127,6 @@ def create_event_handlers(): shared.input_params = gradio(inputs) shared.reload_inputs = gradio(reload_arr) - shared.gradio['show-controls'].change(None, gradio('show-controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') - shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( @@ -276,3 +274,5 @@ def create_event_handlers(): shared.gradio['send_instruction_to_negative_prompt'].click( prompts.load_instruction_prompt_simple, gradio('instruction_template'), gradio('negative_prompt')).then( lambda: None, None, None, _js=f'() => {{{ui.switch_tabs_js}; switch_to_generation_parameters()}}') + + shared.gradio['show_controls'].change(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') diff --git a/modules/ui_session.py b/modules/ui_session.py index b774a207..537a31f2 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -41,7 +41,7 @@ def create_ui(): shared.gradio['toggle_dark_mode'].click(lambda: None, None, None, _js='() => {document.getElementsByTagName("body")[0].classList.toggle("dark")}') shared.gradio['save_settings'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - ui.save_settings, gradio('interface_state', 'preset_menu', 'instruction_template', 'extensions_menu'), gradio('save_contents')).then( + ui.save_settings, gradio('interface_state', 'preset_menu', 'instruction_template', 'extensions_menu', 'show_controls'), gradio('save_contents')).then( lambda: './', None, gradio('save_root')).then( lambda: 'settings.yaml', None, gradio('save_filename')).then( lambda: gr.update(visible=True), None, gradio('file_saver')) diff --git a/server.py b/server.py index d90453a0..ef213a87 100644 --- a/server.py +++ b/server.py @@ -128,6 +128,7 @@ def create_interface(): shared.gradio['interface'].load(lambda: None, None, None, _js="() => document.getElementsByTagName('body')[0].classList.add('dark')") shared.gradio['interface'].load(lambda: None, None, None, _js=f"() => {{{js}}}") + shared.gradio['interface'].load(None, gradio('show_controls'), None, _js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') shared.gradio['interface'].load(partial(ui.apply_interface_values, {}, use_persistent=True), None, gradio(ui.list_interface_input_elements()), show_progress=False) shared.gradio['interface'].load(chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display')) diff --git a/settings-template.yaml b/settings-template.yaml index 11cd1185..b2526df1 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -1,4 +1,5 @@ dark_theme: true +show_controls: true start_with: '' mode: chat chat_style: TheEncrypted777 From 7966989667b4ad88cae3e760fdc759a634ce5478 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 16 Aug 2023 07:25:59 -0700 Subject: [PATCH 099/169] Minor CSS fix --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 0305efb4..d301e10e 100644 --- a/css/main.css +++ b/css/main.css @@ -266,7 +266,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #chat { - height: calc(100dvh - 286px); + height: calc(100dvh - 284px); } .bigchat #chat { From 
a4e903e932c6b3b43b2ccb88f9e75049b2ac4b2e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:23:29 -0700 Subject: [PATCH 100/169] Escape HTML in chat messages --- modules/chat.py | 25 +++++++++++++------------ modules/html_generator.py | 2 ++ 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index d83e9490..d81d254f 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1,6 +1,7 @@ import base64 import copy import functools +import html import json import re from pathlib import Path @@ -188,15 +189,16 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess yield output return - # Defining some variables just_started = True visible_text = None stopping_strings = get_stopping_strings(state) is_stream = state['stream'] - # Preparing the input + # Prepare the input if not any((regenerate, _continue)): - visible_text = text + visible_text = html.escape(text) + + # Apply extensions text, visible_text = apply_extensions('chat_input', text, visible_text, state) text = apply_extensions('input', text, state, is_chat=True) @@ -208,6 +210,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if regenerate: output['visible'].pop() output['internal'].pop() + # *Is typing...* if loading_message: yield {'visible': output['visible'] + [[visible_text, shared.processing_message]], 'internal': output['internal']} @@ -216,12 +219,11 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if loading_message: yield {'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']], 'internal': output['internal']} - # Generating the prompt + # Generate the prompt kwargs = { '_continue': _continue, 'history': output, } - prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs) if prompt is None: prompt = generate_chat_prompt(text, state, **kwargs) @@ -232,9 +234,8 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Extract the reply visible_reply = re.sub("(||{{user}})", state['name1'], reply) + visible_reply = html.escape(visible_reply) - # We need this global variable to handle the Stop event, - # otherwise gradio gets confused if shared.stop_everything: output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output @@ -307,8 +308,8 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): def remove_last_message(history): if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>': - last = history['visible'].pop() - history['internal'].pop() + last = history['internal'].pop() + history['visible'].pop() else: last = ['', ''] @@ -328,7 +329,7 @@ def replace_last_reply(text, state): if len(text.strip()) == 0: return history elif len(history['visible']) > 0: - history['visible'][-1][1] = text + history['visible'][-1][1] = html.escape(text) history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) return history @@ -336,7 +337,7 @@ def replace_last_reply(text, state): def send_dummy_message(text, state): history = state['history'] - history['visible'].append([text, '']) + history['visible'].append([html.escape(text), '']) history['internal'].append([apply_extensions('input', text, state, is_chat=True), '']) return history @@ -347,7 +348,7 @@ def send_dummy_reply(text, state): history['visible'].append(['', '']) 
history['internal'].append(['', '']) - history['visible'][-1][1] = text + history['visible'][-1][1] = html.escape(text) history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) return history diff --git a/modules/html_generator.py b/modules/html_generator.py index eb1da374..3d9f758b 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -1,3 +1,4 @@ +import html import os import re import time @@ -85,6 +86,7 @@ def convert_to_markdown(string): def generate_basic_html(string): + string = html.escape(string) string = convert_to_markdown(string) string = f'
    {string}
    ' return string From 300219b0816a86eafedda0ae285e30390a28b629 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 16 Aug 2023 09:35:10 -0700 Subject: [PATCH 101/169] Fix