Merge branch 'main' into catalpaaa-lora-and-model-dir

Commit fde92048af by oobabooga, 2023-03-27 23:16:44 -03:00
20 changed files with 208 additions and 114 deletions

.gitignore

@@ -1,26 +1,22 @@
-cache/*
-characters/*
-extensions/silero_tts/outputs/*
-extensions/elevenlabs_tts/outputs/*
-extensions/sd_api_pictures/outputs/*
-logs/*
-loras/*
-models/*
-softprompts/*
-torch-dumps/*
+cache
+characters
+training/datasets
+extensions/silero_tts/outputs
+extensions/elevenlabs_tts/outputs
+extensions/sd_api_pictures/outputs
+logs
+loras
+models
+softprompts
+torch-dumps
 *pycache*
 */*pycache*
 */*/pycache*
 venv/
 .venv/
+repositories
 settings.json
 img_bot*
 img_me*
+prompts/[0-9]*
-!characters/Example.json
-!characters/Example.png
-!loras/place-your-loras-here.txt
-!models/place-your-models-here.txt
-!softprompts/place-your-softprompts-here.txt
-!torch-dumps/place-your-pt-models-here.txt


@@ -176,10 +176,10 @@ Optionally, you can use the following command-line flags:
 | `--cai-chat` | Launch the web UI in chat mode with a style similar to Character.AI's. If the file `img_bot.png` or `img_bot.jpg` exists in the same folder as server.py, this image will be used as the bot's profile picture. Similarly, `img_me.png` or `img_me.jpg` will be used as your profile picture. |
 | `--cpu` | Use the CPU to generate text.|
 | `--load-in-8bit` | Load the model with 8-bit precision.|
-| `--load-in-4bit` | DEPRECATED: use `--gptq-bits 4` instead. |
-| `--gptq-bits GPTQ_BITS` | GPTQ: Load a pre-quantized model with specified precision. 2, 3, 4 and 8 (bit) are supported. Currently only works with LLaMA and OPT. |
-| `--gptq-model-type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMa and OPT are supported. |
-| `--gptq-pre-layer GPTQ_PRE_LAYER` | GPTQ: The number of layers to preload. |
+| `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. |
+| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported. |
+| `--groupsize GROUPSIZE` | GPTQ: Group size. |
+| `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. |
 | `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. |
 | `--auto-devices` | Automatically split the model across the available GPU(s) and CPU.|
 | `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. |


@@ -23,3 +23,9 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 .pending.svelte-1ed2p3z {
     opacity: 1;
 }
+
+#extensions {
+    padding: 0;
+    padding: 0;
+}


@@ -37,12 +37,6 @@
     text-decoration: none !important;
 }
 
-svg {
-    display: unset !important;
-    vertical-align: middle !important;
-    margin: 5px;
-}
-
 ol li p, ul li p {
     display: inline-block;
 }
@@ -54,3 +48,18 @@ ol li p, ul li p {
 .gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
     color: white;
 }
+
+.gradio-container {
+    max-width: 100% !important;
+    padding-top: 0 !important;
+}
+
+#extensions {
+    padding: 15px;
+    padding: 15px;
+}
+
+span.math.inline {
+    font-size: 27px;
+    vertical-align: baseline !important;
+}


@@ -11,7 +11,7 @@ let extensions = document.getElementById('extensions');
 main_parent.addEventListener('click', function(e) {
     // Check if the main element is visible
     if (main.offsetHeight > 0 && main.offsetWidth > 0) {
-        extensions.style.display = 'block';
+        extensions.style.display = 'flex';
     } else {
         extensions.style.display = 'none';
     }


@@ -118,7 +118,7 @@ def get_download_links_from_huggingface(model, branch):
         is_safetensors = re.match("model.*\.safetensors", fname)
         is_pt = re.match(".*\.pt", fname)
         is_tokenizer = re.match("tokenizer.*\.model", fname)
-        is_text = re.match(".*\.(txt|json|py)", fname) or is_tokenizer
+        is_text = re.match(".*\.(txt|json|py|md)", fname) or is_tokenizer
         if any((is_pytorch, is_safetensors, is_pt, is_tokenizer, is_text)):
             if is_text:
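As a side note on the change above: with "|md" added to the text pattern, files such as README.md are now treated as text files by the downloader. A minimal standalone sketch of how the classification regexes behave (the filenames below are hypothetical, not taken from a real model repository):

import re

def classify(fname):
    # Same patterns as in the hunk above.
    is_safetensors = re.match(r"model.*\.safetensors", fname)
    is_pt = re.match(r".*\.pt", fname)
    is_tokenizer = re.match(r"tokenizer.*\.model", fname)
    is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
    return bool(is_safetensors), bool(is_pt), bool(is_tokenizer), bool(is_text)

for fname in ["model-00001-of-00002.safetensors", "tokenizer.model", "config.json", "README.md"]:
    print(fname, classify(fname))
# README.md -> (False, False, False, True): it only matches because of the new "|md"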


@@ -26,6 +26,7 @@ current_params = params.copy()
 voices_by_gender = ['en_99', 'en_45', 'en_18', 'en_117', 'en_49', 'en_51', 'en_68', 'en_0', 'en_26', 'en_56', 'en_74', 'en_5', 'en_38', 'en_53', 'en_21', 'en_37', 'en_107', 'en_10', 'en_82', 'en_16', 'en_41', 'en_12', 'en_67', 'en_61', 'en_14', 'en_11', 'en_39', 'en_52', 'en_24', 'en_97', 'en_28', 'en_72', 'en_94', 'en_36', 'en_4', 'en_43', 'en_88', 'en_25', 'en_65', 'en_6', 'en_44', 'en_75', 'en_91', 'en_60', 'en_109', 'en_85', 'en_101', 'en_108', 'en_50', 'en_96', 'en_64', 'en_92', 'en_76', 'en_33', 'en_116', 'en_48', 'en_98', 'en_86', 'en_62', 'en_54', 'en_95', 'en_55', 'en_111', 'en_3', 'en_83', 'en_8', 'en_47', 'en_59', 'en_1', 'en_2', 'en_7', 'en_9', 'en_13', 'en_15', 'en_17', 'en_19', 'en_20', 'en_22', 'en_23', 'en_27', 'en_29', 'en_30', 'en_31', 'en_32', 'en_34', 'en_35', 'en_40', 'en_42', 'en_46', 'en_57', 'en_58', 'en_63', 'en_66', 'en_69', 'en_70', 'en_71', 'en_73', 'en_77', 'en_78', 'en_79', 'en_80', 'en_81', 'en_84', 'en_87', 'en_89', 'en_90', 'en_93', 'en_100', 'en_102', 'en_103', 'en_104', 'en_105', 'en_106', 'en_110', 'en_112', 'en_113', 'en_114', 'en_115']
 voice_pitches = ['x-low', 'low', 'medium', 'high', 'x-high']
 voice_speeds = ['x-slow', 'slow', 'medium', 'fast', 'x-fast']
+streaming_state = shared.args.no_stream # remember if chat streaming was enabled
 
 # Used for making text xml compatible, needed for voice pitch and speed control
 table = str.maketrans({
@@ -77,6 +78,7 @@ def input_modifier(string):
         shared.history['visible'][-1] = [shared.history['visible'][-1][0], shared.history['visible'][-1][1].replace('controls autoplay>','controls>')]
 
     shared.processing_message = "*Is recording a voice message...*"
+    shared.args.no_stream = True # Disable streaming cause otherwise the audio output will stutter and begin anew every time the message is being updated
     return string
 
 def output_modifier(string):
@@ -84,7 +86,7 @@ def output_modifier(string):
     This function is applied to the model outputs.
     """
 
-    global model, current_params
+    global model, current_params, streaming_state
 
     for i in params:
         if params[i] != current_params[i]:
@@ -116,6 +118,7 @@ def output_modifier(string):
         string += f'\n\n{original_string}'
 
     shared.processing_message = "*Is typing...*"
+    shared.args.no_stream = streaming_state # restore the streaming option to the previous value
     return string
 
 def bot_prefix_modifier(string):
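The streaming_state additions above follow a simple save/disable/restore pattern around audio generation. A minimal self-contained sketch of that pattern, using a stand-in namespace instead of the real modules.shared:

from types import SimpleNamespace

# Stand-in for modules.shared; only the attribute used by the pattern is modeled.
shared = SimpleNamespace(args=SimpleNamespace(no_stream=False))

streaming_state = shared.args.no_stream  # remember whether streaming was enabled

def input_modifier(string):
    shared.args.no_stream = True  # disable streaming while the audio reply is generated
    return string

def output_modifier(string):
    shared.args.no_stream = streaming_state  # restore the user's original setting
    return string

input_modifier("hello")
print(shared.args.no_stream)   # True: streaming is off during generation
output_modifier("hello")
print(shared.args.no_stream)   # False: the original setting is back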


@@ -14,18 +14,21 @@ import opt
 
 def load_quantized(model_name):
-    if not shared.args.gptq_model_type:
+    if not shared.args.model_type:
         # Try to determine model type from model name
-        model_type = model_name.split('-')[0].lower()
-        if model_type not in ('llama', 'opt'):
-            print("Can't determine model type from model name. Please specify it manually using --gptq-model-type "
+        if model_name.lower().startswith(('llama', 'alpaca')):
+            model_type = 'llama'
+        elif model_name.lower().startswith(('opt', 'galactica')):
+            model_type = 'opt'
+        else:
+            print("Can't determine model type from model name. Please specify it manually using --model_type "
                   "argument")
             exit()
     else:
-        model_type = shared.args.gptq_model_type.lower()
+        model_type = shared.args.model_type.lower()
 
     if model_type == 'llama':
-        if not shared.args.gptq_pre_layer:
+        if not shared.args.pre_layer:
             load_quant = llama.load_quant
         else:
             load_quant = llama_inference_offload.load_quant
@@ -35,35 +38,44 @@ def load_quantized(model_name):
         print("Unknown pre-quantized model type specified. Only 'llama' and 'opt' are supported")
         exit()
 
+    # Now we are going to try to locate the quantized model file.
     path_to_model = Path(f'models/{model_name}')
-    if path_to_model.name.lower().startswith('llama-7b'):
-        pt_model = f'llama-7b-{shared.args.gptq_bits}bit'
-    elif path_to_model.name.lower().startswith('llama-13b'):
-        pt_model = f'llama-13b-{shared.args.gptq_bits}bit'
-    elif path_to_model.name.lower().startswith('llama-30b'):
-        pt_model = f'llama-30b-{shared.args.gptq_bits}bit'
-    elif path_to_model.name.lower().startswith('llama-65b'):
-        pt_model = f'llama-65b-{shared.args.gptq_bits}bit'
-    else:
-        pt_model = f'{model_name}-{shared.args.gptq_bits}bit'
-
-    # Try to find the .safetensors or .pt both in models/ and in the subfolder
+    found_pts = list(path_to_model.glob("*.pt"))
+    found_safetensors = list(path_to_model.glob("*.safetensors"))
     pt_path = None
-    for path in [Path(p+ext) for ext in ['.safetensors', '.pt'] for p in [f"models/{pt_model}", f"{path_to_model}/{pt_model}"]]:
-        if path.exists():
-            print(f"Found {path}")
-            pt_path = path
-            break
+
+    if len(found_pts) == 1:
+        pt_path = found_pts[0]
+    elif len(found_safetensors) == 1:
+        pt_path = found_safetensors[0]
+    else:
+        if path_to_model.name.lower().startswith('llama-7b'):
+            pt_model = f'llama-7b-{shared.args.wbits}bit'
+        elif path_to_model.name.lower().startswith('llama-13b'):
+            pt_model = f'llama-13b-{shared.args.wbits}bit'
+        elif path_to_model.name.lower().startswith('llama-30b'):
+            pt_model = f'llama-30b-{shared.args.wbits}bit'
+        elif path_to_model.name.lower().startswith('llama-65b'):
+            pt_model = f'llama-65b-{shared.args.wbits}bit'
+        else:
+            pt_model = f'{model_name}-{shared.args.wbits}bit'
+
+        # Try to find the .safetensors or .pt both in models/ and in the subfolder
+        for path in [Path(p+ext) for ext in ['.safetensors', '.pt'] for p in [f"models/{pt_model}", f"{path_to_model}/{pt_model}"]]:
+            if path.exists():
+                print(f"Found {path}")
+                pt_path = path
+                break
 
     if not pt_path:
-        print(f"Could not find {pt_model}, exiting...")
+        print("Could not find the quantized model in .pt or .safetensors format, exiting...")
         exit()
 
     # qwopqwop200's offload
-    if shared.args.gptq_pre_layer:
-        model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits, shared.args.gptq_pre_layer)
+    if shared.args.pre_layer:
+        model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize, shared.args.pre_layer)
     else:
-        model = load_quant(str(path_to_model), str(pt_path), shared.args.gptq_bits)
+        model = load_quant(str(path_to_model), str(pt_path), shared.args.wbits, shared.args.groupsize)
 
     # accelerate offload (doesn't work properly)
     if shared.args.gpu_memory:
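The new lookup logic above prefers a single .pt or .safetensors file found inside the model folder and only then falls back to the old name-based guess. A minimal standalone sketch of that order, assuming hypothetical directory and model names (an illustration, not the module itself):

from pathlib import Path

def find_quantized_checkpoint(model_dir, model_name, wbits=4):
    # Prefer a single .pt or .safetensors file inside the model folder.
    path_to_model = Path(model_dir) / model_name
    found_pts = list(path_to_model.glob('*.pt'))
    found_safetensors = list(path_to_model.glob('*.safetensors'))
    if len(found_pts) == 1:
        return found_pts[0]
    if len(found_safetensors) == 1:
        return found_safetensors[0]

    # Fallback: the old '<name>-<bits>bit' guess, checked next to and inside the folder.
    pt_model = f'{model_name}-{wbits}bit'
    for ext in ('.safetensors', '.pt'):
        for candidate in (Path(model_dir) / f'{pt_model}{ext}', path_to_model / f'{pt_model}{ext}'):
            if candidate.exists():
                return candidate
    return None

# Hypothetical usage; returns None unless such files actually exist on disk.
print(find_quantized_checkpoint('models', 'llama-7b-4bit-128g'))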


@@ -18,11 +18,11 @@ def add_lora_to_model(lora_name):
 
     # If a LoRA had been previously loaded, or if we want
     # to unload a LoRA, reload the model
-    if shared.lora_name != "None" or lora_name == "None":
+    if shared.lora_name not in ['None', ''] or lora_name in ['None', '']:
         reload_model()
     shared.lora_name = lora_name
 
-    if lora_name != "None":
+    if lora_name not in ['None', '']:
         print(f"Adding the LoRA {lora_name} to the model...")
         params = {}
         if not shared.args.cpu:


@@ -25,7 +25,7 @@ class _SentinelTokenStoppingCriteria(transformers.StoppingCriteria):
                 if trimmed_sample.shape[-1] < self.sentinel_token_ids[i].shape[-1]:
                     continue
 
                 for window in trimmed_sample.unfold(0, self.sentinel_token_ids[i].shape[-1], 1):
-                    if torch.all(torch.eq(self.sentinel_token_ids[i], window)):
+                    if torch.all(torch.eq(self.sentinel_token_ids[i][0], window)):
                         return True
         return False
@@ -54,7 +54,7 @@ class Iteratorize:
         self.stop_now = False
 
         def _callback(val):
-            if self.stop_now:
+            if self.stop_now or shared.stop_everything:
                 raise ValueError
             self.q.put(val)
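For context on the stopping criterion above: it slides a window of the sentinel's length over the generated tokens and stops when one window matches. A self-contained example of that comparison with made-up token ids:

import torch

sample = torch.tensor([5, 1, 2, 3, 9, 7, 8])  # made-up generated token ids
sentinel = torch.tensor([9, 7, 8])            # made-up stop sequence

found = False
# unfold(0, length, 1) yields every contiguous window of the sentinel's length
for window in sample.unfold(0, sentinel.shape[-1], 1):
    if torch.all(torch.eq(sentinel, window)):
        found = True
        break

print(found)  # True: the stop sequence appears at the end of the sample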


@@ -80,11 +80,7 @@ def extract_message_from_reply(reply, name1, name2, check):
 
     reply = fix_newlines(reply)
     return reply, next_character_found
 
-def stop_everything_event():
-    shared.stop_everything = True
-
 def chatbot_wrapper(text, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, name1, name2, context, check, chat_prompt_size, chat_generation_attempts=1, regenerate=False):
-    shared.stop_everything = False
     just_started = True
     eos_token = '\n' if check else None
     name1_original = name1


@@ -63,8 +63,8 @@ def create_extensions_block():
 
     # Creating the extension ui elements
     if should_display_ui:
-        with gr.Box(elem_id="extensions"):
-            gr.Markdown("Extensions")
+        with gr.Column(elem_id="extensions"):
             for extension, name in iterator():
+                gr.Markdown(f"\n### {name}")
                 if hasattr(extension, "ui"):
                     extension.ui()


@@ -44,7 +44,7 @@ def load_model(model_name):
     shared.is_RWKV = model_name.lower().startswith('rwkv-')
 
     # Default settings
-    if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.gptq_bits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]):
+    if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.wbits, shared.args.auto_devices, shared.args.disk, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.deepspeed, shared.args.flexgen, shared.is_RWKV]):
         if any(size in shared.model_name.lower() for size in ('13b', '20b', '30b')):
             model = AutoModelForCausalLM.from_pretrained(Path(f"{shared.args.model_dir}/{shared.model_name}"), device_map='auto', load_in_8bit=True)
         else:
@@ -95,7 +95,7 @@ def load_model(model_name):
         return model, tokenizer
 
     # Quantized model
-    elif shared.args.gptq_bits > 0:
+    elif shared.args.wbits > 0:
         from modules.GPTQ_loader import load_quantized
 
         model = load_quantized(model_name)


@@ -52,7 +52,8 @@ settings = {
         'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:',
         '^(gpt4chan|gpt-4chan|4chan)': '-----\n--- 865467536\nInput text\n--- 865467537\n',
         '(rosey|chip|joi)_.*_instruct.*': 'User: \n',
-        'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>'
+        'oasst-*': '<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>',
+        'alpaca-*': "Below is an instruction that describes a task. Write a response that appropriately completes the request.\n### Instruction:\nWrite a poem about the transformers Python library. \nMention the word \"large language models\" in that poem.\n### Response:\n",
     },
     'lora_prompts': {
         'default': 'Common sense questions and answers\n\nQuestion: \nFactual answer:',
@@ -78,10 +79,15 @@ parser.add_argument('--chat', action='store_true', help='Launch the web UI in ch
 parser.add_argument('--cai-chat', action='store_true', help='Launch the web UI in chat mode with a style similar to Character.AI\'s. If the file img_bot.png or img_bot.jpg exists in the same folder as server.py, this image will be used as the bot\'s profile picture. Similarly, img_me.png or img_me.jpg will be used as your profile picture.')
 parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text.')
 parser.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision.')
-parser.add_argument('--load-in-4bit', action='store_true', help='DEPRECATED: use --gptq-bits 4 instead.')
-parser.add_argument('--gptq-bits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision. 2, 3, 4 and 8bit are supported. Currently only works with LLaMA and OPT.')
-parser.add_argument('--gptq-model-type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMa and OPT are supported.')
-parser.add_argument('--gptq-pre-layer', type=int, default=0, help='GPTQ: The number of layers to preload.')
+parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.')
+parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.')
+parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.')
+parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
+parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently only LLaMA and OPT are supported.')
+parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.')
+parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.')
 parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
 parser.add_argument('--auto-devices', action='store_true', help='Automatically split the model across the available GPU(s) and CPU.')
 parser.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
@@ -112,6 +118,8 @@ parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to dire
 args = parser.parse_args()
 
 # Provisional, this will be deleted later
-if args.load_in_4bit:
-    print("Warning: --load-in-4bit is deprecated and will be removed. Use --gptq-bits 4 instead.\n")
-    args.gptq_bits = 4
+deprecated_dict = {'gptq_bits': ['wbits', 0], 'gptq_model_type': ['model_type', None], 'gptq_pre_layer': ['prelayer', 0]}
+for k in deprecated_dict:
+    if eval(f"args.{k}") != deprecated_dict[k][1]:
+        print(f"Warning: --{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.")
+        exec(f"args.{deprecated_dict[k][0]} = args.{k}")


@@ -99,9 +99,13 @@ def set_manual_seed(seed):
         if torch.cuda.is_available():
             torch.cuda.manual_seed_all(seed)
 
+def stop_everything_event():
+    shared.stop_everything = True
+
 def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typical_p, repetition_penalty, encoder_repetition_penalty, top_k, min_length, no_repeat_ngram_size, num_beams, penalty_alpha, length_penalty, early_stopping, seed, eos_token=None, stopping_strings=[]):
     clear_torch_cache()
     set_manual_seed(seed)
+    shared.stop_everything = False
 
     t0 = time.time()
     original_question = question
@@ -236,8 +240,6 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
                         break
 
                     yield formatted_outputs(reply, shared.model_name)
-
-                yield formatted_outputs(reply, shared.model_name)
 
             # Stream the output naively for FlexGen since it doesn't support 'stopping_criteria'
             else:
                 for i in range(max_new_tokens//8+1):
@@ -270,5 +272,5 @@ def generate_reply(question, max_new_tokens, do_sample, temperature, top_p, typi
                 traceback.print_exc()
     finally:
         t1 = time.time()
-        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(original_input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(original_input_ids[0])} tokens)")
+        print(f"Output generated in {(t1-t0):.2f} seconds ({(len(output)-len(original_input_ids[0]))/(t1-t0):.2f} tokens/s, {len(output)-len(original_input_ids[0])} tokens, context {len(original_input_ids[0])})")
         return
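The extended log line above now also reports the prompt ("context") length. A small worked example of the arithmetic, with made-up numbers:

# Made-up numbers to illustrate the arithmetic in the log line.
t0, t1 = 0.0, 4.0                  # timestamps before and after generation, in seconds
len_output, len_context = 250, 50  # total token count vs. prompt ("context") token count

new_tokens = len_output - len_context  # 200 tokens were actually generated
print(f"Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {len_context})")
# -> Output generated in 4.00 seconds (50.00 tokens/s, 200 tokens, context 50)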

prompts/Alpaca.txt (new file)

@@ -0,0 +1,6 @@
+Below is an instruction that describes a task. Write a response that appropriately completes the request.
+### Instruction:
+Write a poem about the transformers Python library.
+Mention the word "large language models" in that poem.
+### Response:
+


@@ -0,0 +1 @@
+<|prompter|>Write a story about future of AI development<|endoftext|><|assistant|>

prompts/QA.txt (new file)

@@ -0,0 +1,4 @@
+Common sense questions and answers
+
+Question:
+Factual answer:


@@ -1,7 +1,7 @@
 accelerate==0.17.1
 bitsandbytes==0.37.1
 flexgen==0.1.7
-gradio==3.18.0
+gradio==3.23.0
 markdown
 numpy
 peft==0.2.0

server.py

@@ -4,6 +4,7 @@ import re
 import sys
 import time
 import zipfile
+from datetime import datetime
 from pathlib import Path
 
 import gradio as gr
@@ -15,7 +16,8 @@ import modules.ui as ui
 from modules.html_generator import generate_chat_html
 from modules.LoRA import add_lora_to_model
 from modules.models import load_model, load_soft_prompt
-from modules.text_generation import clear_torch_cache, generate_reply
+from modules.text_generation import (clear_torch_cache, generate_reply,
+                                     stop_everything_event)
 
 # Loading custom settings
 settings_file = None
@@ -38,6 +40,13 @@ def get_available_models():
 def get_available_presets():
     return sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('presets').glob('*.txt'))), key=str.lower)
 
+def get_available_prompts():
+    prompts = []
+    prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('[0-9]*.txt'))), key=str.lower, reverse=True)
+    prompts += sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('prompts').glob('*.txt'))), key=str.lower)
+    prompts += ['None']
+    return prompts
+
 def get_available_characters():
     return ['None'] + sorted(set(map(lambda x : '.'.join(str(x.name).split('.')[:-1]), Path('characters').glob('*.json'))), key=str.lower)
@@ -50,12 +59,17 @@ def get_available_softprompts():
 def get_available_loras():
     return ['None'] + sorted([item.name for item in list(Path('shared.args.lora_dir').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower)
 
+def unload_model():
+    shared.model = shared.tokenizer = None
+    clear_torch_cache()
+
 def load_model_wrapper(selected_model):
     if selected_model != shared.model_name:
         shared.model_name = selected_model
-        shared.model = shared.tokenizer = None
-        clear_torch_cache()
-        shared.model, shared.tokenizer = load_model(shared.model_name)
+
+        unload_model()
+        if selected_model != '':
+            shared.model, shared.tokenizer = load_model(shared.model_name)
 
     return selected_model
@@ -93,7 +107,7 @@ def load_preset_values(preset_menu, return_dict=False):
     if return_dict:
         return generate_params
     else:
-        return preset_menu, generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping']
+        return generate_params['do_sample'], generate_params['temperature'], generate_params['top_p'], generate_params['typical_p'], generate_params['repetition_penalty'], generate_params['encoder_repetition_penalty'], generate_params['top_k'], generate_params['min_length'], generate_params['no_repeat_ngram_size'], generate_params['num_beams'], generate_params['penalty_alpha'], generate_params['length_penalty'], generate_params['early_stopping']
 
 def upload_soft_prompt(file):
     with zipfile.ZipFile(io.BytesIO(file)) as zf:
@@ -118,9 +132,43 @@ def create_model_and_preset_menus():
             shared.gradio['preset_menu'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset')
             ui.create_refresh_button(shared.gradio['preset_menu'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button')
 
+def save_prompt(text):
+    fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt"
+    with open(Path(f'prompts/{fname}'), 'w', encoding='utf-8') as f:
+        f.write(text)
+    return f"Saved to prompts/{fname}"
+
+def load_prompt(fname):
+    if fname in ['None', '']:
+        return ''
+    else:
+        with open(Path(f'prompts/{fname}.txt'), 'r', encoding='utf-8') as f:
+            return f.read()
+
+def create_prompt_menus():
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                shared.gradio['prompt_menu'] = gr.Dropdown(choices=get_available_prompts(), value='None', label='Prompt')
+                ui.create_refresh_button(shared.gradio['prompt_menu'], lambda : None, lambda : {'choices': get_available_prompts()}, 'refresh-button')
+
+        with gr.Column():
+            with gr.Column():
+                shared.gradio['save_prompt'] = gr.Button('Save prompt')
+                shared.gradio['status'] = gr.Markdown('Ready')
+
+    shared.gradio['prompt_menu'].change(load_prompt, [shared.gradio['prompt_menu']], [shared.gradio['textbox']], show_progress=False)
+    shared.gradio['save_prompt'].click(save_prompt, [shared.gradio['textbox']], [shared.gradio['status']], show_progress=False)
+
 def create_settings_menus(default_preset):
     generate_params = load_preset_values(default_preset if not shared.args.flexgen else 'Naive', return_dict=True)
 
+    with gr.Row():
+        with gr.Column():
+            create_model_and_preset_menus()
+
+        with gr.Column():
+            shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)')
+
     with gr.Row():
         with gr.Column():
             with gr.Box():
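A small self-contained sketch of the filename scheme save_prompt uses above, and of why prompt listings and the new .gitignore pattern key off a leading digit (the printed timestamp will differ on every run):

from datetime import datetime
from pathlib import Path

# Same naming scheme as save_prompt: a timestamp plus .txt, so names always start with a digit.
fname = f"{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}.txt"
print(fname)  # e.g. 2023-03-27-23:16:44.txt

# Saved prompts therefore match the '[0-9]*.txt' glob listed first by get_available_prompts()
# and the 'prompts/[0-9]*' pattern added to .gitignore.
print(sorted(Path('prompts').glob('[0-9]*.txt')))  # timestamp-named prompts, if any exist on disk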
@@ -151,12 +199,6 @@ def create_settings_menus(default_preset):
                     shared.gradio['length_penalty'] = gr.Slider(-5, 5, value=generate_params['length_penalty'], label='length_penalty')
                     shared.gradio['early_stopping'] = gr.Checkbox(value=generate_params['early_stopping'], label='early_stopping')
 
-    shared.gradio['seed'] = gr.Number(value=-1, label='Seed (-1 for random)')
-
-    with gr.Row():
-        shared.gradio['preset_menu_mirror'] = gr.Dropdown(choices=available_presets, value=default_preset if not shared.args.flexgen else 'Naive', label='Generation parameters preset')
-        ui.create_refresh_button(shared.gradio['preset_menu_mirror'], lambda : None, lambda : {'choices': get_available_presets()}, 'refresh-button')
-
     with gr.Row():
         shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA')
         ui.create_refresh_button(shared.gradio['lora_menu'], lambda : None, lambda : {'choices': get_available_loras()}, 'refresh-button')
@@ -171,8 +213,7 @@ def create_settings_menus(default_preset):
             shared.gradio['upload_softprompt'] = gr.File(type='binary', file_types=['.zip'])
 
     shared.gradio['model_menu'].change(load_model_wrapper, [shared.gradio['model_menu']], [shared.gradio['model_menu']], show_progress=True)
-    shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['preset_menu_mirror', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']])
-    shared.gradio['preset_menu_mirror'].change(load_preset_values, [shared.gradio['preset_menu_mirror']], [shared.gradio[k] for k in ['preset_menu', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']])
+    shared.gradio['preset_menu'].change(load_preset_values, [shared.gradio['preset_menu']], [shared.gradio[k] for k in ['do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping']])
     shared.gradio['lora_menu'].change(load_lora_wrapper, [shared.gradio['lora_menu']], [shared.gradio['lora_menu'], shared.gradio['textbox']], show_progress=True)
     shared.gradio['softprompts_menu'].change(load_soft_prompt, [shared.gradio['softprompts_menu']], [shared.gradio['softprompts_menu']], show_progress=True)
     shared.gradio['upload_softprompt'].upload(upload_soft_prompt, [shared.gradio['upload_softprompt']], [shared.gradio['softprompts_menu']])
@@ -237,8 +278,9 @@ if shared.args.lora:
 
 # Default UI settings
 default_preset = shared.settings['presets'][next((k for k in shared.settings['presets'] if re.match(k.lower(), shared.model_name.lower())), 'default')]
-default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')]
-if default_text == '':
+if shared.lora_name != "None":
+    default_text = shared.settings['lora_prompts'][next((k for k in shared.settings['lora_prompts'] if re.match(k.lower(), shared.lora_name.lower())), 'default')]
+else:
     default_text = shared.settings['prompts'][next((k for k in shared.settings['prompts'] if re.match(k.lower(), shared.model_name.lower())), 'default')]
 title ='Text generation web UI'
 description = '\n\n# Text generation lab\nGenerate text using Large Language Models.\n'
@@ -259,8 +301,8 @@ def create_interface():
             shared.gradio['display'] = gr.Chatbot(value=shared.history['visible']).style(color_map=("#326efd", "#212528"))
             shared.gradio['textbox'] = gr.Textbox(label='Input')
             with gr.Row():
-                shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop")
                 shared.gradio['Generate'] = gr.Button('Generate')
+                shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop")
             with gr.Row():
                 shared.gradio['Impersonate'] = gr.Button('Impersonate')
                 shared.gradio['Regenerate'] = gr.Button('Regenerate')
@@ -273,8 +315,6 @@ def create_interface():
                 shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False)
                 shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False)
 
-            create_model_and_preset_menus()
-
         with gr.Tab("Character", elem_id="chat-settings"):
             shared.gradio['name1'] = gr.Textbox(value=shared.settings[f'name1{suffix}'], lines=1, label='Your name')
             shared.gradio['name2'] = gr.Textbox(value=shared.settings[f'name2{suffix}'], lines=1, label='Bot\'s name')
@@ -327,7 +367,7 @@ def create_interface():
         gen_events.append(shared.gradio['textbox'].submit(eval(function_call), shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream))
         gen_events.append(shared.gradio['Regenerate'].click(chat.regenerate_wrapper, shared.input_params, shared.gradio['display'], show_progress=shared.args.no_stream))
         gen_events.append(shared.gradio['Impersonate'].click(chat.impersonate_wrapper, shared.input_params, shared.gradio['textbox'], show_progress=shared.args.no_stream))
-        shared.gradio['Stop'].click(chat.stop_everything_event, [], [], cancels=gen_events, queue=False)
+        shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None)
 
         shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, [], shared.gradio['textbox'], show_progress=shared.args.no_stream)
         shared.gradio['Replace last reply'].click(chat.replace_last_reply, [shared.gradio['textbox'], shared.gradio['name1'], shared.gradio['name2']], shared.gradio['display'], show_progress=shared.args.no_stream)
@@ -368,19 +408,29 @@ def create_interface():
     elif shared.args.notebook:
         with gr.Tab("Text generation", elem_id="main"):
-            with gr.Tab('Raw'):
-                shared.gradio['textbox'] = gr.Textbox(value=default_text, lines=25)
-            with gr.Tab('Markdown'):
-                shared.gradio['markdown'] = gr.Markdown()
-            with gr.Tab('HTML'):
-                shared.gradio['html'] = gr.HTML()
             with gr.Row():
-                shared.gradio['Stop'] = gr.Button('Stop')
-                shared.gradio['Generate'] = gr.Button('Generate')
-            shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens'])
+                with gr.Column(scale=4):
+                    with gr.Tab('Raw'):
+                        shared.gradio['textbox'] = gr.Textbox(value=default_text, elem_id="textbox", lines=25)
+                    with gr.Tab('Markdown'):
+                        shared.gradio['markdown'] = gr.Markdown()
+                    with gr.Tab('HTML'):
+                        shared.gradio['html'] = gr.HTML()
+
+                    with gr.Row():
+                        with gr.Column():
+                            with gr.Row():
+                                shared.gradio['Generate'] = gr.Button('Generate')
+                                shared.gradio['Stop'] = gr.Button('Stop')
+                        with gr.Column():
+                            pass
+
+                with gr.Column(scale=1):
+                    gr.HTML('<div style="padding-bottom: 13px"></div>')
+                    shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens'])
+                    create_prompt_menus()
-            create_model_and_preset_menus()
 
         with gr.Tab("Parameters", elem_id="parameters"):
             create_settings_menus(default_preset)
@@ -388,7 +438,7 @@ def create_interface():
         output_params = [shared.gradio[k] for k in ['textbox', 'markdown', 'html']]
 
         gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen'))
         gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream))
-        shared.gradio['Stop'].click(None, None, None, cancels=gen_events)
+        shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None)
         shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}")
 
     else:
@@ -404,7 +454,7 @@ def create_interface():
                 with gr.Column():
                     shared.gradio['Stop'] = gr.Button('Stop')
-                    create_model_and_preset_menus()
+                    create_prompt_menus()
 
             with gr.Column():
                 with gr.Tab('Raw'):
@@ -413,6 +463,7 @@ def create_interface():
                 with gr.Tab('Markdown'):
                     shared.gradio['markdown'] = gr.Markdown()
                 with gr.Tab('HTML'):
                     shared.gradio['html'] = gr.HTML()
 
         with gr.Tab("Parameters", elem_id="parameters"):
             create_settings_menus(default_preset)
@@ -421,7 +472,7 @@ def create_interface():
         gen_events.append(shared.gradio['Generate'].click(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream, api_name='textgen'))
         gen_events.append(shared.gradio['textbox'].submit(generate_reply, shared.input_params, output_params, show_progress=shared.args.no_stream))
         gen_events.append(shared.gradio['Continue'].click(generate_reply, [shared.gradio['output_textbox']] + shared.input_params[1:], output_params, show_progress=shared.args.no_stream))
-        shared.gradio['Stop'].click(None, None, None, cancels=gen_events)
+        shared.gradio['Stop'].click(stop_everything_event, [], [], queue=False, cancels=gen_events if shared.args.no_stream else None)
         shared.gradio['interface'].load(None, None, None, _js=f"() => {{{ui.main_js}}}")
 
     with gr.Tab("Interface mode", elem_id="interface-mode"):