Merge remote-tracking branch 'origin/main' into triton

commit 15d5a043f2
Author: Light
Date:   2023-04-13 19:38:51 +08:00
4 changed files with 182 additions and 52 deletions

README.md

@@ -125,7 +125,7 @@ cp .env.example .env
 docker compose up --build
 ```
 
-Make sure to edit `.env.example` and set the appropriate CUDA version for your GPU.
+Make sure to edit `.env.example` and set the appropriate CUDA version for your GPU, which can be found on [developer.nvidia.com](https://developer.nvidia.com/cuda-gpus).
 
 You need to have docker compose v2.17 or higher installed in your system. For installation instructions, see [Docker compose installation](https://github.com/oobabooga/text-generation-webui/wiki/Docker-compose-installation).
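Note: Compose v2 ships as the `docker compose` plugin (as opposed to the older standalone `docker-compose` binary), so `docker compose version` is a quick way to confirm the v2.17 requirement before building.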
@@ -203,6 +203,7 @@ Optionally, you can use the following command-line flags:
 | `--lora LORA` | Name of the LoRA to apply to the model by default. |
 | `--model-dir MODEL_DIR` | Path to directory with all the models. |
 | `--lora-dir LORA_DIR` | Path to directory with all the loras. |
+| `--model-menu` | Show a model menu in the terminal when the web UI is first launched. |
 | `--no-stream` | Don't stream the text output in real time. |
 | `--settings SETTINGS_FILE` | Load the default interface settings from this json file. See `settings-template.json` for an example. If you create a file called `settings.json`, this file will be loaded by default without the need to use the `--settings` flag. |
 | `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
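As a usage sketch (directory values are the defaults registered in shared.py, not a prescribed invocation): `python server.py --model-menu --model-dir models/ --lora-dir loras/`. With `--model-menu`, the model picker runs in the terminal before the web UI comes up.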

modules/shared.py

@@ -32,7 +32,7 @@ settings = {
     'name1': 'You',
     'name2': 'Assistant',
     'context': 'This is a conversation with your Assistant. The Assistant is very helpful and is eager to chat with you and answer your questions.',
-    'greeting': 'Hello there!',
+    'greeting': '',
     'end_of_turn': '',
     'custom_stopping_strings': '',
     'stop_at_newline': False,
@@ -41,6 +41,7 @@ settings = {
     'truncation_length': 2048,
     'truncation_length_min': 0,
     'truncation_length_max': 4096,
+    'mode': 'cai-chat',
     'chat_prompt_size': 2048,
     'chat_prompt_size_min': 0,
     'chat_prompt_size_max': 2048,
@@ -89,6 +90,7 @@ parser.add_argument('--model', type=str, help='Name of the model to load by defa
 parser.add_argument('--lora', type=str, help='Name of the LoRA to apply to the model by default.')
 parser.add_argument("--model-dir", type=str, default='models/', help="Path to directory with all the models")
 parser.add_argument("--lora-dir", type=str, default='loras/', help="Path to directory with all the loras")
+parser.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.')
 parser.add_argument('--no-stream', action='store_true', help='Don\'t stream the text output in real time.')
 parser.add_argument('--settings', type=str, help='Load the default interface settings from this json file. See settings-template.json for an example. If you create a file called settings.json, this file will be loaded by default without the need to use the --settings flag.')
 parser.add_argument('--extensions', type=str, nargs="+", help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
@@ -116,9 +118,6 @@ parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quan
 parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.')
 parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models.')
 parser.add_argument('--warmup_autotune', action=argparse.BooleanOptionalAction, default=True, help='GPTQ: Enable warmup autotune. Only usable for triton.')
-parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.')
-parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.')
-parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.')
 
 # FlexGen
 parser.add_argument('--flexgen', action='store_true', help='Enable the use of FlexGen offloading.')
@@ -145,7 +144,7 @@ parser.add_argument("--gradio-auth-path", type=str, help='Set the gradio authent
 args = parser.parse_args()
 
 # Deprecation warnings for parameters that have been renamed
-deprecated_dict = {'gptq_bits': ['wbits', 0], 'gptq_model_type': ['model_type', None], 'gptq_pre_layer': ['prelayer', 0]}
+deprecated_dict = {}
 for k in deprecated_dict:
     if eval(f"args.{k}") != deprecated_dict[k][1]:
         print(f"Warning: --{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.")
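Since this commit empties deprecated_dict, the warning loop above is now a no-op. For readers tracing the pattern, a minimal runnable sketch of the same check, written with getattr in place of eval (it reuses the one flag pair removed above; nothing else is implied about the repo's code):

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.')
    parser.add_argument('--wbits', type=int, default=0)
    args = parser.parse_args()

    # old flag -> [new flag, default]; a non-default value means the user passed the old flag
    deprecated_dict = {'gptq_bits': ['wbits', 0]}
    for k in deprecated_dict:
        if getattr(args, k) != deprecated_dict[k][1]:  # equivalent to eval(f"args.{k}")
            print(f"Warning: --{k} is deprecated and will be removed. Use --{deprecated_dict[k][0]} instead.")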

server.py

@@ -5,6 +5,7 @@ os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
 import importlib
 import io
 import json
+import math
 import os
 import re
 import sys
@@ -15,6 +16,8 @@ from datetime import datetime
 from pathlib import Path
 
 import gradio as gr
+import psutil
+import torch
 from PIL import Image
 
 import modules.extensions as extensions_module
@@ -37,11 +40,18 @@ if settings_file is not None:
         shared.settings[item] = new_settings[item]
 
 
+def special_sort(model_name):
+    if '_' in model_name:
+        return ('_'.join(model_name.split('_')[1:])).lower()
+    else:
+        return model_name.lower()
+
+
 def get_available_models():
     if shared.args.flexgen:
-        return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if item.name.endswith('-np')], key=str.lower)
+        return sorted([re.sub('-np$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if item.name.endswith('-np')], key=special_sort)
     else:
-        return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower)
+        return sorted([re.sub('.pth$', '', item.name) for item in list(Path(f'{shared.args.model_dir}/').glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=special_sort)
 
 
 def get_available_presets():
@@ -78,18 +88,20 @@ def get_available_softprompts():
 
 def get_available_loras():
-    return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=str.lower)
+    return ['None'] + sorted([item.name for item in list(Path(shared.args.lora_dir).glob('*')) if not item.name.endswith(('.txt', '-np', '.pt', '.json'))], key=special_sort)
 
 
 def load_model_wrapper(selected_model):
-    if selected_model != shared.model_name:
+    try:
+        yield f"Loading {selected_model}..."
         shared.model_name = selected_model
 
         unload_model()
         if selected_model != '':
             shared.model, shared.tokenizer = load_model(shared.model_name)
 
-    return selected_model
+        yield f"Successfully loaded {selected_model}"
+    except:
+        yield traceback.format_exc()
 
 
 def load_lora_wrapper(selected_lora):
@@ -203,31 +215,146 @@ def download_model_wrapper(repo_id):
         yield traceback.format_exc()
 
-def create_model_menus():
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                shared.gradio['model_menu'] = gr.Dropdown(choices=available_models, value=shared.model_name, label='Model')
-                ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': get_available_models()}, 'refresh-button')
-        with gr.Column():
-            with gr.Row():
-                shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA')
-                ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras()}, 'refresh-button')
-    with gr.Row():
-        with gr.Column():
-            with gr.Row():
-                with gr.Column():
-                    shared.gradio['custom_model_menu'] = gr.Textbox(label="Download custom model or LoRA",
-                                                                    info="Enter Hugging Face username/model path, e.g: facebook/galactica-125m")
-                with gr.Column():
-                    shared.gradio['download_button'] = gr.Button("Download")
-            shared.gradio['download_status'] = gr.Markdown()
-        with gr.Column():
-            pass
-
-    shared.gradio['model_menu'].change(load_model_wrapper, shared.gradio['model_menu'], shared.gradio['model_menu'], show_progress=True)
+# Model parameters: list the relevant interface elements
+def list_model_parameters():
+    parameters = ['cpu_memory', 'auto_devices', 'disk', 'cpu', 'bf16', 'load_in_8bit', 'wbits', 'groupsize', 'model_type', 'pre_layer']
+    for i in range(torch.cuda.device_count()):
+        parameters.append(f'gpu_memory_{i}')
+
+    return parameters
+
+
+# Model parameters: update the command-line arguments based on the interface values
+def update_model_parameters(*args):
+    args = list(args)  # the values of the parameters
+    elements = list_model_parameters()  # the names of the parameters
+
+    gpu_memories = []
+    for i, element in enumerate(elements):
+        if element.startswith('gpu_memory'):
+            gpu_memories.append(args[i])
+            continue
+
+        if element == 'cpu_memory' and args[i] == 0:
+            args[i] = None
+
+        if element == 'wbits' and args[i] == 'None':
+            args[i] = 0
+
+        if element == 'groupsize' and args[i] == 'None':
+            args[i] = -1
+
+        if element == 'model_type' and args[i] == 'None':
+            args[i] = None
+
+        if element in ['wbits', 'groupsize', 'pre_layer']:
+            args[i] = int(args[i])
+        elif element == 'cpu_memory' and args[i] is not None:
+            args[i] = f"{args[i]}MiB"
+
+        #print(element, repr(eval(f"shared.args.{element}")), repr(args[i]))
+        #print(f"shared.args.{element} = args[i]")
+        exec(f"shared.args.{element} = args[i]")
+
+    found_positive = False
+    for i in gpu_memories:
+        if i > 0:
+            found_positive = True
+            break
+
+    if found_positive:
+        shared.args.gpu_memory = [f"{i}MiB" for i in gpu_memories]
+    else:
+        shared.args.gpu_memory = None
+
+
+def create_model_menus():
+    # Finding the default values for the GPU and CPU memories
+    total_mem = []
+    for i in range(torch.cuda.device_count()):
+        total_mem.append(math.floor(torch.cuda.get_device_properties(i).total_memory / (1024*1024)))
+
+    default_gpu_mem = []
+    if shared.args.gpu_memory is not None and len(shared.args.gpu_memory) > 0:
+        for i in shared.args.gpu_memory:
+            if 'mib' in i.lower():
+                default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i)))
+            else:
+                default_gpu_mem.append(int(re.sub('[a-zA-Z ]', '', i))*1000)
+    while len(default_gpu_mem) < len(total_mem):
+        default_gpu_mem.append(0)
+
+    total_cpu_mem = math.floor(psutil.virtual_memory().total / (1024*1024))
+    if shared.args.cpu_memory is not None:
+        default_cpu_mem = re.sub('[a-zA-Z ]', '', shared.args.cpu_memory)
+    else:
+        default_cpu_mem = 0
+
+    components = {}
+    with gr.Row():
+        with gr.Column():
+            with gr.Row():
+                with gr.Column():
+                    with gr.Row():
+                        shared.gradio['model_menu'] = gr.Dropdown(choices=available_models, value=shared.model_name, label='Model')
+                        ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': get_available_models()}, 'refresh-button')
+
+                with gr.Column():
+                    with gr.Row():
+                        shared.gradio['lora_menu'] = gr.Dropdown(choices=available_loras, value=shared.lora_name, label='LoRA')
+                        ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': get_available_loras()}, 'refresh-button')
+
+        with gr.Column():
+            unload = gr.Button("Unload the model")
+            reload = gr.Button("Reload the model")
+
+    with gr.Row():
+        with gr.Column():
+            with gr.Box():
+                gr.Markdown('Transformers parameters')
+                with gr.Row():
+                    with gr.Column():
+                        for i in range(len(total_mem)):
+                            components[f'gpu_memory_{i}'] = gr.Slider(label=f"gpu-memory in MiB for device :{i}", maximum=total_mem[i], value=default_gpu_mem[i])
+                        components['cpu_memory'] = gr.Slider(label="cpu-memory in MiB", maximum=total_cpu_mem, value=default_cpu_mem)
+
+                    with gr.Column():
+                        components['auto_devices'] = gr.Checkbox(label="auto-devices", value=shared.args.auto_devices)
+                        components['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
+                        components['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu)
+                        components['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
+                        components['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
+
+        with gr.Column():
+            with gr.Box():
+                gr.Markdown('GPTQ parameters')
+                with gr.Row():
+                    with gr.Column():
+                        components['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=shared.args.wbits if shared.args.wbits > 0 else "None")
+                        components['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
+
+                    with gr.Column():
+                        components['model_type'] = gr.Dropdown(label="model_type", choices=["None", "llama", "opt", "gpt-j"], value=shared.args.model_type or "None")
+                        components['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer)
+
+    with gr.Row():
+        with gr.Column():
+            shared.gradio['custom_model_menu'] = gr.Textbox(label="Download custom model or LoRA", info="Enter Hugging Face username/model path, e.g: facebook/galactica-125m")
+            shared.gradio['download_button'] = gr.Button("Download")
+
+        with gr.Column():
+            shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
+
+    shared.gradio['model_menu'].change(
+        update_model_parameters, [components[k] for k in list_model_parameters()], None).then(
+        load_model_wrapper, shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=True)
+
+    unload.click(
+        unload_model, None, None).then(
+        lambda: "Model unloaded", None, shared.gradio['model_status'])
+
+    reload.click(
+        unload_model, None, None).then(
+        update_model_parameters, [components[k] for k in list_model_parameters()], None).then(
+        load_model_wrapper, shared.gradio['model_menu'], shared.gradio['model_status'], show_progress=True)
+
     shared.gradio['lora_menu'].change(load_lora_wrapper, shared.gradio['lora_menu'], shared.gradio['lora_menu'], show_progress=True)
-    shared.gradio['download_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['download_status'], show_progress=False)
+    shared.gradio['download_button'].click(download_model_wrapper, shared.gradio['custom_model_menu'], shared.gradio['model_status'], show_progress=False)
 
 
 def create_settings_menus(default_preset):
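A design note on the bindings above: gradio's `.then()` chains a second callback after the first finishes, which is what lets a single dropdown change first sync the flags (`update_model_parameters`) and then load the model with them (`load_model_wrapper`). A standalone sketch of the same pattern (the demo components here are ours, not the repo's):

    import gradio as gr

    with gr.Blocks() as demo:
        name = gr.Textbox(label='Name')
        status = gr.Markdown()

        # First callback runs, then the second -- mirroring the
        # update_model_parameters -> load_model_wrapper chain above.
        name.change(lambda s: None, name, None).then(
            lambda s: f"Loaded {s}", name, status)

    demo.launch()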
@@ -333,7 +460,8 @@ else:
 
 # Default model
 if shared.args.model is not None:
     shared.model_name = shared.args.model
-else:
+    shared.model, shared.tokenizer = load_model(shared.model_name)
+elif shared.args.model_menu:
     if len(available_models) == 0:
         print('No models are available! Please download at least one.')
         sys.exit(0)
@@ -347,8 +475,9 @@ else:
         i = int(input()) - 1
         print()
 
     shared.model_name = available_models[i]
     shared.model, shared.tokenizer = load_model(shared.model_name)
 
-if shared.args.lora:
+if shared.args.model is not None and shared.args.lora:
     add_lora_to_model(shared.args.lora)
 
 # Default UI settings
@@ -372,12 +501,12 @@ def create_interface():
     shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements})
     shared.gradio['Chat input'] = gr.State()
 
-    with gr.Tab("Text generation", elem_id="main"):
+    with gr.Tab('Text generation', elem_id='main'):
         shared.gradio['display'] = gr.HTML(value=chat_html_wrapper(shared.history['visible'], shared.settings['name1'], shared.settings['name2'], 'cai-chat'))
         shared.gradio['textbox'] = gr.Textbox(label='Input')
         with gr.Row():
             shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate')
-            shared.gradio['Stop'] = gr.Button('Stop', elem_id="stop")
+            shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop')
         with gr.Row():
             shared.gradio['Regenerate'] = gr.Button('Regenerate')
             shared.gradio['Continue'] = gr.Button('Continue')
@@ -389,24 +518,24 @@ def create_interface():
             shared.gradio['Copy last reply'] = gr.Button('Copy last reply')
         with gr.Row():
             shared.gradio['Clear history'] = gr.Button('Clear history')
-            shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant="stop", visible=False)
+            shared.gradio['Clear history-confirm'] = gr.Button('Confirm', variant='stop', visible=False)
             shared.gradio['Clear history-cancel'] = gr.Button('Cancel', visible=False)
             shared.gradio['Remove last'] = gr.Button('Remove last')
 
-        shared.gradio["mode"] = gr.Radio(choices=["cai-chat", "chat", "instruct"], value="cai-chat", label="Mode")
-        shared.gradio["Instruction templates"] = gr.Dropdown(choices=get_available_instruction_templates(), label="Instruction template", value="None", visible=False, info="Change this according to the model/LoRA that you are using.")
+        shared.gradio['mode'] = gr.Radio(choices=['cai-chat', 'chat', 'instruct'], value=shared.settings['mode'], label='Mode')
+        shared.gradio['Instruction templates'] = gr.Dropdown(choices=get_available_instruction_templates(), label='Instruction template', value='None', visible=False, info='Change this according to the model/LoRA that you are using.')
 
-    with gr.Tab("Character", elem_id="chat-settings"):
+    with gr.Tab('Character', elem_id='chat-settings'):
         with gr.Row():
             with gr.Column(scale=8):
                 shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Your name')
                 shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
                 shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=4, label='Greeting')
                 shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=4, label='Context')
-                shared.gradio['end_of_turn'] = gr.Textbox(value=shared.settings["end_of_turn"], lines=1, label='End of turn string')
+                shared.gradio['end_of_turn'] = gr.Textbox(value=shared.settings['end_of_turn'], lines=1, label='End of turn string')
             with gr.Column(scale=1):
-                shared.gradio['character_picture'] = gr.Image(label='Character picture', type="pil")
-                shared.gradio['your_picture'] = gr.Image(label='Your picture', type="pil", value=Image.open(Path("cache/pfp_me.png")) if Path("cache/pfp_me.png").exists() else None)
+                shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil')
+                shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None)
 
         with gr.Row():
             shared.gradio['character_menu'] = gr.Dropdown(choices=available_characters, value='None', label='Character', elem_id='character-menu')
             ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': get_available_characters()}, 'refresh-button')
@@ -422,7 +551,7 @@ def create_interface():
                 shared.gradio['download'] = gr.File()
                 shared.gradio['download_button'] = gr.Button(value='Click me')
             with gr.Tab('Upload character'):
-                gr.Markdown("# JSON format")
+                gr.Markdown('# JSON format')
                 with gr.Row():
                     with gr.Column():
                         gr.Markdown('1. Select the JSON file')
@@ -432,7 +561,7 @@ def create_interface():
                         shared.gradio['upload_img_bot'] = gr.File(type='binary', file_types=['image'])
                     shared.gradio['Upload character'] = gr.Button(value='Submit')
 
-                gr.Markdown("# TavernAI PNG format")
+                gr.Markdown('# TavernAI PNG format')
                 shared.gradio['upload_img_tavern'] = gr.File(type='binary', file_types=['image'])
 
     with gr.Tab("Parameters", elem_id="parameters"):
@@ -648,7 +777,7 @@ def create_interface():
             current_mode = mode
             break
 
     cmd_list = vars(shared.args)
-    bool_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes]
+    bool_list = [k for k in cmd_list if type(cmd_list[k]) is bool and k not in modes + list_model_parameters()]
     bool_active = [k for k in bool_list if vars(shared.args)[k]]
 
     gr.Markdown("*Experimental*")
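A note on `update_model_parameters` above: the gpu-memory sliders report plain MiB integers, and `shared.args.gpu_memory` is only populated when at least one device slider is positive. A self-contained sketch of just that rule (the helper name is ours, not the repo's):

    def gpu_memory_args(gpu_memories):
        # Mirrors update_model_parameters: a positive value on any device
        # -> a list of '<n>MiB' strings; all zeros -> None (flag unset).
        if any(m > 0 for m in gpu_memories):
            return [f"{m}MiB" for m in gpu_memories]
        return None

    assert gpu_memory_args([0, 0]) is None
    assert gpu_memory_args([6000, 0]) == ['6000MiB', '0MiB']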

settings-template.json

@@ -6,15 +6,16 @@
     "name1": "You",
     "name2": "Assistant",
     "context": "This is a conversation with your Assistant. The Assistant is very helpful and is eager to chat with you and answer your questions.",
-    "greeting": "Hello there!",
+    "greeting": "",
     "end_of_turn": "",
     "custom_stopping_strings": "",
     "stop_at_newline": false,
     "add_bos_token": true,
-    "ban_eos_token": true,
+    "ban_eos_token": false,
     "truncation_length": 2048,
     "truncation_length_min": 0,
     "truncation_length_max": 4096,
+    "mode": "cai-chat",
    "chat_prompt_size": 2048,
    "chat_prompt_size_min": 0,
    "chat_prompt_size_max": 2048,