mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-11 21:10:40 +01:00
Merge pull request #2587 from oobabooga/dev
This commit is contained in:
commit
aaf240a14c
176
api-examples/api-example-model.py
Executable file
176
api-examples/api-example-model.py
Executable file
@ -0,0 +1,176 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
# host:port that every request URL built in this script is sent to
HOST = '0.0.0.0:5000'
|
||||||
|
|
||||||
|
def generate(prompt, tokens=200):
    """Send `prompt` to the generate endpoint and return the generated text.

    Posts ``prompt`` and ``max_new_tokens`` (taken from ``tokens``) as JSON to
    ``/api/v1/generate`` on HOST. On HTTP 200 the ``text`` of the first entry
    in ``results`` is returned; for any other status code the result is None.
    """
    payload = {'prompt': prompt, 'max_new_tokens': tokens}
    url = f'http://{HOST}/api/v1/generate'
    reply = requests.post(url, json=payload)

    if reply.status_code != 200:
        return None

    first_result = reply.json()['results'][0]
    return first_result['text']
|
||||||
|
|
||||||
|
|
||||||
|
def model_api(request):
    """POST `request` as JSON to the model endpoint and return the decoded JSON reply."""
    endpoint = f'http://{HOST}/api/v1/model'
    return requests.post(endpoint, json=request).json()
|
||||||
|
|
||||||
|
|
||||||
|
# print some common settings
|
||||||
|
def print_basic_model_info(response, basic_settings=('truncation_length', 'instruction_template')):
    """Print the model name, LoRA names and selected settings from a model API reply.

    Args:
        response: Decoded reply containing a 'result' mapping with the keys
            'model_name', 'lora_names' and 'shared.settings'.
        basic_settings: Names of the 'shared.settings' entries to print, in
            order. Defaults to the two settings this script always reported.

    Raises:
        KeyError: If `response` lacks one of the keys above or a requested
            setting is not present in 'shared.settings'.
    """
    result = response['result']
    print("Model: ", result['model_name'])
    print("Lora(s): ", result['lora_names'])
    for setting in basic_settings:
        print(setting, "=", result['shared.settings'][setting])
|
||||||
|
|
||||||
|
|
||||||
|
# model info
|
||||||
|
def model_info():
    """Query the model endpoint with the 'info' action and print the basic details."""
    print_basic_model_info(model_api({'action': 'info'}))
|
||||||
|
|
||||||
|
|
||||||
|
# simple loader
|
||||||
|
def model_load(model_name):
    """Request a 'load' of `model_name` with no extra arguments; return the API's JSON reply."""
    payload = dict(action='load', model_name=model_name)
    return model_api(payload)
|
||||||
|
|
||||||
|
|
||||||
|
# complex loader
|
||||||
|
def complex_model_load(model):
    """Ask the API to load `model`, guessing loader arguments from its name.

    The request always carries the full set of default arguments below; a few
    of them are then overridden based on substrings found in the lower-cased
    model name (bit width, group size, 8-bit/bf16 loading, llama.cpp GPU
    layers, RWKV strategy).

    Args:
        model: Model name as reported by the API's 'list' action. It is sent
            unchanged as 'model_name'; only the matching is case-insensitive.

    Returns:
        The decoded JSON reply from model_api().
    """

    def guess_groupsize(model_name):
        # Group size is read from the conventional '<N>g' name suffix; -1 if absent.
        if '1024g' in model_name:
            return 1024
        elif '128g' in model_name:
            return 128
        elif '32g' in model_name:
            return 32
        else:
            return -1

    req = {
        'action': 'load',
        'model_name': model,
        'args': {
            'gptq_for_llama': False,  # Use AutoGPTQ by default, set to True for gptq-for-llama

            'bf16': False,
            'load_in_8bit': False,
            'groupsize': 0,
            'wbits': 0,

            # llama.cpp
            'threads': 0,
            'n_batch': 512,
            'no_mmap': False,
            'mlock': False,
            'cache_capacity': None,
            'n_gpu_layers': 0,
            'n_ctx': 2048,

            # RWKV
            'rwkv_strategy': None,
            'rwkv_cuda_on': False,

            # b&b 4-bit
            #'load_in_4bit': False,
            #'compute_dtype': 'float16',
            #'quant_type': 'nf4',
            #'use_double_quant': False,

            #"cpu": false,
            #"auto_devices": false,
            #"gpu_memory": null,
            #"cpu_memory": null,
            #"disk": false,
            #"disk_cache_dir": "cache",
        },
    }

    model = model.lower()

    # '3bit' must be tested before the generic 'gptq' marker: a name such as
    # 'llama-65b-gptq-3bit' contains both and would otherwise be sent as 4-bit.
    if '3bit' in model:
        req['args']['wbits'] = 3
        req['args']['groupsize'] = guess_groupsize(model)
    elif '4bit' in model or 'gptq' in model or 'int4' in model:
        req['args']['wbits'] = 4
        req['args']['groupsize'] = guess_groupsize(model)
    else:
        req['args']['gptq_for_llama'] = False

    if '8bit' in model:
        req['args']['load_in_8bit'] = True
    elif '-hf' in model or 'fp16' in model:
        if '7b' in model:
            req['args']['bf16'] = True  # for 24GB
        elif '13b' in model:
            req['args']['load_in_8bit'] = True  # for 24GB
    elif 'ggml' in model:
        #req['args']['threads'] = 16
        if '7b' in model:
            req['args']['n_gpu_layers'] = 100
        elif '13b' in model:
            req['args']['n_gpu_layers'] = 100
        elif '30b' in model or '33b' in model:
            req['args']['n_gpu_layers'] = 59  # 24GB
        elif '65b' in model:
            req['args']['n_gpu_layers'] = 42  # 24GB
    elif 'rwkv' in model:
        req['args']['rwkv_cuda_on'] = True
        if '14b' in model:
            req['args']['rwkv_strategy'] = 'cuda f16i8'  # 24GB
        else:
            req['args']['rwkv_strategy'] = 'cuda f16'  # 24GB

    return model_api(req)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Load every model the server lists and run a two-token sanity prompt on each.
    for model in model_api({'action': 'list'})['result']:
        try:
            resp = complex_model_load(model)

            if 'error' in resp:
                print(f"❌ {model} FAIL Error: {resp['error']['message']}")
                continue

            print_basic_model_info(resp)

            ans = generate("0,1,1,2,3,5,8,13,", tokens=2)

            # generate() returns None on a non-200 reply; report that as a plain
            # FAIL instead of tripping a TypeError on the membership test.
            if ans is not None and '21' in ans:
                print(f"✅ {model} PASS ({ans})")
            else:
                print(f"❌ {model} FAIL ({ans})")

        except Exception as e:
            # Top-level boundary: one broken model must not stop the sweep.
            print(f"❌ {model} FAIL Exception: {repr(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
# 0,1,1,2,3,5,8,13, is the Fibonacci sequence; the next number is 21.
|
||||||
|
# Some results below.
|
||||||
|
""" $ ./model-api-example.py
|
||||||
|
Model: 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda
|
||||||
|
Lora(s): []
|
||||||
|
truncation_length = 2048
|
||||||
|
instruction_template = Alpaca
|
||||||
|
✅ 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda PASS (21)
|
||||||
|
Model: 4bit_WizardLM-13B-Uncensored-4bit-128g
|
||||||
|
Lora(s): []
|
||||||
|
truncation_length = 2048
|
||||||
|
instruction_template = WizardLM
|
||||||
|
✅ 4bit_WizardLM-13B-Uncensored-4bit-128g PASS (21)
|
||||||
|
Model: Aeala_VicUnlocked-alpaca-30b-4bit
|
||||||
|
Lora(s): []
|
||||||
|
truncation_length = 2048
|
||||||
|
instruction_template = Alpaca
|
||||||
|
✅ Aeala_VicUnlocked-alpaca-30b-4bit PASS (21)
|
||||||
|
Model: alpaca-30b-4bit
|
||||||
|
Lora(s): []
|
||||||
|
truncation_length = 2048
|
||||||
|
instruction_template = Alpaca
|
||||||
|
✅ alpaca-30b-4bit PASS (21)
|
||||||
|
"""
|
4
characters/instruction-following/Guanaco-QLoRA.yaml
Normal file
4
characters/instruction-following/Guanaco-QLoRA.yaml
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
user: "### Human:"
|
||||||
|
bot: "### Assistant:"
|
||||||
|
turn_template: "<|user|> <|user-message|>\n<|bot|> <|bot-message|></s>\n"
|
||||||
|
context: ""
|
@ -28,9 +28,15 @@ Once downloaded, it will be automatically applied to **every** `LlamaForCausalLM
|
|||||||
pip install protobuf==3.20.1
|
pip install protobuf==3.20.1
|
||||||
```
|
```
|
||||||
|
|
||||||
2. Use the script below to convert the model in `.pth` format that you, a fellow academic, downloaded using Meta's official link:
|
2. Use the script below to convert the model in `.pth` format that you, a fellow academic, downloaded using Meta's official link.
|
||||||
|
|
||||||
### [convert_llama_weights_to_hf.py](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py)
|
If you have `transformers` installed in place:
|
||||||
|
|
||||||
|
```
|
||||||
|
python -m transformers.models.llama.convert_llama_weights_to_hf --input_dir /path/to/LLaMA --model_size 7B --output_dir /tmp/outputs/llama-7b
|
||||||
|
```
|
||||||
|
|
||||||
|
Otherwise download [convert_llama_weights_to_hf.py](https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py) first and run:
|
||||||
|
|
||||||
```
|
```
|
||||||
python convert_llama_weights_to_hf.py --input_dir /path/to/LLaMA --model_size 7B --output_dir /tmp/outputs/llama-7b
|
python convert_llama_weights_to_hf.py --input_dir /path/to/LLaMA --model_size 7B --output_dir /tmp/outputs/llama-7b
|
||||||
|
@ -108,7 +108,7 @@ class ModelDownloader:
|
|||||||
is_lora = False
|
is_lora = False
|
||||||
while True:
|
while True:
|
||||||
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
|
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
|
||||||
r = self.s.get(url, timeout=10)
|
r = self.s.get(url, timeout=20)
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
content = r.content
|
content = r.content
|
||||||
|
|
||||||
@ -180,7 +180,7 @@ class ModelDownloader:
|
|||||||
output_path = output_folder / filename
|
output_path = output_folder / filename
|
||||||
if output_path.exists() and not start_from_scratch:
|
if output_path.exists() and not start_from_scratch:
|
||||||
# Check if the file has already been downloaded completely
|
# Check if the file has already been downloaded completely
|
||||||
r = self.s.get(url, stream=True, timeout=10)
|
r = self.s.get(url, stream=True, timeout=20)
|
||||||
total_size = int(r.headers.get('content-length', 0))
|
total_size = int(r.headers.get('content-length', 0))
|
||||||
if output_path.stat().st_size >= total_size:
|
if output_path.stat().st_size >= total_size:
|
||||||
return
|
return
|
||||||
@ -191,7 +191,7 @@ class ModelDownloader:
|
|||||||
headers = {}
|
headers = {}
|
||||||
mode = 'wb'
|
mode = 'wb'
|
||||||
|
|
||||||
r = self.s.get(url, stream=True, headers=headers, timeout=10)
|
r = self.s.get(url, stream=True, headers=headers, timeout=20)
|
||||||
with open(output_path, mode) as f:
|
with open(output_path, mode) as f:
|
||||||
total_size = int(r.headers.get('content-length', 0))
|
total_size = int(r.headers.get('content-length', 0))
|
||||||
block_size = 1024
|
block_size = 1024
|
||||||
|
@ -5,9 +5,23 @@ from threading import Thread
|
|||||||
from extensions.api.util import build_parameters, try_start_cloudflared
|
from extensions.api.util import build_parameters, try_start_cloudflared
|
||||||
from modules import shared
|
from modules import shared
|
||||||
from modules.chat import generate_chat_reply
|
from modules.chat import generate_chat_reply
|
||||||
from modules.text_generation import encode, generate_reply, stop_everything_event
|
from modules.LoRA import add_lora_to_model
|
||||||
|
from modules.models import load_model, unload_model
|
||||||
|
from modules.text_generation import (encode, generate_reply,
|
||||||
|
stop_everything_event)
|
||||||
|
from modules.utils import get_available_models
|
||||||
|
from server import get_model_specific_settings, update_model_parameters
|
||||||
|
|
||||||
|
|
||||||
|
def get_model_info():
    """Return the loaded model's name and LoRA names plus a dump of settings and args."""
    info = {}
    info['model_name'] = shared.model_name
    info['lora_names'] = shared.lora_names
    # dump
    info['shared.settings'] = shared.settings
    info['shared.args'] = vars(shared.args)
    return info
|
||||||
|
|
||||||
class Handler(BaseHTTPRequestHandler):
|
class Handler(BaseHTTPRequestHandler):
|
||||||
def do_GET(self):
|
def do_GET(self):
|
||||||
if self.path == '/api/v1/model':
|
if self.path == '/api/v1/model':
|
||||||
@ -91,6 +105,67 @@ class Handler(BaseHTTPRequestHandler):
|
|||||||
|
|
||||||
self.wfile.write(response.encode('utf-8'))
|
self.wfile.write(response.encode('utf-8'))
|
||||||
|
|
||||||
|
elif self.path == '/api/v1/model':
|
||||||
|
self.send_response(200)
|
||||||
|
self.send_header('Content-Type', 'application/json')
|
||||||
|
self.end_headers()
|
||||||
|
|
||||||
|
# by default return the same as the GET interface
|
||||||
|
result = shared.model_name
|
||||||
|
|
||||||
|
# Actions: info, load, list, unload
|
||||||
|
action = body.get('action', '')
|
||||||
|
|
||||||
|
if action == 'load':
|
||||||
|
model_name = body['model_name']
|
||||||
|
args = body.get('args', {})
|
||||||
|
print('args', args)
|
||||||
|
for k in args:
|
||||||
|
setattr(shared.args, k, args[k])
|
||||||
|
|
||||||
|
shared.model_name = model_name
|
||||||
|
unload_model()
|
||||||
|
|
||||||
|
model_settings = get_model_specific_settings(shared.model_name)
|
||||||
|
shared.settings.update(model_settings)
|
||||||
|
update_model_parameters(model_settings, initial=True)
|
||||||
|
|
||||||
|
if shared.settings['mode'] != 'instruct':
|
||||||
|
shared.settings['instruction_template'] = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
shared.model, shared.tokenizer = load_model(shared.model_name)
|
||||||
|
if shared.args.lora:
|
||||||
|
add_lora_to_model(shared.args.lora) # list
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
response = json.dumps({'error': { 'message': repr(e) } })
|
||||||
|
|
||||||
|
self.wfile.write(response.encode('utf-8'))
|
||||||
|
raise e
|
||||||
|
|
||||||
|
shared.args.model = shared.model_name
|
||||||
|
|
||||||
|
result = get_model_info()
|
||||||
|
|
||||||
|
elif action == 'unload':
|
||||||
|
unload_model()
|
||||||
|
shared.model_name = None
|
||||||
|
shared.args.model = None
|
||||||
|
result = get_model_info()
|
||||||
|
|
||||||
|
elif action == 'list':
|
||||||
|
result = get_available_models()
|
||||||
|
|
||||||
|
elif action == 'info':
|
||||||
|
result = get_model_info()
|
||||||
|
|
||||||
|
response = json.dumps({
|
||||||
|
'result': result,
|
||||||
|
})
|
||||||
|
|
||||||
|
self.wfile.write(response.encode('utf-8'))
|
||||||
|
|
||||||
elif self.path == '/api/v1/token-count':
|
elif self.path == '/api/v1/token-count':
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
self.send_header('Content-Type', 'application/json')
|
self.send_header('Content-Type', 'application/json')
|
||||||
|
@ -188,3 +188,9 @@ llama-65b-gptq-3bit:
|
|||||||
mode: 'instruct'
|
mode: 'instruct'
|
||||||
instruction_template: 'Vicuna-v1.1'
|
instruction_template: 'Vicuna-v1.1'
|
||||||
truncation_length: 4096
|
truncation_length: 4096
|
||||||
|
.*WizardLM-30B-V1.0:
|
||||||
|
mode: 'instruct'
|
||||||
|
instruction_template: 'Vicuna-v1.1'
|
||||||
|
TheBloke_WizardLM-30B-GPTQ:
|
||||||
|
mode: 'instruct'
|
||||||
|
instruction_template: 'Vicuna-v1.1'
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
accelerate==0.20.3
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
@ -14,12 +15,11 @@ safetensors==0.3.1
|
|||||||
sentencepiece
|
sentencepiece
|
||||||
tqdm
|
tqdm
|
||||||
scipy
|
scipy
|
||||||
git+https://github.com/huggingface/peft@3714aa2fff158fdfa637b2b65952580801d890b2
|
transformers==4.30.0
|
||||||
git+https://github.com/huggingface/transformers@e45e756d22206ca8fa9fb057c8c3d8fa79bf81c6
|
git+https://github.com/huggingface/peft@e45529b149c7f91ec1d4d82a5a152ef56c56cb94
|
||||||
git+https://github.com/huggingface/accelerate@0226f750257b3bf2cadc4f189f9eef0c764a0467
|
|
||||||
bitsandbytes==0.39.0; platform_system != "Windows"
|
bitsandbytes==0.39.0; platform_system != "Windows"
|
||||||
https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.39.0-py3-none-any.whl; platform_system == "Windows"
|
||||||
llama-cpp-python==0.1.57; platform_system != "Windows"
|
llama-cpp-python==0.1.57; platform_system != "Windows"
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.57/llama_cpp_python-0.1.57-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.57/llama_cpp_python-0.1.57-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.2/auto_gptq-0.2.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.0/auto_gptq-0.2.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.2.2/auto_gptq-0.2.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux"
|
||||||
|
@ -474,21 +474,21 @@ def create_settings_menus(default_preset):
|
|||||||
gr.Markdown('Main parameters')
|
gr.Markdown('Main parameters')
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
|
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
|
||||||
shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature', info='Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness.')
|
shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature', info='Primary factor to control randomness of outputs. 0 = deterministic (only the most likely token is used). Higher value = more randomness.')
|
||||||
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p', info='If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results.')
|
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p', info='If not set to 1, select tokens with probabilities adding up to less than this number. Higher value = higher range of possible random results.')
|
||||||
shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k', info='Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results.')
|
shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k', info='Similar to top_p, but select instead only the top_k most likely tokens. Higher value = higher range of possible random results.')
|
||||||
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p', info='If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text.')
|
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p', info='If not set to 1, select only tokens that are at least this much more likely to appear than random tokens, given the prior text.')
|
||||||
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff', info='In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0.')
|
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff', info='In units of 1e-4; a reasonable value is 3. This sets a probability floor below which tokens are excluded from being sampled. Should be used with top_p, top_k, and eta_cutoff set to 0.')
|
||||||
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff', info='In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0.')
|
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff', info='In units of 1e-4; a reasonable value is 3. Should be used with top_p, top_k, and epsilon_cutoff set to 0.')
|
||||||
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
|
|
||||||
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
|
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty', info='Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition.')
|
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty', info='Exponential penalty factor for repeating prior tokens. 1 means no penalty, higher value = less repetition, lower value = more repetition.')
|
||||||
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty', info='Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge.')
|
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty', info='Also known as the "Hallucinations filter". Used to penalize tokens that are *not* in the prior text. Higher value = more likely to stay in context, lower value = more likely to diverge.')
|
||||||
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size', info='If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases.')
|
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size', info='If not set to 0, specifies the length of token sets that are completely blocked from repeating at all. Higher values = blocks larger phrases, lower values = blocks words or letters from repeating. Only 0 or high values are a good idea in most cases.')
|
||||||
shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length', info='Minimum generation length in tokens.')
|
shared.gradio['min_length'] = gr.Slider(0, 2000, step=1, value=generate_params['min_length'], label='min_length', info='Minimum generation length in tokens.')
|
||||||
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
|
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
|
||||||
|
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
create_chat_settings_menus()
|
create_chat_settings_menus()
|
||||||
|
Loading…
x
Reference in New Issue
Block a user