Organize internals (#6646)

oobabooga 2025-01-10 18:04:32 -03:00 committed by GitHub
parent 17aa97248f
commit 83c426e96b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 346 additions and 310 deletions

View File

@@ -7,48 +7,48 @@ from pydantic import BaseModel, Field
 class GenerationOptions(BaseModel):
     preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
-    min_p: float = 0
-    dynamic_temperature: bool = False
     dynatemp_low: float = 1
     dynatemp_high: float = 1
     dynatemp_exponent: float = 1
     smoothing_factor: float = 0
     smoothing_curve: float = 1
+    min_p: float = 0
     top_k: int = 0
-    repetition_penalty: float = 1
-    repetition_penalty_range: int = 1024
     typical_p: float = 1
-    tfs: float = 1
-    top_a: float = 0
+    xtc_threshold: float = 0.1
+    xtc_probability: float = 0
     epsilon_cutoff: float = 0
     eta_cutoff: float = 0
-    guidance_scale: float = 1
-    negative_prompt: str = ''
+    tfs: float = 1
+    top_a: float = 0
+    dry_multiplier: float = 0
+    dry_allowed_length: int = 2
+    dry_base: float = 1.75
+    repetition_penalty: float = 1
+    encoder_repetition_penalty: float = 1
+    no_repeat_ngram_size: int = 0
+    repetition_penalty_range: int = 1024
     penalty_alpha: float = 0
+    guidance_scale: float = 1
     mirostat_mode: int = 0
     mirostat_tau: float = 5
     mirostat_eta: float = 0.1
-    temperature_last: bool = False
-    do_sample: bool = True
-    seed: int = -1
-    encoder_repetition_penalty: float = 1
-    no_repeat_ngram_size: int = 0
-    dry_multiplier: float = 0
-    dry_base: float = 1.75
-    dry_allowed_length: int = 2
-    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
-    xtc_threshold: float = 0.1
-    xtc_probability: float = 0
-    truncation_length: int = 0
-    max_tokens_second: int = 0
     prompt_lookup_num_tokens: int = 0
-    static_cache: bool = False
-    custom_token_bans: str = ""
-    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    max_tokens_second: int = 0
+    do_sample: bool = True
+    dynamic_temperature: bool = False
+    temperature_last: bool = False
     auto_max_new_tokens: bool = False
     ban_eos_token: bool = False
     add_bos_token: bool = True
     skip_special_tokens: bool = True
+    static_cache: bool = False
+    truncation_length: int = 0
+    seed: int = -1
+    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    custom_token_bans: str = ""
+    negative_prompt: str = ''
+    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
     grammar_string: str = ""
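Aside: the fields above are accepted as extra keys on the OpenAI-compatible completions endpoint. A minimal client sketch, assuming the web UI was started with --api and listens on the default local port (the URL and port are assumptions; adjust for your setup):

# Hypothetical client snippet; endpoint URL and port are assumptions.
import json
import urllib.request

payload = {
    "prompt": "Once upon a time",
    "max_tokens": 64,
    # GenerationOptions fields ride along with the standard request body:
    "min_p": 0.05,
    "dynatemp_low": 0.8,
    "dynatemp_high": 1.2,
    "xtc_probability": 0.1,
}
req = urllib.request.Request(
    "http://127.0.0.1:5000/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["text"])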

View File

@@ -7,102 +7,103 @@ from modules import shared
 loaders_and_params = OrderedDict({
     'Transformers': [
-        'cpu_memory',
         'gpu_memory',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'bf16',
-        'cpu',
-        'disk',
-        'auto_devices',
-        'use_double_quant',
-        'quant_type',
-        'compute_dtype',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
+        'cpu_memory',
         'alpha_value',
         'compress_pos_emb',
-    ],
-    'llama.cpp': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'use_flash_attention_2',
+        'auto_devices',
         'cpu',
-        'numa',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
-        'attention_sink_size',
-    ],
-    'llamacpp_HF': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
-        'cpu',
-        'numa',
-        'cfg_cache',
+        'disk',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
         'trust_remote_code',
         'no_use_fast',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
+    ],
+    'llama.cpp': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
+        'attention_sink_size',
         'tensorcores',
         'flash_attn',
         'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+    ],
+    'llamacpp_HF': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
         'attention_sink_size',
+        'tensorcores',
+        'flash_attn',
+        'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'cfg_cache',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
         'llamacpp_HF_info',
     ],
     'ExLlamav2_HF': [
-        'gpu_split',
         'max_seq_len',
-        'cfg_cache',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
+        'cfg_cache',
         'trust_remote_code',
         'no_use_fast',
     ],
     'ExLlamav2': [
-        'gpu_split',
         'max_seq_len',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
         'exllamav2_info',
     ],
     'HQQ': [
@@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
 def transformers_samplers():
     return {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'prompt_lookup_num_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
-        'prompt_lookup_num_tokens',
         'static_cache',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     }
@@ -174,155 +175,156 @@ loaders_samplers = {
     'HQQ': transformers_samplers(),
     'ExLlamav2': {
         'temperature',
-        'temperature_last',
-        'smoothing_factor',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
-        'top_p',
+        'smoothing_factor',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
+        'presence_penalty',
         'repetition_penalty_range',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
         'add_bos_token',
-        'custom_token_bans',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'custom_token_bans',
+        'dry_sequence_breakers',
     },
     'ExLlamav2_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llama.cpp': {
         'temperature',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
         'tfs',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'seed',
+        'presence_penalty',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
         'ban_eos_token',
+        'seed',
         'custom_token_bans',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llamacpp_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'TensorRT-LLM': {
         'temperature',
         'top_p',
         'top_k',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'ban_eos_token',
+        'presence_penalty',
         'auto_max_new_tokens',
+        'ban_eos_token',
     }
 }
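Aside: a minimal sketch of how these per-loader tables are typically consumed — keep only the sampling keys that the active loader supports. The helper name is hypothetical, and the snippet assumes it runs from the web UI's root so that modules.loaders is importable.

from modules import loaders

def filter_state_for_loader(state: dict, loader: str) -> dict:
    # loaders_samplers maps a loader name to the set of supported sampler keys.
    allowed = loaders.loaders_samplers.get(loader, set())
    return {k: v for k, v in state.items() if k in allowed}

# Example: 'penalty_alpha' is not listed under 'llama.cpp', so it is dropped.
state = {'temperature': 0.7, 'top_p': 0.9, 'penalty_alpha': 0.4}
print(filter_state_for_loader(state, 'llama.cpp'))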

View File

@@ -13,40 +13,40 @@ from modules.logging_colors import logger
 def default_preset():
     return {
         'temperature': 1,
-        'temperature_last': False,
-        'dynamic_temperature': False,
         'dynatemp_low': 1,
         'dynatemp_high': 1,
         'dynatemp_exponent': 1,
         'smoothing_factor': 0,
         'smoothing_curve': 1,
-        'top_p': 1,
         'min_p': 0,
+        'top_p': 1,
         'top_k': 0,
-        'repetition_penalty': 1,
-        'presence_penalty': 0,
-        'frequency_penalty': 0,
-        'repetition_penalty_range': 1024,
         'typical_p': 1,
-        'tfs': 1,
-        'top_a': 0,
+        'xtc_threshold': 0.1,
+        'xtc_probability': 0,
         'epsilon_cutoff': 0,
         'eta_cutoff': 0,
-        'guidance_scale': 1,
+        'tfs': 1,
+        'top_a': 0,
+        'dry_multiplier': 0,
+        'dry_allowed_length': 2,
+        'dry_base': 1.75,
+        'repetition_penalty': 1,
+        'frequency_penalty': 0,
+        'presence_penalty': 0,
+        'encoder_repetition_penalty': 1,
+        'no_repeat_ngram_size': 0,
+        'repetition_penalty_range': 1024,
         'penalty_alpha': 0,
+        'guidance_scale': 1,
         'mirostat_mode': 0,
         'mirostat_tau': 5,
         'mirostat_eta': 0.1,
         'do_sample': True,
-        'encoder_repetition_penalty': 1,
-        'no_repeat_ngram_size': 0,
-        'dry_multiplier': 0,
-        'dry_base': 1.75,
-        'dry_allowed_length': 2,
+        'dynamic_temperature': False,
+        'temperature_last': False,
+        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
         'dry_sequence_breakers': '"\\n", ":", "\\"", "*"',
-        'xtc_threshold': 0.1,
-        'xtc_probability': 0,
-        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram'
     }
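Aside: the default sampler_priority above is a newline-separated string. A small sketch of turning it into an ordered list, using the same replace/split that the HF generation path applies to user-supplied values (assumes modules.presets is importable from the repository root):

from modules import presets

priority = presets.default_preset()['sampler_priority']
order = [x.strip() for x in priority.replace('\n', ',').split(',') if x.strip()]
print(order[:5])
# ['repetition_penalty', 'presence_penalty', 'frequency_penalty', 'dry', 'temperature']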

View File

@@ -29,39 +29,39 @@ need_restart = False
 # UI defaults
 settings = {
-    'dark_theme': True,
     'show_controls': True,
     'start_with': '',
     'mode': 'chat-instruct',
     'chat_style': 'cai-chat',
+    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'prompt-default': 'QA',
     'prompt-notebook': 'QA',
-    'preset': 'min_p',
-    'max_new_tokens': 512,
-    'max_new_tokens_min': 1,
-    'max_new_tokens_max': 4096,
-    'negative_prompt': '',
-    'seed': -1,
-    'truncation_length': 2048,
-    'max_tokens_second': 0,
-    'max_updates_second': 0,
-    'prompt_lookup_num_tokens': 0,
-    'static_cache': False,
-    'custom_stopping_strings': '',
-    'custom_token_bans': '',
-    'auto_max_new_tokens': False,
-    'ban_eos_token': False,
-    'add_bos_token': True,
-    'skip_special_tokens': True,
-    'stream': True,
     'character': 'Assistant',
     'name1': 'You',
     'user_bio': '',
     'custom_system_message': '',
     'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
     'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
-    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+    'preset': 'min_p',
+    'max_new_tokens': 512,
+    'max_new_tokens_min': 1,
+    'max_new_tokens_max': 4096,
+    'prompt_lookup_num_tokens': 0,
+    'max_tokens_second': 0,
+    'max_updates_second': 0,
+    'auto_max_new_tokens': False,
+    'ban_eos_token': False,
+    'add_bos_token': True,
+    'skip_special_tokens': True,
+    'stream': True,
+    'static_cache': False,
+    'truncation_length': 2048,
+    'seed': -1,
+    'custom_stopping_strings': '',
+    'custom_token_bans': '',
+    'negative_prompt': '',
     'autoload_model': False,
+    'dark_theme': True,
     'default_extensions': [],
 }
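Aside: these defaults are plain dict entries, so user overrides amount to a dict update. A hedged sketch — the file name settings.yaml is only illustrative of how the server merges a user settings file over these defaults:

import yaml

from modules import shared

with open('settings.yaml') as f:
    user_settings = yaml.safe_load(f) or {}

shared.settings.update(user_settings)  # later keys win over the defaults above
print(shared.settings['preset'], shared.settings['dark_theme'])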

View File

@@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
     clear_torch_cache()
     generate_params = {}
-    for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']:
+    for k in [
+        'temperature',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
+        'smoothing_factor',
+        'smoothing_curve',
+        'min_p',
+        'top_p',
+        'top_k',
+        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
+        'penalty_alpha',
+        'guidance_scale',
+        'mirostat_mode',
+        'mirostat_tau',
+        'mirostat_eta',
+        'max_new_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'dry_sequence_breakers',
+    ]:
         if k in state:
             generate_params[k] = state[k]
-    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
-        generate_params['sampler_priority'] = state['sampler_priority']
-    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
-        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
-    if state['negative_prompt'] != '':
-        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
-    if state['prompt_lookup_num_tokens'] > 0:
-        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
-    if state['static_cache']:
-        generate_params['cache_implementation'] = 'static'
     for k in ['epsilon_cutoff', 'eta_cutoff']:
         if state[k] > 0:
             generate_params[k] = state[k] * 1e-4
+    if state['prompt_lookup_num_tokens'] > 0:
+        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
     if state['ban_eos_token']:
         generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]
+    if state['static_cache']:
+        generate_params['cache_implementation'] = 'static'
+    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
+        generate_params['sampler_priority'] = state['sampler_priority']
+    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
+        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
     if state['custom_token_bans']:
         to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
         if len(to_ban) > 0:
@@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
         else:
             generate_params['suppress_tokens'] = to_ban
+    if state['negative_prompt'] != '':
+        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
+
     generate_params.update({'use_cache': not shared.args.no_cache})
     if shared.args.deepspeed:
         generate_params.update({'synced_gpus': True})
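Aside: a standalone sketch of the sampler_priority handling shown above — a non-empty list passes through untouched, while a newline- or comma-separated string is split into a clean list before it reaches generate(). The helper name is made up for illustration:

def normalize_sampler_priority(value):
    if isinstance(value, list) and len(value) > 0:
        return value
    if isinstance(value, str) and value.strip() != '':
        return [x.strip() for x in value.replace('\n', ',').split(',') if x.strip()]
    return None

print(normalize_sampler_priority('top_k\ntemperature, top_p'))
# ['top_k', 'temperature', 'top_p']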

View File

@@ -102,55 +102,55 @@ else:
 def list_model_elements():
     elements = [
-        'loader',
         'filter_by_loader',
+        'loader',
         'cpu_memory',
-        'auto_devices',
-        'disk',
-        'cpu',
-        'bf16',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
-        'compute_dtype',
-        'quant_type',
-        'use_double_quant',
-        'cfg_cache',
-        'no_flash_attn',
-        'no_xformers',
-        'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
+        'n_gpu_layers',
         'threads',
         'threads_batch',
         'n_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'n_gpu_layers',
-        'tensor_split',
+        'hqq_backend',
         'n_ctx',
-        'gpu_split',
         'max_seq_len',
-        'compress_pos_emb',
+        'cache_type',
+        'tensor_split',
+        'gpu_split',
         'alpha_value',
         'rope_freq_base',
-        'numa',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
+        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
         'attention_sink_size',
-        'hqq_backend',
+        'num_experts_per_token',
+        'tensorcores',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'flash_attn',
+        'use_flash_attention_2',
+        'streaming_llm',
+        'auto_devices',
+        'cpu',
+        'disk',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+        'autosplit',
+        'enable_tp',
+        'no_flash_attn',
+        'no_xformers',
+        'no_sdpa',
+        'cfg_cache',
         'cpp_runner',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
     ]
     if is_torch_xpu_available():
@@ -165,87 +165,87 @@ def list_model_elements():
 def list_interface_input_elements():
     elements = [
-        'max_new_tokens',
-        'auto_max_new_tokens',
-        'max_tokens_second',
-        'max_updates_second',
-        'prompt_lookup_num_tokens',
-        'seed',
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
-        'epsilon_cutoff',
-        'eta_cutoff',
-        'repetition_penalty',
-        'presence_penalty',
-        'frequency_penalty',
-        'repetition_penalty_range',
-        'encoder_repetition_penalty',
-        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
         'xtc_threshold',
         'xtc_probability',
-        'do_sample',
+        'epsilon_cutoff',
+        'eta_cutoff',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_string',
-        'negative_prompt',
-        'guidance_scale',
-        'add_bos_token',
+        'max_new_tokens',
+        'prompt_lookup_num_tokens',
+        'max_tokens_second',
+        'max_updates_second',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
-        'truncation_length',
-        'custom_stopping_strings',
+        'add_bos_token',
         'skip_special_tokens',
         'stream',
         'static_cache',
-        'tfs',
-        'top_a',
+        'truncation_length',
+        'seed',
+        'sampler_priority',
+        'custom_stopping_strings',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
     ]
     # Chat elements
     elements += [
-        'textbox',
-        'start_with',
-        'character_menu',
         'history',
         'search_chat',
         'unique_id',
+        'textbox',
+        'start_with',
+        'mode',
+        'chat_style',
+        'chat-instruct_command',
+        'character_menu',
+        'name2',
+        'context',
+        'greeting',
         'name1',
         'user_bio',
-        'name2',
-        'greeting',
-        'context',
-        'mode',
         'custom_system_message',
         'instruction_template_str',
         'chat_template_str',
-        'chat_style',
-        'chat-instruct_command',
     ]
     # Notebook/default elements
     elements += [
-        'textbox-notebook',
         'textbox-default',
-        'output_textbox',
+        'textbox-notebook',
         'prompt_menu-default',
         'prompt_menu-notebook',
+        'output_textbox',
     ]
     # Model elements