diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index dfac8e03..5f0e0128 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -7,48 +7,48 @@ from pydantic import BaseModel, Field

 class GenerationOptions(BaseModel):
     preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
-    min_p: float = 0
-    dynamic_temperature: bool = False
     dynatemp_low: float = 1
     dynatemp_high: float = 1
     dynatemp_exponent: float = 1
     smoothing_factor: float = 0
     smoothing_curve: float = 1
+    min_p: float = 0
     top_k: int = 0
-    repetition_penalty: float = 1
-    repetition_penalty_range: int = 1024
     typical_p: float = 1
-    tfs: float = 1
-    top_a: float = 0
+    xtc_threshold: float = 0.1
+    xtc_probability: float = 0
     epsilon_cutoff: float = 0
     eta_cutoff: float = 0
-    guidance_scale: float = 1
-    negative_prompt: str = ''
+    tfs: float = 1
+    top_a: float = 0
+    dry_multiplier: float = 0
+    dry_allowed_length: int = 2
+    dry_base: float = 1.75
+    repetition_penalty: float = 1
+    encoder_repetition_penalty: float = 1
+    no_repeat_ngram_size: int = 0
+    repetition_penalty_range: int = 1024
     penalty_alpha: float = 0
+    guidance_scale: float = 1
     mirostat_mode: int = 0
     mirostat_tau: float = 5
     mirostat_eta: float = 0.1
-    temperature_last: bool = False
-    do_sample: bool = True
-    seed: int = -1
-    encoder_repetition_penalty: float = 1
-    no_repeat_ngram_size: int = 0
-    dry_multiplier: float = 0
-    dry_base: float = 1.75
-    dry_allowed_length: int = 2
-    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
-    xtc_threshold: float = 0.1
-    xtc_probability: float = 0
-    truncation_length: int = 0
-    max_tokens_second: int = 0
     prompt_lookup_num_tokens: int = 0
-    static_cache: bool = False
-    custom_token_bans: str = ""
-    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    max_tokens_second: int = 0
+    do_sample: bool = True
+    dynamic_temperature: bool = False
+    temperature_last: bool = False
     auto_max_new_tokens: bool = False
     ban_eos_token: bool = False
     add_bos_token: bool = True
     skip_special_tokens: bool = True
+    static_cache: bool = False
+    truncation_length: int = 0
+    seed: int = -1
+    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    custom_token_bans: str = ""
+    negative_prompt: str = ''
+    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
     grammar_string: str = ""
diff --git a/modules/loaders.py b/modules/loaders.py
index 4e331dbb..cd864e40 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -7,102 +7,103 @@ from modules import shared

 loaders_and_params = OrderedDict({
     'Transformers': [
-        'cpu_memory',
         'gpu_memory',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'bf16',
-        'cpu',
-        'disk',
-        'auto_devices',
-        'use_double_quant',
-        'quant_type',
-        'compute_dtype',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
+        'cpu_memory',
         'alpha_value',
         'compress_pos_emb',
-    ],
-    'llama.cpp': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'use_flash_attention_2',
+        'auto_devices',
         'cpu',
-        'numa',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
-        'attention_sink_size',
-    ],
-    'llamacpp_HF': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
-        'cpu',
-        'numa',
-        'cfg_cache',
+        'disk',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+        'trust_remote_code',
         'no_use_fast',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
+    ],
+    'llama.cpp': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
+        'attention_sink_size',
         'tensorcores',
         'flash_attn',
         'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+    ],
+    'llamacpp_HF': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
         'attention_sink_size',
+        'tensorcores',
+        'flash_attn',
+        'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'cfg_cache',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
         'llamacpp_HF_info',
     ],
     'ExLlamav2_HF': [
-        'gpu_split',
         'max_seq_len',
-        'cfg_cache',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
+        'cfg_cache',
         'trust_remote_code',
         'no_use_fast',
     ],
     'ExLlamav2': [
-        'gpu_split',
         'max_seq_len',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
         'exllamav2_info',
     ],
     'HQQ': [
@@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
 def transformers_samplers():
     return {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'prompt_lookup_num_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
-        'prompt_lookup_num_tokens',
         'static_cache',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     }
@@ -174,155 +175,156 @@ loaders_samplers = {
     'HQQ': transformers_samplers(),
     'ExLlamav2': {
         'temperature',
-        'temperature_last',
-        'smoothing_factor',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
-        'top_p',
+        'smoothing_factor',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
+        'presence_penalty',
         'repetition_penalty_range',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
         'add_bos_token',
-        'custom_token_bans',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'custom_token_bans',
+        'dry_sequence_breakers',
     },
     'ExLlamav2_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llama.cpp': {
         'temperature',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
         'tfs',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'seed',
+        'presence_penalty',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
         'ban_eos_token',
+        'seed',
         'custom_token_bans',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llamacpp_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'TensorRT-LLM': {
         'temperature',
         'top_p',
         'top_k',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'ban_eos_token',
+        'presence_penalty',
         'auto_max_new_tokens',
+        'ban_eos_token',
     }
 }
diff --git a/modules/presets.py b/modules/presets.py
index c8118fb3..b841af53 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -13,40 +13,40 @@ from modules.logging_colors import logger
 def default_preset():
     return {
         'temperature': 1,
-        'temperature_last': False,
-        'dynamic_temperature': False,
         'dynatemp_low': 1,
         'dynatemp_high': 1,
         'dynatemp_exponent': 1,
         'smoothing_factor': 0,
         'smoothing_curve': 1,
-        'top_p': 1,
         'min_p': 0,
+        'top_p': 1,
         'top_k': 0,
-        'repetition_penalty': 1,
-        'presence_penalty': 0,
-        'frequency_penalty': 0,
-        'repetition_penalty_range': 1024,
         'typical_p': 1,
-        'tfs': 1,
-        'top_a': 0,
+        'xtc_threshold': 0.1,
+        'xtc_probability': 0,
         'epsilon_cutoff': 0,
         'eta_cutoff': 0,
-        'guidance_scale': 1,
+        'tfs': 1,
+        'top_a': 0,
+        'dry_multiplier': 0,
+        'dry_allowed_length': 2,
+        'dry_base': 1.75,
+        'repetition_penalty': 1,
+        'frequency_penalty': 0,
+        'presence_penalty': 0,
+        'encoder_repetition_penalty': 1,
+        'no_repeat_ngram_size': 0,
+        'repetition_penalty_range': 1024,
         'penalty_alpha': 0,
+        'guidance_scale': 1,
         'mirostat_mode': 0,
         'mirostat_tau': 5,
         'mirostat_eta': 0.1,
         'do_sample': True,
-        'encoder_repetition_penalty': 1,
-        'no_repeat_ngram_size': 0,
-        'dry_multiplier': 0,
-        'dry_base': 1.75,
-        'dry_allowed_length': 2,
+        'dynamic_temperature': False,
+        'temperature_last': False,
+        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
         'dry_sequence_breakers': '"\\n", ":", "\\"", "*"',
-        'xtc_threshold': 0.1,
-        'xtc_probability': 0,
-        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram'
     }
diff --git a/modules/shared.py b/modules/shared.py
index 89263205..928747f7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -29,39 +29,39 @@ need_restart = False

 # UI defaults
 settings = {
-    'dark_theme': True,
     'show_controls': True,
     'start_with': '',
     'mode': 'chat-instruct',
     'chat_style': 'cai-chat',
+    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'prompt-default': 'QA',
     'prompt-notebook': 'QA',
-    'preset': 'min_p',
-    'max_new_tokens': 512,
-    'max_new_tokens_min': 1,
-    'max_new_tokens_max': 4096,
-    'negative_prompt': '',
-    'seed': -1,
-    'truncation_length': 2048,
-    'max_tokens_second': 0,
-    'max_updates_second': 0,
-    'prompt_lookup_num_tokens': 0,
-    'static_cache': False,
-    'custom_stopping_strings': '',
-    'custom_token_bans': '',
-    'auto_max_new_tokens': False,
-    'ban_eos_token': False,
-    'add_bos_token': True,
-    'skip_special_tokens': True,
-    'stream': True,
     'character': 'Assistant',
     'name1': 'You',
     'user_bio': '',
     'custom_system_message': '',
     'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
     'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
-    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+    'preset': 'min_p',
+    'max_new_tokens': 512,
+    'max_new_tokens_min': 1,
+    'max_new_tokens_max': 4096,
+    'prompt_lookup_num_tokens': 0,
+    'max_tokens_second': 0,
+    'max_updates_second': 0,
+    'auto_max_new_tokens': False,
+    'ban_eos_token': False,
+    'add_bos_token': True,
+    'skip_special_tokens': True,
+    'stream': True,
+    'static_cache': False,
+    'truncation_length': 2048,
+    'seed': -1,
+    'custom_stopping_strings': '',
+    'custom_token_bans': '',
+    'negative_prompt': '',
     'autoload_model': False,
+    'dark_theme': True,
     'default_extensions': [],
 }
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 3e9788b8..152b2b8d 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
     clear_torch_cache()

     generate_params = {}
-    for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']:
+    for k in [
+        'temperature',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
+        'smoothing_factor',
+        'smoothing_curve',
+        'min_p',
+        'top_p',
+        'top_k',
+        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
+        'penalty_alpha',
+        'guidance_scale',
+        'mirostat_mode',
+        'mirostat_tau',
+        'mirostat_eta',
+        'max_new_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'dry_sequence_breakers',
+    ]:
         if k in state:
             generate_params[k] = state[k]

-    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
-        generate_params['sampler_priority'] = state['sampler_priority']
-    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
-        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
-
-    if state['negative_prompt'] != '':
-        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
-
-    if state['prompt_lookup_num_tokens'] > 0:
-        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
-
-    if state['static_cache']:
-        generate_params['cache_implementation'] = 'static'
-
     for k in ['epsilon_cutoff', 'eta_cutoff']:
         if state[k] > 0:
             generate_params[k] = state[k] * 1e-4

+    if state['prompt_lookup_num_tokens'] > 0:
+        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
+
     if state['ban_eos_token']:
         generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]

+    if state['static_cache']:
+        generate_params['cache_implementation'] = 'static'
+
+    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
+        generate_params['sampler_priority'] = state['sampler_priority']
+    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
+        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
+
     if state['custom_token_bans']:
         to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
         if len(to_ban) > 0:
@@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
             else:
                 generate_params['suppress_tokens'] = to_ban

+    if state['negative_prompt'] != '':
+        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
+
     generate_params.update({'use_cache': not shared.args.no_cache})
     if shared.args.deepspeed:
         generate_params.update({'synced_gpus': True})
diff --git a/modules/ui.py b/modules/ui.py
index e66de434..4f7ee785 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -102,55 +102,55 @@ else:

 def list_model_elements():
     elements = [
-        'loader',
         'filter_by_loader',
+        'loader',
         'cpu_memory',
-        'auto_devices',
-        'disk',
-        'cpu',
-        'bf16',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
-        'compute_dtype',
-        'quant_type',
-        'use_double_quant',
-        'cfg_cache',
-        'no_flash_attn',
-        'no_xformers',
-        'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
+        'n_gpu_layers',
         'threads',
         'threads_batch',
         'n_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'n_gpu_layers',
-        'tensor_split',
+        'hqq_backend',
         'n_ctx',
-        'gpu_split',
         'max_seq_len',
-        'compress_pos_emb',
+        'cache_type',
+        'tensor_split',
+        'gpu_split',
         'alpha_value',
         'rope_freq_base',
-        'numa',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
+        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
         'attention_sink_size',
-        'hqq_backend',
+        'num_experts_per_token',
+        'tensorcores',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'flash_attn',
+        'use_flash_attention_2',
+        'streaming_llm',
+        'auto_devices',
+        'cpu',
+        'disk',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+        'autosplit',
+        'enable_tp',
+        'no_flash_attn',
+        'no_xformers',
+        'no_sdpa',
+        'cfg_cache',
         'cpp_runner',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
     ]

     if is_torch_xpu_available():
@@ -165,87 +165,87 @@ def list_model_elements():

 def list_interface_input_elements():
     elements = [
-        'max_new_tokens',
-        'auto_max_new_tokens',
-        'max_tokens_second',
-        'max_updates_second',
-        'prompt_lookup_num_tokens',
-        'seed',
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
-        'epsilon_cutoff',
-        'eta_cutoff',
-        'repetition_penalty',
-        'presence_penalty',
-        'frequency_penalty',
-        'repetition_penalty_range',
-        'encoder_repetition_penalty',
-        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
         'xtc_threshold',
         'xtc_probability',
-        'do_sample',
+        'epsilon_cutoff',
+        'eta_cutoff',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_string',
-        'negative_prompt',
-        'guidance_scale',
-        'add_bos_token',
+        'max_new_tokens',
+        'prompt_lookup_num_tokens',
+        'max_tokens_second',
+        'max_updates_second',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
-        'truncation_length',
-        'custom_stopping_strings',
+        'add_bos_token',
         'skip_special_tokens',
         'stream',
         'static_cache',
-        'tfs',
-        'top_a',
+        'truncation_length',
+        'seed',
+        'sampler_priority',
+        'custom_stopping_strings',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
     ]

     # Chat elements
     elements += [
-        'textbox',
-        'start_with',
-        'character_menu',
         'history',
         'search_chat',
         'unique_id',
+        'textbox',
+        'start_with',
+        'mode',
+        'chat_style',
+        'chat-instruct_command',
+        'character_menu',
+        'name2',
+        'context',
+        'greeting',
         'name1',
         'user_bio',
-        'name2',
-        'greeting',
-        'context',
-        'mode',
         'custom_system_message',
         'instruction_template_str',
         'chat_template_str',
-        'chat_style',
-        'chat-instruct_command',
     ]

     # Notebook/default elements
     elements += [
-        'textbox-notebook',
         'textbox-default',
-        'output_textbox',
+        'textbox-notebook',
         'prompt_menu-default',
         'prompt_menu-notebook',
+        'output_textbox',
     ]

     # Model elements