2023-06-16 19:00:37 -03:00
|
|
|
import functools
|
2023-08-11 17:41:33 +00:00
|
|
|
from collections import OrderedDict
|
2023-06-16 19:00:37 -03:00
|
|
|
|
|
|
|
import gradio as gr
|
|
|
|
|
|
|
|
from modules import shared
|
|
|
|
|
2023-08-11 17:41:33 +00:00
|
|
|
# Registry mapping each loader name to the list of parameter names tied to it.
# make_loader_params_visible() looks a loader up here to decide which of the
# names returned by get_all_params() are shown. The values are plain lists and
# their order is kept exactly as declared.
loaders_and_params = OrderedDict([
    ('Transformers', [
        'cpu_memory',
        'gpu_memory',
        'load_in_4bit',
        'load_in_8bit',
        'torch_compile',
        'bf16',
        'cpu',
        'disk',
        'auto_devices',
        'use_double_quant',
        'quant_type',
        'compute_dtype',
        'trust_remote_code',
        'no_use_fast',
        'use_flash_attention_2',
        'use_eager_attention',
        'alpha_value',
        'compress_pos_emb',
    ]),
    ('llama.cpp', [
        'n_ctx',
        'n_gpu_layers',
        'cache_type',
        'tensor_split',
        'n_batch',
        'threads',
        'threads_batch',
        'no_mmap',
        'mlock',
        'no_mul_mat_q',
        'rope_freq_base',
        'compress_pos_emb',
        'cpu',
        'numa',
        'no_offload_kqv',
        'row_split',
        'tensorcores',
        'flash_attn',
        'streaming_llm',
        'attention_sink_size',
    ]),
    ('llamacpp_HF', [
        'n_ctx',
        'n_gpu_layers',
        'cache_type',
        'tensor_split',
        'n_batch',
        'threads',
        'threads_batch',
        'no_mmap',
        'mlock',
        'no_mul_mat_q',
        'rope_freq_base',
        'compress_pos_emb',
        'cpu',
        'numa',
        'cfg_cache',
        'trust_remote_code',
        'no_use_fast',
        'logits_all',
        'no_offload_kqv',
        'row_split',
        'tensorcores',
        'flash_attn',
        'streaming_llm',
        'attention_sink_size',
        'llamacpp_HF_info',
    ]),
    ('ExLlamav2_HF', [
        'gpu_split',
        'max_seq_len',
        'cfg_cache',
        'no_flash_attn',
        'no_xformers',
        'no_sdpa',
        'num_experts_per_token',
        'cache_type',
        'autosplit',
        'enable_tp',
        'alpha_value',
        'compress_pos_emb',
        'trust_remote_code',
        'no_use_fast',
    ]),
    ('ExLlamav2', [
        'gpu_split',
        'max_seq_len',
        'no_flash_attn',
        'no_xformers',
        'no_sdpa',
        'num_experts_per_token',
        'cache_type',
        'autosplit',
        'enable_tp',
        'alpha_value',
        'compress_pos_emb',
        'exllamav2_info',
    ]),
    ('HQQ', [
        'hqq_backend',
        'trust_remote_code',
        'no_use_fast',
    ]),
    ('TensorRT-LLM', [
        'max_seq_len',
        'cpp_runner',
        'tensorrt_llm_info',
    ]),
])
|
2023-06-16 19:00:37 -03:00
|
|
|
|
2023-12-20 19:18:07 -08:00
|
|
|
|
|
|
|
def transformers_samplers():
    """Return a new set holding the sampler names used for the Transformers loader.

    A fresh set is built on every call, so each caller gets its own
    independent object.
    """
    names = (
        'temperature',
        'temperature_last',
        'dynamic_temperature',
        'dynatemp_low',
        'dynatemp_high',
        'dynatemp_exponent',
        'smoothing_factor',
        'smoothing_curve',
        'top_p',
        'min_p',
        'top_k',
        'typical_p',
        'epsilon_cutoff',
        'eta_cutoff',
        'tfs',
        'top_a',
        'repetition_penalty',
        'presence_penalty',
        'frequency_penalty',
        'repetition_penalty_range',
        'encoder_repetition_penalty',
        'no_repeat_ngram_size',
        'dry_multiplier',
        'dry_base',
        'dry_allowed_length',
        'dry_sequence_breakers',
        'xtc_threshold',
        'xtc_probability',
        'seed',
        'do_sample',
        'penalty_alpha',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
        'grammar_file_row',
        'grammar_string',
        'guidance_scale',
        'negative_prompt',
        'ban_eos_token',
        'custom_token_bans',
        'sampler_priority',
        'add_bos_token',
        'skip_special_tokens',
        'auto_max_new_tokens',
        'prompt_lookup_num_tokens',
        'static_cache',
    )
    return set(names)
|
|
|
|
|
|
|
|
|
|
|
|
# Registry mapping each loader name to the set of sampler names associated
# with it. list_all_samplers() takes the union of these sets and
# blacklist_samplers() tests membership in them.
loaders_samplers = {
    'Transformers': transformers_samplers(),
    'HQQ': transformers_samplers(),
    'ExLlamav2': {
        'temperature', 'temperature_last',
        'smoothing_factor',
        'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent',
        'top_p', 'min_p', 'top_k', 'typical_p', 'tfs', 'top_a',
        'repetition_penalty', 'presence_penalty', 'frequency_penalty',
        'repetition_penalty_range',
        'mirostat_mode', 'mirostat_tau', 'mirostat_eta',
        'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers',
        'xtc_threshold', 'xtc_probability',
        'seed',
        'ban_eos_token', 'add_bos_token', 'custom_token_bans',
        'skip_special_tokens', 'auto_max_new_tokens',
    },
    'ExLlamav2_HF': {
        'temperature', 'temperature_last',
        'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent',
        'smoothing_factor', 'smoothing_curve',
        'top_p', 'min_p', 'top_k', 'typical_p',
        'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a',
        'repetition_penalty', 'presence_penalty', 'frequency_penalty',
        'repetition_penalty_range', 'encoder_repetition_penalty',
        'no_repeat_ngram_size',
        'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers',
        'xtc_threshold', 'xtc_probability',
        'seed', 'do_sample',
        'mirostat_mode', 'mirostat_tau', 'mirostat_eta',
        'grammar_file_row', 'grammar_string',
        'guidance_scale', 'negative_prompt',
        'ban_eos_token', 'custom_token_bans', 'sampler_priority',
        'add_bos_token', 'skip_special_tokens', 'auto_max_new_tokens',
    },
    'llama.cpp': {
        'temperature',
        'top_p', 'min_p', 'top_k', 'typical_p', 'tfs',
        'repetition_penalty', 'presence_penalty', 'frequency_penalty',
        'seed',
        'mirostat_mode', 'mirostat_tau', 'mirostat_eta',
        'grammar_file_row', 'grammar_string',
        'ban_eos_token', 'custom_token_bans',
    },
    'llamacpp_HF': {
        'temperature', 'temperature_last',
        'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent',
        'smoothing_factor', 'smoothing_curve',
        'top_p', 'min_p', 'top_k', 'typical_p',
        'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a',
        'repetition_penalty', 'presence_penalty', 'frequency_penalty',
        'repetition_penalty_range', 'encoder_repetition_penalty',
        'no_repeat_ngram_size',
        'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers',
        'xtc_threshold', 'xtc_probability',
        'seed', 'do_sample',
        'mirostat_mode', 'mirostat_tau', 'mirostat_eta',
        'grammar_file_row', 'grammar_string',
        'guidance_scale', 'negative_prompt',
        'ban_eos_token', 'custom_token_bans', 'sampler_priority',
        'add_bos_token', 'skip_special_tokens', 'auto_max_new_tokens',
    },
    'TensorRT-LLM': {
        'temperature',
        'top_p', 'top_k',
        'repetition_penalty', 'presence_penalty', 'frequency_penalty',
        'ban_eos_token',
        'auto_max_new_tokens',
    },
}
|
|
|
|
|
2023-07-31 18:44:00 -07:00
|
|
|
|
|
|
|
@functools.cache
def list_all_samplers():
    """Return a sorted list of every sampler name found in any loader's set.

    The result is memoized by functools.cache, so the union over
    loaders_samplers is computed on the first call only.
    """
    merged = set().union(*loaders_samplers.values())
    return sorted(merged)
|
|
|
|
|
|
|
|
|
2024-01-08 19:42:31 -08:00
|
|
|
def blacklist_samplers(loader, dynamic_temperature):
    """Build one gr.update(visible=...) per sampler, in list_all_samplers() order.

    A sampler is hidden when `loader` is not 'All' and the sampler is absent
    from loaders_samplers[loader]. Otherwise it is shown, except that samplers
    whose name starts with 'dynatemp' take their visibility from
    `dynamic_temperature`.
    """
    def _visibility(name):
        # Hidden unless every loader is selected or this loader lists the sampler.
        if loader != 'All' and name not in loaders_samplers[loader]:
            return False

        # dynatemp* entries follow the dynamic_temperature value as passed in.
        if name.startswith('dynatemp'):
            return dynamic_temperature

        return True

    return [gr.update(visible=_visibility(name)) for name in list_all_samplers()]
|
2023-07-31 18:44:00 -07:00
|
|
|
|
2023-06-16 19:00:37 -03:00
|
|
|
|
|
|
|
def get_gpu_memory_keys():
    """Return the keys of shared.gradio that start with 'gpu_memory', in iteration order."""
    matching = []
    for key in shared.gradio:
        if key.startswith('gpu_memory'):
            matching.append(key)

    return matching
|
|
|
|
|
|
|
|
|
|
|
|
@functools.cache
def get_all_params():
    """Return a sorted list of every parameter name listed for any loader.

    If 'gpu_memory' is among them, it is replaced by the keys returned by
    get_gpu_memory_keys(). The result is memoized by functools.cache, so it
    reflects the state seen on the first call.
    """
    collected = {name for names in loaders_and_params.values() for name in names}

    if 'gpu_memory' in collected:
        # Swap the generic entry for the concrete gpu_memory* keys.
        collected.discard('gpu_memory')
        collected.update(get_gpu_memory_keys())

    return sorted(collected)
|
|
|
|
|
|
|
|
|
|
|
|
def make_loader_params_visible(loader):
    """Build one gr.update(visible=...) per parameter, in get_all_params() order.

    A parameter is visible when it is listed for `loader` in
    loaders_and_params. If 'gpu_memory' is listed, it is replaced by the keys
    returned by get_gpu_memory_keys(). A loader that is not in the registry
    yields an all-hidden result.
    """
    all_params = get_all_params()

    # Work on a copy: the registry list must not be mutated, otherwise
    # 'gpu_memory' is permanently stripped from loaders_and_params after the
    # first call and later readers of the registry see altered data.
    params = list(loaders_and_params[loader]) if loader in loaders_and_params else []

    if 'gpu_memory' in params:
        params.remove('gpu_memory')
        params += get_gpu_memory_keys()

    return [gr.update(visible=(k in params)) for k in all_params]
|