text-generation-webui/modules/loaders.py

import functools

import gradio as gr

from modules import shared

# Maps each model loader name to the names of the loader-parameter UI elements
# that are made visible when that loader is selected (see
# make_loader_params_visible).  'gpu_memory' is a placeholder:
# get_all_params() and make_loader_params_visible() replace it with every key
# of shared.gradio that starts with 'gpu_memory'.
# Each name appears at most once per loader.
loaders_and_params = {
    'AutoGPTQ': [
        'triton',
        'no_inject_fused_attention',
        'no_inject_fused_mlp',
        'no_use_cuda_fp16',
        'wbits',
        'groupsize',
        'desc_act',
        'gpu_memory',
        'cpu_memory',
        'cpu',
        'disk',
        'auto_devices',
        'trust_remote_code',
        'autogptq_info',
    ],
    'GPTQ-for-LLaMa': [
        'wbits',
        'groupsize',
        'model_type',
        'pre_layer',
        'gptq_for_llama_info',
    ],
    'llama.cpp': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'llama_cpp_seed',
        'compress_pos_emb',
        'alpha_value',
    ],
    'llamacpp_HF': [
        'n_ctx',
        'n_gqa',
        'rms_norm_eps',
        'n_gpu_layers',
        'n_batch',
        'threads',
        'no_mmap',
        'low_vram',
        'mlock',
        'llama_cpp_seed',
        'compress_pos_emb',
        'alpha_value',
        'llamacpp_HF_info',
    ],
    'Transformers': [
        'cpu_memory',
        'gpu_memory',
        'trust_remote_code',
        'load_in_8bit',
        'bf16',
        'cpu',
        'disk',
        'auto_devices',
        'load_in_4bit',
        'use_double_quant',
        'quant_type',
        'compute_dtype',
        'transformers_info',
    ],
    'ExLlama': [
        'gpu_split',
        'max_seq_len',
        'compress_pos_emb',
        'alpha_value',
        'exllama_info',
    ],
    'ExLlama_HF': [
        'gpu_split',
        'max_seq_len',
        'compress_pos_emb',
        'alpha_value',
        'exllama_HF_info',
    ],
}

# Sampler names that the 'ExLlama_HF' and 'llamacpp_HF' entries consist of.
_shared_samplers = (
    'temperature',
    'top_p',
    'top_k',
    'typical_p',
    'epsilon_cutoff',
    'eta_cutoff',
    'tfs',
    'top_a',
    'repetition_penalty',
    'repetition_penalty_range',
    'encoder_repetition_penalty',
    'no_repeat_ngram_size',
    'min_length',
    'seed',
    'do_sample',
    'mirostat_mode',
    'mirostat_tau',
    'mirostat_eta',
    'ban_eos_token',
    'add_bos_token',
    'skip_special_tokens',
    'auto_max_new_tokens',
)

# Names listed, on top of the shared ones, only for 'Transformers',
# 'AutoGPTQ' and 'GPTQ-for-LLaMa'.
_extended_only_samplers = (
    'penalty_alpha',
    'num_beams',
    'length_penalty',
    'early_stopping',
)

# Maps each loader name to the set of sampler names kept visible for it by
# blacklist_samplers().  Every loader gets its own independent set object.
loaders_samplers = {
    'Transformers': {*_shared_samplers, *_extended_only_samplers},
    'ExLlama_HF': set(_shared_samplers),
    'ExLlama': {
        'temperature',
        'top_p',
        'top_k',
        'typical_p',
        'repetition_penalty',
        'repetition_penalty_range',
        'seed',
        'ban_eos_token',
        'auto_max_new_tokens',
    },
    'AutoGPTQ': {*_shared_samplers, *_extended_only_samplers},
    'GPTQ-for-LLaMa': {*_shared_samplers, *_extended_only_samplers},
    'llama.cpp': {
        'temperature',
        'top_p',
        'top_k',
        'tfs',
        'repetition_penalty',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
        'ban_eos_token',
    },
    'llamacpp_HF': set(_shared_samplers),
}


@functools.cache
def list_all_samplers():
    """Return every sampler name found in loaders_samplers, sorted.

    The union is taken across all loaders, so each name appears once.
    Because of functools.cache the list is computed on the first call only
    and the very same list object is returned afterwards.
    """
    merged = set().union(*loaders_samplers.values())
    return sorted(merged)


def blacklist_samplers(loader):
    """Build the visibility updates for the sampler controls of one loader.

    Returns one gr.update per entry of list_all_samplers(), in that order.
    With loader == 'All' every sampler is made visible; otherwise a sampler
    is visible only when it is listed under loaders_samplers[loader].
    Any other loader name missing from loaders_samplers raises KeyError.
    """
    show_everything = loader == 'All'
    updates = []
    for name in list_all_samplers():
        # The short-circuit keeps 'All' from ever being looked up in the table.
        shown = show_everything or name in loaders_samplers[loader]
        updates.append(gr.update(visible=shown))

    return updates


def get_gpu_memory_keys():
    """Return the keys of shared.gradio that start with 'gpu_memory'.

    Keys are returned in the iteration order of shared.gradio.
    """
    matching = []
    for key in shared.gradio:
        if key.startswith('gpu_memory'):
            matching.append(key)

    return matching


@functools.cache
def get_all_params():
    """Return the sorted, de-duplicated parameter names of all loaders.

    The 'gpu_memory' placeholder, when present, is swapped for the actual
    keys reported by get_gpu_memory_keys().  The result is memoized by
    functools.cache, so it reflects shared.gradio as it was at the first call.
    """
    collected = set().union(*loaders_and_params.values())

    if 'gpu_memory' in collected:
        collected.discard('gpu_memory')
        collected.update(get_gpu_memory_keys())

    return sorted(collected)


def make_loader_params_visible(loader):
    """Build the visibility updates for the loader-parameter controls.

    Returns one gr.update per entry of get_all_params(), in that order.  A
    parameter is visible when it is listed for ``loader`` in
    loaders_and_params, with the 'gpu_memory' placeholder expanded to the
    keys from get_gpu_memory_keys().  For a loader that is not in the table
    every parameter is hidden.
    """
    all_params = get_all_params()

    # Work on a private copy: the original aliased the module-level list and
    # then removed/appended entries in place, permanently rewriting
    # loaders_and_params on the first call.  A set also makes the membership
    # tests below O(1).
    params = set(loaders_and_params.get(loader, ()))

    if 'gpu_memory' in params:
        params.remove('gpu_memory')
        params.update(get_gpu_memory_keys())

    return [gr.update(visible=k in params) for k in all_params]
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`import functools`

			`import gradio as gr`

			`from modules import shared`

			`loaders_and_params = {`
			`'AutoGPTQ': [`
			`'triton',`
			`'no_inject_fused_attention',`
			`'no_inject_fused_mlp',`
Add --no_use_cuda_fp16 param for AutoGPTQ 2023-06-23 17:22:56 +02:00			`'no_use_cuda_fp16',`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`'wbits',`
			`'groupsize',`
			`'desc_act',`
			`'gpu_memory',`
			`'cpu_memory',`
			`'cpu',`
			`'disk',`
			`'auto_devices',`
			`'trust_remote_code',`
			`'autogptq_info',`
			`],`
			`'GPTQ-for-LLaMa': [`
			`'wbits',`
			`'groupsize',`
			`'model_type',`
			`'pre_layer',`
			`'gptq_for_llama_info',`
			`],`
			`'llama.cpp': [`
			`'n_ctx',`
Add llama-2-70b GGML support (#3285) 2023-07-24 21:37:03 +02:00			`'n_gqa',`
			`'rms_norm_eps',`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`'n_gpu_layers',`
			`'n_batch',`
			`'threads',`
			`'no_mmap',`
Add low vram mode on llama cpp (#3076) 2023-07-12 16:05:13 +02:00			`'low_vram',`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`'mlock',`
			`'llama_cpp_seed',`
[GGML] Support for customizable RoPE (#3083) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-07-18 03:32:37 +02:00			`'compress_pos_emb',`
			`'alpha_value',`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`],`
Create llamacpp_HF loader (#3062) 2023-07-16 07:21:13 +02:00			`'llamacpp_HF': [`
			`'n_ctx',`
Add llama-2-70b GGML support (#3285) 2023-07-24 21:37:03 +02:00			`'n_gqa',`
			`'rms_norm_eps',`
Create llamacpp_HF loader (#3062) 2023-07-16 07:21:13 +02:00			`'n_gpu_layers',`
			`'n_batch',`
			`'threads',`
			`'no_mmap',`
			`'low_vram',`
			`'mlock',`
			`'llama_cpp_seed',`
[GGML] Support for customizable RoPE (#3083) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> 2023-07-18 03:32:37 +02:00			`'compress_pos_emb',`
			`'alpha_value',`
Create llamacpp_HF loader (#3062) 2023-07-16 07:21:13 +02:00			`'llamacpp_HF_info',`
			`],`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`'Transformers': [`
			`'cpu_memory',`
			`'gpu_memory',`
			`'trust_remote_code',`
			`'load_in_8bit',`
			`'bf16',`
			`'cpu',`
			`'disk',`
			`'auto_devices',`
			`'load_in_4bit',`
			`'use_double_quant',`
			`'quant_type',`
			`'compute_dtype',`
			`'trust_remote_code',`
Add some clarifications 2023-06-17 00:07:16 +02:00			`'transformers_info'`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`],`
lint 2023-07-12 20:33:25 +02:00			`'ExLlama': [`
Add gpu_split param to ExLlama Adapted from code created by Ph0rk0z. Thank you Ph0rk0z. 2023-06-17 01:49:36 +02:00			`'gpu_split',`
ExLlama with long context (#2875) 2023-06-26 03:49:26 +02:00			`'max_seq_len',`
			`'compress_pos_emb',`
Add Support for Static NTK RoPE scaling for exllama/exllama_hf (#2955) 2023-07-04 06:13:16 +02:00			`'alpha_value',`
Add ExLlama support (#2444) 2023-06-17 01:35:38 +02:00			`'exllama_info',`
Implement a demo HF wrapper for exllama to utilize existing HF transformers decoding. (#2777) 2023-06-21 20:31:42 +02:00			`],`
lint 2023-07-12 20:33:25 +02:00			`'ExLlama_HF': [`
Implement a demo HF wrapper for exllama to utilize existing HF transformers decoding. (#2777) 2023-06-21 20:31:42 +02:00			`'gpu_split',`
ExLlama with long context (#2875) 2023-06-26 03:49:26 +02:00			`'max_seq_len',`
			`'compress_pos_emb',`
Add Support for Static NTK RoPE scaling for exllama/exllama_hf (#2955) 2023-07-04 06:13:16 +02:00			`'alpha_value',`
Implement a demo HF wrapper for exllama to utilize existing HF transformers decoding. (#2777) 2023-06-21 20:31:42 +02:00			`'exllama_HF_info',`
Add ExLlama support (#2444) 2023-06-17 01:35:38 +02:00			`]`
Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00			`}`

Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`loaders_samplers = {`
			`'Transformers': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 19:52:20 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`'ExLlama_HF': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 19:52:20 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`'ExLlama': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'seed',`
			`'ban_eos_token',`
Implement auto_max_new_tokens for ExLlama 2023-08-02 20:01:29 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`'AutoGPTQ': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 19:52:20 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`'GPTQ-for-LLaMa': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'penalty_alpha',`
			`'num_beams',`
			`'length_penalty',`
			`'early_stopping',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 19:52:20 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`'llama.cpp': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'tfs',`
			`'repetition_penalty',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`},`
			`'llamacpp_HF': {`
			`'temperature',`
			`'top_p',`
			`'top_k',`
			`'typical_p',`
			`'epsilon_cutoff',`
			`'eta_cutoff',`
			`'tfs',`
			`'top_a',`
			`'repetition_penalty',`
			`'repetition_penalty_range',`
			`'encoder_repetition_penalty',`
			`'no_repeat_ngram_size',`
			`'min_length',`
			`'seed',`
			`'do_sample',`
			`'mirostat_mode',`
			`'mirostat_tau',`
			`'mirostat_eta',`
			`'ban_eos_token',`
			`'add_bos_token',`
			`'skip_special_tokens',`
Add auto_max_new_tokens parameter (#3419) 2023-08-02 19:52:20 +02:00			`'auto_max_new_tokens',`
Add a "Filter by loader" menu to the Parameters tab 2023-08-01 03:44:00 +02:00			`},`
			`}`


			`@functools.cache`
			`def list_all_samplers():`
			`all_samplers = set()`
			`for k in loaders_samplers:`
			`for sampler in loaders_samplers[k]:`
			`all_samplers.add(sampler)`

			`return sorted(all_samplers)`


			`def blacklist_samplers(loader):`
			`all_samplers = list_all_samplers()`
			`if loader == 'All':`
			`return [gr.update(visible=True) for sampler in all_samplers]`
			`else:`
			`return [gr.update(visible=True) if sampler in loaders_samplers[loader] else gr.update(visible=False) for sampler in all_samplers]`

Reorganize model loading UI completely (#2720) 2023-06-17 00:00:37 +02:00
			`def get_gpu_memory_keys():`
			`return [k for k in shared.gradio if k.startswith('gpu_memory')]`


			`@functools.cache`
			`def get_all_params():`
			`all_params = set()`
			`for k in loaders_and_params:`
			`for el in loaders_and_params[k]:`
			`all_params.add(el)`

			`if 'gpu_memory' in all_params:`
			`all_params.remove('gpu_memory')`
			`for k in get_gpu_memory_keys():`
			`all_params.add(k)`

			`return sorted(all_params)`


			`def make_loader_params_visible(loader):`
			`params = []`
			`all_params = get_all_params()`
			`if loader in loaders_and_params:`
			`params = loaders_and_params[loader]`

			`if 'gpu_memory' in params:`
			`params.remove('gpu_memory')`
			`params += get_gpu_memory_keys()`

			`return [gr.update(visible=True) if k in params else gr.update(visible=False) for k in all_params]`