diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index dfac8e03..5f0e0128 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -7,48 +7,48 @@ from pydantic import BaseModel, Field

 class GenerationOptions(BaseModel):
     preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
-    min_p: float = 0
-    dynamic_temperature: bool = False
     dynatemp_low: float = 1
     dynatemp_high: float = 1
     dynatemp_exponent: float = 1
     smoothing_factor: float = 0
     smoothing_curve: float = 1
+    min_p: float = 0
     top_k: int = 0
-    repetition_penalty: float = 1
-    repetition_penalty_range: int = 1024
     typical_p: float = 1
-    tfs: float = 1
-    top_a: float = 0
+    xtc_threshold: float = 0.1
+    xtc_probability: float = 0
     epsilon_cutoff: float = 0
     eta_cutoff: float = 0
-    guidance_scale: float = 1
-    negative_prompt: str = ''
+    tfs: float = 1
+    top_a: float = 0
+    dry_multiplier: float = 0
+    dry_allowed_length: int = 2
+    dry_base: float = 1.75
+    repetition_penalty: float = 1
+    encoder_repetition_penalty: float = 1
+    no_repeat_ngram_size: int = 0
+    repetition_penalty_range: int = 1024
     penalty_alpha: float = 0
+    guidance_scale: float = 1
     mirostat_mode: int = 0
     mirostat_tau: float = 5
     mirostat_eta: float = 0.1
-    temperature_last: bool = False
-    do_sample: bool = True
-    seed: int = -1
-    encoder_repetition_penalty: float = 1
-    no_repeat_ngram_size: int = 0
-    dry_multiplier: float = 0
-    dry_base: float = 1.75
-    dry_allowed_length: int = 2
-    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
-    xtc_threshold: float = 0.1
-    xtc_probability: float = 0
-    truncation_length: int = 0
-    max_tokens_second: int = 0
     prompt_lookup_num_tokens: int = 0
-    static_cache: bool = False
-    custom_token_bans: str = ""
-    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    max_tokens_second: int = 0
+    do_sample: bool = True
+    dynamic_temperature: bool = False
+    temperature_last: bool = False
     auto_max_new_tokens: bool = False
     ban_eos_token: bool = False
     add_bos_token: bool = True
     skip_special_tokens: bool = True
+    static_cache: bool = False
+    truncation_length: int = 0
+    seed: int = -1
+    sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
+    custom_token_bans: str = ""
+    negative_prompt: str = ''
+    dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
     grammar_string: str = ""
diff --git a/modules/loaders.py b/modules/loaders.py
index 4e331dbb..cd864e40 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -7,102 +7,103 @@ from modules import shared

 loaders_and_params = OrderedDict({
     'Transformers': [
-        'cpu_memory',
         'gpu_memory',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'bf16',
-        'cpu',
-        'disk',
-        'auto_devices',
-        'use_double_quant',
-        'quant_type',
-        'compute_dtype',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
+        'cpu_memory',
         'alpha_value',
         'compress_pos_emb',
-    ],
-    'llama.cpp': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'use_flash_attention_2',
+        'auto_devices',
         'cpu',
-        'numa',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
-        'attention_sink_size',
-    ],
-    'llamacpp_HF': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
-        'cpu',
-        'numa',
-        'cfg_cache',
+        'disk',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+        'trust_remote_code',
         'no_use_fast',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
+    ],
+    'llama.cpp': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
+        'attention_sink_size',
         'tensorcores',
         'flash_attn',
         'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+    ],
+    'llamacpp_HF': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
         'attention_sink_size',
+        'tensorcores',
+        'flash_attn',
+        'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'cfg_cache',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
         'llamacpp_HF_info',
     ],
     'ExLlamav2_HF': [
-        'gpu_split',
         'max_seq_len',
-        'cfg_cache',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
+        'cfg_cache',
         'trust_remote_code',
         'no_use_fast',
     ],
     'ExLlamav2': [
-        'gpu_split',
         'max_seq_len',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
         'no_flash_attn',
         'no_xformers',
         'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
         'exllamav2_info',
     ],
     'HQQ': [
@@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
 def transformers_samplers():
     return {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'prompt_lookup_num_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
-        'prompt_lookup_num_tokens',
         'static_cache',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     }
@@ -174,155 +175,156 @@ loaders_samplers = {
     'HQQ': transformers_samplers(),
     'ExLlamav2': {
         'temperature',
-        'temperature_last',
-        'smoothing_factor',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
-        'top_p',
+        'smoothing_factor',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
+        'presence_penalty',
         'repetition_penalty_range',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
         'add_bos_token',
-        'custom_token_bans',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'custom_token_bans',
+        'dry_sequence_breakers',
     },
     'ExLlamav2_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llama.cpp': {
         'temperature',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
         'tfs',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'seed',
+        'presence_penalty',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
         'ban_eos_token',
+        'seed',
         'custom_token_bans',
+        'grammar_string',
+        'grammar_file_row',
     },
     'llamacpp_HF': {
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
         'epsilon_cutoff',
         'eta_cutoff',
         'tfs',
         'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
         'encoder_repetition_penalty',
         'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
         'add_bos_token',
         'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
     },
     'TensorRT-LLM': {
         'temperature',
         'top_p',
         'top_k',
         'repetition_penalty',
-        'presence_penalty',
         'frequency_penalty',
-        'ban_eos_token',
+        'presence_penalty',
         'auto_max_new_tokens',
+        'ban_eos_token',
     }
 }
diff --git a/modules/presets.py b/modules/presets.py
index c8118fb3..b841af53 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -13,40 +13,40 @@ from modules.logging_colors import logger
 def default_preset():
     return {
         'temperature': 1,
-        'temperature_last': False,
-        'dynamic_temperature': False,
         'dynatemp_low': 1,
         'dynatemp_high': 1,
         'dynatemp_exponent': 1,
         'smoothing_factor': 0,
         'smoothing_curve': 1,
-        'top_p': 1,
         'min_p': 0,
+        'top_p': 1,
         'top_k': 0,
-        'repetition_penalty': 1,
-        'presence_penalty': 0,
-        'frequency_penalty': 0,
-        'repetition_penalty_range': 1024,
         'typical_p': 1,
-        'tfs': 1,
-        'top_a': 0,
+        'xtc_threshold': 0.1,
+        'xtc_probability': 0,
         'epsilon_cutoff': 0,
         'eta_cutoff': 0,
-        'guidance_scale': 1,
+        'tfs': 1,
+        'top_a': 0,
+        'dry_multiplier': 0,
+        'dry_allowed_length': 2,
+        'dry_base': 1.75,
+        'repetition_penalty': 1,
+        'frequency_penalty': 0,
+        'presence_penalty': 0,
+        'encoder_repetition_penalty': 1,
+        'no_repeat_ngram_size': 0,
+        'repetition_penalty_range': 1024,
         'penalty_alpha': 0,
+        'guidance_scale': 1,
         'mirostat_mode': 0,
         'mirostat_tau': 5,
         'mirostat_eta': 0.1,
         'do_sample': True,
-        'encoder_repetition_penalty': 1,
-        'no_repeat_ngram_size': 0,
-        'dry_multiplier': 0,
-        'dry_base': 1.75,
-        'dry_allowed_length': 2,
+        'dynamic_temperature': False,
+        'temperature_last': False,
+        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
         'dry_sequence_breakers': '"\\n", ":", "\\"", "*"',
-        'xtc_threshold': 0.1,
-        'xtc_probability': 0,
-        'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram'
     }
diff --git a/modules/shared.py b/modules/shared.py
index 89263205..928747f7 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -29,39 +29,39 @@ need_restart = False

 # UI defaults
 settings = {
-    'dark_theme': True,
     'show_controls': True,
     'start_with': '',
     'mode': 'chat-instruct',
     'chat_style': 'cai-chat',
+    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'prompt-default': 'QA',
     'prompt-notebook': 'QA',
-    'preset': 'min_p',
-    'max_new_tokens': 512,
-    'max_new_tokens_min': 1,
-    'max_new_tokens_max': 4096,
-    'negative_prompt': '',
-    'seed': -1,
-    'truncation_length': 2048,
-    'max_tokens_second': 0,
-    'max_updates_second': 0,
-    'prompt_lookup_num_tokens': 0,
-    'static_cache': False,
-    'custom_stopping_strings': '',
-    'custom_token_bans': '',
-    'auto_max_new_tokens': False,
-    'ban_eos_token': False,
-    'add_bos_token': True,
-    'skip_special_tokens': True,
-    'stream': True,
     'character': 'Assistant',
     'name1': 'You',
     'user_bio': '',
     'custom_system_message': '',
     'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
     'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
-    'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+    'preset': 'min_p',
+    'max_new_tokens': 512,
+    'max_new_tokens_min': 1,
+    'max_new_tokens_max': 4096,
+    'prompt_lookup_num_tokens': 0,
+    'max_tokens_second': 0,
+    'max_updates_second': 0,
+    'auto_max_new_tokens': False,
+    'ban_eos_token': False,
+    'add_bos_token': True,
+    'skip_special_tokens': True,
+    'stream': True,
+    'static_cache': False,
+    'truncation_length': 2048,
+    'seed': -1,
+    'custom_stopping_strings': '',
+    'custom_token_bans': '',
+    'negative_prompt': '',
     'autoload_model': False,
+    'dark_theme': True,
     'default_extensions': [],
 }
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 3e9788b8..152b2b8d 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
     clear_torch_cache()

     generate_params = {}
-    for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']:
+    for k in [
+        'temperature',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
+        'smoothing_factor',
+        'smoothing_curve',
+        'min_p',
+        'top_p',
+        'top_k',
+        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
+        'penalty_alpha',
+        'guidance_scale',
+        'mirostat_mode',
+        'mirostat_tau',
+        'mirostat_eta',
+        'max_new_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'dry_sequence_breakers',
+    ]:
         if k in state:
             generate_params[k] = state[k]

-    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
-        generate_params['sampler_priority'] = state['sampler_priority']
-    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
-        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
-
-    if state['negative_prompt'] != '':
-        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
-
-    if state['prompt_lookup_num_tokens'] > 0:
-        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
-
-    if state['static_cache']:
-        generate_params['cache_implementation'] = 'static'
-
     for k in ['epsilon_cutoff', 'eta_cutoff']:
         if state[k] > 0:
             generate_params[k] = state[k] * 1e-4

+    if state['prompt_lookup_num_tokens'] > 0:
+        generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']
+
     if state['ban_eos_token']:
         generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]

+    if state['static_cache']:
+        generate_params['cache_implementation'] = 'static'
+
+    if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
+        generate_params['sampler_priority'] = state['sampler_priority']
+    elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
+        generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
+
     if state['custom_token_bans']:
         to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
         if len(to_ban) > 0:
@@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
             else:
                 generate_params['suppress_tokens'] = to_ban

+    if state['negative_prompt'] != '':
+        generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])
+
     generate_params.update({'use_cache': not shared.args.no_cache})
     if shared.args.deepspeed:
         generate_params.update({'synced_gpus': True})
diff --git a/modules/ui.py b/modules/ui.py
index e66de434..4f7ee785 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -102,55 +102,55 @@ else:

 def list_model_elements():
     elements = [
-        'loader',
         'filter_by_loader',
+        'loader',
         'cpu_memory',
-        'auto_devices',
-        'disk',
-        'cpu',
-        'bf16',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
-        'compute_dtype',
-        'quant_type',
-        'use_double_quant',
-        'cfg_cache',
-        'no_flash_attn',
-        'no_xformers',
-        'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
+        'n_gpu_layers',
         'threads',
         'threads_batch',
         'n_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'n_gpu_layers',
-        'tensor_split',
+        'hqq_backend',
         'n_ctx',
-        'gpu_split',
         'max_seq_len',
-        'compress_pos_emb',
+        'cache_type',
+        'tensor_split',
+        'gpu_split',
         'alpha_value',
         'rope_freq_base',
-        'numa',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
+        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
         'attention_sink_size',
-        'hqq_backend',
+        'num_experts_per_token',
+        'tensorcores',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'flash_attn',
+        'use_flash_attention_2',
+        'streaming_llm',
+        'auto_devices',
+        'cpu',
+        'disk',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+        'autosplit',
+        'enable_tp',
+        'no_flash_attn',
+        'no_xformers',
+        'no_sdpa',
+        'cfg_cache',
         'cpp_runner',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
     ]

     if is_torch_xpu_available():
@@ -165,87 +165,87 @@ def list_model_elements():

 def list_interface_input_elements():
     elements = [
-        'max_new_tokens',
-        'auto_max_new_tokens',
-        'max_tokens_second',
-        'max_updates_second',
-        'prompt_lookup_num_tokens',
-        'seed',
         'temperature',
-        'temperature_last',
-        'dynamic_temperature',
         'dynatemp_low',
         'dynatemp_high',
         'dynatemp_exponent',
         'smoothing_factor',
         'smoothing_curve',
-        'top_p',
         'min_p',
+        'top_p',
         'top_k',
         'typical_p',
-        'epsilon_cutoff',
-        'eta_cutoff',
-        'repetition_penalty',
-        'presence_penalty',
-        'frequency_penalty',
-        'repetition_penalty_range',
-        'encoder_repetition_penalty',
-        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
         'xtc_threshold',
         'xtc_probability',
-        'do_sample',
+        'epsilon_cutoff',
+        'eta_cutoff',
+        'tfs',
+        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
+        'repetition_penalty',
+        'frequency_penalty',
+        'presence_penalty',
+        'encoder_repetition_penalty',
+        'no_repeat_ngram_size',
+        'repetition_penalty_range',
         'penalty_alpha',
+        'guidance_scale',
         'mirostat_mode',
         'mirostat_tau',
         'mirostat_eta',
-        'grammar_string',
-        'negative_prompt',
-        'guidance_scale',
-        'add_bos_token',
+        'max_new_tokens',
+        'prompt_lookup_num_tokens',
+        'max_tokens_second',
+        'max_updates_second',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
         'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
-        'truncation_length',
-        'custom_stopping_strings',
+        'add_bos_token',
         'skip_special_tokens',
         'stream',
         'static_cache',
-        'tfs',
-        'top_a',
+        'truncation_length',
+        'seed',
+        'sampler_priority',
+        'custom_stopping_strings',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
     ]

     # Chat elements
     elements += [
-        'textbox',
-        'start_with',
-        'character_menu',
         'history',
         'search_chat',
         'unique_id',
+        'textbox',
+        'start_with',
+        'mode',
+        'chat_style',
+        'chat-instruct_command',
+        'character_menu',
+        'name2',
+        'context',
+        'greeting',
         'name1',
         'user_bio',
-        'name2',
-        'greeting',
-        'context',
-        'mode',
         'custom_system_message',
         'instruction_template_str',
         'chat_template_str',
-        'chat_style',
-        'chat-instruct_command',
     ]

     # Notebook/default elements
     elements += [
-        'textbox-notebook',
         'textbox-default',
-        'output_textbox',
+        'textbox-notebook',
         'prompt_menu-default',
         'prompt_menu-notebook',
+        'output_textbox',
     ]

     # Model elements