From 29c2693ea074390c29fedc2467a34df040f214c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 8 Jan 2024 23:28:35 -0300
Subject: [PATCH] dynatemp_low, dynatemp_high, dynatemp_exponent parameters
 (#5209)

---
 docs/03 - Parameters Tab.md      |  3 +--
 extensions/openai/typing.py      |  4 +++-
 modules/loaders.py               | 12 +++++++++---
 modules/presets.py               | 17 ++++++++++++-----
 modules/sampler_hijack.py        | 26 ++++++++++++++++++--------
 modules/text_generation.py      |  2 +-
 modules/ui.py                    |  4 +++-
 modules/ui_parameters.py         |  5 ++++-
 presets/Dynamic Temperature.yaml |  5 -----
 9 files changed, 51 insertions(+), 27 deletions(-)
 delete mode 100644 presets/Dynamic Temperature.yaml

diff --git a/docs/03 - Parameters Tab.md b/docs/03 - Parameters Tab.md
index a5d68270..ebb38d97 100644
--- a/docs/03 - Parameters Tab.md
+++ b/docs/03 - Parameters Tab.md
@@ -54,8 +54,7 @@ For more information about the parameters, the [transformers documentation](http
 * **mirostat_mode**: Activates the Mirostat sampling technique. It aims to control perplexity during sampling. See the [paper](https://arxiv.org/abs/2007.14966).
 * **mirostat_tau**: No idea, see the paper for details. According to the Preset Arena, 8 is a good value.
 * **mirostat_eta**: No idea, see the paper for details. According to the Preset Arena, 0.1 is a good value.
-* **dynamic_temperature_low**: The lower bound for temperature in Dynamic Temperature. Only used when "dynamic_temperature" is checked.
-* **dynamic_temperature**: Activates Dynamic Temperature. This modifies temperature to range between "dynamic_temperature_low" (minimum) and "temperature" (maximum), with an entropy-based scaling.
+* **dynamic_temperature**: Activates Dynamic Temperature. This modifies temperature to range between "dynatemp_low" (minimum) and "dynatemp_high" (maximum), with an entropy-based scaling. The steepness of the curve is controlled by "dynatemp_exponent".
 * **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency.
 * **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked).
 * **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (notably ExLlama v1 and v2). For these loaders, the seed has no effect.
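[Editor's note] The docs hunk above describes the new behavior only in prose. As a rough illustration of the entropy-based scaling it refers to, here is a minimal, self-contained PyTorch sketch. The body of TemperatureLogitsWarperWithDynatemp.__call__ is mostly outside the hunk context further down in this patch, so treat this as a reconstruction of the standard dynamic-temperature formulation rather than the PR's exact code; the function name dynatemp_scale is illustrative.

```python
# Reconstruction (not the PR's exact code): map the entropy of the token
# distribution to a temperature between dynatemp_low and dynatemp_high,
# with dynatemp_exponent controlling the steepness of the curve.
import torch

def dynatemp_scale(scores: torch.Tensor, dynatemp_low: float,
                   dynatemp_high: float, dynatemp_exponent: float) -> torch.Tensor:
    probs = torch.softmax(scores, dim=-1)
    # Shannon entropy of the distribution (clamp avoids log(0))
    entropy = -torch.sum(probs * torch.log(probs.clamp(min=1e-10)), dim=-1)
    # Normalize by the maximum possible entropy for this vocabulary size
    max_entropy = torch.log(torch.tensor(float(scores.shape[-1])))
    normalized = (entropy / max_entropy) ** dynatemp_exponent
    # Confident (low-entropy) distributions get a temperature near dynatemp_low
    dyn_temp = dynatemp_low + (dynatemp_high - dynatemp_low) * normalized
    return scores / dyn_temp.unsqueeze(-1)

# Example: a peaked distribution is reshaped with a temperature near the low bound
logits = torch.tensor([[10.0, 1.0, 0.5, 0.1]])
scaled = dynatemp_scale(logits, dynatemp_low=0.5, dynatemp_high=1.5, dynatemp_exponent=1.0)
```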
diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index 6e8cf614..f8a5203f 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -9,7 +9,9 @@ class GenerationOptions(BaseModel):
     preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
     min_p: float = 0
     dynamic_temperature: bool = False
-    dynamic_temperature_low: float = 0.1
+    dynatemp_low: float = 1
+    dynatemp_high: float = 1
+    dynatemp_exponent: float = 1
     top_k: int = 0
     repetition_penalty: float = 1
     repetition_penalty_range: int = 1024
diff --git a/modules/loaders.py b/modules/loaders.py
index bac73cfe..4b30dbf9 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -156,7 +156,9 @@ def transformers_samplers():
         'temperature',
         'temperature_last',
         'dynamic_temperature',
-        'dynamic_temperature_low',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
         'top_p',
         'min_p',
         'top_k',
@@ -223,7 +225,9 @@ loaders_samplers = {
         'temperature',
         'temperature_last',
         'dynamic_temperature',
-        'dynamic_temperature_low',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
         'top_p',
         'min_p',
         'top_k',
@@ -277,7 +281,9 @@ loaders_samplers = {
         'temperature',
         'temperature_last',
         'dynamic_temperature',
-        'dynamic_temperature_low',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
         'top_p',
         'min_p',
         'top_k',
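[Editor's note] With the GenerationOptions fields above, the three new parameters can be passed straight through the OpenAI-compatible API. A hedged request example follows; the host, port 5000, and the /v1/completions route are the webui defaults around the time of this PR, so adjust them to your deployment.

```python
import requests

payload = {
    "prompt": "Once upon a time",
    "max_tokens": 64,
    "dynamic_temperature": True,  # master switch (pre-existing field)
    "dynatemp_low": 0.5,          # new in this PR, replaces dynamic_temperature_low
    "dynatemp_high": 1.5,         # new in this PR
    "dynatemp_exponent": 1.0,     # new in this PR
}
response = requests.post("http://127.0.0.1:5000/v1/completions", json=payload, timeout=60)
print(response.json()["choices"][0]["text"])
```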
diff --git a/modules/presets.py b/modules/presets.py
index 42ca7820..5e686e34 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -6,6 +6,7 @@ import yaml
 
 from modules import shared
 from modules.loaders import loaders_samplers
+from modules.logging_colors import logger
 
 
 def default_preset():
@@ -13,7 +14,9 @@
         'temperature': 1,
         'temperature_last': False,
         'dynamic_temperature': False,
-        'dynamic_temperature_low': 0.1,
+        'dynatemp_low': 1,
+        'dynatemp_high': 1,
+        'dynatemp_exponent': 1,
         'top_p': 1,
         'min_p': 0,
         'top_k': 0,
@@ -48,11 +51,15 @@ def presets_params():
 def load_preset(name):
     generate_params = default_preset()
     if name not in ['None', None, '']:
-        with open(Path(f'presets/{name}.yaml'), 'r') as infile:
-            preset = yaml.safe_load(infile)
+        path = Path(f'presets/{name}.yaml')
+        if path.exists():
+            with open(path, 'r') as infile:
+                preset = yaml.safe_load(infile)
 
-        for k in preset:
-            generate_params[k] = preset[k]
+            for k in preset:
+                generate_params[k] = preset[k]
+        else:
+            logger.error(f"The preset \"{name}\" does not exist under \"{path}\". Using the default parameters.")
 
     return generate_params
 
diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py
index f593080c..e9d82d3c 100644
--- a/modules/sampler_hijack.py
+++ b/modules/sampler_hijack.py
@@ -16,7 +16,7 @@ global_scores = None
 
 
 class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
-    def __init__(self, temperature: float, dynamic_temperature: bool, dynamic_temperature_low: float):
+    def __init__(self, temperature: float, dynamic_temperature: bool, dynatemp_low: float, dynatemp_high: float, dynatemp_exponent: float):
         if not isinstance(temperature, float) or not (temperature > 0):
             except_msg = (
                 f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token "
@@ -29,7 +29,9 @@
 
         self.temperature = temperature
         self.dynamic_temperature = dynamic_temperature
-        self.dynamic_temperature_low = dynamic_temperature_low
+        self.dynatemp_low = dynatemp_low
+        self.dynatemp_high = dynatemp_high
+        self.dynatemp_exponent = dynatemp_exponent
 
     def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
 
@@ -40,9 +42,9 @@
 
         # Dynamic temperature
         else:
-            min_temp = self.dynamic_temperature_low
-            max_temp = self.temperature
-            exponent_val = 1.0
+            min_temp = self.dynatemp_low
+            max_temp = self.dynatemp_high
+            exponent_val = self.dynatemp_exponent
 
             # Convert logits to probabilities
             probs = torch.softmax(scores, dim=-1)
@@ -82,7 +84,7 @@ class TemperatureLogitsWarperWithDynatemp(LogitsWarper):
 
             # max_prob_token_id = torch.argmax(scores, dim=-1)  # Get the token ID with the highest probability
             # max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id))  # Convert ID to token
-            # print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp)
+            # print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp, "exponent=", exponent_val)
 
             return scores
 
@@ -292,7 +294,13 @@ def get_logits_warper_patch(self, generation_config):
     warpers = self._get_logits_warper_old(generation_config)
     for i in range(len(warpers)):
         if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper':
-            warpers[i] = TemperatureLogitsWarperWithDynatemp(temperature, generation_config.dynamic_temperature, generation_config.dynamic_temperature_low)
+            warpers[i] = TemperatureLogitsWarperWithDynatemp(
+                temperature,
+                generation_config.dynamic_temperature,
+                generation_config.dynatemp_low,
+                generation_config.dynatemp_high,
+                generation_config.dynatemp_exponent
+            )
 
     warpers_to_add = LogitsProcessorList()
     min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1
@@ -361,7 +369,9 @@ def generation_config_init_patch(self, **kwargs):
     self.__init___old(**kwargs)
     self.min_p = kwargs.pop("min_p", 0.0)
     self.dynamic_temperature = kwargs.pop("dynamic_temperature", False)
-    self.dynamic_temperature_low = kwargs.pop("dynamic_temperature_low", 0.1)
+    self.dynatemp_low = kwargs.pop("dynatemp_low", 1)
+    self.dynatemp_high = kwargs.pop("dynatemp_high", 1)
+    self.dynatemp_exponent = kwargs.pop("dynatemp_exponent", 1)
     self.tfs = kwargs.pop("tfs", 1.0)
     self.top_a = kwargs.pop("top_a", 0.0)
     self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
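[Editor's note] The presets.py hunk above also changes failure behavior: a preset name with no matching YAML file now logs an error and falls back to the defaults instead of raising FileNotFoundError from open(). A self-contained sketch of the resulting control flow, with logging reduced to print() (the real function uses modules.logging_colors.logger):

```python
from pathlib import Path

import yaml

def load_preset_sketch(name: str, defaults: dict) -> dict:
    generate_params = dict(defaults)
    if name not in ['None', None, '']:
        path = Path(f'presets/{name}.yaml')
        if path.exists():
            with open(path, 'r') as infile:
                preset = yaml.safe_load(infile)

            for k in preset:
                generate_params[k] = preset[k]
        else:
            # New in this PR: degrade gracefully instead of crashing
            print(f'The preset "{name}" does not exist under "{path}". Using the default parameters.')

    return generate_params
```

In particular, requesting the "Dynamic Temperature" preset deleted at the end of this patch now yields the defaults rather than a traceback.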
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 398095ef..f17d2c8e 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -285,7 +285,7 @@ def get_reply_from_output_ids(output_ids, state, starting_from=0):
 
 def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
     generate_params = {}
-    for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynamic_temperature_low', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
+    for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']:
         generate_params[k] = state[k]
 
     if state['negative_prompt'] != '':
diff --git a/modules/ui.py b/modules/ui.py
index d543bff7..d70b1953 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -116,7 +116,9 @@ def list_interface_input_elements():
         'temperature',
         'temperature_last',
         'dynamic_temperature',
-        'dynamic_temperature_low',
+        'dynatemp_low',
+        'dynatemp_high',
+        'dynatemp_exponent',
         'top_p',
         'min_p',
         'top_k',
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 728802ba..11af715f 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -49,8 +49,10 @@ def create_ui(default_preset):
                     shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
                     shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
                     shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
-                    shared.gradio['dynamic_temperature_low'] = gr.Slider(0.01, 5, value=generate_params['dynamic_temperature_low'], step=0.01, label='dynamic_temperature_low', info='Only used when dynamic_temperature is checked.')
                     shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
+                    shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
+                    shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
+                    shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
                     shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Makes temperature the last sampler instead of the first.')
                     shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
                     shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
@@ -97,6 +99,7 @@ def create_event_handlers():
     shared.gradio['preset_menu'].change(presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
     shared.gradio['random_preset'].click(presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()))
     shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'))
+    shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'))
 
 
 def get_truncation_length():
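[Editor's note] The event handler added above uses a common Gradio pattern: the checkbox's .change event returns one gr.update(visible=...) per dependent output component. A minimal standalone sketch of the same wiring, using the Gradio 3.x API that the diff itself relies on (the component variable names are illustrative):

```python
import gradio as gr

with gr.Blocks() as demo:
    enabled = gr.Checkbox(value=False, label='dynamic_temperature')
    low = gr.Slider(0.01, 5, value=1, step=0.01, label='dynatemp_low', visible=False)
    high = gr.Slider(0.01, 5, value=1, step=0.01, label='dynatemp_high', visible=False)
    exponent = gr.Slider(0.01, 5, value=1, step=0.01, label='dynatemp_exponent', visible=False)
    # One update per output component, in order
    enabled.change(lambda x: [gr.update(visible=x)] * 3, enabled, [low, high, exponent])

demo.launch()
```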
diff --git a/presets/Dynamic Temperature.yaml b/presets/Dynamic Temperature.yaml
deleted file mode 100644
index 3ef48739..00000000
--- a/presets/Dynamic Temperature.yaml
+++ /dev/null
@@ -1,5 +0,0 @@
-dynamic_temperature: true
-dynamic_temperature_low: 0.1
-temperature: 3
-temperature_last: true
-min_p: 0.05
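[Editor's note] The deleted preset above still used the removed dynamic_temperature_low key. For local presets written against the old names, here is a hedged one-off migration sketch; mapping the old upper bound ('temperature') to dynatemp_high is an assumption based on the docs line removed earlier in this patch, which described "temperature" as the maximum.

```python
from pathlib import Path

import yaml

for path in Path('presets').glob('*.yaml'):
    preset = yaml.safe_load(path.read_text())
    if 'dynamic_temperature_low' in preset:
        preset['dynatemp_low'] = preset.pop('dynamic_temperature_low')
        # Assumption: the old code used 'temperature' as the upper bound
        preset.setdefault('dynatemp_high', preset.get('temperature', 1))
        preset.setdefault('dynatemp_exponent', 1)
        path.write_text(yaml.safe_dump(preset, sort_keys=False))
```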