From 3eca20c015b18c7da4128ffd93f0388feed06136 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Cla=C3=9Fen?= Date: Sat, 6 Jan 2024 07:05:03 +0100 Subject: [PATCH 1/5] Typo fixed in variable names (#5184) --- extensions/openai/script.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 0be83442..e8647357 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -174,13 +174,13 @@ async def handle_audio_transcription(request: Request): # Create AudioData object audio_data = sr.AudioData(raw_data, audio_data.frame_rate, audio_data.sample_width) - whipser_language = form.getvalue('language', None) - whipser_model = form.getvalue('model', 'tiny') # Use the model from the form data if it exists, otherwise default to tiny + whisper_language = form.getvalue('language', None) + whisper_model = form.getvalue('model', 'tiny') # Use the model from the form data if it exists, otherwise default to tiny transcription = {"text": ""} try: - transcription["text"] = r.recognize_whisper(audio_data, language=whipser_language, model=whipser_model) + transcription["text"] = r.recognize_whisper(audio_data, language=whisper_language, model=whisper_model) except sr.UnknownValueError: print("Whisper could not understand audio") transcription["text"] = "Whisper could not understand audio UnknownValueError" From 48327cc5c4dbc76a8c8dc795acd3dcf72f6d2896 Mon Sep 17 00:00:00 2001 From: kalomaze <66376113+kalomaze@users.noreply.github.com> Date: Sun, 7 Jan 2024 07:36:26 -0600 Subject: [PATCH 2/5] Dynamic Temperature HF loader support (#5174) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- docs/03 - Parameters Tab.md | 1 + extensions/dynatemp_with_range/README.md | 17 ++++ extensions/dynatemp_with_range/script.py | 50 ++++++++++++ extensions/openai/typing.py | 1 + modules/callbacks.py | 8 +- modules/llamacpp_hf.py | 3 + modules/loaders.py | 3 + modules/logits.py | 2 +- modules/presets.py | 1 + modules/sampler_hijack.py | 98 +++++++++++++++++++++++- modules/text_generation.py | 2 +- modules/ui.py | 1 + modules/ui_parameters.py | 1 + presets/Dynamic Temperature.yaml | 4 + 14 files changed, 184 insertions(+), 8 deletions(-) create mode 100644 extensions/dynatemp_with_range/README.md create mode 100644 extensions/dynatemp_with_range/script.py create mode 100644 presets/Dynamic Temperature.yaml diff --git a/docs/03 - Parameters Tab.md b/docs/03 - Parameters Tab.md index 08cedc93..88309885 100644 --- a/docs/03 - Parameters Tab.md +++ b/docs/03 - Parameters Tab.md @@ -54,6 +54,7 @@ For more information about the parameters, the [transformers documentation](http * **mirostat_mode**: Activates the Mirostat sampling technique. It aims to control perplexity during sampling. See the [paper](https://arxiv.org/abs/2007.14966). * **mirostat_tau**: No idea, see the paper for details. According to the Preset Arena, 8 is a good value. * **mirostat_eta**: No idea, see the paper for details. According to the Preset Arena, 0.1 is a good value. +* **dynatemp**: Dynamic Temperature is activated when this parameter is greater than 0. The temperature range is determined by adding and subtracting dynatemp from the current temperature. * **temperature_last**: Makes temperature the last sampler instead of the first. With this, you can remove low probability tokens with a sampler like min_p and then use a high temperature to make the model creative without losing coherency. * **do_sample**: When unchecked, sampling is entirely disabled, and greedy decoding is used instead (the most likely token is always picked). * **Seed**: Set the Pytorch seed to this number. Note that some loaders do not use Pytorch (notably llama.cpp), and others are not deterministic (notably ExLlama v1 and v2). For these loaders, the seed has no effect. diff --git a/extensions/dynatemp_with_range/README.md b/extensions/dynatemp_with_range/README.md new file mode 100644 index 00000000..12b1ca51 --- /dev/null +++ b/extensions/dynatemp_with_range/README.md @@ -0,0 +1,17 @@ +# dynatemp_with_range + +This extension makes it possible to set the minimum and maximum temperatures for dynamic temperature explicitly. + +For instance, you can directly set + +``` +min_T = 0.1 +max_T = 3 +``` + +instead of having to convert that to + +``` +T = 1.55 +dynatemp = 1.45 +``` diff --git a/extensions/dynatemp_with_range/script.py b/extensions/dynatemp_with_range/script.py new file mode 100644 index 00000000..f4b09d4b --- /dev/null +++ b/extensions/dynatemp_with_range/script.py @@ -0,0 +1,50 @@ +import gradio as gr + +params = { + "activate": True, + "minimum_temperature": 0.1, + "maximum_temperature": 2, +} + +def convert_to_dynatemp(): + temperature = 0.5 * (params["minimum_temperature"] + params["maximum_temperature"]) + dynatemp = params["maximum_temperature"] - temperature + return temperature, dynatemp + + +def state_modifier(state): + """ + Modifies the state variable, which is a dictionary containing the input + values in the UI like sliders and checkboxes. + """ + + if params["activate"]: + temperature, dynatemp = convert_to_dynatemp() + + state["temperature"] = temperature + state["dynatemp"] = dynatemp + + return state + + +def generate_info(): + temperature, dynatemp = convert_to_dynatemp() + return f"The combination above is equivalent to: T={temperature:.2f}, dynatemp={dynatemp:.2f}" + + +def ui(): + activate = gr.Checkbox(value=params['activate'], label='Activate Dynamic Temperature Range', info='When checked, the default temperature/dynatemp parameters are ignored and the parameters below are used instead.') + with gr.Row(): + minimum_temperature = gr.Slider(0, 5, step=0.01, label="Minimum temperature", value=params["minimum_temperature"], interactive=True) + maximum_temperature = gr.Slider(0, 5, step=0.01, label="Maximum temperature", value=params["maximum_temperature"], interactive=True) + + info = gr.HTML(generate_info()) + + activate.change(lambda x: params.update({"activate": x}), activate, None) + minimum_temperature.change( + lambda x: params.update({"minimum_temperature": x}), minimum_temperature, None).then( + generate_info, None, info, show_progress=False) + + maximum_temperature.change( + lambda x: params.update({"maximum_temperature": x}), maximum_temperature, None).then( + generate_info, None, info, show_progress=False) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 3a212dd9..aa2a191f 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -8,6 +8,7 @@ from pydantic import BaseModel, Field class GenerationOptions(BaseModel): preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.") min_p: float = 0 + dynatemp: float = 0 top_k: int = 0 repetition_penalty: float = 1 repetition_penalty_range: int = 1024 diff --git a/modules/callbacks.py b/modules/callbacks.py index bb979a6c..0b219954 100644 --- a/modules/callbacks.py +++ b/modules/callbacks.py @@ -10,6 +10,10 @@ from transformers import is_torch_xpu_available import modules.shared as shared +class StopNowException(Exception): + pass + + class _StopEverythingStoppingCriteria(transformers.StoppingCriteria): def __init__(self): transformers.StoppingCriteria.__init__(self) @@ -49,13 +53,13 @@ class Iteratorize: def _callback(val): if self.stop_now or shared.stop_everything: - raise ValueError + raise StopNowException self.q.put(val) def gentask(): try: ret = self.mfunc(callback=_callback, *args, **self.kwargs) - except ValueError: + except StopNowException: pass except: traceback.print_exc() diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index b8fde4d4..d491c463 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -144,6 +144,9 @@ class LlamacppHF(PreTrainedModel): self.model.n_tokens = longest_prefix if len(seq_tensor) - longest_prefix > 0: self.model.eval(seq[longest_prefix:]) + else: + self.model.n_tokens -= 1 + self.model.eval([seq[-1]]) if reset: self.model.reset() diff --git a/modules/loaders.py b/modules/loaders.py index 238ae91b..c10fd9c9 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -155,6 +155,7 @@ def transformers_samplers(): return { 'temperature', 'temperature_last', + 'dynatemp', 'top_p', 'min_p', 'top_k', @@ -220,6 +221,7 @@ loaders_samplers = { 'ExLlamav2_HF': { 'temperature', 'temperature_last', + 'dynatemp', 'top_p', 'min_p', 'top_k', @@ -272,6 +274,7 @@ loaders_samplers = { 'llamacpp_HF': { 'temperature', 'temperature_last', + 'dynatemp', 'top_p', 'min_p', 'top_k', diff --git a/modules/logits.py b/modules/logits.py index 19a9993f..c630be88 100644 --- a/modules/logits.py +++ b/modules/logits.py @@ -8,7 +8,7 @@ from modules.text_generation import generate_reply global_scores = None -def get_next_logits(prompt, state, use_samplers, previous, top_logits=50, return_dict=False): +def get_next_logits(prompt, state, use_samplers, previous, top_logits=25, return_dict=False): if shared.model is None: logger.error("No model is loaded! Select one in the Model tab.") return 'Error: No model is loaded1 Select one in the Model tab.', previous diff --git a/modules/presets.py b/modules/presets.py index 15443627..2af4dd4d 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -12,6 +12,7 @@ def default_preset(): return { 'temperature': 1, 'temperature_last': False, + 'dynatemp': 0, 'top_p': 1, 'min_p': 0, 'top_k': 0, diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index 218d1b11..e3edd484 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -10,9 +10,84 @@ from transformers.generation.logits_process import ( TemperatureLogitsWarper ) +from modules import shared + global_scores = None +class TemperatureLogitsWarperWithDynatemp(LogitsWarper): + def __init__(self, temperature: float, dynatemp: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1): + if not isinstance(temperature, float) or not (temperature > 0): + except_msg = ( + f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token " + "scores will be invalid." + ) + if isinstance(temperature, float) and temperature == 0.0: + except_msg += " If you're looking for greedy decoding strategies, set `do_sample=False`." + + raise ValueError(except_msg) + + self.temperature = temperature + self.dynatemp = dynatemp + self.filter_value = filter_value + self.min_tokens_to_keep = min_tokens_to_keep + + def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: + + # Regular temperature + if self.dynatemp == 0: + scores = scores / self.temperature + return scores + + # Dynamic temperature + else: + min_temp = max(0.0, self.temperature - self.dynatemp) + max_temp = self.temperature + self.dynatemp + exponent_val = 1.0 + + # Convert logits to probabilities + probs = torch.softmax(scores, dim=-1) + + # Calculate entropy of the softmax probabilities + entropy = -1.0 * torch.where(probs > 0, probs * torch.log(probs), torch.zeros_like(probs)).sum() + + # Guard against future possible division by zero + entropy = max(entropy, torch.tensor(1e-10)) # Ensures entropy is slightly greater than 0 + + # Any logits which are not -Infinity will be considered for calculating max entropy. + num_valid_tokens = torch.sum(scores > -float('inf')).item() + + # Now, calculate the max entropy by using only the valid tokens' count + max_entropy = math.log(num_valid_tokens) + + # Guard against future possible division by zero + max_entropy = max_entropy if max_entropy > 0.0 else 1e-10 + + # Normalize the entropy + normalized_entropy = entropy / max_entropy + + # Map the normalized entropy to the desired temperature range using the power function + dyn_temp = min_temp + (max_temp - min_temp) * (normalized_entropy.pow(exponent_val)) + + # Apply the dynamically calculated temperature scaling + scores = scores / dyn_temp + + # print("----------------------\nTemperature from generation_config:", self.temperature) + # print("min_temp:", min_temp) + # print("max_temp:", max_temp) + # print("Entropy:", entropy.item()) + # print("Max Possible Entropy considering valid tokens only:", max_entropy) + # print("Normalized Entropy:", normalized_entropy.item()) + # print("Dynamic Temperature (dyn_temp):", dyn_temp.item()) + # print("----------------------") + + # max_prob_token_id = torch.argmax(scores, dim=-1) # Get the token ID with the highest probability + # max_prob_token = shared.tokenizer.convert_ids_to_tokens(int(max_prob_token_id)) # Convert ID to token + # print("--- T=", float(dyn_temp), "token=", max_prob_token, "min=", min_temp, "max=", max_temp) + + return scores + + class MinPLogitsWarper(LogitsWarper): def __init__(self, min_p: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1): if min_p < 0 or min_p > 1.0: @@ -198,14 +273,28 @@ class RepetitionPenaltyLogitsProcessorWithRange(LogitsProcessor): # presence_penalty and frequency_penalty raw_presence_penalty = (counts > 0).to(scores.dtype) raw_frequency_penalty = counts.to(scores.dtype) - additive_penalty = raw_presence_penalty*self.presence_penalty + raw_frequency_penalty*self.frequency_penalty + additive_penalty = raw_presence_penalty * self.presence_penalty + raw_frequency_penalty * self.frequency_penalty scores_row.scatter_add_(0, unique_ids, -additive_penalty) return scores def get_logits_warper_patch(self, generation_config): + # Make sure that temperature is float and not int + if isinstance(generation_config.temperature, int): + generation_config.temperature = float(generation_config.temperature) + + temperature = generation_config.temperature + if generation_config.dynatemp > 0: + # Make sure TemperatureLogitsWarper will be created by temporarily + # setting temperature to a value != 1. + generation_config.temperature = 1.1 + warpers = self._get_logits_warper_old(generation_config) + for i in range(len(warpers)): + if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper': + warpers[i] = TemperatureLogitsWarperWithDynatemp(temperature, generation_config.dynatemp) + warpers_to_add = LogitsProcessorList() min_tokens_to_keep = 2 if generation_config.num_beams > 1 else 1 @@ -232,18 +321,18 @@ def get_logits_warper_patch(self, generation_config): if generation_config.temperature_last: temperature_idx = None for i in range(len(warpers)): - if warpers[i].__class__.__name__ == 'TemperatureLogitsWarper': + if warpers[i].__class__.__name__ in ['TemperatureLogitsWarper', 'TemperatureLogitsWarperWithDynatemp']: temperature_idx = i break if temperature_idx is not None: - warpers = warpers[:temperature_idx] + warpers[temperature_idx + 1:] + [warpers[temperature_idx]] - warpers = LogitsProcessorList(warpers) + warpers.append(warpers.pop(temperature_idx)) if normalize is not None: warpers.append(normalize) warpers.append(SpyLogitsWarper()) + warpers = LogitsProcessorList(warpers) # for i in range(len(warpers)): # print(warpers[i].__class__.__name__) return warpers @@ -272,6 +361,7 @@ def get_logits_processor_patch(self, **kwargs): def generation_config_init_patch(self, **kwargs): self.__init___old(**kwargs) self.min_p = kwargs.pop("min_p", 0.0) + self.dynatemp = kwargs.pop("dynatemp", 0.0) self.tfs = kwargs.pop("tfs", 1.0) self.top_a = kwargs.pop("top_a", 0.0) self.mirostat_mode = kwargs.pop("mirostat_mode", 0) diff --git a/modules/text_generation.py b/modules/text_generation.py index d4380188..9a908df3 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -283,7 +283,7 @@ def get_reply_from_output_ids(output_ids, state, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'do_sample', 'temperature', 'temperature_last', 'top_p', 'min_p', 'typical_p', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']: + for k in ['max_new_tokens', 'do_sample', 'temperature', 'temperature_last', 'dynatemp', 'top_p', 'min_p', 'typical_p', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']: generate_params[k] = state[k] if state['negative_prompt'] != '': diff --git a/modules/ui.py b/modules/ui.py index 47f7af99..81aa448b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -115,6 +115,7 @@ def list_interface_input_elements(): 'seed', 'temperature', 'temperature_last', + 'dynatemp', 'top_p', 'min_p', 'top_k', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 4a2b3116..ba4c01cd 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -49,6 +49,7 @@ def create_ui(default_preset): shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') + shared.gradio['dynatemp'] = gr.Slider(0, 5, value=generate_params['dynatemp'], step=0.01, label='dynatemp') shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Makes temperature the last sampler instead of the first.') shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') diff --git a/presets/Dynamic Temperature.yaml b/presets/Dynamic Temperature.yaml new file mode 100644 index 00000000..21fef1e7 --- /dev/null +++ b/presets/Dynamic Temperature.yaml @@ -0,0 +1,4 @@ +temperature: 1.55 +temperature_last: true +dynatemp: 1.45 +min_p: 0.05 From d93db3b48655fb68690bbd1b4afd736e7e3cd736 Mon Sep 17 00:00:00 2001 From: Yilong Guo Date: Sun, 7 Jan 2024 21:40:30 +0800 Subject: [PATCH 3/5] Refine ipex setup (#5191) --- one_click.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index f5adb80c..2bd31f72 100644 --- a/one_click.py +++ b/one_click.py @@ -233,7 +233,7 @@ def install_webui(): elif is_linux() and selected_gpu in ["APPLE", "NONE"]: install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" elif selected_gpu == "INTEL": - install_pytorch += "intel_extension_for_pytorch==2.1.* --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" # Install Git and then Pytorch print_big_message("Installing PyTorch.") @@ -244,6 +244,13 @@ def install_webui(): print_big_message("Installing the CUDA runtime libraries.") run_cmd(f"conda install -y -c \"nvidia/label/{'cuda-12.1.1' if use_cuda118 == 'N' else 'cuda-11.8.0'}\" cuda-runtime", assert_success=True, environment=True) + if selected_gpu == "INTEL": + # Install oneAPI dependencies via conda + print_big_message("Installing Intel oneAPI runtime libraries.") + run_cmd(f"conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + # Install libuv required by Intel-patched torch + run_cmd(f"conda install -y libuv") + # Install the webui requirements update_requirements(initial_installation=True) From c4c7fc4ab34e77642291064880d5a4176de2ac75 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Jan 2024 09:30:55 -0800 Subject: [PATCH 4/5] Lint --- extensions/dynatemp_with_range/script.py | 3 ++- modules/sampler_hijack.py | 4 +--- one_click.py | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/extensions/dynatemp_with_range/script.py b/extensions/dynatemp_with_range/script.py index f4b09d4b..158abb06 100644 --- a/extensions/dynatemp_with_range/script.py +++ b/extensions/dynatemp_with_range/script.py @@ -6,6 +6,7 @@ params = { "maximum_temperature": 2, } + def convert_to_dynatemp(): temperature = 0.5 * (params["minimum_temperature"] + params["maximum_temperature"]) dynatemp = params["maximum_temperature"] - temperature @@ -22,7 +23,7 @@ def state_modifier(state): temperature, dynatemp = convert_to_dynatemp() state["temperature"] = temperature - state["dynatemp"] = dynatemp + state["dynatemp"] = dynatemp return state diff --git a/modules/sampler_hijack.py b/modules/sampler_hijack.py index e3edd484..b5524548 100644 --- a/modules/sampler_hijack.py +++ b/modules/sampler_hijack.py @@ -16,7 +16,7 @@ global_scores = None class TemperatureLogitsWarperWithDynatemp(LogitsWarper): - def __init__(self, temperature: float, dynatemp: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1): + def __init__(self, temperature: float, dynatemp: float): if not isinstance(temperature, float) or not (temperature > 0): except_msg = ( f"`temperature` (={temperature}) has to be a strictly positive float, otherwise your next token " @@ -29,8 +29,6 @@ class TemperatureLogitsWarperWithDynatemp(LogitsWarper): self.temperature = temperature self.dynatemp = dynatemp - self.filter_value = filter_value - self.min_tokens_to_keep = min_tokens_to_keep def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor: diff --git a/one_click.py b/one_click.py index 2bd31f72..86d97c6e 100644 --- a/one_click.py +++ b/one_click.py @@ -247,9 +247,9 @@ def install_webui(): if selected_gpu == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") - run_cmd(f"conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + run_cmd("conda install -y -c intel dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") # Install libuv required by Intel-patched torch - run_cmd(f"conda install -y libuv") + run_cmd("conda install -y libuv") # Install the webui requirements update_requirements(initial_installation=True) From cf820c69c584630002ae0b2ad7f74f23c4795b39 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Jan 2024 10:06:23 -0800 Subject: [PATCH 5/5] Print generation parameters with --verbose (HF only) --- modules/text_generation.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 9a908df3..295b0dc5 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -1,6 +1,7 @@ import ast import copy import html +import pprint import random import re import time @@ -65,7 +66,8 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap all_stop_strings += st if shared.args.verbose: - print(f'\n\n{question}\n--------------------\n') + logger.info("PROMPT=") + print(question) shared.stop_everything = False clear_torch_cache() @@ -283,7 +285,7 @@ def get_reply_from_output_ids(output_ids, state, starting_from=0): def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False): generate_params = {} - for k in ['max_new_tokens', 'do_sample', 'temperature', 'temperature_last', 'dynatemp', 'top_p', 'min_p', 'typical_p', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']: + for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynatemp', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'min_length', 'num_beams', 'length_penalty', 'early_stopping']: generate_params[k] = state[k] if state['negative_prompt'] != '': @@ -342,6 +344,11 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings apply_extensions('logits_processor', processor, input_ids) generate_params['logits_processor'] = processor + if shared.args.verbose: + logger.info("GENERATE_PARAMS=") + pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(generate_params) + print() + t0 = time.time() try: if not is_chat and not shared.is_seq2seq: