Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2024-11-22 08:07:56 +01:00)
Implement Min P as a sampler option in HF loaders (#4449)
commit 367e5e6e43
parent fcb7017b7a
@@ -26,6 +26,7 @@ def build_parameters(body, chat=False):
         'do_sample': bool(body.get('do_sample', True)),
         'temperature': float(body.get('temperature', 0.5)),
         'top_p': float(body.get('top_p', 1)),
+        'min_p': float(body.get('min_p', 1)),
         'typical_p': float(body.get('typical_p', body.get('typical', 1))),
         'epsilon_cutoff': float(body.get('epsilon_cutoff', 0)),
         'eta_cutoff': float(body.get('eta_cutoff', 0)),
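For context, a minimal sketch of how a client could pass the new parameter through the blocking API that build_parameters() serves; the endpoint URL, port, and response shape are assumptions based on the project's standard API extension and are not part of this diff.

# Hypothetical client call; assumes the API extension is running on port 5000.
import requests

body = {
    'prompt': 'The quick brown fox',
    'max_new_tokens': 64,
    'do_sample': True,
    'temperature': 0.7,
    'min_p': 0.05,  # new field accepted by build_parameters() above
}
response = requests.post('http://127.0.0.1:5000/api/v1/generate', json=body)
print(response.json()['results'][0]['text'])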
@@ -149,6 +149,7 @@ loaders_samplers = {
     'Transformers': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -184,6 +185,7 @@ loaders_samplers = {
     'ExLlama_HF': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -244,6 +246,7 @@ loaders_samplers = {
     'ExLlamav2_HF': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -275,6 +278,7 @@ loaders_samplers = {
     'AutoGPTQ': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -310,6 +314,7 @@ loaders_samplers = {
     'GPTQ-for-LLaMa': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -361,6 +366,7 @@ loaders_samplers = {
     'llamacpp_HF': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -399,6 +405,7 @@ loaders_samplers = {
     'AutoAWQ': {
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
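As a quick illustration of how this mapping is consumed, membership in the per-loader set is what decides whether a sampler is exposed for that loader. The import path below is an assumption (the diff does not show the file name); the loop is illustrative only.

# Illustrative lookup; 'modules.loaders' is the presumed home of this dict.
from modules.loaders import loaders_samplers

for loader in ('Transformers', 'ExLlamav2_HF', 'AutoAWQ'):
    enabled = 'min_p' in loaders_samplers.get(loader, set())
    print(f"{loader}: min_p {'available' if enabled else 'hidden'}")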
@@ -9,6 +9,7 @@ def default_preset():
         'do_sample': True,
         'temperature': 1,
         'top_p': 1,
+        'min_p': 1,
         'top_k': 0,
         'typical_p': 1,
         'epsilon_cutoff': 0,
@@ -12,6 +12,34 @@ from transformers.generation.logits_process import (

 global_scores = None

+
+class MinPLogitsWarper(LogitsWarper):
+    def __init__(self, min_p: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
+        if min_p < 0 or min_p > 1.0:
+            raise ValueError(f"`min_p` has to be a float >= 0 and <= 1, but is {min_p}")
+        self.min_p = min_p
+        self.filter_value = filter_value
+        self.min_tokens_to_keep = min_tokens_to_keep
+
+    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
+        # Convert logits to probabilities
+        probs = torch.softmax(scores, dim=-1)
+        # Get the probability of the top token for each sequence in the batch
+        top_probs, _ = probs.max(dim=-1, keepdim=True)
+        # Calculate the actual min_p threshold by scaling min_p with the top token's probability
+        scaled_min_p = self.min_p * top_probs
+        # Create a mask for tokens that have a probability less than the scaled min_p
+        tokens_to_remove = probs < scaled_min_p
+
+        sorted_indices = torch.argsort(scores, descending=True, dim=-1)
+        sorted_indices_to_remove = torch.gather(tokens_to_remove, dim=-1, index=sorted_indices)
+
+        if self.min_tokens_to_keep > 1:
+            # Keep at least min_tokens_to_keep
+            sorted_indices_to_remove[..., : self.min_tokens_to_keep] = False
+
+        indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
+        scores = scores.masked_fill(indices_to_remove, self.filter_value)
+        return scores
+
+
 class TailFreeLogitsWarper(LogitsWarper):
     def __init__(self, tfs: float, filter_value: float = -float("Inf"), min_tokens_to_keep: int = 1):
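To make the effect of the new warper concrete, here is a small self-contained check. The import path for MinPLogitsWarper is an assumption (the diff does not name the file), and the toy logits are invented for illustration; they are not part of the commit.

# Toy demonstration of min_p filtering.
import torch

from modules.sampler_hijack import MinPLogitsWarper  # presumed location of the class above

logits = torch.tensor([[2.0, 1.0, 0.0, -3.0]])   # one sequence, four-token vocabulary
print(torch.softmax(logits, dim=-1))             # ~[0.66, 0.24, 0.09, 0.004]

warper = MinPLogitsWarper(min_p=0.2)             # keep tokens with prob >= 0.2 * 0.66 ≈ 0.13
filtered = warper(input_ids=None, scores=logits)
print(filtered)                                  # the two lowest-probability tokens are masked to -inf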
@@ -190,6 +218,8 @@ def get_logits_warper_patch(self, generation_config):
         warpers_to_add.append(TailFreeLogitsWarper(tfs=generation_config.tfs, min_tokens_to_keep=min_tokens_to_keep))
     if generation_config.top_a is not None and 0.0 <= generation_config.top_a <= 1.0:
         warpers_to_add.append(TopALogitsWarper(top_a=generation_config.top_a, min_tokens_to_keep=min_tokens_to_keep))
+    if generation_config.min_p is not None and 0.0 <= generation_config.min_p <= 1.0:
+        warpers_to_add.append(MinPLogitsWarper(min_p=generation_config.min_p, min_tokens_to_keep=min_tokens_to_keep))

     if warpers and isinstance(warpers[-1], LogitNormalization):
         warpers = warpers[:-1] + warpers_to_add + [warpers[-1]]
@@ -223,6 +253,7 @@ def get_logits_processor_patch(self, **kwargs):
 def generation_config_init_patch(self, **kwargs):
     self.__init___old(**kwargs)
     self.tfs = kwargs.pop("tfs", 1.0)
+    self.min_p = kwargs.pop("min_p", 0.0)
     self.top_a = kwargs.pop("top_a", 0.0)
     self.mirostat_mode = kwargs.pop("mirostat_mode", 0)
     self.mirostat_eta = kwargs.pop("mirostat_eta", 0.1)
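For readers unfamiliar with this pattern, a minimal sketch of how such an __init__ wrapper is typically installed. The patch target (transformers.GenerationConfig) and the wiring are assumptions made for illustration; only the kwargs.pop line comes from the diff above.

# Illustrative monkeypatch sketch; assumes the target is transformers.GenerationConfig.
from transformers import GenerationConfig

GenerationConfig.__init___old = GenerationConfig.__init__

def generation_config_init_patch(self, **kwargs):
    self.__init___old(**kwargs)             # run the stock initializer first
    self.min_p = kwargs.pop("min_p", 0.0)   # then attach the extra sampler field with a default

GenerationConfig.__init__ = generation_config_init_patch

print(GenerationConfig(do_sample=True).min_p)  # -> 0.0 when min_p is not supplied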
@@ -274,7 +274,7 @@ def apply_stopping_strings(reply, all_stop_strings):

 def generate_reply_HF(question, original_question, seed, state, stopping_strings=None, is_chat=False):
     generate_params = {}
-    for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'typical_p', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']:
+    for k in ['max_new_tokens', 'do_sample', 'temperature', 'top_p', 'min_p', 'typical_p', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'encoder_repetition_penalty', 'top_k', 'min_length', 'no_repeat_ngram_size', 'num_beams', 'penalty_alpha', 'length_penalty', 'early_stopping', 'tfs', 'top_a', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'guidance_scale']:
         generate_params[k] = state[k]

     if state['negative_prompt'] != '':
@@ -105,6 +105,7 @@ def list_interface_input_elements():
         'seed',
         'temperature',
         'top_p',
+        'min_p',
         'top_k',
         'typical_p',
         'epsilon_cutoff',
@@ -29,6 +29,7 @@ def create_ui(default_preset):
         shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], step=1, label='max_new_tokens', value=shared.settings['max_new_tokens'])
         shared.gradio['temperature'] = gr.Slider(0.01, 1.99, value=generate_params['temperature'], step=0.01, label='temperature')
         shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
+        shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
         shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
         shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
         shared.gradio['presence_penalty'] = gr.Slider(-2, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')