mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-25 22:08:53 +01:00
Implement auto_max_new_tokens for ExLlama
This commit is contained in:
parent
e931844fe2
commit
32a2bbee4a
@ -94,11 +94,15 @@ class ExllamaModel:
|
||||
# Tokenizing the input
|
||||
ids = self.generator.tokenizer.encode(prompt)
|
||||
ids = ids[:, -get_max_prompt_length(state):]
|
||||
if state['auto_max_new_tokens']:
|
||||
max_new_tokens = state['truncation_length'] - ids.shape[-1]
|
||||
else:
|
||||
max_new_tokens = state['max_new_tokens']
|
||||
|
||||
self.generator.gen_begin_reuse(ids)
|
||||
initial_len = self.generator.sequence[0].shape[0]
|
||||
has_leading_space = False
|
||||
for i in range(state['max_new_tokens']):
|
||||
for i in range(max_new_tokens):
|
||||
token = self.generator.gen_single_token()
|
||||
if i == 0 and self.generator.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
|
||||
has_leading_space = True
|
||||
|
@ -151,6 +151,7 @@ loaders_samplers = {
|
||||
'repetition_penalty_range',
|
||||
'seed',
|
||||
'ban_eos_token',
|
||||
'auto_max_new_tokens',
|
||||
},
|
||||
'AutoGPTQ': {
|
||||
'temperature',
|
||||
|
Loading…
Reference in New Issue
Block a user