mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-26 09:40:20 +01:00
Fix Training Pad Token (#1678)
Currently, training pads with the character "0" instead of token id 0 (<unk> in the case of LLaMA); this change sets `pad_token_id` rather than `pad_token`.
This commit is contained in:
parent
80c2f25131
commit
e3968f7dd0
@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
|
|||||||
return
|
return
|
||||||
|
|
||||||
gradient_accumulation_steps = batch_size // micro_batch_size
|
gradient_accumulation_steps = batch_size // micro_batch_size
|
||||||
shared.tokenizer.pad_token = 0
|
shared.tokenizer.pad_token_id = 0
|
||||||
shared.tokenizer.padding_side = "left"
|
shared.tokenizer.padding_side = "left"
|
||||||
|
|
||||||
def tokenize(prompt):
|
def tokenize(prompt):
|
||||||
|
Loading…
Reference in New Issue
Block a user