Fix Training Pad Token (#1678)

Currently padding with 0 the character vs 0 the token id (<unk> in the case of llama)
This commit is contained in:
practicaldreamer 2023-05-02 21:16:08 -05:00 committed by GitHub
parent 80c2f25131
commit e3968f7dd0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -243,7 +243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
        return
    gradient_accumulation_steps = batch_size // micro_batch_size
-   shared.tokenizer.pad_token = 0
+   shared.tokenizer.pad_token_id = 0
    shared.tokenizer.padding_side = "left"
    def tokenize(prompt):