From deba039c0394a2d5cb5f0da39c5747c2ee893c3c Mon Sep 17 00:00:00 2001
From: deevis
Date: Tue, 31 Oct 2023 22:51:00 -0600
Subject: [PATCH 1/2] (fix): OpenOrca-Platypus2 models should use correct instruction_template and custom_stopping_strings (#4435)

---
 models/config.yaml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/models/config.yaml b/models/config.yaml
index 9c8ce827..00db01d1 100644
--- a/models/config.yaml
+++ b/models/config.yaml
@@ -45,9 +45,6 @@
 .*starchat-beta:
   instruction_template: 'Starchat-Beta'
   custom_stopping_strings: '"<|end|>"'
-.*(openorca-platypus2):
-  instruction_template: 'OpenOrca-Platypus2'
-  custom_stopping_strings: '"### Instruction:", "### Response:"'
 (?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
   instruction_template: 'Vicuna-v0'
 .*vicuna.*v0:
@@ -152,6 +149,9 @@
   instruction_template: 'Orca Mini'
 .*(platypus|gplatty|superplatty):
   instruction_template: 'Alpaca'
+.*(openorca-platypus2):
+  instruction_template: 'OpenOrca-Platypus2'
+  custom_stopping_strings: '"### Instruction:", "### Response:"'
 .*longchat:
   instruction_template: 'Vicuna-v1.1'
 .*vicuna-33b:

From a56ef2a942d4b53c86314bde097f6fa4da240ea0 Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Thu, 2 Nov 2023 18:07:08 +0100
Subject: [PATCH 2/2] make torch.load a bit safer (#4448)

---
 .gitignore             | 1 +
 modules/GPTQ_loader.py | 2 +-
 modules/training.py    | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 2405a81f..a30fd144 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,7 @@
 .DS_Store
 .eslintrc.js
 .idea
+.env
 .venv
 venv
 .vscode
diff --git a/modules/GPTQ_loader.py b/modules/GPTQ_loader.py
index fe5577e1..7dc20b0a 100644
--- a/modules/GPTQ_loader.py
+++ b/modules/GPTQ_loader.py
@@ -62,7 +62,7 @@ def _load_quant(model, checkpoint, wbits, groupsize=-1, faster_kernel=False, exc
         from safetensors.torch import load_file as safe_load
         model.load_state_dict(safe_load(checkpoint), strict=False)
     else:
-        model.load_state_dict(torch.load(checkpoint), strict=False)
+        model.load_state_dict(torch.load(checkpoint, weights_only=True), strict=False)
 
     model.seqlen = 2048
     return model
diff --git a/modules/training.py b/modules/training.py
index b887fa47..c01f27db 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -544,7 +544,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
             lora_model = get_peft_model(shared.model, config)
             if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
                 logger.info("Loading existing LoRA data...")
-                state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
+                state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin", weights_only=True)
                 set_peft_model_state_dict(lora_model, state_dict_peft)
         except:
             yield traceback.format_exc().replace('\n', '\n\n')