From 092a2c3516409a0d639dfb79c384ebc7ae3a4434 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 30 Nov 2023 11:21:40 -0800
Subject: [PATCH 1/3] Fix a bug in llama.cpp get_logits() function

---
 modules/llamacpp_model.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py
index aa0fedbf..8b133e98 100644
--- a/modules/llamacpp_model.py
+++ b/modules/llamacpp_model.py
@@ -105,6 +105,7 @@ class LlamaCppModel:
         return self.model.detokenize(ids).decode('utf-8')
 
     def get_logits(self, tokens):
+        self.model.reset()
         self.model.eval(tokens)
         logits = self.model._scores
         logits = np.expand_dims(logits, 0)  # batch dim is expected

From 1c90e022432f1ece0f216135dc9efb15697f511b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 30 Nov 2023 11:55:18 -0800
Subject: [PATCH 2/3] Update Colab-TextGen-GPU.ipynb

---
 Colab-TextGen-GPU.ipynb | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb
index f8b6719f..f73947bf 100644
--- a/Colab-TextGen-GPU.ipynb
+++ b/Colab-TextGen-GPU.ipynb
@@ -87,8 +87,8 @@
     "    !pip uninstall -y flash_attn\n",
     "\n",
     "# Parameters\n",
-    "model_url = \"https://huggingface.co/turboderp/Mistral-7B-instruct-exl2\" #@param {type:\"string\"}\n",
-    "branch = \"4.0bpw\" #@param {type:\"string\"}\n",
+    "model_url = \"https://huggingface.co/TheBloke/MythoMax-L2-13B-GPTQ\" #@param {type:\"string\"}\n",
+    "branch = \"gptq-4bit-32g-actorder_True\" #@param {type:\"string\"}\n",
     "command_line_flags = \"--n-gpu-layers 128 --load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
     "api = False #@param {type:\"boolean\"}\n",
     "\n",

From 77d6ccf12b0885773eedd8e4badb244c7ae89b69 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 30 Nov 2023 12:00:32 -0800
Subject: [PATCH 3/3] Add a LOADER debug message while loading models

---
 modules/models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/models.py b/modules/models.py
index 19c0d903..c7dd6ccb 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -102,6 +102,7 @@ def load_model(model_name, loader=None):
     elif loader in ['llama.cpp', 'llamacpp_HF', 'ctransformers']:
         shared.settings['truncation_length'] = shared.args.n_ctx
 
+    logger.info(f"LOADER: {loader}")
     logger.info(f"TRUNCATION LENGTH: {shared.settings['truncation_length']}")
     logger.info(f"INSTRUCTION TEMPLATE: {shared.settings['instruction_template']}")
     logger.info(f"Loaded the model in {(time.time()-t0):.2f} seconds.")
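
Note on PATCH 1/3: Llama.eval() in llama-cpp-python is stateful; it appends the given
tokens to whatever context the model has already evaluated, so consecutive get_logits()
calls would score each request against leftover state from the previous call. Calling
reset() first clears that context, making get_logits() deterministic for a given input.
Below is a minimal self-contained sketch of the fixed pattern, assuming llama-cpp-python's
public Llama API (reset(), eval(), tokenize()) and the internal _scores buffer the patched
code already uses; the model path and the logits_all=True constructor flag are illustrative
assumptions, not part of the patch.

import numpy as np
from llama_cpp import Llama

# logits_all=True keeps per-token scores around after eval(), so _scores
# holds one row of vocabulary logits for every evaluated token.
model = Llama(model_path="/path/to/model.gguf", logits_all=True)

def get_logits(model, tokens):
    model.reset()       # drop previously evaluated tokens (the fix in PATCH 1/3)
    model.eval(tokens)  # eval() appends to the context, hence the reset above
    logits = model._scores            # internal (n_tokens, n_vocab) matrix
    return np.expand_dims(logits, 0)  # add the batch dim callers expect

tokens = model.tokenize(b"Hello, world")
print(get_logits(model, tokens).shape)  # (1, len(tokens), n_vocab)

Resetting inside get_logits() keeps the method idempotent: calling it twice with the
same tokens now returns the same logits, which is what callers of get_logits() expect.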