diff --git a/modules/llamacpp_model_alternative.py b/modules/llamacpp_model_alternative.py
index 8fea2ab4..6bdf9bc3 100644
--- a/modules/llamacpp_model_alternative.py
+++ b/modules/llamacpp_model_alternative.py
@@ -6,7 +6,7 @@ Documentation:
 https://abetlen.github.io/llama-cpp-python/
 '''
 
-from llama_cpp import Llama
+from llama_cpp import Llama, LlamaCache
 
 from modules import shared
 from modules.callbacks import Iteratorize
@@ -27,6 +27,7 @@ class LlamaCppModel:
             'n_threads': shared.args.threads or None
         }
         self.model = Llama(**params)
+        self.model.set_cache(LlamaCache())
 
         # This is ugly, but the model and the tokenizer are the same object in this library.
         return result, result
diff --git a/requirements.txt b/requirements.txt
index 996afe28..34eee23d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,5 +14,5 @@ tqdm
 git+https://github.com/huggingface/peft
 transformers==4.28.0
 bitsandbytes==0.38.1; platform_system != "Windows"
-llama-cpp-python==0.1.33; platform_system != "Windows"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.33/llama_cpp_python-0.1.33-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+llama-cpp-python==0.1.34; platform_system != "Windows"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.34/llama_cpp_python-0.1.34-cp310-cp310-win_amd64.whl; platform_system == "Windows"
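
Note on the set_cache() addition: in llama-cpp-python 0.1.34 (the version this patch pins), LlamaCache is essentially a marker rather than a result store; it tells the Llama wrapper to keep the state of the last completion so a shared prompt prefix is not re-evaluated on the next call. A minimal standalone sketch of the same setup, where the model path and prompt are illustrative placeholders rather than values from this repo:

    from llama_cpp import Llama, LlamaCache

    # Load a quantized GGML model; n_ctx and seed mirror the params dict in the patch.
    llm = Llama(model_path="models/ggml-model-q4_0.bin", n_ctx=2048, seed=0)

    # Register the cache so successive generations can resume from the last state.
    llm.set_cache(LlamaCache())

    # Calls that share a prompt prefix can now skip re-evaluating the cached portion.
    out = llm("Q: What does llama.cpp do? A:", max_tokens=32)
    print(out["choices"][0]["text"])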