From 613feca23bfa762d6c0bd8c514f96ca64f529850 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Oct 2023 09:06:20 -0700
Subject: [PATCH] Make colab functional for llama.cpp

- Download only Q4_K_M for GGUF repositories by default
- Use maximum n-gpu-layers by default

---
 Colab-TextGen-GPU.ipynb | 2 +-
 download-model.py       | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb
index d5525416..9951b30b 100644
--- a/Colab-TextGen-GPU.ipynb
+++ b/Colab-TextGen-GPU.ipynb
@@ -115,7 +115,7 @@
 "    output_folder = \"\"\n",
 "\n",
 "# Start the web UI\n",
-"cmd = f\"python server.py --share\"\n",
+"cmd = f\"python server.py --share --n-gpu-layers 128\"\n",
 "if output_folder != \"\":\n",
 "    cmd += f\" --model {output_folder}\"\n",
 "cmd += f\" {command_line_flags}\"\n",
diff --git a/download-model.py b/download-model.py
index 43248ca7..25d9c804 100644
--- a/download-model.py
+++ b/download-model.py
@@ -128,6 +128,11 @@ class ModelDownloader:
                 if classifications[i] in ['pytorch', 'pt']:
                     links.pop(i)
 
+        if has_gguf and specific_file is None:
+            for i in range(len(classifications) - 1, -1, -1):
+                if 'q4_k_m' not in links[i].lower():
+                    links.pop(i)
+
         is_llamacpp = has_gguf and specific_file is not None
 
         return links, sha256, is_lora, is_llamacpp
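
For reference, a minimal standalone sketch of the filtering behavior added to
download-model.py. The helper name filter_q4_k_m and the file names are made
up for illustration; the sketch mirrors the effect of the new loop (when a
repository contains GGUF files and no specific file is requested, keep only
names containing "q4_k_m"), not the exact in-place implementation.

    # Hypothetical helper illustrating the new default behavior.
    def filter_q4_k_m(links, has_gguf, specific_file=None):
        # Keep only the Q4_K_M quantization for GGUF repositories
        # when no specific file was requested.
        if has_gguf and specific_file is None:
            links = [link for link in links if 'q4_k_m' in link.lower()]
        return links

    # Made-up example file names from a GGUF repository:
    files = [
        "llama-2-7b.Q2_K.gguf",
        "llama-2-7b.Q4_K_M.gguf",
        "llama-2-7b.Q8_0.gguf",
    ]
    print(filter_q4_k_m(files, has_gguf=True))
    # ['llama-2-7b.Q4_K_M.gguf']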