From 613feca23bfa762d6c0bd8c514f96ca64f529850 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Oct 2023 09:06:20 -0700
Subject: [PATCH] Make colab functional for llama.cpp

- Download only Q4_K_M for GGUF repositories by default
- Use maximum n-gpu-layers by default

---
 Colab-TextGen-GPU.ipynb | 2 +-
 download-model.py       | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb
index d5525416..9951b30b 100644
--- a/Colab-TextGen-GPU.ipynb
+++ b/Colab-TextGen-GPU.ipynb
@@ -115,7 +115,7 @@
 "    output_folder = \"\"\n",
 "\n",
 "# Start the web UI\n",
-"cmd = f\"python server.py --share\"\n",
+"cmd = f\"python server.py --share --n-gpu-layers 128\"\n",
 "if output_folder != \"\":\n",
 "    cmd += f\" --model {output_folder}\"\n",
 "cmd += f\" {command_line_flags}\"\n",
diff --git a/download-model.py b/download-model.py
index 43248ca7..25d9c804 100644
--- a/download-model.py
+++ b/download-model.py
@@ -128,6 +128,11 @@ class ModelDownloader:
                 if classifications[i] in ['pytorch', 'pt']:
                     links.pop(i)
 
+        if has_gguf and specific_file is None:
+            for i in range(len(classifications) - 1, -1, -1):
+                if 'q4_k_m' not in links[i].lower():
+                    links.pop(i)
+
         is_llamacpp = has_gguf and specific_file is not None
 
         return links, sha256, is_lora, is_llamacpp
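
For reference, a minimal standalone sketch of the filtering behavior added to
download-model.py. The helper name filter_q4_k_m and the file names are made
up for illustration; the sketch mirrors the effect of the new loop (when a
repository contains GGUF files and no specific file is requested, keep only
names containing "q4_k_m"), not the exact in-place implementation.

    # Hypothetical helper illustrating the new default behavior.
    def filter_q4_k_m(links, has_gguf, specific_file=None):
        # Keep only the Q4_K_M quantization for GGUF repositories
        # when no specific file was requested.
        if has_gguf and specific_file is None:
            links = [link for link in links if 'q4_k_m' in link.lower()]
        return links

    # Made-up example file names from a GGUF repository:
    files = [
        "llama-2-7b.Q2_K.gguf",
        "llama-2-7b.Q4_K_M.gguf",
        "llama-2-7b.Q8_0.gguf",
    ]
    print(filter_q4_k_m(files, has_gguf=True))
    # ['llama-2-7b.Q4_K_M.gguf']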