From 14f61942113df0fcaa9201ff0c60d2663c1c8c37 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 5 Apr 2024 09:22:44 -0700
Subject: [PATCH 01/13] Bump Gradio to 4.25

---
 requirements.txt                 | 2 +-
 requirements_amd.txt             | 2 +-
 requirements_amd_noavx2.txt      | 2 +-
 requirements_apple_intel.txt     | 2 +-
 requirements_apple_silicon.txt   | 2 +-
 requirements_cpu_only.txt        | 2 +-
 requirements_cpu_only_noavx2.txt | 2 +-
 requirements_noavx2.txt          | 2 +-
 requirements_nowheels.txt        | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 25efdf53..7d1b4bdc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ bitsandbytes==0.43.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_amd.txt b/requirements_amd.txt
index da4be15e..dba19b9f 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 634f71f1..63984656 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index e6464999..11ef7e98 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 6af07357..ccf4b6e0 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 599944f0..e0820f96 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index be0c4976..eb4c6e6f 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 60cd75d5..099c90ee 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -4,7 +4,7 @@ bitsandbytes==0.43.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 63b2956f..d4af9d72 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -2,7 +2,7 @@ accelerate==0.27.*
 colorama
 datasets
 einops
-gradio==4.23.*
+gradio==4.25.*
 hqq==0.1.5
 jinja2==3.1.2
 lm_eval==0.3.0

From a4c67e1974b2a9924d504c359e807549f34373fe Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 5 Apr 2024 13:26:49 -0300
Subject: [PATCH 02/13] Bump aqlm[cpu,gpu] from 1.1.2 to 1.1.3 (#5790)

---
 requirements.txt        | 2 +-
 requirements_noavx2.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 7d1b4bdc..755afa0b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 accelerate==0.27.*
-aqlm[gpu,cpu]==1.1.2; platform_system == "Linux"
+aqlm[gpu,cpu]==1.1.3; platform_system == "Linux"
 bitsandbytes==0.43.*
 colorama
 datasets
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 099c90ee..430e36f5 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -1,5 +1,5 @@
 accelerate==0.27.*
-aqlm[gpu,cpu]==1.1.2; platform_system == "Linux"
+aqlm[gpu,cpu]==1.1.3; platform_system == "Linux"
 bitsandbytes==0.43.*
 colorama
 datasets

From 0f536dd97da4f6ca5c4ee35b8a0e7a3238d74ce5 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 5 Apr 2024 12:17:54 -0700
Subject: [PATCH 03/13] UI: Fix the "Show controls" action

---
 modules/ui_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 8ce7b610..3d3a75df 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -378,4 +378,4 @@ def create_event_handlers():
         partial(chat.generate_chat_prompt, '', _continue=True), gradio('interface_state'), gradio('textbox-notebook')).then(
         lambda: None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
 
-    shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
+    shared.gradio['show_controls'].change(lambda x: None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')

From 4e739dc211466b58b6e1f68d481200ebe9567db4 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:22:25 -0700
Subject: [PATCH 04/13] Add an instruction template for Command R

---
 instruction-templates/Command-R.yaml | 26 ++++++++++++++++++++++++++
 models/config.yaml                   |  2 ++
 2 files changed, 28 insertions(+)
 create mode 100644 instruction-templates/Command-R.yaml

diff --git a/instruction-templates/Command-R.yaml b/instruction-templates/Command-R.yaml
new file mode 100644
index 00000000..f8bb8a08
--- /dev/null
+++ b/instruction-templates/Command-R.yaml
@@ -0,0 +1,26 @@
+instruction_template: |-
+  {%- if messages[0]['role'] == 'system' -%}
+      {%- set loop_messages = messages[1:] -%}
+      {%- set system_message = messages[0]['content'] -%}
+  {%- elif false == true -%}
+      {%- set loop_messages = messages -%}
+      {%- set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' -%}
+  {%- else -%}
+      {%- set loop_messages = messages -%}
+      {%- set system_message = false -%}
+  {%- endif -%}
+  {%- if system_message != false -%}
+      {{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}
+  {%- endif -%}
+  {%- for message in loop_messages -%}
+      {%- set content = message['content'] -%}
+      {%- if message['role'] == 'user' -%}
+          {{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}
+      {%- elif message['role'] == 'assistant' -%}
+          {{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>'  + content.strip() + '<|END_OF_TURN_TOKEN|>' }}
+      {%- endif -%}
+  {%- endfor -%}
+  {%- if add_generation_prompt -%}
+      {{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}
+  {%- endif -%}
+
diff --git a/models/config.yaml b/models/config.yaml
index fc7bc44f..184528ab 100644
--- a/models/config.yaml
+++ b/models/config.yaml
@@ -192,3 +192,5 @@
   instruction_template: 'Synthia'
 .*(hercules|hyperion):
   instruction_template: 'ChatML'
+.*command-r:
+  instruction_template: 'Command-R'

From 6b7f7555fcc75dadb03c4a735cffd01eceffbb01 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:40:02 -0700
Subject: [PATCH 05/13] Logging message to make transformers loader a bit more
 transparent

---
 modules/models.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/modules/models.py b/modules/models.py
index cccf1d2d..03296052 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -1,6 +1,7 @@
 import gc
 import logging
 import os
+import pprint
 import re
 import time
 import traceback
@@ -126,15 +127,19 @@ def huggingface_loader(model_name):
     path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
     params = {
         'low_cpu_mem_usage': True,
-        'trust_remote_code': shared.args.trust_remote_code,
         'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
-        'use_safetensors': True if shared.args.force_safetensors else None
     }
 
+    if shared.args.trust_remote_code:
+        params['trust_remote_code'] = True
+
     if shared.args.use_flash_attention_2:
         params['use_flash_attention_2'] = True
 
-    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=params['trust_remote_code'])
+    if shared.args.force_safetensors:
+        params['max_memory'] = True
+
+    config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
 
     if 'chatglm' in model_name.lower():
         LoaderClass = AutoModel
@@ -147,6 +152,8 @@ def huggingface_loader(model_name):
 
     # Load the model without any special settings
     if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.compress_pos_emb > 1, shared.args.alpha_value > 1, shared.args.disable_exllama, shared.args.disable_exllamav2]):
+        logger.info("TRANSFORMERS_PARAMS=")
+        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
         model = LoaderClass.from_pretrained(path_to_model, **params)
         if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit):
             if torch.backends.mps.is_available():
@@ -175,7 +182,9 @@ def huggingface_loader(model_name):
             params['torch_dtype'] = torch.float32
         else:
             params['device_map'] = 'auto'
-            params['max_memory'] = get_max_memory_dict()
+            if x := get_max_memory_dict():
+                params['max_memory'] = x
+
             if shared.args.load_in_4bit:
                 # See https://github.com/huggingface/transformers/pull/23479/files
                 # and https://huggingface.co/blog/4bit-transformers-bitsandbytes
@@ -186,7 +195,6 @@ def huggingface_loader(model_name):
                     'bnb_4bit_use_double_quant': shared.args.use_double_quant,
                 }
 
-                logger.info('Using the following 4-bit params: ' + str(quantization_config_params))
                 params['quantization_config'] = BitsAndBytesConfig(**quantization_config_params)
 
             elif shared.args.load_in_8bit:
@@ -230,6 +238,8 @@ def huggingface_loader(model_name):
         elif shared.args.alpha_value > 1:
             params['rope_scaling'] = {'type': 'dynamic', 'factor': RoPE.get_alpha_value(shared.args.alpha_value, shared.args.rope_freq_base)}
 
+        logger.info("TRANSFORMERS_PARAMS=")
+        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
         model = LoaderClass.from_pretrained(path_to_model, **params)
 
     return model

From 1b87844928203e13267c9488e692607d488fa6a7 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 5 Apr 2024 18:43:43 -0700
Subject: [PATCH 06/13] Minor fix

---
 modules/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index 03296052..291383ef 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -137,7 +137,7 @@ def huggingface_loader(model_name):
         params['use_flash_attention_2'] = True
 
     if shared.args.force_safetensors:
-        params['max_memory'] = True
+        params['force_safetensors'] = True
 
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
 

From 64a76856bdbe72e486324b9b753a111c0c8e4adf Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 07:32:17 -0700
Subject: [PATCH 07/13] Metadata: Fix loading Command R+ template with multiple
 options

---
 modules/models_settings.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 12a2db82..85689b8b 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -117,6 +117,9 @@ def get_model_metadata(model):
         metadata = json.loads(open(path, 'r', encoding='utf-8').read())
         if 'chat_template' in metadata:
             template = metadata['chat_template']
+            if isinstance(template, list):
+                template = template[0]['template']
+
             for k in ['eos_token', 'bos_token']:
                 if k in metadata:
                     value = metadata[k]

From 168a0f4f671cf200bd0da4dc081a4c2c4c47602a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 12:43:21 -0700
Subject: [PATCH 08/13] UI: do not load the "gallery" extension by default

---
 extensions/gallery/script.py | 11 ++++++++---
 js/main.js                   |  9 +++++++++
 modules/shared.py            |  4 +---
 settings-template.yaml       |  3 +--
 4 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
index 1bb8068a..ff0242c8 100644
--- a/extensions/gallery/script.py
+++ b/extensions/gallery/script.py
@@ -3,9 +3,14 @@ from pathlib import Path
 import gradio as gr
 
 from modules.html_generator import get_image_cache
-from modules.shared import gradio, settings
+from modules.shared import gradio
 
 
+params = {
+    'items_per_page': 50,
+    'open': False,
+}
+
 cards = []
 
 
@@ -104,7 +109,7 @@ def custom_js():
 
 
 def ui():
-    with gr.Accordion("Character gallery", open=settings["gallery-open"], elem_id='gallery-extension'):
+    with gr.Accordion("Character gallery", open=params["open"], elem_id='gallery-extension'):
         gr.HTML(value="<style>" + generate_css() + "</style>")
         with gr.Row():
             filter_box = gr.Textbox(label='', placeholder='Filter', lines=1, max_lines=1, container=False, elem_id='gallery-filter-box')
@@ -116,7 +121,7 @@ def ui():
             label="",
             samples=generate_html(),
             elem_classes=["character-gallery"],
-            samples_per_page=settings["gallery-items_per_page"]
+            samples_per_page=params["items_per_page"]
         )
 
     filter_box.change(lambda: None, None, None, js=f'() => {{{custom_js()}; gotoFirstPage()}}').success(
diff --git a/js/main.js b/js/main.js
index 219d860e..a5e98aef 100644
--- a/js/main.js
+++ b/js/main.js
@@ -464,3 +464,12 @@ function handleVisibilityChange(isVisible) {
 }
 
 respondToRenameVisibility(renameTextArea, handleVisibilityChange);
+
+//------------------------------------------------
+// Adjust the chat tab margin if no extension UI
+// is present at the bottom
+//------------------------------------------------
+
+if (document.getElementById('extensions') === null) {
+  document.getElementById("chat-tab").style.marginBottom = "-29px";
+}
diff --git a/modules/shared.py b/modules/shared.py
index a48b281c..46b6ef47 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -62,9 +62,7 @@ settings = {
     'chat_template_str': "{%- for message in messages %}\n    {%- if message['role'] == 'system' -%}\n        {%- if message['content'] -%}\n            {{- message['content'] + '\\n\\n' -}}\n        {%- endif -%}\n        {%- if user_bio -%}\n            {{- user_bio + '\\n\\n' -}}\n        {%- endif -%}\n    {%- else -%}\n        {%- if message['role'] == 'user' -%}\n            {{- name1 + ': ' + message['content'] + '\\n'-}}\n        {%- else -%}\n            {{- name2 + ': ' + message['content'] + '\\n' -}}\n        {%- endif -%}\n    {%- endif -%}\n{%- endfor -%}",
     'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
     'autoload_model': False,
-    'gallery-items_per_page': 50,
-    'gallery-open': False,
-    'default_extensions': ['gallery'],
+    'default_extensions': [],
 }
 
 default_settings = copy.deepcopy(settings)
diff --git a/settings-template.yaml b/settings-template.yaml
index c239057f..31a0f614 100644
--- a/settings-template.yaml
+++ b/settings-template.yaml
@@ -74,5 +74,4 @@ chat-instruct_command: |-
 autoload_model: false
 gallery-items_per_page: 50
 gallery-open: false
-default_extensions:
-- gallery
+default_extensions: []

From 1bdceea2d441d9f66734bab1694115b1d92dfd8b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 12:57:57 -0700
Subject: [PATCH 09/13] UI: Focus on the chat input after starting a new chat

---
 js/main.js         | 10 ++++++++++
 modules/models.py  |  2 ++
 modules/ui_chat.py | 10 +++++-----
 3 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/js/main.js b/js/main.js
index a5e98aef..f6520549 100644
--- a/js/main.js
+++ b/js/main.js
@@ -473,3 +473,13 @@ respondToRenameVisibility(renameTextArea, handleVisibilityChange);
 if (document.getElementById('extensions') === null) {
   document.getElementById("chat-tab").style.marginBottom = "-29px";
 }
+
+//------------------------------------------------
+// Focus on the chat input after starting a new chat
+//------------------------------------------------
+
+document.querySelectorAll('.focus-on-chat-input').forEach(element => {
+  element.addEventListener('click', function() {
+      document.querySelector('#chat-input textarea').focus();
+  });
+});
diff --git a/modules/models.py b/modules/models.py
index 291383ef..21f5e5f6 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -154,6 +154,8 @@ def huggingface_loader(model_name):
     if not any([shared.args.cpu, shared.args.load_in_8bit, shared.args.load_in_4bit, shared.args.auto_devices, shared.args.disk, shared.args.deepspeed, shared.args.gpu_memory is not None, shared.args.cpu_memory is not None, shared.args.compress_pos_emb > 1, shared.args.alpha_value > 1, shared.args.disable_exllama, shared.args.disable_exllamav2]):
         logger.info("TRANSFORMERS_PARAMS=")
         pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
+        print()
+
         model = LoaderClass.from_pretrained(path_to_model, **params)
         if not (hasattr(model, 'is_loaded_in_4bit') and model.is_loaded_in_4bit):
             if torch.backends.mps.is_available():
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 3d3a75df..3193bd67 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -68,14 +68,14 @@ def create_ui():
                 with gr.Row():
                     shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
                     shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
-                    shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', visible=False, elem_classes='refresh-button')
-                    shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', visible=False, elem_classes='refresh-button')
-                    shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes='refresh-button')
+                    shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
+                    shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
+                    shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
 
                 with gr.Row(elem_id='rename-row'):
                     shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', visible=False, elem_classes=['no-background'])
-                    shared.gradio['rename_to-confirm'] = gr.Button('Confirm', visible=False, elem_classes='refresh-button')
-                    shared.gradio['rename_to-cancel'] = gr.Button('Cancel', visible=False, elem_classes='refresh-button')
+                    shared.gradio['rename_to-confirm'] = gr.Button('Confirm', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
+                    shared.gradio['rename_to-cancel'] = gr.Button('Cancel', visible=False, elem_classes=['refresh-button', 'focus-on-chat-input'])
 
         with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
             with gr.Column():

From dd6e4ac55f3235c6728bf849618cc5841bd11028 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 13:12:16 -0700
Subject: [PATCH 10/13] Prevent double <BOS_TOKEN> with Command R+

---
 modules/chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/chat.py b/modules/chat.py
index 5fb5523c..4e0bde1c 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -125,7 +125,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
         messages.append({"role": "user", "content": user_input})
 
     def remove_extra_bos(prompt):
-        for bos_token in ['<s>', '<|startoftext|>']:
+        for bos_token in ['<s>', '<|startoftext|>', '<BOS_TOKEN>', '<|endoftext|>']:
             while prompt.startswith(bos_token):
                 prompt = prompt[len(bos_token):]
 

From 096f75a4321c73cef138d0a8f1f25f32058c1b81 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 14:06:39 -0700
Subject: [PATCH 11/13] Documentation: remove obsolete RWKV docs

---
 docs/08 - Additional Tips.md | 73 ------------------------------------
 1 file changed, 73 deletions(-)

diff --git a/docs/08 - Additional Tips.md b/docs/08 - Additional Tips.md
index 89675cca..f48fa862 100644
--- a/docs/08 - Additional Tips.md	
+++ b/docs/08 - Additional Tips.md	
@@ -58,79 +58,6 @@ pip install -U deepspeed
 deepspeed --num_gpus=1 server.py --deepspeed --chat --model gpt-j-6B
 ```
 
-> RWKV: RNN with Transformer-level LLM Performance
->
-> It combines the best of RNN and transformer - great performance, fast inference, saves VRAM, fast training, "infinite" ctx_len, and free sentence embedding (using the final hidden state).
-
-https://github.com/BlinkDL/RWKV-LM
-
-https://github.com/BlinkDL/ChatRWKV
-
-## Using RWKV in the web UI
-
-### Hugging Face weights
-
-Simply download the weights from https://huggingface.co/RWKV and load them as you would for any other model.
-
-There is a bug in transformers==4.29.2 that prevents RWKV from being loaded in 8-bit mode. You can install the dev branch to solve this bug: `pip install git+https://github.com/huggingface/transformers`
-
-### Original .pth weights
-
-The instructions below are from before RWKV was supported in transformers, and they are kept for legacy purposes. The old implementation is possibly faster, but it lacks the full range of samplers that the transformers library offers.
-
-#### 0. Install the RWKV library
-
-```
-pip install rwkv
-```
-
-`0.7.3` was the last version that I tested. If you experience any issues, try ```pip install rwkv==0.7.3```.
-
-#### 1. Download the model
-
-It is available in different sizes:
-
-* https://huggingface.co/BlinkDL/rwkv-4-pile-3b/
-* https://huggingface.co/BlinkDL/rwkv-4-pile-7b/
-* https://huggingface.co/BlinkDL/rwkv-4-pile-14b/
-
-There are also older releases with smaller sizes like:
-
-* https://huggingface.co/BlinkDL/rwkv-4-pile-169m/resolve/main/RWKV-4-Pile-169M-20220807-8023.pth
-
-Download the chosen `.pth` and put it directly in the `models` folder. 
-
-#### 2. Download the tokenizer
-
-[20B_tokenizer.json](https://raw.githubusercontent.com/BlinkDL/ChatRWKV/main/v2/20B_tokenizer.json)
-
-Also put it directly in the `models` folder. Make sure to not rename it. It should be called `20B_tokenizer.json`.
-
-#### 3. Launch the web UI
-
-No additional steps are required. Just launch it as you would with any other model.
-
-```
-python server.py --listen  --no-stream --model RWKV-4-Pile-169M-20220807-8023.pth
-```
-
-#### Setting a custom strategy
-
-It is possible to have very fine control over the offloading and precision for the model with the `--rwkv-strategy` flag. Possible values include:
-
-```
-"cpu fp32" # CPU mode
-"cuda fp16" # GPU mode with float16 precision
-"cuda fp16 *30 -> cpu fp32" # GPU+CPU offloading. The higher the number after *, the higher the GPU allocation.
-"cuda fp16i8" # GPU mode with 8-bit precision
-```
-
-See the README for the PyPl package for more details: https://pypi.org/project/rwkv/
-
-#### Compiling the CUDA kernel
-
-You can compile the CUDA kernel for the model with `--rwkv-cuda-on`. This should improve the performance a lot but I haven't been able to get it to work yet.
-
 ## Miscellaneous info
 
 ### You can train LoRAs in CPU mode

From dfb01f9a63419a0f5af23ed62bf08e2b51261211 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 18:32:36 -0700
Subject: [PATCH 12/13] Bump llama-cpp-python to 0.2.60

---
 requirements.txt                 | 24 ++++++++++++------------
 requirements_amd.txt             | 12 ++++++------
 requirements_amd_noavx2.txt      |  8 ++++----
 requirements_apple_intel.txt     | 12 ++++++------
 requirements_apple_silicon.txt   | 16 ++++++++--------
 requirements_cpu_only.txt        |  8 ++++----
 requirements_cpu_only_noavx2.txt |  8 ++++----
 requirements_noavx2.txt          | 24 ++++++++++++------------
 8 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 755afa0b..ccc2aec3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -33,22 +33,22 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index dba19b9f..41f6ffd5 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -31,14 +31,14 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.59+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.59+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.60+rocm5.6.1-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.60+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/turboderp/exllamav2/releases/download/v0.0.17/exllamav2-0.0.17+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 63984656..a9affce3 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # AMD wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 11ef7e98..cbac29b4 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -31,10 +31,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/turboderp/exllamav2/releases/download/v0.0.17/exllamav2-0.0.17-py3-none-any.whl
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index ccf4b6e0..1175b16b 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -31,12 +31,12 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.59-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.60-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10"
 https://github.com/turboderp/exllamav2/releases/download/v0.0.17/exllamav2-0.0.17-py3-none-any.whl
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index e0820f96..ea2daca6 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index eb4c6e6f..bc9635ca 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -31,7 +31,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 430e36f5..77862ffc 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -33,22 +33,22 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama-cpp-python (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.59+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.60+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, no tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.59+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.60+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # llama-cpp-python (CUDA, tensor cores)
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.59+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121avx-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.2.60+cu121avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 
 # CUDA wheels
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

From d02744282b7d46c5194d8e15935c86966b3546ea Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 6 Apr 2024 18:56:58 -0700
Subject: [PATCH 13/13] Minor logging change

---
 modules/models.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/models.py b/modules/models.py
index 21f5e5f6..1519fc89 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -242,6 +242,7 @@ def huggingface_loader(model_name):
 
         logger.info("TRANSFORMERS_PARAMS=")
         pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(params)
+        print()
         model = LoaderClass.from_pretrained(path_to_model, **params)
 
     return model