From 87dc421ee8478e56f38884467c712fab8bb41f11 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 22 Jan 2024 22:40:12 -0300
Subject: [PATCH 1/5] Bump exllamav2 to 0.0.12 (#5352)

---
 modules/exllamav2.py             | 14 ++++++++++++--
 modules/loaders.py               |  4 ++++
 requirements.txt                 | 18 +++++++++---------
 requirements_amd.txt             |  6 +++---
 requirements_amd_noavx2.txt      |  6 +++---
 requirements_apple_intel.txt     |  2 +-
 requirements_apple_silicon.txt   |  2 +-
 requirements_cpu_only.txt        |  2 +-
 requirements_cpu_only_noavx2.txt |  2 +-
 requirements_noavx2.txt          | 18 +++++++++---------
 requirements_nowheels.txt        |  2 +-
 11 files changed, 45 insertions(+), 31 deletions(-)

diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 2730d9f5..551ed498 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -93,17 +93,27 @@ class Exllamav2Model:
 
     def generate_with_streaming(self, prompt, state):
         settings = ExLlamaV2Sampler.Settings()
+
+        settings.token_repetition_penalty = state['repetition_penalty']
+        settings.token_repetition_range = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range']
+
+        settings.token_frequency_penalty = state['frequency_penalty']
+        settings.token_presence_penalty = state['presence_penalty']
+
         settings.temperature = state['temperature']
         settings.top_k = state['top_k']
         settings.top_p = state['top_p']
+        settings.top_a = state['top_a']
         settings.min_p = state['min_p']
         settings.tfs = state['tfs']
         settings.typical = state['typical_p']
+
+        settings.temperature_last = state['temperature_last']
+
         settings.mirostat = state['mirostat_mode'] == 2
         settings.mirostat_tau = state['mirostat_tau']
         settings.mirostat_eta = state['mirostat_eta']
-        settings.token_repetition_penalty = state['repetition_penalty']
-        settings.token_repetition_range = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range']
+
         if state['ban_eos_token']:
             settings.disallow_tokens(self.tokenizer, [self.tokenizer.eos_token_id])
 
diff --git a/modules/loaders.py b/modules/loaders.py
index 5620c95a..2976a851 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -205,12 +205,16 @@ loaders_samplers = {
     'HQQ': transformers_samplers(),
     'ExLlamav2': {
         'temperature',
+        'temperature_last',
         'top_p',
         'min_p',
         'top_k',
         'typical_p',
         'tfs',
+        'top_a',
         'repetition_penalty',
+        'presence_penalty',
+        'frequency_penalty',
         'repetition_penalty_range',
         'seed',
         'mirostat_mode',
diff --git a/requirements.txt b/requirements.txt
index 6a60133e..54ac7a8d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11; platform_system != "Darwin" and platform_machine != "x86_64"
+exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
@@ -67,14 +67,14 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 733d2001..6882bc84 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
+exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
@@ -47,8 +47,8 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+roc
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 9651171d..a8114c6f 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
+exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
@@ -43,8 +43,8 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+roc
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 2e48ccce..653b8b06 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11
+exllamav2==0.0.12
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 3e49baa8..f3d29f2c 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11
+exllamav2==0.0.12
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 4e89c691..0325a178 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11
+exllamav2==0.0.12
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 0bd630bf..6dbb5b64 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11
+exllamav2==0.0.12
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 5d0cf902..04c948d8 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11; platform_system != "Darwin" and platform_machine != "x86_64"
+exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2
@@ -67,14 +67,14 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
 https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 24a21f39..d67134e1 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -2,7 +2,7 @@ accelerate==0.25.*
 colorama
 datasets
 einops
-exllamav2==0.0.11
+exllamav2==0.0.12
 gradio==3.50.*
 hqq==0.1.2
 jinja2==3.1.2

From c1470870bb61638fe3b3aabdc1d0aa5f9b9cb1e7 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 26 Jan 2024 05:58:40 -0800
Subject: [PATCH 2/5] Update README

---
 README.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/README.md b/README.md
index 342e129e..cf87279b 100644
--- a/README.md
+++ b/README.md
@@ -415,7 +415,3 @@ If you would like to contribute to the project, check out the [Contributing guid
 ## Acknowledgment
 
 In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
-
-## Support
-
-[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/B0B5JFPBO)

From 70648e75e6ba0ef3011c69b36915e79bd845a341 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 26 Jan 2024 06:00:26 -0800
Subject: [PATCH 3/5] Docs: minor change

---
 docs/12 - OpenAI API.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index abbd432d..eb6bd468 100644
--- a/docs/12 - OpenAI API.md	
+++ b/docs/12 - OpenAI API.md	
@@ -99,7 +99,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \
 
 #### Logits
 
-```
+```shell
 curl -k http://127.0.0.1:5000/v1/internal/logits \
   -H "Content-Type: application/json" \
   -d '{
@@ -110,7 +110,7 @@ curl -k http://127.0.0.1:5000/v1/internal/logits \
 
 #### Logits after sampling parameters
 
-```
+```shell
 curl -k http://127.0.0.1:5000/v1/internal/logits \
   -H "Content-Type: application/json" \
   -d '{

From bfe2326a2412cdc2e2af6cba0ee19e2a30608e82 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Fri, 26 Jan 2024 11:10:18 -0300
Subject: [PATCH 4/5] Bump hqq from 0.1.2 to 0.1.2.post1 (#5349)

---
 requirements.txt                 | 2 +-
 requirements_amd.txt             | 2 +-
 requirements_amd_noavx2.txt      | 2 +-
 requirements_apple_intel.txt     | 2 +-
 requirements_apple_silicon.txt   | 2 +-
 requirements_cpu_only.txt        | 2 +-
 requirements_cpu_only_noavx2.txt | 2 +-
 requirements_noavx2.txt          | 2 +-
 requirements_nowheels.txt        | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 54ac7a8d..6b702581 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 6882bc84..757e416e 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index a8114c6f..d6b80f0e 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64"
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index 653b8b06..375199c2 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index f3d29f2c..b1fca696 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 0325a178..27413604 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 6dbb5b64..f722cf54 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 04c948d8..836ce14a 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index d67134e1..218f3ddc 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -4,7 +4,7 @@ datasets
 einops
 exllamav2==0.0.12
 gradio==3.50.*
-hqq==0.1.2
+hqq==0.1.2.post1
 jinja2==3.1.2
 lm_eval==0.3.0
 markdown

From c0bdcee646a011a6380e39650dab155d50b1cfba Mon Sep 17 00:00:00 2001
From: sam-ngu <30950704+sam-ngu@users.noreply.github.com>
Date: Fri, 26 Jan 2024 22:10:57 +0800
Subject: [PATCH 5/5] added trust_remote_code to deepspeed init loaderClass
 (#5237)

---
 modules/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/models.py b/modules/models.py
index e94dd12f..6c38c3c7 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -162,7 +162,7 @@ def huggingface_loader(model_name):
 
     # DeepSpeed ZeRO-3
     elif shared.args.deepspeed:
-        model = LoaderClass.from_pretrained(path_to_model, torch_dtype=params['torch_dtype'])
+        model = LoaderClass.from_pretrained(path_to_model, torch_dtype=params['torch_dtype'], trust_remote_code=params['trust_remote_code'])
         model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0]
         model.module.eval()  # Inference
         logger.info(f'DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}')