diff --git a/requirements.txt b/requirements.txt
index 2366ff38..d18a28b5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64"
+exllamav2==0.0.6; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.47.*
 markdown
 numpy==1.24
@@ -35,9 +35,9 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu1
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements_amd.txt b/requirements_amd.txt
index d319bb7a..7a85436c 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index 4e50e5c3..ec91c4c3 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt
index c27f7c94..6c780045 100644
--- a/requirements_apple_intel.txt
+++ b/requirements_apple_intel.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt
index 0ae40ad5..9e160f93 100644
--- a/requirements_apple_silicon.txt
+++ b/requirements_apple_silicon.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt
index 52353d67..49eb4f13 100644
--- a/requirements_cpu_only.txt
+++ b/requirements_cpu_only.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt
index 81d4eaec..35e1473c 100644
--- a/requirements_cpu_only_noavx2.txt
+++ b/requirements_cpu_only_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 28969bb9..7df47cdd 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64"
+exllamav2==0.0.6; platform_system != "Darwin" and platform_machine != "x86_64"
 gradio==3.47.*
 markdown
 numpy==1.24
@@ -35,9 +35,9 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu1
 https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/turboderp/exllamav2/releases/download/v0.0.6/exllamav2-0.0.6+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt
index 9329291a..f165523a 100644
--- a/requirements_nowheels.txt
+++ b/requirements_nowheels.txt
@@ -2,7 +2,7 @@ accelerate==0.23.*
 colorama
 datasets
 einops
-exllamav2==0.0.5
+exllamav2==0.0.6
 gradio==3.47.*
 markdown
 numpy==1.24