From 0eda9a05497d8be3661b3116b94e44f58473a4d8 Mon Sep 17 00:00:00 2001
From: jllllll <3887729+jllllll@users.noreply.github.com>
Date: Fri, 6 Oct 2023 22:35:41 -0500
Subject: [PATCH] Use GPTQ wheels compatible with Pytorch 2.1 (#4210)

---
 one_click.py                | 4 ++--
 requirements.txt            | 8 ++++----
 requirements_amd.txt        | 8 ++++----
 requirements_amd_noavx2.txt | 6 +++---
 requirements_noavx2.txt     | 8 ++++----
 5 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/one_click.py b/one_click.py
index ef2a0676..8e1f06dc 100644
--- a/one_click.py
+++ b/one_click.py
@@ -153,7 +153,7 @@ def install_webui():
         print("What is your GPU?")
         print()
         print("A) NVIDIA")
-        print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.4.2/5.4.3 on Linux)")
+        print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.6 on Linux)")
         print("C) Apple M Series")
         print("D) Intel Arc (IPEX)")
         print("N) None (I want to run models in CPU mode)")
@@ -175,7 +175,7 @@ def install_webui():
         install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118"
     elif not is_macos() and choice == "B":
         if is_linux():
-            install_pytorch = "python -m pip install torch==2.0.1+rocm5.4.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2"
+            install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6"
         else:
             print("AMD GPUs are only supported on Linux. Exiting...")
             sys.exit(1)
diff --git a/requirements.txt b/requirements.txt
index 09823f70..a1f54ab5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -36,8 +36,8 @@ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_
 https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 
 # CUDA wheels
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
@@ -45,7 +45,7 @@ https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu118-py3-none-any.whl
 autoawq==0.1.4
diff --git a/requirements_amd.txt b/requirements_amd.txt
index 0a015a95..0a87e3f3 100644
--- a/requirements_amd.txt
+++ b/requirements_amd.txt
@@ -36,7 +36,7 @@ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_
 https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 
 # AMD wheels
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.4.2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt
index c93027e3..871d6731 100644
--- a/requirements_amd_noavx2.txt
+++ b/requirements_amd_noavx2.txt
@@ -36,6 +36,6 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 
 # AMD wheels
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index 91600e12..553f1415 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -36,8 +36,8 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 
 # CUDA wheels
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
@@ -45,7 +45,7 @@ https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
-https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX/ctransformers-0.2.27+cu118-py3-none-any.whl
 autoawq==0.1.4