From 0eda9a05497d8be3661b3116b94e44f58473a4d8 Mon Sep 17 00:00:00 2001 From: jllllll <3887729+jllllll@users.noreply.github.com> Date: Fri, 6 Oct 2023 22:35:41 -0500 Subject: [PATCH] Use GPTQ wheels compatible with Pytorch 2.1 (#4210) --- one_click.py | 4 ++-- requirements.txt | 8 ++++---- requirements_amd.txt | 8 ++++---- requirements_amd_noavx2.txt | 6 +++--- requirements_noavx2.txt | 8 ++++---- 5 files changed, 17 insertions(+), 17 deletions(-) diff --git a/one_click.py b/one_click.py index ef2a0676..8e1f06dc 100644 --- a/one_click.py +++ b/one_click.py @@ -153,7 +153,7 @@ def install_webui(): print("What is your GPU?") print() print("A) NVIDIA") - print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.4.2/5.4.3 on Linux)") + print("B) AMD (Linux/MacOS only. Requires ROCm SDK 5.6 on Linux)") print("C) Apple M Series") print("D) Intel Arc (IPEX)") print("N) None (I want to run models in CPU mode)") @@ -175,7 +175,7 @@ def install_webui(): install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118" elif not is_macos() and choice == "B": if is_linux(): - install_pytorch = "python -m pip install torch==2.0.1+rocm5.4.2 torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2" + install_pytorch = "python -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6" else: print("AMD GPUs are only supported on Linux. Exiting...") sys.exit(1) diff --git a/requirements.txt b/requirements.txt index 09823f70..a1f54ab5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,8 +36,8 @@ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" # CUDA wheels -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" @@ -45,7 +45,7 @@ https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu118-py3-none-any.whl autoawq==0.1.4 diff --git a/requirements_amd.txt b/requirements_amd.txt index 0a015a95..0a87e3f3 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -36,7 +36,7 @@ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_ https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" # AMD wheels -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.4.2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.6.1-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index c93027e3..871d6731 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -36,6 +36,6 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" # AMD wheels -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 91600e12..553f1415 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -36,8 +36,8 @@ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/ https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.2.11+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" # CUDA wheels -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" @@ -45,7 +45,7 @@ https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+ https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.1.post1/flash_attn-2.3.1.post1+cu118torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu118avx-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" -https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-win_amd64.whl; platform_system == "Windows" +https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu118-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX/ctransformers-0.2.27+cu118-py3-none-any.whl autoawq==0.1.4