mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-23 00:18:20 +01:00
Merge pull request #4142 from jllllll/llamacpp-0.2.11
Bump llama-cpp-python to 0.2.11
This commit is contained in:
commit
0197fdddf1
@ -287,6 +287,7 @@ Optionally, you can use the following command-line flags:
|
|||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
|-------------|-------------|
|
|-------------|-------------|
|
||||||
| `--threads` | Number of threads to use. |
|
| `--threads` | Number of threads to use. |
|
||||||
|
| `--threads-batch THREADS_BATCH` | Number of threads to use for batches/prompt processing. |
|
||||||
| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
|
| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
|
||||||
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
|
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
|
||||||
| `--n_ctx N_CTX` | Size of the prompt context. |
|
| `--n_ctx N_CTX` | Size of the prompt context. |
|
||||||
|
@ -194,11 +194,11 @@ class LlamacppHF(PreTrainedModel):
|
|||||||
'n_ctx': shared.args.n_ctx,
|
'n_ctx': shared.args.n_ctx,
|
||||||
'seed': int(shared.args.llama_cpp_seed),
|
'seed': int(shared.args.llama_cpp_seed),
|
||||||
'n_threads': shared.args.threads or None,
|
'n_threads': shared.args.threads or None,
|
||||||
|
'n_threads_batch': shared.args.threads_batch or None,
|
||||||
'n_batch': shared.args.n_batch,
|
'n_batch': shared.args.n_batch,
|
||||||
'use_mmap': not shared.args.no_mmap,
|
'use_mmap': not shared.args.no_mmap,
|
||||||
'use_mlock': shared.args.mlock,
|
'use_mlock': shared.args.mlock,
|
||||||
'mul_mat_q': shared.args.mul_mat_q,
|
'mul_mat_q': shared.args.mul_mat_q,
|
||||||
'low_vram': shared.args.low_vram,
|
|
||||||
'numa': shared.args.numa,
|
'numa': shared.args.numa,
|
||||||
'n_gpu_layers': shared.args.n_gpu_layers,
|
'n_gpu_layers': shared.args.n_gpu_layers,
|
||||||
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
||||||
|
@ -76,11 +76,11 @@ class LlamaCppModel:
|
|||||||
'n_ctx': shared.args.n_ctx,
|
'n_ctx': shared.args.n_ctx,
|
||||||
'seed': int(shared.args.llama_cpp_seed),
|
'seed': int(shared.args.llama_cpp_seed),
|
||||||
'n_threads': shared.args.threads or None,
|
'n_threads': shared.args.threads or None,
|
||||||
|
'n_threads_batch': shared.args.threads_batch or None,
|
||||||
'n_batch': shared.args.n_batch,
|
'n_batch': shared.args.n_batch,
|
||||||
'use_mmap': not shared.args.no_mmap,
|
'use_mmap': not shared.args.no_mmap,
|
||||||
'use_mlock': shared.args.mlock,
|
'use_mlock': shared.args.mlock,
|
||||||
'mul_mat_q': shared.args.mul_mat_q,
|
'mul_mat_q': shared.args.mul_mat_q,
|
||||||
'low_vram': shared.args.low_vram,
|
|
||||||
'numa': shared.args.numa,
|
'numa': shared.args.numa,
|
||||||
'n_gpu_layers': shared.args.n_gpu_layers,
|
'n_gpu_layers': shared.args.n_gpu_layers,
|
||||||
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
'rope_freq_base': RoPE.get_rope_freq_base(shared.args.alpha_value, shared.args.rope_freq_base),
|
||||||
|
@ -91,8 +91,8 @@ loaders_and_params = OrderedDict({
|
|||||||
'tensor_split',
|
'tensor_split',
|
||||||
'n_batch',
|
'n_batch',
|
||||||
'threads',
|
'threads',
|
||||||
|
'threads_batch',
|
||||||
'no_mmap',
|
'no_mmap',
|
||||||
'low_vram',
|
|
||||||
'mlock',
|
'mlock',
|
||||||
'mul_mat_q',
|
'mul_mat_q',
|
||||||
'llama_cpp_seed',
|
'llama_cpp_seed',
|
||||||
@ -108,8 +108,8 @@ loaders_and_params = OrderedDict({
|
|||||||
'tensor_split',
|
'tensor_split',
|
||||||
'n_batch',
|
'n_batch',
|
||||||
'threads',
|
'threads',
|
||||||
|
'threads_batch',
|
||||||
'no_mmap',
|
'no_mmap',
|
||||||
'low_vram',
|
|
||||||
'mlock',
|
'mlock',
|
||||||
'mul_mat_q',
|
'mul_mat_q',
|
||||||
'alpha_value',
|
'alpha_value',
|
||||||
|
@ -115,9 +115,9 @@ parser.add_argument('--use_double_quant', action='store_true', help='use_double_
|
|||||||
|
|
||||||
# llama.cpp
|
# llama.cpp
|
||||||
parser.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
|
parser.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
|
||||||
|
parser.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
|
||||||
parser.add_argument('--n_batch', type=int, default=512, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
|
parser.add_argument('--n_batch', type=int, default=512, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
|
||||||
parser.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
|
parser.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
|
||||||
parser.add_argument('--low-vram', action='store_true', help='Low VRAM Mode')
|
|
||||||
parser.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
|
parser.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
|
||||||
parser.add_argument('--mul_mat_q', action='store_true', help='Activate new mulmat kernels.')
|
parser.add_argument('--mul_mat_q', action='store_true', help='Activate new mulmat kernels.')
|
||||||
parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
|
parser.add_argument('--cache-capacity', type=str, help='Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed.')
|
||||||
|
@ -69,9 +69,9 @@ def list_model_elements():
|
|||||||
'disable_exllama',
|
'disable_exllama',
|
||||||
'cfg_cache',
|
'cfg_cache',
|
||||||
'threads',
|
'threads',
|
||||||
|
'threads_batch',
|
||||||
'n_batch',
|
'n_batch',
|
||||||
'no_mmap',
|
'no_mmap',
|
||||||
'low_vram',
|
|
||||||
'mlock',
|
'mlock',
|
||||||
'mul_mat_q',
|
'mul_mat_q',
|
||||||
'n_gpu_layers',
|
'n_gpu_layers',
|
||||||
|
@ -83,6 +83,7 @@ def create_ui():
|
|||||||
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers)
|
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers)
|
||||||
shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=32768, step=256, label="n_ctx", value=shared.args.n_ctx)
|
shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=32768, step=256, label="n_ctx", value=shared.args.n_ctx)
|
||||||
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
|
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
|
||||||
|
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=32, value=shared.args.threads_batch)
|
||||||
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch)
|
shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch)
|
||||||
|
|
||||||
shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
|
shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
|
||||||
@ -107,7 +108,6 @@ def create_ui():
|
|||||||
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
|
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
|
||||||
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
|
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
|
||||||
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
|
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
|
||||||
shared.gradio['low_vram'] = gr.Checkbox(label="low-vram", value=shared.args.low_vram)
|
|
||||||
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu)
|
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu)
|
||||||
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
||||||
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
|
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
|
||||||
|
@ -31,8 +31,8 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
@ -42,8 +42,8 @@ https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117
|
|||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.7+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu117-py3-none-any.whl
|
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu117-py3-none-any.whl
|
||||||
|
@ -31,11 +31,11 @@ bitsandbytes==0.38.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.7+rocm5.4.2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.2.11+rocm5.4.2-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
@ -31,8 +31,8 @@ bitsandbytes==0.38.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.38.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+rocm5.4.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
@ -31,6 +31,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
|
||||||
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
|
||||||
|
@ -31,6 +31,7 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_11_0_arm64.whl; platform_system == "Darwin" and platform_release >= "20.0.0" and platform_release < "21.0.0"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_12_0_arm64.whl; platform_system == "Darwin" and platform_release >= "21.0.0" and platform_release < "22.0.0"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
|
||||||
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.2.11-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
|
||||||
|
@ -31,5 +31,5 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, AVX2)
|
# llama-cpp-python (CPU only, AVX2)
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.7/llama_cpp_python-0.2.7-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
@ -31,5 +31,5 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
@ -31,8 +31,8 @@ bitsandbytes==0.41.1; platform_system != "Windows"
|
|||||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# llama-cpp-python (CPU only, no AVX2)
|
# llama-cpp-python (CPU only, no AVX2)
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/basic/llama_cpp_python-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
@ -42,8 +42,8 @@ https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117
|
|||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.7+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.0/gptq_for_llama-0.1.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu117-py3-none-any.whl
|
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu117-py3-none-any.whl
|
||||||
|
Loading…
Reference in New Issue
Block a user