mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
commit
0315122cf0
@ -1,4 +1,5 @@
|
||||
import importlib
|
||||
import platform
|
||||
from typing import Sequence
|
||||
|
||||
from tqdm import tqdm
|
||||
@ -13,58 +14,39 @@ imported_module = None
|
||||
def llama_cpp_lib():
|
||||
global imported_module
|
||||
|
||||
def module_to_purpose(module_name):
|
||||
if module_name == 'llama_cpp':
|
||||
return 'CPU'
|
||||
elif module_name == 'llama_cpp_cuda_tensorcores':
|
||||
return 'tensorcores'
|
||||
elif module_name == 'llama_cpp_cuda':
|
||||
return 'default'
|
||||
# Determine the platform
|
||||
is_macos = platform.system() == 'Darwin'
|
||||
|
||||
return 'unknown'
|
||||
# Define the library names based on the platform
|
||||
if is_macos:
|
||||
lib_names = [
|
||||
(None, 'llama_cpp')
|
||||
]
|
||||
else:
|
||||
lib_names = [
|
||||
('cpu', 'llama_cpp'),
|
||||
('tensorcores', 'llama_cpp_cuda_tensorcores'),
|
||||
(None, 'llama_cpp_cuda'),
|
||||
(None, 'llama_cpp')
|
||||
]
|
||||
|
||||
return_lib = None
|
||||
for arg, lib_name in lib_names:
|
||||
should_import = (arg is None or getattr(shared.args, arg))
|
||||
|
||||
if shared.args.cpu:
|
||||
if imported_module and imported_module != 'llama_cpp':
|
||||
raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
|
||||
try:
|
||||
return_lib = importlib.import_module('llama_cpp')
|
||||
imported_module = 'llama_cpp'
|
||||
except:
|
||||
pass
|
||||
if should_import:
|
||||
if imported_module and imported_module != lib_name:
|
||||
# Conflict detected, raise an exception
|
||||
raise Exception(f"Cannot import `{lib_name}` because `{imported_module}` is already imported. Switching to a different version of llama-cpp-python currently requires a server restart.")
|
||||
|
||||
if shared.args.tensorcores and return_lib is None:
|
||||
if imported_module and imported_module != 'llama_cpp_cuda_tensorcores':
|
||||
raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the tensorcores version currently requires a server restart.")
|
||||
try:
|
||||
return_lib = importlib.import_module('llama_cpp_cuda_tensorcores')
|
||||
imported_module = 'llama_cpp_cuda_tensorcores'
|
||||
except:
|
||||
pass
|
||||
try:
|
||||
return_lib = importlib.import_module(lib_name)
|
||||
imported_module = lib_name
|
||||
monkey_patch_llama_cpp_python(return_lib)
|
||||
return return_lib
|
||||
except ImportError:
|
||||
continue
|
||||
|
||||
if return_lib is None:
|
||||
if imported_module and imported_module != 'llama_cpp_cuda':
|
||||
raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the default version currently requires a server restart.")
|
||||
try:
|
||||
return_lib = importlib.import_module('llama_cpp_cuda')
|
||||
imported_module = 'llama_cpp_cuda'
|
||||
except:
|
||||
pass
|
||||
|
||||
if return_lib is None and not shared.args.cpu:
|
||||
if imported_module and imported_module != 'llama_cpp':
|
||||
raise Exception(f"The {module_to_purpose(imported_module)} version of llama-cpp-python is already loaded. Switching to the CPU version currently requires a server restart.")
|
||||
try:
|
||||
return_lib = importlib.import_module('llama_cpp')
|
||||
imported_module = 'llama_cpp'
|
||||
except:
|
||||
pass
|
||||
|
||||
if return_lib is not None:
|
||||
monkey_patch_llama_cpp_python(return_lib)
|
||||
|
||||
return return_lib
|
||||
return None
|
||||
|
||||
|
||||
def eval_with_progress(self, tokens: Sequence[int]):
|
||||
|
@ -58,8 +58,8 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
|
||||
|
@ -58,8 +58,8 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7+cu121.torch2.2.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/exllamav2/releases/download/v0.1.7/exllamav2-0.1.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.5.9.post1/flash_attn-2.5.9.post1+cu122torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||
https://github.com/oobabooga/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu122torch2.2.2cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.6.1/flash_attn-2.6.1+cu123torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||
autoawq==0.2.5; platform_system == "Linux" or platform_system == "Windows"
|
||||
|
Loading…
Reference in New Issue
Block a user