mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-24 13:28:59 +01:00
Add CUDA wheels for llama-cpp-python by jllllll
This commit is contained in:
parent
05f4cc63c8
commit
4b19b74e6c
@@ -3,7 +3,6 @@ from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
import torch
|
||||
from llama_cpp import Llama
|
||||
from torch.nn import CrossEntropyLoss
|
||||
from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
|
||||
from transformers.modeling_outputs import CausalLMOutputWithPast
|
||||
@@ -11,6 +10,10 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
|
||||
if torch.cuda.is_available():
|
||||
from llama_cpp_cuda import Llama
|
||||
else:
|
||||
from llama_cpp import Llama
|
||||
|
||||
class LlamacppHF(PreTrainedModel):
|
||||
def __init__(self, model):
|
||||
|
@@ -9,12 +9,17 @@ https://abetlen.github.io/llama-cpp-python/
|
||||
import re
|
||||
from functools import partial
|
||||
|
||||
from llama_cpp import Llama, LlamaCache, LogitsProcessorList
|
||||
import torch
|
||||
|
||||
from modules import shared
|
||||
from modules.callbacks import Iteratorize
|
||||
from modules.logging_colors import logger
|
||||
|
||||
if torch.cuda.is_available():
|
||||
from llama_cpp_cuda import Llama, LlamaCache, LogitsProcessorList
|
||||
else:
|
||||
from llama_cpp import Llama, LlamaCache, LogitsProcessorList
|
||||
|
||||
|
||||
def ban_eos_logits_processor(eos_token, input_ids, logits):
|
||||
logits[eos_token] = -float('inf')
|
||||
|
@@ -13,18 +13,22 @@ Pillow>=9.5.0
|
||||
pyyaml
|
||||
requests
|
||||
safetensors==0.3.1
|
||||
sentencepiece
|
||||
tqdm
|
||||
scipy
|
||||
sentencepiece
|
||||
tensorboard
|
||||
wandb
|
||||
transformers==4.31.*
|
||||
tqdm
|
||||
wandb
|
||||
git+https://github.com/huggingface/peft@03eb378eb914fbee709ff7c86ba5b1d033b89524
|
||||
bitsandbytes==0.40.2; platform_system != "Windows"
|
||||
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
llama-cpp-python==0.1.73; platform_system != "Windows"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.73/llama_cpp_python-0.1.73-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.6/exllama-0.0.6+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.6/exllama-0.0.6+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
# llama-cpp-python without GPU support
|
||||
llama-cpp-python==0.1.73; platform_system != "Windows"
|
||||
https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.73/llama_cpp_python-0.1.73-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
# llama-cpp-python with CUDA support
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.73+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.73+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
Loading…
Reference in New Issue
Block a user