Merge branch 'main' of https://github.com/oobabooga/text-generation-webui
Commit 87fe313d6c
@@ -90,10 +90,6 @@ cd text-generation-webui
 pip install -r requirements.txt
 ```
 
-#### llama.cpp with GPU acceleration
-
-Requires the additional compilation step described here: [GPU acceleration](https://github.com/oobabooga/text-generation-webui/blob/main/docs/llama.cpp-models.md#gpu-acceleration).
-
 #### bitsandbytes
 
 bitsandbytes >= 0.39 may not work on older NVIDIA GPUs. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
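The hunk is cut off just before the downgrade command itself. For illustration only — the exact version pin is an assumption, not shown in this diff — a downgrade to a pre-0.39 release would look like:

```
pip install bitsandbytes==0.38.1
```
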
@@ -3,7 +3,6 @@ from pathlib import Path
 from typing import Any, Dict, Optional, Union
 
 import torch
-from llama_cpp import Llama
 from torch.nn import CrossEntropyLoss
 from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel
 from transformers.modeling_outputs import CausalLMOutputWithPast
 
@@ -11,6 +10,10 @@ from transformers.modeling_outputs import CausalLMOutputWithPast
 from modules import shared
 from modules.logging_colors import logger
 
+if torch.cuda.is_available():
+    from llama_cpp_cuda import Llama
+else:
+    from llama_cpp import Llama
 
 class LlamacppHF(PreTrainedModel):
     def __init__(self, model):
 
@@ -9,12 +9,17 @@ https://abetlen.github.io/llama-cpp-python/
 import re
 from functools import partial
 
-from llama_cpp import Llama, LlamaCache, LogitsProcessorList
+import torch
 
 from modules import shared
 from modules.callbacks import Iteratorize
 from modules.logging_colors import logger
 
+if torch.cuda.is_available():
+    from llama_cpp_cuda import Llama, LlamaCache, LogitsProcessorList
+else:
+    from llama_cpp import Llama, LlamaCache, LogitsProcessorList
+
 
 def ban_eos_logits_processor(eos_token, input_ids, logits):
     logits[eos_token] = -float('inf')
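The two hunks above apply the same change to both llama.cpp code paths: instead of importing the stock `llama_cpp` package unconditionally, they pick between the CPU wheel (`llama_cpp`) and a CUDA-enabled build published under the separate package name `llama_cpp_cuda`, branching on `torch.cuda.is_available()`. Note that `import torch` is moved above the switch so the check can run at import time. A minimal sketch of a more defensive variant, assuming the CUDA wheel might be missing (this fallback is not part of the commit):

```python
import torch

if torch.cuda.is_available():
    try:
        # CUDA build of llama-cpp-python, installed under a distinct name
        from llama_cpp_cuda import Llama, LlamaCache, LogitsProcessorList
    except ImportError:
        # CUDA wheel not installed; fall back to the CPU build
        from llama_cpp import Llama, LlamaCache, LogitsProcessorList
else:
    from llama_cpp import Llama, LlamaCache, LogitsProcessorList
```
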
@@ -62,7 +62,7 @@ settings = {
     'chat_generation_attempts_max': 10,
     'default_extensions': [],
     'chat_default_extensions': ['gallery'],
-    'preset': 'simple-1',
+    'preset': 'Divine Intellect',
     'prompt': 'QA',
 }
 
@@ -13,18 +13,22 @@ Pillow>=9.5.0
 pyyaml
 requests
 safetensors==0.3.1
-sentencepiece
-tqdm
 scipy
+sentencepiece
 tensorboard
-wandb
 transformers==4.31.*
+tqdm
+wandb
 git+https://github.com/huggingface/peft@03eb378eb914fbee709ff7c86ba5b1d033b89524
 bitsandbytes==0.40.2; platform_system != "Windows"
 https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.40.2-py3-none-win_amd64.whl; platform_system == "Windows"
-llama-cpp-python==0.1.73; platform_system != "Windows"
-https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.73/llama_cpp_python-0.1.73-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.3.0/auto_gptq-0.3.0+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/jllllll/exllama/releases/download/0.0.7/exllama-0.0.7+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
 https://github.com/jllllll/exllama/releases/download/0.0.7/exllama-0.0.7+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+# llama-cpp-python without GPU support
+llama-cpp-python==0.1.73; platform_system != "Windows"
+https://github.com/abetlen/llama-cpp-python/releases/download/v0.1.73/llama_cpp_python-0.1.73-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+# llama-cpp-python with CUDA support
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.73+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
+https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.1.73+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
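The requirements hunk relies on PEP 508 environment markers (`platform_system`, `platform_machine`) so a single requirements.txt installs the plain PyPI package on Linux and a prebuilt wheel on Windows, plus the new cuBLAS wheels backing the `llama_cpp_cuda` import above. A small sketch of how such markers evaluate, using the `packaging` library that pip builds on:

```python
from packaging.markers import Marker

# Markers taken from the requirements hunk above; evaluate() checks them
# against the current interpreter/platform, the same way pip does at install time.
for spec in (
    'platform_system != "Windows"',
    'platform_system == "Linux" and platform_machine == "x86_64"',
):
    print(spec, "->", Marker(spec).evaluate())
```
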
@@ -36,5 +36,5 @@ chat_generation_attempts_max: 10
 default_extensions: []
 chat_default_extensions:
 - gallery
-preset: simple-1
+preset: 'Divine Intellect'
 prompt: QA
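The last two hunks change the same default in both places it is defined — the `settings` dict in the Python source and the YAML settings template — switching the default generation preset from simple-1 to Divine Intellect. A minimal sketch of how a template of this shape typically overrides the in-code defaults (the file name and loading logic here are assumptions for illustration, not code from this commit):

```python
import yaml  # pyyaml is already pinned in requirements.txt

# In-code defaults, mirroring the settings-dict hunk above
settings = {
    'preset': 'Divine Intellect',
    'prompt': 'QA',
}

# Hypothetical user override file with the same keys as the YAML template
with open('settings.yaml') as f:
    user_settings = yaml.safe_load(f) or {}

settings.update(user_settings)  # user values take precedence over defaults
```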