mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 08:07:56 +01:00
Bump ExLlamaV2 to 0.0.5 (#4186)
This commit is contained in:
parent
7ffb424c7b
commit
8a98646a21
@ -110,7 +110,7 @@ class Exllamav2Model:
|
||||
has_leading_space = False
|
||||
for i in range(max_new_tokens):
|
||||
logits = self.model.forward(ids[:, -1:], self.cache, input_mask=None).float().cpu()
|
||||
token, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random())
|
||||
token, _, _= ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer)
|
||||
ids = torch.cat([ids, token], dim=1)
|
||||
|
||||
if i == 0 and self.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
|
||||
|
@ -216,6 +216,7 @@ loaders_samplers = {
|
||||
'guidance_scale',
|
||||
'negative_prompt',
|
||||
'ban_eos_token',
|
||||
'add_bos_token',
|
||||
'custom_token_bans',
|
||||
'auto_max_new_tokens',
|
||||
},
|
||||
@ -228,6 +229,7 @@ loaders_samplers = {
|
||||
'repetition_penalty_range',
|
||||
'seed',
|
||||
'ban_eos_token',
|
||||
'add_bos_token',
|
||||
'custom_token_bans',
|
||||
'auto_max_new_tokens',
|
||||
},
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
@ -40,8 +40,8 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
exllamav2==0.0.5; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
@ -40,8 +40,8 @@ https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu
|
||||
https://github.com/PanQiWei/AutoGPTQ/releases/download/v0.4.2/auto_gptq-0.4.2+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/exllama/releases/download/0.0.17/exllama-0.0.17+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.4/exllamav2-0.0.4+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.5/exllamav2-0.0.5+cu117-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.0/flash_attn-2.3.0+cu117torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu117-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
@ -8,7 +8,7 @@ accelerate==0.23.*
|
||||
colorama
|
||||
datasets
|
||||
einops
|
||||
exllamav2==0.0.4
|
||||
exllamav2==0.0.5
|
||||
markdown
|
||||
numpy==1.24
|
||||
optimum==1.13.1
|
||||
|
Loading…
Reference in New Issue
Block a user