diff --git a/modules/exllamav2.py b/modules/exllamav2.py index cb142af5..b92e8840 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -131,7 +131,7 @@ class Exllamav2Model: token, _, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer) ids = torch.cat([ids, token], dim=1) - if i == 0 and self.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'): + if i == 0 and self.tokenizer.tokenizer.id_to_piece(int(token)).startswith('▁'): has_leading_space = True decoded_text = self.tokenizer.decode(ids[:, initial_len:], decode_special_tokens=not state['skip_special_tokens'])[0] diff --git a/requirements.txt b/requirements.txt index 51385e11..f4cbaeac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.10; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* markdown numpy==1.24.* @@ -53,14 +53,14 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121 https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" diff --git a/requirements_amd.txt b/requirements_amd.txt index 4299faa9..892cae7c 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 60f57b0d..228c5172 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 9ab01829..377373f1 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 7cc674bb..e280108b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 6eb1d7ba..7e83c662 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index d415370e..b31eec14 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.* diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 38d1569b..c00d9bd1 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.10; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* markdown numpy==1.24.* @@ -53,14 +53,14 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121 https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 80a6b65d..b285e4c0 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.8 +exllamav2==0.0.10 gradio==3.50.* markdown numpy==1.24.*