From 87dc421ee8478e56f38884467c712fab8bb41f11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 22 Jan 2024 22:40:12 -0300 Subject: [PATCH 1/5] Bump exllamav2 to 0.0.12 (#5352) --- modules/exllamav2.py | 14 ++++++++++++-- modules/loaders.py | 4 ++++ requirements.txt | 18 +++++++++--------- requirements_amd.txt | 6 +++--- requirements_amd_noavx2.txt | 6 +++--- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 18 +++++++++--------- requirements_nowheels.txt | 2 +- 11 files changed, 45 insertions(+), 31 deletions(-) diff --git a/modules/exllamav2.py b/modules/exllamav2.py index 2730d9f5..551ed498 100644 --- a/modules/exllamav2.py +++ b/modules/exllamav2.py @@ -93,17 +93,27 @@ class Exllamav2Model: def generate_with_streaming(self, prompt, state): settings = ExLlamaV2Sampler.Settings() + + settings.token_repetition_penalty = state['repetition_penalty'] + settings.token_repetition_range = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range'] + + settings.token_frequency_penalty = state['frequency_penalty'] + settings.token_presence_penalty = state['presence_penalty'] + settings.temperature = state['temperature'] settings.top_k = state['top_k'] settings.top_p = state['top_p'] + settings.top_a = state['top_a'] settings.min_p = state['min_p'] settings.tfs = state['tfs'] settings.typical = state['typical_p'] + + settings.temperature_last = state['temperature_last'] + settings.mirostat = state['mirostat_mode'] == 2 settings.mirostat_tau = state['mirostat_tau'] settings.mirostat_eta = state['mirostat_eta'] - settings.token_repetition_penalty = state['repetition_penalty'] - settings.token_repetition_range = -1 if state['repetition_penalty_range'] <= 0 else state['repetition_penalty_range'] + if state['ban_eos_token']: settings.disallow_tokens(self.tokenizer, [self.tokenizer.eos_token_id]) diff --git a/modules/loaders.py b/modules/loaders.py index 5620c95a..2976a851 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -205,12 +205,16 @@ loaders_samplers = { 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', + 'temperature_last', 'top_p', 'min_p', 'top_k', 'typical_p', 'tfs', + 'top_a', 'repetition_penalty', + 'presence_penalty', + 'frequency_penalty', 'repetition_penalty_range', 'seed', 'mirostat_mode', diff --git a/requirements.txt b/requirements.txt index 6a60133e..54ac7a8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 @@ -67,14 +67,14 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" diff --git a/requirements_amd.txt b/requirements_amd.txt index 733d2001..6882bc84 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" +exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 @@ -47,8 +47,8 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+roc https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 9651171d..a8114c6f 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" +exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 @@ -43,8 +43,8 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+roc https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+rocm5.6-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+rocm5.6-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 2e48ccce..653b8b06 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11 +exllamav2==0.0.12 gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 3e49baa8..f3d29f2c 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11 +exllamav2==0.0.12 gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 4e89c691..0325a178 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11 +exllamav2==0.0.12 gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 0bd630bf..6dbb5b64 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11 +exllamav2==0.0.12 gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 5d0cf902..04c948d8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11; platform_system != "Darwin" and platform_machine != "x86_64" +exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 @@ -67,14 +67,14 @@ https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu1 https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" https://github.com/jllllll/AutoGPTQ/releases/download/v0.6.0/auto_gptq-0.6.0+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" -https://github.com/turboderp/exllamav2/releases/download/v0.0.11/exllamav2-0.0.11+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9" +https://github.com/turboderp/exllamav2/releases/download/v0.0.12/exllamav2-0.0.12+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 24a21f39..d67134e1 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -2,7 +2,7 @@ accelerate==0.25.* colorama datasets einops -exllamav2==0.0.11 +exllamav2==0.0.12 gradio==3.50.* hqq==0.1.2 jinja2==3.1.2 From c1470870bb61638fe3b3aabdc1d0aa5f9b9cb1e7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 26 Jan 2024 05:58:40 -0800 Subject: [PATCH 2/5] Update README --- README.md | 4 ---- 1 file changed, 4 deletions(-) diff --git a/README.md b/README.md index 342e129e..cf87279b 100644 --- a/README.md +++ b/README.md @@ -415,7 +415,3 @@ If you would like to contribute to the project, check out the [Contributing guid ## Acknowledgment In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. - -## Support - -[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/B0B5JFPBO) From 70648e75e6ba0ef3011c69b36915e79bd845a341 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 26 Jan 2024 06:00:26 -0800 Subject: [PATCH 3/5] Docs: minor change --- docs/12 - OpenAI API.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index abbd432d..eb6bd468 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -99,7 +99,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \ #### Logits -``` +```shell curl -k http://127.0.0.1:5000/v1/internal/logits \ -H "Content-Type: application/json" \ -d '{ @@ -110,7 +110,7 @@ curl -k http://127.0.0.1:5000/v1/internal/logits \ #### Logits after sampling parameters -``` +```shell curl -k http://127.0.0.1:5000/v1/internal/logits \ -H "Content-Type: application/json" \ -d '{ From bfe2326a2412cdc2e2af6cba0ee19e2a30608e82 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 11:10:18 -0300 Subject: [PATCH 4/5] Bump hqq from 0.1.2 to 0.1.2.post1 (#5349) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 54ac7a8d..6b702581 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_amd.txt b/requirements_amd.txt index 6882bc84..757e416e 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index a8114c6f..d6b80f0e 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12; platform_system == "Windows" or python_version < "3.10" or python_version > "3.11" or platform_machine != "x86_64" gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 653b8b06..375199c2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12 gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index f3d29f2c..b1fca696 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12 gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 0325a178..27413604 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12 gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 6dbb5b64..f722cf54 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12 gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 04c948d8..836ce14a 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12; platform_system != "Darwin" and platform_machine != "x86_64" gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index d67134e1..218f3ddc 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -4,7 +4,7 @@ datasets einops exllamav2==0.0.12 gradio==3.50.* -hqq==0.1.2 +hqq==0.1.2.post1 jinja2==3.1.2 lm_eval==0.3.0 markdown From c0bdcee646a011a6380e39650dab155d50b1cfba Mon Sep 17 00:00:00 2001 From: sam-ngu <30950704+sam-ngu@users.noreply.github.com> Date: Fri, 26 Jan 2024 22:10:57 +0800 Subject: [PATCH 5/5] added trust_remote_code to deepspeed init loaderClass (#5237) --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index e94dd12f..6c38c3c7 100644 --- a/modules/models.py +++ b/modules/models.py @@ -162,7 +162,7 @@ def huggingface_loader(model_name): # DeepSpeed ZeRO-3 elif shared.args.deepspeed: - model = LoaderClass.from_pretrained(path_to_model, torch_dtype=params['torch_dtype']) + model = LoaderClass.from_pretrained(path_to_model, torch_dtype=params['torch_dtype'], trust_remote_code=params['trust_remote_code']) model = deepspeed.initialize(model=model, config_params=ds_config, model_parameters=None, optimizer=None, lr_scheduler=None)[0] model.module.eval() # Inference logger.info(f'DeepSpeed ZeRO-3 is enabled: {is_deepspeed_zero3_enabled()}')