mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-01-13 13:52:49 +01:00
commit
1ccbcb967e
@ -11,7 +11,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
|
|||||||
## Features
|
## Features
|
||||||
|
|
||||||
* 3 interface modes: default (two columns), notebook, and chat
|
* 3 interface modes: default (two columns), notebook, and chat
|
||||||
* Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlama](https://github.com/turboderp/exllama), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [CTransformers](https://github.com/marella/ctransformers), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ)
|
* Multiple model backends: [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp) (through [llama-cpp-python](https://github.com/abetlen/llama-cpp-python)), [ExLlama](https://github.com/turboderp/exllama), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [CTransformers](https://github.com/marella/ctransformers)
|
||||||
* Dropdown menu for quickly switching between different models
|
* Dropdown menu for quickly switching between different models
|
||||||
* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
|
* LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
|
||||||
* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
|
* Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
|
||||||
@ -283,7 +283,7 @@ Optionally, you can use the following command-line flags:
|
|||||||
|
|
||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
|--------------------------------------------|-------------|
|
|--------------------------------------------|-------------|
|
||||||
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, exllama_hf, exllamav2_hf, exllama, exllamav2, autogptq, gptq-for-llama, llama.cpp, llamacpp_hf, ctransformers, autoawq. |
|
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlama_HF, ExLlamav2_HF, AutoGPTQ, AutoAWQ, GPTQ-for-LLaMa, ExLlama, ExLlamav2, ctransformers. |
|
||||||
|
|
||||||
#### Accelerate/transformers
|
#### Accelerate/transformers
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ class Exllamav2Model:
|
|||||||
token, _, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer)
|
token, _, _ = ExLlamaV2Sampler.sample(logits, settings, ids, random.random(), self.tokenizer)
|
||||||
ids = torch.cat([ids, token], dim=1)
|
ids = torch.cat([ids, token], dim=1)
|
||||||
|
|
||||||
if i == 0 and self.tokenizer.tokenizer.IdToPiece(int(token)).startswith('▁'):
|
if i == 0 and self.tokenizer.tokenizer.id_to_piece(int(token)).startswith('▁'):
|
||||||
has_leading_space = True
|
has_leading_space = True
|
||||||
|
|
||||||
decoded_text = self.tokenizer.decode(ids[:, initial_len:], decode_special_tokens=not state['skip_special_tokens'])[0]
|
decoded_text = self.tokenizer.decode(ids[:, initial_len:], decode_special_tokens=not state['skip_special_tokens'])[0]
|
||||||
|
@ -71,7 +71,6 @@ loaders_and_params = OrderedDict({
|
|||||||
'compress_pos_emb',
|
'compress_pos_emb',
|
||||||
'cfg_cache',
|
'cfg_cache',
|
||||||
'no_use_fast',
|
'no_use_fast',
|
||||||
'exllama_HF_info',
|
|
||||||
],
|
],
|
||||||
'ExLlamav2_HF': [
|
'ExLlamav2_HF': [
|
||||||
'gpu_split',
|
'gpu_split',
|
||||||
@ -133,6 +132,7 @@ loaders_and_params = OrderedDict({
|
|||||||
'cache_8bit',
|
'cache_8bit',
|
||||||
'alpha_value',
|
'alpha_value',
|
||||||
'compress_pos_emb',
|
'compress_pos_emb',
|
||||||
|
'exllamav2_info',
|
||||||
],
|
],
|
||||||
'ctransformers': [
|
'ctransformers': [
|
||||||
'n_ctx',
|
'n_ctx',
|
||||||
|
@ -43,7 +43,7 @@ settings = {
|
|||||||
'seed': -1,
|
'seed': -1,
|
||||||
'truncation_length': 2048,
|
'truncation_length': 2048,
|
||||||
'truncation_length_min': 0,
|
'truncation_length_min': 0,
|
||||||
'truncation_length_max': 32768,
|
'truncation_length_max': 200000,
|
||||||
'max_tokens_second': 0,
|
'max_tokens_second': 0,
|
||||||
'custom_stopping_strings': '',
|
'custom_stopping_strings': '',
|
||||||
'custom_token_bans': '',
|
'custom_token_bans': '',
|
||||||
@ -79,7 +79,7 @@ parser.add_argument('--verbose', action='store_true', help='Print the prompts to
|
|||||||
parser.add_argument('--chat-buttons', action='store_true', help='Show buttons on the chat tab instead of a hover menu.')
|
parser.add_argument('--chat-buttons', action='store_true', help='Show buttons on the chat tab instead of a hover menu.')
|
||||||
|
|
||||||
# Model loader
|
# Model loader
|
||||||
parser.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, exllama_hf, exllamav2_hf, exllama, exllamav2, autogptq, gptq-for-llama, llama.cpp, llamacpp_hf, ctransformers, autoawq.')
|
parser.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlama_HF, ExLlamav2_HF, AutoGPTQ, AutoAWQ, GPTQ-for-LLaMa, ExLlama, ExLlamav2, ctransformers.')
|
||||||
|
|
||||||
# Accelerate/transformers
|
# Accelerate/transformers
|
||||||
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
|
parser.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
|
||||||
|
@ -165,7 +165,7 @@ def create_ui():
|
|||||||
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
|
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
max_length = gr.Slider(label='max_length', minimum=0, maximum=32768, value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
|
max_length = gr.Slider(label='max_length', minimum=0, maximum=shared.settings['truncation_length_max'], value=0, step=256, info='The context for each evaluation. If set to 0, the maximum context length for the model will be used.')
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)
|
start_current_evaluation = gr.Button("Evaluate loaded model", interactive=not mu)
|
||||||
|
@ -95,7 +95,7 @@ def create_ui():
|
|||||||
shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
|
shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=shared.args.groupsize if shared.args.groupsize > 0 else "None")
|
||||||
shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None"], value=shared.args.model_type or "None")
|
shared.gradio['model_type'] = gr.Dropdown(label="model_type", choices=["None"], value=shared.args.model_type or "None")
|
||||||
shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0)
|
shared.gradio['pre_layer'] = gr.Slider(label="pre_layer", minimum=0, maximum=100, value=shared.args.pre_layer[0] if shared.args.pre_layer is not None else 0)
|
||||||
shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from LLaMA.')
|
shared.gradio['autogptq_info'] = gr.Markdown('* ExLlama_HF is recommended over AutoGPTQ for models derived from Llama.')
|
||||||
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
|
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
|
||||||
shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len)
|
shared.gradio['max_seq_len'] = gr.Slider(label='max_seq_len', minimum=0, maximum=shared.settings['truncation_length_max'], step=256, info='Context length. Try lowering this if you run out of memory while loading the model.', value=shared.args.max_seq_len)
|
||||||
shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value)
|
shared.gradio['alpha_value'] = gr.Slider(label='alpha_value', minimum=1, maximum=8, step=0.05, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.', value=shared.args.alpha_value)
|
||||||
@ -128,9 +128,9 @@ def create_ui():
|
|||||||
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn, info='Force flash-attention to not be used.')
|
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn, info='Force flash-attention to not be used.')
|
||||||
shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
|
shared.gradio['cache_8bit'] = gr.Checkbox(label="cache_8bit", value=shared.args.cache_8bit, info='Use 8-bit cache to save VRAM.')
|
||||||
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
|
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
|
||||||
shared.gradio['gptq_for_llama_info'] = gr.Markdown('GPTQ-for-LLaMa support is currently only kept for compatibility with older GPUs. AutoGPTQ or ExLlama is preferred when compatible. GPTQ-for-LLaMa is installed by default with the webui on supported systems. Otherwise, it has to be installed manually following the instructions here: [instructions](https://github.com/oobabooga/text-generation-webui/blob/main/docs/GPTQ-models-(4-bit-mode).md#installation-1).')
|
shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlama_HF or AutoGPTQ are preferred for GPTQ models when supported.')
|
||||||
shared.gradio['exllama_info'] = gr.Markdown('For more information, consult the [docs](https://github.com/oobabooga/text-generation-webui/wiki/04-%E2%80%90-Model-Tab#exllama_hf).')
|
shared.gradio['exllama_info'] = gr.Markdown("ExLlama_HF is recommended over ExLlama for better integration with extensions and more consistent sampling behavior across loaders.")
|
||||||
shared.gradio['exllama_HF_info'] = gr.Markdown('ExLlama_HF is a wrapper that lets you use ExLlama like a Transformers model, which means it can use the Transformers samplers. It\'s a bit slower than the regular ExLlama.')
|
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
|
||||||
shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer.\n\nOption 2: place your .gguf in a subfolder of models/ along with these 3 files: tokenizer.model, tokenizer_config.json, and special_tokens_map.json. This takes precedence over Option 1.')
|
shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer.\n\nOption 2: place your .gguf in a subfolder of models/ along with these 3 files: tokenizer.model, tokenizer_config.json, and special_tokens_map.json. This takes precedence over Option 1.')
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8; platform_system != "Darwin" and platform_machine != "x86_64"
|
exllamav2==0.0.10; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
@ -53,14 +53,14 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121
|
|||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8; platform_system != "Darwin" and platform_machine != "x86_64"
|
exllamav2==0.0.10; platform_system != "Darwin" and platform_machine != "x86_64"
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
@ -53,14 +53,14 @@ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121
|
|||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||||
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
|
||||||
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
https://github.com/turboderp/exllamav2/releases/download/v0.0.10/exllamav2-0.0.10+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
|
||||||
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
https://github.com/jllllll/flash-attention/releases/download/v2.3.4/flash_attn-2.3.4+cu121torch2.1cxx11abiFALSE-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
accelerate==0.24.*
|
accelerate==0.25.*
|
||||||
colorama
|
colorama
|
||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
exllamav2==0.0.8
|
exllamav2==0.0.10
|
||||||
gradio==3.50.*
|
gradio==3.50.*
|
||||||
markdown
|
markdown
|
||||||
numpy==1.24.*
|
numpy==1.24.*
|
||||||
|
@ -13,7 +13,7 @@ seed: -1
|
|||||||
negative_prompt: ''
|
negative_prompt: ''
|
||||||
truncation_length: 2048
|
truncation_length: 2048
|
||||||
truncation_length_min: 0
|
truncation_length_min: 0
|
||||||
truncation_length_max: 32768
|
truncation_length_max: 200000
|
||||||
custom_stopping_strings: ''
|
custom_stopping_strings: ''
|
||||||
auto_max_new_tokens: false
|
auto_max_new_tokens: false
|
||||||
max_tokens_second: 0
|
max_tokens_second: 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user