mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-11-22 16:17:57 +01:00
Remove flexgen 2
This commit is contained in:
parent
75c2dd38cf
commit
77d2e9f060
10
README.md
10
README.md
@ -178,7 +178,7 @@ Optionally, you can use the following command-line flags:
|
|||||||
|
|
||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
|--------------------------------------------|-------------|
|
|--------------------------------------------|-------------|
|
||||||
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, flexgen |
|
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv |
|
||||||
|
|
||||||
#### Accelerate/transformers
|
#### Accelerate/transformers
|
||||||
|
|
||||||
@ -255,14 +255,6 @@ Optionally, you can use the following command-line flags:
|
|||||||
| `--warmup_autotune` | (triton) Enable warmup autotune. |
|
| `--warmup_autotune` | (triton) Enable warmup autotune. |
|
||||||
| `--fused_mlp` | (triton) Enable fused mlp. |
|
| `--fused_mlp` | (triton) Enable fused mlp. |
|
||||||
|
|
||||||
#### FlexGen
|
|
||||||
|
|
||||||
| Flag | Description |
|
|
||||||
|------------------|-------------|
|
|
||||||
| `--percent PERCENT [PERCENT ...]` | FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0). |
|
|
||||||
| `--compress-weight` | FlexGen: Whether to compress weight (default: False).|
|
|
||||||
| `--pin-weight [PIN_WEIGHT]` | FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%). |
|
|
||||||
|
|
||||||
#### DeepSpeed
|
#### DeepSpeed
|
||||||
|
|
||||||
| Flag | Description |
|
| Flag | Description |
|
||||||
|
@ -8,7 +8,6 @@
|
|||||||
* [Docker](Docker.md)
|
* [Docker](Docker.md)
|
||||||
* [ExLlama](ExLlama.md)
|
* [ExLlama](ExLlama.md)
|
||||||
* [Extensions](Extensions.md)
|
* [Extensions](Extensions.md)
|
||||||
* [FlexGen](FlexGen.md)
|
|
||||||
* [Generation parameters](Generation-parameters.md)
|
* [Generation parameters](Generation-parameters.md)
|
||||||
* [GGML (llama.cpp) models](GGML-llama.cpp-models.md)
|
* [GGML (llama.cpp) models](GGML-llama.cpp-models.md)
|
||||||
* [GPT-4chan model](GPT-4chan-model.md)
|
* [GPT-4chan model](GPT-4chan-model.md)
|
||||||
|
@ -155,11 +155,6 @@ parser.add_argument('--desc_act', action='store_true', help='For models that don
|
|||||||
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
|
parser.add_argument('--gpu-split', type=str, help="Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. 20,7,7")
|
||||||
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
|
parser.add_argument('--max_seq_len', type=int, default=2048, help="Maximum sequence length.")
|
||||||
|
|
||||||
# FlexGen
|
|
||||||
parser.add_argument('--percent', type=int, nargs="+", default=[0, 100, 100, 0, 100, 0], help='FlexGen: allocation percentages. Must be 6 numbers separated by spaces (default: 0, 100, 100, 0, 100, 0).')
|
|
||||||
parser.add_argument("--compress-weight", action="store_true", help="FlexGen: activate weight compression.")
|
|
||||||
parser.add_argument("--pin-weight", type=str2bool, nargs="?", const=True, default=True, help="FlexGen: whether to pin weights (setting this to False reduces CPU memory by 20%%).")
|
|
||||||
|
|
||||||
# DeepSpeed
|
# DeepSpeed
|
||||||
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
parser.add_argument('--deepspeed', action='store_true', help='Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration.')
|
||||||
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
parser.add_argument('--nvme-offload-dir', type=str, help='DeepSpeed: Directory to use for ZeRO-3 NVME offloading.')
|
||||||
|
@ -3,7 +3,6 @@ colorama
|
|||||||
datasets
|
datasets
|
||||||
einops
|
einops
|
||||||
fastapi==0.95.2
|
fastapi==0.95.2
|
||||||
flexgen==0.1.7
|
|
||||||
gradio_client==0.2.5
|
gradio_client==0.2.5
|
||||||
gradio==3.33.1
|
gradio==3.33.1
|
||||||
markdown
|
markdown
|
||||||
|
Loading…
Reference in New Issue
Block a user