mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2024-12-25 05:48:55 +01:00
Document --pre_layer
This commit is contained in:
parent
4c9ed09270
commit
b24147c7ca
@ -212,7 +212,7 @@ Optionally, you can use the following command-line flags:
|
|||||||
| `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. |
|
| `--wbits WBITS` | GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. |
|
||||||
| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. |
|
| `--model_type MODEL_TYPE` | GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. |
|
||||||
| `--groupsize GROUPSIZE` | GPTQ: Group size. |
|
| `--groupsize GROUPSIZE` | GPTQ: Group size. |
|
||||||
| `--pre_layer PRE_LAYER` | GPTQ: The number of layers to preload. |
|
| `--pre_layer PRE_LAYER` | GPTQ: The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. |
|
||||||
|
|
||||||
#### FlexGen
|
#### FlexGen
|
||||||
|
|
||||||
|
@ -102,7 +102,7 @@ parser.add_argument('--threads', type=int, default=0, help='Number of threads to
|
|||||||
parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
|
parser.add_argument('--wbits', type=int, default=0, help='GPTQ: Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
|
||||||
parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported.')
|
parser.add_argument('--model_type', type=str, help='GPTQ: Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported.')
|
||||||
parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.')
|
parser.add_argument('--groupsize', type=int, default=-1, help='GPTQ: Group size.')
|
||||||
parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to preload.')
|
parser.add_argument('--pre_layer', type=int, default=0, help='GPTQ: The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models.')
|
||||||
parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.')
|
parser.add_argument('--gptq-bits', type=int, default=0, help='DEPRECATED: use --wbits instead.')
|
||||||
parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.')
|
parser.add_argument('--gptq-model-type', type=str, help='DEPRECATED: use --model_type instead.')
|
||||||
parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.')
|
parser.add_argument('--gptq-pre-layer', type=int, default=0, help='DEPRECATED: use --pre_layer instead.')
|
||||||
|
Loading…
Reference in New Issue
Block a user