Mirror of https://github.com/oobabooga/text-generation-webui.git
Synced 2024-11-29 10:59:32 +01:00
add n_batch support for llama.cpp (#1115)
This commit is contained in:
parent
2f6e2ddeac
commit
78d1977ebf
@@ -220,6 +220,7 @@ Optionally, you can use the following command-line flags:
 | Flag | Description |
 |-------------|-------------|
 | `--threads` | Number of threads to use in llama.cpp. |
+| `--n_batch` | Processing batch size for llama.cpp. |
 
 #### GPTQ
 
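With this flag documented, the prompt-processing batch size becomes configurable from the command line; a hypothetical invocation (model name purely illustrative, not part of this commit) would look something like `python server.py --model llama-7b --threads 8 --n_batch 256`.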
@@ -24,7 +24,8 @@ class LlamaCppModel:
             'model_path': str(path),
             'n_ctx': 2048,
             'seed': 0,
-            'n_threads': shared.args.threads or None
+            'n_threads': shared.args.threads or None,
+            'n_batch': shared.args.n_batch
         }
         self.model = Llama(**params)
         self.model.set_cache(LlamaCache)
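For context, here is a minimal standalone sketch of how a params dict like the one above reaches llama-cpp-python; the model path, thread count, and prompt below are placeholders, not values from this commit:

```python
from llama_cpp import Llama

# Placeholder values; in the webui they come from the selected model path and shared.args.
params = {
    'model_path': 'models/ggml-model-q4_0.bin',  # hypothetical path
    'n_ctx': 2048,
    'seed': 0,
    'n_threads': 8,    # --threads (the webui passes `shared.args.threads or None`)
    'n_batch': 8,      # --n_batch: how many prompt tokens llama.cpp evaluates per batch
}
model = Llama(**params)
output = model('Hello, my name is', max_tokens=16)  # plain completion call
print(output['choices'][0]['text'])
```

Larger `n_batch` values generally speed up prompt ingestion at the cost of extra memory, which is why exposing it as a flag is useful for long prompts.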
@@ -119,6 +119,7 @@ parser.add_argument('--trust-remote-code', action='store_true', help="Set trust_
 
 # llama.cpp
 parser.add_argument('--threads', type=int, default=0, help='Number of threads to use in llama.cpp.')
+parser.add_argument('--n_batch', type=int, default=8, help='Processing batch size for llama.cpp.')
 
 # GPTQ
 parser.add_argument('--wbits', type=int, default=0, help='Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.')
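As a quick sanity check, a self-contained sketch of how the new flag parses (argparse only, not the full shared-arguments module):

```python
import argparse

parser = argparse.ArgumentParser()
# llama.cpp
parser.add_argument('--threads', type=int, default=0, help='Number of threads to use in llama.cpp.')
parser.add_argument('--n_batch', type=int, default=8, help='Processing batch size for llama.cpp.')

args = parser.parse_args(['--n_batch', '256'])
print(args.threads, args.n_batch)  # 0 256 -> threads stays 0 and is mapped to None upstream
```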