Mirror of https://github.com/oobabooga/text-generation-webui.git

Change some comments

commit 3361728da1
parent 8aeae3b3f4

README.md: 16 lines changed
README.md

@@ -269,16 +269,16 @@ Optionally, you can use the following command-line flags:

 #### llama.cpp

-| Flag        | Description |
-|-------------|-------------|
-| `--no-mmap` | Prevent mmap from being used. |
-| `--mlock`   | Force the system to keep the model in RAM. |
+| Flag          | Description |
+|---------------|---------------|
+| `--no-mmap`   | Prevent mmap from being used. |
+| `--mlock`     | Force the system to keep the model in RAM. |
 | `--mul_mat_q` | Activate new mulmat kernels. |
 | `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
-| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
-| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
-| `--n_gqa N_GQA` | grouped-query attention. Must be 8 for llama-2 70b. |
-| `--rms_norm_eps RMS_NORM_EPS` | 5e-6 is a good value for llama-2 models. |
+| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
+| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
+| `--n_gqa N_GQA` | GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b. |
+| `--rms_norm_eps RMS_NORM_EPS` | GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models. |
 | `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
 |`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |

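For readers unfamiliar with how switches like these are consumed, the sketch below shows one way flags with the descriptions from the table above could be declared with Python's argparse. It is illustrative only: the parser, defaults, and the sample invocation are assumptions, not the project's actual flag definitions.

```python
import argparse

# Hypothetical parser covering a few of the flags documented in the table above.
# This is a sketch, not the project's real argument definitions.
parser = argparse.ArgumentParser(description="Subset of the llama.cpp loader flags")
parser.add_argument('--no-mmap', action='store_true',
                    help='Prevent mmap from being used.')
parser.add_argument('--mlock', action='store_true',
                    help='Force the system to keep the model in RAM.')
parser.add_argument('--n_gqa', type=int, default=0,
                    help='GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b.')
parser.add_argument('--rms_norm_eps', type=float, default=0,
                    help='GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models.')

# Equivalent of launching with: --mlock --n_gqa 8 --rms_norm_eps 5e-6
args = parser.parse_args(['--mlock', '--n_gqa', '8', '--rms_norm_eps', '5e-6'])
print(args.no_mmap, args.mlock, args.n_gqa, args.rms_norm_eps)
```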
@@ -80,8 +80,8 @@ def create_ui():

     shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=16384, step=256, label="n_ctx", value=shared.args.n_ctx)
     shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
     shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, value=shared.args.n_batch)
-    shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='grouped-query attention. Must be 8 for llama-2 70b.')
-    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='5e-6 is a good value for llama-2 models.')
+    shared.gradio['n_gqa'] = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=shared.args.n_gqa, info='GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b.')
+    shared.gradio['rms_norm_eps'] = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=shared.args.rms_norm_eps, info='GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models.')

     shared.gradio['wbits'] = gr.Dropdown(label="wbits", choices=["None", 1, 2, 3, 4, 8], value=str(shared.args.wbits) if shared.args.wbits > 0 else "None")
     shared.gradio['groupsize'] = gr.Dropdown(label="groupsize", choices=["None", 32, 64, 128, 1024], value=str(shared.args.groupsize) if shared.args.groupsize > 0 else "None")

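The change in this hunk only rewrites the info= tooltip text shown under the two sliders. A minimal standalone sketch of how such sliders render in Gradio with the new wording (assuming a Gradio release that supports the info= argument on components; the literal default values are placeholders for shared.args.n_gqa and shared.args.rms_norm_eps, which are not available outside the webui):

```python
import gradio as gr

with gr.Blocks() as demo:
    # Same slider ranges and labels as in the diff; defaults are placeholders.
    n_gqa = gr.Slider(minimum=0, maximum=16, step=1, label="n_gqa", value=0,
                      info='GGML only (not used by GGUF): Grouped-Query Attention. Must be 8 for llama-2 70b.')
    rms_norm_eps = gr.Slider(minimum=0, maximum=1e-5, step=1e-6, label="rms_norm_eps", value=0,
                             info='GGML only (not used by GGUF): 5e-6 is a good value for llama-2 models.')

if __name__ == "__main__":
    demo.launch()
```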