From 7fe46764fb2d675c4e281592a1328293c0c56b07 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 10 Jan 2025 07:07:41 -0800
Subject: [PATCH] Improve the --help message about --tensorcores as well

---
 modules/shared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/shared.py b/modules/shared.py
index f478df05..89263205 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -116,7 +116,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
 # llama.cpp
 group = parser.add_argument_group('llama.cpp')
 group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
-group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
+group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.')
 group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')
 group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
 group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')