Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-26 14:20:31 +01:00
llamafile : disable sgemm for batch-size 1 (#9330)
parent 1b9ae5189c
commit e536426ded
@@ -1006,6 +1006,10 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     assert(nth > 0);
     assert(ith < nth);
 
+    // only enable sgemm for prompt processing
+    if (n < 2)
+        return false;
+
     if (Ctype != GGML_TYPE_F32)
         return false;
 
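The added guard makes llamafile_sgemm decline the single-token (batch-size 1) case, so token-generation matmuls go through ggml's regular path while multi-token prompt processing keeps the tiled SGEMM. Below is a minimal, self-contained sketch of that dispatch pattern; the names try_sgemm, generic_mul_mat, and mul_mat are illustrative stand-ins, not the actual ggml call sites.

// Sketch only: a caller tries the SGEMM fast path first and falls back to a
// generic matmul when the kernel returns false. With the new guard, a
// single-token decode step (n == 1) always takes the fallback path.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum ctype { CTYPE_F32, CTYPE_F16 };

// Stand-in for llamafile_sgemm: declines shapes it does not handle.
static bool try_sgemm(int64_t m, int64_t n, int64_t k, enum ctype Ctype) {
    // only enable sgemm for prompt processing
    if (n < 2)
        return false;
    if (Ctype != CTYPE_F32)
        return false;
    printf("sgemm fast path: m=%lld n=%lld k=%lld\n",
           (long long) m, (long long) n, (long long) k);
    return true;
}

// Stand-in for the generic ggml matmul path.
static void generic_mul_mat(int64_t m, int64_t n, int64_t k) {
    printf("generic matmul:  m=%lld n=%lld k=%lld\n",
           (long long) m, (long long) n, (long long) k);
}

static void mul_mat(int64_t m, int64_t n, int64_t k, enum ctype Ctype) {
    if (try_sgemm(m, n, k, Ctype))
        return;                   // fast path handled the whole tile
    generic_mul_mat(m, n, k);     // batch-size-1 decode lands here
}

int main(void) {
    mul_mat(4096, 512, 4096, CTYPE_F32); // prompt processing: uses sgemm
    mul_mat(4096, 1,   4096, CTYPE_F32); // single-token decode: falls back
    return 0;
}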