Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-24 10:29:21 +01:00)
metal : utilize max shared memory for mul_mat_id (#7935)
This commit is contained in:
parent e65bbf606c
commit 66ef1ceedf
@@ -1862,9 +1862,10 @@ static enum ggml_status ggml_metal_graph_compute(
|
|||||||
// ne21 = n_rows
|
// ne21 = n_rows
|
||||||
const int dst_rows = ne20*ne21;
|
const int dst_rows = ne20*ne21;
|
||||||
const int dst_rows_min = n_as;
|
const int dst_rows_min = n_as;
|
||||||
|
const int dst_rows_max = (ctx->device.maxThreadgroupMemoryLength - 32 - 8192)/4;
|
||||||
|
|
||||||
// max size of the rowids array in the kernel shared buffer
|
// max size of the rowids array in the kernel shared buffer
|
||||||
GGML_ASSERT(dst_rows <= 2048);
|
GGML_ASSERT(dst_rows <= dst_rows_max);
|
||||||
|
|
||||||
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
// for now the matrix-matrix multiplication kernel only works on A14+/M1+ SoCs
|
||||||
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
// AMD GPU and older A-chips will reuse matrix-vector multiplication kernel
|
||||||
|
Loading…
Reference in New Issue
Block a user