mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-25 05:48:47 +01:00
CUDA: fixed row rounding for 0 tensor splits (#4594)
This commit is contained in:
parent
7082d24cec
commit
e0a4002273
@ -7937,15 +7937,19 @@ static void ggml_cuda_op_mul_mat(
|
|||||||
|
|
||||||
if (id != 0) {
|
if (id != 0) {
|
||||||
row_low[id] = ne01*g_tensor_split[id];
|
row_low[id] = ne01*g_tensor_split[id];
|
||||||
|
if (row_low[id] < ne01) {
|
||||||
row_low[id] -= row_low[id] % rounding;
|
row_low[id] -= row_low[id] % rounding;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (id != g_device_count - 1) {
|
if (id != g_device_count - 1) {
|
||||||
row_high[id] = ne01*g_tensor_split[id + 1];
|
row_high[id] = ne01*g_tensor_split[id + 1];
|
||||||
|
if (row_high[id] < ne01) {
|
||||||
row_high[id] -= row_high[id] % rounding;
|
row_high[id] -= row_high[id] % rounding;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for (int64_t id = 0; id < g_device_count; ++id) {
|
for (int64_t id = 0; id < g_device_count; ++id) {
|
||||||
if ((!split && id != g_main_device) || row_low[id] == row_high[id]) {
|
if ((!split && id != g_main_device) || row_low[id] == row_high[id]) {
|
||||||
|
Loading…
Reference in New Issue
Block a user