mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
metal : replace loop of dispatch_async with dispatch_apply (#4934)
* Replace loop of dispatch_async with dispatch_apply
* Update ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
7c8d3abd1a
commit
3a48d558a6
@@ -737,10 +737,10 @@ static bool ggml_metal_graph_compute(
|
|||||||
ctx->command_encoders[i] = [ctx->command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
|
ctx->command_encoders[i] = [ctx->command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
|
|
||||||
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
||||||
|
dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) {
|
||||||
|
const int cb_idx = iter;
|
||||||
|
|
||||||
dispatch_async(ctx->d_queue, ^{
|
|
||||||
size_t offs_src0 = 0;
|
size_t offs_src0 = 0;
|
||||||
size_t offs_src1 = 0;
|
size_t offs_src1 = 0;
|
||||||
size_t offs_dst = 0;
|
size_t offs_dst = 0;
|
||||||
@@ -2248,10 +2248,6 @@ static bool ggml_metal_graph_compute(
|
|||||||
|
|
||||||
[command_buffer commit];
|
[command_buffer commit];
|
||||||
});
|
});
|
||||||
}
|
|
||||||
|
|
||||||
// wait for all threads to finish
|
|
||||||
dispatch_barrier_sync(ctx->d_queue, ^{});
|
|
||||||
|
|
||||||
// check status of command buffers
|
// check status of command buffers
|
||||||
// needed to detect if the device ran out-of-memory for example (#1881)
|
// needed to detect if the device ran out-of-memory for example (#1881)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user