mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 13:28:50 +01:00
metal : replace loop of dispatch_async with dispatch_apply (#4934)
* Replace loop of dispatch_async with dispatch_apply
* Update ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
7c8d3abd1a
commit
3a48d558a6
@@ -737,10 +737,10 @@ static bool ggml_metal_graph_compute(
|
||||
ctx->command_encoders[i] = [ctx->command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
|
||||
}
|
||||
|
||||
for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
|
||||
const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
|
||||
dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) {
|
||||
const int cb_idx = iter;
|
||||
|
||||
dispatch_async(ctx->d_queue, ^{
|
||||
size_t offs_src0 = 0;
|
||||
size_t offs_src1 = 0;
|
||||
size_t offs_dst = 0;
|
||||
@@ -2248,10 +2248,6 @@ static bool ggml_metal_graph_compute(
|
||||
|
||||
[command_buffer commit];
|
||||
});
|
||||
}
|
||||
|
||||
// wait for all threads to finish
|
||||
dispatch_barrier_sync(ctx->d_queue, ^{});
|
||||
|
||||
// check status of command buffers
|
||||
// needed to detect if the device ran out-of-memory for example (#1881)
|
||||
|
Loading…
Reference in New Issue
Block a user