From 24ee66ed0d908d156bd0d1747b63a636a495cd7a Mon Sep 17 00:00:00 2001
From: Pierrick Hymbert
Date: Fri, 12 Apr 2024 13:49:21 +0200
Subject: [PATCH] server : coherent log output for KV cache full (#6637)

---
Review note: in the "failed to decode the batch" LOG_ERROR below, the "n_batch"
field now logs n_batch; it previously logged ret a second time, so the batch
size was missing from the error record. Leading whitespace inside the hunks was
reconstructed from a whitespace-collapsed copy of this patch; if the context
does not match the tree, apply with `git apply --ignore-whitespace`.

 examples/server/server.cpp | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2e791190b..b08a09a57 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1083,7 +1083,7 @@ struct server_context {
                 };
 
                 if (llama_decode(ctx, batch_view) != 0) {
-                    LOG_TEE("%s: llama_decode() failed\n", __func__);
+                    LOG_ERROR("llama_decode() failed", {});
                     return;
                 }
             }
@@ -1281,7 +1281,11 @@ struct server_context {
     }
 
     void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
-        LOG_TEE("task %i - error: %s\n", id_task, error.c_str());
+        LOG_ERROR("task error", {
+            {"id_multi", id_multi},
+            {"id_task", id_task},
+            {"error", error},
+        });
 
         server_task_result res;
         res.id = id_task;
@@ -2186,7 +2190,11 @@ struct server_context {
             if (ret != 0) {
                 if (n_batch == 1 || ret < 0) {
                     // if you get here, it means the KV cache is full - try increasing it via the context size
-                    LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+                    LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
+                        {"i", i},
+                        {"n_batch", n_batch},
+                        {"ret", ret},
+                    });
                     for (auto & slot : slots) {
                         slot.state = SLOT_STATE_PROCESSING;
                         slot.command = SLOT_COMMAND_NONE;
@@ -2196,12 +2204,16 @@ struct server_context {
                     break; // break loop of n_batch
                 }
 
-                LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
-
                 // retry with half the batch size to try to find a free slot in the KV cache
                 n_batch /= 2;
                 i -= n_batch;
 
+                LOG_WARNING("failed to find free space in the KV cache, retrying with smaller batch size - try increasing it via the context size or enable defragmentation", {
+                    {"i", i},
+                    {"n_batch", n_batch},
+                    {"ret", ret},
+                });
+
                 continue; // continue loop of n_batch
             }
 