server : fix prompt caching with system prompt (#4914)

Georgi Gerganov 2024-01-13 19:31:26 +02:00 committed by GitHub
parent f172de03f1
commit 0ea069b87b

@@ -1180,8 +1180,9 @@ struct llama_server_context
         return slot.images.size() > 0;
     }

-    void send_error(task_server& task, std::string error)
+    void send_error(task_server& task, const std::string &error)
     {
+        LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
         std::unique_lock<std::mutex> lock(mutex_results);
         task_result res;
         res.id = task.id;
@@ -1570,12 +1571,22 @@ struct llama_server_context
                         LOG_TEE("slot unavailable\n");
                         // send error result
                         send_error(task, "slot unavailable");
-                        return;
+                        break;
                     }

                     if (task.data.contains("system_prompt"))
                     {
+                        if (!all_slots_are_idle) {
+                            send_error(task, "system prompt can only be updated when all slots are idle");
+                            break;
+                        }
                         process_system_prompt_data(task.data["system_prompt"]);
+
+                        // reset cache_tokens for all slots
+                        for (llama_client_slot &slot : slots)
+                        {
+                            slot.cache_tokens.clear();
+                        }
                     }

                     slot->reset();
@@ -1652,8 +1663,7 @@ struct llama_server_context
         // attend tasks
         process_tasks();

-        // update the system prompt wait until all slots are idle state
-        if (system_need_update && all_slots_are_idle)
+        if (system_need_update)
         {
             LOG_TEE("updating system prompt\n");
             update_system_prompt();
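
For readers following the prompt-caching fix: when the system prompt is replaced and re-evaluated, each slot's cache_tokens (its record of tokens already evaluated into the KV cache) becomes stale, which is why the diff clears them per slot. Below is a minimal, self-contained C++ sketch of that bookkeeping, not the actual server code; slot_sketch, common_prefix_len, on_system_prompt_update, and tokens_to_reuse are illustrative names, and only the per-slot cache_tokens.clear() comes from the diff above.

#include <cstddef>
#include <cstdint>
#include <vector>

// Illustrative stand-ins; these are not the server's real definitions.
using llama_token = int32_t;

struct slot_sketch {
    std::vector<llama_token> cache_tokens; // tokens whose KV-cache entries are assumed valid
};

// Longest common prefix between the tokens cached in a slot and a new prompt.
static size_t common_prefix_len(const std::vector<llama_token> & a,
                                const std::vector<llama_token> & b) {
    size_t i = 0;
    while (i < a.size() && i < b.size() && a[i] == b[i]) {
        i++;
    }
    return i;
}

// After the system prompt is replaced and re-evaluated, the KV cache no longer
// matches any slot's previously cached tokens, so the bookkeeping must be reset.
static void on_system_prompt_update(std::vector<slot_sketch> & slots) {
    for (slot_sketch & slot : slots) {
        slot.cache_tokens.clear();
    }
}

// How many prompt tokens a slot could reuse from the cache. Without the reset
// above, this could report a non-zero prefix computed against KV entries that
// were actually built for the old system prompt.
static size_t tokens_to_reuse(const slot_sketch & slot,
                              const std::vector<llama_token> & new_prompt) {
    return common_prefix_len(slot.cache_tokens, new_prompt);
}

int main() {
    std::vector<slot_sketch> slots(2);
    slots[0].cache_tokens = {1, 2, 3, 4};

    on_system_prompt_update(slots);

    // After the reset, no stale prefix is reported for the next prompt.
    return tokens_to_reuse(slots[0], {1, 2, 3, 4}) == 0 ? 0 : 1;
}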