mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 13:28:50 +01:00
server : fix prompt caching with system prompt (#4914)
This commit is contained in:
parent
f172de03f1
commit
0ea069b87b
@ -1180,8 +1180,9 @@ struct llama_server_context
|
|||||||
return slot.images.size() > 0;
|
return slot.images.size() > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void send_error(task_server& task, std::string error)
|
void send_error(task_server& task, const std::string &error)
|
||||||
{
|
{
|
||||||
|
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
||||||
std::unique_lock<std::mutex> lock(mutex_results);
|
std::unique_lock<std::mutex> lock(mutex_results);
|
||||||
task_result res;
|
task_result res;
|
||||||
res.id = task.id;
|
res.id = task.id;
|
||||||
@ -1570,12 +1571,22 @@ struct llama_server_context
|
|||||||
LOG_TEE("slot unavailable\n");
|
LOG_TEE("slot unavailable\n");
|
||||||
// send error result
|
// send error result
|
||||||
send_error(task, "slot unavailable");
|
send_error(task, "slot unavailable");
|
||||||
return;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (task.data.contains("system_prompt"))
|
if (task.data.contains("system_prompt"))
|
||||||
{
|
{
|
||||||
|
if (!all_slots_are_idle) {
|
||||||
|
send_error(task, "system prompt can only be updated when all slots are idle");
|
||||||
|
break;
|
||||||
|
}
|
||||||
process_system_prompt_data(task.data["system_prompt"]);
|
process_system_prompt_data(task.data["system_prompt"]);
|
||||||
|
|
||||||
|
// reset cache_tokens for all slots
|
||||||
|
for (llama_client_slot &slot : slots)
|
||||||
|
{
|
||||||
|
slot.cache_tokens.clear();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
slot->reset();
|
slot->reset();
|
||||||
@ -1652,8 +1663,7 @@ struct llama_server_context
|
|||||||
// attend tasks
|
// attend tasks
|
||||||
process_tasks();
|
process_tasks();
|
||||||
|
|
||||||
// update the system prompt wait until all slots are idle state
|
if (system_need_update)
|
||||||
if (system_need_update && all_slots_are_idle)
|
|
||||||
{
|
{
|
||||||
LOG_TEE("updating system prompt\n");
|
LOG_TEE("updating system prompt\n");
|
||||||
update_system_prompt();
|
update_system_prompt();
|
||||||
|
Loading…
Reference in New Issue
Block a user