server : some style changes

commit f25308be5c
parent e1516709f2
Author: Georgi Gerganov
Date:   2023-11-24 10:49:08 +02:00

@@ -385,7 +385,7 @@ struct llama_client_slot
     bool stopped_limit = false;

     bool oaicompat = false;
-    std::string oaicompat_model = "";
+    std::string oaicompat_model;

     std::string stopping_word;
@@ -486,7 +486,7 @@ struct llama_client_slot
         };
     }

-    void print_timings() {
+    void print_timings() const {
         LOG_TEE("\n");
         LOG_TEE("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, t_prompt_processing, num_prompt_tokens_processed, t_prompt_processing / num_prompt_tokens_processed, 1e3 / t_prompt_processing * num_prompt_tokens_processed);
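
Note: marking print_timings() const documents that it only reads slot state and makes it callable through a const pointer or reference. A minimal sketch of the idea, using a hypothetical Timings struct rather than the real llama_client_slot:

    #include <cstdio>

    struct Timings {
        double t_prompt_ms = 0.0;
        int    n_tokens    = 0;

        // `const` promises no member is modified, so the method is
        // callable on a `const Timings &`.
        void print() const {
            printf("prompt eval: %.2f ms / %d tokens\n", t_prompt_ms, n_tokens);
        }
    };

    void report(const Timings & t) {
        t.print(); // would not compile if print() were non-const
    }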
@@ -1284,7 +1284,7 @@ struct llama_server_context
         std::lock_guard<std::mutex> lock(mutex_tasks);
         task_server task;
         task.id = id_gen++;
-        task.data = data;
+        task.data = std::move(data);
         task.infill_mode = infill;
         task.embedding_mode = embedding;
         task.type = COMPLETION_TASK;
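
Note: task.data holds a json payload, so `task.data = data;` deep-copied it while mutex_tasks was held; `std::move(data)` transfers the underlying storage instead. A hedged sketch of the difference, assuming nlohmann::json (which the server uses) and a simplified stand-in for task_server:

    #include <nlohmann/json.hpp>
    #include <utility>
    using json = nlohmann::json;

    struct task_sketch {          // hypothetical, mirrors task_server's `data` field
        json data;
    };

    void assign_copy(task_sketch & t, json data) {
        t.data = data;            // deep copy: every node is re-allocated
    }

    void assign_move(task_sketch & t, json data) {
        t.data = std::move(data); // steals the internal buffers; O(1)
    }

Moving is safe here because `data` is a by-value parameter that is not read again after the assignment.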
@@ -2254,7 +2254,7 @@ json oaicompat_completion_params_parse(
     // Map OpenAI parameters to llama.cpp parameters
     llama_params["prompt"] = format_chatml(body["messages"]); // OpenAI 'messages' to llama.cpp 'prompt'
     llama_params["temperature"] = json_value(body, "temperature", 0.8);
-    llama_params["top_k"] = json_value(body, "max_tokens", 40);
+    llama_params["top_k"] = json_value(body, "top_k", 40);
     llama_params["top_p"] = json_value(body, "top_p", 0.95);
     llama_params["n_predict"] = json_value(body, "max_tokens", -1);
     llama_params["logit_bias"] = json_value(body, "logit_bias", json::object());
@@ -2272,9 +2272,7 @@ json oaicompat_completion_params_parse(
     llama_params["tfs_z"] = json_value(body, "tfs_z", 0.0);

     if (llama_params.count("grammar") != 0) {
-        llama_params["grammar"] = json_value(
-            body, "grammar",
-            json::object());
+        llama_params["grammar"] = json_value(body, "grammar", json::object());
     }

     // Handle 'stop' field
@@ -2294,8 +2292,7 @@ json oaicompat_completion_params_parse(
     return llama_params;
 }

-static json format_final_response_oaicompat(json request, task_result response,
-                                            bool streaming = false)
+static json format_final_response_oaicompat(const json &request, const task_result &response, bool streaming = false)
 {
     json result = response.result_json;
@@ -2345,7 +2342,7 @@ static json format_final_response_oaicompat(json request, task_result response,
 }

 // return value is vector as there is one case where we might need to generate two responses
-static std::vector<json> format_partial_response_oaicompat(task_result response) {
+static std::vector<json> format_partial_response_oaicompat(const task_result &response) {
     json result = response.result_json;

     if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
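
Note: both signature changes replace pass-by-value json/task_result parameters with const references, which avoids deep-copying the payloads on every call while still preventing the callee from mutating them. A minimal illustration of the two conventions:

    #include <nlohmann/json.hpp>
    using json = nlohmann::json;

    // By value: the caller's json is deep-copied into `request`.
    static size_t by_value(json request) { return request.size(); }

    // By const reference: no copy, and the callee cannot mutate it.
    static size_t by_cref(const json & request) { return request.size(); }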
@@ -2353,15 +2350,14 @@ static std::vector<json> format_partial_response_oaicompat(task_result response)
     }

     bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
-    std::string modelname =
-        json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
+    std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
     bool stopped_word = json_value(result, "stopped_word", false);
     bool stopped_eos = json_value(result, "stopped_eos", false);
     bool stopped_limit = json_value(result, "stopped_limit", false);
     std::string content = json_value(result, "content", std::string(""));

-    std::string finish_reason = "";
+    std::string finish_reason;
     if (stopped_word || stopped_eos) {
         finish_reason = "stop";
     }
@@ -2655,8 +2651,8 @@ int main(int argc, char **argv)
             });

-    svr.Post("/v1/chat/completions", [&llama](const httplib::Request &req,
-                                              httplib::Response &res)
+    // TODO: add mount point without "/v1" prefix -- how?
+    svr.Post("/v1/chat/completions", [&llama](const httplib::Request &req, httplib::Response &res)
             {
                 json data = oaicompat_completion_params_parse(json::parse(req.body));
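
Note: the new TODO asks how to serve the same handler without the "/v1" prefix. One possible answer (not part of this commit) is to bind the lambda to a name and register it on both routes, since httplib::Server::Post() accepts any copyable handler:

    // Hypothetical resolution of the TODO, not what the commit does:
    const auto handle_chat_completions =
        [&llama](const httplib::Request & req, httplib::Response & res)
        {
            json data = oaicompat_completion_params_parse(json::parse(req.body));
            // ... rest of the existing handler body ...
        };

    svr.Post("/v1/chat/completions", handle_chat_completions);
    svr.Post("/chat/completions",    handle_chat_completions);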