JSON: [key] -> .at(key), assert() -> GGML_ASSERT (#7143)
commit c12452c7ae
parent 9da243b36a
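The change is mechanical but meaningful: on a const nlohmann::json, operator[] with a missing key is undefined behavior, guarded only by the library's internal JSON_ASSERT (which defaults to assert() and is compiled out under NDEBUG), while .at() always throws nlohmann::json::out_of_range. A minimal sketch of the pattern this commit applies throughout, not taken from the diff itself (read_url is a hypothetical helper):

#include "ggml.h"
// Change JSON_ASSERT from assert() to GGML_ASSERT:
#define JSON_ASSERT GGML_ASSERT
#include "json.hpp"

#include <cstdio>
#include <string>

// Hypothetical helper: a required key read with .at() surfaces as a catchable
// exception instead of silently invoking UB when the key is absent.
static std::string read_url(const nlohmann::json & data) {
    try {
        return data.at("url").get<std::string>(); // throws json::out_of_range if missing
    } catch (const nlohmann::json::exception & e) {
        fprintf(stderr, "bad metadata: %s\n", e.what());
        return "";
    }
}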
@@ -1,4 +1,6 @@
 #include "common.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
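The two `+` lines above work because nlohmann/json only supplies its own fallback when the macro is not already defined; the library's header contains, in essence:

#if !defined(JSON_ASSERT)
    #include <cassert>
    #define JSON_ASSERT(x) assert(x)
#endif

(simplified from nlohmann/json's actual guard). Defining JSON_ASSERT as GGML_ASSERT before the #include therefore makes every internal sanity check abort loudly in all builds, including release builds where plain assert() is disabled by NDEBUG.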
@@ -1969,18 +1971,18 @@ static bool llama_download_file(const std::string & url, const std::string & pat
         try {
             metadata_in >> metadata;
             fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
-            if (metadata.contains("url") && metadata["url"].is_string()) {
-                auto previous_url = metadata["url"].get<std::string>();
+            if (metadata.contains("url") && metadata.at("url").is_string()) {
+                auto previous_url = metadata.at("url").get<std::string>();
                 if (previous_url != url) {
                     fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
                     return false;
                 }
             }
-            if (metadata.contains("etag") && metadata["etag"].is_string()) {
-                etag = metadata["etag"];
+            if (metadata.contains("etag") && metadata.at("etag").is_string()) {
+                etag = metadata.at("etag");
             }
-            if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
-                last_modified = metadata["lastModified"];
+            if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) {
+                last_modified = metadata.at("lastModified");
             }
         } catch (const nlohmann::json::exception & e) {
             fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
@@ -1,4 +1,8 @@
 #pragma once
 
+#include "ggml.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
+
 std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
@@ -12,6 +12,8 @@
 // increase max payload length to allow use of larger context size
 #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
 #include "httplib.h"
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 // auto generated files (update with ./deps.sh)
@@ -859,7 +861,7 @@ struct server_context {
         slot.sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
 
         // process "json_schema" and "grammar"
-        if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) {
+        if (data.contains("json_schema") && !data.at("json_schema").is_null() && data.contains("grammar") && !data.at("grammar").is_null()) {
             send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST);
             return false;
         } else if (data.contains("json_schema") && !data.contains("grammar")) {
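Note the division of labor in hunks like the one above: json_value, a llama.cpp server helper, reads optional keys with a default, while the explicit contains()/.at() pair handles keys whose mere presence changes control flow. A rough approximation of the helper's shape (sketched from its usage here, not copied from the source):

// Approximate shape of the server's json_value helper: optional key with default.
template <typename T>
static T json_value(const nlohmann::json & body, const std::string & key, const T & default_value) {
    // a missing or null key counts as "not provided"
    return (body.contains(key) && !body.at(key).is_null())
        ? body.at(key).get<T>()
        : default_value;
}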
@@ -1512,7 +1514,7 @@ struct server_context {
         // add subtasks
         for (int i = 0; i < prompt_count; i++) {
             json subtask_data = multiprompt_task.data;
-            subtask_data["prompt"] = subtask_data["prompt"][i];
+            subtask_data["prompt"] = subtask_data.at("prompt")[i];
 
             // subtasks inherit everything else (infill mode, embedding mode, etc.)
             request_completion(subtask_ids[i], id_multi, subtask_data, multiprompt_task.infill, multiprompt_task.embedding);
@@ -1532,7 +1534,7 @@ struct server_context {
         }
 
         if (task.data.contains("system_prompt")) {
-            system_prompt_set(task.data["system_prompt"]);
+            system_prompt_set(task.data.at("system_prompt"));
 
             for (server_slot & slot : slots) {
                 slot.n_past = 0;
@@ -1644,7 +1646,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_SAVE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1654,8 +1656,8 @@ struct server_context {
                     const size_t token_count = slot->cache_tokens.size();
                     const int64_t t_start = ggml_time_us();
 
-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");
 
                     const size_t nwrite = llama_state_seq_save_file(ctx, filepath.c_str(), slot->id + 1, slot->cache_tokens.data(), token_count);
 
@@ -1679,7 +1681,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_RESTORE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -1688,8 +1690,8 @@ struct server_context {
 
                     const int64_t t_start = ggml_time_us();
 
-                    std::string filename = task.data["filename"];
-                    std::string filepath = task.data["filepath"];
+                    std::string filename = task.data.at("filename");
+                    std::string filepath = task.data.at("filepath");
 
                     slot->cache_tokens.resize(slot->n_ctx);
                     size_t token_count = 0;
@@ -1721,7 +1723,7 @@ struct server_context {
                 } break;
             case SERVER_TASK_TYPE_SLOT_ERASE:
                 {
-                    int id_slot = task.data["id_slot"];
+                    int id_slot = task.data.at("id_slot");
                     server_slot * slot = get_slot(id_slot);
                     if (slot == nullptr) {
                         send_error(task, "Invalid slot ID", ERROR_TYPE_INVALID_REQUEST);
@@ -3136,8 +3138,8 @@ int main(int argc, char ** argv) {
         server_task_result result = ctx_server.queue_results.recv(task.id);
         ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-        const int n_idle_slots = result.data["idle"];
-        const int n_processing_slots = result.data["processing"];
+        const int n_idle_slots = result.data.at("idle");
+        const int n_processing_slots = result.data.at("processing");
 
         json health = {
             {"status", "ok"},
@@ -3147,7 +3149,7 @@ int main(int argc, char ** argv) {
 
         res.status = 200; // HTTP OK
         if (sparams.slots_endpoint && req.has_param("include_slots")) {
-            health["slots"] = result.data["slots"];
+            health["slots"] = result.data.at("slots");
         }
 
         if (n_idle_slots == 0) {
@@ -3191,7 +3193,7 @@ int main(int argc, char ** argv) {
         server_task_result result = ctx_server.queue_results.recv(task.id);
         ctx_server.queue_results.remove_waiting_task_id(task.id);
 
-        res.set_content(result.data["slots"].dump(), "application/json");
+        res.set_content(result.data.at("slots").dump(), "application/json");
         res.status = 200; // HTTP OK
     };
 
@@ -3218,32 +3220,32 @@ int main(int argc, char ** argv) {
 
         json data = result.data;
 
-        const uint64_t n_prompt_tokens_processed = data["n_prompt_tokens_processed"];
-        const uint64_t t_prompt_processing = data["t_prompt_processing"];
+        const uint64_t n_prompt_tokens_processed = data.at("n_prompt_tokens_processed");
+        const uint64_t t_prompt_processing = data.at("t_prompt_processing");
 
-        const uint64_t n_tokens_predicted = data["n_tokens_predicted"];
-        const uint64_t t_tokens_generation = data["t_tokens_generation"];
+        const uint64_t n_tokens_predicted = data.at("n_tokens_predicted");
+        const uint64_t t_tokens_generation = data.at("t_tokens_generation");
 
-        const int32_t kv_cache_used_cells = data["kv_cache_used_cells"];
+        const int32_t kv_cache_used_cells = data.at("kv_cache_used_cells");
 
         // metrics definition: https://prometheus.io/docs/practices/naming/#metric-names
         json all_metrics_def = json {
             {"counter", {{
                     {"name", "prompt_tokens_total"},
                     {"help", "Number of prompt tokens processed."},
-                    {"value", (uint64_t) data["n_prompt_tokens_processed_total"]}
+                    {"value", (uint64_t) data.at("n_prompt_tokens_processed_total")}
             }, {
                     {"name", "prompt_seconds_total"},
                     {"help", "Prompt process time"},
-                    {"value", (uint64_t) data["t_prompt_processing_total"] / 1.e3}
+                    {"value", (uint64_t) data.at("t_prompt_processing_total") / 1.e3}
             }, {
                     {"name", "tokens_predicted_total"},
                     {"help", "Number of generation tokens processed."},
-                    {"value", (uint64_t) data["n_tokens_predicted_total"]}
+                    {"value", (uint64_t) data.at("n_tokens_predicted_total")}
            }, {
                     {"name", "tokens_predicted_seconds_total"},
                     {"help", "Predict process time"},
-                    {"value", (uint64_t) data["t_tokens_generation_total"] / 1.e3}
+                    {"value", (uint64_t) data.at("t_tokens_generation_total") / 1.e3}
             }}},
             {"gauge", {{
                     {"name", "prompt_tokens_seconds"},
@@ -3260,15 +3262,15 @@ int main(int argc, char ** argv) {
             },{
                     {"name", "kv_cache_tokens"},
                     {"help", "KV-cache tokens."},
-                    {"value", (uint64_t) data["kv_cache_tokens_count"]}
+                    {"value", (uint64_t) data.at("kv_cache_tokens_count")}
             },{
                     {"name", "requests_processing"},
                     {"help", "Number of request processing."},
-                    {"value", (uint64_t) data["processing"]}
+                    {"value", (uint64_t) data.at("processing")}
             },{
                     {"name", "requests_deferred"},
                     {"help", "Number of request deferred."},
-                    {"value", (uint64_t) data["deferred"]}
+                    {"value", (uint64_t) data.at("deferred")}
             }}}
         };
 
@@ -3279,8 +3281,8 @@ int main(int argc, char ** argv) {
             const auto & metrics_def = el.value();
 
             for (const auto & metric_def : metrics_def) {
-                const std::string name = metric_def["name"];
-                const std::string help = metric_def["help"];
+                const std::string name = metric_def.at("name");
+                const std::string help = metric_def.at("help");
 
                 auto value = json_value(metric_def, "value", 0.);
                 prometheus << "# HELP llamacpp:" << name << " " << help << "\n"
@@ -3289,7 +3291,7 @@ int main(int argc, char ** argv) {
            }
         }
 
-        const int64_t t_start = data["t_start"];
+        const int64_t t_start = data.at("t_start");
         res.set_header("Process-Start-Time-Unix", std::to_string(t_start));
 
         res.set_content(prometheus.str(), "text/plain; version=0.0.4");
@@ -3298,7 +3300,7 @@ int main(int argc, char ** argv) {
 
     const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3328,7 +3330,7 @@ int main(int argc, char ** argv) {
 
     const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
         json request_data = json::parse(req.body);
-        std::string filename = request_data["filename"];
+        std::string filename = request_data.at("filename");
         if (!validate_file_name(filename)) {
             res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3648,7 +3650,7 @@ int main(int argc, char ** argv) {
         std::vector<llama_token> tokens;
         if (body.count("content") != 0) {
             const bool add_special = json_value(body, "add_special", false);
-            tokens = ctx_server.tokenize(body["content"], add_special);
+            tokens = ctx_server.tokenize(body.at("content"), add_special);
         }
         const json data = format_tokenizer_response(tokens);
         return res.set_content(data.dump(), "application/json; charset=utf-8");
@@ -3660,7 +3662,7 @@ int main(int argc, char ** argv) {
 
         std::string content;
         if (body.count("tokens") != 0) {
-            const std::vector<llama_token> tokens = body["tokens"];
+            const std::vector<llama_token> tokens = body.at("tokens");
             content = tokens_to_str(ctx_server.ctx, tokens.cbegin(), tokens.cend());
         }
 
@@ -3683,10 +3685,10 @@ int main(int argc, char ** argv) {
         json prompt;
         if (body.count("input") != 0) {
             is_openai = true;
-            prompt = body["input"];
+            prompt = body.at("input");
         } else if (body.count("content") != 0) {
             // with "content", we only support single prompt
-            prompt = std::vector<std::string>{body["content"]};
+            prompt = std::vector<std::string>{body.at("content")};
         } else {
             res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
             return;
@@ -3705,7 +3707,7 @@ int main(int argc, char ** argv) {
         if (!result.error) {
             if (result.data.count("results")) {
                 // result for multi-task
-                responses = result.data["results"];
+                responses = result.data.at("results");
             } else {
                 // result for single task
                 responses = std::vector<json>{result.data};
@@ -3,6 +3,8 @@
 #include "llama.h"
 #include "common.h"
 
+// Change JSON_ASSERT from assert() to GGML_ASSERT:
+#define JSON_ASSERT GGML_ASSERT
 #include "json.hpp"
 
 #include <string>
@@ -373,11 +375,11 @@ static json oaicompat_completion_params_parse(
     llama_params["top_p"] = json_value(body, "top_p", 1.0);
 
     // Apply chat template to the list of messages
-    llama_params["prompt"] = format_chat(model, chat_template, body["messages"]);
+    llama_params["prompt"] = format_chat(model, chat_template, body.at("messages"));
 
     // Handle "stop" field
-    if (body.contains("stop") && body["stop"].is_string()) {
-        llama_params["stop"] = json::array({body["stop"].get<std::string>()});
+    if (body.contains("stop") && body.at("stop").is_string()) {
+        llama_params["stop"] = json::array({body.at("stop").get<std::string>()});
     } else {
         llama_params["stop"] = json_value(body, "stop", json::array());
     }
@@ -2,6 +2,7 @@
 #undef NDEBUG
 #endif
 
+#include <cassert>
 #include <fstream>
 #include <sstream>
 #include <regex>
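The <cassert> include added in this last hunk is likely a direct consequence of the JSON_ASSERT override: nlohmann/json pulls in <cassert> only inside its !defined(JSON_ASSERT) fallback, so a test that calls assert() directly no longer gets the header transitively. The difference the commit title points at, as a standalone sketch (MY_HARD_ASSERT is a stand-in illustrating GGML_ASSERT's always-on behavior, not ggml's real definition):

#include <cassert>
#include <cstdio>
#include <cstdlib>

// Stand-in for a GGML_ASSERT-style check: active in every build configuration.
#define MY_HARD_ASSERT(x) \
    do { if (!(x)) { fprintf(stderr, "assertion failed: %s\n", #x); abort(); } } while (0)

int main() {
    assert(2 + 2 == 4);         // compiled out entirely under -DNDEBUG
    MY_HARD_ASSERT(2 + 2 == 4); // still checked in release builds
    printf("ok\n");
    return 0;
}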