mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
common : normalize naming style (#7462)
* common : normalize naming style ggml-ci * common : match declaration / definition order * zig : try to fix build
This commit is contained in:
parent
38c03478a3
commit
6ff13987ad
12
build.zig
12
build.zig
@ -129,14 +129,14 @@ pub fn build(b: *std.build.Builder) !void {
|
|||||||
const clip = make.obj("clip", "examples/llava/clip.cpp");
|
const clip = make.obj("clip", "examples/llava/clip.cpp");
|
||||||
const llava = make.obj("llava", "examples/llava/llava.cpp");
|
const llava = make.obj("llava", "examples/llava/llava.cpp");
|
||||||
|
|
||||||
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, console, grammar_parser });
|
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, console, grammar_parser });
|
||||||
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
|
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
|
||||||
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
|
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
|
||||||
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo });
|
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo });
|
||||||
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
|
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, train });
|
||||||
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
|
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, train });
|
||||||
|
|
||||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, json_schema_to_grammar, buildinfo, sampling, grammar_parser, clip, llava });
|
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, sgemm, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, unicode_data, common, sampling, json_schema_to_grammar, buildinfo, grammar_parser, clip, llava });
|
||||||
if (server.target.isWindows()) {
|
if (server.target.isWindows()) {
|
||||||
server.linkSystemLibrary("ws2_32");
|
server.linkSystemLibrary("ws2_32");
|
||||||
}
|
}
|
||||||
|
1342
common/common.cpp
1342
common/common.cpp
File diff suppressed because it is too large
Load Diff
@ -41,8 +41,12 @@ extern char const *LLAMA_BUILD_TARGET;
|
|||||||
|
|
||||||
struct llama_control_vector_load_info;
|
struct llama_control_vector_load_info;
|
||||||
|
|
||||||
int get_math_cpu_count();
|
//
|
||||||
int32_t get_num_physical_cores();
|
// CPU utils
|
||||||
|
//
|
||||||
|
|
||||||
|
int32_t cpu_get_num_physical_cores();
|
||||||
|
int32_t cpu_get_num_math();
|
||||||
|
|
||||||
//
|
//
|
||||||
// CLI argument parsing
|
// CLI argument parsing
|
||||||
@ -51,7 +55,7 @@ int32_t get_num_physical_cores();
|
|||||||
struct gpt_params {
|
struct gpt_params {
|
||||||
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
||||||
|
|
||||||
int32_t n_threads = get_math_cpu_count();
|
int32_t n_threads = cpu_get_num_math();
|
||||||
int32_t n_threads_draft = -1;
|
int32_t n_threads_draft = -1;
|
||||||
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
|
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
|
||||||
int32_t n_threads_batch_draft = -1;
|
int32_t n_threads_batch_draft = -1;
|
||||||
@ -179,33 +183,34 @@ struct gpt_params {
|
|||||||
|
|
||||||
void gpt_params_handle_model_default(gpt_params & params);
|
void gpt_params_handle_model_default(gpt_params & params);
|
||||||
|
|
||||||
bool parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
|
||||||
|
|
||||||
bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
|
bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
|
||||||
|
|
||||||
bool gpt_params_parse (int argc, char ** argv, gpt_params & params);
|
bool gpt_params_parse (int argc, char ** argv, gpt_params & params);
|
||||||
|
|
||||||
void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
|
|
||||||
|
|
||||||
bool gpt_params_find_arg (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
|
bool gpt_params_find_arg (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
|
||||||
|
void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
|
||||||
|
|
||||||
std::string get_system_info(const gpt_params & params);
|
std::string gpt_params_get_system_info(const gpt_params & params);
|
||||||
|
|
||||||
std::string gpt_random_prompt(std::mt19937 & rng);
|
|
||||||
|
|
||||||
void process_escapes(std::string& input);
|
|
||||||
|
|
||||||
bool validate_file_name(const std::string & filename);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// String utils
|
// String utils
|
||||||
//
|
//
|
||||||
|
|
||||||
std::vector<llama_sampler_type> sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
|
|
||||||
std::vector<llama_sampler_type> sampler_types_from_chars(const std::string & names_string);
|
|
||||||
std::vector<std::string> string_split(std::string input, char separator);
|
std::vector<std::string> string_split(std::string input, char separator);
|
||||||
|
|
||||||
std::string string_strip(const std::string & str);
|
std::string string_strip(const std::string & str);
|
||||||
std::string sampler_type_to_name_string(llama_sampler_type sampler_type);
|
std::string string_get_sortable_timestamp();
|
||||||
|
std::string string_random_prompt(std::mt19937 & rng);
|
||||||
|
|
||||||
|
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
||||||
|
void string_process_escapes(std::string & input);
|
||||||
|
|
||||||
|
//
|
||||||
|
// Filesystem utils
|
||||||
|
//
|
||||||
|
|
||||||
|
bool fs_validate_filename(const std::string & filename);
|
||||||
|
bool fs_create_directory_with_parents(const std::string & path);
|
||||||
|
|
||||||
|
std::string fs_get_cache_directory();
|
||||||
|
|
||||||
//
|
//
|
||||||
// Model utils
|
// Model utils
|
||||||
@ -276,30 +281,15 @@ std::string llama_detokenize_bpe(
|
|||||||
// defaults to true when model type is SPM, otherwise false.
|
// defaults to true when model type is SPM, otherwise false.
|
||||||
bool llama_should_add_bos_token(const llama_model * model);
|
bool llama_should_add_bos_token(const llama_model * model);
|
||||||
|
|
||||||
//
|
|
||||||
// YAML utils
|
|
||||||
//
|
|
||||||
|
|
||||||
bool create_directory_with_parents(const std::string & path);
|
|
||||||
std::string get_cache_directory();
|
|
||||||
void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
|
|
||||||
void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
|
|
||||||
void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
|
|
||||||
std::string get_sortable_timestamp();
|
|
||||||
|
|
||||||
void dump_non_result_info_yaml(
|
|
||||||
FILE * stream, const gpt_params & params, const llama_context * lctx,
|
|
||||||
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// KV cache utils
|
// KV cache utils
|
||||||
//
|
//
|
||||||
|
|
||||||
// Dump the KV cache view with the number of sequences per cell.
|
// Dump the KV cache view with the number of sequences per cell.
|
||||||
void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size = 80);
|
void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||||
|
|
||||||
// Dump the KV cache view showing individual sequences in each cell (long output).
|
// Dump the KV cache view showing individual sequences in each cell (long output).
|
||||||
void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Embedding utils
|
// Embedding utils
|
||||||
@ -333,6 +323,20 @@ llama_control_vector_data llama_control_vector_load(const std::vector<llama_cont
|
|||||||
//
|
//
|
||||||
// Split utils
|
// Split utils
|
||||||
//
|
//
|
||||||
|
|
||||||
static const char * const LLM_KV_SPLIT_NO = "split.no";
|
static const char * const LLM_KV_SPLIT_NO = "split.no";
|
||||||
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
||||||
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
||||||
|
|
||||||
|
//
|
||||||
|
// YAML utils
|
||||||
|
//
|
||||||
|
|
||||||
|
void yaml_dump_vector_float (FILE * stream, const char * prop_name, const std::vector<float> & data);
|
||||||
|
void yaml_dump_vector_int (FILE * stream, const char * prop_name, const std::vector<int> & data);
|
||||||
|
void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
|
||||||
|
|
||||||
|
void yaml_dump_non_result_info(
|
||||||
|
FILE * stream, const gpt_params & params, const llama_context * lctx,
|
||||||
|
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
|
||||||
|
|
||||||
|
@ -125,7 +125,7 @@ std::string llama_sampling_order_print(const llama_sampling_params & params) {
|
|||||||
std::string result = "CFG -> Penalties ";
|
std::string result = "CFG -> Penalties ";
|
||||||
if (params.mirostat == 0) {
|
if (params.mirostat == 0) {
|
||||||
for (auto sampler_type : params.samplers_sequence) {
|
for (auto sampler_type : params.samplers_sequence) {
|
||||||
const auto sampler_type_name = sampler_type_to_name_string(sampler_type);
|
const auto sampler_type_name = llama_sampling_type_to_str(sampler_type);
|
||||||
if (!sampler_type_name.empty()) {
|
if (!sampler_type_name.empty()) {
|
||||||
result += "-> " + sampler_type_name + " ";
|
result += "-> " + sampler_type_name + " ";
|
||||||
}
|
}
|
||||||
@ -137,6 +137,87 @@ std::string llama_sampling_order_print(const llama_sampling_params & params) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string llama_sampling_type_to_str(llama_sampler_type sampler_type) {
|
||||||
|
switch (sampler_type) {
|
||||||
|
case llama_sampler_type::TOP_K: return "top_k";
|
||||||
|
case llama_sampler_type::TFS_Z: return "tfs_z";
|
||||||
|
case llama_sampler_type::TYPICAL_P: return "typical_p";
|
||||||
|
case llama_sampler_type::TOP_P: return "top_p";
|
||||||
|
case llama_sampler_type::MIN_P: return "min_p";
|
||||||
|
case llama_sampler_type::TEMPERATURE: return "temperature";
|
||||||
|
default : return "";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
|
||||||
|
std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
|
||||||
|
{"top_k", llama_sampler_type::TOP_K},
|
||||||
|
{"top_p", llama_sampler_type::TOP_P},
|
||||||
|
{"typical_p", llama_sampler_type::TYPICAL_P},
|
||||||
|
{"min_p", llama_sampler_type::MIN_P},
|
||||||
|
{"tfs_z", llama_sampler_type::TFS_Z},
|
||||||
|
{"temperature", llama_sampler_type::TEMPERATURE}
|
||||||
|
};
|
||||||
|
|
||||||
|
// since samplers names are written multiple ways
|
||||||
|
// make it ready for both system names and input names
|
||||||
|
std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
|
||||||
|
{"top-k", llama_sampler_type::TOP_K},
|
||||||
|
{"top-p", llama_sampler_type::TOP_P},
|
||||||
|
{"nucleus", llama_sampler_type::TOP_P},
|
||||||
|
{"typical-p", llama_sampler_type::TYPICAL_P},
|
||||||
|
{"typical", llama_sampler_type::TYPICAL_P},
|
||||||
|
{"min-p", llama_sampler_type::MIN_P},
|
||||||
|
{"tfs-z", llama_sampler_type::TFS_Z},
|
||||||
|
{"tfs", llama_sampler_type::TFS_Z},
|
||||||
|
{"temp", llama_sampler_type::TEMPERATURE}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<llama_sampler_type> sampler_types;
|
||||||
|
sampler_types.reserve(names.size());
|
||||||
|
for (const auto & name : names)
|
||||||
|
{
|
||||||
|
auto sampler_item = sampler_canonical_name_map.find(name);
|
||||||
|
if (sampler_item != sampler_canonical_name_map.end())
|
||||||
|
{
|
||||||
|
sampler_types.push_back(sampler_item->second);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (allow_alt_names)
|
||||||
|
{
|
||||||
|
sampler_item = sampler_alt_name_map.find(name);
|
||||||
|
if (sampler_item != sampler_alt_name_map.end())
|
||||||
|
{
|
||||||
|
sampler_types.push_back(sampler_item->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sampler_types;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string) {
|
||||||
|
std::unordered_map<char, llama_sampler_type> sampler_name_map {
|
||||||
|
{'k', llama_sampler_type::TOP_K},
|
||||||
|
{'p', llama_sampler_type::TOP_P},
|
||||||
|
{'y', llama_sampler_type::TYPICAL_P},
|
||||||
|
{'m', llama_sampler_type::MIN_P},
|
||||||
|
{'f', llama_sampler_type::TFS_Z},
|
||||||
|
{'t', llama_sampler_type::TEMPERATURE}
|
||||||
|
};
|
||||||
|
|
||||||
|
std::vector<llama_sampler_type> sampler_types;
|
||||||
|
sampler_types.reserve(names_string.size());
|
||||||
|
for (const auto & c : names_string) {
|
||||||
|
const auto sampler_item = sampler_name_map.find(c);
|
||||||
|
if (sampler_item != sampler_name_map.end()) {
|
||||||
|
sampler_types.push_back(sampler_item->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return sampler_types;
|
||||||
|
}
|
||||||
|
|
||||||
// no reasons to expose this function in header
|
// no reasons to expose this function in header
|
||||||
static void sampler_queue(
|
static void sampler_queue(
|
||||||
struct llama_context * ctx_main,
|
struct llama_context * ctx_main,
|
||||||
|
@ -116,6 +116,11 @@ std::string llama_sampling_print(const llama_sampling_params & params);
|
|||||||
// Print sampling order into a string
|
// Print sampling order into a string
|
||||||
std::string llama_sampling_order_print(const llama_sampling_params & params);
|
std::string llama_sampling_order_print(const llama_sampling_params & params);
|
||||||
|
|
||||||
|
std::string llama_sampling_type_to_str(llama_sampler_type sampler_type);
|
||||||
|
|
||||||
|
std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
|
||||||
|
std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string);
|
||||||
|
|
||||||
// this is a common sampling function used across the examples for convenience
|
// this is a common sampling function used across the examples for convenience
|
||||||
// it can serve as a starting point for implementing your own sampling function
|
// it can serve as a starting point for implementing your own sampling function
|
||||||
// Note: When using multiple sequences, it is the caller's responsibility to call
|
// Note: When using multiple sequences, it is the caller's responsibility to call
|
||||||
|
@ -1380,7 +1380,7 @@ bool consume_common_train_arg(
|
|||||||
|
|
||||||
void finish_processing_train_args(struct train_params_common * params) {
|
void finish_processing_train_args(struct train_params_common * params) {
|
||||||
if (params->escape) {
|
if (params->escape) {
|
||||||
process_escapes(params->sample_start);
|
string_process_escapes(params->sample_start);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -48,7 +48,7 @@ int main(int argc, char ** argv) {
|
|||||||
params.prompt = "Hello my name is";
|
params.prompt = "Hello my name is";
|
||||||
}
|
}
|
||||||
|
|
||||||
process_escapes(params.prompt);
|
string_process_escapes(params.prompt);
|
||||||
|
|
||||||
// init LLM
|
// init LLM
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
std::mt19937 rng(params.seed);
|
std::mt19937 rng(params.seed);
|
||||||
if (params.random_prompt) {
|
if (params.random_prompt) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = string_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_backend_init();
|
llama_backend_init();
|
||||||
@ -107,7 +107,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s\n", get_system_info(params).c_str());
|
fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// split the prompt into lines
|
// split the prompt into lines
|
||||||
|
@ -152,7 +152,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
std::mt19937 rng(params.seed);
|
std::mt19937 rng(params.seed);
|
||||||
if (params.random_prompt) {
|
if (params.random_prompt) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = string_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_backend_init();
|
llama_backend_init();
|
||||||
@ -176,7 +176,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s\n", get_system_info(params).c_str());
|
fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OK = run(ctx, params);
|
bool OK = run(ctx, params);
|
||||||
|
@ -598,7 +598,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
std::mt19937 rng(params.seed);
|
std::mt19937 rng(params.seed);
|
||||||
if (params.random_prompt) {
|
if (params.random_prompt) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = string_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
sparams.dataset = params.prompt_file;
|
sparams.dataset = params.prompt_file;
|
||||||
@ -667,7 +667,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s\n", get_system_info(params).c_str());
|
fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
bool OK = compute_imatrix(ctx, params, compute_ppl, from_chunk);
|
bool OK = compute_imatrix(ctx, params, compute_ppl, from_chunk);
|
||||||
|
@ -50,9 +50,9 @@ static void write_logfile(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string timestamp = get_sortable_timestamp();
|
const std::string timestamp = string_get_sortable_timestamp();
|
||||||
|
|
||||||
const bool success = create_directory_with_parents(params.logdir);
|
const bool success = fs_create_directory_with_parents(params.logdir);
|
||||||
if (!success) {
|
if (!success) {
|
||||||
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
||||||
__func__, params.logdir.c_str());
|
__func__, params.logdir.c_str());
|
||||||
@ -70,7 +70,7 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "binary: infill\n");
|
fprintf(logfile, "binary: infill\n");
|
||||||
char model_desc[128];
|
char model_desc[128];
|
||||||
llama_model_desc(model, model_desc, sizeof(model_desc));
|
llama_model_desc(model, model_desc, sizeof(model_desc));
|
||||||
dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_desc);
|
yaml_dump_non_result_info(logfile, params, ctx, timestamp, input_tokens, model_desc);
|
||||||
|
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
@ -78,8 +78,8 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
|
|
||||||
dump_string_yaml_multiline(logfile, "output", output.c_str());
|
yaml_dump_string_multiline(logfile, "output", output.c_str());
|
||||||
dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
|
yaml_dump_vector_int(logfile, "output_tokens", output_tokens);
|
||||||
|
|
||||||
llama_dump_timing_info_yaml(logfile, ctx);
|
llama_dump_timing_info_yaml(logfile, ctx);
|
||||||
fclose(logfile);
|
fclose(logfile);
|
||||||
@ -236,7 +236,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
LOG_TEE("\n");
|
LOG_TEE("\n");
|
||||||
LOG_TEE("%s\n", get_system_info(params).c_str());
|
LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
const bool add_bos = llama_should_add_bos_token(model);
|
const bool add_bos = llama_should_add_bos_token(model);
|
||||||
GGML_ASSERT(llama_add_eos_token(model) != 1);
|
GGML_ASSERT(llama_add_eos_token(model) != 1);
|
||||||
@ -621,8 +621,8 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
if (params.escape) {
|
if (params.escape) {
|
||||||
//process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here
|
//process escape sequences, for the initial prompt this is done in common.cpp when we load the params, but for the interactive mode we need to do it here
|
||||||
process_escapes(params.input_prefix);
|
string_process_escapes(params.input_prefix);
|
||||||
process_escapes(params.input_suffix);
|
string_process_escapes(params.input_suffix);
|
||||||
}
|
}
|
||||||
suff_rm_leading_spc = params.escape;
|
suff_rm_leading_spc = params.escape;
|
||||||
if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
if (suff_rm_leading_spc && params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
||||||
|
@ -200,7 +200,7 @@ static const cmd_params cmd_params_defaults = {
|
|||||||
/* n_ubatch */ {512},
|
/* n_ubatch */ {512},
|
||||||
/* type_k */ {GGML_TYPE_F16},
|
/* type_k */ {GGML_TYPE_F16},
|
||||||
/* type_v */ {GGML_TYPE_F16},
|
/* type_v */ {GGML_TYPE_F16},
|
||||||
/* n_threads */ {get_math_cpu_count()},
|
/* n_threads */ {cpu_get_num_math()},
|
||||||
/* n_gpu_layers */ {99},
|
/* n_gpu_layers */ {99},
|
||||||
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
|
/* split_mode */ {LLAMA_SPLIT_MODE_LAYER},
|
||||||
/* main_gpu */ {0},
|
/* main_gpu */ {0},
|
||||||
|
@ -290,7 +290,7 @@ int main(int argc, char ** argv) {
|
|||||||
#endif // LOG_DISABLE_LOGS
|
#endif // LOG_DISABLE_LOGS
|
||||||
|
|
||||||
if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
if (params.mmproj.empty() || (params.image.empty() && !prompt_contains_image(params.prompt))) {
|
||||||
gpt_print_usage(argc, argv, params);
|
gpt_params_print_usage(argc, argv, params);
|
||||||
show_additional_info(argc, argv);
|
show_additional_info(argc, argv);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
@ -174,7 +174,7 @@ int main(int argc, char ** argv) {
|
|||||||
// debug
|
// debug
|
||||||
if (dump_kv_cache) {
|
if (dump_kv_cache) {
|
||||||
llama_kv_cache_view_update(ctx, &kvc_view);
|
llama_kv_cache_view_update(ctx, &kvc_view);
|
||||||
dump_kv_cache_view_seqs(kvc_view, 40);
|
llama_kv_cache_dump_view_seqs(kvc_view, 40);
|
||||||
}
|
}
|
||||||
|
|
||||||
// build the mask from https://lmsys.org/blog/2023-11-21-lookahead-decoding/
|
// build the mask from https://lmsys.org/blog/2023-11-21-lookahead-decoding/
|
||||||
|
@ -121,7 +121,7 @@ int main(int argc, char ** argv){
|
|||||||
// debug
|
// debug
|
||||||
if (dump_kv_cache) {
|
if (dump_kv_cache) {
|
||||||
llama_kv_cache_view_update(ctx, &kvc_view);
|
llama_kv_cache_view_update(ctx, &kvc_view);
|
||||||
dump_kv_cache_view_seqs(kvc_view, 40);
|
llama_kv_cache_dump_view_seqs(kvc_view, 40);
|
||||||
}
|
}
|
||||||
|
|
||||||
// print current draft sequence
|
// print current draft sequence
|
||||||
|
@ -60,9 +60,9 @@ static void write_logfile(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string timestamp = get_sortable_timestamp();
|
const std::string timestamp = string_get_sortable_timestamp();
|
||||||
|
|
||||||
const bool success = create_directory_with_parents(params.logdir);
|
const bool success = fs_create_directory_with_parents(params.logdir);
|
||||||
if (!success) {
|
if (!success) {
|
||||||
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
||||||
__func__, params.logdir.c_str());
|
__func__, params.logdir.c_str());
|
||||||
@ -80,7 +80,7 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "binary: main\n");
|
fprintf(logfile, "binary: main\n");
|
||||||
char model_desc[128];
|
char model_desc[128];
|
||||||
llama_model_desc(model, model_desc, sizeof(model_desc));
|
llama_model_desc(model, model_desc, sizeof(model_desc));
|
||||||
dump_non_result_info_yaml(logfile, params, ctx, timestamp, input_tokens, model_desc);
|
yaml_dump_non_result_info(logfile, params, ctx, timestamp, input_tokens, model_desc);
|
||||||
|
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
@ -88,8 +88,8 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
|
|
||||||
dump_string_yaml_multiline(logfile, "output", output.c_str());
|
yaml_dump_string_multiline(logfile, "output", output.c_str());
|
||||||
dump_vector_int_yaml(logfile, "output_tokens", output_tokens);
|
yaml_dump_vector_int(logfile, "output_tokens", output_tokens);
|
||||||
|
|
||||||
llama_dump_timing_info_yaml(logfile, ctx);
|
llama_dump_timing_info_yaml(logfile, ctx);
|
||||||
fclose(logfile);
|
fclose(logfile);
|
||||||
@ -181,7 +181,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
std::mt19937 rng(params.seed);
|
std::mt19937 rng(params.seed);
|
||||||
if (params.random_prompt) {
|
if (params.random_prompt) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = string_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG("%s: llama backend init\n", __func__);
|
LOG("%s: llama backend init\n", __func__);
|
||||||
@ -219,7 +219,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
LOG_TEE("\n");
|
LOG_TEE("\n");
|
||||||
LOG_TEE("%s\n", get_system_info(params).c_str());
|
LOG_TEE("%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string path_session = params.path_prompt_cache;
|
std::string path_session = params.path_prompt_cache;
|
||||||
@ -879,7 +879,7 @@ int main(int argc, char ** argv) {
|
|||||||
embd_inp.insert(embd_inp.end(), cml_pfx.begin(), cml_pfx.end());
|
embd_inp.insert(embd_inp.end(), cml_pfx.begin(), cml_pfx.end());
|
||||||
}
|
}
|
||||||
if (params.escape) {
|
if (params.escape) {
|
||||||
process_escapes(buffer);
|
string_process_escapes(buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
|
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
|
||||||
|
@ -210,7 +210,7 @@ int main(int argc, char ** argv) {
|
|||||||
while (true) {
|
while (true) {
|
||||||
if (dump_kv_cache) {
|
if (dump_kv_cache) {
|
||||||
llama_kv_cache_view_update(ctx, &kvc_view);
|
llama_kv_cache_view_update(ctx, &kvc_view);
|
||||||
dump_kv_cache_view_seqs(kvc_view, 40);
|
llama_kv_cache_dump_view_seqs(kvc_view, 40);
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_batch_clear(batch);
|
llama_batch_clear(batch);
|
||||||
|
@ -44,9 +44,9 @@ static void write_logfile(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::string timestamp = get_sortable_timestamp();
|
const std::string timestamp = string_get_sortable_timestamp();
|
||||||
|
|
||||||
const bool success = create_directory_with_parents(params.logdir);
|
const bool success = fs_create_directory_with_parents(params.logdir);
|
||||||
if (!success) {
|
if (!success) {
|
||||||
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
fprintf(stderr, "%s: warning: failed to create logdir %s, cannot write logfile\n",
|
||||||
__func__, params.logdir.c_str());
|
__func__, params.logdir.c_str());
|
||||||
@ -64,7 +64,7 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "binary: main\n");
|
fprintf(logfile, "binary: main\n");
|
||||||
char model_desc[128];
|
char model_desc[128];
|
||||||
llama_model_desc(model, model_desc, sizeof(model_desc));
|
llama_model_desc(model, model_desc, sizeof(model_desc));
|
||||||
dump_non_result_info_yaml(logfile, params, ctx, timestamp, results.tokens, model_desc);
|
yaml_dump_non_result_info(logfile, params, ctx, timestamp, results.tokens, model_desc);
|
||||||
|
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
@ -72,9 +72,9 @@ static void write_logfile(
|
|||||||
fprintf(logfile, "######################\n");
|
fprintf(logfile, "######################\n");
|
||||||
fprintf(logfile, "\n");
|
fprintf(logfile, "\n");
|
||||||
|
|
||||||
dump_vector_float_yaml(logfile, "logits", results.logits);
|
yaml_dump_vector_float(logfile, "logits", results.logits);
|
||||||
fprintf(logfile, "ppl_value: %f\n", results.ppl_value);
|
fprintf(logfile, "ppl_value: %f\n", results.ppl_value);
|
||||||
dump_vector_float_yaml(logfile, "probs", results.probs);
|
yaml_dump_vector_float(logfile, "probs", results.probs);
|
||||||
|
|
||||||
llama_dump_timing_info_yaml(logfile, ctx);
|
llama_dump_timing_info_yaml(logfile, ctx);
|
||||||
fclose(logfile);
|
fclose(logfile);
|
||||||
@ -2007,7 +2007,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
std::mt19937 rng(params.seed);
|
std::mt19937 rng(params.seed);
|
||||||
if (params.random_prompt) {
|
if (params.random_prompt) {
|
||||||
params.prompt = gpt_random_prompt(rng);
|
params.prompt = string_random_prompt(rng);
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_backend_init();
|
llama_backend_init();
|
||||||
@ -2035,7 +2035,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s\n", get_system_info(params).c_str());
|
fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
struct results_perplexity results;
|
struct results_perplexity results;
|
||||||
|
@ -259,7 +259,7 @@ int main(int argc, char ** argv) {
|
|||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
}
|
}
|
||||||
} else if (strcmp(argv[arg_idx], "--override-kv") == 0) {
|
} else if (strcmp(argv[arg_idx], "--override-kv") == 0) {
|
||||||
if (arg_idx == argc-1 || !parse_kv_override(argv[++arg_idx], kv_overrides)) {
|
if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) {
|
||||||
usage(argv[0]);
|
usage(argv[0]);
|
||||||
}
|
}
|
||||||
} else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
|
} else if (strcmp(argv[arg_idx], "--allow-requantize") == 0) {
|
||||||
|
@ -11,7 +11,7 @@ struct retrieval_params {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void retrieval_params_print_usage(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & params) {
|
static void retrieval_params_print_usage(int argc, char ** argv, gpt_params & gpt_params, retrieval_params & params) {
|
||||||
gpt_print_usage(argc, argv, gpt_params);
|
gpt_params_print_usage(argc, argv, gpt_params);
|
||||||
printf("retrieval options:\n");
|
printf("retrieval options:\n");
|
||||||
printf(" --context-file FNAME file containing context to embed.\n");
|
printf(" --context-file FNAME file containing context to embed.\n");
|
||||||
printf(" specify multiple files by providing --context-file option multiple times.\n");
|
printf(" specify multiple files by providing --context-file option multiple times.\n");
|
||||||
@ -226,7 +226,7 @@ int main(int argc, char ** argv) {
|
|||||||
// print system information
|
// print system information
|
||||||
{
|
{
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s\n", get_system_info(params).c_str());
|
fprintf(stderr, "%s\n", gpt_params_get_system_info(params).c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// max batch size
|
// max batch size
|
||||||
|
@ -1019,7 +1019,7 @@ struct server_context {
|
|||||||
sampler_names.emplace_back(sampler_name);
|
sampler_names.emplace_back(sampler_name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
slot.sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
|
slot.sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
|
||||||
} else {
|
} else {
|
||||||
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
|
slot.sparams.samplers_sequence = default_sparams.samplers_sequence;
|
||||||
}
|
}
|
||||||
@ -1256,7 +1256,7 @@ struct server_context {
|
|||||||
std::vector<std::string> samplers_sequence;
|
std::vector<std::string> samplers_sequence;
|
||||||
samplers_sequence.reserve(slot.sparams.samplers_sequence.size());
|
samplers_sequence.reserve(slot.sparams.samplers_sequence.size());
|
||||||
for (const auto & sampler_type : slot.sparams.samplers_sequence) {
|
for (const auto & sampler_type : slot.sparams.samplers_sequence) {
|
||||||
samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
|
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
|
||||||
}
|
}
|
||||||
|
|
||||||
return json {
|
return json {
|
||||||
@ -2852,7 +2852,7 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!parse_kv_override(argv[i], params.kv_overrides)) {
|
if (!string_parse_kv_override(argv[i], params.kv_overrides)) {
|
||||||
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
@ -3310,7 +3310,7 @@ int main(int argc, char ** argv) {
|
|||||||
const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||||
json request_data = json::parse(req.body);
|
json request_data = json::parse(req.body);
|
||||||
std::string filename = request_data.at("filename");
|
std::string filename = request_data.at("filename");
|
||||||
if (!validate_file_name(filename)) {
|
if (!fs_validate_filename(filename)) {
|
||||||
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@ -3340,7 +3340,7 @@ int main(int argc, char ** argv) {
|
|||||||
const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
|
||||||
json request_data = json::parse(req.body);
|
json request_data = json::parse(req.body);
|
||||||
std::string filename = request_data.at("filename");
|
std::string filename = request_data.at("filename");
|
||||||
if (!validate_file_name(filename)) {
|
if (!fs_validate_filename(filename)) {
|
||||||
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user