mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
ggml, common, examples, tests : fixed type arguments in printf (#5528)
This commit is contained in:
parent
fc0c8d286a
commit
5d3de51f97
@ -1741,7 +1741,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
|||||||
|
|
||||||
fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base);
|
fprintf(stream, "rope_freq_base: %f # default: 10000.0\n", params.rope_freq_base);
|
||||||
fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale);
|
fprintf(stream, "rope_freq_scale: %f # default: 1.0\n", params.rope_freq_scale);
|
||||||
fprintf(stream, "seed: %d # default: -1 (random seed)\n", params.seed);
|
fprintf(stream, "seed: %u # default: -1 (random seed)\n", params.seed);
|
||||||
fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
|
fprintf(stream, "simple_io: %s # default: false\n", params.simple_io ? "true" : "false");
|
||||||
fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
|
fprintf(stream, "cont_batching: %s # default: false\n", params.cont_batching ? "true" : "false");
|
||||||
fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp);
|
fprintf(stream, "temp: %f # default: 0.8\n", sparams.temp);
|
||||||
@ -1750,7 +1750,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
|
|||||||
dump_vector_float_yaml(stream, "tensor_split", tensor_split_vector);
|
dump_vector_float_yaml(stream, "tensor_split", tensor_split_vector);
|
||||||
|
|
||||||
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
|
fprintf(stream, "tfs: %f # default: 1.0\n", sparams.tfs_z);
|
||||||
fprintf(stream, "threads: %d # default: %d\n", params.n_threads, std::thread::hardware_concurrency());
|
fprintf(stream, "threads: %d # default: %u\n", params.n_threads, std::thread::hardware_concurrency());
|
||||||
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
|
fprintf(stream, "top_k: %d # default: 40\n", sparams.top_k);
|
||||||
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);
|
fprintf(stream, "top_p: %f # default: 0.95\n", sparams.top_p);
|
||||||
fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
|
fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p);
|
||||||
|
@ -159,7 +159,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
LOG_TEE("\n");
|
LOG_TEE("\n");
|
||||||
LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d, n_threads = %d, n_threads_batch = %d\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq, ctx_params.n_threads, ctx_params.n_threads_batch);
|
LOG_TEE("%s: n_kv_max = %d, is_pp_shared = %d, n_gpu_layers = %d, mmq = %d, n_threads = %u, n_threads_batch = %u\n", __func__, n_kv_max, is_pp_shared, n_gpu_layers, mmq, ctx_params.n_threads, ctx_params.n_threads_batch);
|
||||||
LOG_TEE("\n");
|
LOG_TEE("\n");
|
||||||
|
|
||||||
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
|
LOG_TEE("|%6s | %6s | %4s | %6s | %8s | %8s | %8s | %8s | %8s | %8s |\n", "PP", "TG", "B", "N_KV", "T_PP s", "S_PP t/s", "T_TG s", "S_TG t/s", "T s", "S t/s");
|
||||||
|
@ -92,7 +92,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
const int n_ctx = llama_n_ctx(ctx);
|
const int n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
LOG_TEE("\n%s: n_len = %d, n_ctx = %d, n_batch = %d, n_parallel = %d, n_kv_req = %d\n", __func__, n_len, n_ctx, ctx_params.n_batch, n_parallel, n_kv_req);
|
LOG_TEE("\n%s: n_len = %d, n_ctx = %d, n_batch = %u, n_parallel = %d, n_kv_req = %d\n", __func__, n_len, n_ctx, ctx_params.n_batch, n_parallel, n_kv_req);
|
||||||
|
|
||||||
// make sure the KV cache is big enough to hold all the prompt and generated tokens
|
// make sure the KV cache is big enough to hold all the prompt and generated tokens
|
||||||
if (n_kv_req > n_ctx) {
|
if (n_kv_req > n_ctx) {
|
||||||
|
@ -325,14 +325,14 @@ struct train_params {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static void print_params(struct my_llama_hparams * params) {
|
static void print_params(struct my_llama_hparams * params) {
|
||||||
printf("%s: n_vocab: %d\n", __func__, params->n_vocab);
|
printf("%s: n_vocab: %u\n", __func__, params->n_vocab);
|
||||||
printf("%s: n_ctx: %d\n", __func__, params->n_ctx);
|
printf("%s: n_ctx: %u\n", __func__, params->n_ctx);
|
||||||
printf("%s: n_embd: %d\n", __func__, params->n_embd);
|
printf("%s: n_embd: %u\n", __func__, params->n_embd);
|
||||||
printf("%s: n_mult: %d\n", __func__, params->n_mult);
|
printf("%s: n_mult: %u\n", __func__, params->n_mult);
|
||||||
printf("%s: n_head: %d\n", __func__, params->n_head);
|
printf("%s: n_head: %u\n", __func__, params->n_head);
|
||||||
printf("%s: n_ff: %d\n", __func__, params->n_ff);
|
printf("%s: n_ff: %u\n", __func__, params->n_ff);
|
||||||
printf("%s: n_layer: %d\n", __func__, params->n_layer);
|
printf("%s: n_layer: %u\n", __func__, params->n_layer);
|
||||||
printf("%s: n_rot: %d\n", __func__, params->n_rot);
|
printf("%s: n_rot: %u\n", __func__, params->n_rot);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void init_model(struct my_llama_model * model) {
|
static void init_model(struct my_llama_model * model) {
|
||||||
@ -350,25 +350,25 @@ static void init_model(struct my_llama_model * model) {
|
|||||||
model->train_tokens = 0;
|
model->train_tokens = 0;
|
||||||
|
|
||||||
model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
model->tok_embeddings = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
||||||
printf("[%s:GG] Allocating [%d] x [%d] = [%d] float space for model->tok_embeddings\n",__func__,n_embd , n_vocab, n_embd * n_vocab);
|
printf("[%s:GG] Allocating [%u] x [%u] = [%u] float space for model->tok_embeddings\n",__func__,n_embd , n_vocab, n_embd * n_vocab);
|
||||||
|
|
||||||
model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
model->norm = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
|
||||||
printf("[%s:GG] Allocating [%d] float space for model->norm\n",__func__,n_embd);
|
printf("[%s:GG] Allocating [%u] float space for model->norm\n",__func__,n_embd);
|
||||||
|
|
||||||
model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
model->output = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, n_embd, n_vocab);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for model->output\n",__func__,n_embd, n_vocab, n_embd * n_vocab);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for model->output\n",__func__,n_embd, n_vocab, n_embd * n_vocab);
|
||||||
|
|
||||||
// printing the per-layer allocations here so we dont print in the for loop.
|
// printing the per-layer allocations here so we dont print in the for loop.
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wq for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.wq for [%u] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wk for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.wk for [%u] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wv for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.wv for [%u] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.wo for [%d] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.wo for [%u] layers\n",__func__, n_embd, n_embd, n_embd * n_embd, n_layer);
|
||||||
|
|
||||||
printf("[%s:GG] Allocating [%d] float space for layer.ffn_norm for [%d] layers\n",__func__,n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] float space for layer.ffn_norm for [%u] layers\n",__func__,n_embd, n_layer);
|
||||||
|
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w1 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.w1 for [%u] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w2 for [%d] layers\n",__func__, n_embd, n_ff, n_ff * n_embd, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.w2 for [%u] layers\n",__func__, n_embd, n_ff, n_ff * n_embd, n_layer);
|
||||||
printf("[%s:GG] Allocating [%d] x[%d] = [%d] float space for layer.w3 for [%d] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
printf("[%s:GG] Allocating [%u] x[%u] = [%u] float space for layer.w3 for [%u] layers\n",__func__, n_ff, n_embd, n_embd * n_ff, n_layer);
|
||||||
|
|
||||||
ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
|
ggml_set_name(model->tok_embeddings, "tok_embeddings.weight");
|
||||||
ggml_set_name(model->norm, "norm.weight");
|
ggml_set_name(model->norm, "norm.weight");
|
||||||
|
@ -1623,7 +1623,7 @@ static void kl_divergence(llama_context * ctx, const gpt_params & params) {
|
|||||||
uint32_t n_ctx;
|
uint32_t n_ctx;
|
||||||
in.read((char *)&n_ctx, sizeof(n_ctx));
|
in.read((char *)&n_ctx, sizeof(n_ctx));
|
||||||
if (n_ctx > llama_n_ctx(ctx)) {
|
if (n_ctx > llama_n_ctx(ctx)) {
|
||||||
fprintf(stderr, "%s: %s has been computed with %d, while the current context is %d. Increase it with -c and retry\n",
|
fprintf(stderr, "%s: %s has been computed with %u, while the current context is %d. Increase it with -c and retry\n",
|
||||||
__func__, params.logits_file.c_str(), n_ctx, params.n_ctx);
|
__func__, params.logits_file.c_str(), n_ctx, params.n_ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -111,13 +111,13 @@ static const char * LLM_TENSOR_FFN_DOWN = "blk.%d.ffn_down";
|
|||||||
static const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up";
|
static const char * LLM_TENSOR_FFN_UP = "blk.%d.ffn_up";
|
||||||
|
|
||||||
static void print_params(struct my_llama_hparams * params) {
|
static void print_params(struct my_llama_hparams * params) {
|
||||||
printf("%s: n_vocab: %d\n", __func__, params->n_vocab);
|
printf("%s: n_vocab: %u\n", __func__, params->n_vocab);
|
||||||
printf("%s: n_ctx: %d\n", __func__, params->n_ctx);
|
printf("%s: n_ctx: %u\n", __func__, params->n_ctx);
|
||||||
printf("%s: n_embd: %d\n", __func__, params->n_embd);
|
printf("%s: n_embd: %u\n", __func__, params->n_embd);
|
||||||
printf("%s: n_head: %d\n", __func__, params->n_head);
|
printf("%s: n_head: %u\n", __func__, params->n_head);
|
||||||
printf("%s: n_ff: %d\n", __func__, params->n_ff);
|
printf("%s: n_ff: %u\n", __func__, params->n_ff);
|
||||||
printf("%s: n_layer: %d\n", __func__, params->n_layer);
|
printf("%s: n_layer: %u\n", __func__, params->n_layer);
|
||||||
printf("%s: n_rot: %d\n", __func__, params->n_rot);
|
printf("%s: n_rot: %u\n", __func__, params->n_rot);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void set_param_model(struct my_llama_model * model) {
|
static void set_param_model(struct my_llama_model * model) {
|
||||||
|
4
ggml.c
4
ggml.c
@ -17909,7 +17909,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
|
|
||||||
ptr += ggml_nbytes(tensor);
|
ptr += ggml_nbytes(tensor);
|
||||||
|
|
||||||
fprintf(stderr, "%s: loaded leaf %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
fprintf(stderr, "%s: loaded leaf %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -18012,7 +18012,7 @@ struct ggml_cgraph * ggml_graph_import(const char * fname, struct ggml_context *
|
|||||||
|
|
||||||
result->nodes[i] = tensor;
|
result->nodes[i] = tensor;
|
||||||
|
|
||||||
fprintf(stderr, "%s: loaded node %d: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
fprintf(stderr, "%s: loaded node %u: '%16s', %9zu bytes\n", __func__, i, tensor->name, ggml_nbytes(tensor));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -38,8 +38,8 @@ term ::= [0-9]+)""";
|
|||||||
// pretty print error message before asserting
|
// pretty print error message before asserting
|
||||||
if (expected_pair.first != key || expected_pair.second != value)
|
if (expected_pair.first != key || expected_pair.second != value)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "expected_pair: %s, %d\n", expected_pair.first.c_str(), expected_pair.second);
|
fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
|
||||||
fprintf(stderr, "actual_pair: %s, %d\n", key.c_str(), value);
|
fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
|
||||||
fprintf(stderr, "expected_pair != actual_pair\n");
|
fprintf(stderr, "expected_pair != actual_pair\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -96,9 +96,9 @@ term ::= [0-9]+)""";
|
|||||||
// pretty print error message before asserting
|
// pretty print error message before asserting
|
||||||
if (expected_element.type != element.type || expected_element.value != element.value)
|
if (expected_element.type != element.type || expected_element.value != element.value)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "index: %d\n", index);
|
fprintf(stderr, "index: %u\n", index);
|
||||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
|
||||||
fprintf(stderr, "actual_element: %d, %d\n", element.type, element.value);
|
fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
|
||||||
fprintf(stderr, "expected_element != actual_element\n");
|
fprintf(stderr, "expected_element != actual_element\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,8 +144,8 @@ term ::= [0-9]+)""";
|
|||||||
// pretty print error message before asserting
|
// pretty print error message before asserting
|
||||||
if (expected_pair.first != key || expected_pair.second != value)
|
if (expected_pair.first != key || expected_pair.second != value)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "expected_pair: %s, %d\n", expected_pair.first.c_str(), expected_pair.second);
|
fprintf(stderr, "expected_pair: %s, %u\n", expected_pair.first.c_str(), expected_pair.second);
|
||||||
fprintf(stderr, "actual_pair: %s, %d\n", key.c_str(), value);
|
fprintf(stderr, "actual_pair: %s, %u\n", key.c_str(), value);
|
||||||
fprintf(stderr, "expected_pair != actual_pair\n");
|
fprintf(stderr, "expected_pair != actual_pair\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -235,9 +235,9 @@ term ::= [0-9]+)""";
|
|||||||
// pretty print error message before asserting
|
// pretty print error message before asserting
|
||||||
if (expected_element.type != element.type || expected_element.value != element.value)
|
if (expected_element.type != element.type || expected_element.value != element.value)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "index: %d\n", index);
|
fprintf(stderr, "index: %u\n", index);
|
||||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
|
||||||
fprintf(stderr, "actual_element: %d, %d\n", element.type, element.value);
|
fprintf(stderr, "actual_element: %d, %u\n", element.type, element.value);
|
||||||
fprintf(stderr, "expected_element != actual_element\n");
|
fprintf(stderr, "expected_element != actual_element\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -180,8 +180,8 @@ int main()
|
|||||||
if (expected_element.type != element->type || expected_element.value != element->value)
|
if (expected_element.type != element->type || expected_element.value != element->value)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "index: %d\n", index);
|
fprintf(stderr, "index: %d\n", index);
|
||||||
fprintf(stderr, "expected_element: %d, %d\n", expected_element.type, expected_element.value);
|
fprintf(stderr, "expected_element: %d, %u\n", expected_element.type, expected_element.value);
|
||||||
fprintf(stderr, "actual_element: %d, %d\n", element->type, element->value);
|
fprintf(stderr, "actual_element: %d, %u\n", element->type, element->value);
|
||||||
fprintf(stderr, "expected_element != actual_element\n");
|
fprintf(stderr, "expected_element != actual_element\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user