mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 00:39:00 +01:00
llama : uniform variable names + struct init
This commit is contained in:
parent
4fe95c6985
commit
26cc1bd7a2
@ -915,14 +915,14 @@ static json format_timings(llama_server_context &llama)
|
||||
|
||||
return json{
|
||||
{"prompt_n", timings.n_eval},
|
||||
{"prompt_ms", timings.prompt_eval_time_ms},
|
||||
{"prompt_per_token_ms", timings.prompt_eval_time_ms / timings.n_p_eval},
|
||||
{"prompt_per_second", 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval},
|
||||
{"prompt_ms", timings.t_p_eval_ms},
|
||||
{"prompt_per_token_ms", timings.t_p_eval_ms / timings.n_p_eval},
|
||||
{"prompt_per_second", 1e3 / timings.t_p_eval_ms * timings.n_p_eval},
|
||||
|
||||
{"predicted_n", timings.n_eval},
|
||||
{"predicted_ms", timings.eval_time_ms},
|
||||
{"predicted_per_token_ms", timings.eval_time_ms / timings.n_eval},
|
||||
{"predicted_per_second", 1e3 / timings.eval_time_ms * timings.n_eval},
|
||||
{"predicted_ms", timings.t_eval_ms},
|
||||
{"predicted_per_token_ms", timings.t_eval_ms / timings.n_eval},
|
||||
{"predicted_per_second", 1e3 / timings.t_eval_ms * timings.n_eval},
|
||||
};
|
||||
}
|
||||
|
||||
|
36
llama.cpp
36
llama.cpp
@ -3480,34 +3480,34 @@ llama_token llama_token_nl() {
|
||||
return 13;
|
||||
}
|
||||
|
||||
llama_timings llama_get_timings(struct llama_context * ctx) {
|
||||
llama_timings timings;
|
||||
struct llama_timings llama_get_timings(struct llama_context * ctx) {
|
||||
struct llama_timings result = {
|
||||
/*.t_start_ms =*/ 1e-3 * ctx->t_start_us,
|
||||
/*.t_end_ms =*/ 1.00 * ggml_time_ms(),
|
||||
/*.t_load_ms =*/ 1e-3 * ctx->t_load_us,
|
||||
/*.t_sample_ms =*/ 1e-3 * ctx->t_sample_us,
|
||||
/*.t_p_eval_ms =*/ 1e-3 * ctx->t_p_eval_us,
|
||||
/*.t_eval_ms =*/ 1e-3 * ctx->t_eval_us,
|
||||
|
||||
timings.t_end_ms = ggml_time_ms();
|
||||
timings.t_start_ms = 1e-3 * ctx->t_start_us;
|
||||
timings.load_time_ms = 1e-3 * ctx->t_load_us;
|
||||
timings.sample_time_ms = 1e-3 * ctx->t_sample_us;
|
||||
timings.prompt_eval_time_ms = 1e-3 * ctx->t_p_eval_us;
|
||||
timings.eval_time_ms = 1e-3 * ctx->t_eval_us;
|
||||
/*.n_sample =*/ std::max(1, ctx->n_sample),
|
||||
/*.n_p_eval =*/ std::max(1, ctx->n_p_eval),
|
||||
/*.n_eval =*/ std::max(1, ctx->n_eval),
|
||||
};
|
||||
|
||||
timings.n_sample = std::max(1, ctx->n_sample);
|
||||
timings.n_p_eval = std::max(1, ctx->n_p_eval);
|
||||
timings.n_eval = std::max(1, ctx->n_eval);
|
||||
|
||||
return timings;
|
||||
return result;
|
||||
}
|
||||
|
||||
void llama_print_timings(struct llama_context * ctx) {
|
||||
llama_timings timings = llama_get_timings(ctx);
|
||||
const llama_timings timings = llama_get_timings(ctx);
|
||||
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, timings.load_time_ms);
|
||||
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
|
||||
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, timings.sample_time_ms, timings.n_sample, timings.sample_time_ms / timings.n_sample, 1e3 / timings.sample_time_ms * timings.n_sample);
|
||||
__func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
|
||||
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, timings.prompt_eval_time_ms, timings.n_p_eval, timings.prompt_eval_time_ms / timings.n_p_eval, 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval);
|
||||
__func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
|
||||
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
|
||||
__func__, timings.eval_time_ms, timings.n_eval, timings.eval_time_ms / timings.n_eval, 1e3 / timings.eval_time_ms * timings.n_eval);
|
||||
__func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
|
||||
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
|
||||
}
|
||||
|
||||
|
10
llama.h
10
llama.h
@ -138,10 +138,10 @@ extern "C" {
|
||||
struct llama_timings {
|
||||
double t_start_ms;
|
||||
double t_end_ms;
|
||||
double load_time_ms;
|
||||
double sample_time_ms;
|
||||
double prompt_eval_time_ms;
|
||||
double eval_time_ms;
|
||||
double t_load_ms;
|
||||
double t_sample_ms;
|
||||
double t_p_eval_ms;
|
||||
double t_eval_ms;
|
||||
|
||||
int32_t n_sample;
|
||||
int32_t n_p_eval;
|
||||
@ -345,7 +345,7 @@ extern "C" {
|
||||
LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
|
||||
|
||||
// Performance information
|
||||
LLAMA_API llama_timings llama_get_timings(struct llama_context * ctx);
|
||||
LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
|
||||
LLAMA_API void llama_print_timings(struct llama_context * ctx);
|
||||
LLAMA_API void llama_reset_timings(struct llama_context * ctx);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user