llama : uniform variable names + struct init

This commit is contained in:
Georgi Gerganov 2023-07-05 23:15:54 +03:00
parent 4fe95c6985
commit 26cc1bd7a2
No known key found for this signature in database
GPG Key ID: 449E073F9DC10735
3 changed files with 29 additions and 29 deletions

View File

@ -915,14 +915,14 @@ static json format_timings(llama_server_context &llama)
return json{ return json{
{"prompt_n", timings.n_eval}, {"prompt_n", timings.n_eval},
{"prompt_ms", timings.prompt_eval_time_ms}, {"prompt_ms", timings.t_p_eval_ms},
{"prompt_per_token_ms", timings.prompt_eval_time_ms / timings.n_p_eval}, {"prompt_per_token_ms", timings.t_p_eval_ms / timings.n_p_eval},
{"prompt_per_second", 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval}, {"prompt_per_second", 1e3 / timings.t_p_eval_ms * timings.n_p_eval},
{"predicted_n", timings.n_eval}, {"predicted_n", timings.n_eval},
{"predicted_ms", timings.eval_time_ms}, {"predicted_ms", timings.t_eval_ms},
{"predicted_per_token_ms", timings.eval_time_ms / timings.n_eval}, {"predicted_per_token_ms", timings.t_eval_ms / timings.n_eval},
{"predicted_per_second", 1e3 / timings.eval_time_ms * timings.n_eval}, {"predicted_per_second", 1e3 / timings.t_eval_ms * timings.n_eval},
}; };
} }

View File

@ -3480,34 +3480,34 @@ llama_token llama_token_nl() {
return 13; return 13;
} }
// Returns a llama_timings value filled from the counters stored on ctx.
//
// NOTE: this is a side-by-side diff rendering. Every line below carries the
// pre-change text first and the post-change text second; they are two
// revisions of the same function, not one compilable body.
//
// Both revisions compute the same values:
//   - the ctx->t_*_us counters are scaled by 1e-3 into the *_ms fields;
//   - t_end_ms is taken from ggml_time_ms() at the moment of the call;
//   - n_sample / n_p_eval / n_eval are passed through std::max(1, ...), so the
//     returned counts are never below 1 (llama_print_timings divides by them).
//
// The post-change revision initializes the struct positionally. The
// /*.field =*/ markers are comments only, so the order of the values has to
// follow the field order declared in struct llama_timings.
llama_timings llama_get_timings(struct llama_context * ctx) { struct llama_timings llama_get_timings(struct llama_context * ctx) {
llama_timings timings; struct llama_timings result = {
/*.t_start_ms =*/ 1e-3 * ctx->t_start_us,
/*.t_end_ms =*/ 1.00 * ggml_time_ms(),
/*.t_load_ms =*/ 1e-3 * ctx->t_load_us,
/*.t_sample_ms =*/ 1e-3 * ctx->t_sample_us,
/*.t_p_eval_ms =*/ 1e-3 * ctx->t_p_eval_us,
/*.t_eval_ms =*/ 1e-3 * ctx->t_eval_us,
// Counts are clamped to a minimum of 1 in both revisions.
timings.t_end_ms = ggml_time_ms(); /*.n_sample =*/ std::max(1, ctx->n_sample),
timings.t_start_ms = 1e-3 * ctx->t_start_us; /*.n_p_eval =*/ std::max(1, ctx->n_p_eval),
timings.load_time_ms = 1e-3 * ctx->t_load_us; /*.n_eval =*/ std::max(1, ctx->n_eval),
timings.sample_time_ms = 1e-3 * ctx->t_sample_us; };
timings.prompt_eval_time_ms = 1e-3 * ctx->t_p_eval_us;
timings.eval_time_ms = 1e-3 * ctx->t_eval_us;
timings.n_sample = std::max(1, ctx->n_sample); return result;
timings.n_p_eval = std::max(1, ctx->n_p_eval);
timings.n_eval = std::max(1, ctx->n_eval);
return timings;
} }
// Fetches the timings for ctx via llama_get_timings() and writes a report to
// stderr: a blank line, then load time, sample time, prompt eval time, eval
// time and total time (t_end_ms - t_start_ms). The sample / prompt eval / eval
// lines also print the count, the ms-per-item figure (elapsed / count) and the
// per-second figure (1e3 / elapsed * count).
//
// NOTE: side-by-side diff rendering — each line holds the pre-change statement
// followed by the post-change statement; the revisions differ in the
// llama_timings field names and in the post-change `const` on the local.
//
// NOTE(review): the per-second figures divide 1e3 by the elapsed ms value; an
// elapsed value of exactly 0.0 would print a non-finite number — presumably
// acceptable for a diagnostic print, confirm.
void llama_print_timings(struct llama_context * ctx) { void llama_print_timings(struct llama_context * ctx) {
llama_timings timings = llama_get_timings(ctx); const llama_timings timings = llama_get_timings(ctx);
fprintf(stderr, "\n"); fprintf(stderr, "\n");
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, timings.load_time_ms); fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, timings.t_load_ms);
fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n", fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.sample_time_ms, timings.n_sample, timings.sample_time_ms / timings.n_sample, 1e3 / timings.sample_time_ms * timings.n_sample); __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n", fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.prompt_eval_time_ms, timings.n_p_eval, timings.prompt_eval_time_ms / timings.n_p_eval, 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval); __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n", fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
__func__, timings.eval_time_ms, timings.n_eval, timings.eval_time_ms / timings.n_eval, 1e3 / timings.eval_time_ms * timings.n_eval); __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms)); fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
} }

10
llama.h
View File

@ -138,10 +138,10 @@ extern "C" {
struct llama_timings { struct llama_timings {
double t_start_ms; double t_start_ms;
double t_end_ms; double t_end_ms;
double load_time_ms; double t_load_ms;
double sample_time_ms; double t_sample_ms;
double prompt_eval_time_ms; double t_p_eval_ms;
double eval_time_ms; double t_eval_ms;
int32_t n_sample; int32_t n_sample;
int32_t n_p_eval; int32_t n_p_eval;
@ -345,7 +345,7 @@ extern "C" {
LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates); LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
// Performance information // Performance information
// llama_get_timings returns a llama_timings value (by value) built from ctx.
// The post-change column spells the return type `struct llama_timings`; the
// hunk header shows these declarations sit inside an extern "C" block, and the
// tagged form is the one that is also valid when the header is compiled as C.
LLAMA_API llama_timings llama_get_timings(struct llama_context * ctx); LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
// llama_print_timings writes the timing report for ctx to stderr.
LLAMA_API void llama_print_timings(struct llama_context * ctx); LLAMA_API void llama_print_timings(struct llama_context * ctx);
// NOTE(review): the definition of llama_reset_timings is not visible in this
// diff — presumably it clears the counters read by llama_get_timings; confirm.
LLAMA_API void llama_reset_timings(struct llama_context * ctx); LLAMA_API void llama_reset_timings(struct llama_context * ctx);