From cea1486ecf34a1c7e014a9e290eb458f5a11f876 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 24 Sep 2024 10:15:35 +0300 Subject: [PATCH] log : add CONT level for continuing previous log entry (#9610) --- common/log.cpp | 2 +- common/log.h | 2 ++ examples/infill/infill.cpp | 14 +++++++------- examples/main/main.cpp | 28 ++++++++++++++-------------- ggml/include/ggml.h | 1 + src/llama-impl.h | 2 ++ src/llama.cpp | 4 ++-- 7 files changed, 29 insertions(+), 24 deletions(-) diff --git a/common/log.cpp b/common/log.cpp index 2825a227e..5a844ed59 100644 --- a/common/log.cpp +++ b/common/log.cpp @@ -82,7 +82,7 @@ struct gpt_log_entry { } } - if (level != GGML_LOG_LEVEL_NONE && prefix) { + if (level != GGML_LOG_LEVEL_NONE && level != GGML_LOG_LEVEL_CONT && prefix) { if (timestamp) { // [M.s.ms.us] fprintf(fcur, "%s%d.%02d.%03d.%03d%s ", diff --git a/common/log.h b/common/log.h index d13f72d89..84f9b3ed7 100644 --- a/common/log.h +++ b/common/log.h @@ -83,8 +83,10 @@ void gpt_log_set_timestamps(struct gpt_log * log, bool timestamps); // w #define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN, 0, __VA_ARGS__) #define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0, __VA_ARGS__) #define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__) +#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT, 0, __VA_ARGS__) #define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO, verbosity, __VA_ARGS__) #define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN, verbosity, __VA_ARGS__) #define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__) #define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__) +#define LOG_CNTV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_CONT, verbosity, __VA_ARGS__) diff --git a/examples/infill/infill.cpp b/examples/infill/infill.cpp index 35607276a..d52425ae6 100644 --- a/examples/infill/infill.cpp +++ b/examples/infill/infill.cpp @@ -263,9 +263,9 @@ int main(int argc, char ** argv) { if (params.n_keep > 0) { LOG_INF("%s: static prompt based on n_keep: '", __func__); for (int i = 0; i < params.n_keep; i++) { - LOG("%s", llama_token_to_piece(ctx, embd_inp[i]).c_str()); + LOG_CNT("%s", llama_token_to_piece(ctx, embd_inp[i]).c_str()); } - LOG("'\n"); + LOG_CNT("'\n"); } LOG_INF("\n"); } @@ -306,8 +306,8 @@ int main(int argc, char ** argv) { LOG_INF("generate: n_ctx = %d, n_batch = %d, n_predict = %d, n_keep = %d\n", n_ctx, params.n_batch, params.n_predict, params.n_keep); - LOG("\n"); - LOG("\n##### Infill mode #####\n\n"); + LOG_INF("\n"); + LOG_INF("\n##### Infill mode #####\n\n"); if (params.interactive) { const char *control_message; if (params.multiline_input) { @@ -318,11 +318,11 @@ int main(int argc, char ** argv) { " - To return control without starting a new line, end your input with '/'.\n" " - If you want to submit another line, end your input with '\\'.\n"; } - LOG("== Running in interactive mode. ==\n"); + LOG_INF("== Running in interactive mode. ==\n"); #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) - LOG( " - Press Ctrl+C to interject at any time.\n"); + LOG_INF( " - Press Ctrl+C to interject at any time.\n"); #endif - LOG( "%s\n", control_message); + LOG_INF( "%s\n", control_message); is_interacting = params.interactive_first; } diff --git a/examples/main/main.cpp b/examples/main/main.cpp index c3041f1fb..6bbb1e13e 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -385,9 +385,9 @@ int main(int argc, char ** argv) { if (params.n_keep > add_bos) { LOG_INF("%s: static prompt based on n_keep: '", __func__); for (int i = 0; i < params.n_keep; i++) { - LOG("%s", llama_token_to_piece(ctx, embd_inp[i]).c_str()); + LOG_CNT("%s", llama_token_to_piece(ctx, embd_inp[i]).c_str()); } - LOG("'\n"); + LOG_CNT("'\n"); } LOG_INF("\n"); } @@ -409,40 +409,40 @@ int main(int argc, char ** argv) { } if (params.interactive) { - LOG("%s: interactive mode on.\n", __func__); + LOG_INF("%s: interactive mode on.\n", __func__); if (!params.antiprompt.empty()) { for (const auto & antiprompt : params.antiprompt) { - LOG("Reverse prompt: '%s'\n", antiprompt.c_str()); + LOG_INF("Reverse prompt: '%s'\n", antiprompt.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, antiprompt, false, true); for (int i = 0; i < (int) tmp.size(); i++) { - LOG("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); } } } } if (params.input_prefix_bos) { - LOG("Input prefix with BOS\n"); + LOG_INF("Input prefix with BOS\n"); } if (!params.input_prefix.empty()) { - LOG("Input prefix: '%s'\n", params.input_prefix.c_str()); + LOG_INF("Input prefix: '%s'\n", params.input_prefix.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, params.input_prefix, true, true); for (int i = 0; i < (int) tmp.size(); i++) { - LOG("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); } } } if (!params.input_suffix.empty()) { - LOG("Input suffix: '%s'\n", params.input_suffix.c_str()); + LOG_INF("Input suffix: '%s'\n", params.input_suffix.c_str()); if (params.verbose_prompt) { auto tmp = ::llama_tokenize(ctx, params.input_suffix, false, true); for (int i = 0; i < (int) tmp.size(); i++) { - LOG("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); + LOG_INF("%6d -> '%s'\n", tmp[i], llama_token_to_piece(ctx, tmp[i]).c_str()); } } } @@ -474,7 +474,7 @@ int main(int argc, char ** argv) { //GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * grp_attn_n"); // NOLINT LOG_INF("self-extend: n_ctx_train = %d, grp_attn_n = %d, grp_attn_w = %d\n", n_ctx_train, ga_n, ga_w); } - LOG("\n"); + LOG_INF("\n"); if (params.interactive) { const char * control_message; @@ -486,11 +486,11 @@ int main(int argc, char ** argv) { " - To return control without starting a new line, end your input with '/'.\n" " - If you want to submit another line, end your input with '\\'.\n"; } - LOG("== Running in interactive mode. ==\n"); + LOG_INF("== Running in interactive mode. ==\n"); #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) - LOG( " - Press Ctrl+C to interject at any time.\n"); + LOG_INF( " - Press Ctrl+C to interject at any time.\n"); #endif - LOG( "%s\n", control_message); + LOG_INF( "%s\n", control_message); is_interacting = params.interactive_first; } diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 2035001e9..d6c45c948 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -570,6 +570,7 @@ extern "C" { GGML_LOG_LEVEL_WARN = 2, GGML_LOG_LEVEL_ERROR = 3, GGML_LOG_LEVEL_DEBUG = 4, + GGML_LOG_LEVEL_CONT = 5, // continue previous log }; // this tensor... diff --git a/src/llama-impl.h b/src/llama-impl.h index 2bde75ec1..70f16b61c 100644 --- a/src/llama-impl.h +++ b/src/llama-impl.h @@ -28,6 +28,8 @@ void llama_log_callback_default(ggml_log_level level, const char * text, void * #define LLAMA_LOG_INFO(...) llama_log_internal(GGML_LOG_LEVEL_INFO , __VA_ARGS__) #define LLAMA_LOG_WARN(...) llama_log_internal(GGML_LOG_LEVEL_WARN , __VA_ARGS__) #define LLAMA_LOG_ERROR(...) llama_log_internal(GGML_LOG_LEVEL_ERROR, __VA_ARGS__) +#define LLAMA_LOG_DEBUG(...) llama_log_internal(GGML_LOG_LEVEL_DEBUG, __VA_ARGS__) +#define LLAMA_LOG_CONT(...) llama_log_internal(GGML_LOG_LEVEL_CONT , __VA_ARGS__) // // helpers diff --git a/src/llama.cpp b/src/llama.cpp index e5e0d1a66..c1ba2b301 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -18671,9 +18671,9 @@ struct llama_model * llama_load_model_from_file( unsigned percentage = (unsigned) (100 * progress); while (percentage > *cur_percentage_p) { *cur_percentage_p = percentage; - LLAMA_LOG("."); + LLAMA_LOG_CONT("."); if (percentage >= 100) { - LLAMA_LOG("\n"); + LLAMA_LOG_CONT("\n"); } } return true;