Generate graph plots
diff --git a/llama.cpp b/llama.cpp
index 3413288..7578bfa 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2311,7 +2311,7 @@ static struct ggml_cgraph * llm_build_llama(
}
ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
- for (int il = 0; il < n_layer; ++il) {
+ for (int il = 0; il < 1; ++il) {
ggml_format_name(inpL, "layer_inp_%d", il);
offload_func_t offload_func = llama_nop;
@@ -2993,9 +2993,10 @@ static bool llama_eval_internal(
#endif
// plot the computation graph in dot format (for debugging purposes)
- //if (n_past%100 == 0) {
- // ggml_graph_dump_dot(gf, NULL, "llama.dot");
- //}
+ //if (N == 7) {
+ if (n_past%45 == 0) {
+ ggml_graph_dump_dot(gf, NULL, "llama.dot");
+ }
// extract logits
{
Note: `n_past` is now replaced with `batch.pos[]`.
- LLaMAv2 7B, `n_past == 45`, `n_batch == 1`
- LLaMAv2 7B, `n_past == 0`, `n_batch == 7`
- LLaMAv2 7B, `n_past == 4`, `n_batch == 3`