From 0fd93cdef5e583aa980b3c0d693c0d207f0787a7 Mon Sep 17 00:00:00 2001
From: Nico Bosshard <nico@bosshome.ch>
Date: Mon, 12 Aug 2024 17:13:59 +0200
Subject: [PATCH] llama : model-based max number of graph nodes calculation
 (#8970)

* llama : model-based max number of graph nodes calculation

* Update src/llama.cpp

---------

Co-authored-by: slaren <slarengh@gmail.com>
---
 src/llama.cpp | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index aaf8db496..7f2f00031 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -3575,13 +3575,8 @@ namespace GGUFMeta {
 
 using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
 
-// TODO: update when needed or think of some clever automatic way to do this
-static size_t llama_model_max_nodes(const llama_model & /*model*/) {
-    //if (model.arch == LLM_ARCH_LLAMA && model.hparams.n_layer > ??) { // llama-3 405B
-    //    return 32768;
-    //}
-
-    return 8192;
+static size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max<size_t>(8192, model.tensors_by_name.size()*5);
 }
 
 struct llama_model_loader {