diff --git a/src/llama.cpp b/src/llama.cpp
index ffaa6f789..dcb015d12 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17134,10 +17134,10 @@ static void llama_graph_compute(
 //
 static int llama_decode_internal(
          llama_context & lctx,
-           llama_batch   batch_all) { // TODO: rename back to batch
+           llama_batch   batch) {
 
     lctx.is_encoding = false;
-    const uint32_t n_tokens_all = batch_all.n_tokens;
+    const uint32_t n_tokens_all = batch.n_tokens;
 
     if (n_tokens_all == 0) {
         LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
@@ -17148,12 +17148,12 @@ static int llama_decode_internal(
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;
 
-    GGML_ASSERT((!batch_all.token && batch_all.embd) || (batch_all.token && !batch_all.embd)); // NOLINT
+    GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT
 
-    if (batch_all.token) {
+    if (batch.token) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
-            if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
-                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch_all.token[i]);
+            if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
+                LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]);
                 return -1;
             }
         }
@@ -17184,9 +17184,9 @@ static int llama_decode_internal(
     lctx.embd_seq.clear();
 
     // count outputs
-    if (batch_all.logits && !embd_pooled) {
+    if (batch.logits && !embd_pooled) {
         for (uint32_t i = 0; i < n_tokens_all; ++i) {
-            n_outputs += batch_all.logits[i] != 0;
+            n_outputs += batch.logits[i] != 0;
         }
     } else if (lctx.logits_all || embd_pooled) {
         n_outputs = n_tokens_all;
@@ -17195,7 +17195,7 @@ static int llama_decode_internal(
         n_outputs = 1;
     }
 
-    lctx.sbatch.from_batch(batch_all, n_embd,
+    lctx.sbatch.from_batch(batch, n_embd,
         /* simple_split */ !kv_self.recurrent,
         /* logits_all */   n_outputs == n_tokens_all);
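
The patch is a pure rename (resolving the old "rename back to batch" TODO); the validation and output-counting logic is unchanged. For context, below is a minimal caller-side sketch, not part of this patch, showing how a batch built with the public llama.h API flows into llama_decode_internal via llama_decode and exercises the paths touched above: the token range check, the token/embd assertion (embd = 0 leaves batch.embd null), and the per-token logits flags that feed n_outputs. The helper name decode_prompt and the assumption of an already-created llama_context are illustrative only.

    // Illustrative sketch (not part of this patch). Assumes ctx was created
    // with llama_new_context_with_model() and prompt is already tokenized.
    #include "llama.h"
    #include <vector>

    static int decode_prompt(llama_context * ctx, const std::vector<llama_token> & prompt) {
        // embd = 0 allocates the token array and leaves batch.embd null,
        // satisfying the GGML_ASSERT in llama_decode_internal
        llama_batch batch = llama_batch_init((int32_t) prompt.size(), /*embd*/ 0, /*n_seq_max*/ 1);

        for (size_t i = 0; i < prompt.size(); ++i) {
            batch.token   [i] = prompt[i];       // checked against model.vocab.n_vocab above
            batch.pos     [i] = (llama_pos) i;
            batch.n_seq_id[i] = 1;
            batch.seq_id  [i][0] = 0;
            batch.logits  [i] = (i == prompt.size() - 1); // only the last token adds to n_outputs
        }
        batch.n_tokens = (int32_t) prompt.size();

        const int ret = llama_decode(ctx, batch); // reaches llama_decode_internal(lctx, batch)
        llama_batch_free(batch);
        return ret;
    }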