Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-27 22:59:24 +01:00)
llama : rename batch_all to batch (#8881)
This commit addresses the TODO in the code to rename the `batch_all` parameter to `batch` in `llama_decode_internal`.
This commit is contained in:
parent: 17bb928080
commit: 6f55bccbb8
@ -17134,10 +17134,10 @@ static void llama_graph_compute(
|
|||||||
//
|
//
|
||||||
static int llama_decode_internal(
|
static int llama_decode_internal(
|
||||||
llama_context & lctx,
|
llama_context & lctx,
|
||||||
llama_batch batch_all) { // TODO: rename back to batch
|
llama_batch batch) {
|
||||||
|
|
||||||
lctx.is_encoding = false;
|
lctx.is_encoding = false;
|
||||||
const uint32_t n_tokens_all = batch_all.n_tokens;
|
const uint32_t n_tokens_all = batch.n_tokens;
|
||||||
|
|
||||||
if (n_tokens_all == 0) {
|
if (n_tokens_all == 0) {
|
||||||
LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
|
LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__);
|
||||||
@ -17148,12 +17148,12 @@ static int llama_decode_internal(
|
|||||||
const auto & hparams = model.hparams;
|
const auto & hparams = model.hparams;
|
||||||
const auto & cparams = lctx.cparams;
|
const auto & cparams = lctx.cparams;
|
||||||
|
|
||||||
GGML_ASSERT((!batch_all.token && batch_all.embd) || (batch_all.token && !batch_all.embd)); // NOLINT
|
GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT
|
||||||
|
|
||||||
if (batch_all.token) {
|
if (batch.token) {
|
||||||
for (uint32_t i = 0; i < n_tokens_all; ++i) {
|
for (uint32_t i = 0; i < n_tokens_all; ++i) {
|
||||||
if (batch_all.token[i] < 0 || (uint32_t)batch_all.token[i] >= model.vocab.n_vocab) {
|
if (batch.token[i] < 0 || (uint32_t)batch.token[i] >= model.vocab.n_vocab) {
|
||||||
LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch_all.token[i]);
|
LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -17184,9 +17184,9 @@ static int llama_decode_internal(
|
|||||||
lctx.embd_seq.clear();
|
lctx.embd_seq.clear();
|
||||||
|
|
||||||
// count outputs
|
// count outputs
|
||||||
if (batch_all.logits && !embd_pooled) {
|
if (batch.logits && !embd_pooled) {
|
||||||
for (uint32_t i = 0; i < n_tokens_all; ++i) {
|
for (uint32_t i = 0; i < n_tokens_all; ++i) {
|
||||||
n_outputs += batch_all.logits[i] != 0;
|
n_outputs += batch.logits[i] != 0;
|
||||||
}
|
}
|
||||||
} else if (lctx.logits_all || embd_pooled) {
|
} else if (lctx.logits_all || embd_pooled) {
|
||||||
n_outputs = n_tokens_all;
|
n_outputs = n_tokens_all;
|
||||||
@ -17195,7 +17195,7 @@ static int llama_decode_internal(
|
|||||||
n_outputs = 1;
|
n_outputs = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
lctx.sbatch.from_batch(batch_all, n_embd,
|
lctx.sbatch.from_batch(batch, n_embd,
|
||||||
/* simple_split */ !kv_self.recurrent,
|
/* simple_split */ !kv_self.recurrent,
|
||||||
/* logits_all */ n_outputs == n_tokens_all);
|
/* logits_all */ n_outputs == n_tokens_all);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user