mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 13:28:50 +01:00
main : evaluate tokens in batches after swapping context (#1014)
* examples : evaluate tokens in batches after swapping context
* Update examples/main/main.cpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
8687c1f258
commit
9411288271
@ -264,7 +264,7 @@ int main(int argc, char ** argv) {
|
|||||||
// infinite text generation via context swapping
|
// infinite text generation via context swapping
|
||||||
// if we run out of context:
|
// if we run out of context:
|
||||||
// - take the n_keep first tokens from the original prompt (via n_past)
|
// - take the n_keep first tokens from the original prompt (via n_past)
|
||||||
// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in a batch
|
// - take half of the last (n_ctx - n_keep) tokens and recompute the logits in batches
|
||||||
if (n_past + (int) embd.size() > n_ctx) {
|
if (n_past + (int) embd.size() > n_ctx) {
|
||||||
const int n_left = n_past - params.n_keep;
|
const int n_left = n_past - params.n_keep;
|
||||||
|
|
||||||
@ -282,13 +282,21 @@ int main(int argc, char ** argv) {
|
|||||||
//printf("\n---\n");
|
//printf("\n---\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (llama_eval(ctx, embd.data(), embd.size(), n_past, params.n_threads)) {
|
// evaluate tokens in batches
|
||||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
// embd is typically prepared beforehand to fit within a batch, but not always
|
||||||
return 1;
|
for (int i = 0; i < (int) embd.size(); i += params.n_batch) {
|
||||||
|
int n_eval = (int) embd.size() - i;
|
||||||
|
if (n_eval > params.n_batch) {
|
||||||
|
n_eval = params.n_batch;
|
||||||
|
}
|
||||||
|
if (llama_eval(ctx, &embd[i], n_eval, n_past, params.n_threads)) {
|
||||||
|
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
n_past += n_eval;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
n_past += embd.size();
|
|
||||||
embd.clear();
|
embd.clear();
|
||||||
|
|
||||||
if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
|
if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
|
||||||
|
Loading…
Reference in New Issue
Block a user