* Workaround for recalculating logits in cached prompts
parent 0e730dd23b
commit 248367605e
@@ -360,6 +360,12 @@ int main(int argc, char ** argv) {
                 }
             }
             if (i > 0) {
+                // check if we've used up all the prompt but not all cached tokens
+                if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
+                    // force revaluation of the last token to recalculate logits
+                    i--;
+                    n_past--;
+                }
                 embd.erase(embd.begin(), embd.begin() + i);
             }
         }
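The problem this works around, as the added comments indicate: when a saved session fully covers the prompt, the reuse loop consumes every token in embd straight from session_tokens, so nothing is evaluated and no logits are produced for sampling the next token. The added check detects that case and backs up one token (i--, n_past--), forcing the last prompt token to be re-evaluated so its logits are recalculated.

Below is a minimal, self-contained C++ sketch of that index arithmetic. The vectors and values are made up, and the prefix-matching loop is simplified from the one in llama.cpp's main example; treat it as an illustration of the workaround, not the actual program.

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical stand-ins for the variables in the patched main.cpp:
    // embd holds the prompt tokens to evaluate, session_tokens the cached ones.
    std::vector<int> embd           = {1, 2, 3, 4};
    std::vector<int> session_tokens = {1, 2, 3, 4, 5, 6};

    int n_past             = 0;
    int n_session_consumed = 0;

    // Reuse the matching prefix from the session cache instead of re-evaluating.
    size_t i = 0;
    for (; i < embd.size() && n_session_consumed < (int) session_tokens.size(); i++) {
        if (embd[i] != session_tokens[n_session_consumed]) {
            break;
        }
        n_past++;
        n_session_consumed++;
    }

    if (i > 0) {
        // The workaround from the commit: the whole prompt was served from the
        // cache, so nothing would be evaluated and no logits would exist for
        // sampling. Step back one token so the last one is re-evaluated.
        if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
            i--;
            n_past--;
        }
        embd.erase(embd.begin(), embd.begin() + i);
    }

    // embd now holds exactly the tokens that still need evaluation: {4}.
    printf("n_past = %d, tokens left to eval = %zu\n", n_past, embd.size());
    return 0;
}

Compiled with, say, g++ -std=c++11, this prints "n_past = 3, tokens left to eval = 1": one prompt token remains to be evaluated, and that evaluation is what regenerates the logits.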