* Workaround for recalculating logits in cached prompts
parent 0e730dd23b
commit 248367605e
@@ -360,6 +360,12 @@ int main(int argc, char ** argv) {
                 }
             }
             if (i > 0) {
+                // check if we've used up all the prompt but not all cached tokens
+                if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
+                    // force revaluation of the last token to recalculate logits
+                    i--;
+                    n_past--;
+                }
                 embd.erase(embd.begin(), embd.begin() + i);
             }
         }
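The problem this works around, as the added comments indicate: when a saved session fully covers the prompt, the reuse loop consumes every token in embd straight from session_tokens, so nothing is evaluated and no logits are produced for sampling the next token. The added check detects that case and backs up one token (i--, n_past--), forcing the last prompt token to be re-evaluated so its logits are recalculated.

Below is a minimal, self-contained C++ sketch of that index arithmetic. The vectors and values are made up, and the prefix-matching loop is simplified from the one in llama.cpp's main example; treat it as an illustration of the workaround, not the actual program.

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical stand-ins for the variables in the patched main.cpp:
    // embd holds the prompt tokens to evaluate, session_tokens the cached ones.
    std::vector<int> embd           = {1, 2, 3, 4};
    std::vector<int> session_tokens = {1, 2, 3, 4, 5, 6};

    int n_past             = 0;
    int n_session_consumed = 0;

    // Reuse the matching prefix from the session cache instead of re-evaluating.
    size_t i = 0;
    for (; i < embd.size() && n_session_consumed < (int) session_tokens.size(); i++) {
        if (embd[i] != session_tokens[n_session_consumed]) {
            break;
        }
        n_past++;
        n_session_consumed++;
    }

    if (i > 0) {
        // The workaround from the commit: the whole prompt was served from the
        // cache, so nothing would be evaluated and no logits would exist for
        // sampling. Step back one token so the last one is re-evaluated.
        if (embd.size() == i && n_session_consumed < (int) session_tokens.size()) {
            i--;
            n_past--;
        }
        embd.erase(embd.begin(), embd.begin() + i);
    }

    // embd now holds exactly the tokens that still need evaluation: {4}.
    printf("n_past = %d, tokens left to eval = %zu\n", n_past, embd.size());
    return 0;
}

Compiled with, say, g++ -std=c++11, this prints "n_past = 3, tokens left to eval = 1": one prompt token remains to be evaluated, and that evaluation is what regenerates the logits.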