mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 06:10:29 +01:00
Respect the maximum number of tokens in interactive mode. (#298)
Co-authored-by: Johnman <johnman@github>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
50fae10d03
commit
368d0c8a9e
7
main.cpp
7
main.cpp
@ -1062,7 +1062,6 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// end of text token
|
// end of text token
|
||||||
|
|
||||||
if (embd.back() == EOS_TOKEN_ID) {
|
if (embd.back() == EOS_TOKEN_ID) {
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
@ -1071,6 +1070,12 @@ int main(int argc, char ** argv) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
|
||||||
|
if (params.interactive && remaining_tokens <= 0) {
|
||||||
|
remaining_tokens = params.n_predict;
|
||||||
|
is_interacting = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
|
Loading…
Reference in New Issue
Block a user