main : inject reverse prompt after EOS + update examples/chat.sh
parent 8af1991e2a
commit e3da126f2a
examples/chat.sh
@@ -11,6 +11,8 @@ cd ..
 #
 # "--keep 48" is based on the contents of prompts/chat-with-bob.txt
 #
-./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \
-    --repeat_penalty 1.0 --color -i \
-    -r "User:" -f prompts/chat-with-bob.txt
+./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n -1 --keep 48 \
+    --repeat_penalty 1.0 --color \
+    -i --interactive-first \
+    -r "User:" --in-prefix " " \
+    -f prompts/chat-with-bob.txt
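For context: in the main example, -n -1 removes the fixed 256-token generation cap so a reply runs until a stop condition, --interactive-first starts interactive mode and waits for the user's input before generating anything, --in-prefix " " prepends a space to each user input, and -r "User:" sets the reverse prompt that halts generation and returns control to the user.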
examples/main/main.cpp
@@ -634,9 +634,14 @@ int main(int argc, char ** argv) {
                 llama_grammar_accept_token(ctx, grammar, id);
             }
 
-            // replace end of text token with newline token when in interactive mode
+            // replace end of text token with newline token and inject reverse prompt when in interactive mode
             if (id == llama_token_eos() && params.interactive && !params.instruct && !params.input_prefix_bos) {
                 id = llama_token_nl();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
             }
 
             last_n_tokens.erase(last_n_tokens.begin());
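The effect of the new branch is easier to see in isolation. Below is a minimal, self-contained C++ sketch of the same control flow; toy_tokenize, TOKEN_EOS and TOKEN_NL are stand-ins, not the llama.cpp API. When the model samples end-of-text in interactive mode, the token is rewritten to a newline and the first reverse prompt is queued as the next model input, so the model addresses the user instead of terminating.

    // Sketch of the injected-reverse-prompt flow with toy types (assumed
    // names, not the real llama.cpp API).
    #include <iostream>
    #include <string>
    #include <vector>

    using token = int;
    static const token TOKEN_EOS = 2;  // stand-in for llama_token_eos()
    static const token TOKEN_NL  = 13; // stand-in for llama_token_nl()

    // Toy tokenizer: one token per byte, offset past the special ids.
    static std::vector<token> toy_tokenize(const std::string & text) {
        std::vector<token> out;
        for (unsigned char c : text) {
            out.push_back(100 + c);
        }
        return out;
    }

    int main() {
        const bool interactive = true;
        const std::vector<std::string> antiprompt = { "User:" };

        std::vector<token> embd_inp; // pending input tokens, as in main.cpp
        token id = TOKEN_EOS;        // pretend the model just sampled end-of-text

        if (id == TOKEN_EOS && interactive) {
            // keep the transcript flowing with a newline instead of stopping
            id = TOKEN_NL;
            if (!antiprompt.empty()) {
                // tokenize and queue the first reverse prompt so the model
                // hands the turn back to the user
                const auto first = toy_tokenize(antiprompt.front());
                embd_inp.insert(embd_inp.end(), first.begin(), first.end());
            }
        }

        std::cout << "emitted token " << id << ", queued "
                  << embd_inp.size() << " reverse-prompt tokens\n";
    }

Queuing the reverse prompt, rather than merely printing it, matters because the injected tokens become part of the model's context: the next generation step is conditioned on "User:" already having appeared in the transcript.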