Mirror of https://github.com/ggerganov/llama.cpp.git (synced 2025-01-13 13:52:22 +01:00)
server : respect --special cli arg (#8553)

commit 3807c3de04 (parent e02b597be3)
@@ -1182,7 +1182,7 @@ struct server_context {
 
     bool process_token(completion_token_output & result, server_slot & slot) {
         // remember which tokens were sampled - used for repetition penalties during sampling
-        const std::string token_str = llama_token_to_piece(ctx, result.tok, false);
+        const std::string token_str = llama_token_to_piece(ctx, result.tok, params.special);
         slot.sampled = result.tok;
 
         // search stop word and delete it
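The practical effect of the one-line change: when detokenizing each sampled token, the server now forwards params.special (set by the --special CLI flag) instead of a hard-coded false, so special tokens are no longer unconditionally suppressed from the generated text. Below is a minimal standalone sketch of that behavior, not llama.cpp code itself: params_t, token_to_piece, and the toy vocabulary are hypothetical stand-ins for the server's params struct and the common-library helper llama_token_to_piece(ctx, tok, special).

#include <iostream>
#include <string>
#include <unordered_map>
#include <utility>

struct params_t {
    bool special = false;  // would be set by the --special CLI flag
};

// Stand-in vocabulary: token id -> (piece text, is-special flag).
static const std::unordered_map<int, std::pair<std::string, bool>> vocab = {
    {1, {"Hello",      false}},
    {2, {" world",     false}},
    {3, {"<|im_end|>", true }},  // hypothetical special token
};

// Stand-in for llama_token_to_piece(ctx, tok, special): the piece of a
// special token is only rendered when `special` is true.
static std::string token_to_piece(int tok, bool special) {
    const auto it = vocab.find(tok);
    if (it == vocab.end()) {
        return "";
    }
    if (it->second.second && !special) {
        return "";  // pre-patch server behavior: special tokens always suppressed
    }
    return it->second.first;
}

int main() {
    params_t params;
    params.special = true;  // as if the server had been started with --special

    // Before the patch the third argument was effectively hard-coded to false,
    // so the special token below would never reach the client; with
    // params.special forwarded, it is included in the streamed text.
    for (int tok : {1, 2, 3}) {
        std::cout << token_to_piece(tok, params.special);
    }
    std::cout << "\n";  // prints: Hello world<|im_end|>
    return 0;
}

In actual use the flag would be supplied when launching the server (e.g. something like ./llama-server -m model.gguf --special), after which special tokens such as end-of-turn markers appear verbatim in the text returned to clients.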