Fixed the bos/eos token IDs (both are 11 according to the config.json of Falcon-7B/40B). Also: do not auto-insert a space or a bos/eos token at the beginning of the prompt (this seems to be LLaMA-specific behavior).

This commit is contained in:
Jan Ploski 2023-06-16 19:36:27 +02:00
parent 3778836046
commit 4a37251a18
2 changed files with 4 additions and 12 deletions

View File

@ -175,10 +175,8 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd_inp; std::vector<llama_token> embd_inp;
if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) { if (params.interactive_first || params.instruct || !params.prompt.empty() || session_tokens.empty()) {
// Add a space in front of the first character to match OG llama tokenizer behavior // Falcon does not have a dedicated bos token (bos==eos), so don't inject it here
params.prompt.insert(0, 1, ' '); embd_inp = ::falcon_tokenize(ctx, params.prompt, false);
embd_inp = ::falcon_tokenize(ctx, params.prompt, true);
} else { } else {
embd_inp = session_tokens; embd_inp = session_tokens;
} }

View File

@ -1348,12 +1348,6 @@ static bool falcon_eval_internal(
const int n_threads, const int n_threads,
const char * cgraph_fname) { const char * cgraph_fname) {
// enforce that the first token is BOS
if (n_past == 0 && tokens[0] != falcon_token_bos()) {
fprintf(stderr, "%s: first token must be BOS\n", __func__);
return false;
}
const int64_t t_start_us = ggml_time_us(); const int64_t t_start_us = ggml_time_us();
const int N = n_tokens; const int N = n_tokens;
@ -3389,11 +3383,11 @@ const char * falcon_token_to_str(const struct falcon_context * ctx, llama_token
} }
llama_token falcon_token_bos() { llama_token falcon_token_bos() {
return 1; return 11;
} }
llama_token falcon_token_eos() { llama_token falcon_token_eos() {
return 2; return 11;
} }
llama_token falcon_token_nl() { llama_token falcon_token_nl() {