diff --git a/ci/run.sh b/ci/run.sh
index 7d241ecc0..e06778219 100755
--- a/ci/run.sh
+++ b/ci/run.sh
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/bin/bash
#
# sample usage:
#
@@ -751,7 +751,8 @@ function gg_run_rerank_tiny {
model_f16="${path_models}/ggml-model-f16.gguf"
- (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
+ # for this model, the SEP token is "</s>"
+ (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
# sample output
# rerank score 0: 0.029
@@ -774,7 +775,7 @@ function gg_run_rerank_tiny {
check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
- check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.15" | tee -a $OUT/${ci}-rk-f16.log
+ check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
set +e
}
diff --git a/common/common.cpp b/common/common.cpp
index a0611f3d1..29df16c95 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -838,6 +838,31 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
return iparams;
}
+ if (params.reranking) {
+ bool ok = true;
+
+ if (llama_token_bos(model) == LLAMA_TOKEN_NULL) {
+ LOG_WRN("%s: warning: model does not have a BOS token, reranking will not work\n", __func__);
+ ok = false;
+ }
+
+ if (llama_token_eos(model) == LLAMA_TOKEN_NULL) {
+ LOG_WRN("%s: warning: model does not have an EOS token, reranking will not work\n", __func__);
+ ok = false;
+ }
+
+ if (llama_token_sep(model) == LLAMA_TOKEN_NULL) {
+ LOG_WRN("%s: warning: model does not have a SEP token, reranking will not work\n", __func__);
+ ok = false;
+ }
+
+ if (!ok) {
+ llama_free_model(model);
+
+ return iparams;
+ }
+ }
+
auto cparams = llama_context_params_from_gpt_params(params);
llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -855,6 +880,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
if (cvec.n_embd == -1) {
llama_free(lctx);
llama_free_model(model);
+
return iparams;
}
@@ -867,6 +893,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
if (err) {
llama_free(lctx);
llama_free_model(model);
+
return iparams;
}
}
@@ -889,7 +916,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
llama_lora_adapters_apply(lctx, iparams.lora_adapters);
}
- if (params.sparams.ignore_eos && llama_token_eos(model) == -1) {
+ if (params.sparams.ignore_eos && llama_token_eos(model) == LLAMA_TOKEN_NULL) {
LOG_WRN("%s: warning: model does not have an EOS token, ignoring --ignore-eos\n", __func__);
params.sparams.ignore_eos = false;
}
@@ -930,6 +957,7 @@ struct llama_init_result llama_init_from_gpt_params(gpt_params & params) {
iparams.model = model;
iparams.context = lctx;
+
return iparams;
}
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f343cc252..13e54e501 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2027,7 +2027,7 @@ struct server_context {
continue;
}
- // prompt: <s>query</s><s>doc</s>
+ // prompt: [BOS]query[EOS][SEP]doc[EOS]
prompt_tokens.clear();
prompt_tokens.push_back(llama_token_bos(model));
{
@@ -2035,7 +2035,7 @@ struct server_context {
prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
}
prompt_tokens.push_back(llama_token_eos(model));
- prompt_tokens.push_back(llama_token_bos(model));
+ prompt_tokens.push_back(llama_token_sep(model));
{
const auto part = tokenize(slot.prompt[1], false);
prompt_tokens.insert(prompt_tokens.end(), part.begin(), part.end());
diff --git a/src/llama-vocab.h b/src/llama-vocab.h
index 069bdc423..28bad9135 100644
--- a/src/llama-vocab.h
+++ b/src/llama-vocab.h
@@ -40,17 +40,17 @@ struct llama_vocab {
id special_bos_id = 1;
id special_eos_id = 2;
id special_unk_id = 0;
- id special_sep_id = -1;
- id special_pad_id = -1;
- id special_cls_id = -1;
- id special_mask_id = -1;
+ id special_sep_id = LLAMA_TOKEN_NULL;
+ id special_pad_id = LLAMA_TOKEN_NULL;
+ id special_cls_id = LLAMA_TOKEN_NULL;
+ id special_mask_id = LLAMA_TOKEN_NULL;
id linefeed_id = 13;
- id special_prefix_id = -1;
- id special_suffix_id = -1;
- id special_middle_id = -1;
- id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
- id special_eom_id = -1;
+ id special_prefix_id = LLAMA_TOKEN_NULL;
+ id special_suffix_id = LLAMA_TOKEN_NULL;
+ id special_middle_id = LLAMA_TOKEN_NULL;
+ id special_eot_id = LLAMA_TOKEN_NULL; // TODO: move above after "eos_id", and here add "file separator" token
+ id special_eom_id = LLAMA_TOKEN_NULL;
// set of all tokens that cause "end of generation"
std::set special_eog_ids;
diff --git a/src/llama.cpp b/src/llama.cpp
index 3443b0689..bf6fd9277 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -2412,7 +2412,7 @@ struct llama_hparams {
// needed by encoder-decoder models (e.g. T5, FLAN-T5)
// ref: https://github.com/ggerganov/llama.cpp/pull/8141
- llama_token dec_start_token_id = -1;
+ llama_token dec_start_token_id = LLAMA_TOKEN_NULL;
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_NONE;
enum llama_rope_type rope_type = LLAMA_ROPE_TYPE_NONE;