mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-27 06:39:25 +01:00
lookup: fibonacci hashing, fix crashes (#8548)
This commit is contained in:
parent
b3283448ce
commit
e02b597be3
@@ -37,11 +37,18 @@ struct llama_ngram {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct llama_token_hash_function {
|
||||||
|
size_t operator()(const llama_token token) const {
|
||||||
|
// see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
|
||||||
|
return token * 11400714819323198485llu;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
struct llama_ngram_hash_function {
|
struct llama_ngram_hash_function {
|
||||||
size_t operator()(const llama_ngram & ngram) const {
|
size_t operator()(const llama_ngram & ngram) const {
|
||||||
size_t hash = 0;
|
size_t hash = llama_token_hash_function{}(ngram.tokens[0]);
|
||||||
for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
|
for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
|
||||||
hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
|
hash ^= llama_token_hash_function{}(ngram.tokens[i]);
|
||||||
}
|
}
|
||||||
return hash;
|
return hash;
|
||||||
}
|
}
|
||||||
|
@@ -31,7 +31,6 @@ int main(int argc, char ** argv){
|
|||||||
|
|
||||||
// load the model
|
// load the model
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
|
|
||||||
|
|
||||||
// tokenize the prompt
|
// tokenize the prompt
|
||||||
std::vector<llama_token> inp;
|
std::vector<llama_token> inp;
|
||||||
@@ -65,7 +64,7 @@ int main(int argc, char ** argv){
|
|||||||
}
|
}
|
||||||
|
|
||||||
const int n_input = inp.size();
|
const int n_input = inp.size();
|
||||||
const int n_ctx = params.n_ctx;
|
const int n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
int n_drafted = 0;
|
int n_drafted = 0;
|
||||||
int n_accept = 0;
|
int n_accept = 0;
|
||||||
|
@@ -39,7 +39,6 @@ int main(int argc, char ** argv){
|
|||||||
|
|
||||||
// load the model
|
// load the model
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
GGML_ASSERT(llama_n_vocab(model) < (1 << 16));
|
|
||||||
|
|
||||||
// tokenize the prompt
|
// tokenize the prompt
|
||||||
std::vector<llama_token> inp;
|
std::vector<llama_token> inp;
|
||||||
|
Loading…
Reference in New Issue
Block a user