mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 13:27:21 +01:00
fix perplexity after c-api refactor (#390)
* preallocate a buffer of fitting size for tokenization (utils.cpp)
* don't create a new std::string (especially here, where it's usually large)
This commit is contained in:
parent
40ea807a97
commit
56e659a0b2
2
main.cpp
2
main.cpp
@ -85,7 +85,7 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
|
|||||||
// Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
|
// Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
|
||||||
// Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
|
// Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
|
||||||
// Output: `perplexity: 13.5106 [114/114]`
|
// Output: `perplexity: 13.5106 [114/114]`
|
||||||
auto tokens = ::llama_tokenize(ctx, params.prompt.c_str(), true);
|
auto tokens = ::llama_tokenize(ctx, params.prompt, true);
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
double nll = 0.0;
|
double nll = 0.0;
|
||||||
|
@ -146,8 +146,10 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
|
|||||||
|
|
||||||
// TODO: not great allocating this every time
|
// TODO: not great allocating this every time
|
||||||
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
|
std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::string & text, bool add_bos) {
|
||||||
std::vector<llama_token> res(8096);
|
// initialize to prompt number of chars, since n_tokens <= n_prompt_chars
|
||||||
|
std::vector<llama_token> res(text.size() + (int)add_bos);
|
||||||
int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
|
int n = llama_tokenize(ctx, text.c_str(), res.data(), res.size(), add_bos);
|
||||||
|
assert(n >= 0);
|
||||||
res.resize(n);
|
res.resize(n);
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user