Update README and comments for standalone perplexity tool (#525)
Commit b391579db9 (parent 7a87d31f4f)
@@ -248,7 +248,7 @@ cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach.
 
 ### Perplexity (Measuring model quality)
 
-You can pass `--perplexity` as a command line option to measure perplexity over the given prompt. For more background,
+You can use the `perplexity` example to measure perplexity over the given prompt. For more background,
 see https://huggingface.co/docs/transformers/perplexity. However, in general, lower perplexity is better for LLMs.
 
 #### Latest measurements
@@ -271,10 +271,10 @@ Perplexity - model options
 #### How to run
 
 1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
-2. Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
+2. Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
 3. Output:
 ```
-Calculating perplexity over 655 chunks
+perplexity : calculating perplexity over 655 chunks
 24.43 seconds per pass - ETA 4.45 hours
 [1]4.5970,[2]5.1807,[3]6.0382,...
 ```
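For context on the numbers above: the tool splits the tokenized test text into fixed-size chunks, evaluates each chunk, and prints the running perplexity after every chunk (the bracketed `[1]4.5970,[2]5.1807,...` values). Perplexity itself is the exponential of the average negative log-likelihood per token. Below is a minimal sketch of that arithmetic, assuming per-token log-probabilities are already in hand; the `token_logprobs` input and `perplexity_from_logprobs` helper are placeholders for illustration, not part of the repository's API:

```cpp
#include <cmath>
#include <cstdio>
#include <vector>

// Sketch: perplexity = exp(-mean(log p(token))).
// In the real tool the log-probabilities come from evaluating the model chunk by chunk.
double perplexity_from_logprobs(const std::vector<double> & token_logprobs) {
    double nll = 0.0;                               // accumulated negative log-likelihood
    for (const double lp : token_logprobs) {
        nll -= lp;
    }
    return std::exp(nll / double(token_logprobs.size()));
}

int main() {
    // Toy example: three tokens the model considered fairly likely.
    const std::vector<double> logprobs = { std::log(0.5), std::log(0.25), std::log(0.4) };
    printf("perplexity: %.4f\n", perplexity_from_logprobs(logprobs));
    return 0;
}
```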
@@ -19,7 +19,7 @@ std::vector<double> softmax(const std::vector<float>& logits) {
 
 void perplexity(llama_context * ctx, const gpt_params & params) {
     // Download: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research
-    // Run `./main --perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
+    // Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw`
     // Output: `perplexity: 13.5106 [114/114]`
     auto tokens = ::llama_tokenize(ctx, params.prompt, true);
 
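The hunk's context line shows a `softmax` helper that turns the model's raw logits into the probabilities the perplexity loop consumes. The commit does not show the function body; a plausible numerically stable implementation with that signature would look like this sketch:

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

// Sketch of a softmax matching the signature in the hunk context.
// Subtracting the max logit before exponentiating keeps exp() from overflowing.
std::vector<double> softmax(const std::vector<float> & logits) {
    std::vector<double> probs(logits.size());
    const float max_logit = *std::max_element(logits.begin(), logits.end());
    double sum = 0.0;
    for (size_t i = 0; i < logits.size(); i++) {
        probs[i] = std::exp(double(logits[i]) - double(max_logit));
        sum += probs[i];
    }
    for (double & p : probs) {
        p /= sum;   // normalize so the probabilities sum to 1
    }
    return probs;
}
```

Taking `float` logits but returning `double` probabilities, as the signature above does, keeps the later log and accumulation steps in higher precision, which matters when the sum runs over hundreds of thousands of tokens.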