mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-24 13:28:50 +01:00
perplexity : fix ETA by warming up the model with an empty run
This commit is contained in:
parent
6519e9c99c
commit
8f429fa511
@ -752,6 +752,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
|||||||
params.logit_bias[llama_token_eos(lctx)] = -INFINITY;
|
params.logit_bias[llama_token_eos(lctx)] = -INFINITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
LOG("warming up the model with an empty run\n");
|
||||||
|
|
||||||
|
const std::vector<llama_token> tmp = { llama_token_bos(lctx), };
|
||||||
|
llama_eval(lctx, tmp.data(), tmp.size(), 0, params.n_threads);
|
||||||
|
llama_reset_timings(lctx);
|
||||||
|
}
|
||||||
|
|
||||||
return std::make_tuple(model, lctx);
|
return std::make_tuple(model, lctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -492,14 +492,6 @@ int main(int argc, char ** argv) {
|
|||||||
std::vector<llama_token> embd;
|
std::vector<llama_token> embd;
|
||||||
std::vector<llama_token> embd_guidance;
|
std::vector<llama_token> embd_guidance;
|
||||||
|
|
||||||
{
|
|
||||||
LOG("warming up the model with an empty run\n");
|
|
||||||
|
|
||||||
const std::vector<llama_token> tmp = { llama_token_bos(ctx), };
|
|
||||||
llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
|
|
||||||
llama_reset_timings(ctx);
|
|
||||||
}
|
|
||||||
|
|
||||||
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
|
||||||
// predict
|
// predict
|
||||||
if (embd.size() > 0) {
|
if (embd.size() > 0) {
|
||||||
|
Loading…
Reference in New Issue
Block a user