mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-01 00:39:00 +01:00
examples : utilize new llama_get_logits_ith()
This commit is contained in:
parent
4c72ab13b2
commit
d008733e6b
@ -905,7 +905,7 @@ llama_token llama_sample_token(
|
|||||||
|
|
||||||
llama_token id = 0;
|
llama_token id = 0;
|
||||||
|
|
||||||
float * logits = llama_get_logits(ctx) + idx * n_vocab;
|
float * logits = llama_get_logits_ith(ctx, idx);
|
||||||
|
|
||||||
// Apply params.logit_bias map
|
// Apply params.logit_bias map
|
||||||
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
|
for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
|
||||||
|
@ -183,7 +183,7 @@ std::string llama_detokenize_bpe(
|
|||||||
// - ctx_guidance: context to use for classifier-free guidance, ignore if NULL
|
// - ctx_guidance: context to use for classifier-free guidance, ignore if NULL
|
||||||
// - grammar: grammar to use for sampling, ignore if NULL
|
// - grammar: grammar to use for sampling, ignore if NULL
|
||||||
// - last_tokens: needed for repetition penalty, ignore if empty
|
// - last_tokens: needed for repetition penalty, ignore if empty
|
||||||
// - idx: sample from llama_get_logits(ctx) + idx * n_vocab
|
// - idx: sample from llama_get_logits_ith(ctx, idx)
|
||||||
//
|
//
|
||||||
// returns:
|
// returns:
|
||||||
// - token: sampled token
|
// - token: sampled token
|
||||||
|
@ -150,7 +150,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
auto n_vocab = llama_n_vocab(ctx);
|
auto n_vocab = llama_n_vocab(ctx);
|
||||||
auto logits = llama_get_logits(ctx) + i_batch[i] * n_vocab;
|
auto logits = llama_get_logits_ith(ctx, i_batch[i]);
|
||||||
|
|
||||||
std::vector<llama_token_data> candidates;
|
std::vector<llama_token_data> candidates;
|
||||||
candidates.reserve(n_vocab);
|
candidates.reserve(n_vocab);
|
||||||
|
Loading…
Reference in New Issue
Block a user