llama.cpp/common/ngram-cache.h

#pragma once

#include "llama.h"

#include <unordered_map>
#include <string>
#include <vector>

#define LLAMA_NGRAM_MIN    1
#define LLAMA_NGRAM_MAX    4
#define LLAMA_NGRAM_STATIC 2

// Data structures to map n-grams to empirical token probabilities:

struct common_ngram {
    llama_token tokens[LLAMA_NGRAM_MAX];

    common_ngram() {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            tokens[i] = -1;
        }
    }

    common_ngram(const llama_token * input, const int ngram_size) {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            tokens[i] = i < ngram_size ? input[i] : -1;
        }
    }

    bool operator==(const common_ngram & other) const {
        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
            if (tokens[i] != other.tokens[i]) {
                return false;
            }
        }
        return true;
    }
};

// Hash functor for a single token.
struct common_token_hash_function {
    size_t operator()(const llama_token token) const {
        // Fibonacci hashing multiplier, see
        // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
        const unsigned long long fib_multiplier = 11400714819323198485llu;
        // Widen the token first so the product is computed in unsigned long long arithmetic,
        // which wraps around instead of overflowing.
        const unsigned long long widened = static_cast<unsigned long long>(token);
        return widened * fib_multiplier;
    }
};

// Hash functor for common_ngram; used as the hasher of common_ngram_cache.
// Folds the per-token hashes of all LLAMA_NGRAM_MAX slots (filler slots included) into one value.
// n-grams that compare equal with operator== always produce the same hash.
struct common_ngram_hash_function {
    size_t operator()(const common_ngram & ngram) const {
        size_t hash = common_token_hash_function{}(ngram.tokens[0]);
        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
            // Order-sensitive combine (same scheme as boost::hash_combine).
            // A plain XOR of the token hashes is commutative and self-cancelling: every permutation
            // of the same tokens collides, and a value that occurs an even number of times drops out
            // entirely, e.g. {a, a, -1, -1} would hash to 0 for every a.
            hash ^= common_token_hash_function{}(ngram.tokens[i]) + 0x9e3779b9 + (hash << 6) + (hash >> 2);
        }
        return hash;
    }
};

// token -> number of times token has been seen
typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;

// n-gram -> empirical distribution of following tokens
typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;


// Update an ngram cache with tokens.
//
// ngram_cache:         the cache to modify (passed by non-const reference).
// ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.
//                      NOTE(review): presumably must not exceed LLAMA_NGRAM_MAX, because common_ngram
//                      stores at most LLAMA_NGRAM_MAX tokens -- confirm against the implementation.
// inp_data:            the token sequence with which to update ngram_cache.
// nnew:                how many new tokens have been appended to inp_data since the last call to this function.
// print_progress:      whether to print progress to stderr.
//
// In order to get correct results inp_data can ONLY BE APPENDED TO between calls.
// Changes in the middle need a complete rebuild.
void common_ngram_cache_update(
    common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);

// Try to draft tokens from ngram caches.
//
// inp:                 the tokens generated so far.
// draft:               the token sequence to draft (output). Expected to initially contain the previously sampled token.
// n_draft:             maximum number of tokens to add to draft.
// ngram_min/ngram_max: the min/max size of the ngrams in nc_context and nc_dynamic.
// nc_context:          ngram cache based on current context.
// nc_dynamic:          ngram cache based on previous user generations.
// nc_static:           ngram cache generated from a large text corpus, used for validation.
//
// NOTE(review): all caches are taken by non-const reference; whether any of them is modified
// is not visible from this declaration -- check the implementation.
void common_ngram_cache_draft(
    std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
    common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);

// Save an ngram cache to a file.
//
// ngram_cache: the ngram cache to save.
// filename:    the path under which to save the ngram cache.
//
// NOTE(review): the function returns void; how I/O failures are reported is not visible
// from this declaration -- check the implementation.
void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);

// Load an ngram cache saved with common_ngram_cache_save.
//
// filename: the path from which to load the ngram cache.
// returns:  an ngram cache (by value) containing the information saved to filename.
//
// NOTE(review): behavior for a missing or malformed file is not visible from this
// declaration -- check the implementation.
common_ngram_cache common_ngram_cache_load(std::string & filename);

// Merge two ngram caches.
//
// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add (modified).
// ngram_cache_add:    the ngram cache to add to ngram_cache_target.
//                     NOTE(review): taken by non-const reference; presumably left unchanged -- confirm.
void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`#pragma once`

			`#include "llama.h"`

			`#include <unordered_map>`
			`#include <string>`
			`#include <vector>`

			`#define LLAMA_NGRAM_MIN 1`
			`#define LLAMA_NGRAM_MAX 4`
			`#define LLAMA_NGRAM_STATIC 2`

			`// Data structures to map n-grams to empirical token probabilities:`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`struct common_ngram {`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`llama_token tokens[LLAMA_NGRAM_MAX];`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`common_ngram() {`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {`
			`tokens[i] = -1;`
			`}`
			`}`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`common_ngram(const llama_token * input, const int ngram_size) {`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {`
			`tokens[i] = i < ngram_size ? input[i] : -1;`
			`}`
			`}`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`bool operator==(const common_ngram & other) const {`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {`
			`if (tokens[i] != other.tokens[i]) {`
			`return false;`
			`}`
			`}`
			`return true;`
			`}`
			`};`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`struct common_token_hash_function {`
lookup: fibonacci hashing, fix crashes (#8548) 2024-07-17 23:35:44 +02:00			`size_t operator()(const llama_token token) const {`
			`// see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/`
			`return token * 11400714819323198485llu;`
			`}`
			`};`

common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`struct common_ngram_hash_function {`
			`size_t operator()(const common_ngram & ngram) const {`
			`size_t hash = common_token_hash_function{}(ngram.tokens[0]);`
lookup: fibonacci hashing, fix crashes (#8548) 2024-07-17 23:35:44 +02:00			`for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`hash ^= common_token_hash_function{}(ngram.tokens[i]);`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`}`
			`return hash;`
			`}`
			`};`

			`// token -> number of times token has been seen`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00
			`// n-gram -> empirical distribution of following tokens`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00

			`// Update an ngram cache with tokens.`
			`// ngram_cache: the cache to modify.`
			`// ngram_min/ngram_max: the min/max size of the ngrams to extract from inp_data.`
			`// inp_data: the token sequence with which to update ngram_cache.`
			`// nnew: how many new tokens have been appended to inp_data since the last call to this function.`
			`// print_progress: whether to print progress to stderr.`
			`//`
			`// In order to get correct results inp_data can ONLY BE APPENDED TO.`
			`// Changes in the middle need a complete rebuild.`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`void common_ngram_cache_update(`
			`common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00
			`// Try to draft tokens from ngram caches.`
			`// inp: the tokens generated so far.`
			`// draft: the token sequence to draft. Expected to initially contain the previously sampled token.`
			`// n_draft: maximum number of tokens to add to draft.`
			`// ngram_min/ngram_max: the min/max size of the ngrams in nc_context and nc_dynamic.`
			`// nc_context: ngram cache based on current context.`
			`// nc_dynamic: ngram cache based on previous user generations.`
			`// nc_static: ngram cache generated from a large text corpus, used for validation.`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`void common_ngram_cache_draft(`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00
			`// Save an ngram cache to a file.`
			`// ngram_cache: the ngram cache to save.`
			`// filename: the path under which to save the ngram cache.`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`// Load an ngram cache saved with common_ngram_cache_save.`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00			`// filename: the path from which to load the ngram cache.`
			`// returns: an ngram cache containing the information saved to filename.`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`common_ngram_cache common_ngram_cache_load(std::string & filename);`
lookup: complement data from context with general text statistics (#5479) * lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens * fixup! lookup: evaluation tools, use corpus/previous gens 2024-03-23 01:24:36 +01:00
			`// Merge two ngram caches.`
			`// ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.`
			`// ngram_cache_add: the ngram cache to add to ngram_cache_target.`
common : use common_ prefix for common library functions (#9805) * common : use common_ prefix for common library functions --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-10-10 22:57:42 +02:00			`void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);`