diff --git a/gptneox-common.h b/gptneox-common.h
deleted file mode 100644
index 60e5650c1..000000000
--- a/gptneox-common.h
+++ /dev/null
@@ -1,125 +0,0 @@
-// Various helper functions and utilities
-
-#pragma once
-
-#include <string>
-#include <map>
-#include <vector>
-#include <random>
-#include <thread>
-
-//
-// CLI argument parsing
-//
-
-struct gpt_params {
-    int32_t seed      = -1;  // RNG seed
-    int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
-    int32_t n_predict = 200; // new tokens to predict
-    int32_t n_batch   = 8;   // batch size for prompt processing
-
-    // sampling parameters
-    int32_t top_k          = 40;
-    float   top_p          = 0.9f;
-    float   temp           = 0.9f;
-    int32_t repeat_last_n  = 64;
-    float   repeat_penalty = 1.00f;
-
-    std::string model      = "models/gpt-2-117M/ggml-model.bin"; // model path
-    std::string prompt     = "";
-    std::string token_test = "";
-
-    bool    interactive      = false;
-    int32_t interactive_port = -1;
-
-    int32_t n_gpu_layers = 0;
-};
-
-bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
-
-void gpt_print_usage(int argc, char ** argv, const gpt_params & params);
-
-std::string gpt_random_prompt(std::mt19937 & rng);
-
-//
-// Vocab utils
-//
-
-std::string trim(const std::string & s);
-
-std::string replace(
-        const std::string & s,
-        const std::string & from,
-        const std::string & to);
-
-struct gpt_vocab {
-    using id    = int32_t;
-    using token = std::string;
-
-    std::map<token, id> token_to_id;
-    std::map<id, token> id_to_token;
-    std::vector<std::string> special_tokens;
-
-    void add_special_token(const std::string & token);
-};
-
-// poor-man's JSON parsing
-std::map<std::string, int32_t> json_parse(const std::string & fname);
-
-std::string convert_to_utf8(const std::wstring & input);
-
-std::wstring convert_to_wstring(const std::string & input);
-
-void gpt_split_words(std::string str, std::vector<std::string>& words);
-
-// split text into tokens
-//
-// ref: https://github.com/openai/gpt-2/blob/a74da5d99abaaba920de8131d64da2862a8f213b/src/encoder.py#L53
-//
-// Regex (Python):
-// r"""'s|'t|'re|'ve|'m|'ll|'d| ?\p{L}+| ?\p{N}+| ?[^\s\p{L}\p{N}]+|\s+(?!\S)|\s+"""
-//
-// Regex (C++):
-// R"('s|'t|'re|'ve|'m|'ll|'d| ?[[:alpha:]]+| ?[[:digit:]]+| ?[^\s[:alpha:][:digit:]]+|\s+(?!\S)|\s+)"
-//
-std::vector<gpt_vocab::id> gpt_tokenize(const gpt_vocab & vocab, const std::string & text);
-
-// test outputs of gpt_tokenize
-//
-//  - compare with tokens generated by the huggingface tokenizer
-//  - test cases are chosen based on the model's main language (under 'prompt' directory)
-//  - if all sentences are tokenized identically, print 'All tests passed.'
-//  - otherwise, print sentence, huggingface tokens, ggml tokens
-//
-void test_gpt_tokenizer(gpt_vocab & vocab, const std::string & fpath_test);
-
-// load the tokens from encoder.json
-bool gpt_vocab_init(const std::string & fname, gpt_vocab & vocab);
-
-// sample next token given probabilities for each embedding
-//
-//   - consider only the top K tokens
-//   - from them, consider only the top tokens with cumulative probability > P
-//
-// TODO: not sure if this implementation is correct
-// TODO: temperature is not implemented
-//
-gpt_vocab::id gpt_sample_top_k_top_p(
-        const gpt_vocab & vocab,
-        const float * logits,
-        int    top_k,
-        double top_p,
-        double temp,
-        std::mt19937 & rng);
-
-gpt_vocab::id gpt_sample_top_k_top_p_repeat(
-        const gpt_vocab & vocab,
-        const float * logits,
-        const int32_t * last_n_tokens_data,
-        size_t last_n_tokens_data_size,
-        int    top_k,
-        double top_p,
-        double temp,
-        int repeat_last_n,
-        float repeat_penalty,
-        std::mt19937 & rng);
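Note: the comment on gpt_sample_top_k_top_p in the removed header describes standard top-k / top-p (nucleus) sampling: keep the K highest-logit tokens, then keep the smallest prefix of those whose cumulative probability exceeds P, and draw from that prefix. Below is a minimal self-contained sketch of that scheme for reference only; it is written for this note, is not the implementation that lived in the matching gptneox-common.cpp, and the function name sample_top_k_top_p_sketch is invented here.

#include <algorithm>
#include <cmath>
#include <cstdio>
#include <functional>
#include <random>
#include <utility>
#include <vector>

// Sketch only: keep the top_k highest logits, apply temperature and
// softmax, keep the smallest prefix whose cumulative probability
// exceeds top_p, then sample from that prefix. Assumes temp > 0.
static int sample_top_k_top_p_sketch(const std::vector<float> & logits,
                                     int top_k, double top_p, double temp,
                                     std::mt19937 & rng) {
    // pair each temperature-scaled logit with its token id
    std::vector<std::pair<float, int>> cand(logits.size());
    for (size_t i = 0; i < logits.size(); ++i) {
        cand[i] = { logits[i] / (float) temp, (int) i };
    }

    // top-k: partially sort so the k largest logits come first
    top_k = std::min<int>(top_k, (int) cand.size());
    std::partial_sort(cand.begin(), cand.begin() + top_k, cand.end(),
                      std::greater<std::pair<float, int>>());
    cand.resize(top_k);

    // softmax over the retained logits (shift by the max for stability)
    const double maxl = cand[0].first;
    std::vector<double> probs(top_k);
    double sum = 0.0;
    for (int i = 0; i < top_k; ++i) {
        probs[i] = std::exp(cand[i].first - maxl);
        sum += probs[i];
    }
    for (double & p : probs) {
        p /= sum;
    }

    // top-p: keep the smallest prefix with cumulative probability > top_p
    double cum = 0.0;
    int n = top_k;
    for (int i = 0; i < top_k; ++i) {
        cum += probs[i];
        if (cum > top_p) { n = i + 1; break; }
    }
    probs.resize(n);

    // std::discrete_distribution renormalizes the truncated weights
    std::discrete_distribution<int> dist(probs.begin(), probs.end());
    return cand[dist(rng)].second;
}

int main() {
    std::mt19937 rng(42);
    const std::vector<float> logits = { 1.0f, 4.0f, 2.5f, 0.5f };
    std::printf("sampled id: %d\n",
                sample_top_k_top_p_sketch(logits, /*top_k=*/3, /*top_p=*/0.9,
                                          /*temp=*/0.8, rng));
}

The gpt_sample_top_k_top_p_repeat variant declared above additionally takes the last repeat_last_n generated tokens and a repeat_penalty, the usual approach being to scale down (or up, for negative logits) the logits of recently seen tokens before the top-k/top-p steps.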