mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-11 21:10:24 +01:00
common : change default parameters to pre-#1126 (#1223)
This commit is contained in:
parent
dd7eff57d8
commit
334637e43e
@ -17,7 +17,7 @@
|
|||||||
struct gpt_params {
|
struct gpt_params {
|
||||||
int32_t seed = -1; // RNG seed
|
int32_t seed = -1; // RNG seed
|
||||||
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
||||||
int32_t n_predict = 128; // new tokens to predict
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
|
||||||
int32_t n_ctx = 512; // context size
|
int32_t n_ctx = 512; // context size
|
||||||
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
@ -25,18 +25,18 @@ struct gpt_params {
|
|||||||
|
|
||||||
// sampling parameters
|
// sampling parameters
|
||||||
std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
|
std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens
|
||||||
int32_t top_k = 0; // <= 0 to use vocab size
|
int32_t top_k = 40; // <= 0 to use vocab size
|
||||||
float top_p = 1.0f; // 1.0 = disabled
|
float top_p = 0.95f; // 1.0 = disabled
|
||||||
float tfs_z = 1.0f; // 1.0 = disabled
|
float tfs_z = 1.00f; // 1.0 = disabled
|
||||||
float typical_p = 1.0f; // 1.0 = disabled
|
float typical_p = 1.00f; // 1.0 = disabled
|
||||||
float temp = 1.0f; // 1.0 = disabled
|
float temp = 0.80f; // 1.0 = disabled
|
||||||
float repeat_penalty = 1.0f; // 1.0 = disabled
|
float repeat_penalty = 1.10f; // 1.0 = disabled
|
||||||
int32_t repeat_last_n = -1; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
int32_t repeat_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
||||||
float frequency_penalty = 0.0f; // 0.0 = disabled
|
float frequency_penalty = 0.00f; // 0.0 = disabled
|
||||||
float presence_penalty = 0.0f; // 0.0 = disabled
|
float presence_penalty = 0.00f; // 0.0 = disabled
|
||||||
int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
int mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
||||||
float mirostat_tau = 5.0f; // target entropy
|
float mirostat_tau = 5.00f; // target entropy
|
||||||
float mirostat_eta = 0.1f; // learning rate
|
float mirostat_eta = 0.10f; // learning rate
|
||||||
|
|
||||||
std::string model = "models/lamma-7B/ggml-model.bin"; // model path
|
std::string model = "models/lamma-7B/ggml-model.bin"; // model path
|
||||||
std::string prompt = "";
|
std::string prompt = "";
|
||||||
|
@ -387,19 +387,19 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
|
if ((int) embd_inp.size() <= n_consumed && !is_interacting) {
|
||||||
// out of user input, sample next token
|
// out of user input, sample next token
|
||||||
const float temp = params.temp;
|
const float temp = params.temp;
|
||||||
const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
|
const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(ctx) : params.top_k;
|
||||||
const float top_p = params.top_p;
|
const float top_p = params.top_p;
|
||||||
const float tfs_z = params.tfs_z;
|
const float tfs_z = params.tfs_z;
|
||||||
const float typical_p = params.typical_p;
|
const float typical_p = params.typical_p;
|
||||||
const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n;
|
const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n;
|
||||||
const float repeat_penalty = params.repeat_penalty;
|
const float repeat_penalty = params.repeat_penalty;
|
||||||
const float alpha_presence = params.presence_penalty;
|
const float alpha_presence = params.presence_penalty;
|
||||||
const float alpha_frequency = params.frequency_penalty;
|
const float alpha_frequency = params.frequency_penalty;
|
||||||
const int mirostat = params.mirostat;
|
const int mirostat = params.mirostat;
|
||||||
const float mirostat_tau = params.mirostat_tau;
|
const float mirostat_tau = params.mirostat_tau;
|
||||||
const float mirostat_eta = params.mirostat_eta;
|
const float mirostat_eta = params.mirostat_eta;
|
||||||
const bool penalize_nl = params.penalize_nl;
|
const bool penalize_nl = params.penalize_nl;
|
||||||
|
|
||||||
// optionally save the session on first sample (for faster prompt loading next time)
|
// optionally save the session on first sample (for faster prompt loading next time)
|
||||||
if (!path_session.empty() && need_to_save_session) {
|
if (!path_session.empty() && need_to_save_session) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user