mirror of https://github.com/ggerganov/llama.cpp.git (synced 2024-12-25 05:48:47 +01:00)
Remove unused n_parts parameter (#1509)
parent c238b5873a
commit dc271c52ed
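The diff below drops n_parts everywhere it appears: the --n-parts CLI flag, the fields in gpt_params and llama_context_params, and the default initializer. The part count is instead determined from the model itself, which is what the old -1 default already did. As a minimal sketch of caller code after this change (field values are illustrative only, not prescriptive):

    // Post-#1509: configure a context without n_parts.
    llama_context_params lparams = llama_context_default_params();
    lparams.n_ctx  = 512;     // text context size
    lparams.seed   = -1;      // -1 = random RNG seed
    // lparams.n_parts = 2;   // field no longer exists; part count comes from the model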
@@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "--n-parts") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_parts = std::stoi(argv[i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, default_params);
             exit(0);
@@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --no-penalize-nl      do not penalize newline token\n");
     fprintf(stderr, "  --memory-f32          use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, "  --n-parts N           number of model parts (default: -1 = determine from dimensions)\n");
     fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx        = params.n_ctx;
-    lparams.n_parts      = params.n_parts;
     lparams.n_gpu_layers = params.n_gpu_layers;
     lparams.seed         = params.seed;
     lparams.f16_kv       = params.memory_f16;
@@ -24,7 +24,6 @@ struct gpt_params {
     int32_t seed      = -1;  // RNG seed
     int32_t n_threads = get_num_physical_cores();
     int32_t n_predict = -1;  // new tokens to predict
-    int32_t n_parts   = -1;  // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx     = 512; // context size
     int32_t n_batch   = 512; // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;   // number of tokens to keep from initial prompt
@@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx     = 256;
-    lparams.n_parts   = 1;
     lparams.seed      = 1;
     lparams.f16_kv    = false;
     lparams.use_mlock = false;
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx    = params.n_ctx;
-    lparams.n_parts  = params.n_parts;
     lparams.seed     = params.seed;
     lparams.f16_kv   = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
@@ -812,7 +812,6 @@ static bool kv_cache_init(
 struct llama_context_params llama_context_default_params() {
     struct llama_context_params result = {
         /*.n_ctx      =*/ 512,
-        /*.n_parts    =*/ -1,
         /*.gpu_layers =*/ 0,
         /*.seed       =*/ -1,
         /*.f16_kv     =*/ false,
llama.h
@@ -55,7 +55,6 @@ extern "C" {

     struct llama_context_params {
         int n_ctx;        // text context
-        int n_parts;      // -1 for default
         int n_gpu_layers; // number of layers to store in VRAM
         int seed;         // RNG seed, -1 for random

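After the removal, the public params struct in llama.h begins as sketched below (reconstructed from the hunk above; the remaining fields are elided, and the listed comments are those from the diff):

    struct llama_context_params {
        int n_ctx;        // text context
        int n_gpu_layers; // number of layers to store in VRAM
        int seed;         // RNG seed, -1 for random
        // ... remaining fields (f16_kv, use_mmap, use_mlock, ...) unchanged ...
    };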