mirror of https://github.com/ggerganov/llama.cpp.git
examples: cache hf model when --model not provided (#7353)
* examples: cache hf model when --model not provided
parent d8ee902227
commit 11474e756d
@@ -1354,7 +1354,12 @@ void gpt_params_handle_model_default(gpt_params & params) {
             }
             params.hf_file = params.model;
         } else if (params.model.empty()) {
-            params.model = "models/" + string_split(params.hf_file, '/').back();
+            std::string cache_directory = get_cache_directory();
+            const bool success = create_directory_with_parents(cache_directory);
+            if (!success) {
+                throw std::runtime_error("failed to create cache directory: " + cache_directory);
+            }
+            params.model = cache_directory + string_split(params.hf_file, '/').back();
         }
     } else if (!params.model_url.empty()) {
         if (params.model.empty()) {
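The hunk above changes the fallback for `--model`: when only the Hugging Face repo/file flags are given, the download target moves from the hard-coded `models/` prefix to a per-user cache directory, which is created up front and triggers a runtime error if creation fails. Below is a minimal standalone sketch of the resulting path composition; the function name `resolve_default_model_path` and the sample paths are illustrative assumptions, not part of the commit.

#include <iostream>
#include <string>

// Standalone sketch (not llama.cpp code): build the default --model path the way
// the patched gpt_params_handle_model_default does: take the last path component
// of the Hugging Face file name and place it under the cache directory.
static std::string resolve_default_model_path(const std::string & cache_directory,
                                              const std::string & hf_file) {
    // equivalent of string_split(hf_file, '/').back()
    const auto pos = hf_file.find_last_of('/');
    const std::string file_name = (pos == std::string::npos) ? hf_file : hf_file.substr(pos + 1);
    return cache_directory + file_name;
}

int main() {
    // hypothetical values, for illustration only
    const std::string cache_directory = "/home/user/.cache/llama.cpp/";
    std::cout << resolve_default_model_path(cache_directory, "ggml-model-q4_0.gguf") << "\n";
    // prints: /home/user/.cache/llama.cpp/ggml-model-q4_0.gguf
}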
@@ -2516,6 +2521,31 @@ bool create_directory_with_parents(const std::string & path) {
 #endif // _WIN32
 }
 
+std::string get_cache_directory() {
+    std::string cache_directory = "";
+    if (getenv("LLAMA_CACHE")) {
+        cache_directory = std::getenv("LLAMA_CACHE");
+        if (cache_directory.back() != DIRECTORY_SEPARATOR) {
+            cache_directory += DIRECTORY_SEPARATOR;
+        }
+    } else {
+#ifdef __linux__
+        if (std::getenv("XDG_CACHE_HOME")) {
+            cache_directory = std::getenv("XDG_CACHE_HOME");
+        } else {
+            cache_directory = std::getenv("HOME") + std::string("/.cache/");
+        }
+#elif defined(__APPLE__)
+        cache_directory = std::getenv("HOME") + std::string("/Library/Caches/");
+#elif defined(_WIN32)
+        cache_directory = std::getenv("APPDATA");
+#endif // __linux__
+        cache_directory += "llama.cpp";
+        cache_directory += DIRECTORY_SEPARATOR;
+    }
+    return cache_directory;
+}
+
 void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data) {
     if (data.empty()) {
         fprintf(stream, "%s:\n", prop_name);
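The new `get_cache_directory()` resolves the location in a fixed order: `LLAMA_CACHE`, if set, wins on every platform and is used as-is (with a trailing separator ensured); otherwise the OS default is used (`XDG_CACHE_HOME` or `$HOME/.cache` on Linux, `$HOME/Library/Caches` on macOS, `%APPDATA%` on Windows) with `llama.cpp/` appended. The following is a minimal standalone sketch of that precedence for the Linux case only, with explicit null checks added for the environment variables; it is an illustration, not the committed function.

#include <cstdlib>
#include <iostream>
#include <string>

// Standalone sketch of the cache-directory precedence (Linux case only):
// LLAMA_CACHE overrides everything; otherwise XDG_CACHE_HOME, then $HOME/.cache,
// with "llama.cpp/" appended to the OS default. Env vars are null-checked here.
static std::string sketch_cache_dir() {
    if (const char * llama_cache = std::getenv("LLAMA_CACHE")) {
        std::string dir = llama_cache;
        if (!dir.empty() && dir.back() != '/') {
            dir += '/';
        }
        return dir;  // used as-is, no "llama.cpp" component is appended
    }
    std::string dir;
    if (const char * xdg = std::getenv("XDG_CACHE_HOME")) {
        dir = xdg;  // note: the committed code does not insert a separator here
    } else if (const char * home = std::getenv("HOME")) {
        dir = std::string(home) + "/.cache/";
    }
    return dir + "llama.cpp/";  // the OS default always gets the subdirectory
}

int main() {
    std::cout << sketch_cache_dir() << "\n";  // e.g. /home/user/.cache/llama.cpp/
}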
@@ -281,6 +281,7 @@ bool llama_should_add_bos_token(const llama_model * model);
 //
 
 bool create_directory_with_parents(const std::string & path);
+std::string get_cache_directory();
 void dump_vector_float_yaml(FILE * stream, const char * prop_name, const std::vector<float> & data);
 void dump_vector_int_yaml(FILE * stream, const char * prop_name, const std::vector<int> & data);
 void dump_string_yaml_multiline(FILE * stream, const char * prop_name, const char * data);
@@ -325,3 +325,5 @@ These options provide extra functionality and customization when running the LLaMA models:
 - `-ts SPLIT, --tensor-split SPLIT`: When using multiple GPUs this option controls how large tensors should be split across all GPUs. `SPLIT` is a comma-separated list of non-negative values that assigns the proportion of data that each GPU should get in order. For example, "3,2" will assign 60% of the data to GPU 0 and 40% to GPU 1. By default the data is split in proportion to VRAM but this may not be optimal for performance.
 - `--lora FNAME`: Apply a LoRA (Low-Rank Adaptation) adapter to the model (implies --no-mmap). This allows you to adapt the pretrained model to specific tasks or domains.
 - `--lora-base FNAME`: Optional model to use as a base for the layers modified by the LoRA adapter. This flag is used in conjunction with the `--lora` flag, and specifies the base model for the adaptation.
+
+- `-hfr URL --hf-repo URL`: The url to the Hugging Face model repository. Used in conjunction with `--hf-file` or `-hff`. The model is downloaded and stored in the file provided by `-m` or `--model`. If `-m` is not provided, the model is auto-stored in the path specified by the `LLAMA_CACHE` environment variable or in an OS-specific local cache.