mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 12:21:40 +01:00
Reduce model loading time (#43)
* Use buffering * Use vector * Minor --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
2a20f48efa
commit
63fd76fbb0
4
main.cpp
4
main.cpp
@ -87,7 +87,10 @@ struct llama_model {
|
||||
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
|
||||
printf("%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
||||
|
||||
std::vector<char> f_buf(1024*1024);
|
||||
|
||||
auto fin = std::ifstream(fname, std::ios::binary);
|
||||
fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
|
||||
if (!fin) {
|
||||
fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str());
|
||||
return false;
|
||||
@ -325,6 +328,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
||||
printf("%s: loading model part %d/%d from '%s'\n", __func__, i+1, n_parts, fname_part.c_str());
|
||||
|
||||
fin = std::ifstream(fname_part, std::ios::binary);
|
||||
fin.rdbuf()->pubsetbuf(f_buf.data(), f_buf.size());
|
||||
fin.seekg(file_offset);
|
||||
|
||||
// load weights
|
||||
|
Loading…
Reference in New Issue
Block a user