Unmap the file in llama_free
parent d68c5dc435
commit 276e5b7811

llama.cpp (41 changed lines)
--- a/llama.cpp
+++ b/llama.cpp
@@ -149,6 +149,10 @@ struct llama_model {
     // the model memory buffer
     std::vector<uint8_t> buf;
 
+    // model memory mapped file
+    void * mm_addr;
+    size_t mm_length;
+
     // tensors
     int n_loaded;
     std::unordered_map<std::string, struct ggml_tensor *> tensors;
@@ -296,22 +300,32 @@ struct llama_context_params llama_context_default_params() {
 // model loading
 //
 
-static void * mmap_file(const char* fname) {
+static void mmap_file(const char* fname, void * &mm_addr, size_t &mm_length) {
 #if defined(MAP_FAILED)
-    // POSIX mmap
+    // POSIX
     int fd = open(fname, O_RDONLY);
-    size_t len = lseek(fd, 0, SEEK_END);
-    void * mm_addr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
+    mm_length = lseek(fd, 0, SEEK_END);
+    mm_addr = mmap(NULL, mm_length, PROT_READ, MAP_SHARED, fd, 0);
+    close(fd);
     if (mm_addr == MAP_FAILED) {
         perror("mmap failed");
         mm_addr = NULL;
+        mm_length = 0;
     }
-    close(fd);
-    return mm_addr;
 #else
     // TODO: windows support
     (void)(fname); // suppress warnings
-    return NULL;
+#endif
+}
+
+static void munmap_file(void * addr, size_t length) {
+#if defined(MAP_FAILED)
+    // POSIX
+    munmap(addr, length);
+#else
+    // TODO: windows support
+    (void)(addr); // suppress warnings
+    (void)(length);
 #endif
 }
 
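The rewritten mmap_file() above follows the usual POSIX pattern: the descriptor from open() can be closed as soon as mmap() returns, because the mapping keeps the file referenced until munmap() releases it. Below is a minimal standalone sketch of the same open/lseek/mmap/close/munmap sequence; it is illustrative only, not part of the patch, and the file path is a placeholder.

// map_demo.cpp -- illustrative sketch only, not part of the patch
#include <cstdio>
#include <fcntl.h>
#include <sys/mman.h>
#include <unistd.h>

int main(int argc, char ** argv) {
    const char * fname = argc > 1 ? argv[1] : "model.bin"; // placeholder path
    int fd = open(fname, O_RDONLY);
    if (fd == -1) { perror("open"); return 1; }

    // determine the file size with lseek, as mmap_file() does
    size_t length = lseek(fd, 0, SEEK_END);

    // read-only shared mapping of the whole file
    void * addr = mmap(NULL, length, PROT_READ, MAP_SHARED, fd, 0);
    close(fd); // safe: the mapping stays valid after the descriptor is closed

    if (addr == MAP_FAILED) { perror("mmap"); return 1; }

    if (length > 0) {
        printf("mapped %zu bytes, first byte = 0x%02x\n", length, ((const unsigned char *) addr)[0]);
    }

    munmap(addr, length); // release the mapping, as llama_free() now does
    return 0;
}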
@@ -480,12 +494,15 @@ static bool llama_model_load(
     bool use_mmap = (n_parts == 1);
 
     // try to memory map the model file
-    void* mm_addr = NULL;
+    void * mm_addr = NULL;
     if (use_mmap) {
-        mm_addr = mmap_file(fname.c_str());
-        if (mm_addr == NULL) {
+        mmap_file(fname.c_str(), model.mm_addr, model.mm_length);
+        if (model.mm_addr == NULL) {
             use_mmap = false;
         }
+        else {
+            mm_addr = model.mm_addr;
+        }
     }
 
     auto & ctx = model.ctx;
@@ -1750,6 +1767,10 @@ void llama_free(struct llama_context * ctx) {
         ggml_free(ctx->model.ctx);
     }
 
+    if (ctx->model.mm_addr) {
+        munmap_file(ctx->model.mm_addr, ctx->model.mm_length);
+    }
+
     delete ctx;
 }
 
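With this change llama_free() releases the mapping along with the rest of the context, so loading and then freeing a model no longer leaks the mapped region. A small usage sketch follows, assuming the llama.h API of this era (llama_context_default_params / llama_init_from_file / llama_free); the model path is a placeholder.

// usage sketch -- illustrative only
#include "llama.h"

int main() {
    struct llama_context_params params = llama_context_default_params();

    // the loader mmaps the model file when n_parts == 1
    struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
    if (ctx == NULL) {
        return 1;
    }

    // ... run inference ...

    // with this commit, llama_free() also munmaps the model file (if it was mmapped)
    llama_free(ctx);
    return 0;
}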