mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-28 04:47:04 +01:00
Ensure --mlock works properly with mmap() support
This commit is contained in:
parent
78ca9838ee
commit
6f23ba5ee2
39
ggml.c
39
ggml.c
@@ -2884,36 +2884,47 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch)
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
#define MLOCK_SUGGESTION \
|
||||||
|
"Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \
|
||||||
|
"decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n"
|
||||||
|
#else
|
||||||
|
#define MLOCK_SUGGESTION \
|
||||||
|
"Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n"
|
||||||
|
#endif
|
||||||
|
|
||||||
bool ggml_mlock_supported(void) {
|
bool ggml_mlock_supported(void) {
|
||||||
return GGML_MLOCK_SUPPORT;
|
return GGML_MLOCK_SUPPORT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool ggml_mlock(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
const void *opt_extra_addr,
|
||||||
|
size_t opt_extra_len,
|
||||||
|
char **err_p) {
|
||||||
|
// TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32
|
||||||
#if GGML_MLOCK_SUPPORT
|
#if GGML_MLOCK_SUPPORT
|
||||||
#ifdef __APPLE__
|
|
||||||
#define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \
|
|
||||||
"decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l)."
|
|
||||||
#else
|
|
||||||
#define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)."
|
|
||||||
#endif
|
|
||||||
bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
|
|
||||||
if (ctx->mem_buffer_mlocked) {
|
if (ctx->mem_buffer_mlocked) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (mlock(ctx->mem_buffer, ctx->mem_size)) {
|
if (mlock(ctx->mem_buffer, ctx->mem_size) ||
|
||||||
int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
|
(opt_extra_len &&
|
||||||
ctx->mem_size, strerror(errno));
|
mlock(opt_extra_addr, opt_extra_len))) {
|
||||||
GGML_ASSERT(ret >= 0);
|
if ((*err_p = malloc(1024))) {
|
||||||
|
snprintf(*err_p, 1024,
|
||||||
|
"failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION,
|
||||||
|
ctx->mem_size + opt_extra_len,
|
||||||
|
strerror(errno));
|
||||||
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ctx->mem_buffer_mlocked = true;
|
ctx->mem_buffer_mlocked = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
#else // GGML_MLOCK_SUPPORT
|
#else // GGML_MLOCK_SUPPORT
|
||||||
bool ggml_mlock(struct ggml_context * ctx, char ** err_p) {
|
|
||||||
*err_p = strdup("can't mlock because it's not supported on this system");
|
*err_p = strdup("can't mlock because it's not supported on this system");
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
#endif // GGML_MLOCK_SUPPORT
|
#endif // GGML_MLOCK_SUPPORT
|
||||||
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
6
ggml.h
6
ggml.h
@@ -345,7 +345,11 @@ size_t ggml_used_mem(const struct ggml_context * ctx);
|
|||||||
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
|
||||||
|
|
||||||
bool ggml_mlock_supported(void);
|
bool ggml_mlock_supported(void);
|
||||||
bool ggml_mlock(struct ggml_context * ctx, char ** err_p);
|
bool ggml_mlock(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
const void *opt_extra_addr,
|
||||||
|
size_t opt_extra_len,
|
||||||
|
char **err_p);
|
||||||
|
|
||||||
struct ggml_tensor * ggml_new_tensor(
|
struct ggml_tensor * ggml_new_tensor(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
|
@@ -1595,7 +1595,10 @@ struct llama_context * llama_init_from_file(
|
|||||||
|
|
||||||
if (params.use_mlock) {
|
if (params.use_mlock) {
|
||||||
char *err;
|
char *err;
|
||||||
if (!ggml_mlock(ctx->model.ctx, &err)) {
|
if (!ggml_mlock(ctx->model.ctx,
|
||||||
|
ctx->model.mm_addr,
|
||||||
|
ctx->model.mm_length,
|
||||||
|
&err)) {
|
||||||
fprintf(stderr, "%s\n", err);
|
fprintf(stderr, "%s\n", err);
|
||||||
free(err);
|
free(err);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
|
Loading…
Reference in New Issue
Block a user