diff --git a/ggml.c b/ggml.c index 4ea715957..25fa72632 100644 --- a/ggml.c +++ b/ggml.c @@ -2884,36 +2884,47 @@ size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) return result; } +#ifdef __APPLE__ +#define MLOCK_SUGGESTION \ + "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or " \ + "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l).\n" +#else +#define MLOCK_SUGGESTION \ + "Try increasing RLIMIT_MLOCK ('ulimit -l' as root).\n" +#endif + bool ggml_mlock_supported(void) { return GGML_MLOCK_SUPPORT; } +bool ggml_mlock( + struct ggml_context * ctx, + const void *opt_extra_addr, + size_t opt_extra_len, + char **err_p) { + // TODO: Use SetProcessWorkingSetSize() + VirtualLock() on WIN32 #if GGML_MLOCK_SUPPORT -#ifdef __APPLE__ - #define MLOCK_SUGGESTION "Try increasing the sysctl values 'vm.user_wire_limit' and 'vm.global_user_wire_limit' and/or\n" \ - "decreasing 'vm.global_no_user_wire_amount'. Also try increasing RLIMIT_MLOCK (ulimit -l)." -#else - #define MLOCK_SUGGESTION "Try increasing RLIMIT_MLOCK (ulimit -l)." -#endif -bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { if (ctx->mem_buffer_mlocked) { return true; } - if (mlock(ctx->mem_buffer, ctx->mem_size)) { - int ret = asprintf(err_p, "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION, - ctx->mem_size, strerror(errno)); - GGML_ASSERT(ret >= 0); + if (mlock(ctx->mem_buffer, ctx->mem_size) || + (opt_extra_len && + mlock(opt_extra_addr, opt_extra_len))) { + if ((*err_p = malloc(1024))) { + snprintf(*err_p, 1024, + "failed to mlock %zu-byte buffer: %s\n" MLOCK_SUGGESTION, + ctx->mem_size + opt_extra_len, + strerror(errno)); + } return false; } ctx->mem_buffer_mlocked = true; return true; -} #else // GGML_MLOCK_SUPPORT -bool ggml_mlock(struct ggml_context * ctx, char ** err_p) { *err_p = strdup("can't mlock because it's not supported on this system"); return false; -} #endif // GGML_MLOCK_SUPPORT +} //////////////////////////////////////////////////////////////////////////////// diff --git a/ggml.h b/ggml.h index 058dfe230..f7791ed11 100644 --- a/ggml.h +++ b/ggml.h @@ -345,7 +345,11 @@ size_t ggml_used_mem(const struct ggml_context * ctx); size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch); bool ggml_mlock_supported(void); -bool ggml_mlock(struct ggml_context * ctx, char ** err_p); +bool ggml_mlock( + struct ggml_context * ctx, + const void *opt_extra_addr, + size_t opt_extra_len, + char **err_p); struct ggml_tensor * ggml_new_tensor( struct ggml_context * ctx, diff --git a/llama.cpp b/llama.cpp index b00e06523..28e885cef 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1595,7 +1595,10 @@ struct llama_context * llama_init_from_file( if (params.use_mlock) { char *err; - if (!ggml_mlock(ctx->model.ctx, &err)) { + if (!ggml_mlock(ctx->model.ctx, + ctx->model.mm_addr, + ctx->model.mm_length, + &err)) { fprintf(stderr, "%s\n", err); free(err); llama_free(ctx);