From e4881686b4160c74087ecc9d96df4ed0db6d70ef Mon Sep 17 00:00:00 2001 From: oKatanaaa Date: Tue, 21 Mar 2023 01:46:44 +0400 Subject: [PATCH] Make WIN32 mmap() improvements (#341) Still not fully working yet. Closes #341 --- .gitignore | 409 +++++++++++++++++++++++++++++++++++++++++++++++++++++ ggml.c | 6 +- ggml.h | 3 + main.cpp | 99 ++++++++++--- mmap.h | 70 ++++++++- 5 files changed, 565 insertions(+), 22 deletions(-) diff --git a/.gitignore b/.gitignore index e388b884e..ec7c50540 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,412 @@ models/* arm_neon.h compile_commands.json +CMakeFiles/ +CMakeCache.txt + +# Visual Studio stuff +*.exe +*.sln +*.vcxproj +*.vcxproj.filters +cmake_install.cmake +*.dir/ + +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) +*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files +*.ncb +*.aps + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml diff --git a/ggml.c b/ggml.c index 67fd54937..59a88554a 100644 --- a/ggml.c +++ b/ggml.c @@ -2,7 +2,7 @@ #include "ggml.h" #if defined(_MSC_VER) || defined(__MINGW32__) -#include // using malloc.h with MSC/MINGW +//#include // using malloc.h with MSC/MINGW #elif !defined(__FreeBSD__) && !defined(__NetBSD__) #include #endif @@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { *ctx = (struct ggml_context) { /*.mem_size =*/ params.mem_size, - /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size), + /*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size), /*.mem_buffer_owned =*/ params.mem_buffer ? false : true, /*.n_objects =*/ 0, /*.objects_begin =*/ NULL, @@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) { __func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size); if (ctx->mem_buffer_owned) { - free(ctx->mem_buffer); + _free(ctx->mem_buffer); } found = true; diff --git a/ggml.h b/ggml.h index 7ce655c1b..38b991f8a 100644 --- a/ggml.h +++ b/ggml.h @@ -183,6 +183,9 @@ extern "C" { #define GGML_MAX_CONTEXTS 64 #define GGML_MAX_OPT 4 +void* _malloc(size_t n); +void _free(void* p); + #ifdef __ARM_NEON // we use the built-in 16-bit float type typedef __fp16 ggml_fp16_t; diff --git a/main.cpp b/main.cpp index 4b2afa54d..9e8087a15 100644 --- a/main.cpp +++ b/main.cpp @@ -1,3 +1,7 @@ +#if defined(_MSC_VER) || defined(__MINGW32__) +#define NOMINMAX +#endif + #include "ggml.h" #include "utils.h" @@ -19,6 +23,10 @@ #include #include #include +#else +#include +#define msync(addr, len_bytes, flag) winMSync +#define MS_ASYNC 0 #endif #define ROUNDUP(X, K) (((X) + (K)-1) & -(K)) @@ -96,6 +104,7 @@ struct llama_model { std::map tensors; }; + struct magic { uint32_t magic; std::atomic lock; @@ -103,10 +112,37 @@ struct magic { size_t commit; size_t offset; size_t capacity; - gpt_vocab *vocab; - llama_model *model; + gpt_vocab* vocab; + llama_model* model; }; +static void winMSync(magic* addr, size_t len_bytes) { + bool success = FlushViewOfFile((void*)addr, len_bytes); + if (!success) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("failed with error %d: %s"), + error_code, lpMsgBuf); + } + HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd); + FlushFileBuffers(hFile); +} + + static struct magic *mag; static inline void spin_lock(std::atomic &lock) { @@ -129,17 +165,26 @@ static void magic_commit(void) { mag->offset = mag->capacity; mag->commit = mag->capacity; mag->magic = 0xFEEDABEE; - msync(mag, mag->commit, MS_ASYNC); + bool success = msync(mag, mag->commit, MS_ASYNC); } static void magic_init(void) { int fd; size_t n; +#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) struct stat st; +#else + struct _stat64 st; +#endif if (mag) return; n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN); if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) { - fstat(fd, &st); + int result = fstat(fd, &st); + int error = errno; + if (errno == EBADF) + fprintf(stderr, "Bad file descriptor.\n"); + else if (errno == EINVAL) + fprintf(stderr, "Invalid argument to _fstat.\n"); if (st.st_size >= n) { mag = (struct magic *)Mmap(MAGIC_ADDR, n, PROT_READ | PROT_WRITE, @@ -182,9 +227,9 @@ void *memalign(size_t a, size_t n) { i = i + sizeof(size_t); i = ROUNDUP(i, a); j = ROUNDUP(i + m, MAGIC_GRAN); - if (j > mag->capacity) { + //if (j > mag->capacity) { if (!mag->magic) { - ftruncate(mag->fd, j); + int result = ftruncate(mag->fd, j); p = mmap(MAGIC_ADDR + mag->capacity, j - mag->capacity, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity); @@ -199,7 +244,7 @@ void *memalign(size_t a, size_t n) { spin_unlock(mag->lock); return 0; } - } + //} mag->offset = i + m; spin_unlock(mag->lock); p = MAGIC_ADDR + i; @@ -207,7 +252,7 @@ void *memalign(size_t a, size_t n) { return p; } -void *malloc(size_t n) { +void *_malloc(size_t n) { return memalign(MAGIC_ALGN, n); } @@ -215,33 +260,53 @@ size_t malloc_usable_size(const void *p) { return ((const size_t *)p)[-1]; } -void *calloc(size_t n, size_t z) { +void *_calloc(size_t n, size_t z) { void *p; - if ((p = malloc((n *= z)))) { + if ((p = _malloc((n *= z)))) { memset(p, 0, n); } return p; } -void free(void *p) { +void _free(void *p) { // do nothing } -void *realloc(void *p, size_t n) { +void *_realloc(void *p, size_t n) { void *q; if (!p) { - return malloc(n); + return _malloc(n); } if (!n) { - free(p); + _free(p); return 0; } - if ((q = malloc(n))) { + if ((q = _malloc(n))) { memcpy(q, p, ((const size_t *)p)[-1]); } return q; } +#if defined(malloc) +# undef malloc +#endif +#define malloc(x) _malloc(x) + +#if defined(calloc) +# undef calloc +#endif +#define calloc(x) _calloc(x) + +#if defined(realloc) +# undef realloc +#endif +#define realloc(x) _realloc(x) + +#if defined(free) +# undef free +#endif +#define free(x) _free(x) + // load the model's weights from a file bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) { fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str()); @@ -707,7 +772,7 @@ bool llama_eval( const int d_key = n_embd/n_head; static size_t buf_size = 512u*1024*1024; - static void * buf = malloc(buf_size); + static void * buf = _malloc(buf_size); if (mem_per_token > 0 && mem_per_token*N > buf_size) { const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead @@ -715,7 +780,7 @@ bool llama_eval( // reallocate buf_size = buf_size_new; - buf = realloc(buf, buf_size); + buf = _realloc(buf, buf_size); if (buf == nullptr) { fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size); return false; diff --git a/mmap.h b/mmap.h index 3559f51a3..851553048 100644 --- a/mmap.h +++ b/mmap.h @@ -20,6 +20,7 @@ #if defined(_MSC_VER) || defined(__MINGW32__) #ifndef __MINGW32__ #include +#include #else #include #endif @@ -101,7 +102,7 @@ #define close _close #endif #ifndef fstat -#define fstat _fstat +#define fstat _fstati64 #endif #ifndef madvise #define madvise WinMadvise @@ -113,6 +114,7 @@ static std::atomic g_winlock; static std::map g_winmap; + static void WinLock(void) { while (!g_winlock.exchange(1, std::memory_order_acquire)); } @@ -121,7 +123,30 @@ static void WunLock(void) { g_winlock.store(0, std::memory_order_release); } -static int WinMadvise(int fd, size_t length, int flags) { +static int WinMadvise(char* fd, size_t length, int flags) { + auto p_handle = GetCurrentProcess(); + struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length); + bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0); + if (!success) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("%s failed with error %d: %s"), + error_code, lpMsgBuf); + } return 0; } @@ -215,11 +240,52 @@ static void *WinMap(void *addr, size_t length, int prot, (offset + length) >> 32, (offset + length), 0); if (!hand) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("%s failed with error %d: %s"), + error_code, lpMsgBuf); return MAP_FAILED; } + if (winprot == PAGE_WRITECOPY) { + access = FILE_MAP_COPY; + } + res = MapViewOfFileEx(hand, access, offset >> 32, offset, length, addr); if (!res) { + LPVOID lpMsgBuf; + LPVOID lpDisplayBuf; + DWORD error_code = GetLastError(); + FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error_code, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), + (LPTSTR)&lpMsgBuf, + 0, NULL); + lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT, + (lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR)); + StringCchPrintf((LPTSTR)lpDisplayBuf, + LocalSize(lpDisplayBuf) / sizeof(TCHAR), + TEXT("failed with error %d: %s"), + error_code, lpMsgBuf); + fprintf(stderr, (char*)lpDisplayBuf); CloseHandle(hand); return MAP_FAILED; }