mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 13:27:21 +01:00
Get mmap() working with WIN32 MSVC
- We have pretty high quality POSIX polyfills now - We no longer need to override malloc() Tracked by issue #91 Improves upon #341
This commit is contained in:
parent
e4881686b4
commit
cbddf4661b
407
.gitignore
vendored
407
.gitignore
vendored
@ -24,410 +24,3 @@ arm_neon.h
|
||||
compile_commands.json
|
||||
CMakeFiles/
|
||||
CMakeCache.txt
|
||||
|
||||
# Visual Studio stuff
|
||||
*.exe
|
||||
*.sln
|
||||
*.vcxproj
|
||||
*.vcxproj.filters
|
||||
cmake_install.cmake
|
||||
*.dir/
|
||||
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
##
|
||||
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
|
||||
|
||||
# User-specific files
|
||||
*.rsuser
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Mono auto generated files
|
||||
mono_crash.*
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
[Ww][Ii][Nn]32/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
|
||||
# Visual Studio 2015/2017 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# Visual Studio 2017 auto generated files
|
||||
Generated\ Files/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUnit
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
nunit-*.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# Benchmark Results
|
||||
BenchmarkDotNet.Artifacts/
|
||||
|
||||
# .NET Core
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
# ASP.NET Scaffolding
|
||||
ScaffoldingReadMe.txt
|
||||
|
||||
# StyleCop
|
||||
StyleCopReport.xml
|
||||
|
||||
# Files built by Visual Studio
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_h.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.iobj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.ipdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*_wpftmp.csproj
|
||||
*.log
|
||||
*.tlog
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# Visual Studio Trace Files
|
||||
*.e2e
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# AxoCover is a Code Coverage Tool
|
||||
.axoCover/*
|
||||
!.axoCover/settings.json
|
||||
|
||||
# Coverlet is a free, cross platform Code Coverage Tool
|
||||
coverage*.json
|
||||
coverage*.xml
|
||||
coverage*.info
|
||||
|
||||
# Visual Studio code coverage results
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# Note: Comment the next line if you want to checkin your web deploy settings,
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# NuGet Symbol Packages
|
||||
*.snupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/[Pp]ackages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/[Pp]ackages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/[Pp]ackages/repositories.config
|
||||
# NuGet v3's project.json files produces more ignorable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
*.appx
|
||||
*.appxbundle
|
||||
*.appxupload
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!?*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
orleans.codegen.cs
|
||||
|
||||
# Including strong name files can present a security risk
|
||||
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
|
||||
#*.snk
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
ServiceFabricBackup/
|
||||
*.rptproj.bak
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
*.ndf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
*.rptproj.rsuser
|
||||
*- [Bb]ackup.rdl
|
||||
*- [Bb]ackup ([0-9]).rdl
|
||||
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
node_modules/
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||
*.vbw
|
||||
|
||||
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
|
||||
*.vbp
|
||||
|
||||
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
|
||||
*.dsw
|
||||
*.dsp
|
||||
|
||||
# Visual Studio 6 technical files
|
||||
*.ncb
|
||||
*.aps
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# CodeRush personal settings
|
||||
.cr/personal
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
||||
|
||||
# Cake - Uncomment if you are using it
|
||||
# tools/**
|
||||
# !tools/packages.config
|
||||
|
||||
# Tabs Studio
|
||||
*.tss
|
||||
|
||||
# Telerik's JustMock configuration file
|
||||
*.jmconfig
|
||||
|
||||
# BizTalk build output
|
||||
*.btp.cs
|
||||
*.btm.cs
|
||||
*.odx.cs
|
||||
*.xsd.cs
|
||||
|
||||
# OpenCover UI analysis results
|
||||
OpenCover/
|
||||
|
||||
# Azure Stream Analytics local run output
|
||||
ASALocalRun/
|
||||
|
||||
# MSBuild Binary and Structured Log
|
||||
*.binlog
|
||||
|
||||
# NVidia Nsight GPU debugger configuration file
|
||||
*.nvuser
|
||||
|
||||
# MFractors (Xamarin productivity tool) working folder
|
||||
.mfractor/
|
||||
|
||||
# Local History for Visual Studio
|
||||
.localhistory/
|
||||
|
||||
# Visual Studio History (VSHistory) files
|
||||
.vshistory/
|
||||
|
||||
# BeatPulse healthcheck temp database
|
||||
healthchecksdb
|
||||
|
||||
# Backup folder for Package Reference Convert tool in Visual Studio 2017
|
||||
MigrationBackup/
|
||||
|
||||
# Ionide (cross platform F# VS Code tools) working folder
|
||||
.ionide/
|
||||
|
||||
# Fody - auto-generated XML schema
|
||||
FodyWeavers.xsd
|
||||
|
||||
# VS Code files for those working on multiple tools
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
*.code-workspace
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Windows Installer files from build outputs
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# JetBrains Rider
|
||||
*.sln.iml
|
||||
|
@ -107,7 +107,9 @@ endif()
|
||||
add_executable(llama
|
||||
main.cpp
|
||||
utils.cpp
|
||||
utils.h)
|
||||
utils.h
|
||||
mmap.c
|
||||
mmap.h)
|
||||
|
||||
add_executable(quantize
|
||||
quantize.cpp
|
||||
|
11
Makefile
11
Makefile
@ -30,8 +30,8 @@ endif
|
||||
# Compile flags
|
||||
#
|
||||
|
||||
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC -g -fno-omit-frame-pointer
|
||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC -g -fno-omit-frame-pointer
|
||||
CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC
|
||||
CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
|
||||
LDFLAGS =
|
||||
|
||||
# OS specific
|
||||
@ -185,14 +185,17 @@ default: main quantize
|
||||
ggml.o: ggml.c ggml.h
|
||||
$(CC) $(CFLAGS) -c ggml.c -o ggml.o
|
||||
|
||||
mmap.o: mmap.c mmap.h
|
||||
$(CC) $(CFLAGS) -c mmap.c -o mmap.o
|
||||
|
||||
utils.o: utils.cpp utils.h
|
||||
$(CXX) $(CXXFLAGS) -c utils.cpp -o utils.o
|
||||
|
||||
clean:
|
||||
rm -f *.o main quantize
|
||||
|
||||
main: main.cpp ggml.o utils.o
|
||||
$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o -o main $(LDFLAGS)
|
||||
main: main.cpp ggml.o utils.o mmap.o
|
||||
$(CXX) $(CXXFLAGS) main.cpp ggml.o utils.o mmap.o -o main $(LDFLAGS)
|
||||
./main -h
|
||||
|
||||
quantize: quantize.cpp ggml.o utils.o
|
||||
|
4
ggml.c
4
ggml.c
@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
||||
|
||||
*ctx = (struct ggml_context) {
|
||||
/*.mem_size =*/ params.mem_size,
|
||||
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size),
|
||||
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
|
||||
/*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
|
||||
/*.n_objects =*/ 0,
|
||||
/*.objects_begin =*/ NULL,
|
||||
@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) {
|
||||
__func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
|
||||
|
||||
if (ctx->mem_buffer_owned) {
|
||||
_free(ctx->mem_buffer);
|
||||
free(ctx->mem_buffer);
|
||||
}
|
||||
|
||||
found = true;
|
||||
|
3
ggml.h
3
ggml.h
@ -183,9 +183,6 @@ extern "C" {
|
||||
#define GGML_MAX_CONTEXTS 64
|
||||
#define GGML_MAX_OPT 4
|
||||
|
||||
void* _malloc(size_t n);
|
||||
void _free(void* p);
|
||||
|
||||
#ifdef __ARM_NEON
|
||||
// we use the built-in 16-bit float type
|
||||
typedef __fp16 ggml_fp16_t;
|
||||
|
182
main.cpp
182
main.cpp
@ -10,6 +10,7 @@
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstdio>
|
||||
#include <cerrno>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <map>
|
||||
@ -23,10 +24,6 @@
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/stat.h>
|
||||
#else
|
||||
#include <errno.h>
|
||||
#define msync(addr, len_bytes, flag) winMSync
|
||||
#define MS_ASYNC 0
|
||||
#endif
|
||||
|
||||
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
|
||||
@ -34,7 +31,7 @@
|
||||
|
||||
#define MAGIC_PATH "magic.dat"
|
||||
#define MAGIC_ADDR (char *)0x330000000000
|
||||
#define MAGIC_GRAN 2097152
|
||||
#define MAGIC_GRAN 65536
|
||||
#define MAGIC_ALGN (sizeof(size_t) * 2)
|
||||
|
||||
#define ANSI_COLOR_RED "\x1b[31m"
|
||||
@ -104,49 +101,21 @@ struct llama_model {
|
||||
std::map<std::string, struct ggml_tensor *> tensors;
|
||||
};
|
||||
|
||||
|
||||
struct magic {
|
||||
uint32_t magic;
|
||||
std::atomic<unsigned> lock;
|
||||
int fd;
|
||||
size_t commit;
|
||||
size_t offset;
|
||||
size_t capacity;
|
||||
uint64_t commit;
|
||||
uint64_t offset;
|
||||
uint64_t capacity;
|
||||
gpt_vocab *vocab;
|
||||
llama_model *model;
|
||||
};
|
||||
|
||||
static void winMSync(magic* addr, size_t len_bytes) {
|
||||
bool success = FlushViewOfFile((void*)addr, len_bytes);
|
||||
if (!success) {
|
||||
LPVOID lpMsgBuf;
|
||||
LPVOID lpDisplayBuf;
|
||||
DWORD error_code = GetLastError();
|
||||
FormatMessage(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL,
|
||||
error_code,
|
||||
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
(LPTSTR)&lpMsgBuf,
|
||||
0, NULL);
|
||||
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
|
||||
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||
TEXT("failed with error %d: %s"),
|
||||
error_code, lpMsgBuf);
|
||||
}
|
||||
HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd);
|
||||
FlushFileBuffers(hFile);
|
||||
}
|
||||
|
||||
|
||||
static struct magic *mag;
|
||||
|
||||
static inline void spin_lock(std::atomic<unsigned> &lock) {
|
||||
while (!lock.exchange(1, std::memory_order_acquire));
|
||||
while (lock.exchange(1, std::memory_order_acquire));
|
||||
}
|
||||
|
||||
static inline void spin_unlock(std::atomic<unsigned> &lock) {
|
||||
@ -162,62 +131,64 @@ static void *Mmap(void *addr, size_t length, int prot, int flags, int fd, off_t
|
||||
}
|
||||
|
||||
static void magic_commit(void) {
|
||||
mag->offset = mag->capacity;
|
||||
mag->commit = mag->capacity;
|
||||
mag->commit = ROUNDUP(mag->offset, MAGIC_GRAN);
|
||||
mag->magic = 0xFEEDABEE;
|
||||
bool success = msync(mag, mag->commit, MS_ASYNC);
|
||||
if (msync(mag, mag->commit, MS_ASYNC) == -1) {
|
||||
perror("msync");
|
||||
exit(77);
|
||||
}
|
||||
}
|
||||
|
||||
static void magic_init(void) {
|
||||
int fd;
|
||||
size_t n;
|
||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
struct stat st;
|
||||
#else
|
||||
struct _stat64 st;
|
||||
#endif
|
||||
int64_t size;
|
||||
if (mag) return;
|
||||
n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN);
|
||||
if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) {
|
||||
int result = fstat(fd, &st);
|
||||
int error = errno;
|
||||
if (errno == EBADF)
|
||||
fprintf(stderr, "Bad file descriptor.\n");
|
||||
else if (errno == EINVAL)
|
||||
fprintf(stderr, "Invalid argument to _fstat.\n");
|
||||
if (st.st_size >= n) {
|
||||
if ((size = lseek(fd, 0, SEEK_END)) == -1) {
|
||||
perror("lseek");
|
||||
exit(77);
|
||||
}
|
||||
if (size >= n) {
|
||||
mag = (struct magic *)Mmap(MAGIC_ADDR, n,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_FIXED, fd, 0);
|
||||
if (mag->magic == 0xFEEDABEE) {
|
||||
mag = (struct magic *)Mmap(MAGIC_ADDR, mag->capacity,
|
||||
mag = (struct magic *)Mmap(MAGIC_ADDR, mag->commit,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_FIXED, fd, 0);
|
||||
madvise(MAGIC_ADDR, mag->capacity, MADV_WILLNEED);
|
||||
ftruncate(fd, mag->commit);
|
||||
mag->offset = mag->commit;
|
||||
mag->capacity = mag->commit;
|
||||
mag->fd = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
ftruncate(fd, 0);
|
||||
if (ftruncate(fd, 0) == -1) {
|
||||
perror("ftruncate");
|
||||
exit(77);
|
||||
}
|
||||
} else if ((fd = open(MAGIC_PATH, O_RDWR | O_CREAT | O_TRUNC, 0644)) == -1) {
|
||||
perror(MAGIC_PATH);
|
||||
exit(77);
|
||||
}
|
||||
ftruncate(fd, n);
|
||||
if (ftruncate(fd, n) == -1) {
|
||||
perror("ftruncate");
|
||||
exit(77);
|
||||
}
|
||||
mag = (struct magic *)Mmap(MAGIC_ADDR, n,
|
||||
PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, fd, 0);
|
||||
mag->offset = MAGIC_GRAN;
|
||||
mag->offset = n;
|
||||
mag->capacity = n;
|
||||
mag->fd = fd;
|
||||
}
|
||||
|
||||
void *memalign(size_t a, size_t n) {
|
||||
void *magic_memalign(size_t a, size_t n) {
|
||||
void *p;
|
||||
size_t i, j, k, m;
|
||||
static int count;
|
||||
size_t i, j, k, m, c2;
|
||||
magic_init();
|
||||
if (a < MAGIC_ALGN) a = MAGIC_ALGN;
|
||||
while (!IS2POW(a)) ++a;
|
||||
@ -227,24 +198,37 @@ void *memalign(size_t a, size_t n) {
|
||||
i = i + sizeof(size_t);
|
||||
i = ROUNDUP(i, a);
|
||||
j = ROUNDUP(i + m, MAGIC_GRAN);
|
||||
//if (j > mag->capacity) {
|
||||
if (!mag->magic) {
|
||||
int result = ftruncate(mag->fd, j);
|
||||
p = mmap(MAGIC_ADDR + mag->capacity,
|
||||
j - mag->capacity, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
|
||||
} else {
|
||||
p = mmap(MAGIC_ADDR + mag->capacity,
|
||||
j - mag->capacity, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||
if (j > mag->capacity) {
|
||||
c2 = mag->capacity;
|
||||
if (!c2) {
|
||||
c2 = MAGIC_GRAN;
|
||||
}
|
||||
if (p != MAP_FAILED) {
|
||||
mag->capacity = j;
|
||||
} else {
|
||||
while (j > c2) {
|
||||
c2 += c2 >> 4;
|
||||
c2 = ROUNDUP(c2, MAGIC_GRAN);
|
||||
}
|
||||
if (!mag->magic) {
|
||||
if (ftruncate(mag->fd, c2) == -1) {
|
||||
perror("ftruncate");
|
||||
spin_unlock(mag->lock);
|
||||
return 0;
|
||||
}
|
||||
//}
|
||||
p = mmap(MAGIC_ADDR + mag->capacity,
|
||||
c2 - mag->capacity, PROT_READ | PROT_WRITE,
|
||||
MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
|
||||
} else {
|
||||
p = mmap(MAGIC_ADDR + mag->capacity,
|
||||
c2 - mag->capacity, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||
}
|
||||
if (p != MAP_FAILED) {
|
||||
mag->capacity = c2;
|
||||
} else {
|
||||
perror("mmap");
|
||||
spin_unlock(mag->lock);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
mag->offset = i + m;
|
||||
spin_unlock(mag->lock);
|
||||
p = MAGIC_ADDR + i;
|
||||
@ -252,60 +236,44 @@ void *memalign(size_t a, size_t n) {
|
||||
return p;
|
||||
}
|
||||
|
||||
void *_malloc(size_t n) {
|
||||
return memalign(MAGIC_ALGN, n);
|
||||
void *magic_malloc(size_t n) {
|
||||
return magic_memalign(MAGIC_ALGN, n);
|
||||
}
|
||||
|
||||
size_t malloc_usable_size(const void *p) {
|
||||
return ((const size_t *)p)[-1];
|
||||
}
|
||||
|
||||
void *_calloc(size_t n, size_t z) {
|
||||
void *magic_calloc(size_t n, size_t z) {
|
||||
void *p;
|
||||
if ((p = _malloc((n *= z)))) {
|
||||
if ((p = magic_malloc((n *= z)))) {
|
||||
memset(p, 0, n);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
void _free(void *p) {
|
||||
void magic_free(void *p) {
|
||||
// do nothing
|
||||
}
|
||||
|
||||
void *_realloc(void *p, size_t n) {
|
||||
void *magic_realloc(void *p, size_t n) {
|
||||
void *q;
|
||||
if (!p) {
|
||||
return _malloc(n);
|
||||
return magic_malloc(n);
|
||||
}
|
||||
if (!n) {
|
||||
_free(p);
|
||||
magic_free(p);
|
||||
return 0;
|
||||
}
|
||||
if ((q = _malloc(n))) {
|
||||
if ((q = magic_malloc(n))) {
|
||||
memcpy(q, p, ((const size_t *)p)[-1]);
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
#if defined(malloc)
|
||||
# undef malloc
|
||||
#endif
|
||||
#define malloc(x) _malloc(x)
|
||||
void* operator new(size_t size) {
|
||||
return magic_malloc(size);
|
||||
}
|
||||
|
||||
#if defined(calloc)
|
||||
# undef calloc
|
||||
#endif
|
||||
#define calloc(x) _calloc(x)
|
||||
|
||||
#if defined(realloc)
|
||||
# undef realloc
|
||||
#endif
|
||||
#define realloc(x) _realloc(x)
|
||||
|
||||
#if defined(free)
|
||||
# undef free
|
||||
#endif
|
||||
#define free(x) _free(x)
|
||||
void operator delete(void* p) {
|
||||
magic_free(p);
|
||||
}
|
||||
|
||||
// load the model's weights from a file
|
||||
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
|
||||
@ -451,7 +419,7 @@ bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab
|
||||
{
|
||||
struct ggml_init_params params = {
|
||||
/*.mem_size =*/ ctx_size,
|
||||
/*.mem_buffer =*/ NULL,
|
||||
/*.mem_buffer =*/ magic_malloc(ctx_size),
|
||||
};
|
||||
|
||||
model.ctx = ggml_init(params);
|
||||
@ -772,7 +740,7 @@ bool llama_eval(
|
||||
const int d_key = n_embd/n_head;
|
||||
|
||||
static size_t buf_size = 512u*1024*1024;
|
||||
static void * buf = _malloc(buf_size);
|
||||
static void * buf = malloc(buf_size);
|
||||
|
||||
if (mem_per_token > 0 && mem_per_token*N > buf_size) {
|
||||
const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||
@ -780,7 +748,7 @@ bool llama_eval(
|
||||
|
||||
// reallocate
|
||||
buf_size = buf_size_new;
|
||||
buf = _realloc(buf, buf_size);
|
||||
buf = realloc(buf, buf_size);
|
||||
if (buf == nullptr) {
|
||||
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
||||
return false;
|
||||
|
570
mmap.c
Normal file
570
mmap.c
Normal file
@ -0,0 +1,570 @@
|
||||
// Lightweight Portable mmap() Polyfill
|
||||
//
|
||||
// 1. Supports POSIX.1
|
||||
//
|
||||
// The baseline POSIX standard doesn't specify MAP_ANONYMOUS. This
|
||||
// library makes sure, on the hypothetical UNIX systems that don't
|
||||
// have it, or on the mainstream UNIX platforms where the user has
|
||||
// chosen to define _POSIX_C_SOURCE that cause headers to undefine
|
||||
// it, this implementation will fallback to creating a secure temp
|
||||
// file, for each anonymous mapping.
|
||||
//
|
||||
// 2. Supports Windows w/ Visual Studio
|
||||
//
|
||||
// On Windows Vista and later an API exists that's almost as good as
|
||||
// mmap(). However code that uses this library should conform to the
|
||||
// subset of behaviors Microsoft accommodates.
|
||||
//
|
||||
// Caveats
|
||||
//
|
||||
// - You should just assume the page size is 64kb. That's how it is on
|
||||
// Windows and it usually goes faster to assume that elsewhere too.
|
||||
//
|
||||
// - Not designed to support mprotect() at the moment. In order to
|
||||
// support this, we'd need to consider _open(O_ACCMODE) on Windows
|
||||
// and then have mmap() be more greedy about permissions.
|
||||
//
|
||||
// - There's limited support for being clever with memory intervals.
|
||||
// For example, you can't punch a hole in a memory map on Windows.
|
||||
// This abstraction does aim to offer more flexibility than WIN32.
|
||||
// There should also be good error reporting for unsupported uses.
|
||||
|
||||
#include "mmap.h"
|
||||
|
||||
#ifdef NEED_POSIX_MMAP
|
||||
#include <stdlib.h>
|
||||
|
||||
static void *PosixMmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset) {
|
||||
int tfd;
|
||||
void* res;
|
||||
char path[] = "/tmp/llama.dat.XXXXXX";
|
||||
if (~flags & MAP_ANONYMOUS) {
|
||||
res = mmap(addr, length, prot, flags, fd, offset);
|
||||
} else if ((tfd = mkstemp(path)) != -1) {
|
||||
unlink(path);
|
||||
if (!ftruncate(tfd, length)) {
|
||||
res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0);
|
||||
} else {
|
||||
res = MAP_FAILED;
|
||||
}
|
||||
close(tfd);
|
||||
} else {
|
||||
res = MAP_FAILED;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
#elif defined(NEED_WIN32_MMAP)
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <inttypes.h>
|
||||
|
||||
struct WinMap { // O(n) no ordering no overlaps
|
||||
HANDLE hand; // zero means array slots empty
|
||||
HANDLE fand; // for the original file, or -1
|
||||
uintptr_t addr; // base address (64 kb aligned)
|
||||
uintptr_t length; // byte size (>0, rounded 64kb)
|
||||
};
|
||||
|
||||
struct WinMaps {
|
||||
int n;
|
||||
struct WinMap *p;
|
||||
volatile long lock;
|
||||
};
|
||||
|
||||
static struct WinMaps g_winmaps;
|
||||
|
||||
static inline uintptr_t Min(uintptr_t x, uintptr_t y) {
|
||||
return y > x ? x : y;
|
||||
}
|
||||
|
||||
static inline uintptr_t Max(uintptr_t x, uintptr_t y) {
|
||||
return y < x ? x : y;
|
||||
}
|
||||
|
||||
static inline uintptr_t Roundup(uintptr_t x, intptr_t a) {
|
||||
assert(a > 0);
|
||||
assert(!(a & (a - 1)));
|
||||
return (x + (a - 1)) & -a;
|
||||
}
|
||||
|
||||
static inline void Lock(void) {
|
||||
long x;
|
||||
for (;;) {
|
||||
x = InterlockedExchange(&g_winmaps.lock, 1);
|
||||
if (!x) break;
|
||||
assert(x == 1);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void Unlock(void) {
|
||||
assert(g_winmaps.lock == 1);
|
||||
g_winmaps.lock = 0;
|
||||
}
|
||||
|
||||
static int WinStrerror(int err, char *buf, int size) {
|
||||
return FormatMessageA(
|
||||
FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL, err, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
buf, size, NULL);
|
||||
}
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define LogError(thing) (void)0
|
||||
#else
|
||||
static void LogError(const char* file, int line, const char* thing) {
|
||||
#define LogError(thing) LogError(__FILE__, __LINE__, thing)
|
||||
fprintf(stderr, "%s:%d: error: %s\n", file, line, thing);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef NDEBUG
|
||||
#define LogWindowsError(thing) (void)0
|
||||
#else
|
||||
static void LogWindowsError(const char* file, int line, const char* thing) {
|
||||
#define LogWindowsError(thing) LogWindowsError(__FILE__, __LINE__, thing)
|
||||
char s[256];
|
||||
int e = GetLastError();
|
||||
WinStrerror(e, s, sizeof(s));
|
||||
fprintf(stderr, "%s:%d: error[%#x]: %s failed: %s\n", file, line, e, thing, s);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *Recalloc(void *ptr, uint64_t newSize) {
|
||||
HANDLE heap = GetProcessHeap();
|
||||
if (!ptr) {
|
||||
return HeapAlloc(heap, HEAP_ZERO_MEMORY, newSize);
|
||||
}
|
||||
if (!newSize) {
|
||||
HeapFree(heap, 0, ptr);
|
||||
return 0;
|
||||
}
|
||||
return HeapReAlloc(heap, HEAP_ZERO_MEMORY, ptr, newSize);
|
||||
}
|
||||
|
||||
uint64_t WinSeek(int fd, uint64_t offset, int whence) {
|
||||
HANDLE hFile;
|
||||
DWORD winwhence;
|
||||
LARGE_INTEGER distanceToMove;
|
||||
LARGE_INTEGER newFilePointer;
|
||||
distanceToMove.QuadPart = offset;
|
||||
switch (whence) {
|
||||
case SEEK_SET:
|
||||
winwhence = FILE_BEGIN;
|
||||
break;
|
||||
case SEEK_CUR:
|
||||
winwhence = FILE_CURRENT;
|
||||
break;
|
||||
case SEEK_END:
|
||||
winwhence = FILE_END;
|
||||
break;
|
||||
default:
|
||||
LogError("bad lseek() whence");
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
hFile = (HANDLE)_get_osfhandle(fd);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
LogWindowsError("_get_osfhandle");
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
if (GetFileType(hFile) != FILE_TYPE_DISK) {
|
||||
LogError("bad file type for lseek()");
|
||||
errno = ESPIPE;
|
||||
return -1;
|
||||
}
|
||||
if (!SetFilePointerEx(hFile, distanceToMove, &newFilePointer, winwhence)) {
|
||||
LogWindowsError("SetFilePointerEx");
|
||||
errno = EPERM;
|
||||
return -1;
|
||||
}
|
||||
return newFilePointer.QuadPart;
|
||||
}
|
||||
|
||||
int WinFtruncate(int fd, uint64_t length) {
|
||||
HANDLE hFile;
|
||||
LARGE_INTEGER old, neu;
|
||||
hFile = (HANDLE)_get_osfhandle(fd);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
LogWindowsError("_get_osfhandle");
|
||||
errno = EBADF;
|
||||
return -1;
|
||||
}
|
||||
// save current file position
|
||||
old.QuadPart = 0;
|
||||
neu.QuadPart = 0;
|
||||
if (!SetFilePointerEx(hFile, neu, &old, FILE_CURRENT)) {
|
||||
LogWindowsError("SetFilePointerEx#1");
|
||||
return -1;
|
||||
}
|
||||
// set current position to new file size
|
||||
neu.QuadPart = length;
|
||||
if (!SetFilePointerEx(hFile, neu, NULL, FILE_BEGIN)) {
|
||||
LogWindowsError("SetFilePointerEx#2");
|
||||
return -1;
|
||||
}
|
||||
// change the file size
|
||||
if (!SetEndOfFile(hFile)) {
|
||||
LogWindowsError("SetEndOfFile");
|
||||
SetFilePointerEx(hFile, old, NULL, FILE_BEGIN);
|
||||
return -1;
|
||||
}
|
||||
// restore the original file position
|
||||
// win32 allows this to exceed the end of file
|
||||
if (!SetFilePointerEx(hFile, old, NULL, FILE_BEGIN)) {
|
||||
LogWindowsError("SetFilePointerEx>3");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WinMadvise(void *addr, uintptr_t length, int advice) {
|
||||
switch (advice) {
|
||||
case MADV_NORMAL:
|
||||
case MADV_DONTNEED:
|
||||
case MADV_SEQUENTIAL:
|
||||
return 0;
|
||||
case MADV_RANDOM:
|
||||
case MADV_WILLNEED: {
|
||||
HANDLE proc;
|
||||
WIN32_MEMORY_RANGE_ENTRY entry;
|
||||
proc = GetCurrentProcess();
|
||||
entry.VirtualAddress = addr;
|
||||
entry.NumberOfBytes = length;
|
||||
if (!PrefetchVirtualMemory(proc, 1, &entry, 0)) {
|
||||
LogWindowsError("PrefetchVirtualMemory");
|
||||
errno = ENOMEM;
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
default:
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
int WinUnmap(void *addr, uintptr_t length) {
|
||||
void *view;
|
||||
HANDLE hand;
|
||||
HANDLE fand;
|
||||
int i, err = 0;
|
||||
uintptr_t a, b;
|
||||
uintptr_t x, y;
|
||||
// compute the requested interval
|
||||
// 1. length can't be zero
|
||||
// 2. length is rounded up to the page size
|
||||
// 3. addr must be aligned to page boundary
|
||||
a = (uintptr_t)addr;
|
||||
b = a + Roundup(length, 65536);
|
||||
if (!length) {
|
||||
LogError("tried to munmap zero bytes");
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (a & 65535) {
|
||||
LogError("tried to munmap an address that's not 64kb aligned");
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
// 1. we permit unmapping multiple maps in one call
|
||||
// 2. we don't care if the matched mappings aren't contiguous
|
||||
// 3. it's an error if a matched mapping only partially overlaps
|
||||
// 4. similar to close() we release all resources possible on error
|
||||
Lock();
|
||||
for (i = 0; i < g_winmaps.n; ++i) {
|
||||
if (!g_winmaps.p[i].hand) {
|
||||
// this array slot is empty
|
||||
continue;
|
||||
}
|
||||
// compute overlap between known mapping and requested interval
|
||||
x = Max(a, g_winmaps.p[i].addr);
|
||||
y = Min(b, g_winmaps.p[i].addr + g_winmaps.p[i].length);
|
||||
if (x >= y) {
|
||||
// there isn't any overlap
|
||||
continue;
|
||||
}
|
||||
if (y - x != g_winmaps.p[i].length) {
|
||||
// requested interval partially overlapped this mapping
|
||||
// therefore we can't unmap it and must report an error
|
||||
LogError("tried to partially unmap a mapping");
|
||||
err = ENOMEM;
|
||||
continue;
|
||||
}
|
||||
// save the information we care about
|
||||
view = (void *)g_winmaps.p[i].addr;
|
||||
hand = g_winmaps.p[i].hand;
|
||||
fand = g_winmaps.p[i].fand;
|
||||
// delete this mapping from the global array
|
||||
g_winmaps.p[i].hand = 0;
|
||||
// perform the systems operations
|
||||
// safe to release lock since g_winmaps.n is monotonic
|
||||
Unlock();
|
||||
if (!UnmapViewOfFile(view)) {
|
||||
LogWindowsError("UnmapViewOfFile");
|
||||
}
|
||||
if (!CloseHandle(hand)) {
|
||||
LogWindowsError("CloseHandle#1");
|
||||
}
|
||||
if (fand != INVALID_HANDLE_VALUE) {
|
||||
if (!CloseHandle(fand)) {
|
||||
LogWindowsError("CloseHandle#2");
|
||||
}
|
||||
}
|
||||
Lock();
|
||||
}
|
||||
Unlock();
|
||||
if (err) {
|
||||
errno = err;
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void* WinMap(void *addr, uintptr_t length, int prot, int flags, int fd, uint64_t offset) {
|
||||
int i;
|
||||
LPVOID res;
|
||||
HANDLE hand;
|
||||
HANDLE hFile;
|
||||
DWORD access;
|
||||
DWORD wiprot;
|
||||
uintptr_t fsize;
|
||||
if (!length) {
|
||||
LogError("mmap(length) was zero");
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
length = Roundup(length, 65536);
|
||||
if ((uintptr_t)addr & 65535) {
|
||||
if (~flags & MAP_FIXED) {
|
||||
addr = 0;
|
||||
} else {
|
||||
LogError("MAP_FIXED used with address that's not 64kb aligned");
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
}
|
||||
// these are the logical flag equivalents for creating mappings. please
|
||||
// note that any subsequent virtualprotect calls must be a subset of the
|
||||
// permissions we're using here. that's not a supported use case for us
|
||||
if (flags & MAP_PRIVATE) {
|
||||
// private mapping
|
||||
if (prot & PROT_EXEC) {
|
||||
if (prot & PROT_WRITE) {
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
wiprot = PAGE_EXECUTE_READWRITE;
|
||||
access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE;
|
||||
} else {
|
||||
wiprot = PAGE_EXECUTE_WRITECOPY;
|
||||
access = FILE_MAP_COPY | FILE_MAP_EXECUTE;
|
||||
}
|
||||
} else {
|
||||
wiprot = PAGE_EXECUTE_READ;
|
||||
access = FILE_MAP_READ | FILE_MAP_EXECUTE;
|
||||
}
|
||||
} else if (prot & PROT_WRITE) {
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
wiprot = PAGE_READWRITE;
|
||||
access = FILE_MAP_READ | FILE_MAP_WRITE;
|
||||
} else {
|
||||
wiprot = PAGE_WRITECOPY;
|
||||
access = FILE_MAP_COPY;
|
||||
}
|
||||
} else {
|
||||
wiprot = PAGE_READONLY;
|
||||
access = FILE_MAP_READ;
|
||||
}
|
||||
} else {
|
||||
// shared mapping
|
||||
if (prot & PROT_EXEC) {
|
||||
if (prot & PROT_WRITE) {
|
||||
wiprot = PAGE_EXECUTE_READWRITE;
|
||||
access = FILE_MAP_READ | FILE_MAP_WRITE | FILE_MAP_EXECUTE;
|
||||
} else {
|
||||
wiprot = PAGE_EXECUTE_READ;
|
||||
access = FILE_MAP_READ | FILE_MAP_EXECUTE;
|
||||
}
|
||||
} else if (prot & PROT_WRITE) {
|
||||
wiprot = PAGE_READWRITE;
|
||||
access = FILE_MAP_READ | FILE_MAP_WRITE;
|
||||
} else {
|
||||
wiprot = PAGE_READONLY;
|
||||
access = FILE_MAP_READ;
|
||||
}
|
||||
}
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
hFile = INVALID_HANDLE_VALUE;
|
||||
fsize = length;
|
||||
offset = 0;
|
||||
} else {
|
||||
fsize = 0;
|
||||
hFile = (HANDLE)_get_osfhandle(fd);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
LogWindowsError("_get_osfhandle");
|
||||
errno = EBADF;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
if (!DuplicateHandle(GetCurrentProcess(), hFile,
|
||||
GetCurrentProcess(), &hFile,
|
||||
0, FALSE, DUPLICATE_SAME_ACCESS)) {
|
||||
LogWindowsError("DuplicateHandle");
|
||||
errno = EBADF;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
}
|
||||
if (flags & MAP_FIXED) {
|
||||
if (!addr) {
|
||||
// zero chance of microsoft letting us map the null page
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
errno = EINVAL;
|
||||
return MAP_FAILED;
|
||||
} else {
|
||||
// blow away any existing mappings on requested interval
|
||||
if (WinUnmap(addr, length) == -1) {
|
||||
// can only happen if we partially overlap an existing mapping
|
||||
assert(errno == ENOMEM);
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
return MAP_FAILED;
|
||||
}
|
||||
}
|
||||
}
|
||||
hand = CreateFileMapping(hFile, 0, wiprot,
|
||||
(DWORD)(fsize >> 32),
|
||||
(DWORD)fsize,
|
||||
0);
|
||||
if (!hand) {
|
||||
LogWindowsError("CreateFileMapping");
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
errno = EPERM;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
res = MapViewOfFileEx(hand, access,
|
||||
(DWORD)(offset >> 32),
|
||||
(DWORD)offset,
|
||||
length, addr);
|
||||
if (!res) {
|
||||
LogWindowsError("MapViewOfFileEx");
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
CloseHandle(hand);
|
||||
errno = EPERM;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
if (flags & MAP_FIXED) {
|
||||
// this assertion could legitimately fail if two threads engage in a
|
||||
// race to create a MAP_FIXED mapping at the same address and that's
|
||||
// certainly not the kind of use case we're designed to support here
|
||||
assert(res == addr);
|
||||
}
|
||||
// record our new mapping in the global array
|
||||
Lock();
|
||||
for (i = 0; i < g_winmaps.n; ++i) {
|
||||
if (!g_winmaps.p[i].hand) {
|
||||
// we found an empty slot
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i == g_winmaps.n) {
|
||||
// we need to grow the array
|
||||
// it's important to use kernel32 memory
|
||||
// our malloc implementation depends on this
|
||||
int n2;
|
||||
struct WinMap *p2;
|
||||
p2 = g_winmaps.p;
|
||||
n2 = g_winmaps.n;
|
||||
if (n2) {
|
||||
n2 += n2 >> 1;
|
||||
} else {
|
||||
n2 = 7;
|
||||
}
|
||||
if ((p2 = (struct WinMap*)Recalloc(p2, n2 * sizeof(*p2)))) {
|
||||
g_winmaps.p = p2;
|
||||
g_winmaps.n = n2;
|
||||
} else {
|
||||
Unlock();
|
||||
LogError("recalloc failed");
|
||||
UnmapViewOfFile(res);
|
||||
CloseHandle(hand);
|
||||
if (hFile != INVALID_HANDLE_VALUE) {
|
||||
CloseHandle(hFile);
|
||||
}
|
||||
errno = ENOMEM;
|
||||
return MAP_FAILED;
|
||||
}
|
||||
}
|
||||
g_winmaps.p[i].hand = hand;
|
||||
g_winmaps.p[i].fand = hFile;
|
||||
g_winmaps.p[i].addr = (uintptr_t)res;
|
||||
g_winmaps.p[i].length = length;
|
||||
Unlock();
|
||||
return res;
|
||||
}
|
||||
|
||||
int WinMsync(void *addr, uintptr_t length, int flags) {
|
||||
int i, err;
|
||||
HANDLE hand;
|
||||
uintptr_t x, y;
|
||||
if (flags & ~(MS_ASYNC | MS_INVALIDATE | MS_SYNC)) {
|
||||
LogError("bad msync flags");
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
// 1. we do nothing if length is zero (unlike win32 api)
|
||||
// 2. the requested interval may envelop multiple known mappings
|
||||
// 3. we don't care if those mappings aren't contiguous or a hole exists
|
||||
// 4. the requested interval may specify a subrange of any given mapping
|
||||
Lock();
|
||||
for (err = i = 0; i < g_winmaps.n; ++i) {
|
||||
if (!g_winmaps.p[i].hand) {
|
||||
// this array slot is empty
|
||||
continue;
|
||||
}
|
||||
// compute overlap between known mapping and requested interval
|
||||
x = Max((uintptr_t)addr, g_winmaps.p[i].addr);
|
||||
y = Min((uintptr_t)addr + length, g_winmaps.p[i].addr + g_winmaps.p[i].length);
|
||||
if (x >= y) {
|
||||
// there isn't any overlap
|
||||
continue;
|
||||
}
|
||||
// it's safe to release lock temporarily, since g_winmaps.n is monotonic
|
||||
// any race conditions in handle being deleted should be caught by win32
|
||||
hand = g_winmaps.p[i].fand;
|
||||
Unlock();
|
||||
// ensure coherency and that filesystem flush *will* happen
|
||||
if (!FlushViewOfFile((void*)x, y - x)) {
|
||||
LogWindowsError("FlushViewOfFile");
|
||||
err = EPERM;
|
||||
}
|
||||
if (flags & MS_SYNC) {
|
||||
// ensure that filesystem flush *has* happened
|
||||
if (!FlushFileBuffers(hand)) {
|
||||
LogWindowsError("FlushFileBuffers");
|
||||
err = EPERM;
|
||||
}
|
||||
}
|
||||
Lock();
|
||||
}
|
||||
Unlock();
|
||||
if (err) {
|
||||
errno = err;
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else // NEED_*_MAP
|
||||
|
||||
// this is a normal unix platform
|
||||
// add some content to this object so the apple linker doesn't whine
|
||||
int justine_mmap_module;
|
||||
|
||||
#endif // NEED_*_MMAP
|
280
mmap.h
280
mmap.h
@ -1,32 +1,17 @@
|
||||
#pragma once
|
||||
|
||||
// portable mmap() implementation
|
||||
//
|
||||
// - supports win32 (needs vista+)
|
||||
// - supports posix.1 (no map_anonymous)
|
||||
//
|
||||
// notes on windows
|
||||
//
|
||||
// - no errno support
|
||||
// - not designed to support mprotect()
|
||||
// - very poor support for memory intervals
|
||||
|
||||
#include <fcntl.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
#ifndef __MINGW32__
|
||||
#include <Windows.h>
|
||||
#include <strsafe.h>
|
||||
#else
|
||||
#include <windows.h>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define NEED_WIN32_MMAP
|
||||
#include <Windows.h>
|
||||
#include <io.h>
|
||||
#include <atomic>
|
||||
#include <map>
|
||||
|
||||
#ifndef PROT_READ
|
||||
#define PROT_READ 1
|
||||
@ -89,6 +74,26 @@
|
||||
#define MADV_WILLNEED 3
|
||||
#endif
|
||||
|
||||
#ifndef MS_ASYNC
|
||||
#define MS_ASYNC 1
|
||||
#endif
|
||||
#ifndef MS_INVALIDATE
|
||||
#define MS_INVALIDATE 2
|
||||
#endif
|
||||
#ifndef MS_SYNC
|
||||
#define MS_SYNC 4
|
||||
#endif
|
||||
|
||||
#ifndef SEEK_SET
|
||||
#define SEEK_SET 0
|
||||
#endif
|
||||
#ifndef SEEK_CUR
|
||||
#define SEEK_CUR 1
|
||||
#endif
|
||||
#ifndef SEEK_END
|
||||
#define SEEK_END 2
|
||||
#endif
|
||||
|
||||
#ifndef mmap
|
||||
#define mmap WinMap
|
||||
#endif
|
||||
@ -101,8 +106,11 @@
|
||||
#ifndef close
|
||||
#define close _close
|
||||
#endif
|
||||
#ifndef fstat
|
||||
#define fstat _fstati64
|
||||
#ifndef lseek
|
||||
#define lseek WinSeek
|
||||
#endif
|
||||
#ifndef msync
|
||||
#define msync WinMsync
|
||||
#endif
|
||||
#ifndef madvise
|
||||
#define madvise WinMadvise
|
||||
@ -111,219 +119,27 @@
|
||||
#define ftruncate WinFtruncate
|
||||
#endif
|
||||
|
||||
static std::atomic<unsigned> g_winlock;
|
||||
static std::map<LPVOID, HANDLE> g_winmap;
|
||||
uint64_t WinSeek(int, uint64_t, int);
|
||||
int WinMsync(void *, uintptr_t, int);
|
||||
int WinMadvise(void *, uintptr_t, int);
|
||||
int WinFtruncate(int, uint64_t);
|
||||
int WinUnmap(void *, uintptr_t);
|
||||
void *WinMap(void *, uintptr_t, int, int, int, uint64_t);
|
||||
|
||||
#else // _MSC_VER
|
||||
|
||||
static void WinLock(void) {
|
||||
while (!g_winlock.exchange(1, std::memory_order_acquire));
|
||||
}
|
||||
|
||||
static void WunLock(void) {
|
||||
g_winlock.store(0, std::memory_order_release);
|
||||
}
|
||||
|
||||
static int WinMadvise(char* fd, size_t length, int flags) {
|
||||
auto p_handle = GetCurrentProcess();
|
||||
struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length);
|
||||
bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0);
|
||||
if (!success) {
|
||||
LPVOID lpMsgBuf;
|
||||
LPVOID lpDisplayBuf;
|
||||
DWORD error_code = GetLastError();
|
||||
FormatMessage(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL,
|
||||
error_code,
|
||||
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
(LPTSTR)&lpMsgBuf,
|
||||
0, NULL);
|
||||
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
|
||||
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||
TEXT("%s failed with error %d: %s"),
|
||||
error_code, lpMsgBuf);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int WinFtruncate(int fd, uint64_t length) {
|
||||
return _chsize_s(fd, length) ? -1 : 0;
|
||||
}
|
||||
|
||||
static int WinUnmap(void *addr, size_t length) {
|
||||
HANDLE hand;
|
||||
WinLock();
|
||||
hand = g_winmap[addr];
|
||||
g_winmap[addr] = 0;
|
||||
WunLock();
|
||||
if (hand) {
|
||||
UnmapViewOfFile(addr);
|
||||
CloseHandle(hand);
|
||||
return 0;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void *WinMap(void *addr, size_t length, int prot,
|
||||
int flags, int fd, uint64_t offset) {
|
||||
HANDLE hFile;
|
||||
DWORD winprot;
|
||||
DWORD access = 0;
|
||||
HANDLE hand = NULL;
|
||||
LPVOID res = NULL;
|
||||
if (prot & PROT_READ) {
|
||||
access |= FILE_MAP_READ;
|
||||
}
|
||||
if (prot & PROT_WRITE) {
|
||||
access |= FILE_MAP_WRITE;
|
||||
}
|
||||
if (prot & PROT_EXEC) {
|
||||
access |= FILE_MAP_EXECUTE;
|
||||
}
|
||||
if (flags & MAP_PRIVATE) {
|
||||
// private mapping
|
||||
if (prot & PROT_EXEC) {
|
||||
if (prot & PROT_WRITE) {
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
winprot = PAGE_EXECUTE_READWRITE;
|
||||
} else {
|
||||
winprot = PAGE_EXECUTE_WRITECOPY;
|
||||
}
|
||||
} else {
|
||||
winprot = PAGE_EXECUTE_READ;
|
||||
}
|
||||
} else if (prot & PROT_WRITE) {
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
winprot = PAGE_READWRITE;
|
||||
} else {
|
||||
winprot = PAGE_WRITECOPY;
|
||||
}
|
||||
} else {
|
||||
winprot = PAGE_READONLY;
|
||||
}
|
||||
} else {
|
||||
// shared mapping
|
||||
if (prot & PROT_EXEC) {
|
||||
if (prot & PROT_WRITE) {
|
||||
winprot = PAGE_EXECUTE_READWRITE;
|
||||
} else {
|
||||
winprot = PAGE_EXECUTE_READ;
|
||||
}
|
||||
} else if (prot & PROT_WRITE) {
|
||||
winprot = PAGE_READWRITE;
|
||||
} else {
|
||||
winprot = PAGE_READONLY;
|
||||
}
|
||||
}
|
||||
if (flags & MAP_ANONYMOUS) {
|
||||
hFile = INVALID_HANDLE_VALUE;
|
||||
offset = 0;
|
||||
} else {
|
||||
hFile = (HANDLE)_get_osfhandle(fd);
|
||||
if (hFile == INVALID_HANDLE_VALUE) {
|
||||
return MAP_FAILED;
|
||||
}
|
||||
}
|
||||
if (flags & MAP_FIXED) {
|
||||
if (!addr) {
|
||||
return MAP_FAILED;
|
||||
} else {
|
||||
WinUnmap(addr, length);
|
||||
}
|
||||
}
|
||||
hand = CreateFileMapping(hFile, 0, winprot,
|
||||
(offset + length) >> 32,
|
||||
(offset + length), 0);
|
||||
if (!hand) {
|
||||
LPVOID lpMsgBuf;
|
||||
LPVOID lpDisplayBuf;
|
||||
DWORD error_code = GetLastError();
|
||||
FormatMessage(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL,
|
||||
error_code,
|
||||
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
(LPTSTR)&lpMsgBuf,
|
||||
0, NULL);
|
||||
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
|
||||
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||
TEXT("%s failed with error %d: %s"),
|
||||
error_code, lpMsgBuf);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
if (winprot == PAGE_WRITECOPY) {
|
||||
access = FILE_MAP_COPY;
|
||||
}
|
||||
|
||||
res = MapViewOfFileEx(hand, access, offset >> 32,
|
||||
offset, length, addr);
|
||||
if (!res) {
|
||||
LPVOID lpMsgBuf;
|
||||
LPVOID lpDisplayBuf;
|
||||
DWORD error_code = GetLastError();
|
||||
FormatMessage(
|
||||
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||
NULL,
|
||||
error_code,
|
||||
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||
(LPTSTR)&lpMsgBuf,
|
||||
0, NULL);
|
||||
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
|
||||
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||
TEXT("failed with error %d: %s"),
|
||||
error_code, lpMsgBuf);
|
||||
fprintf(stderr, (char*)lpDisplayBuf);
|
||||
CloseHandle(hand);
|
||||
return MAP_FAILED;
|
||||
}
|
||||
WinLock();
|
||||
g_winmap[res] = hand;
|
||||
WunLock();
|
||||
return res;
|
||||
}
|
||||
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#ifndef MAP_ANONYMOUS
|
||||
#define MAP_ANONYMOUS 0x10000000
|
||||
|
||||
static void *PosixMmap(void *addr, size_t length, int prot,
|
||||
int flags, int fd, uint64_t offset) {
|
||||
int tfd;
|
||||
void *res;
|
||||
char path[] = "/tmp/llama.dat.XXXXXX";
|
||||
if (~flags & MAP_ANONYMOUS) {
|
||||
res = mmap(addr, length, prot, flags, fd, offset);
|
||||
} else if ((tfd = mkstemp(path)) != -1) {
|
||||
unlink(path);
|
||||
if (!ftruncate(tfd, length)) {
|
||||
res = mmap(addr, length, prot, flags & ~MAP_ANONYMOUS, tfd, 0);
|
||||
} else {
|
||||
res = MAP_FAILED;
|
||||
}
|
||||
close(tfd);
|
||||
} else {
|
||||
res = MAP_FAILED;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifndef mmap
|
||||
#define NEED_POSIX_MMAP
|
||||
#define mmap PosixMmap
|
||||
#endif
|
||||
#define MAP_ANONYMOUS 0x10000000
|
||||
void *PosixMmap(void*, size_t, int, int, int, off_t);
|
||||
#endif // MAP_ANONYMOUS
|
||||
|
||||
#endif // _MSC_VER
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user