mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-27 04:23:06 +01:00
Make WIN32 mmap() improvements (#341)
Still not fully working yet. Closes #341
This commit is contained in:
parent
0b5448a3a4
commit
e4881686b4
409
.gitignore
vendored
409
.gitignore
vendored
@ -22,3 +22,412 @@ models/*
|
|||||||
|
|
||||||
arm_neon.h
|
arm_neon.h
|
||||||
compile_commands.json
|
compile_commands.json
|
||||||
|
CMakeFiles/
|
||||||
|
CMakeCache.txt
|
||||||
|
|
||||||
|
# Visual Studio stuff
|
||||||
|
*.exe
|
||||||
|
*.sln
|
||||||
|
*.vcxproj
|
||||||
|
*.vcxproj.filters
|
||||||
|
cmake_install.cmake
|
||||||
|
*.dir/
|
||||||
|
|
||||||
|
## Ignore Visual Studio temporary files, build results, and
|
||||||
|
## files generated by popular Visual Studio add-ons.
|
||||||
|
##
|
||||||
|
## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore
|
||||||
|
|
||||||
|
# User-specific files
|
||||||
|
*.rsuser
|
||||||
|
*.suo
|
||||||
|
*.user
|
||||||
|
*.userosscache
|
||||||
|
*.sln.docstates
|
||||||
|
|
||||||
|
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||||
|
*.userprefs
|
||||||
|
|
||||||
|
# Mono auto generated files
|
||||||
|
mono_crash.*
|
||||||
|
|
||||||
|
# Build results
|
||||||
|
[Dd]ebug/
|
||||||
|
[Dd]ebugPublic/
|
||||||
|
[Rr]elease/
|
||||||
|
[Rr]eleases/
|
||||||
|
x64/
|
||||||
|
x86/
|
||||||
|
[Ww][Ii][Nn]32/
|
||||||
|
[Aa][Rr][Mm]/
|
||||||
|
[Aa][Rr][Mm]64/
|
||||||
|
bld/
|
||||||
|
[Bb]in/
|
||||||
|
[Oo]bj/
|
||||||
|
[Ll]og/
|
||||||
|
[Ll]ogs/
|
||||||
|
|
||||||
|
# Visual Studio 2015/2017 cache/options directory
|
||||||
|
.vs/
|
||||||
|
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||||
|
#wwwroot/
|
||||||
|
|
||||||
|
# Visual Studio 2017 auto generated files
|
||||||
|
Generated\ Files/
|
||||||
|
|
||||||
|
# MSTest test Results
|
||||||
|
[Tt]est[Rr]esult*/
|
||||||
|
[Bb]uild[Ll]og.*
|
||||||
|
|
||||||
|
# NUnit
|
||||||
|
*.VisualState.xml
|
||||||
|
TestResult.xml
|
||||||
|
nunit-*.xml
|
||||||
|
|
||||||
|
# Build Results of an ATL Project
|
||||||
|
[Dd]ebugPS/
|
||||||
|
[Rr]eleasePS/
|
||||||
|
dlldata.c
|
||||||
|
|
||||||
|
# Benchmark Results
|
||||||
|
BenchmarkDotNet.Artifacts/
|
||||||
|
|
||||||
|
# .NET Core
|
||||||
|
project.lock.json
|
||||||
|
project.fragment.lock.json
|
||||||
|
artifacts/
|
||||||
|
|
||||||
|
# ASP.NET Scaffolding
|
||||||
|
ScaffoldingReadMe.txt
|
||||||
|
|
||||||
|
# StyleCop
|
||||||
|
StyleCopReport.xml
|
||||||
|
|
||||||
|
# Files built by Visual Studio
|
||||||
|
*_i.c
|
||||||
|
*_p.c
|
||||||
|
*_h.h
|
||||||
|
*.ilk
|
||||||
|
*.meta
|
||||||
|
*.obj
|
||||||
|
*.iobj
|
||||||
|
*.pch
|
||||||
|
*.pdb
|
||||||
|
*.ipdb
|
||||||
|
*.pgc
|
||||||
|
*.pgd
|
||||||
|
*.rsp
|
||||||
|
*.sbr
|
||||||
|
*.tlb
|
||||||
|
*.tli
|
||||||
|
*.tlh
|
||||||
|
*.tmp
|
||||||
|
*.tmp_proj
|
||||||
|
*_wpftmp.csproj
|
||||||
|
*.log
|
||||||
|
*.tlog
|
||||||
|
*.vspscc
|
||||||
|
*.vssscc
|
||||||
|
.builds
|
||||||
|
*.pidb
|
||||||
|
*.svclog
|
||||||
|
*.scc
|
||||||
|
|
||||||
|
# Chutzpah Test files
|
||||||
|
_Chutzpah*
|
||||||
|
|
||||||
|
# Visual C++ cache files
|
||||||
|
ipch/
|
||||||
|
*.aps
|
||||||
|
*.ncb
|
||||||
|
*.opendb
|
||||||
|
*.opensdf
|
||||||
|
*.sdf
|
||||||
|
*.cachefile
|
||||||
|
*.VC.db
|
||||||
|
*.VC.VC.opendb
|
||||||
|
|
||||||
|
# Visual Studio profiler
|
||||||
|
*.psess
|
||||||
|
*.vsp
|
||||||
|
*.vspx
|
||||||
|
*.sap
|
||||||
|
|
||||||
|
# Visual Studio Trace Files
|
||||||
|
*.e2e
|
||||||
|
|
||||||
|
# TFS 2012 Local Workspace
|
||||||
|
$tf/
|
||||||
|
|
||||||
|
# Guidance Automation Toolkit
|
||||||
|
*.gpState
|
||||||
|
|
||||||
|
# ReSharper is a .NET coding add-in
|
||||||
|
_ReSharper*/
|
||||||
|
*.[Rr]e[Ss]harper
|
||||||
|
*.DotSettings.user
|
||||||
|
|
||||||
|
# TeamCity is a build add-in
|
||||||
|
_TeamCity*
|
||||||
|
|
||||||
|
# DotCover is a Code Coverage Tool
|
||||||
|
*.dotCover
|
||||||
|
|
||||||
|
# AxoCover is a Code Coverage Tool
|
||||||
|
.axoCover/*
|
||||||
|
!.axoCover/settings.json
|
||||||
|
|
||||||
|
# Coverlet is a free, cross platform Code Coverage Tool
|
||||||
|
coverage*.json
|
||||||
|
coverage*.xml
|
||||||
|
coverage*.info
|
||||||
|
|
||||||
|
# Visual Studio code coverage results
|
||||||
|
*.coverage
|
||||||
|
*.coveragexml
|
||||||
|
|
||||||
|
# NCrunch
|
||||||
|
_NCrunch_*
|
||||||
|
.*crunch*.local.xml
|
||||||
|
nCrunchTemp_*
|
||||||
|
|
||||||
|
# MightyMoose
|
||||||
|
*.mm.*
|
||||||
|
AutoTest.Net/
|
||||||
|
|
||||||
|
# Web workbench (sass)
|
||||||
|
.sass-cache/
|
||||||
|
|
||||||
|
# Installshield output folder
|
||||||
|
[Ee]xpress/
|
||||||
|
|
||||||
|
# DocProject is a documentation generator add-in
|
||||||
|
DocProject/buildhelp/
|
||||||
|
DocProject/Help/*.HxT
|
||||||
|
DocProject/Help/*.HxC
|
||||||
|
DocProject/Help/*.hhc
|
||||||
|
DocProject/Help/*.hhk
|
||||||
|
DocProject/Help/*.hhp
|
||||||
|
DocProject/Help/Html2
|
||||||
|
DocProject/Help/html
|
||||||
|
|
||||||
|
# Click-Once directory
|
||||||
|
publish/
|
||||||
|
|
||||||
|
# Publish Web Output
|
||||||
|
*.[Pp]ublish.xml
|
||||||
|
*.azurePubxml
|
||||||
|
# Note: Comment the next line if you want to checkin your web deploy settings,
|
||||||
|
# but database connection strings (with potential passwords) will be unencrypted
|
||||||
|
*.pubxml
|
||||||
|
*.publishproj
|
||||||
|
|
||||||
|
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||||
|
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||||
|
# in these scripts will be unencrypted
|
||||||
|
PublishScripts/
|
||||||
|
|
||||||
|
# NuGet Packages
|
||||||
|
*.nupkg
|
||||||
|
# NuGet Symbol Packages
|
||||||
|
*.snupkg
|
||||||
|
# The packages folder can be ignored because of Package Restore
|
||||||
|
**/[Pp]ackages/*
|
||||||
|
# except build/, which is used as an MSBuild target.
|
||||||
|
!**/[Pp]ackages/build/
|
||||||
|
# Uncomment if necessary however generally it will be regenerated when needed
|
||||||
|
#!**/[Pp]ackages/repositories.config
|
||||||
|
# NuGet v3's project.json files produces more ignorable files
|
||||||
|
*.nuget.props
|
||||||
|
*.nuget.targets
|
||||||
|
|
||||||
|
# Microsoft Azure Build Output
|
||||||
|
csx/
|
||||||
|
*.build.csdef
|
||||||
|
|
||||||
|
# Microsoft Azure Emulator
|
||||||
|
ecf/
|
||||||
|
rcf/
|
||||||
|
|
||||||
|
# Windows Store app package directories and files
|
||||||
|
AppPackages/
|
||||||
|
BundleArtifacts/
|
||||||
|
Package.StoreAssociation.xml
|
||||||
|
_pkginfo.txt
|
||||||
|
*.appx
|
||||||
|
*.appxbundle
|
||||||
|
*.appxupload
|
||||||
|
|
||||||
|
# Visual Studio cache files
|
||||||
|
# files ending in .cache can be ignored
|
||||||
|
*.[Cc]ache
|
||||||
|
# but keep track of directories ending in .cache
|
||||||
|
!?*.[Cc]ache/
|
||||||
|
|
||||||
|
# Others
|
||||||
|
ClientBin/
|
||||||
|
~$*
|
||||||
|
*~
|
||||||
|
*.dbmdl
|
||||||
|
*.dbproj.schemaview
|
||||||
|
*.jfm
|
||||||
|
*.pfx
|
||||||
|
*.publishsettings
|
||||||
|
orleans.codegen.cs
|
||||||
|
|
||||||
|
# Including strong name files can present a security risk
|
||||||
|
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
|
||||||
|
#*.snk
|
||||||
|
|
||||||
|
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||||
|
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||||
|
#bower_components/
|
||||||
|
|
||||||
|
# RIA/Silverlight projects
|
||||||
|
Generated_Code/
|
||||||
|
|
||||||
|
# Backup & report files from converting an old project file
|
||||||
|
# to a newer Visual Studio version. Backup files are not needed,
|
||||||
|
# because we have git ;-)
|
||||||
|
_UpgradeReport_Files/
|
||||||
|
Backup*/
|
||||||
|
UpgradeLog*.XML
|
||||||
|
UpgradeLog*.htm
|
||||||
|
ServiceFabricBackup/
|
||||||
|
*.rptproj.bak
|
||||||
|
|
||||||
|
# SQL Server files
|
||||||
|
*.mdf
|
||||||
|
*.ldf
|
||||||
|
*.ndf
|
||||||
|
|
||||||
|
# Business Intelligence projects
|
||||||
|
*.rdl.data
|
||||||
|
*.bim.layout
|
||||||
|
*.bim_*.settings
|
||||||
|
*.rptproj.rsuser
|
||||||
|
*- [Bb]ackup.rdl
|
||||||
|
*- [Bb]ackup ([0-9]).rdl
|
||||||
|
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||||
|
|
||||||
|
# Microsoft Fakes
|
||||||
|
FakesAssemblies/
|
||||||
|
|
||||||
|
# GhostDoc plugin setting file
|
||||||
|
*.GhostDoc.xml
|
||||||
|
|
||||||
|
# Node.js Tools for Visual Studio
|
||||||
|
.ntvs_analysis.dat
|
||||||
|
node_modules/
|
||||||
|
|
||||||
|
# Visual Studio 6 build log
|
||||||
|
*.plg
|
||||||
|
|
||||||
|
# Visual Studio 6 workspace options file
|
||||||
|
*.opt
|
||||||
|
|
||||||
|
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
|
||||||
|
*.vbw
|
||||||
|
|
||||||
|
# Visual Studio 6 auto-generated project file (contains which files were open etc.)
|
||||||
|
*.vbp
|
||||||
|
|
||||||
|
# Visual Studio 6 workspace and project file (working project files containing files to include in project)
|
||||||
|
*.dsw
|
||||||
|
*.dsp
|
||||||
|
|
||||||
|
# Visual Studio 6 technical files
|
||||||
|
*.ncb
|
||||||
|
*.aps
|
||||||
|
|
||||||
|
# Visual Studio LightSwitch build output
|
||||||
|
**/*.HTMLClient/GeneratedArtifacts
|
||||||
|
**/*.DesktopClient/GeneratedArtifacts
|
||||||
|
**/*.DesktopClient/ModelManifest.xml
|
||||||
|
**/*.Server/GeneratedArtifacts
|
||||||
|
**/*.Server/ModelManifest.xml
|
||||||
|
_Pvt_Extensions
|
||||||
|
|
||||||
|
# Paket dependency manager
|
||||||
|
.paket/paket.exe
|
||||||
|
paket-files/
|
||||||
|
|
||||||
|
# FAKE - F# Make
|
||||||
|
.fake/
|
||||||
|
|
||||||
|
# CodeRush personal settings
|
||||||
|
.cr/personal
|
||||||
|
|
||||||
|
# Python Tools for Visual Studio (PTVS)
|
||||||
|
__pycache__/
|
||||||
|
*.pyc
|
||||||
|
|
||||||
|
# Cake - Uncomment if you are using it
|
||||||
|
# tools/**
|
||||||
|
# !tools/packages.config
|
||||||
|
|
||||||
|
# Tabs Studio
|
||||||
|
*.tss
|
||||||
|
|
||||||
|
# Telerik's JustMock configuration file
|
||||||
|
*.jmconfig
|
||||||
|
|
||||||
|
# BizTalk build output
|
||||||
|
*.btp.cs
|
||||||
|
*.btm.cs
|
||||||
|
*.odx.cs
|
||||||
|
*.xsd.cs
|
||||||
|
|
||||||
|
# OpenCover UI analysis results
|
||||||
|
OpenCover/
|
||||||
|
|
||||||
|
# Azure Stream Analytics local run output
|
||||||
|
ASALocalRun/
|
||||||
|
|
||||||
|
# MSBuild Binary and Structured Log
|
||||||
|
*.binlog
|
||||||
|
|
||||||
|
# NVidia Nsight GPU debugger configuration file
|
||||||
|
*.nvuser
|
||||||
|
|
||||||
|
# MFractors (Xamarin productivity tool) working folder
|
||||||
|
.mfractor/
|
||||||
|
|
||||||
|
# Local History for Visual Studio
|
||||||
|
.localhistory/
|
||||||
|
|
||||||
|
# Visual Studio History (VSHistory) files
|
||||||
|
.vshistory/
|
||||||
|
|
||||||
|
# BeatPulse healthcheck temp database
|
||||||
|
healthchecksdb
|
||||||
|
|
||||||
|
# Backup folder for Package Reference Convert tool in Visual Studio 2017
|
||||||
|
MigrationBackup/
|
||||||
|
|
||||||
|
# Ionide (cross platform F# VS Code tools) working folder
|
||||||
|
.ionide/
|
||||||
|
|
||||||
|
# Fody - auto-generated XML schema
|
||||||
|
FodyWeavers.xsd
|
||||||
|
|
||||||
|
# VS Code files for those working on multiple tools
|
||||||
|
.vscode/*
|
||||||
|
!.vscode/settings.json
|
||||||
|
!.vscode/tasks.json
|
||||||
|
!.vscode/launch.json
|
||||||
|
!.vscode/extensions.json
|
||||||
|
*.code-workspace
|
||||||
|
|
||||||
|
# Local History for Visual Studio Code
|
||||||
|
.history/
|
||||||
|
|
||||||
|
# Windows Installer files from build outputs
|
||||||
|
*.cab
|
||||||
|
*.msi
|
||||||
|
*.msix
|
||||||
|
*.msm
|
||||||
|
*.msp
|
||||||
|
|
||||||
|
# JetBrains Rider
|
||||||
|
*.sln.iml
|
||||||
|
6
ggml.c
6
ggml.c
@ -2,7 +2,7 @@
|
|||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#include <malloc.h> // using malloc.h with MSC/MINGW
|
//#include <malloc.h> // using malloc.h with MSC/MINGW
|
||||||
#elif !defined(__FreeBSD__) && !defined(__NetBSD__)
|
#elif !defined(__FreeBSD__) && !defined(__NetBSD__)
|
||||||
#include <alloca.h>
|
#include <alloca.h>
|
||||||
#endif
|
#endif
|
||||||
@ -2437,7 +2437,7 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
|
|||||||
|
|
||||||
*ctx = (struct ggml_context) {
|
*ctx = (struct ggml_context) {
|
||||||
/*.mem_size =*/ params.mem_size,
|
/*.mem_size =*/ params.mem_size,
|
||||||
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : malloc(params.mem_size),
|
/*.mem_buffer =*/ params.mem_buffer ? params.mem_buffer : _malloc(params.mem_size),
|
||||||
/*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
|
/*.mem_buffer_owned =*/ params.mem_buffer ? false : true,
|
||||||
/*.n_objects =*/ 0,
|
/*.n_objects =*/ 0,
|
||||||
/*.objects_begin =*/ NULL,
|
/*.objects_begin =*/ NULL,
|
||||||
@ -2469,7 +2469,7 @@ void ggml_free(struct ggml_context * ctx) {
|
|||||||
__func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
|
__func__, i, ctx->n_objects, ctx->objects_end->offs + ctx->objects_end->size);
|
||||||
|
|
||||||
if (ctx->mem_buffer_owned) {
|
if (ctx->mem_buffer_owned) {
|
||||||
free(ctx->mem_buffer);
|
_free(ctx->mem_buffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
found = true;
|
found = true;
|
||||||
|
3
ggml.h
3
ggml.h
@ -183,6 +183,9 @@ extern "C" {
|
|||||||
#define GGML_MAX_CONTEXTS 64
|
#define GGML_MAX_CONTEXTS 64
|
||||||
#define GGML_MAX_OPT 4
|
#define GGML_MAX_OPT 4
|
||||||
|
|
||||||
|
void* _malloc(size_t n);
|
||||||
|
void _free(void* p);
|
||||||
|
|
||||||
#ifdef __ARM_NEON
|
#ifdef __ARM_NEON
|
||||||
// we use the built-in 16-bit float type
|
// we use the built-in 16-bit float type
|
||||||
typedef __fp16 ggml_fp16_t;
|
typedef __fp16 ggml_fp16_t;
|
||||||
|
99
main.cpp
99
main.cpp
@ -1,3 +1,7 @@
|
|||||||
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
|
#define NOMINMAX
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
@ -19,6 +23,10 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <sys/mman.h>
|
#include <sys/mman.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#else
|
||||||
|
#include <errno.h>
|
||||||
|
#define msync(addr, len_bytes, flag) winMSync
|
||||||
|
#define MS_ASYNC 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
|
#define ROUNDUP(X, K) (((X) + (K)-1) & -(K))
|
||||||
@ -96,6 +104,7 @@ struct llama_model {
|
|||||||
std::map<std::string, struct ggml_tensor *> tensors;
|
std::map<std::string, struct ggml_tensor *> tensors;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
struct magic {
|
struct magic {
|
||||||
uint32_t magic;
|
uint32_t magic;
|
||||||
std::atomic<unsigned> lock;
|
std::atomic<unsigned> lock;
|
||||||
@ -103,10 +112,37 @@ struct magic {
|
|||||||
size_t commit;
|
size_t commit;
|
||||||
size_t offset;
|
size_t offset;
|
||||||
size_t capacity;
|
size_t capacity;
|
||||||
gpt_vocab *vocab;
|
gpt_vocab* vocab;
|
||||||
llama_model *model;
|
llama_model* model;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void winMSync(magic* addr, size_t len_bytes) {
|
||||||
|
bool success = FlushViewOfFile((void*)addr, len_bytes);
|
||||||
|
if (!success) {
|
||||||
|
LPVOID lpMsgBuf;
|
||||||
|
LPVOID lpDisplayBuf;
|
||||||
|
DWORD error_code = GetLastError();
|
||||||
|
FormatMessage(
|
||||||
|
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||||
|
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||||
|
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
NULL,
|
||||||
|
error_code,
|
||||||
|
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||||
|
(LPTSTR)&lpMsgBuf,
|
||||||
|
0, NULL);
|
||||||
|
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||||
|
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
|
||||||
|
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||||
|
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||||
|
TEXT("failed with error %d: %s"),
|
||||||
|
error_code, lpMsgBuf);
|
||||||
|
}
|
||||||
|
HANDLE hFile = (HANDLE)_get_osfhandle(addr->fd);
|
||||||
|
FlushFileBuffers(hFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static struct magic *mag;
|
static struct magic *mag;
|
||||||
|
|
||||||
static inline void spin_lock(std::atomic<unsigned> &lock) {
|
static inline void spin_lock(std::atomic<unsigned> &lock) {
|
||||||
@ -129,17 +165,26 @@ static void magic_commit(void) {
|
|||||||
mag->offset = mag->capacity;
|
mag->offset = mag->capacity;
|
||||||
mag->commit = mag->capacity;
|
mag->commit = mag->capacity;
|
||||||
mag->magic = 0xFEEDABEE;
|
mag->magic = 0xFEEDABEE;
|
||||||
msync(mag, mag->commit, MS_ASYNC);
|
bool success = msync(mag, mag->commit, MS_ASYNC);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void magic_init(void) {
|
static void magic_init(void) {
|
||||||
int fd;
|
int fd;
|
||||||
size_t n;
|
size_t n;
|
||||||
|
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||||
struct stat st;
|
struct stat st;
|
||||||
|
#else
|
||||||
|
struct _stat64 st;
|
||||||
|
#endif
|
||||||
if (mag) return;
|
if (mag) return;
|
||||||
n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN);
|
n = ROUNDUP(sizeof(struct magic), MAGIC_GRAN);
|
||||||
if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) {
|
if ((fd = open(MAGIC_PATH, O_RDWR)) != -1) {
|
||||||
fstat(fd, &st);
|
int result = fstat(fd, &st);
|
||||||
|
int error = errno;
|
||||||
|
if (errno == EBADF)
|
||||||
|
fprintf(stderr, "Bad file descriptor.\n");
|
||||||
|
else if (errno == EINVAL)
|
||||||
|
fprintf(stderr, "Invalid argument to _fstat.\n");
|
||||||
if (st.st_size >= n) {
|
if (st.st_size >= n) {
|
||||||
mag = (struct magic *)Mmap(MAGIC_ADDR, n,
|
mag = (struct magic *)Mmap(MAGIC_ADDR, n,
|
||||||
PROT_READ | PROT_WRITE,
|
PROT_READ | PROT_WRITE,
|
||||||
@ -182,9 +227,9 @@ void *memalign(size_t a, size_t n) {
|
|||||||
i = i + sizeof(size_t);
|
i = i + sizeof(size_t);
|
||||||
i = ROUNDUP(i, a);
|
i = ROUNDUP(i, a);
|
||||||
j = ROUNDUP(i + m, MAGIC_GRAN);
|
j = ROUNDUP(i + m, MAGIC_GRAN);
|
||||||
if (j > mag->capacity) {
|
//if (j > mag->capacity) {
|
||||||
if (!mag->magic) {
|
if (!mag->magic) {
|
||||||
ftruncate(mag->fd, j);
|
int result = ftruncate(mag->fd, j);
|
||||||
p = mmap(MAGIC_ADDR + mag->capacity,
|
p = mmap(MAGIC_ADDR + mag->capacity,
|
||||||
j - mag->capacity, PROT_READ | PROT_WRITE,
|
j - mag->capacity, PROT_READ | PROT_WRITE,
|
||||||
MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
|
MAP_SHARED | MAP_FIXED, mag->fd, mag->capacity);
|
||||||
@ -199,7 +244,7 @@ void *memalign(size_t a, size_t n) {
|
|||||||
spin_unlock(mag->lock);
|
spin_unlock(mag->lock);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
//}
|
||||||
mag->offset = i + m;
|
mag->offset = i + m;
|
||||||
spin_unlock(mag->lock);
|
spin_unlock(mag->lock);
|
||||||
p = MAGIC_ADDR + i;
|
p = MAGIC_ADDR + i;
|
||||||
@ -207,7 +252,7 @@ void *memalign(size_t a, size_t n) {
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *malloc(size_t n) {
|
void *_malloc(size_t n) {
|
||||||
return memalign(MAGIC_ALGN, n);
|
return memalign(MAGIC_ALGN, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,33 +260,53 @@ size_t malloc_usable_size(const void *p) {
|
|||||||
return ((const size_t *)p)[-1];
|
return ((const size_t *)p)[-1];
|
||||||
}
|
}
|
||||||
|
|
||||||
void *calloc(size_t n, size_t z) {
|
void *_calloc(size_t n, size_t z) {
|
||||||
void *p;
|
void *p;
|
||||||
if ((p = malloc((n *= z)))) {
|
if ((p = _malloc((n *= z)))) {
|
||||||
memset(p, 0, n);
|
memset(p, 0, n);
|
||||||
}
|
}
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
void free(void *p) {
|
void _free(void *p) {
|
||||||
// do nothing
|
// do nothing
|
||||||
}
|
}
|
||||||
|
|
||||||
void *realloc(void *p, size_t n) {
|
void *_realloc(void *p, size_t n) {
|
||||||
void *q;
|
void *q;
|
||||||
if (!p) {
|
if (!p) {
|
||||||
return malloc(n);
|
return _malloc(n);
|
||||||
}
|
}
|
||||||
if (!n) {
|
if (!n) {
|
||||||
free(p);
|
_free(p);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if ((q = malloc(n))) {
|
if ((q = _malloc(n))) {
|
||||||
memcpy(q, p, ((const size_t *)p)[-1]);
|
memcpy(q, p, ((const size_t *)p)[-1]);
|
||||||
}
|
}
|
||||||
return q;
|
return q;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(malloc)
|
||||||
|
# undef malloc
|
||||||
|
#endif
|
||||||
|
#define malloc(x) _malloc(x)
|
||||||
|
|
||||||
|
#if defined(calloc)
|
||||||
|
# undef calloc
|
||||||
|
#endif
|
||||||
|
#define calloc(x) _calloc(x)
|
||||||
|
|
||||||
|
#if defined(realloc)
|
||||||
|
# undef realloc
|
||||||
|
#endif
|
||||||
|
#define realloc(x) _realloc(x)
|
||||||
|
|
||||||
|
#if defined(free)
|
||||||
|
# undef free
|
||||||
|
#endif
|
||||||
|
#define free(x) _free(x)
|
||||||
|
|
||||||
// load the model's weights from a file
|
// load the model's weights from a file
|
||||||
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
|
bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx) {
|
||||||
fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
fprintf(stderr, "%s: loading model from '%s' - please wait ...\n", __func__, fname.c_str());
|
||||||
@ -707,7 +772,7 @@ bool llama_eval(
|
|||||||
const int d_key = n_embd/n_head;
|
const int d_key = n_embd/n_head;
|
||||||
|
|
||||||
static size_t buf_size = 512u*1024*1024;
|
static size_t buf_size = 512u*1024*1024;
|
||||||
static void * buf = malloc(buf_size);
|
static void * buf = _malloc(buf_size);
|
||||||
|
|
||||||
if (mem_per_token > 0 && mem_per_token*N > buf_size) {
|
if (mem_per_token > 0 && mem_per_token*N > buf_size) {
|
||||||
const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
|
const size_t buf_size_new = 1.1*(mem_per_token*N); // add 10% to account for ggml object overhead
|
||||||
@ -715,7 +780,7 @@ bool llama_eval(
|
|||||||
|
|
||||||
// reallocate
|
// reallocate
|
||||||
buf_size = buf_size_new;
|
buf_size = buf_size_new;
|
||||||
buf = realloc(buf, buf_size);
|
buf = _realloc(buf, buf_size);
|
||||||
if (buf == nullptr) {
|
if (buf == nullptr) {
|
||||||
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
|
||||||
return false;
|
return false;
|
||||||
|
70
mmap.h
70
mmap.h
@ -20,6 +20,7 @@
|
|||||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||||
#ifndef __MINGW32__
|
#ifndef __MINGW32__
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
|
#include <strsafe.h>
|
||||||
#else
|
#else
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#endif
|
#endif
|
||||||
@ -101,7 +102,7 @@
|
|||||||
#define close _close
|
#define close _close
|
||||||
#endif
|
#endif
|
||||||
#ifndef fstat
|
#ifndef fstat
|
||||||
#define fstat _fstat
|
#define fstat _fstati64
|
||||||
#endif
|
#endif
|
||||||
#ifndef madvise
|
#ifndef madvise
|
||||||
#define madvise WinMadvise
|
#define madvise WinMadvise
|
||||||
@ -113,6 +114,7 @@
|
|||||||
static std::atomic<unsigned> g_winlock;
|
static std::atomic<unsigned> g_winlock;
|
||||||
static std::map<LPVOID, HANDLE> g_winmap;
|
static std::map<LPVOID, HANDLE> g_winmap;
|
||||||
|
|
||||||
|
|
||||||
static void WinLock(void) {
|
static void WinLock(void) {
|
||||||
while (!g_winlock.exchange(1, std::memory_order_acquire));
|
while (!g_winlock.exchange(1, std::memory_order_acquire));
|
||||||
}
|
}
|
||||||
@ -121,7 +123,30 @@ static void WunLock(void) {
|
|||||||
g_winlock.store(0, std::memory_order_release);
|
g_winlock.store(0, std::memory_order_release);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int WinMadvise(int fd, size_t length, int flags) {
|
static int WinMadvise(char* fd, size_t length, int flags) {
|
||||||
|
auto p_handle = GetCurrentProcess();
|
||||||
|
struct _WIN32_MEMORY_RANGE_ENTRY entry((void*)fd, length);
|
||||||
|
bool success = PrefetchVirtualMemory(p_handle, 1, &entry, 0);
|
||||||
|
if (!success) {
|
||||||
|
LPVOID lpMsgBuf;
|
||||||
|
LPVOID lpDisplayBuf;
|
||||||
|
DWORD error_code = GetLastError();
|
||||||
|
FormatMessage(
|
||||||
|
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||||
|
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||||
|
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
NULL,
|
||||||
|
error_code,
|
||||||
|
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||||
|
(LPTSTR)&lpMsgBuf,
|
||||||
|
0, NULL);
|
||||||
|
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||||
|
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
|
||||||
|
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||||
|
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||||
|
TEXT("%s failed with error %d: %s"),
|
||||||
|
error_code, lpMsgBuf);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -215,11 +240,52 @@ static void *WinMap(void *addr, size_t length, int prot,
|
|||||||
(offset + length) >> 32,
|
(offset + length) >> 32,
|
||||||
(offset + length), 0);
|
(offset + length), 0);
|
||||||
if (!hand) {
|
if (!hand) {
|
||||||
|
LPVOID lpMsgBuf;
|
||||||
|
LPVOID lpDisplayBuf;
|
||||||
|
DWORD error_code = GetLastError();
|
||||||
|
FormatMessage(
|
||||||
|
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||||
|
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||||
|
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
NULL,
|
||||||
|
error_code,
|
||||||
|
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||||
|
(LPTSTR)&lpMsgBuf,
|
||||||
|
0, NULL);
|
||||||
|
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||||
|
(lstrlen((LPCTSTR)lpMsgBuf) + 256) * sizeof(TCHAR));
|
||||||
|
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||||
|
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||||
|
TEXT("%s failed with error %d: %s"),
|
||||||
|
error_code, lpMsgBuf);
|
||||||
return MAP_FAILED;
|
return MAP_FAILED;
|
||||||
}
|
}
|
||||||
|
if (winprot == PAGE_WRITECOPY) {
|
||||||
|
access = FILE_MAP_COPY;
|
||||||
|
}
|
||||||
|
|
||||||
res = MapViewOfFileEx(hand, access, offset >> 32,
|
res = MapViewOfFileEx(hand, access, offset >> 32,
|
||||||
offset, length, addr);
|
offset, length, addr);
|
||||||
if (!res) {
|
if (!res) {
|
||||||
|
LPVOID lpMsgBuf;
|
||||||
|
LPVOID lpDisplayBuf;
|
||||||
|
DWORD error_code = GetLastError();
|
||||||
|
FormatMessage(
|
||||||
|
FORMAT_MESSAGE_ALLOCATE_BUFFER |
|
||||||
|
FORMAT_MESSAGE_FROM_SYSTEM |
|
||||||
|
FORMAT_MESSAGE_IGNORE_INSERTS,
|
||||||
|
NULL,
|
||||||
|
error_code,
|
||||||
|
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
|
||||||
|
(LPTSTR)&lpMsgBuf,
|
||||||
|
0, NULL);
|
||||||
|
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
|
||||||
|
(lstrlen((LPCTSTR)lpMsgBuf) + 40) * sizeof(TCHAR));
|
||||||
|
StringCchPrintf((LPTSTR)lpDisplayBuf,
|
||||||
|
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
|
||||||
|
TEXT("failed with error %d: %s"),
|
||||||
|
error_code, lpMsgBuf);
|
||||||
|
fprintf(stderr, (char*)lpDisplayBuf);
|
||||||
CloseHandle(hand);
|
CloseHandle(hand);
|
||||||
return MAP_FAILED;
|
return MAP_FAILED;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user