mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-23 09:59:18 +01:00
Merge branch 'master' into gguf
This commit is contained in:
commit
1da82c551f
@ -67,11 +67,13 @@ endif()
|
|||||||
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
|
||||||
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
option(LLAMA_BLAS "llama: use BLAS" OFF)
|
||||||
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
|
||||||
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
|
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
|
||||||
|
#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
|
||||||
|
set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
|
||||||
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
|
||||||
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
|
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
|
||||||
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
|
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
|
||||||
option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF)
|
option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
|
||||||
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
|
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
|
||||||
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
|
||||||
option(LLAMA_METAL "llama: use Metal" OFF)
|
option(LLAMA_METAL "llama: use Metal" OFF)
|
||||||
@ -251,6 +253,10 @@ if (LLAMA_CUBLAS)
|
|||||||
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
|
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
|
||||||
|
|
||||||
add_compile_definitions(GGML_USE_CUBLAS)
|
add_compile_definitions(GGML_USE_CUBLAS)
|
||||||
|
# if (LLAMA_CUDA_CUBLAS)
|
||||||
|
# add_compile_definitions(GGML_CUDA_CUBLAS)
|
||||||
|
# endif()
|
||||||
|
add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
|
||||||
if (LLAMA_CUDA_FORCE_DMMV)
|
if (LLAMA_CUDA_FORCE_DMMV)
|
||||||
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
|
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
|
||||||
endif()
|
endif()
|
||||||
@ -259,8 +265,8 @@ if (LLAMA_CUBLAS)
|
|||||||
if (DEFINED LLAMA_CUDA_DMMV_Y)
|
if (DEFINED LLAMA_CUDA_DMMV_Y)
|
||||||
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
|
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_CUDA_DMMV_F16)
|
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
|
||||||
add_compile_definitions(GGML_CUDA_DMMV_F16)
|
add_compile_definitions(GGML_CUDA_F16)
|
||||||
endif()
|
endif()
|
||||||
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
|
||||||
|
|
||||||
@ -271,10 +277,14 @@ if (LLAMA_CUBLAS)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
|
||||||
if (LLAMA_CUDA_DMMV_F16)
|
# 52 == lowest CUDA 12 standard
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics
|
# 60 == f16 CUDA intrinsics
|
||||||
|
# 61 == integer CUDA intrinsics
|
||||||
|
# 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
|
||||||
|
if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
|
||||||
|
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
|
||||||
else()
|
else()
|
||||||
set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
|
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics + faster mul_mat_q loop unrolling
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
|
||||||
@ -357,6 +367,7 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
-Wshadow
|
-Wshadow
|
||||||
-Wstrict-prototypes
|
-Wstrict-prototypes
|
||||||
-Wpointer-arith
|
-Wpointer-arith
|
||||||
|
-Wmissing-prototypes
|
||||||
)
|
)
|
||||||
set(cxx_flags
|
set(cxx_flags
|
||||||
-Wall
|
-Wall
|
||||||
@ -496,6 +507,8 @@ endif()
|
|||||||
add_library(ggml OBJECT
|
add_library(ggml OBJECT
|
||||||
ggml.c
|
ggml.c
|
||||||
ggml.h
|
ggml.h
|
||||||
|
ggml-alloc.c
|
||||||
|
ggml-alloc.h
|
||||||
${GGML_SOURCES_CUDA}
|
${GGML_SOURCES_CUDA}
|
||||||
${GGML_SOURCES_OPENCL}
|
${GGML_SOURCES_OPENCL}
|
||||||
${GGML_SOURCES_METAL}
|
${GGML_SOURCES_METAL}
|
||||||
|
82
Makefile
82
Makefile
@ -63,7 +63,8 @@ ifdef LLAMA_SERVER_VERBOSE
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# warnings
|
# warnings
|
||||||
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
|
CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \
|
||||||
|
-Wmissing-prototypes
|
||||||
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
|
||||||
|
|
||||||
# OS specific
|
# OS specific
|
||||||
@ -141,6 +142,28 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
|
|||||||
#CXXFLAGS += -mssse3
|
#CXXFLAGS += -mssse3
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
||||||
|
# Apple M1, M2, etc.
|
||||||
|
# Raspberry Pi 3, 4, Zero 2 (64-bit)
|
||||||
|
CFLAGS += -mcpu=native
|
||||||
|
CXXFLAGS += -mcpu=native
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(filter armv6%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 1, Zero
|
||||||
|
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(filter armv7%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 2
|
||||||
|
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifneq ($(filter armv8%,$(UNAME_M)),)
|
||||||
|
# Raspberry Pi 3, 4, Zero 2 (32-bit)
|
||||||
|
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
||||||
|
endif
|
||||||
|
|
||||||
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
ifneq ($(filter ppc64%,$(UNAME_M)),)
|
||||||
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
|
||||||
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
ifneq (,$(findstring POWER9,$(POWER9_M)))
|
||||||
@ -193,7 +216,7 @@ ifdef LLAMA_CUBLAS
|
|||||||
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
|
||||||
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
|
||||||
OBJS += ggml-cuda.o
|
OBJS += ggml-cuda.o
|
||||||
NVCCFLAGS = --forward-unknown-to-host-compiler
|
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
|
||||||
ifdef LLAMA_CUDA_NVCC
|
ifdef LLAMA_CUDA_NVCC
|
||||||
NVCC = $(LLAMA_CUDA_NVCC)
|
NVCC = $(LLAMA_CUDA_NVCC)
|
||||||
else
|
else
|
||||||
@ -219,19 +242,30 @@ else ifdef LLAMA_CUDA_DMMV_Y
|
|||||||
else
|
else
|
||||||
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
|
||||||
endif # LLAMA_CUDA_MMV_Y
|
endif # LLAMA_CUDA_MMV_Y
|
||||||
|
ifdef LLAMA_CUDA_F16
|
||||||
|
NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
|
endif # LLAMA_CUDA_F16
|
||||||
ifdef LLAMA_CUDA_DMMV_F16
|
ifdef LLAMA_CUDA_DMMV_F16
|
||||||
NVCCFLAGS += -DGGML_CUDA_DMMV_F16
|
NVCCFLAGS += -DGGML_CUDA_F16
|
||||||
endif # LLAMA_CUDA_DMMV_F16
|
endif # LLAMA_CUDA_DMMV_F16
|
||||||
ifdef LLAMA_CUDA_KQUANTS_ITER
|
ifdef LLAMA_CUDA_KQUANTS_ITER
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
|
||||||
else
|
else
|
||||||
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
|
||||||
endif
|
endif
|
||||||
|
ifdef LLAMA_CUDA_MMQ_Y
|
||||||
|
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=$(LLAMA_CUDA_MMQ_Y)
|
||||||
|
else
|
||||||
|
NVCCFLAGS += -DGGML_CUDA_MMQ_Y=64
|
||||||
|
endif # LLAMA_CUDA_MMQ_Y
|
||||||
|
#ifdef LLAMA_CUDA_CUBLAS
|
||||||
|
# NVCCFLAGS += -DGGML_CUDA_CUBLAS
|
||||||
|
#endif # LLAMA_CUDA_CUBLAS
|
||||||
ifdef LLAMA_CUDA_CCBIN
|
ifdef LLAMA_CUDA_CCBIN
|
||||||
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN)
|
||||||
endif
|
endif
|
||||||
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
|
||||||
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
|
$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) -Wno-pedantic -c $< -o $@
|
||||||
endif # LLAMA_CUBLAS
|
endif # LLAMA_CUBLAS
|
||||||
|
|
||||||
ifdef LLAMA_CLBLAST
|
ifdef LLAMA_CLBLAST
|
||||||
@ -258,28 +292,6 @@ ifdef LLAMA_METAL
|
|||||||
OBJS += ggml-metal.o
|
OBJS += ggml-metal.o
|
||||||
endif # LLAMA_METAL
|
endif # LLAMA_METAL
|
||||||
|
|
||||||
ifneq ($(filter aarch64%,$(UNAME_M)),)
|
|
||||||
# Apple M1, M2, etc.
|
|
||||||
# Raspberry Pi 3, 4, Zero 2 (64-bit)
|
|
||||||
CFLAGS += -mcpu=native
|
|
||||||
CXXFLAGS += -mcpu=native
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(filter armv6%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 1, Zero
|
|
||||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(filter armv7%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 2
|
|
||||||
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifneq ($(filter armv8%,$(UNAME_M)),)
|
|
||||||
# Raspberry Pi 3, 4, Zero 2 (32-bit)
|
|
||||||
CFLAGS += -mfp16-format=ieee -mno-unaligned-access
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef LLAMA_METAL
|
ifdef LLAMA_METAL
|
||||||
ggml-metal.o: ggml-metal.m ggml-metal.h
|
ggml-metal.o: ggml-metal.m ggml-metal.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
@ -317,12 +329,20 @@ $(info )
|
|||||||
ggml.o: ggml.c ggml.h ggml-cuda.h
|
ggml.o: ggml.c ggml.h ggml-cuda.h
|
||||||
$(CC) $(CFLAGS) -c $< -o $@
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
llama.o: llama.cpp ggml.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
|
ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h
|
||||||
|
$(CC) $(CFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
OBJS += ggml-alloc.o
|
||||||
|
|
||||||
|
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
common.o: examples/common.cpp examples/common.h
|
common.o: examples/common.cpp examples/common.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
|
console.o: examples/console.cpp examples/console.h
|
||||||
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
|
grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h
|
||||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||||
|
|
||||||
@ -336,7 +356,7 @@ clean:
|
|||||||
# Examples
|
# Examples
|
||||||
#
|
#
|
||||||
|
|
||||||
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS)
|
main: examples/main/main.cpp build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
@echo
|
@echo
|
||||||
@echo '==== Run ./main -h for help. ===='
|
@echo '==== Run ./main -h for help. ===='
|
||||||
@ -400,13 +420,13 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o
|
|||||||
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-double-float: tests/test-double-float.c build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-grad0: tests/test-grad0.c build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-opt: tests/test-opt.c build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
|
31
README.md
31
README.md
@ -77,9 +77,10 @@ as the main playground for developing new features for the [ggml](https://github
|
|||||||
**Supported models:**
|
**Supported models:**
|
||||||
|
|
||||||
- [X] LLaMA 🦙
|
- [X] LLaMA 🦙
|
||||||
|
- [X] LLaMA 2 🦙🦙
|
||||||
- [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca)
|
- [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca)
|
||||||
- [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all)
|
- [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all)
|
||||||
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca)
|
- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
|
||||||
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
|
- [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
|
||||||
- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894)
|
- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894)
|
||||||
- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
|
- [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
|
||||||
@ -87,6 +88,7 @@ as the main playground for developing new features for the [ggml](https://github
|
|||||||
- [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
|
- [X] [Pygmalion 7B / Metharme 7B](#using-pygmalion-7b--metharme-7b)
|
||||||
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
|
- [X] [WizardLM](https://github.com/nlpxucan/WizardLM)
|
||||||
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
|
- [X] [Baichuan-7B](https://huggingface.co/baichuan-inc/baichuan-7B) and its derivations (such as [baichuan-7b-sft](https://huggingface.co/hiyouga/baichuan-7b-sft))
|
||||||
|
- [X] [Aquila-7B](https://huggingface.co/BAAI/Aquila-7B) / [AquilaChat-7B](https://huggingface.co/BAAI/AquilaChat-7B)
|
||||||
|
|
||||||
**Bindings:**
|
**Bindings:**
|
||||||
|
|
||||||
@ -399,12 +401,16 @@ Building the program with BLAS support may lead to some performance improvements
|
|||||||
|
|
||||||
The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
|
The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:
|
||||||
|
|
||||||
|
<!---
|
||||||
|
| LLAMA_CUDA_CUBLAS | Boolean | false | Use cuBLAS instead of custom CUDA kernels for prompt processing. Faster for all quantization formats except for q4_0 and q8_0, especially for k-quants. Increases VRAM usage (700 MiB for 7b, 970 MiB for 13b, 1430 MiB for 33b). |
|
||||||
|
--->
|
||||||
| Option | Legal values | Default | Description |
|
| Option | Legal values | Default | Description |
|
||||||
|-------------------------|------------------------|---------|-------------|
|
|-------------------------|------------------------|---------|-------------|
|
||||||
|
| LLAMA_CUDA_MMQ_Y | Positive integer >= 32 | 64 | Tile size in y direction when using the custom CUDA kernels for prompt processing. Higher values can be faster depending on the amount of shared memory available. Power of 2 heavily recommended. |
|
||||||
| LLAMA_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
|
| LLAMA_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
|
||||||
| LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
|
| LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
|
||||||
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
|
| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
|
||||||
| LLAMA_CUDA_DMMV_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels. Can improve performance on relatively recent GPUs. |
|
| LLAMA_CUDA_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix-matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
|
||||||
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
|
| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
|
||||||
|
|
||||||
- #### CLBlast
|
- #### CLBlast
|
||||||
@ -487,6 +493,9 @@ Building the program with BLAS support may lead to some performance improvements
|
|||||||
# obtain the original LLaMA model weights and place them in ./models
|
# obtain the original LLaMA model weights and place them in ./models
|
||||||
ls ./models
|
ls ./models
|
||||||
65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
|
65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model
|
||||||
|
# [Optional] for models using BPE tokenizers
|
||||||
|
ls ./models
|
||||||
|
65B 30B 13B 7B vocab.json
|
||||||
|
|
||||||
# install Python dependencies
|
# install Python dependencies
|
||||||
python3 -m pip install -r requirements.txt
|
python3 -m pip install -r requirements.txt
|
||||||
@ -494,6 +503,9 @@ python3 -m pip install -r requirements.txt
|
|||||||
# convert the 7B model to ggml FP16 format
|
# convert the 7B model to ggml FP16 format
|
||||||
python3 convert.py models/7B/
|
python3 convert.py models/7B/
|
||||||
|
|
||||||
|
# [Optional] for models using BPE tokenizers
|
||||||
|
python3 convert.py models/7B/ --vocabtype bpe
|
||||||
|
|
||||||
# quantize the model to 4-bits (using q4_0 method)
|
# quantize the model to 4-bits (using q4_0 method)
|
||||||
./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
|
./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin q4_0
|
||||||
|
|
||||||
@ -650,6 +662,19 @@ python3 convert.py pygmalion-7b/ --outtype q4_1
|
|||||||
- The LLaMA models are officially distributed by Facebook and will **never** be provided through this repository.
|
- The LLaMA models are officially distributed by Facebook and will **never** be provided through this repository.
|
||||||
- Refer to [Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to request access to the model data.
|
- Refer to [Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to request access to the model data.
|
||||||
|
|
||||||
|
### Obtaining and using the Facebook LLaMA 2 model
|
||||||
|
|
||||||
|
- Refer to [Facebook's LLaMA download page](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) if you want to access the model data.
|
||||||
|
- Alternatively, if you want to save time and space, you can download already converted and quantized models from [TheBloke](https://huggingface.co/TheBloke), including:
|
||||||
|
- [LLaMA 2 7B base](https://huggingface.co/TheBloke/Llama-2-7B-GGML)
|
||||||
|
- [LLaMA 2 13B base](https://huggingface.co/TheBloke/Llama-2-13B-GGML)
|
||||||
|
- [LLaMA 2 70B base](https://huggingface.co/TheBloke/Llama-2-70B-GGML)
|
||||||
|
- [LLaMA 2 7B chat](https://huggingface.co/TheBloke/Llama-2-7B-chat-GGML)
|
||||||
|
- [LLaMA 2 13B chat](https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML)
|
||||||
|
- [LLaMA 2 70B chat](https://huggingface.co/TheBloke/Llama-2-70B-chat-GGML)
|
||||||
|
- Specify `-eps 1e-5` for best generation quality
|
||||||
|
- Specify `-gqa 8` for 70B models to work
|
||||||
|
|
||||||
### Verifying the model files
|
### Verifying the model files
|
||||||
|
|
||||||
Please verify the [sha256 checksums](SHA256SUMS) of all downloaded model files to confirm that you have the correct model data files before creating an issue relating to your model files.
|
Please verify the [sha256 checksums](SHA256SUMS) of all downloaded model files to confirm that you have the correct model data files before creating an issue relating to your model files.
|
||||||
|
139
build.zig
139
build.zig
@ -1,68 +1,87 @@
|
|||||||
|
// Compatible with Zig Version 0.11.0
|
||||||
const std = @import("std");
|
const std = @import("std");
|
||||||
const commit_hash = @embedFile(".git/refs/heads/master");
|
const Compile = std.Build.Step.Compile;
|
||||||
|
const ConfigHeader = std.Build.Step.ConfigHeader;
|
||||||
|
const Mode = std.builtin.Mode;
|
||||||
|
const CrossTarget = std.zig.CrossTarget;
|
||||||
|
|
||||||
|
const Maker = struct {
|
||||||
|
builder: *std.build.Builder,
|
||||||
|
target: CrossTarget,
|
||||||
|
optimize: Mode,
|
||||||
|
config_header: *ConfigHeader,
|
||||||
|
|
||||||
|
const cflags = .{"-std=c11"};
|
||||||
|
const cxxflags = .{"-std=c++11"};
|
||||||
|
|
||||||
|
fn init(builder: *std.build.Builder) Maker {
|
||||||
|
const commit_hash = @embedFile(".git/refs/heads/master");
|
||||||
|
const config_header = builder.addConfigHeader(
|
||||||
|
.{ .style = .blank, .include_path = "build-info.h" },
|
||||||
|
.{
|
||||||
|
.BUILD_NUMBER = 0,
|
||||||
|
.BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
|
||||||
|
},
|
||||||
|
);
|
||||||
|
return Maker{
|
||||||
|
.builder = builder,
|
||||||
|
.target = builder.standardTargetOptions(.{}),
|
||||||
|
.optimize = builder.standardOptimizeOption(.{}),
|
||||||
|
.config_header = config_header,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
fn obj(m: *const Maker, name: []const u8, src: []const u8) *Compile {
|
||||||
|
const o = m.builder.addObject(.{ .name = name, .target = m.target, .optimize = m.optimize });
|
||||||
|
if (std.mem.endsWith(u8, src, ".c")) {
|
||||||
|
o.addCSourceFiles(&.{src}, &cflags);
|
||||||
|
o.linkLibC();
|
||||||
|
} else {
|
||||||
|
o.addCSourceFiles(&.{src}, &cxxflags);
|
||||||
|
o.linkLibCpp();
|
||||||
|
}
|
||||||
|
o.addIncludePath(.{ .path = "." });
|
||||||
|
o.addIncludePath(.{ .path = "./examples" });
|
||||||
|
return o;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn exe(m: *const Maker, name: []const u8, src: []const u8, deps: []const *Compile) *Compile {
|
||||||
|
const e = m.builder.addExecutable(.{ .name = name, .target = m.target, .optimize = m.optimize });
|
||||||
|
e.addIncludePath(.{ .path = "." });
|
||||||
|
e.addIncludePath(.{ .path = "./examples" });
|
||||||
|
e.addCSourceFiles(&.{src}, &cxxflags);
|
||||||
|
for (deps) |d| e.addObject(d);
|
||||||
|
e.linkLibC();
|
||||||
|
e.linkLibCpp();
|
||||||
|
e.addConfigHeader(m.config_header);
|
||||||
|
m.builder.installArtifact(e);
|
||||||
|
|
||||||
|
// Currently a bug is preventing correct linking for optimized builds for Windows:
|
||||||
|
// https://github.com/ziglang/zig/issues/15958
|
||||||
|
if (e.target.isWindows()) {
|
||||||
|
e.want_lto = false;
|
||||||
|
}
|
||||||
|
return e;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
// Zig Version: 0.11.0-dev.3986+e05c242cd
|
|
||||||
pub fn build(b: *std.build.Builder) void {
|
pub fn build(b: *std.build.Builder) void {
|
||||||
const target = b.standardTargetOptions(.{});
|
const make = Maker.init(b);
|
||||||
const optimize = b.standardOptimizeOption(.{});
|
|
||||||
|
|
||||||
const config_header = b.addConfigHeader(
|
const ggml = make.obj("ggml", "ggml.c");
|
||||||
.{ .style = .blank, .include_path = "build-info.h" },
|
const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
|
||||||
.{
|
const llama = make.obj("llama", "llama.cpp");
|
||||||
.BUILD_NUMBER = 0,
|
const common = make.obj("common", "examples/common.cpp");
|
||||||
.BUILD_COMMIT = commit_hash[0 .. commit_hash.len - 1], // omit newline
|
const grammar_parser = make.obj("grammar-parser", "examples/grammar-parser.cpp");
|
||||||
},
|
|
||||||
);
|
|
||||||
|
|
||||||
const lib = b.addStaticLibrary(.{
|
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
|
||||||
.name = "llama",
|
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, llama });
|
||||||
.target = target,
|
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, llama, common });
|
||||||
.optimize = optimize,
|
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, llama, common });
|
||||||
});
|
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, llama });
|
||||||
lib.linkLibC();
|
|
||||||
lib.linkLibCpp();
|
|
||||||
lib.addIncludePath(".");
|
|
||||||
lib.addIncludePath("./examples");
|
|
||||||
lib.addConfigHeader(config_header);
|
|
||||||
lib.addCSourceFiles(&.{"ggml.c"}, &.{"-std=c11"});
|
|
||||||
lib.addCSourceFiles(&.{"llama.cpp"}, &.{"-std=c++11"});
|
|
||||||
b.installArtifact(lib);
|
|
||||||
|
|
||||||
const examples = .{
|
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, llama, common, grammar_parser });
|
||||||
"main",
|
if (server.target.isWindows()) {
|
||||||
"baby-llama",
|
server.linkSystemLibrary("ws2_32");
|
||||||
"embedding",
|
|
||||||
"metal",
|
|
||||||
"perplexity",
|
|
||||||
"quantize",
|
|
||||||
"quantize-stats",
|
|
||||||
"save-load-state",
|
|
||||||
"server",
|
|
||||||
"simple",
|
|
||||||
"train-text-from-scratch",
|
|
||||||
};
|
|
||||||
|
|
||||||
inline for (examples) |example_name| {
|
|
||||||
const exe = b.addExecutable(.{
|
|
||||||
.name = example_name,
|
|
||||||
.target = target,
|
|
||||||
.optimize = optimize,
|
|
||||||
});
|
|
||||||
exe.addIncludePath(".");
|
|
||||||
exe.addIncludePath("./examples");
|
|
||||||
exe.addConfigHeader(config_header);
|
|
||||||
exe.addCSourceFiles(&.{
|
|
||||||
std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{ example_name, example_name }),
|
|
||||||
"examples/common.cpp",
|
|
||||||
}, &.{"-std=c++11"});
|
|
||||||
exe.linkLibrary(lib);
|
|
||||||
b.installArtifact(exe);
|
|
||||||
|
|
||||||
const run_cmd = b.addRunArtifact(exe);
|
|
||||||
run_cmd.step.dependOn(b.getInstallStep());
|
|
||||||
if (b.args) |args| run_cmd.addArgs(args);
|
|
||||||
|
|
||||||
const run_step = b.step("run-" ++ example_name, "Run the app");
|
|
||||||
run_step.dependOn(&run_cmd.step);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
103
convert.py
Executable file → Normal file
103
convert.py
Executable file → Normal file
@ -133,7 +133,7 @@ TENSORS_SET = set(TENSORS_LIST)
|
|||||||
|
|
||||||
def find_n_mult(n_ff: int, n_embd: int) -> int:
|
def find_n_mult(n_ff: int, n_embd: int) -> int:
|
||||||
# hardcoded magic range
|
# hardcoded magic range
|
||||||
for n_mult in range(256, 1, -1):
|
for n_mult in range(8192, 1, -1):
|
||||||
calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
|
calc_ff = (((8*n_embd) // 3 + n_mult - 1) // n_mult)*n_mult
|
||||||
if calc_ff == n_ff:
|
if calc_ff == n_ff:
|
||||||
return n_mult
|
return n_mult
|
||||||
@ -141,11 +141,12 @@ def find_n_mult(n_ff: int, n_embd: int) -> int:
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Params:
|
class Params:
|
||||||
n_vocab: int
|
n_vocab: int
|
||||||
n_embd: int
|
n_embd: int
|
||||||
n_mult: int
|
n_mult: int
|
||||||
n_head: int
|
n_head: int
|
||||||
n_layer: int
|
n_layer: int
|
||||||
|
n_kv_head: Optional[int] # This parameter is only used for Llama 2
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def guessed(model: 'LazyModel') -> 'Params':
|
def guessed(model: 'LazyModel') -> 'Params':
|
||||||
@ -167,11 +168,12 @@ class Params:
|
|||||||
n_head=n_embd // 128 # guessed
|
n_head=n_embd // 128 # guessed
|
||||||
|
|
||||||
return Params(
|
return Params(
|
||||||
n_vocab = n_vocab,
|
n_vocab = n_vocab,
|
||||||
n_embd = n_embd,
|
n_embd = n_embd,
|
||||||
n_mult = 256,
|
n_mult = 256,
|
||||||
n_head = n_head,
|
n_head = n_head,
|
||||||
n_layer = n_layer,
|
n_layer = n_layer,
|
||||||
|
n_kv_head = None,
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -183,15 +185,17 @@ class Params:
|
|||||||
n_head = config["num_attention_heads"];
|
n_head = config["num_attention_heads"];
|
||||||
n_layer = config["num_hidden_layers"];
|
n_layer = config["num_hidden_layers"];
|
||||||
n_ff = config["intermediate_size"];
|
n_ff = config["intermediate_size"];
|
||||||
|
n_kv_head = config.get("num_key_value_heads")
|
||||||
|
|
||||||
n_mult = find_n_mult(n_ff, n_embd);
|
n_mult = find_n_mult(n_ff, n_embd);
|
||||||
|
|
||||||
return Params(
|
return Params(
|
||||||
n_vocab = n_vocab,
|
n_vocab = n_vocab,
|
||||||
n_embd = n_embd,
|
n_embd = n_embd,
|
||||||
n_mult = n_mult,
|
n_mult = n_mult,
|
||||||
n_head = n_head,
|
n_head = n_head,
|
||||||
n_layer = n_layer,
|
n_layer = n_layer,
|
||||||
|
n_kv_head = n_kv_head,
|
||||||
)
|
)
|
||||||
|
|
||||||
# LLaMA v2 70B params.json
|
# LLaMA v2 70B params.json
|
||||||
@ -200,21 +204,22 @@ class Params:
|
|||||||
def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
|
def loadOriginalParamsJson(model: 'LazyModel', config_path: 'Path') -> 'Params':
|
||||||
config = json.load(open(config_path))
|
config = json.load(open(config_path))
|
||||||
|
|
||||||
n_vocab = config["vocab_size"];
|
n_vocab = config["vocab_size"];
|
||||||
n_embd = config["dim"];
|
n_embd = config["dim"];
|
||||||
n_head = config["n_heads"];
|
n_head = config["n_heads"];
|
||||||
n_layer = config["n_layers"];
|
n_layer = config["n_layers"];
|
||||||
n_mult = config["multiple_of"];
|
n_mult = config["multiple_of"];
|
||||||
|
|
||||||
if n_vocab == -1:
|
if n_vocab == -1:
|
||||||
n_vocab = model["tok_embeddings.weight"].shape[0]
|
n_vocab = model["tok_embeddings.weight"].shape[0]
|
||||||
|
|
||||||
return Params(
|
return Params(
|
||||||
n_vocab = n_vocab,
|
n_vocab = n_vocab,
|
||||||
n_embd = n_embd,
|
n_embd = n_embd,
|
||||||
n_mult = n_mult,
|
n_mult = n_mult,
|
||||||
n_head = n_head,
|
n_head = n_head,
|
||||||
n_layer = n_layer,
|
n_layer = n_layer,
|
||||||
|
n_kv_head = None,
|
||||||
)
|
)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@ -317,10 +322,12 @@ class GGMLVocab:
|
|||||||
Vocab = Union[SentencePieceVocab, GGMLVocab]
|
Vocab = Union[SentencePieceVocab, GGMLVocab]
|
||||||
|
|
||||||
|
|
||||||
def permute(weights: NDArray, n_head: int) -> NDArray:
|
def permute(weights: NDArray, n_head: int, n_kv_head: Optional[int] = None) -> NDArray:
|
||||||
|
if n_kv_head is not None and n_head != n_kv_head:
|
||||||
|
n_head //= n_kv_head
|
||||||
return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
|
return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
|
||||||
.swapaxes(1, 2)
|
.swapaxes(1, 2)
|
||||||
.reshape(weights.shape))
|
.reshape(weights.shape))
|
||||||
|
|
||||||
|
|
||||||
def dequantize_q4(qvalues_pack32: NDArray, scales: NDArray, addends: Optional[NDArray], g_idx: Optional[NDArray]) -> NDArray:
|
def dequantize_q4(qvalues_pack32: NDArray, scales: NDArray, addends: Optional[NDArray], g_idx: Optional[NDArray]) -> NDArray:
|
||||||
@ -368,7 +375,7 @@ class Tensor(metaclass=ABCMeta):
|
|||||||
@abstractmethod
|
@abstractmethod
|
||||||
def astype(self, data_type: DataType) -> 'Tensor': ...
|
def astype(self, data_type: DataType) -> 'Tensor': ...
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def permute(self, n_head: int) -> 'Tensor': ...
|
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'Tensor': ...
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
|
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor': ...
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@ -406,8 +413,8 @@ class UnquantizedTensor(Tensor):
|
|||||||
r = self.ndarray.shape[0] // 3
|
r = self.ndarray.shape[0] // 3
|
||||||
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
|
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
|
||||||
|
|
||||||
def permute(self, n_head: int) -> 'UnquantizedTensor':
|
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'UnquantizedTensor':
|
||||||
return UnquantizedTensor(permute(self.ndarray, n_head))
|
return UnquantizedTensor(permute(self.ndarray, n_head, n_kv_head))
|
||||||
|
|
||||||
|
|
||||||
def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, convert: bool = False) -> NDArray:
|
def load_unquantized(lazy_tensor: 'LazyTensor', expected_dtype: Any = None, convert: bool = False) -> NDArray:
|
||||||
@ -455,26 +462,34 @@ class GGMLQuantizedTensor(Tensor):
|
|||||||
def to_ggml(self) -> 'GGMLQuantizedTensor':
|
def to_ggml(self) -> 'GGMLQuantizedTensor':
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def permute(self, n_head: int) -> 'GGMLQuantizedTensor':
|
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> 'GGMLQuantizedTensor':
|
||||||
return GGMLQuantizedTensor(permute(self.ndarray, n_head), self.shape, self.data_type)
|
return GGMLQuantizedTensor(permute(self.ndarray, n_head, n_kv_head), self.shape, self.data_type)
|
||||||
|
|
||||||
|
def permute_part(self, n_part: int, n_head: int) -> 'UnquantizedTensor':
|
||||||
|
r = self.ndarray.shape[0] // 3
|
||||||
|
return UnquantizedTensor(permute(self.ndarray[r * n_part : r * n_part + r, ...], n_head))
|
||||||
|
|
||||||
|
def part(self, n_part: int) -> 'UnquantizedTensor':
|
||||||
|
r = self.ndarray.shape[0] // 3
|
||||||
|
return UnquantizedTensor(self.ndarray[r * n_part : r * n_part + r, ...])
|
||||||
|
|
||||||
GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor]
|
GGMLCompatibleTensor = Union[UnquantizedTensor, GGMLQuantizedTensor]
|
||||||
|
|
||||||
|
|
||||||
class DeferredPermutedTensor(Tensor):
|
class DeferredPermutedTensor(Tensor):
|
||||||
def __init__(self, base: Tensor, n_head: int) -> None:
|
def __init__(self, base: Tensor, n_head: int, n_kv_head: Optional[int] = None) -> None:
|
||||||
self.base = base
|
self.base = base
|
||||||
self.n_head = n_head
|
self.n_head = n_head
|
||||||
|
self.n_kv_head = n_kv_head
|
||||||
self.data_type = self.base.data_type
|
self.data_type = self.base.data_type
|
||||||
|
|
||||||
def astype(self, data_type: DataType) -> Tensor:
|
def astype(self, data_type: DataType) -> Tensor:
|
||||||
return self.base.astype(data_type).permute(self.n_head)
|
return self.base.astype(data_type).permute(self.n_head, self.n_kv_head)
|
||||||
|
|
||||||
def to_ggml(self) -> GGMLCompatibleTensor:
|
def to_ggml(self) -> GGMLCompatibleTensor:
|
||||||
return self.base.to_ggml().permute(self.n_head)
|
return self.base.to_ggml().permute(self.n_head, self.n_kv_head)
|
||||||
|
|
||||||
def permute(self, n_head: int) -> Tensor:
|
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> Tensor:
|
||||||
raise Exception("shouldn't permute twice")
|
raise Exception("shouldn't permute twice")
|
||||||
|
|
||||||
|
|
||||||
@ -566,8 +581,8 @@ class GPTQForLLaMaQuantizedTensor(Tensor):
|
|||||||
ret.data_type = QuantizedDataType(groupsize=new_groupsize, have_addends=True, have_g_idx=False)
|
ret.data_type = QuantizedDataType(groupsize=new_groupsize, have_addends=True, have_g_idx=False)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def permute(self, n_head: int) -> Tensor:
|
def permute(self, n_head: int, n_kv_head: Optional[int] = None) -> Tensor:
|
||||||
return DeferredPermutedTensor(self, n_head)
|
return DeferredPermutedTensor(self, n_head, n_kv_head)
|
||||||
|
|
||||||
def to_ggml(self) -> GGMLQuantizedTensor:
|
def to_ggml(self) -> GGMLQuantizedTensor:
|
||||||
# The output format looks like this:
|
# The output format looks like this:
|
||||||
@ -698,10 +713,10 @@ def merge_multifile_models(models_plus: List[ModelPlus]) -> ModelPlus:
|
|||||||
return ModelPlus(model, paths, format, vocab)
|
return ModelPlus(model, paths, format, vocab)
|
||||||
|
|
||||||
|
|
||||||
def permute_lazy(lazy_tensor: LazyTensor, n_head: int) -> LazyTensor:
|
def permute_lazy(lazy_tensor: LazyTensor, n_head: int, n_kv_head: Optional[int] = None) -> LazyTensor:
|
||||||
def load() -> Tensor:
|
def load() -> Tensor:
|
||||||
return lazy_tensor.load().permute(n_head)
|
return lazy_tensor.load().permute(n_head, n_kv_head)
|
||||||
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}) ' + lazy_tensor.description)
|
return LazyTensor(load, lazy_tensor.shape, lazy_tensor.data_type, f'permute({n_head}, {n_kv_head}) ' + lazy_tensor.description)
|
||||||
|
|
||||||
def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
|
def permute_part_lazy(lazy_tensor: LazyTensor, n_part: int, n_head: int) -> LazyTensor:
|
||||||
def load() -> Tensor:
|
def load() -> Tensor:
|
||||||
@ -726,7 +741,7 @@ def convert_transformers_to_orig(model: LazyModel, params: Params) -> LazyModel:
|
|||||||
for i in itertools.count():
|
for i in itertools.count():
|
||||||
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
|
if f"model.layers.{i}.self_attn.q_proj.weight" in model:
|
||||||
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
|
out[f"layers.{i}.attention.wq.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head)
|
||||||
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head)
|
out[f"layers.{i}.attention.wk.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_kv_head)
|
||||||
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
|
out[f"layers.{i}.attention.wv.weight"] = model[f"model.layers.{i}.self_attn.v_proj.weight"]
|
||||||
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
|
elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
|
||||||
out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
|
out[f"layers.{i}.attention.wq.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head)
|
||||||
|
@ -13,6 +13,8 @@ set(TARGET common)
|
|||||||
add_library(${TARGET} OBJECT
|
add_library(${TARGET} OBJECT
|
||||||
common.h
|
common.h
|
||||||
common.cpp
|
common.cpp
|
||||||
|
console.h
|
||||||
|
console.cpp
|
||||||
grammar-parser.h
|
grammar-parser.h
|
||||||
grammar-parser.cpp
|
grammar-parser.cpp
|
||||||
)
|
)
|
||||||
|
@ -25,7 +25,6 @@
|
|||||||
#else
|
#else
|
||||||
#include <sys/ioctl.h>
|
#include <sys/ioctl.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <wchar.h>
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
@ -329,6 +328,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
params.instruct = true;
|
params.instruct = true;
|
||||||
} else if (arg == "--multiline-input") {
|
} else if (arg == "--multiline-input") {
|
||||||
params.multiline_input = true;
|
params.multiline_input = true;
|
||||||
|
} else if (arg == "--simple-io") {
|
||||||
|
params.simple_io = true;
|
||||||
} else if (arg == "--color") {
|
} else if (arg == "--color") {
|
||||||
params.use_color = true;
|
params.use_color = true;
|
||||||
} else if (arg == "--mlock") {
|
} else if (arg == "--mlock") {
|
||||||
@ -352,7 +353,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
params.main_gpu = std::stoi(argv[i]);
|
params.main_gpu = std::stoi(argv[i]);
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n");
|
||||||
#endif
|
#endif
|
||||||
} else if (arg == "--tensor-split" || arg == "-ts") {
|
} else if (arg == "--tensor-split" || arg == "-ts") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
@ -376,13 +377,19 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n");
|
||||||
|
#endif // GGML_USE_CUBLAS
|
||||||
|
} else if (arg == "--mul-mat-q" || arg == "-mmq") {
|
||||||
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
params.mul_mat_q = true;
|
||||||
|
#else
|
||||||
|
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to use mul_mat_q kernels.\n");
|
||||||
#endif // GGML_USE_CUBLAS
|
#endif // GGML_USE_CUBLAS
|
||||||
} else if (arg == "--low-vram" || arg == "-lv") {
|
} else if (arg == "--low-vram" || arg == "-lv") {
|
||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
params.low_vram = true;
|
params.low_vram = true;
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");
|
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");
|
||||||
#endif // GGML_USE_CUBLAS
|
#endif // GGML_USE_CUBLAS
|
||||||
} else if (arg == "--no-mmap") {
|
} else if (arg == "--no-mmap") {
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
@ -402,8 +409,14 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
params.antiprompt.push_back(argv[i]);
|
params.antiprompt.push_back(argv[i]);
|
||||||
} else if (arg == "--perplexity") {
|
} else if (arg == "--perplexity") {
|
||||||
params.perplexity = true;
|
params.perplexity = true;
|
||||||
} else if (arg == "--perplexity-lines") {
|
} else if (arg == "--hellaswag") {
|
||||||
params.perplexity_lines = true;
|
params.hellaswag = true;
|
||||||
|
} else if (arg == "--hellaswag-tasks") {
|
||||||
|
if (++i >= argc) {
|
||||||
|
invalid_param = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
params.hellaswag_tasks = std::stoi(argv[i]);
|
||||||
} else if (arg == "--ignore-eos") {
|
} else if (arg == "--ignore-eos") {
|
||||||
params.logit_bias[llama_token_eos()] = -INFINITY;
|
params.logit_bias[llama_token_eos()] = -INFINITY;
|
||||||
} else if (arg == "--no-penalize-nl") {
|
} else if (arg == "--no-penalize-nl") {
|
||||||
@ -559,8 +572,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||||||
fprintf(stdout, " not recommended: doubles context memory required and no measurable increase in quality\n");
|
fprintf(stdout, " not recommended: doubles context memory required and no measurable increase in quality\n");
|
||||||
fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp);
|
fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp);
|
||||||
fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n");
|
fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n");
|
||||||
fprintf(stdout, " --perplexity-lines compute perplexity over each line of the prompt\n");
|
fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n");
|
||||||
fprintf(stdout, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
|
fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks);
|
||||||
|
fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
|
||||||
fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
|
fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks);
|
||||||
if (llama_mlock_supported()) {
|
if (llama_mlock_supported()) {
|
||||||
fprintf(stdout, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
fprintf(stdout, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
||||||
@ -578,10 +592,14 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||||||
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
|
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
|
||||||
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
|
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" );
|
||||||
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
|
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n" );
|
||||||
|
fprintf(stdout, " -mmq, --mul-mat-q use experimental mul_mat_q CUDA kernels instead of cuBLAS. TEMP!!!\n" );
|
||||||
|
fprintf(stdout, " Reduces VRAM usage by 700/970/1430 MiB for 7b/13b/33b but prompt processing speed\n" );
|
||||||
|
fprintf(stdout, " is still suboptimal, especially q2_K, q3_K, q5_K, and q6_K.\n" );
|
||||||
#endif
|
#endif
|
||||||
fprintf(stdout, " --mtest compute maximum memory usage\n");
|
fprintf(stdout, " --mtest compute maximum memory usage\n");
|
||||||
fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
|
fprintf(stdout, " --export export the computation graph to 'llama.ggml'\n");
|
||||||
fprintf(stdout, " --verbose-prompt print prompt before generation\n");
|
fprintf(stdout, " --verbose-prompt print prompt before generation\n");
|
||||||
|
fprintf(stderr, " --simple-io use basic IO for better compatibility in subprocesses and limited consoles\n");
|
||||||
fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
|
fprintf(stdout, " --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
|
||||||
fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
|
fprintf(stdout, " --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
|
||||||
fprintf(stdout, " -m FNAME, --model FNAME\n");
|
fprintf(stdout, " -m FNAME, --model FNAME\n");
|
||||||
@ -630,6 +648,7 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
|||||||
lparams.main_gpu = params.main_gpu;
|
lparams.main_gpu = params.main_gpu;
|
||||||
lparams.tensor_split = params.tensor_split;
|
lparams.tensor_split = params.tensor_split;
|
||||||
lparams.low_vram = params.low_vram;
|
lparams.low_vram = params.low_vram;
|
||||||
|
lparams.mul_mat_q = params.mul_mat_q;
|
||||||
lparams.seed = params.seed;
|
lparams.seed = params.seed;
|
||||||
lparams.f16_kv = params.memory_f16;
|
lparams.f16_kv = params.memory_f16;
|
||||||
lparams.use_mmap = params.use_mmap;
|
lparams.use_mmap = params.use_mmap;
|
||||||
@ -673,376 +692,3 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
|||||||
|
|
||||||
return std::make_tuple(model, lctx);
|
return std::make_tuple(model, lctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
void console_init(console_state & con_st) {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
// Windows-specific console initialization
|
|
||||||
DWORD dwMode = 0;
|
|
||||||
con_st.hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
|
|
||||||
if (con_st.hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(con_st.hConsole, &dwMode)) {
|
|
||||||
con_st.hConsole = GetStdHandle(STD_ERROR_HANDLE);
|
|
||||||
if (con_st.hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(con_st.hConsole, &dwMode))) {
|
|
||||||
con_st.hConsole = NULL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (con_st.hConsole) {
|
|
||||||
// Enable ANSI colors on Windows 10+
|
|
||||||
if (con_st.use_color && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
|
|
||||||
SetConsoleMode(con_st.hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
|
|
||||||
}
|
|
||||||
// Set console output codepage to UTF8
|
|
||||||
SetConsoleOutputCP(CP_UTF8);
|
|
||||||
}
|
|
||||||
HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
|
|
||||||
if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
|
|
||||||
// Set console input codepage to UTF16
|
|
||||||
_setmode(_fileno(stdin), _O_WTEXT);
|
|
||||||
|
|
||||||
// Turn off ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
|
|
||||||
dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
|
|
||||||
SetConsoleMode(hConIn, dwMode);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// POSIX-specific console initialization
|
|
||||||
struct termios new_termios;
|
|
||||||
tcgetattr(STDIN_FILENO, &con_st.prev_state);
|
|
||||||
new_termios = con_st.prev_state;
|
|
||||||
new_termios.c_lflag &= ~(ICANON | ECHO);
|
|
||||||
new_termios.c_cc[VMIN] = 1;
|
|
||||||
new_termios.c_cc[VTIME] = 0;
|
|
||||||
tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);
|
|
||||||
|
|
||||||
con_st.tty = fopen("/dev/tty", "w+");
|
|
||||||
if (con_st.tty != nullptr) {
|
|
||||||
con_st.out = con_st.tty;
|
|
||||||
}
|
|
||||||
|
|
||||||
setlocale(LC_ALL, "");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void console_cleanup(console_state & con_st) {
|
|
||||||
// Reset console color
|
|
||||||
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
|
||||||
|
|
||||||
#if !defined(_WIN32)
|
|
||||||
if (con_st.tty != nullptr) {
|
|
||||||
con_st.out = stdout;
|
|
||||||
fclose(con_st.tty);
|
|
||||||
con_st.tty = nullptr;
|
|
||||||
}
|
|
||||||
// Restore the terminal settings on POSIX systems
|
|
||||||
tcsetattr(STDIN_FILENO, TCSANOW, &con_st.prev_state);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Keep track of current color of output, and emit ANSI code if it changes. */
|
|
||||||
void console_set_color(console_state & con_st, console_color_t color) {
|
|
||||||
if (con_st.use_color && con_st.color != color) {
|
|
||||||
fflush(stdout);
|
|
||||||
switch(color) {
|
|
||||||
case CONSOLE_COLOR_DEFAULT:
|
|
||||||
fprintf(con_st.out, ANSI_COLOR_RESET);
|
|
||||||
break;
|
|
||||||
case CONSOLE_COLOR_PROMPT:
|
|
||||||
fprintf(con_st.out, ANSI_COLOR_YELLOW);
|
|
||||||
break;
|
|
||||||
case CONSOLE_COLOR_USER_INPUT:
|
|
||||||
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_GREEN);
|
|
||||||
break;
|
|
||||||
case CONSOLE_COLOR_ERROR:
|
|
||||||
fprintf(con_st.out, ANSI_BOLD ANSI_COLOR_RED);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
con_st.color = color;
|
|
||||||
fflush(con_st.out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
char32_t getchar32() {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
|
|
||||||
wchar_t high_surrogate = 0;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
INPUT_RECORD record;
|
|
||||||
DWORD count;
|
|
||||||
if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
|
|
||||||
return WEOF;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
|
|
||||||
wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
|
|
||||||
if (wc == 0) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
|
|
||||||
high_surrogate = wc;
|
|
||||||
continue;
|
|
||||||
} else if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
|
|
||||||
if (high_surrogate != 0) { // Check if we have a high surrogate
|
|
||||||
return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
high_surrogate = 0; // Reset the high surrogate
|
|
||||||
return static_cast<char32_t>(wc);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
wchar_t wc = getwchar();
|
|
||||||
if (static_cast<wint_t>(wc) == WEOF) {
|
|
||||||
return WEOF;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if WCHAR_MAX == 0xFFFF
|
|
||||||
if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
|
|
||||||
wchar_t low_surrogate = getwchar();
|
|
||||||
if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
|
|
||||||
return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
|
|
||||||
return 0xFFFD; // Return the replacement character U+FFFD
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return static_cast<char32_t>(wc);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void pop_cursor(console_state & con_st) {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
if (con_st.hConsole != NULL) {
|
|
||||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
|
||||||
GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo);
|
|
||||||
|
|
||||||
COORD newCursorPosition = bufferInfo.dwCursorPosition;
|
|
||||||
if (newCursorPosition.X == 0) {
|
|
||||||
newCursorPosition.X = bufferInfo.dwSize.X - 1;
|
|
||||||
newCursorPosition.Y -= 1;
|
|
||||||
} else {
|
|
||||||
newCursorPosition.X -= 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
SetConsoleCursorPosition(con_st.hConsole, newCursorPosition);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
putc('\b', con_st.out);
|
|
||||||
}
|
|
||||||
|
|
||||||
int estimateWidth(char32_t codepoint) {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
return 1;
|
|
||||||
#else
|
|
||||||
return wcwidth(codepoint);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
int put_codepoint(console_state & con_st, const char* utf8_codepoint, size_t length, int expectedWidth) {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
|
||||||
if (!GetConsoleScreenBufferInfo(con_st.hConsole, &bufferInfo)) {
|
|
||||||
// go with the default
|
|
||||||
return expectedWidth;
|
|
||||||
}
|
|
||||||
COORD initialPosition = bufferInfo.dwCursorPosition;
|
|
||||||
DWORD nNumberOfChars = length;
|
|
||||||
WriteConsole(con_st.hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);
|
|
||||||
|
|
||||||
CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
|
|
||||||
GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);
|
|
||||||
|
|
||||||
// Figure out our real position if we're in the last column
|
|
||||||
if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
|
|
||||||
DWORD nNumberOfChars;
|
|
||||||
WriteConsole(con_st.hConsole, &" \b", 2, &nNumberOfChars, NULL);
|
|
||||||
GetConsoleScreenBufferInfo(con_st.hConsole, &newBufferInfo);
|
|
||||||
}
|
|
||||||
|
|
||||||
int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
|
|
||||||
if (width < 0) {
|
|
||||||
width += newBufferInfo.dwSize.X;
|
|
||||||
}
|
|
||||||
return width;
|
|
||||||
#else
|
|
||||||
// we can trust expectedWidth if we've got one
|
|
||||||
if (expectedWidth >= 0 || con_st.tty == nullptr) {
|
|
||||||
fwrite(utf8_codepoint, length, 1, con_st.out);
|
|
||||||
return expectedWidth;
|
|
||||||
}
|
|
||||||
|
|
||||||
fputs("\033[6n", con_st.tty); // Query cursor position
|
|
||||||
int x1, x2, y1, y2;
|
|
||||||
int results = 0;
|
|
||||||
results = fscanf(con_st.tty, "\033[%d;%dR", &y1, &x1);
|
|
||||||
|
|
||||||
fwrite(utf8_codepoint, length, 1, con_st.tty);
|
|
||||||
|
|
||||||
fputs("\033[6n", con_st.tty); // Query cursor position
|
|
||||||
results += fscanf(con_st.tty, "\033[%d;%dR", &y2, &x2);
|
|
||||||
|
|
||||||
if (results != 4) {
|
|
||||||
return expectedWidth;
|
|
||||||
}
|
|
||||||
|
|
||||||
int width = x2 - x1;
|
|
||||||
if (width < 0) {
|
|
||||||
// Calculate the width considering text wrapping
|
|
||||||
struct winsize w;
|
|
||||||
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
|
|
||||||
width += w.ws_col;
|
|
||||||
}
|
|
||||||
return width;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void replace_last(console_state & con_st, char ch) {
|
|
||||||
#if defined(_WIN32)
|
|
||||||
pop_cursor(con_st);
|
|
||||||
put_codepoint(con_st, &ch, 1, 1);
|
|
||||||
#else
|
|
||||||
fprintf(con_st.out, "\b%c", ch);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
void append_utf8(char32_t ch, std::string & out) {
|
|
||||||
if (ch <= 0x7F) {
|
|
||||||
out.push_back(static_cast<unsigned char>(ch));
|
|
||||||
} else if (ch <= 0x7FF) {
|
|
||||||
out.push_back(static_cast<unsigned char>(0xC0 | ((ch >> 6) & 0x1F)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
|
|
||||||
} else if (ch <= 0xFFFF) {
|
|
||||||
out.push_back(static_cast<unsigned char>(0xE0 | ((ch >> 12) & 0x0F)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
|
|
||||||
} else if (ch <= 0x10FFFF) {
|
|
||||||
out.push_back(static_cast<unsigned char>(0xF0 | ((ch >> 18) & 0x07)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 12) & 0x3F)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | ((ch >> 6) & 0x3F)));
|
|
||||||
out.push_back(static_cast<unsigned char>(0x80 | (ch & 0x3F)));
|
|
||||||
} else {
|
|
||||||
// Invalid Unicode code point
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to remove the last UTF-8 character from a string
|
|
||||||
void pop_back_utf8_char(std::string & line) {
|
|
||||||
if (line.empty()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t pos = line.length() - 1;
|
|
||||||
|
|
||||||
// Find the start of the last UTF-8 character (checking up to 4 bytes back)
|
|
||||||
for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) {
|
|
||||||
if ((line[pos] & 0xC0) != 0x80) break; // Found the start of the character
|
|
||||||
}
|
|
||||||
line.erase(pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool console_readline(console_state & con_st, std::string & line) {
|
|
||||||
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
|
|
||||||
if (con_st.out != stdout) {
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
|
|
||||||
line.clear();
|
|
||||||
std::vector<int> widths;
|
|
||||||
bool is_special_char = false;
|
|
||||||
bool end_of_stream = false;
|
|
||||||
|
|
||||||
char32_t input_char;
|
|
||||||
while (true) {
|
|
||||||
fflush(con_st.out); // Ensure all output is displayed before waiting for input
|
|
||||||
input_char = getchar32();
|
|
||||||
|
|
||||||
if (input_char == '\r' || input_char == '\n') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
|
|
||||||
end_of_stream = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (is_special_char) {
|
|
||||||
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
|
|
||||||
replace_last(con_st, line.back());
|
|
||||||
is_special_char = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (input_char == '\033') { // Escape sequence
|
|
||||||
char32_t code = getchar32();
|
|
||||||
if (code == '[' || code == 0x1B) {
|
|
||||||
// Discard the rest of the escape sequence
|
|
||||||
while ((code = getchar32()) != (char32_t) WEOF) {
|
|
||||||
if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
|
|
||||||
if (!widths.empty()) {
|
|
||||||
int count;
|
|
||||||
do {
|
|
||||||
count = widths.back();
|
|
||||||
widths.pop_back();
|
|
||||||
// Move cursor back, print space, and move cursor back again
|
|
||||||
for (int i = 0; i < count; i++) {
|
|
||||||
replace_last(con_st, ' ');
|
|
||||||
pop_cursor(con_st);
|
|
||||||
}
|
|
||||||
pop_back_utf8_char(line);
|
|
||||||
} while (count == 0 && !widths.empty());
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int offset = line.length();
|
|
||||||
append_utf8(input_char, line);
|
|
||||||
int width = put_codepoint(con_st, line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
|
|
||||||
if (width < 0) {
|
|
||||||
width = 0;
|
|
||||||
}
|
|
||||||
widths.push_back(width);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
|
|
||||||
console_set_color(con_st, CONSOLE_COLOR_PROMPT);
|
|
||||||
replace_last(con_st, line.back());
|
|
||||||
is_special_char = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool has_more = con_st.multiline_input;
|
|
||||||
if (is_special_char) {
|
|
||||||
replace_last(con_st, ' ');
|
|
||||||
pop_cursor(con_st);
|
|
||||||
|
|
||||||
char last = line.back();
|
|
||||||
line.pop_back();
|
|
||||||
if (last == '\\') {
|
|
||||||
line += '\n';
|
|
||||||
fputc('\n', con_st.out);
|
|
||||||
has_more = !has_more;
|
|
||||||
} else {
|
|
||||||
// llama will just eat the single space, it won't act as a space
|
|
||||||
if (line.length() == 1 && line.back() == ' ') {
|
|
||||||
line.clear();
|
|
||||||
pop_cursor(con_st);
|
|
||||||
}
|
|
||||||
has_more = false;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if (end_of_stream) {
|
|
||||||
has_more = false;
|
|
||||||
} else {
|
|
||||||
line += '\n';
|
|
||||||
fputc('\n', con_st.out);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fflush(con_st.out);
|
|
||||||
return has_more;
|
|
||||||
}
|
|
||||||
|
@ -11,11 +11,6 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
#if !defined (_WIN32)
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <termios.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// CLI argument parsing
|
// CLI argument parsing
|
||||||
//
|
//
|
||||||
@ -70,7 +65,11 @@ struct gpt_params {
|
|||||||
std::string lora_adapter = ""; // lora adapter path
|
std::string lora_adapter = ""; // lora adapter path
|
||||||
std::string lora_base = ""; // base model path for the lora adapter
|
std::string lora_base = ""; // base model path for the lora adapter
|
||||||
|
|
||||||
bool low_vram = false; // if true, reduce VRAM usage at the cost of performance
|
bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt
|
||||||
|
size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score
|
||||||
|
|
||||||
|
bool low_vram = false; // if true, reduce VRAM usage at the cost of performance
|
||||||
|
bool mul_mat_q = false; // if true, use experimental mul_mat_q kernels
|
||||||
bool memory_f16 = true; // use f16 instead of f32 for memory kv
|
bool memory_f16 = true; // use f16 instead of f32 for memory kv
|
||||||
bool random_prompt = false; // do not randomize prompt if none provided
|
bool random_prompt = false; // do not randomize prompt if none provided
|
||||||
bool use_color = false; // use color to distinguish generations and inputs
|
bool use_color = false; // use color to distinguish generations and inputs
|
||||||
@ -81,12 +80,12 @@ struct gpt_params {
|
|||||||
bool embedding = false; // get only sentence embedding
|
bool embedding = false; // get only sentence embedding
|
||||||
bool interactive_first = false; // wait for user input immediately
|
bool interactive_first = false; // wait for user input immediately
|
||||||
bool multiline_input = false; // reverse the usage of `\`
|
bool multiline_input = false; // reverse the usage of `\`
|
||||||
|
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
||||||
|
|
||||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||||
bool instruct = false; // instruction mode (used for Alpaca models)
|
bool instruct = false; // instruction mode (used for Alpaca models)
|
||||||
bool penalize_nl = true; // consider newlines as a repeatable token
|
bool penalize_nl = true; // consider newlines as a repeatable token
|
||||||
bool perplexity = false; // compute perplexity over the prompt
|
bool perplexity = false; // compute perplexity over the prompt
|
||||||
bool perplexity_lines = false; // compute perplexity over each line of the prompt
|
|
||||||
bool use_mmap = true; // use mmap for faster loads
|
bool use_mmap = true; // use mmap for faster loads
|
||||||
bool use_mlock = false; // use mlock to keep model in memory
|
bool use_mlock = false; // use mlock to keep model in memory
|
||||||
bool mem_test = false; // compute maximum memory usage
|
bool mem_test = false; // compute maximum memory usage
|
||||||
@ -113,42 +112,3 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
|
|||||||
|
|
||||||
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
|
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
|
||||||
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
|
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
|
||||||
|
|
||||||
//
|
|
||||||
// Console utils
|
|
||||||
//
|
|
||||||
|
|
||||||
#define ANSI_COLOR_RED "\x1b[31m"
|
|
||||||
#define ANSI_COLOR_GREEN "\x1b[32m"
|
|
||||||
#define ANSI_COLOR_YELLOW "\x1b[33m"
|
|
||||||
#define ANSI_COLOR_BLUE "\x1b[34m"
|
|
||||||
#define ANSI_COLOR_MAGENTA "\x1b[35m"
|
|
||||||
#define ANSI_COLOR_CYAN "\x1b[36m"
|
|
||||||
#define ANSI_COLOR_RESET "\x1b[0m"
|
|
||||||
#define ANSI_BOLD "\x1b[1m"
|
|
||||||
|
|
||||||
enum console_color_t {
|
|
||||||
CONSOLE_COLOR_DEFAULT=0,
|
|
||||||
CONSOLE_COLOR_PROMPT,
|
|
||||||
CONSOLE_COLOR_USER_INPUT,
|
|
||||||
CONSOLE_COLOR_ERROR
|
|
||||||
};
|
|
||||||
|
|
||||||
struct console_state {
|
|
||||||
bool multiline_input = false;
|
|
||||||
bool use_color = false;
|
|
||||||
console_color_t color = CONSOLE_COLOR_DEFAULT;
|
|
||||||
|
|
||||||
FILE* out = stdout;
|
|
||||||
#if defined (_WIN32)
|
|
||||||
void* hConsole;
|
|
||||||
#else
|
|
||||||
FILE* tty = nullptr;
|
|
||||||
termios prev_state;
|
|
||||||
#endif
|
|
||||||
};
|
|
||||||
|
|
||||||
void console_init(console_state & con_st);
|
|
||||||
void console_cleanup(console_state & con_st);
|
|
||||||
void console_set_color(console_state & con_st, console_color_t color);
|
|
||||||
bool console_readline(console_state & con_st, std::string & line);
|
|
||||||
|
496
examples/console.cpp
Normal file
496
examples/console.cpp
Normal file
@ -0,0 +1,496 @@
|
|||||||
|
#include "console.h"
|
||||||
|
#include <vector>
|
||||||
|
#include <iostream>
|
||||||
|
|
||||||
|
#if defined(_WIN32)
|
||||||
|
#define WIN32_LEAN_AND_MEAN
|
||||||
|
#ifndef NOMINMAX
|
||||||
|
#define NOMINMAX
|
||||||
|
#endif
|
||||||
|
#include <windows.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <io.h>
|
||||||
|
#else
|
||||||
|
#include <climits>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <wchar.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <termios.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define ANSI_COLOR_RED "\x1b[31m"
|
||||||
|
#define ANSI_COLOR_GREEN "\x1b[32m"
|
||||||
|
#define ANSI_COLOR_YELLOW "\x1b[33m"
|
||||||
|
#define ANSI_COLOR_BLUE "\x1b[34m"
|
||||||
|
#define ANSI_COLOR_MAGENTA "\x1b[35m"
|
||||||
|
#define ANSI_COLOR_CYAN "\x1b[36m"
|
||||||
|
#define ANSI_COLOR_RESET "\x1b[0m"
|
||||||
|
#define ANSI_BOLD "\x1b[1m"
|
||||||
|
|
||||||
|
namespace console {
|
||||||
|
|
||||||
|
//
|
||||||
|
// Console state
|
||||||
|
//
|
||||||
|
|
||||||
|
static bool advanced_display = false;
|
||||||
|
static bool simple_io = true;
|
||||||
|
static display_t current_display = reset;
|
||||||
|
|
||||||
|
static FILE* out = stdout;
|
||||||
|
|
||||||
|
#if defined (_WIN32)
|
||||||
|
static void* hConsole;
|
||||||
|
#else
|
||||||
|
static FILE* tty = nullptr;
|
||||||
|
static termios initial_state;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//
|
||||||
|
// Init and cleanup
|
||||||
|
//
|
||||||
|
|
||||||
|
void init(bool use_simple_io, bool use_advanced_display) {
|
||||||
|
advanced_display = use_advanced_display;
|
||||||
|
simple_io = use_simple_io;
|
||||||
|
#if defined(_WIN32)
|
||||||
|
// Windows-specific console initialization
|
||||||
|
DWORD dwMode = 0;
|
||||||
|
hConsole = GetStdHandle(STD_OUTPUT_HANDLE);
|
||||||
|
if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) {
|
||||||
|
hConsole = GetStdHandle(STD_ERROR_HANDLE);
|
||||||
|
if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) {
|
||||||
|
hConsole = nullptr;
|
||||||
|
simple_io = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (hConsole) {
|
||||||
|
// Enable ANSI colors on Windows 10+
|
||||||
|
if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING)) {
|
||||||
|
SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING);
|
||||||
|
}
|
||||||
|
// Set console output codepage to UTF8
|
||||||
|
SetConsoleOutputCP(CP_UTF8);
|
||||||
|
}
|
||||||
|
HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE);
|
||||||
|
if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) {
|
||||||
|
// Set console input codepage to UTF16
|
||||||
|
_setmode(_fileno(stdin), _O_WTEXT);
|
||||||
|
|
||||||
|
// Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT)
|
||||||
|
if (simple_io) {
|
||||||
|
dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT;
|
||||||
|
} else {
|
||||||
|
dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT);
|
||||||
|
}
|
||||||
|
if (!SetConsoleMode(hConIn, dwMode)) {
|
||||||
|
simple_io = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
// POSIX-specific console initialization
|
||||||
|
if (!simple_io) {
|
||||||
|
struct termios new_termios;
|
||||||
|
tcgetattr(STDIN_FILENO, &initial_state);
|
||||||
|
new_termios = initial_state;
|
||||||
|
new_termios.c_lflag &= ~(ICANON | ECHO);
|
||||||
|
new_termios.c_cc[VMIN] = 1;
|
||||||
|
new_termios.c_cc[VTIME] = 0;
|
||||||
|
tcsetattr(STDIN_FILENO, TCSANOW, &new_termios);
|
||||||
|
|
||||||
|
tty = fopen("/dev/tty", "w+");
|
||||||
|
if (tty != nullptr) {
|
||||||
|
out = tty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
setlocale(LC_ALL, "");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void cleanup() {
|
||||||
|
// Reset console display
|
||||||
|
set_display(reset);
|
||||||
|
|
||||||
|
#if !defined(_WIN32)
|
||||||
|
// Restore settings on POSIX systems
|
||||||
|
if (!simple_io) {
|
||||||
|
if (tty != nullptr) {
|
||||||
|
out = stdout;
|
||||||
|
fclose(tty);
|
||||||
|
tty = nullptr;
|
||||||
|
}
|
||||||
|
tcsetattr(STDIN_FILENO, TCSANOW, &initial_state);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// Display and IO
|
||||||
|
//
|
||||||
|
|
||||||
|
// Keep track of current display and only emit ANSI code if it changes
|
||||||
|
void set_display(display_t display) {
|
||||||
|
if (advanced_display && current_display != display) {
|
||||||
|
fflush(stdout);
|
||||||
|
switch(display) {
|
||||||
|
case reset:
|
||||||
|
fprintf(out, ANSI_COLOR_RESET);
|
||||||
|
break;
|
||||||
|
case prompt:
|
||||||
|
fprintf(out, ANSI_COLOR_YELLOW);
|
||||||
|
break;
|
||||||
|
case user_input:
|
||||||
|
fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN);
|
||||||
|
break;
|
||||||
|
case error:
|
||||||
|
fprintf(out, ANSI_BOLD ANSI_COLOR_RED);
|
||||||
|
}
|
||||||
|
current_display = display;
|
||||||
|
fflush(out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read one Unicode code point from the console input.
//
// Returns WEOF when no more input can be read. UTF-16 surrogate pairs are combined into a
// single code point; a surrogate that cannot be paired is reported as U+FFFD (replacement
// character) so that callers never receive a lone surrogate.
char32_t getchar32() {
#if defined(_WIN32)
    // Note: this local intentionally refers to the *input* handle and shadows the
    // file-level output console handle of the same name.
    HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE);
    wchar_t high_surrogate = 0;

    while (true) {
        INPUT_RECORD record;
        DWORD count;
        if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) {
            return WEOF;
        }

        // Only key-down events carry characters we are interested in
        if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) {
            wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar;
            if (wc == 0) {
                // Key press without a character
                continue;
            }

            if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
                high_surrogate = wc;
                continue;
            }
            if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate
                if (high_surrogate != 0) { // Check if we have a high surrogate
                    return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000;
                }
                // Unpaired low surrogate: not a valid code point on its own. Report the
                // replacement character, matching the 16-bit wchar_t branch below.
                return 0xFFFD;
            }

            high_surrogate = 0; // Reset the high surrogate
            return static_cast<char32_t>(wc);
        }
    }
#else
    wchar_t wc = getwchar();
    if (static_cast<wint_t>(wc) == WEOF) {
        return WEOF;
    }

#if WCHAR_MAX == 0xFFFF
    // wchar_t is 16 bits wide here, so code points above U+FFFF arrive as surrogate pairs
    if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate
        wchar_t low_surrogate = getwchar();
        if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate
            return (static_cast<char32_t>(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000;
        }
    }
    if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair
        return 0xFFFD; // Return the replacement character U+FFFD
    }
#endif

    return static_cast<char32_t>(wc);
#endif
}
|
||||||
|
|
||||||
|
// Move the cursor one cell to the left.
// With a Windows console handle the cursor is repositioned directly, wrapping to the last
// column of the previous row when it is in column 0; otherwise a backspace is written.
void pop_cursor() {
#if defined(_WIN32)
    if (hConsole != NULL) {
        CONSOLE_SCREEN_BUFFER_INFO info;
        GetConsoleScreenBufferInfo(hConsole, &info);

        COORD target = info.dwCursorPosition;
        if (target.X != 0) {
            target.X -= 1;
        } else {
            // Column 0: wrap back to the end of the previous row
            target.X = info.dwSize.X - 1;
            target.Y -= 1;
        }

        SetConsoleCursorPosition(hConsole, target);
        return;
    }
#endif
    putc('\b', out);
}
|
||||||
|
|
||||||
|
// Estimate how many terminal columns `codepoint` occupies when printed.
// On Windows this is always reported as 1; elsewhere the answer comes from wcwidth().
// NOTE(review): wcwidth() may report a negative value for code points it cannot size;
// put_codepoint() and readline_advanced() in this file handle a negative estimate.
int estimateWidth(char32_t codepoint) {
#if defined(_WIN32)
    return 1;
#else
    return wcwidth(codepoint);
#endif
}
|
||||||
|
|
||||||
|
int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) {
|
||||||
|
#if defined(_WIN32)
|
||||||
|
CONSOLE_SCREEN_BUFFER_INFO bufferInfo;
|
||||||
|
if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) {
|
||||||
|
// go with the default
|
||||||
|
return expectedWidth;
|
||||||
|
}
|
||||||
|
COORD initialPosition = bufferInfo.dwCursorPosition;
|
||||||
|
DWORD nNumberOfChars = length;
|
||||||
|
WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL);
|
||||||
|
|
||||||
|
CONSOLE_SCREEN_BUFFER_INFO newBufferInfo;
|
||||||
|
GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
|
||||||
|
|
||||||
|
// Figure out our real position if we're in the last column
|
||||||
|
if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) {
|
||||||
|
DWORD nNumberOfChars;
|
||||||
|
WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL);
|
||||||
|
GetConsoleScreenBufferInfo(hConsole, &newBufferInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
int width = newBufferInfo.dwCursorPosition.X - initialPosition.X;
|
||||||
|
if (width < 0) {
|
||||||
|
width += newBufferInfo.dwSize.X;
|
||||||
|
}
|
||||||
|
return width;
|
||||||
|
#else
|
||||||
|
// We can trust expectedWidth if we've got one
|
||||||
|
if (expectedWidth >= 0 || tty == nullptr) {
|
||||||
|
fwrite(utf8_codepoint, length, 1, out);
|
||||||
|
return expectedWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
fputs("\033[6n", tty); // Query cursor position
|
||||||
|
int x1;
|
||||||
|
int y1;
|
||||||
|
int x2;
|
||||||
|
int y2;
|
||||||
|
int results = 0;
|
||||||
|
results = fscanf(tty, "\033[%d;%dR", &y1, &x1);
|
||||||
|
|
||||||
|
fwrite(utf8_codepoint, length, 1, tty);
|
||||||
|
|
||||||
|
fputs("\033[6n", tty); // Query cursor position
|
||||||
|
results += fscanf(tty, "\033[%d;%dR", &y2, &x2);
|
||||||
|
|
||||||
|
if (results != 4) {
|
||||||
|
return expectedWidth;
|
||||||
|
}
|
||||||
|
|
||||||
|
int width = x2 - x1;
|
||||||
|
if (width < 0) {
|
||||||
|
// Calculate the width considering text wrapping
|
||||||
|
struct winsize w;
|
||||||
|
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
|
||||||
|
width += w.ws_col;
|
||||||
|
}
|
||||||
|
return width;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
// Overwrite the character cell immediately left of the cursor with `ch`,
// leaving the cursor where it was before the call.
void replace_last(char ch) {
#if defined(_WIN32)
    // Step back one cell, then print the replacement through the console API
    pop_cursor();
    put_codepoint(&ch, 1, 1);
#else
    // Backspace followed by the replacement character
    fputc('\b', out);
    fputc(ch, out);
#endif
}
|
||||||
|
|
||||||
|
// Append the UTF-8 encoding of code point `ch` to `out`.
// Values above U+10FFFF are not valid code points and leave `out` untouched.
void append_utf8(char32_t ch, std::string & out) {
    // Append a single byte built from the low 8 bits of `value`
    const auto emit = [&out](char32_t value) {
        out.push_back(static_cast<unsigned char>(value));
    };
    // Append a continuation byte (10xxxxxx) holding the 6 bits of `ch` starting at `shift`
    const auto emit_tail = [&](int shift) {
        emit(0x80 | ((ch >> shift) & 0x3F));
    };

    if (ch > 0x10FFFF) {
        // Invalid Unicode code point
        return;
    }

    if (ch <= 0x7F) {
        // 1 byte: 0xxxxxxx
        emit(ch);
        return;
    }

    if (ch <= 0x7FF) {
        // 2 bytes: 110xxxxx 10xxxxxx
        emit(0xC0 | ((ch >> 6) & 0x1F));
        emit_tail(0);
        return;
    }

    if (ch <= 0xFFFF) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        emit(0xE0 | ((ch >> 12) & 0x0F));
        emit_tail(6);
        emit_tail(0);
        return;
    }

    // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    emit(0xF0 | ((ch >> 18) & 0x07));
    emit_tail(12);
    emit_tail(6);
    emit_tail(0);
}
|
||||||
|
|
||||||
|
// Remove the last UTF-8 encoded character from `line` (no-op on an empty string).
// Starting at the final byte, at most three continuation bytes (10xxxxxx) are skipped
// to find the byte where that character begins.
void pop_back_utf8_char(std::string & line) {
    if (line.empty()) {
        return;
    }

    size_t start = line.size() - 1;
    size_t skipped = 0;

    // Walk backwards while we are still on a continuation byte
    while (skipped < 3 && start > 0 && (static_cast<unsigned char>(line[start]) & 0xC0) == 0x80) {
        --start;
        ++skipped;
    }

    line.erase(start);
}
|
||||||
|
|
||||||
|
// Read one line of input character by character, echoing and editing it ourselves.
//
//   line            - receives the text that was entered (cleared first)
//   multiline_input - default answer to "should the caller keep reading more lines?"
//
// A trailing '\' inverts that default and a trailing '/' always ends the input; while such
// a character is the last one typed it is shown in the prompt style. The trailing marker is
// removed from `line`. Returns true when the caller should read another line.
bool readline_advanced(std::string & line, bool multiline_input) {
    if (out != stdout) {
        fflush(stdout);
    }

    line.clear();
    std::vector<int> widths;       // display width of every character currently in `line`
    bool is_special_char = false;  // true while the last character is a highlighted '\' or '/'
    bool end_of_stream = false;

    char32_t input_char;
    while (true) {
        fflush(out); // Ensure all output is displayed before waiting for input
        input_char = getchar32();

        if (input_char == '\r' || input_char == '\n') {
            break;
        }

        if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) {
            end_of_stream = true;
            break;
        }

        if (is_special_char) {
            // More input followed the '\' or '/': redraw it as ordinary user input
            set_display(user_input);
            replace_last(line.back());
            is_special_char = false;
        }

        if (input_char == '\033') { // Escape sequence
            char32_t code = getchar32();
            if (code == '[' || code == 0x1B) {
                // Discard the rest of the escape sequence
                while ((code = getchar32()) != (char32_t) WEOF) {
                    if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') {
                        break;
                    }
                }
            }
        } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace
            if (!widths.empty()) {
                int count;
                do {
                    count = widths.back();
                    widths.pop_back();
                    // Move cursor back, print space, and move cursor back again
                    for (int i = 0; i < count; i++) {
                        replace_last(' ');
                        pop_cursor();
                    }
                    pop_back_utf8_char(line);
                    // Zero-width characters are removed together with the character before them
                } while (count == 0 && !widths.empty());
            }
        } else {
            // Ordinary character: append it to the line, echo it and record its width
            int offset = line.length();
            append_utf8(input_char, line);
            int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char));
            if (width < 0) {
                width = 0;
            }
            widths.push_back(width);
        }

        if (!line.empty() && (line.back() == '\\' || line.back() == '/')) {
            // Highlight a trailing '\' or '/' in the prompt style
            set_display(prompt);
            replace_last(line.back());
            is_special_char = true;
        }
    }

    bool has_more = multiline_input;
    if (is_special_char) {
        // The line ended with '\' or '/': erase it from the screen and from `line`
        replace_last(' ');
        pop_cursor();

        char last = line.back();
        line.pop_back();
        if (last == '\\') {
            line += '\n';
            fputc('\n', out);
            has_more = !has_more;
        } else {
            // llama will just eat the single space, it won't act as a space
            if (line.length() == 1 && line.back() == ' ') {
                line.clear();
                pop_cursor();
            }
            has_more = false;
        }
    } else {
        if (end_of_stream) {
            has_more = false;
        } else {
            line += '\n';
            fputc('\n', out);
        }
    }

    fflush(out);
    return has_more;
}
|
||||||
|
|
||||||
|
// Read one complete line from standard input using the stream library.
//
//   line            - receives the text (UTF-8); cleared when nothing could be read
//   multiline_input - default answer to "should the caller keep reading more lines?"
//
// A trailing '/' is stripped and ends the input; a trailing '\' is stripped and inverts the
// default. Except in the '/' and end-of-input cases a '\n' is appended to `line`.
// Returns true when the caller should read another line.
bool readline_simple(std::string & line, bool multiline_input) {
#if defined(_WIN32)
    std::wstring wide_input;
    if (!std::getline(std::wcin, wide_input)) {
        // Input stream is bad or EOF received
        line.clear();
        GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0);
        return false;
    }

    // Convert the UTF-16 input to UTF-8: first ask for the size, then convert
    const int wide_len = (int)wide_input.size();
    const int utf8_len = WideCharToMultiByte(CP_UTF8, 0, &wide_input[0], wide_len, NULL, 0, NULL, NULL);
    line.resize(utf8_len);
    WideCharToMultiByte(CP_UTF8, 0, &wide_input[0], wide_len, &line[0], utf8_len, NULL, NULL);
#else
    if (!std::getline(std::cin, line)) {
        // Input stream is bad or EOF received
        line.clear();
        return false;
    }
#endif

    // '\0' stands for "no trailing marker" when the line is empty
    const char tail = line.empty() ? '\0' : line.back();

    if (tail == '/') {
        // Always return control on '/' symbol
        line.pop_back();
        return false;
    }

    if (tail == '\\') {
        // '\\' changes the default action
        line.pop_back();
        multiline_input = !multiline_input;
    }

    line += '\n';

    // By default, continue input if multiline_input is set
    return multiline_input;
}
|
||||||
|
|
||||||
|
bool readline(std::string & line, bool multiline_input) {
|
||||||
|
set_display(user_input);
|
||||||
|
|
||||||
|
if (simple_io) {
|
||||||
|
return readline_simple(line, multiline_input);
|
||||||
|
}
|
||||||
|
return readline_advanced(line, multiline_input);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
19
examples/console.h
Normal file
19
examples/console.h
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
// Console functions
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace console {
    // Display styles understood by set_display()
    enum display_t {
        reset = 0,   // terminal default attributes
        prompt,      // text of the prompt
        user_input,  // text typed by the user
        error        // error messages
    };

    // Set up the console. use_simple_io selects plain line-based input instead of
    // character-by-character input; use_advanced_display allows ANSI styling.
    void init(bool use_simple_io, bool use_advanced_display);

    // Reset the display style and restore the terminal state changed by init()
    void cleanup();

    // Change the style of subsequent output (no-op unless advanced display is enabled)
    void set_display(display_t display);

    // Read a line of user input into `line`; returns true if more input should follow
    bool readline(std::string & line, bool multiline_input);
}
|
@ -30,7 +30,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
|
|||||||
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
|
fprintf(stderr, "%s: build = %d (%s)\n", __func__, BUILD_NUMBER, BUILD_COMMIT);
|
||||||
|
|
||||||
if (params.seed == LLAMA_DEFAULT_SEED) {
|
if (params.seed == LLAMA_DEFAULT_SEED) {
|
||||||
params.seed = time(NULL);
|
params.seed = uint32_t(time(NULL));
|
||||||
}
|
}
|
||||||
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
|
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
|
||||||
|
|
||||||
|
@ -405,7 +405,7 @@ namespace grammar_parser {
|
|||||||
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
for (size_t i = 0, end = state.rules.size(); i < end; i++) {
|
||||||
// fprintf(file, "%zu: ", i);
|
// fprintf(file, "%zu: ", i);
|
||||||
// print_rule_binary(file, state.rules[i]);
|
// print_rule_binary(file, state.rules[i]);
|
||||||
print_rule(file, i, state.rules[i], symbol_id_names);
|
print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
|
||||||
// fprintf(file, "\n");
|
// fprintf(file, "\n");
|
||||||
}
|
}
|
||||||
} catch (const std::exception & err) {
|
} catch (const std::exception & err) {
|
||||||
|
132
examples/json-schema-to-grammar.py
Normal file
132
examples/json-schema-to-grammar.py
Normal file
@ -0,0 +1,132 @@
|
|||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# whitespace is constrained to a single space char to prevent model "running away" in
|
||||||
|
# whitespace. Also maybe improves generation quality?
|
||||||
|
SPACE_RULE = '" "?'
|
||||||
|
|
||||||
|
PRIMITIVE_RULES = {
|
||||||
|
'boolean': '("true" | "false") space',
|
||||||
|
'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space',
|
||||||
|
'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space',
|
||||||
|
'string': r''' "\"" (
|
||||||
|
[^"\\] |
|
||||||
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
|
||||||
|
)* "\"" space ''',
|
||||||
|
'null': '"null" space',
|
||||||
|
}
|
||||||
|
|
||||||
|
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
|
||||||
|
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
|
||||||
|
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"'}
|
||||||
|
|
||||||
|
|
||||||
|
class SchemaConverter:
    """Builds a set of named grammar rules from a JSON schema.

    Call visit() on the root schema, then format_grammar() to obtain the rules as text.
    """

    def __init__(self, prop_order):
        # prop_order maps property name -> sort position for object properties
        self._prop_order = prop_order
        # rule name -> rule body, in insertion order
        self._rules = {'space': SPACE_RULE}

    def _format_literal(self, literal):
        """Return `literal` as a quoted grammar literal (JSON-encoded, then escaped)."""
        def escape(match):
            return GRAMMAR_LITERAL_ESCAPES.get(match.group(0))

        encoded = json.dumps(literal)
        escaped = GRAMMAR_LITERAL_ESCAPE_RE.sub(escape, encoded)
        return f'"{escaped}"'

    def _add_rule(self, name, rule):
        """Store `rule` under a sanitized version of `name` and return the key used.

        If that key is already taken by a different rule, the first free key formed
        by appending a number is used instead.
        """
        base = INVALID_RULE_CHARS_RE.sub('-', name)
        key = base

        taken_by_other = base in self._rules and not (self._rules[base] == rule)
        if taken_by_other:
            suffix = 0
            while f'{base}{suffix}' in self._rules:
                suffix += 1
            key = f'{base}{suffix}'

        self._rules[key] = rule
        return key

    def visit(self, schema, name):
        """Add the rules needed for `schema` and return the name of its rule.

        `name` is the rule name to use; an empty name means the root rule.
        """
        schema_type = schema.get('type')
        rule_name = name or 'root'
        # prefix for rules generated for nested schemas
        child_prefix = f'{name}{"-" if name else ""}'

        if 'oneOf' in schema or 'anyOf' in schema:
            alternatives = schema.get('oneOf') or schema['anyOf']
            alt_rule_names = [
                self.visit(alt_schema, f'{child_prefix}{index}')
                for index, alt_schema in enumerate(alternatives)
            ]
            return self._add_rule(rule_name, ' | '.join(alt_rule_names))

        if 'const' in schema:
            return self._add_rule(rule_name, self._format_literal(schema['const']))

        if 'enum' in schema:
            literals = [self._format_literal(value) for value in schema['enum']]
            return self._add_rule(rule_name, ' | '.join(literals))

        if schema_type == 'object' and 'properties' in schema:
            # TODO: `required` keyword
            order = self._prop_order

            def sort_key(pair):
                # sort by position in prop_order (if specified) then by key
                prop = pair[0]
                return (order.get(prop, len(order)), prop)

            members = []
            for prop_name, prop_schema in sorted(schema['properties'].items(), key=sort_key):
                prop_rule_name = self.visit(prop_schema, f'{child_prefix}{prop_name}')
                members.append(
                    fr' {self._format_literal(prop_name)} space ":" space {prop_rule_name}'
                )

            rule = '"{" space' + ' "," space'.join(members) + ' "}" space'
            return self._add_rule(rule_name, rule)

        if schema_type == 'array' and 'items' in schema:
            # TODO `prefixItems` keyword
            item_rule_name = self.visit(schema['items'], f'{child_prefix}item')
            rule = f'"[" space ({item_rule_name} ("," space {item_rule_name})*)? "]" space'
            return self._add_rule(rule_name, rule)

        # Anything else must be one of the primitive types
        assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
        primitive_name = 'root' if rule_name == 'root' else schema_type
        return self._add_rule(primitive_name, PRIMITIVE_RULES[schema_type])

    def format_grammar(self):
        """Return all collected rules, one `name ::= rule` line per rule."""
        lines = [f'{name} ::= {rule}' for name, rule in self._rules.items()]
        return '\n'.join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
def main(args_in = None):
    """Command-line entry point: read a JSON schema and print the generated grammar.

    args_in: optional list of argument strings; when None, argparse reads sys.argv.
    The schema is read from the file named on the command line, or from stdin for "-".
    """
    parser = argparse.ArgumentParser(
        description='''
            Generates a grammar (suitable for use in ./main) that produces JSON conforming to a
            given JSON schema. Only a subset of JSON schema features are supported; more may be
            added in the future.
        ''',
    )
    parser.add_argument(
        '--prop-order',
        default=[],
        type=lambda s: s.split(','),
        help='''
            comma-separated property names defining the order of precedence for object properties;
            properties not specified here are given lower precedence than those that are, and are
            sorted alphabetically
        '''
    )
    parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
    args = parser.parse_args(args_in)

    if args.schema == '-':
        schema = json.load(sys.stdin)
    else:
        # Close the schema file as soon as it has been parsed
        with open(args.schema) as schema_file:
            schema = json.load(schema_file)

    # property name -> position given on the command line
    prop_order = {name: idx for idx, name in enumerate(args.prop_order)}
    converter = SchemaConverter(prop_order)
    converter.visit(schema, '')
    print(converter.format_grammar())
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
@ -202,9 +202,9 @@ Example usage: `--top-p 0.95`
|
|||||||
|
|
||||||
- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
|
- `--tfs N`: Enable tail free sampling with parameter z (default: 1.0, 1.0 = disabled).
|
||||||
|
|
||||||
Tail free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. The method adjusts the logits (token probabilities) by raising them to the power of the parameter z. A higher value of z (e.g., 2.0) will further suppress less likely tokens from the tail of the distribution, while a value of 1.0 disables the effect of TFS. By setting the parameter z, you can control how much the probabilities of less likely tokens are reduced.
|
Tail free sampling (TFS) is a text generation technique that aims to reduce the impact of less likely tokens, which may be less relevant, less coherent, or nonsensical, on the output. Similar to Top-P it tries to determine the bulk of the most likely tokens dynamically. But TFS filters out logits based on the second derivative of their probabilities. Adding tokens is stopped after the sum of the second derivatives reaches the parameter z. In short: TFS looks how quickly the probabilities of the tokens decrease and cuts off the tail of unlikely tokens using the parameter z. Typical values for z are in the range of 0.9 to 0.95. A value of 1.0 would include all tokens, and thus disables the effect of TFS.
|
||||||
|
|
||||||
Example usage: `--tfs 2.0`
|
Example usage: `--tfs 0.95`
|
||||||
|
|
||||||
### Locally Typical Sampling
|
### Locally Typical Sampling
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "console.h"
|
||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "build-info.h"
|
#include "build-info.h"
|
||||||
#include "grammar-parser.h"
|
#include "grammar-parser.h"
|
||||||
@ -35,9 +36,7 @@
|
|||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static console_state con_st;
|
|
||||||
static llama_context ** g_ctx;
|
static llama_context ** g_ctx;
|
||||||
|
|
||||||
static bool is_interacting = false;
|
static bool is_interacting = false;
|
||||||
|
|
||||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
|
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32)
|
||||||
@ -46,7 +45,7 @@ void sigint_handler(int signo) {
|
|||||||
if (!is_interacting) {
|
if (!is_interacting) {
|
||||||
is_interacting=true;
|
is_interacting=true;
|
||||||
} else {
|
} else {
|
||||||
console_cleanup(con_st);
|
console::cleanup();
|
||||||
printf("\n");
|
printf("\n");
|
||||||
llama_print_timings(*g_ctx);
|
llama_print_timings(*g_ctx);
|
||||||
_exit(130);
|
_exit(130);
|
||||||
@ -64,10 +63,8 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
// save choice to use color for later
|
// save choice to use color for later
|
||||||
// (note for later: this is a slightly awkward choice)
|
// (note for later: this is a slightly awkward choice)
|
||||||
con_st.use_color = params.use_color;
|
console::init(params.simple_io, params.use_color);
|
||||||
con_st.multiline_input = params.multiline_input;
|
atexit([]() { console::cleanup(); });
|
||||||
console_init(con_st);
|
|
||||||
atexit([]() { console_cleanup(con_st); });
|
|
||||||
|
|
||||||
if (params.perplexity) {
|
if (params.perplexity) {
|
||||||
printf("\n************\n");
|
printf("\n************\n");
|
||||||
@ -373,7 +370,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
const char *control_message;
|
const char *control_message;
|
||||||
if (con_st.multiline_input) {
|
if (params.multiline_input) {
|
||||||
control_message = " - To return control to LLaMa, end your input with '\\'.\n"
|
control_message = " - To return control to LLaMa, end your input with '\\'.\n"
|
||||||
" - To return control without starting a new line, end your input with '/'.\n";
|
" - To return control without starting a new line, end your input with '/'.\n";
|
||||||
} else {
|
} else {
|
||||||
@ -401,7 +398,7 @@ int main(int argc, char ** argv) {
|
|||||||
int n_past_guidance = 0;
|
int n_past_guidance = 0;
|
||||||
|
|
||||||
// the first thing we will do is to output the prompt, so set color accordingly
|
// the first thing we will do is to output the prompt, so set color accordingly
|
||||||
console_set_color(con_st, CONSOLE_COLOR_PROMPT);
|
console::set_display(console::prompt);
|
||||||
|
|
||||||
std::vector<llama_token> embd;
|
std::vector<llama_token> embd;
|
||||||
std::vector<llama_token> embd_guidance;
|
std::vector<llama_token> embd_guidance;
|
||||||
@ -422,9 +419,9 @@ int main(int argc, char ** argv) {
|
|||||||
// Ensure the input doesn't exceed the context size by truncating embd if necessary.
|
// Ensure the input doesn't exceed the context size by truncating embd if necessary.
|
||||||
if ((int)embd.size() > max_embd_size) {
|
if ((int)embd.size() > max_embd_size) {
|
||||||
auto skipped_tokens = embd.size() - max_embd_size;
|
auto skipped_tokens = embd.size() - max_embd_size;
|
||||||
console_set_color(con_st, CONSOLE_COLOR_ERROR);
|
console::set_display(console::error);
|
||||||
printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
|
printf("<<input too long: skipped %zu token%s>>", skipped_tokens, skipped_tokens != 1 ? "s" : "");
|
||||||
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
console::set_display(console::reset);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
embd.resize(max_embd_size);
|
embd.resize(max_embd_size);
|
||||||
}
|
}
|
||||||
@ -667,7 +664,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
// reset color to default if we there is no pending user input
|
// reset color to default if we there is no pending user input
|
||||||
if (input_echo && (int)embd_inp.size() == n_consumed) {
|
if (input_echo && (int)embd_inp.size() == n_consumed) {
|
||||||
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
console::set_display(console::reset);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if not currently processing queued inputs;
|
// if not currently processing queued inputs;
|
||||||
@ -693,7 +690,7 @@ int main(int argc, char ** argv) {
|
|||||||
if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) {
|
if (last_output.find(antiprompt.c_str(), search_start_pos) != std::string::npos) {
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
|
console::set_display(console::user_input);
|
||||||
}
|
}
|
||||||
is_antiprompt = true;
|
is_antiprompt = true;
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
@ -714,7 +711,7 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
printf("\n");
|
printf("\n");
|
||||||
console_set_color(con_st, CONSOLE_COLOR_USER_INPUT);
|
console::set_display(console::user_input);
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
} else if (params.instruct) {
|
} else if (params.instruct) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
@ -739,12 +736,12 @@ int main(int argc, char ** argv) {
|
|||||||
std::string line;
|
std::string line;
|
||||||
bool another_line = true;
|
bool another_line = true;
|
||||||
do {
|
do {
|
||||||
another_line = console_readline(con_st, line);
|
another_line = console::readline(line, params.multiline_input);
|
||||||
buffer += line;
|
buffer += line;
|
||||||
} while (another_line);
|
} while (another_line);
|
||||||
|
|
||||||
// done taking input, reset color
|
// done taking input, reset color
|
||||||
console_set_color(con_st, CONSOLE_COLOR_DEFAULT);
|
console::set_display(console::reset);
|
||||||
|
|
||||||
// Add tokens to embd only if the input buffer is non-empty
|
// Add tokens to embd only if the input buffer is non-empty
|
||||||
// Entering a empty line lets the user pass control back
|
// Entering a empty line lets the user pass control back
|
||||||
|
@ -121,8 +121,23 @@ void perplexity(llama_context * ctx, const gpt_params & params) {
|
|||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
void perplexity_lines(llama_context * ctx, const gpt_params & params) {
|
void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||||
// Calculates perplexity over each line of the prompt
|
// Calculates hellaswag score (acc_norm) from prompt
|
||||||
|
//
|
||||||
|
// Data extracted from the HellaSwag validation dataset (MIT license) https://github.com/rowanz/hellaswag/blob/master/data/hellaswag_val.jsonl
|
||||||
|
// All used data fields are preprocessed as in https://github.com/EleutherAI/lm-evaluation-harness/blob/df3da98c5405deafd519c2ddca52bb7c3fe36bef/lm_eval/tasks/hellaswag.py#L62-L68
|
||||||
|
//
|
||||||
|
// All 10042 tasks should be extracted to keep the results standardized like other implementations.
|
||||||
|
//
|
||||||
|
// Datafile layout:
|
||||||
|
// ['??'] denotes json fields
|
||||||
|
// 6 lines per task:
|
||||||
|
// ['activity_label'] + ": " +['ctx'] - The first part of the query, the context
|
||||||
|
// ['label'] - The index the best common sense ending aka gold ending
|
||||||
|
// ['endings'][0] - Endings added to the first part of the query
|
||||||
|
// ['endings'][1]
|
||||||
|
// ['endings'][2]
|
||||||
|
// ['endings'][3]
|
||||||
|
|
||||||
std::vector<std::string> prompt_lines;
|
std::vector<std::string> prompt_lines;
|
||||||
std::istringstream strstream(params.prompt);
|
std::istringstream strstream(params.prompt);
|
||||||
@ -132,63 +147,149 @@ void perplexity_lines(llama_context * ctx, const gpt_params & params) {
|
|||||||
prompt_lines.push_back(line);
|
prompt_lines.push_back(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if( prompt_lines.size() % 6 != 0) {
|
||||||
|
fprintf(stderr, "%s : number of lines in prompt not a multiple of 6.\n", __func__);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t hs_task_count = prompt_lines.size()/6;
|
||||||
|
fprintf(stderr, "%s : loaded %zu tasks from prompt.\n", __func__, hs_task_count);
|
||||||
|
|
||||||
|
// This is needed as usual for LLaMA models
|
||||||
|
bool prepend_bos = true;
|
||||||
|
|
||||||
|
// Number of tasks to use when computing the score
|
||||||
|
if ( params.hellaswag_tasks < hs_task_count ) {
|
||||||
|
hs_task_count = params.hellaswag_tasks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The tasks should be randomized so the score stabilizes quickly.
|
||||||
|
bool randomize_tasks = true;
|
||||||
|
|
||||||
|
// The random seed should not impact the final result if the computation is done over enough tasks, so kept hardcoded for now
|
||||||
|
std::mt19937 rng(1);
|
||||||
|
|
||||||
|
// Dataholder for hellaswag tasks
|
||||||
|
struct hs_data_t {
|
||||||
|
std::string context;
|
||||||
|
size_t gold_ending_idx;
|
||||||
|
std::string ending[4];
|
||||||
|
size_t ending_logprob_count[4];
|
||||||
|
double ending_logprob[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
fprintf(stderr, "%s : selecting %zu %s tasks.\n", __func__, hs_task_count, (randomize_tasks?"randomized":"the first") );
|
||||||
|
|
||||||
|
// Select and read data from prompt lines
|
||||||
|
hs_data_t *hs_data = new hs_data_t[hs_task_count];
|
||||||
|
for (size_t i=0; i < hs_task_count; i++) {
|
||||||
|
size_t idx = i;
|
||||||
|
|
||||||
|
// Select a random example of those left in the prompt
|
||||||
|
if (randomize_tasks) {
|
||||||
|
std::uniform_int_distribution<size_t> dist(0, prompt_lines.size()/6-1 ) ;
|
||||||
|
idx = dist(rng);
|
||||||
|
}
|
||||||
|
|
||||||
|
hs_data[i].context = prompt_lines[idx*6];
|
||||||
|
hs_data[i].gold_ending_idx = std::stoi( prompt_lines[idx*6+1] );
|
||||||
|
for (size_t j=0; j < 4; j++) {
|
||||||
|
hs_data[i].ending[j] = " " + prompt_lines[idx*6+2+j];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete the selected random example from the prompt
|
||||||
|
if (randomize_tasks) {
|
||||||
|
prompt_lines.erase( std::next(prompt_lines.begin(),idx*6) , std::next(prompt_lines.begin(),idx*6+6) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fprintf(stderr, "%s : calculating hellaswag score over selected tasks.\n", __func__);
|
||||||
|
printf("\ntask\tacc_norm\n");
|
||||||
|
|
||||||
|
double acc = 0.0f;
|
||||||
const int n_vocab = llama_n_vocab(ctx);
|
const int n_vocab = llama_n_vocab(ctx);
|
||||||
|
|
||||||
int counttotal = 0;
|
for (size_t task_idx = 0; task_idx < hs_task_count; task_idx++) {
|
||||||
size_t n_lines = prompt_lines.size();
|
|
||||||
|
|
||||||
double nll = 0.0;
|
// Tokenize the context to count tokens
|
||||||
|
std::vector<int> context_embd = ::llama_tokenize(ctx, hs_data[task_idx].context, prepend_bos);
|
||||||
|
size_t context_size = context_embd.size();
|
||||||
|
|
||||||
fprintf(stderr, "%s: calculating perplexity over %lu lines\n", __func__, n_lines);
|
for (size_t ending_idx=0;ending_idx<4;ending_idx++) {
|
||||||
|
|
||||||
printf("\nLine\tPPL line\tPPL cumulative\n");
|
// Tokenize the query
|
||||||
|
std::vector<int> query_embd = ::llama_tokenize(ctx, hs_data[task_idx].context + hs_data[task_idx].ending[ending_idx], prepend_bos);
|
||||||
|
size_t query_size = query_embd.size();
|
||||||
|
|
||||||
for (size_t i = 0; i < n_lines; ++i) {
|
// Stop if query wont fit the ctx window
|
||||||
|
if (query_size > (size_t)params.n_ctx) {
|
||||||
|
fprintf(stderr, "%s : number of tokens in query %zu > n_ctxl\n", __func__, query_size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Tokenize and insert BOS at start
|
// Speedup small evaluations by evaluating atleast 32 tokens
|
||||||
std::vector<int> batch_embd = ::llama_tokenize(ctx, prompt_lines[i], true);
|
if (query_size < 32) {
|
||||||
|
query_embd.resize(32);
|
||||||
|
}
|
||||||
|
|
||||||
size_t batch_size = batch_embd.size();
|
// Evaluate the query
|
||||||
|
if (llama_eval(ctx, query_embd.data(), query_embd.size(), 0, params.n_threads)) {
|
||||||
|
fprintf(stderr, "%s : failed to eval\n", __func__);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Stop if line is too long
|
const auto query_logits = llama_get_logits(ctx);
|
||||||
if( batch_size > (size_t)params.n_ctx ) {
|
std::vector<float> logits;
|
||||||
fprintf(stderr, "%s : tokens in line %lu > n_ctxl\n", __func__, i);
|
logits.insert(logits.end(), query_logits, query_logits + query_size * n_vocab);
|
||||||
return;
|
|
||||||
|
hs_data[task_idx].ending_logprob_count[ending_idx] = 0;
|
||||||
|
hs_data[task_idx].ending_logprob[ending_idx] = 0.0f;
|
||||||
|
|
||||||
|
// Calculate the logprobs over the ending
|
||||||
|
for (size_t j = context_size-1; j < query_size - 1; j++) {
|
||||||
|
// Calculate probability of next token, given the previous ones.
|
||||||
|
const std::vector<float> tok_logits(
|
||||||
|
logits.begin() + (j + 0) * n_vocab,
|
||||||
|
logits.begin() + (j + 1) * n_vocab);
|
||||||
|
|
||||||
|
const float prob = softmax(tok_logits)[query_embd[ j + 1]];
|
||||||
|
|
||||||
|
hs_data[task_idx].ending_logprob[ending_idx] += std::log(prob);
|
||||||
|
hs_data[task_idx].ending_logprob_count[ending_idx]++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate the mean token logprob for acc_norm
|
||||||
|
hs_data[task_idx].ending_logprob[ending_idx] /= hs_data[task_idx].ending_logprob_count[ending_idx];
|
||||||
|
|
||||||
|
|
||||||
|
// printf("task %lu, ending %lu, whole_len %lu, context_len %lu, ending_logprob_count %lu, ending_logprob %.4f\n",
|
||||||
|
// task_idx,ending_idx,whole_size,context_size, hs_data[task_idx].ending_logprob_count[ending_idx], hs_data[task_idx].ending_logprob[ending_idx] );
|
||||||
}
|
}
|
||||||
|
|
||||||
if (llama_eval(ctx, batch_embd.data(), batch_size, 0, params.n_threads)) {
|
// Find the ending with maximum logprob
|
||||||
fprintf(stderr, "%s : failed to eval\n", __func__);
|
size_t ending_logprob_max_idx = -1;
|
||||||
return;
|
double ending_logprob_max_val = -INFINITY;
|
||||||
|
for (size_t j=0; j < 4; j++) {
|
||||||
|
if (hs_data[task_idx].ending_logprob[j] > ending_logprob_max_val) {
|
||||||
|
ending_logprob_max_idx = j;
|
||||||
|
ending_logprob_max_val = hs_data[task_idx].ending_logprob[j];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const auto batch_logits = llama_get_logits(ctx);
|
// printf("max logprob ending idx %lu, gold ending idx %lu\n", ending_logprob_max_idx, hs_data[task_idx].gold_ending_idx);
|
||||||
std::vector<float> logits;
|
|
||||||
logits.insert(logits.end(), batch_logits, batch_logits + batch_size * n_vocab);
|
|
||||||
|
|
||||||
double nllline = 0.0;
|
// If the gold ending got the maximum logprobe add one accuracy point
|
||||||
int countline = 0;
|
if (ending_logprob_max_idx == hs_data[task_idx].gold_ending_idx) {
|
||||||
|
acc += 1.0;
|
||||||
// Perplexity over second half of the line
|
|
||||||
for (size_t j = batch_size/2; j < batch_size - 1; ++j) {
|
|
||||||
// Calculate probability of next token, given the previous ones.
|
|
||||||
const std::vector<float> tok_logits(
|
|
||||||
logits.begin() + (j + 0) * n_vocab,
|
|
||||||
logits.begin() + (j + 1) * n_vocab);
|
|
||||||
|
|
||||||
const float prob = softmax(tok_logits)[batch_embd[ j + 1]];
|
|
||||||
|
|
||||||
nllline += -std::log(prob);
|
|
||||||
++countline;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nll += nllline;
|
// Print the accumulated accuracy mean x 100
|
||||||
counttotal += countline;
|
printf("%zu\t%.8lf\n",task_idx+1, acc/double(task_idx+1)*100.0);
|
||||||
|
|
||||||
// perplexity is e^(average negative log-likelihood)
|
|
||||||
printf("%lu\t%.8lf\t%.8lf\n", i + 1, std::exp(nllline/countline), std::exp(nll / counttotal) );
|
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
delete [] hs_data;
|
||||||
|
|
||||||
printf("\n");
|
printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -240,8 +341,8 @@ int main(int argc, char ** argv) {
|
|||||||
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (params.perplexity_lines) {
|
if (params.hellaswag) {
|
||||||
perplexity_lines(ctx, params);
|
hellaswag_score(ctx, params);
|
||||||
} else {
|
} else {
|
||||||
perplexity(ctx, params);
|
perplexity(ctx, params);
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ int main(int argc, char ** argv) {
|
|||||||
auto lparams = llama_context_default_params();
|
auto lparams = llama_context_default_params();
|
||||||
|
|
||||||
lparams.n_ctx = params.n_ctx;
|
lparams.n_ctx = params.n_ctx;
|
||||||
|
lparams.n_gqa = params.n_gqa;
|
||||||
lparams.seed = params.seed;
|
lparams.seed = params.seed;
|
||||||
lparams.f16_kv = params.memory_f16;
|
lparams.f16_kv = params.memory_f16;
|
||||||
lparams.use_mmap = params.use_mmap;
|
lparams.use_mmap = params.use_mmap;
|
||||||
|
26
examples/server-llama2-13B.sh
Normal file
26
examples/server-llama2-13B.sh
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
set -e
|
||||||
|
|
||||||
|
cd "$(dirname "$0")/.." || exit
|
||||||
|
|
||||||
|
# Specify the model you want to use here:
|
||||||
|
MODEL="${MODEL:-./models/llama-2-13b-chat.ggmlv3.q5_K_M.bin}"
|
||||||
|
PROMPT_TEMPLATE=${PROMPT_TEMPLATE:-./prompts/chat-system.txt}
|
||||||
|
|
||||||
|
# Adjust to the number of CPU cores you want to use.
|
||||||
|
N_THREAD="${N_THREAD:-12}"
|
||||||
|
|
||||||
|
# Note: you can also override the generation options by specifying them on the command line:
|
||||||
|
GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 4096 --batch-size 1024}"
|
||||||
|
|
||||||
|
|
||||||
|
# shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS
|
||||||
|
./server $GEN_OPTIONS \
|
||||||
|
--model "$MODEL" \
|
||||||
|
--threads "$N_THREAD" \
|
||||||
|
--rope-freq-scale 1.0 \
|
||||||
|
"$@"
|
||||||
|
|
||||||
|
# I used this to test the model with mps, but omitted it from the general purpose. If you want to use it, just specify it on the command line.
|
||||||
|
# -ngl 1 \
|
@ -163,7 +163,7 @@ node .
|
|||||||
|
|
||||||
`content`: Set the text to tokenize.
|
`content`: Set the text to tokenize.
|
||||||
|
|
||||||
Note that the special `BOS` token is not added in fron of the text and also a space character is not inserted automatically as it is for `/completion`.
|
Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
|
||||||
|
|
||||||
- **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
|
- **POST** `/embedding`: Generate embedding of a given text just as [the embedding example](../embedding) does.
|
||||||
|
|
||||||
|
109
examples/server/chat-llama2.sh
Normal file
109
examples/server/chat-llama2.sh
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
API_URL="${API_URL:-http://127.0.0.1:8080}"
|
||||||
|
|
||||||
|
CHAT=(
|
||||||
|
"Hello, Assistant."
|
||||||
|
"Hello. How may I help you today?"
|
||||||
|
)
|
||||||
|
|
||||||
|
INSTRUCTION="A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
|
||||||
|
|
||||||
|
trim() {
|
||||||
|
shopt -s extglob
|
||||||
|
set -- "${1##+([[:space:]])}"
|
||||||
|
printf "%s" "${1%%+([[:space:]])}"
|
||||||
|
}
|
||||||
|
|
||||||
|
trim_trailing() {
|
||||||
|
shopt -s extglob
|
||||||
|
printf "%s" "${1%%+([[:space:]])}"
|
||||||
|
}
|
||||||
|
|
||||||
|
format_prompt() {
|
||||||
|
if [[ "${#CHAT[@]}" -eq 0 ]]; then
|
||||||
|
echo -n "[INST] <<SYS>>\n${INSTRUCTION}\n<</SYS>>"
|
||||||
|
else
|
||||||
|
LAST_INDEX=$(( ${#CHAT[@]} - 1 ))
|
||||||
|
echo -n "${CHAT[$LAST_INDEX]}\n[INST] $1 [/INST]"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
||||||
|
tokenize() {
|
||||||
|
curl \
|
||||||
|
--silent \
|
||||||
|
--request POST \
|
||||||
|
--url "${API_URL}/tokenize" \
|
||||||
|
--header "Content-Type: application/json" \
|
||||||
|
--data-raw "$(jq -ns --arg content "$1" '{content:$content}')" \
|
||||||
|
| jq '.tokens[]'
|
||||||
|
}
|
||||||
|
|
||||||
|
N_KEEP=$(tokenize "[INST] <<SYS>>\n${INSTRUCTION}\n<</SYS>>" | wc -l)
|
||||||
|
|
||||||
|
chat_completion() {
|
||||||
|
PROMPT="$(trim_trailing "$(format_prompt "$1")")"
|
||||||
|
DATA="$(echo -n "$PROMPT" | jq -Rs --argjson n_keep $N_KEEP '{
|
||||||
|
prompt: .,
|
||||||
|
temperature: 0.2,
|
||||||
|
top_k: 40,
|
||||||
|
top_p: 0.9,
|
||||||
|
n_keep: $n_keep,
|
||||||
|
n_predict: 1024,
|
||||||
|
stop: ["[INST]"],
|
||||||
|
stream: true
|
||||||
|
}')"
|
||||||
|
|
||||||
|
# Create a temporary file to hold the Python output
|
||||||
|
TEMPFILE=$(mktemp)
|
||||||
|
|
||||||
|
exec 3< <(curl \
|
||||||
|
--silent \
|
||||||
|
--no-buffer \
|
||||||
|
--request POST \
|
||||||
|
--url "${API_URL}/completion" \
|
||||||
|
--header "Content-Type: application/json" \
|
||||||
|
--data-raw "${DATA}")
|
||||||
|
|
||||||
|
python -c "
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
|
||||||
|
answer = ''
|
||||||
|
while True:
|
||||||
|
line = sys.stdin.readline()
|
||||||
|
if not line:
|
||||||
|
break
|
||||||
|
if line.startswith('data: '):
|
||||||
|
json_content = line[6:].strip()
|
||||||
|
content = json.loads(json_content)['content']
|
||||||
|
sys.stdout.write(content)
|
||||||
|
sys.stdout.flush()
|
||||||
|
answer += content
|
||||||
|
|
||||||
|
answer = answer.rstrip('\n')
|
||||||
|
|
||||||
|
# Write the answer to the temporary file
|
||||||
|
with open('$TEMPFILE', 'w') as f:
|
||||||
|
f.write(answer)
|
||||||
|
" <&3
|
||||||
|
|
||||||
|
exec 3<&-
|
||||||
|
|
||||||
|
# Read the answer from the temporary file
|
||||||
|
ANSWER=$(cat $TEMPFILE)
|
||||||
|
|
||||||
|
# Clean up the temporary file
|
||||||
|
rm $TEMPFILE
|
||||||
|
|
||||||
|
printf "\n"
|
||||||
|
|
||||||
|
CHAT+=("$1" "$(trim "$ANSWER")")
|
||||||
|
}
|
||||||
|
|
||||||
|
while true; do
|
||||||
|
echo -en "\033[0;32m" # Green color
|
||||||
|
read -r -e -p "> " QUESTION
|
||||||
|
echo -en "\033[0m" # Reset color
|
||||||
|
chat_completion "${QUESTION}"
|
||||||
|
done
|
@ -87,289 +87,342 @@ unsigned char completion_js[] = {
|
|||||||
0x20, 0x54, 0x65, 0x78, 0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72,
|
0x20, 0x54, 0x65, 0x78, 0x74, 0x44, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72,
|
||||||
0x28, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63,
|
0x28, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63,
|
||||||
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b,
|
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x0a, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f,
|
||||||
0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20, 0x3d,
|
0x76, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f,
|
||||||
0x20, 0x74, 0x72, 0x75, 0x65, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
0x20, 0x42, 0x75, 0x66, 0x66, 0x65, 0x72, 0x20, 0x66, 0x6f, 0x72, 0x20,
|
||||||
0x77, 0x68, 0x69, 0x6c, 0x65, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x29,
|
0x70, 0x61, 0x72, 0x74, 0x69, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x72, 0x65,
|
||||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
0x61, 0x64, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x0a, 0x0a, 0x20, 0x20,
|
||||||
0x73, 0x74, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20,
|
0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65,
|
||||||
0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72,
|
0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x74, 0x72, 0x75,
|
||||||
0x2e, 0x72, 0x65, 0x61, 0x64, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
0x65, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x77, 0x68, 0x69, 0x6c,
|
||||||
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c,
|
0x65, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
0x74, 0x2e, 0x64, 0x6f, 0x6e, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b,
|
0x65, 0x73, 0x75, 0x6c, 0x74, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20,
|
0x74, 0x20, 0x72, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x72, 0x65, 0x61,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x73, 0x65, 0x20, 0x61,
|
0x64, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
|
||||||
0x6e, 0x73, 0x77, 0x65, 0x72, 0x73, 0x20, 0x69, 0x6e, 0x20, 0x74, 0x68,
|
0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x6f,
|
||||||
0x65, 0x20, 0x66, 0x6f, 0x72, 0x6d, 0x20, 0x6d, 0x75, 0x6c, 0x74, 0x69,
|
0x6e, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x70, 0x6c, 0x65, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x20, 0x6f, 0x66,
|
0x20, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x3a, 0x20, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5c, 0x6e, 0x20, 0x77, 0x69,
|
0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x74, 0x68, 0x20, 0x64, 0x61, 0x74, 0x61, 0x20, 0x61, 0x6c, 0x77, 0x61,
|
0x2f, 0x2f, 0x20, 0x41, 0x64, 0x64, 0x20, 0x61, 0x6e, 0x79, 0x20, 0x6c,
|
||||||
0x79, 0x73, 0x20, 0x70, 0x72, 0x65, 0x73, 0x65, 0x6e, 0x74, 0x20, 0x61,
|
0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x64, 0x61, 0x74, 0x61,
|
||||||
0x73, 0x20, 0x61, 0x20, 0x6b, 0x65, 0x79, 0x2e, 0x20, 0x69, 0x6e, 0x20,
|
0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x75, 0x72, 0x72,
|
||||||
0x6f, 0x75, 0x72, 0x20, 0x63, 0x61, 0x73, 0x65, 0x20, 0x77, 0x65, 0x0a,
|
0x65, 0x6e, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x6d, 0x61, 0x69,
|
0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x6e, 0x6c, 0x79, 0x20, 0x63, 0x61, 0x72, 0x65, 0x20, 0x61, 0x62, 0x6f,
|
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x3d,
|
||||||
0x75, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x64, 0x61, 0x74, 0x61, 0x3a,
|
0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72, 0x20, 0x2b, 0x20,
|
||||||
0x20, 0x6b, 0x65, 0x79, 0x20, 0x68, 0x65, 0x72, 0x65, 0x2c, 0x20, 0x77,
|
0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f,
|
||||||
0x68, 0x69, 0x63, 0x68, 0x20, 0x77, 0x65, 0x20, 0x65, 0x78, 0x70, 0x65,
|
0x64, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61,
|
||||||
0x63, 0x74, 0x20, 0x61, 0x73, 0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x0a, 0x20,
|
0x6c, 0x75, 0x65, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x74,
|
0x20, 0x2f, 0x2f, 0x20, 0x43, 0x68, 0x65, 0x63, 0x6b, 0x20, 0x69, 0x66,
|
||||||
0x65, 0x78, 0x74, 0x20, 0x3d, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65,
|
0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20, 0x63, 0x68,
|
||||||
0x72, 0x2e, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x28, 0x72, 0x65, 0x73,
|
0x61, 0x72, 0x61, 0x63, 0x74, 0x65, 0x72, 0x20, 0x69, 0x73, 0x20, 0x61,
|
||||||
0x75, 0x6c, 0x74, 0x2e, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x29, 0x3b, 0x0a,
|
0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x0a,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x70, 0x61,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
||||||
0x72, 0x73, 0x65, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x73, 0x73, 0x65, 0x20,
|
0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69, 0x6e, 0x65,
|
||||||
0x65, 0x76, 0x65, 0x6e, 0x74, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61,
|
0x42, 0x72, 0x65, 0x61, 0x6b, 0x20, 0x3d, 0x20, 0x74, 0x65, 0x78, 0x74,
|
||||||
0x64, 0x64, 0x20, 0x74, 0x68, 0x65, 0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72,
|
0x2e, 0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x28, 0x27, 0x5c,
|
||||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x6e, 0x27, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x20,
|
0x2f, 0x2f, 0x20, 0x53, 0x70, 0x6c, 0x69, 0x74, 0x20, 0x74, 0x68, 0x65,
|
||||||
0x3d, 0x20, 0x2f, 0x5e, 0x28, 0x5c, 0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73,
|
0x20, 0x74, 0x65, 0x78, 0x74, 0x20, 0x69, 0x6e, 0x74, 0x6f, 0x20, 0x6c,
|
||||||
0x28, 0x2e, 0x2a, 0x29, 0x24, 0x2f, 0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20,
|
0x69, 0x6e, 0x65, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e,
|
0x65, 0x74, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x20, 0x3d, 0x20, 0x74,
|
||||||
0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x20, 0x6f, 0x66, 0x20,
|
0x65, 0x78, 0x74, 0x2e, 0x73, 0x70, 0x6c, 0x69, 0x74, 0x28, 0x27, 0x5c,
|
||||||
0x74, 0x65, 0x78, 0x74, 0x2e, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x41, 0x6c,
|
0x6e, 0x27, 0x29, 0x3b, 0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x6c, 0x28, 0x72, 0x65, 0x67, 0x65, 0x78, 0x29, 0x29, 0x20, 0x7b, 0x0a,
|
0x2f, 0x2f, 0x20, 0x49, 0x66, 0x20, 0x74, 0x68, 0x65, 0x20, 0x74, 0x65,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75,
|
0x78, 0x74, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x6e, 0x27, 0x74, 0x20, 0x65,
|
||||||
0x6c, 0x74, 0x5b, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x31, 0x5d, 0x5d,
|
0x6e, 0x64, 0x20, 0x77, 0x69, 0x74, 0x68, 0x20, 0x61, 0x20, 0x6c, 0x69,
|
||||||
0x20, 0x3d, 0x20, 0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x32, 0x5d, 0x0a,
|
0x6e, 0x65, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b, 0x2c, 0x20, 0x74, 0x68,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x20,
|
0x65, 0x6e, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6c, 0x61, 0x73, 0x74, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x69, 0x6e, 0x63, 0x65, 0x20,
|
0x6c, 0x69, 0x6e, 0x65, 0x20, 0x69, 0x73, 0x20, 0x69, 0x6e, 0x63, 0x6f,
|
||||||
0x77, 0x65, 0x20, 0x6b, 0x6e, 0x6f, 0x77, 0x20, 0x74, 0x68, 0x69, 0x73,
|
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x69, 0x73, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70,
|
0x20, 0x2f, 0x2f, 0x20, 0x53, 0x74, 0x6f, 0x72, 0x65, 0x20, 0x69, 0x74,
|
||||||
0x70, 0x2c, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73, 0x20, 0x6a, 0x75, 0x73,
|
0x20, 0x69, 0x6e, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76, 0x65, 0x72,
|
||||||
0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65, 0x20, 0x74, 0x68, 0x65,
|
0x20, 0x74, 0x6f, 0x20, 0x62, 0x65, 0x20, 0x61, 0x64, 0x64, 0x65, 0x64,
|
||||||
0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x20, 0x69, 0x6e, 0x20, 0x64, 0x61, 0x74,
|
0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6e, 0x65, 0x78, 0x74,
|
||||||
0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75,
|
0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x64, 0x61,
|
||||||
0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d, 0x20, 0x4a, 0x53,
|
0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
|
||||||
0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73,
|
0x28, 0x21, 0x65, 0x6e, 0x64, 0x73, 0x57, 0x69, 0x74, 0x68, 0x4c, 0x69,
|
||||||
0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x3b, 0x0a, 0x20,
|
0x6e, 0x65, 0x42, 0x72, 0x65, 0x61, 0x6b, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f,
|
||||||
0x20, 0x2b, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64,
|
0x76, 0x65, 0x72, 0x20, 0x3d, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2e,
|
||||||
0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b,
|
0x70, 0x6f, 0x70, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x79,
|
0x20, 0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||||
0x69, 0x65, 0x6c, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x79,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76,
|
||||||
0x69, 0x65, 0x6c, 0x64, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x3b,
|
0x65, 0x72, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x20, 0x2f, 0x2f, 0x20,
|
||||||
0x0a, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x69,
|
0x52, 0x65, 0x73, 0x65, 0x74, 0x20, 0x6c, 0x65, 0x66, 0x74, 0x6f, 0x76,
|
||||||
0x66, 0x20, 0x77, 0x65, 0x20, 0x67, 0x6f, 0x74, 0x20, 0x61, 0x20, 0x73,
|
0x65, 0x72, 0x20, 0x69, 0x66, 0x20, 0x77, 0x65, 0x20, 0x68, 0x61, 0x76,
|
||||||
0x74, 0x6f, 0x70, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e, 0x20, 0x66, 0x72,
|
0x65, 0x20, 0x61, 0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x62, 0x72, 0x65,
|
||||||
0x6f, 0x6d, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2c, 0x20, 0x77,
|
0x61, 0x6b, 0x20, 0x61, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x65, 0x6e,
|
||||||
0x65, 0x20, 0x77, 0x69, 0x6c, 0x6c, 0x20, 0x62, 0x72, 0x65, 0x61, 0x6b,
|
0x64, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20,
|
||||||
0x20, 0x68, 0x65, 0x72, 0x65, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x50, 0x61, 0x72, 0x73,
|
||||||
0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64,
|
0x65, 0x20, 0x61, 0x6c, 0x6c, 0x20, 0x73, 0x73, 0x65, 0x20, 0x65, 0x76,
|
||||||
0x61, 0x74, 0x61, 0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29, 0x20, 0x7b, 0x0a,
|
0x65, 0x6e, 0x74, 0x73, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x61, 0x64, 0x64,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
|
0x20, 0x74, 0x68, 0x65, 0x6d, 0x20, 0x74, 0x6f, 0x20, 0x72, 0x65, 0x73,
|
||||||
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e,
|
0x75, 0x6c, 0x74, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||||
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
|
0x6e, 0x73, 0x74, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x20, 0x3d, 0x20,
|
||||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
0x2f, 0x5e, 0x28, 0x5c, 0x53, 0x2b, 0x29, 0x3a, 0x5c, 0x73, 0x28, 0x2e,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e,
|
0x2a, 0x29, 0x24, 0x2f, 0x67, 0x6d, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||||
0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c,
|
0x20, 0x6c, 0x69, 0x6e, 0x65, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x69, 0x6e,
|
||||||
0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72,
|
0x65, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e,
|
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6d, 0x61, 0x74, 0x63,
|
||||||
0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x68, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x67, 0x65, 0x78, 0x2e, 0x65, 0x78,
|
||||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62, 0x72,
|
0x65, 0x63, 0x28, 0x6c, 0x69, 0x6e, 0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||||
0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x6d, 0x61,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63,
|
0x74, 0x63, 0x68, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x5b,
|
||||||
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d,
|
0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x31, 0x5d, 0x5d, 0x20, 0x3d, 0x20,
|
||||||
0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74,
|
0x6d, 0x61, 0x74, 0x63, 0x68, 0x5b, 0x32, 0x5d, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f, 0x2f, 0x20, 0x73, 0x69,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e,
|
0x6e, 0x63, 0x65, 0x20, 0x77, 0x65, 0x20, 0x6b, 0x6e, 0x6f, 0x77, 0x20,
|
||||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
0x74, 0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||||
0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65,
|
0x61, 0x2e, 0x63, 0x70, 0x70, 0x2c, 0x20, 0x6c, 0x65, 0x74, 0x27, 0x73,
|
||||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20,
|
0x20, 0x6a, 0x75, 0x73, 0x74, 0x20, 0x64, 0x65, 0x63, 0x6f, 0x64, 0x65,
|
||||||
0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20,
|
0x20, 0x74, 0x68, 0x65, 0x20, 0x6a, 0x73, 0x6f, 0x6e, 0x20, 0x69, 0x6e,
|
||||||
0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20,
|
0x20, 0x64, 0x61, 0x74, 0x61, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f,
|
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75,
|
||||||
0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29,
|
0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72,
|
||||||
0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b,
|
0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x3d,
|
||||||
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20,
|
0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28,
|
||||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
|
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29,
|
||||||
0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74,
|
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79,
|
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d,
|
||||||
0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x63, 0x72,
|
0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
|
||||||
0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f,
|
0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
|
||||||
0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72,
|
0x2f, 0x20, 0x79, 0x69, 0x65, 0x6c, 0x64, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x79, 0x69, 0x65, 0x6c,
|
||||||
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66,
|
0x64, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x3b, 0x0a, 0x0a, 0x20,
|
||||||
0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x2f,
|
||||||
0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a,
|
0x2f, 0x20, 0x69, 0x66, 0x20, 0x77, 0x65, 0x20, 0x67, 0x6f, 0x74, 0x20,
|
||||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
0x61, 0x20, 0x73, 0x74, 0x6f, 0x70, 0x20, 0x74, 0x6f, 0x6b, 0x65, 0x6e,
|
||||||
0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72,
|
||||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28,
|
0x2c, 0x20, 0x77, 0x65, 0x20, 0x77, 0x69, 0x6c, 0x6c, 0x20, 0x62, 0x72,
|
||||||
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20,
|
0x65, 0x61, 0x6b, 0x20, 0x68, 0x65, 0x72, 0x65, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
|
||||||
0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28,
|
0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61, 0x74, 0x61,
|
||||||
0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28,
|
0x2e, 0x73, 0x74, 0x6f, 0x70, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69,
|
||||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75,
|
0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64, 0x61,
|
||||||
0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63,
|
0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
|
||||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e,
|
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20,
|
||||||
0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20,
|
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
|
||||||
0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c,
|
0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
||||||
0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
0x73, 0x20, 0x3d, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x64,
|
||||||
0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
|
||||||
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b,
|
|
||||||
0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20,
|
|
||||||
0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63,
|
|
||||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
|
||||||
0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45,
|
|
||||||
0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29,
|
|
||||||
0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
|
|
||||||
0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c,
|
|
||||||
0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
|
|
||||||
0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72,
|
|
||||||
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73,
|
|
||||||
0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c,
|
|
||||||
0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c,
|
|
||||||
0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
|
|
||||||
0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
|
||||||
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
|
|
||||||
0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
|
||||||
0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74,
|
|
||||||
0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
|
|
||||||
0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61,
|
|
||||||
0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77,
|
|
||||||
0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74,
|
|
||||||
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
|
|
||||||
0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68,
|
|
||||||
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29,
|
|
||||||
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
|
||||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e,
|
|
||||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72,
|
|
||||||
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e,
|
|
||||||
0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
|
||||||
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
|
||||||
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
|
|
||||||
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
|
|
||||||
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e,
|
|
||||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
|
||||||
0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74,
|
|
||||||
0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64,
|
|
||||||
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69,
|
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69,
|
||||||
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20,
|
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b,
|
||||||
0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63,
|
0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x20,
|
||||||
0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
0x3d, 0x20, 0x66, 0x61, 0x6c, 0x73, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x62,
|
||||||
0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63,
|
0x72, 0x65, 0x61, 0x6b, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20,
|
||||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e,
|
0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a,
|
||||||
0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61,
|
||||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72,
|
||||||
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
|
||||||
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
|
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d,
|
||||||
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e,
|
0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20,
|
||||||
0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
|
0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||||
0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20,
|
||||||
0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28,
|
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79,
|
||||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
|
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||||
0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b,
|
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28,
|
||||||
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20,
|
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65,
|
||||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
|
0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||||
0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20,
|
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
|
||||||
0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65,
|
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||||
0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d,
|
0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20,
|
||||||
0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e,
|
0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20,
|
||||||
0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e,
|
0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x63,
|
||||||
0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73,
|
0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f,
|
||||||
0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a,
|
0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f,
|
||||||
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a,
|
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f,
|
||||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c,
|
0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76,
|
||||||
0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70,
|
0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20,
|
||||||
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28,
|
0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c,
|
||||||
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e,
|
0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f,
|
||||||
0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||||
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
|
0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||||
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
|
0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
|
||||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f,
|
0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20,
|
||||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f,
|
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45,
|
||||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72,
|
||||||
0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
|
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
|
||||||
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
|
||||||
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63,
|
||||||
0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64,
|
0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28,
|
||||||
0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74,
|
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
|
||||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f,
|
0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f,
|
||||||
0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78,
|
||||||
0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d,
|
0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c,
|
||||||
0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
|
||||||
|
0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
||||||
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20,
|
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20,
|
||||||
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d,
|
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d,
|
||||||
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||||
0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50,
|
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
|
||||||
0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63,
|
0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20,
|
||||||
0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72,
|
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28,
|
||||||
0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74,
|
0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20,
|
0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||||
0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f,
|
||||||
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
|
0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e,
|
||||||
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20,
|
||||||
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72,
|
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
||||||
0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
|
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f,
|
||||||
0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b,
|
0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
||||||
0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||||
0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
|
0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a,
|
||||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20,
|
0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70,
|
||||||
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65,
|
||||||
0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65,
|
0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e,
|
||||||
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c,
|
||||||
0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f,
|
0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63,
|
||||||
0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65,
|
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29,
|
||||||
0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65,
|
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
|
||||||
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75,
|
||||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
|
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65,
|
||||||
0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70,
|
0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69,
|
||||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
|
0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
||||||
0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
|
||||||
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63,
|
0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
|
||||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f,
|
0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65,
|
||||||
0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61,
|
0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
|
||||||
0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70,
|
0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65,
|
||||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e,
|
0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
|
||||||
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20,
|
0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61,
|
0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73,
|
||||||
0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20,
|
0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74,
|
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28,
|
||||||
0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69,
|
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74,
|
||||||
0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65,
|
0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||||
0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69,
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54,
|
||||||
0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20,
|
0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74,
|
||||||
0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20,
|
0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20,
|
||||||
0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20,
|
0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28,
|
||||||
0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73,
|
0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b,
|
||||||
0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74,
|
0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75,
|
||||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69,
|
||||||
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20,
|
0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
|
0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20,
|
||||||
0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e,
|
0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
||||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45,
|
||||||
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73,
|
||||||
|
0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f,
|
||||||
|
0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69,
|
||||||
|
0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||||
|
0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29,
|
||||||
|
0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
|
||||||
|
0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
|
||||||
|
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
|
||||||
|
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||||
|
0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65,
|
||||||
|
0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
|
||||||
|
0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f,
|
||||||
|
0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74,
|
||||||
|
0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20,
|
||||||
|
0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20,
|
||||||
|
0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f,
|
||||||
|
0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a,
|
||||||
|
0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c,
|
||||||
|
0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28,
|
||||||
|
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e,
|
||||||
|
0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d,
|
||||||
|
0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
|
0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72,
|
||||||
|
0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
|
||||||
|
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f,
|
||||||
|
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a,
|
||||||
|
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||||
|
0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||||
|
0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||||
|
0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f,
|
||||||
|
0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||||
|
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
|
||||||
|
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
|
||||||
|
0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f,
|
||||||
|
0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f,
|
||||||
|
0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
||||||
|
0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
|
||||||
|
0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20,
|
||||||
|
0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
|
0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20,
|
||||||
|
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e,
|
||||||
|
0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20,
|
||||||
|
0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b,
|
||||||
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
||||||
|
0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20,
|
||||||
|
0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
|
0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74,
|
||||||
|
0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||||
|
0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70,
|
||||||
|
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d,
|
||||||
|
0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20,
|
||||||
|
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f,
|
||||||
|
0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75,
|
||||||
|
0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74,
|
||||||
|
0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||||
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c,
|
||||||
|
0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b,
|
||||||
|
0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68,
|
||||||
|
0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
|
0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28,
|
||||||
|
0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||||
|
0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a,
|
||||||
|
0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72,
|
||||||
|
0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a,
|
||||||
|
0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74,
|
||||||
|
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
|
||||||
|
0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
|
||||||
|
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74,
|
||||||
|
0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c,
|
||||||
|
0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
|
||||||
|
0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28,
|
||||||
|
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20,
|
||||||
|
0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72,
|
||||||
|
0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20,
|
||||||
|
0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f,
|
||||||
|
0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29,
|
||||||
|
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
|
||||||
|
0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a,
|
||||||
|
0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65,
|
||||||
|
0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20,
|
||||||
|
0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68,
|
||||||
|
0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68,
|
||||||
|
0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c,
|
||||||
|
0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
||||||
|
0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74,
|
||||||
|
0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20,
|
||||||
|
0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72,
|
||||||
|
0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d,
|
||||||
|
0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d,
|
||||||
|
0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e,
|
||||||
|
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65,
|
||||||
|
0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74,
|
||||||
|
0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||||
|
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||||
|
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61,
|
||||||
|
0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22,
|
||||||
|
0x2f, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22,
|
||||||
|
0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20,
|
||||||
|
0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20,
|
||||||
|
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
|
||||||
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
|
0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73,
|
||||||
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x61, 0x77,
|
0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||||
0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63, 0x68, 0x28, 0x22, 0x2f,
|
|
||||||
0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x22, 0x29,
|
|
||||||
0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72,
|
|
||||||
0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
|
||||||
0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67,
|
|
||||||
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
|
|
||||||
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
|
||||||
};
|
};
|
||||||
unsigned int completion_js_len = 4462;
|
unsigned int completion_js_len = 5099;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -43,6 +43,7 @@ export async function* llama(prompt, params = {}, config = {}) {
|
|||||||
const decoder = new TextDecoder();
|
const decoder = new TextDecoder();
|
||||||
|
|
||||||
let content = "";
|
let content = "";
|
||||||
|
let leftover = ""; // Buffer for partially read lines
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let cont = true;
|
let cont = true;
|
||||||
@ -53,29 +54,47 @@ export async function* llama(prompt, params = {}, config = {}) {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// sse answers in the form multiple lines of: value\n with data always present as a key. in our case we
|
// Add any leftover data to the current chunk of data
|
||||||
// mainly care about the data: key here, which we expect as json
|
const text = leftover + decoder.decode(result.value);
|
||||||
const text = decoder.decode(result.value);
|
|
||||||
|
|
||||||
// parse all sse events and add them to result
|
// Check if the last character is a line break
|
||||||
const regex = /^(\S+):\s(.*)$/gm;
|
const endsWithLineBreak = text.endsWith('\n');
|
||||||
for (const match of text.matchAll(regex)) {
|
|
||||||
result[match[1]] = match[2]
|
// Split the text into lines
|
||||||
|
let lines = text.split('\n');
|
||||||
|
|
||||||
|
// If the text doesn't end with a line break, then the last line is incomplete
|
||||||
|
// Store it in leftover to be added to the next chunk of data
|
||||||
|
if (!endsWithLineBreak) {
|
||||||
|
leftover = lines.pop();
|
||||||
|
} else {
|
||||||
|
leftover = ""; // Reset leftover if we have a line break at the end
|
||||||
}
|
}
|
||||||
|
|
||||||
// since we know this is llama.cpp, let's just decode the json in data
|
// Parse all sse events and add them to result
|
||||||
result.data = JSON.parse(result.data);
|
const regex = /^(\S+):\s(.*)$/gm;
|
||||||
content += result.data.content;
|
for (const line of lines) {
|
||||||
|
const match = regex.exec(line);
|
||||||
|
if (match) {
|
||||||
|
result[match[1]] = match[2]
|
||||||
|
// since we know this is llama.cpp, let's just decode the json in data
|
||||||
|
if (result.data) {
|
||||||
|
result.data = JSON.parse(result.data);
|
||||||
|
content += result.data.content;
|
||||||
|
|
||||||
// yield
|
// yield
|
||||||
yield result;
|
yield result;
|
||||||
|
|
||||||
// if we got a stop token from server, we will break here
|
// if we got a stop token from server, we will break here
|
||||||
if (result.data.stop) {
|
if (result.data.stop) {
|
||||||
if (result.data.generation_settings) {
|
if (result.data.generation_settings) {
|
||||||
generation_settings = result.data.generation_settings;
|
generation_settings = result.data.generation_settings;
|
||||||
|
}
|
||||||
|
cont = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
@ -3,12 +3,11 @@
|
|||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
|
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1" />
|
||||||
|
<meta name="color-scheme" content="light dark">
|
||||||
<title>llama.cpp - chat</title>
|
<title>llama.cpp - chat</title>
|
||||||
|
|
||||||
<style>
|
<style>
|
||||||
body {
|
body {
|
||||||
background-color: #fff;
|
|
||||||
color: #000;
|
|
||||||
font-family: system-ui;
|
font-family: system-ui;
|
||||||
font-size: 90%;
|
font-size: 90%;
|
||||||
}
|
}
|
||||||
@ -283,8 +282,9 @@
|
|||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
// scroll to bottom (if needed)
|
// scroll to bottom (if needed)
|
||||||
if (container.current && container.current.scrollHeight <= container.current.scrollTop + container.current.offsetHeight + 300) {
|
const parent = container.current.parentElement;
|
||||||
container.current.scrollTo(0, container.current.scrollHeight)
|
if (parent && parent.scrollHeight <= parent.scrollTop + parent.offsetHeight + 300) {
|
||||||
|
parent.scrollTo(0, parent.scrollHeight)
|
||||||
}
|
}
|
||||||
}, [messages])
|
}, [messages])
|
||||||
|
|
||||||
|
@ -631,6 +631,9 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms,
|
|||||||
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
|
fprintf(stdout, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n");
|
||||||
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
|
fprintf(stdout, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n");
|
||||||
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
|
fprintf(stdout, " -lv, --low-vram don't allocate VRAM scratch buffer\n");
|
||||||
|
fprintf(stdout, " -mmq, --mul-mat-q use experimental mul_mat_q CUDA kernels instead of cuBLAS. TEMP!!!\n" );
|
||||||
|
fprintf(stdout, " Reduces VRAM usage by 700/970/1430 MiB for 7b/13b/33b but prompt processing speed\n" );
|
||||||
|
fprintf(stdout, " is still suboptimal, especially q2_K, q3_K, q5_K, and q6_K.\n" );
|
||||||
#endif
|
#endif
|
||||||
fprintf(stdout, " -m FNAME, --model FNAME\n");
|
fprintf(stdout, " -m FNAME, --model FNAME\n");
|
||||||
fprintf(stdout, " model path (default: %s)\n", params.model.c_str());
|
fprintf(stdout, " model path (default: %s)\n", params.model.c_str());
|
||||||
@ -827,7 +830,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.", {});
|
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n", {});
|
||||||
#endif // GGML_USE_CUBLAS
|
#endif // GGML_USE_CUBLAS
|
||||||
}
|
}
|
||||||
else if (arg == "--low-vram" || arg == "-lv")
|
else if (arg == "--low-vram" || arg == "-lv")
|
||||||
@ -835,7 +838,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||||||
#ifdef GGML_USE_CUBLAS
|
#ifdef GGML_USE_CUBLAS
|
||||||
params.low_vram = true;
|
params.low_vram = true;
|
||||||
#else
|
#else
|
||||||
fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n");
|
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. It is not possible to set lower vram usage.\n", {});
|
||||||
|
#endif // GGML_USE_CUBLAS
|
||||||
|
}
|
||||||
|
else if (arg == "--mul-mat-q" || arg == "-mmq")
|
||||||
|
{
|
||||||
|
#ifdef GGML_USE_CUBLAS
|
||||||
|
params.mul_mat_q = true;
|
||||||
|
#else
|
||||||
|
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. It is not possible to use mul_mat_q kernels.\n", {});
|
||||||
#endif // GGML_USE_CUBLAS
|
#endif // GGML_USE_CUBLAS
|
||||||
}
|
}
|
||||||
else if (arg == "--main-gpu" || arg == "-mg")
|
else if (arg == "--main-gpu" || arg == "-mg")
|
||||||
@ -1263,7 +1274,11 @@ int main(int argc, char **argv)
|
|||||||
sink.done();
|
sink.done();
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
|
const auto on_complete = [&](bool) {
|
||||||
|
llama.mutex.unlock();
|
||||||
|
};
|
||||||
|
lock.release();
|
||||||
|
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
|
||||||
} });
|
} });
|
||||||
|
|
||||||
svr.Get("/model.json", [&llama](const Request &, Response &res)
|
svr.Get("/model.json", [&llama](const Request &, Response &res)
|
||||||
|
@ -123,7 +123,7 @@ int main(int argc, char ** argv)
|
|||||||
// Evaluate the tokens :
|
// Evaluate the tokens :
|
||||||
//---------------------------------
|
//---------------------------------
|
||||||
|
|
||||||
if ( llama_eval( ctx , tokens_list.data() , tokens_list.size() , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) )
|
if ( llama_eval( ctx , tokens_list.data() , int(tokens_list.size()) , llama_get_kv_cache_token_count( ctx ) , params.n_threads ) )
|
||||||
{
|
{
|
||||||
fprintf( stderr, "%s : failed to eval\n" , __func__ );
|
fprintf( stderr, "%s : failed to eval\n" , __func__ );
|
||||||
return 1;
|
return 1;
|
||||||
|
541
ggml-alloc.c
Normal file
541
ggml-alloc.c
Normal file
@ -0,0 +1,541 @@
|
|||||||
|
#include "ggml-alloc.h"
|
||||||
|
#include "ggml.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#define UNUSED(x) (void)(x)
|
||||||
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
|
//#define GGML_ALLOCATOR_DEBUG
|
||||||
|
|
||||||
|
//#define AT_PRINTF printf
|
||||||
|
#define AT_PRINTF(...) ((void)0)
|
||||||
|
|
||||||
|
struct hash_node {
|
||||||
|
struct ggml_tensor * t;
|
||||||
|
int n_children;
|
||||||
|
int n_views;
|
||||||
|
};
|
||||||
|
|
||||||
|
static size_t hash(void * p) {
|
||||||
|
return (size_t)p % GGML_GRAPH_HASHTABLE_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct hash_node * hash_get(struct hash_node hash_table[], struct ggml_tensor * t) {
|
||||||
|
size_t h = hash(t);
|
||||||
|
|
||||||
|
// linear probing
|
||||||
|
size_t i = h;
|
||||||
|
while (hash_table[i].t != NULL) {
|
||||||
|
if (hash_table[i].t == t) {
|
||||||
|
return &hash_table[i];
|
||||||
|
}
|
||||||
|
i = (i + 1) % GGML_GRAPH_HASHTABLE_SIZE;
|
||||||
|
if (i == h) {
|
||||||
|
// hash table is full
|
||||||
|
GGML_ASSERT(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hash_table[i].t = t;
|
||||||
|
return &hash_table[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: GGML_PAD ?
|
||||||
|
static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
|
||||||
|
assert(alignment && !(alignment & (alignment - 1))); // power of 2
|
||||||
|
size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment;
|
||||||
|
return offset + align;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct free_block {
|
||||||
|
void * addr;
|
||||||
|
size_t size;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define MAX_FREE_BLOCKS 128
|
||||||
|
|
||||||
|
struct ggml_allocr {
|
||||||
|
void * data;
|
||||||
|
size_t size;
|
||||||
|
size_t alignment;
|
||||||
|
int n_free_blocks;
|
||||||
|
struct free_block free_blocks[MAX_FREE_BLOCKS];
|
||||||
|
struct hash_node hash_table[GGML_GRAPH_HASHTABLE_SIZE];
|
||||||
|
size_t max_size;
|
||||||
|
bool measure;
|
||||||
|
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
struct ggml_tensor * allocated_tensors[1024];
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
static void add_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
|
||||||
|
for (int i = 0; i < 1024; i++) {
|
||||||
|
if (alloc->allocated_tensors[i] == NULL) {
|
||||||
|
alloc->allocated_tensors[i] = tensor;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GGML_ASSERT(!"out of allocated_tensors");
|
||||||
|
}
|
||||||
|
static void remove_allocated_tensor(struct ggml_allocator * alloc, struct ggml_tensor * tensor) {
|
||||||
|
for (int i = 0; i < 1024; i++) {
|
||||||
|
if (alloc->allocated_tensors[i] == tensor ||
|
||||||
|
(alloc->allocated_tensors[i] != NULL && alloc->allocated_tensors[i]->data == tensor->data)) {
|
||||||
|
alloc->allocated_tensors[i] = NULL;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("tried to free tensor %s not found\n", tensor->name);
|
||||||
|
GGML_ASSERT(!"tensor not found");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
static size_t ggml_allocator_get_alloc_size(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
|
||||||
|
return ggml_nbytes(tensor);
|
||||||
|
|
||||||
|
UNUSED(alloc);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
|
||||||
|
size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
|
||||||
|
size = aligned_offset(NULL, size, alloc->alignment);
|
||||||
|
|
||||||
|
AT_PRINTF("%s: allocating %s (%zu bytes) - ", __func__, tensor->name, size);
|
||||||
|
|
||||||
|
size_t max_avail = 0;
|
||||||
|
|
||||||
|
// find the best fitting free block
|
||||||
|
int best_fit_block = -1;
|
||||||
|
size_t best_fit_size = SIZE_MAX;
|
||||||
|
for (int i = 0; i < alloc->n_free_blocks; i++) {
|
||||||
|
struct free_block * block = &alloc->free_blocks[i];
|
||||||
|
max_avail = MAX(max_avail, block->size);
|
||||||
|
if (block->size >= size && block->size <= best_fit_size) {
|
||||||
|
best_fit_block = i;
|
||||||
|
best_fit_size = block->size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AT_PRINTF("block %d\n", best_fit_block);
|
||||||
|
|
||||||
|
if (best_fit_block == -1) {
|
||||||
|
fprintf(stderr, "%s: not enough space in the buffer (needed %zu, largest block available %zu)\n",
|
||||||
|
__func__, size, max_avail);
|
||||||
|
GGML_ASSERT(!"not enough space in the buffer");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
struct free_block * block = &alloc->free_blocks[best_fit_block];
|
||||||
|
void * addr = block->addr;
|
||||||
|
block->addr = (char*)block->addr + size;
|
||||||
|
block->size -= size;
|
||||||
|
if (block->size == 0) {
|
||||||
|
// remove block if empty
|
||||||
|
alloc->n_free_blocks--;
|
||||||
|
for (int j = best_fit_block; j < alloc->n_free_blocks; j++) {
|
||||||
|
alloc->free_blocks[j] = alloc->free_blocks[j+1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tensor->data = addr;
|
||||||
|
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
add_allocated_tensor(alloc, tensor);
|
||||||
|
size_t cur_max = (char*)addr - (char*)alloc->data + size;
|
||||||
|
if (cur_max > alloc->max_size) {
|
||||||
|
printf("max_size = %.2f MB: tensors: ", cur_max / 1024.0 / 1024.0);
|
||||||
|
for (int i = 0; i < 1024; i++) {
|
||||||
|
if (alloc->allocated_tensors[i]) {
|
||||||
|
printf("%s (%.2f MB) ", alloc->allocated_tensors[i]->name, ggml_nbytes(alloc->allocated_tensors[i]) / 1024.0 / 1024.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
alloc->max_size = MAX(alloc->max_size, (char*)addr - (char*)alloc->data + size);
|
||||||
|
}
|
||||||
|
|
||||||
|
// this is a very naive implementation, but for our case the number of free blocks should be very small
|
||||||
|
static void ggml_allocator_free_tensor(struct ggml_allocr * alloc, struct ggml_tensor * tensor) {
|
||||||
|
void * ptr = tensor->data;
|
||||||
|
|
||||||
|
if (ptr < alloc->data || (char*)ptr >= (char*)alloc->data + alloc->max_size) {
|
||||||
|
// the tensor was not allocated in this buffer
|
||||||
|
// this can happen because the graph allocator will try to free weights and other tensors from different buffers
|
||||||
|
// the easiest way to deal with this is just to ignore it
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t size = ggml_allocator_get_alloc_size(alloc, tensor);
|
||||||
|
size = aligned_offset(NULL, size, alloc->alignment);
|
||||||
|
AT_PRINTF("%s: freeing %s (%zu bytes) - n_free_blocks = %d\n", __func__, tensor->name, size, alloc->n_free_blocks);
|
||||||
|
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
remove_allocated_tensor(alloc, tensor);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// see if we can merge with an existing block
|
||||||
|
for (int i = 0; i < alloc->n_free_blocks; i++) {
|
||||||
|
struct free_block * block = &alloc->free_blocks[i];
|
||||||
|
// check if ptr is at the end of the block
|
||||||
|
if ((char*)block->addr + block->size == ptr) {
|
||||||
|
block->size += size;
|
||||||
|
// check if we can merge with the next block
|
||||||
|
if (i < alloc->n_free_blocks - 1 && (char*)block->addr + block->size == alloc->free_blocks[i+1].addr) {
|
||||||
|
block->size += alloc->free_blocks[i+1].size;
|
||||||
|
alloc->n_free_blocks--;
|
||||||
|
for (int j = i+1; j < alloc->n_free_blocks; j++) {
|
||||||
|
alloc->free_blocks[j] = alloc->free_blocks[j+1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// check if ptr is at the beginning of the block
|
||||||
|
if ((char*)ptr + size == block->addr) {
|
||||||
|
block->addr = ptr;
|
||||||
|
block->size += size;
|
||||||
|
// check if we can merge with the previous block
|
||||||
|
if (i > 0 && (char*)alloc->free_blocks[i-1].addr + alloc->free_blocks[i-1].size == block->addr) {
|
||||||
|
alloc->free_blocks[i-1].size += block->size;
|
||||||
|
alloc->n_free_blocks--;
|
||||||
|
for (int j = i; j < alloc->n_free_blocks; j++) {
|
||||||
|
alloc->free_blocks[j] = alloc->free_blocks[j+1];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// otherwise, add a new block
|
||||||
|
GGML_ASSERT(alloc->n_free_blocks < MAX_FREE_BLOCKS && "out of free blocks");
|
||||||
|
// insert the new block in the correct position to keep the array sorted by address (to make merging blocks faster)
|
||||||
|
int insert_pos = 0;
|
||||||
|
while (insert_pos < alloc->n_free_blocks && alloc->free_blocks[insert_pos].addr < ptr) {
|
||||||
|
insert_pos++;
|
||||||
|
}
|
||||||
|
// shift all blocks from insert_pos onward to make room for the new block
|
||||||
|
for (int i = alloc->n_free_blocks; i > insert_pos; i--) {
|
||||||
|
alloc->free_blocks[i] = alloc->free_blocks[i-1];
|
||||||
|
}
|
||||||
|
// insert the new block
|
||||||
|
alloc->free_blocks[insert_pos].addr = ptr;
|
||||||
|
alloc->free_blocks[insert_pos].size = size;
|
||||||
|
alloc->n_free_blocks++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_allocr_reset(struct ggml_allocr * alloc) {
|
||||||
|
alloc->n_free_blocks = 1;
|
||||||
|
size_t align_offset = aligned_offset(alloc->data, 0, alloc->alignment);
|
||||||
|
alloc->free_blocks[0].addr = (char *)alloc->data + align_offset;
|
||||||
|
alloc->free_blocks[0].size = alloc->size - align_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment) {
|
||||||
|
struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
|
||||||
|
|
||||||
|
*alloc = (struct ggml_allocr){
|
||||||
|
/*.data = */ data,
|
||||||
|
/*.size = */ size,
|
||||||
|
/*.alignment = */ alignment,
|
||||||
|
/*.n_free_blocks = */ 0,
|
||||||
|
/*.free_blocks = */ {{0}},
|
||||||
|
/*.hash_table = */ {{0}},
|
||||||
|
/*.max_size = */ 0,
|
||||||
|
/*.measure = */ false,
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
/*.allocated_tensors = */ = {0},
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
ggml_allocr_reset(alloc);
|
||||||
|
|
||||||
|
return alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
// address and size of the buffer when measuring
|
||||||
|
// it needs to be large enough to fit all the tensors, but it cannot overlap with other existing buffers
|
||||||
|
static void * const MEASURE_BASE_ADDR = (void *) 0x1000;
|
||||||
|
static const size_t MEASURE_MAX_SIZE = 1ULL<<40; // 1 TB
|
||||||
|
|
||||||
|
struct ggml_allocr * ggml_allocr_new_measure(size_t alignment) {
|
||||||
|
struct ggml_allocr * alloc = (struct ggml_allocr *)malloc(sizeof(struct ggml_allocr) /* + n_free_blocks * sizeof(struct free_block) */);
|
||||||
|
|
||||||
|
*alloc = (struct ggml_allocr){
|
||||||
|
/*.data = */ MEASURE_BASE_ADDR,
|
||||||
|
/*.size = */ MEASURE_MAX_SIZE,
|
||||||
|
/*.alignment = */ alignment,
|
||||||
|
/*.n_free_blocks = */ 0,
|
||||||
|
/*.free_blocks = */ {{0}},
|
||||||
|
/*.hash_table = */ {{0}},
|
||||||
|
/*.max_size = */ 0,
|
||||||
|
/*.measure = */ true,
|
||||||
|
#ifdef GGML_ALLOCATOR_DEBUG
|
||||||
|
/*.allocated_tensors = */ = {0},
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
ggml_allocr_reset(alloc);
|
||||||
|
|
||||||
|
return alloc;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ggml_allocr_free(struct ggml_allocr * alloc) {
    // Releases the allocator structure itself; the buffer it managed is not touched here.
    if (alloc != NULL) {
        free(alloc);
    }
}
|
||||||
|
|
||||||
|
bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
|
||||||
|
return alloc->measure;
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////// compute graph allocator
|
||||||
|
|
||||||
|
static bool ggml_is_view(struct ggml_tensor * t) {
|
||||||
|
return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
|
||||||
|
t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
|
||||||
|
if (a->type != b->type) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < GGML_MAX_DIMS; i++) {
|
||||||
|
if (a->ne[i] != b->ne[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (a->nb[i] != b->nb[i]) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
|
||||||
|
switch (t->op) {
|
||||||
|
case GGML_OP_PERMUTE:
|
||||||
|
case GGML_OP_RESHAPE:
|
||||||
|
case GGML_OP_TRANSPOSE:
|
||||||
|
case GGML_OP_VIEW:
|
||||||
|
return t->src[0];
|
||||||
|
case GGML_OP_CPY:
|
||||||
|
return t->src[1];
|
||||||
|
default:
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
    // Follows the chain of view parents starting at `t` and returns the first
    // tensor in that chain that is not itself a view.
    struct ggml_tensor * src = get_view_parent(t);

    while (ggml_is_view(src)) {
        src = get_view_parent(src);
    }

    return src;
}
|
||||||
|
|
||||||
|
static bool ggml_op_can_inplace(enum ggml_op op) {
|
||||||
|
switch (op) {
|
||||||
|
case GGML_OP_SCALE:
|
||||||
|
case GGML_OP_DIAG_MASK_ZERO:
|
||||||
|
case GGML_OP_DIAG_MASK_INF:
|
||||||
|
case GGML_OP_ADD:
|
||||||
|
case GGML_OP_ADD1:
|
||||||
|
case GGML_OP_ACC:
|
||||||
|
case GGML_OP_SUB:
|
||||||
|
case GGML_OP_MUL:
|
||||||
|
case GGML_OP_DIV:
|
||||||
|
case GGML_OP_SQR:
|
||||||
|
case GGML_OP_SQRT:
|
||||||
|
case GGML_OP_LOG:
|
||||||
|
case GGML_OP_UNARY:
|
||||||
|
case GGML_OP_ROPE:
|
||||||
|
case GGML_OP_RMS_NORM:
|
||||||
|
case GGML_OP_SET:
|
||||||
|
case GGML_OP_SOFT_MAX:
|
||||||
|
case GGML_OP_CONT:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node) {
|
||||||
|
struct hash_node * ht = alloc->hash_table;
|
||||||
|
if (node->data == NULL) {
|
||||||
|
if (ggml_is_view(node)) {
|
||||||
|
size_t offset;
|
||||||
|
switch(node->op) {
|
||||||
|
case GGML_OP_VIEW:
|
||||||
|
memcpy(&offset, node->op_params, sizeof(size_t));
|
||||||
|
node->data = (char *) node->src[0]->data + offset;
|
||||||
|
break;
|
||||||
|
case GGML_OP_PERMUTE:
|
||||||
|
case GGML_OP_RESHAPE:
|
||||||
|
case GGML_OP_TRANSPOSE:
|
||||||
|
node->data = node->src[0]->data;
|
||||||
|
break;
|
||||||
|
case GGML_OP_CPY:
|
||||||
|
node->data = node->src[1]->data;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
GGML_ASSERT(!"unknown view op");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// see if we can reuse a parent's buffer (inplace)
|
||||||
|
if (ggml_op_can_inplace(node->op)) {
|
||||||
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||||
|
struct ggml_tensor * parent = node->src[i];
|
||||||
|
if (parent == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
struct hash_node * p_hn = hash_get(ht, parent);
|
||||||
|
if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
|
||||||
|
if (ggml_is_view(parent)) {
|
||||||
|
struct ggml_tensor * view_src = get_view_source(parent);
|
||||||
|
struct hash_node * view_src_hn = hash_get(ht, view_src);
|
||||||
|
if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
|
||||||
|
// TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
|
||||||
|
// the parent's data that it will need later (same layout requirement). the problem is that then
|
||||||
|
// we cannot free the tensor because the original address of the allocation is lost.
|
||||||
|
// adding a view_src pointer to the tensor would solve this and simplify the code dealing with views
|
||||||
|
// for now, we only reuse the parent's data if the offset is zero (view_src->data == parent->data)
|
||||||
|
AT_PRINTF("reusing view parent %s (%s) for %s\n", parent->name, view_src->name, node->name);
|
||||||
|
node->data = parent->data;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
|
||||||
|
node->data = parent->data;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ggml_allocr_alloc(alloc, node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t ggml_allocator_alloc_graph_tensors_n(
|
||||||
|
struct ggml_allocr * alloc,
|
||||||
|
struct ggml_cgraph ** graphs, int n_graphs,
|
||||||
|
struct ggml_tensor *** inputs, struct ggml_tensor *** outputs) {
|
||||||
|
|
||||||
|
// reset hash table
|
||||||
|
struct hash_node * ht = alloc->hash_table;
|
||||||
|
memset(ht, 0, sizeof(struct hash_node) * GGML_GRAPH_HASHTABLE_SIZE);
|
||||||
|
|
||||||
|
// count number of children and views
|
||||||
|
for (int g = 0; g < n_graphs; g++) {
|
||||||
|
struct ggml_cgraph * gf = graphs[g];
|
||||||
|
for (int i = 0; i < gf->n_nodes; i++) {
|
||||||
|
struct ggml_tensor * node = gf->nodes[i];
|
||||||
|
|
||||||
|
if (ggml_is_view(node)) {
|
||||||
|
struct ggml_tensor * view_src = get_view_source(node);
|
||||||
|
hash_get(ht, view_src)->n_views += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||||
|
struct ggml_tensor * parent = node->src[j];
|
||||||
|
if (parent == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
hash_get(ht, parent)->n_children += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate tensors
|
||||||
|
for (int g = 0; g < n_graphs; g++) {
|
||||||
|
struct ggml_cgraph * gf = graphs[g];
|
||||||
|
AT_PRINTF("####### graph %d/%d\n", g, n_graphs);
|
||||||
|
// graph inputs are allocated first to ensure that they are not overwritten by each other
|
||||||
|
if (inputs != NULL && inputs[g] != NULL) {
|
||||||
|
for (int i = 0; inputs[g][i] != NULL; i++) {
|
||||||
|
struct ggml_tensor * input = inputs[g][i];
|
||||||
|
AT_PRINTF("input: %s\n", input->name);
|
||||||
|
allocate_node(alloc, input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < gf->n_nodes; i++) {
|
||||||
|
struct ggml_tensor * node = gf->nodes[i];
|
||||||
|
|
||||||
|
// allocate parents (leafs)
|
||||||
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||||
|
struct ggml_tensor * parent = node->src[j];
|
||||||
|
if (parent == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
allocate_node(alloc, parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// allocate node
|
||||||
|
allocate_node(alloc, node);
|
||||||
|
|
||||||
|
AT_PRINTF("exec: %s (%s) <= ", ggml_op_name(node->op), node->name);
|
||||||
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||||
|
struct ggml_tensor * parent = node->src[j];
|
||||||
|
if (parent == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
AT_PRINTF("%s", parent->name);
|
||||||
|
if (j < GGML_MAX_SRC - 1 && node->src[j + 1] != NULL) {
|
||||||
|
AT_PRINTF(", ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AT_PRINTF("\n");
|
||||||
|
|
||||||
|
// update parents
|
||||||
|
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||||
|
struct ggml_tensor * parent = node->src[j];
|
||||||
|
if (parent == NULL) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
struct hash_node * p_hn = hash_get(ht, parent);
|
||||||
|
p_hn->n_children -= 1;
|
||||||
|
|
||||||
|
//AT_PRINTF("parent %s: %d children, %d views\n", parent->name, parent->n_children, parent->n_views);
|
||||||
|
|
||||||
|
if (p_hn->n_children == 0 && p_hn->n_views == 0) {
|
||||||
|
if (ggml_is_view(parent)) {
|
||||||
|
struct ggml_tensor * view_src = get_view_source(parent);
|
||||||
|
struct hash_node * view_src_hn = hash_get(ht, view_src);
|
||||||
|
view_src_hn->n_views -= 1;
|
||||||
|
AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src->n_children, view_src->n_views);
|
||||||
|
if (view_src_hn->n_views == 0 && view_src_hn->n_children == 0 && view_src->data != node->data) {
|
||||||
|
ggml_allocator_free_tensor(alloc, view_src);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (parent->data != node->data) {
|
||||||
|
ggml_allocator_free_tensor(alloc, parent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
AT_PRINTF("\n");
|
||||||
|
}
|
||||||
|
// free graph outputs here that wouldn't be freed otherwise because they have no children
|
||||||
|
if (outputs != NULL && outputs[g] != NULL) {
|
||||||
|
for (int i = 0; outputs[g][i] != NULL; i++) {
|
||||||
|
struct ggml_tensor * output = outputs[g][i];
|
||||||
|
AT_PRINTF("output: %s\n", output->name);
|
||||||
|
ggml_allocator_free_tensor(alloc, output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return alloc->max_size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph) {
    // Single-graph convenience wrapper: no explicit input or output lists are passed.
    struct ggml_cgraph * single[1] = { graph };
    return ggml_allocator_alloc_graph_tensors_n(alloc, single, 1, NULL, NULL);
}
|
22
ggml-alloc.h
Normal file
22
ggml-alloc.h
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
#pragma once

#include "ggml.h"

#ifdef  __cplusplus
extern "C" {
#endif

// --- lifetime ---------------------------------------------------------------

// create an allocator that places tensors inside the caller-provided buffer `data` (`size` bytes)
GGML_API struct ggml_allocr * ggml_allocr_new(void * data, size_t size, size_t alignment);
// create an allocator in measure mode (no real buffer is supplied)
GGML_API struct ggml_allocr * ggml_allocr_new_measure(size_t alignment);
// release an allocator created by one of the functions above
GGML_API void   ggml_allocr_free(struct ggml_allocr * alloc);

// --- state ------------------------------------------------------------------

// true if the allocator was created in measure mode
GGML_API bool   ggml_allocr_is_measure(struct ggml_allocr * alloc);
// forget all allocations and start again with the whole buffer free
GGML_API void   ggml_allocr_reset(struct ggml_allocr * alloc);

// --- allocation -------------------------------------------------------------

// assign memory from the allocator's buffer to a single tensor
GGML_API void   ggml_allocr_alloc(struct ggml_allocr * alloc, struct ggml_tensor * tensor);
// assign memory to the tensors of a graph; returns the allocator's max_size afterwards
GGML_API size_t ggml_allocr_alloc_graph(struct ggml_allocr * alloc, struct ggml_cgraph * graph);

#ifdef  __cplusplus
}
#endif
|
2512
ggml-cuda.cu
2512
ggml-cuda.cu
File diff suppressed because it is too large
Load Diff
@ -27,6 +27,7 @@ void ggml_cuda_assign_buffers(struct ggml_tensor * tensor);
|
|||||||
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
void ggml_cuda_assign_buffers_no_scratch(struct ggml_tensor * tensor);
|
||||||
void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
void ggml_cuda_assign_buffers_force_inplace(struct ggml_tensor * tensor);
|
||||||
void ggml_cuda_set_main_device(int main_device);
|
void ggml_cuda_set_main_device(int main_device);
|
||||||
|
void ggml_cuda_set_mul_mat_q(bool mul_mat_q);
|
||||||
void ggml_cuda_set_scratch_size(size_t scratch_size);
|
void ggml_cuda_set_scratch_size(size_t scratch_size);
|
||||||
void ggml_cuda_free_scratch(void);
|
void ggml_cuda_free_scratch(void);
|
||||||
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
|
||||||
|
90
ggml-metal.m
90
ggml-metal.m
@ -7,6 +7,11 @@
|
|||||||
#import <Metal/Metal.h>
|
#import <Metal/Metal.h>
|
||||||
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
|
#import <MetalPerformanceShaders/MetalPerformanceShaders.h>
|
||||||
|
|
||||||
|
#undef MIN
|
||||||
|
#undef MAX
|
||||||
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
#ifdef GGML_METAL_NDEBUG
|
#ifdef GGML_METAL_NDEBUG
|
||||||
#define metal_printf(...)
|
#define metal_printf(...)
|
||||||
#else
|
#else
|
||||||
@ -15,6 +20,8 @@
|
|||||||
|
|
||||||
#define UNUSED(x) (void)(x)
|
#define UNUSED(x) (void)(x)
|
||||||
|
|
||||||
|
#define GGML_MAX_CONCUR (2*GGML_MAX_NODES)
|
||||||
|
|
||||||
struct ggml_metal_buffer {
|
struct ggml_metal_buffer {
|
||||||
const char * name;
|
const char * name;
|
||||||
|
|
||||||
@ -36,7 +43,7 @@ struct ggml_metal_context {
|
|||||||
int n_buffers;
|
int n_buffers;
|
||||||
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
|
struct ggml_metal_buffer buffers[GGML_METAL_MAX_BUFFERS];
|
||||||
|
|
||||||
int concur_list[GGML_MAX_NODES];
|
int concur_list[GGML_MAX_CONCUR];
|
||||||
int concur_list_len;
|
int concur_list_len;
|
||||||
|
|
||||||
// custom kernels
|
// custom kernels
|
||||||
@ -370,15 +377,15 @@ void ggml_metal_graph_find_concurrency(
|
|||||||
struct ggml_metal_context * ctx,
|
struct ggml_metal_context * ctx,
|
||||||
struct ggml_cgraph * gf) {
|
struct ggml_cgraph * gf) {
|
||||||
int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
|
int search_depth = gf->n_nodes; //we only find concurrency in this range to avoid wasting too much time
|
||||||
int nodes_unused[GGML_MAX_NODES];
|
int nodes_unused[GGML_MAX_CONCUR];
|
||||||
|
|
||||||
for (int i = 0; i < GGML_MAX_NODES; i++) {ctx->concur_list[i] = 0;}
|
for (int i = 0; i < GGML_MAX_CONCUR; i++) { ctx->concur_list[i] = 0; }
|
||||||
for (int i = 0; i < gf->n_nodes; i++) {nodes_unused[i] = 1;}
|
for (int i = 0; i < gf->n_nodes; i++) { nodes_unused[i] = 1; }
|
||||||
ctx->concur_list_len = 0;
|
ctx->concur_list_len = 0;
|
||||||
|
|
||||||
int n_left = gf->n_nodes;
|
int n_left = gf->n_nodes;
|
||||||
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
|
int n_start = 0; // all nodes before n_start at nodes_unused array have been sorted and store back to ctx->concur_list
|
||||||
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
|
int level_pos = 0; // at ctx->concur_list, the last layer (level) ends at level_pos
|
||||||
|
|
||||||
while (n_left > 0) {
|
while (n_left > 0) {
|
||||||
// number of nodes at a layer (that can be issued concurrently)
|
// number of nodes at a layer (that can be issued concurrently)
|
||||||
@ -386,28 +393,40 @@ void ggml_metal_graph_find_concurrency(
|
|||||||
for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
|
for (int i = n_start; i < ((n_start + search_depth > gf->n_nodes) ? gf->n_nodes : n_start + search_depth); i++) {
|
||||||
if (nodes_unused[i]) {
|
if (nodes_unused[i]) {
|
||||||
// if the requirements for gf->nodes[i] are satisfied
|
// if the requirements for gf->nodes[i] are satisfied
|
||||||
int exe_flag=1;
|
int exe_flag = 1;
|
||||||
|
|
||||||
// scan all srcs
|
// scan all srcs
|
||||||
for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
|
for (int src_ind = 0; src_ind < GGML_MAX_SRC; src_ind++) {
|
||||||
struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
|
struct ggml_tensor * src_cur = gf->nodes[i]->src[src_ind];
|
||||||
if (src_cur) {
|
if (src_cur) {
|
||||||
// if is leaf nodes it's satisfied.
|
// if is leaf nodes it's satisfied.
|
||||||
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {continue;}
|
// TODO: ggml_is_leaf()
|
||||||
|
if (src_cur->op == GGML_OP_NONE && src_cur->grad == NULL) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
// otherwise this src should be the output from previous nodes.
|
// otherwise this src should be the output from previous nodes.
|
||||||
int is_found = 0;
|
int is_found = 0;
|
||||||
|
|
||||||
// scan 2*search_depth back because we inserted barrier.
|
// scan 2*search_depth back because we inserted barrier.
|
||||||
for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
|
//for (int j = ((level_pos - 2*search_depth) < 0 ? 0 : (level_pos - 2*search_depth)); j < level_pos; j++) {
|
||||||
if (gf->nodes[ctx->concur_list[j]] == src_cur) {is_found = 1; break;}
|
for (int j = MAX(0, level_pos - 2*search_depth); j < level_pos; j++) {
|
||||||
|
if (ctx->concur_list[j] >= 0 && gf->nodes[ctx->concur_list[j]] == src_cur) {
|
||||||
|
is_found = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (is_found == 0) {
|
||||||
|
exe_flag = 0;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
if (is_found == 0) {exe_flag = 0; break;}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (exe_flag) {
|
if (exe_flag) {
|
||||||
// check if nodes[i]'s data will be overwritten by a node before nodes[i].
|
// check if nodes[i]'s data will be overwritten by a node before nodes[i].
|
||||||
// if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
|
// if node[5] and node[3] write to the same memory region, then we can't issue node[5] before node[3]
|
||||||
int64_t data_start = (int64_t) gf->nodes[i]->data;
|
int64_t data_start = (int64_t) gf->nodes[i]->data;
|
||||||
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
|
int64_t length = (int64_t) ggml_nbytes(gf->nodes[i]);
|
||||||
for (int j = n_start; j < i; j++) {
|
for (int j = n_start; j < i; j++) {
|
||||||
if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
|
if (nodes_unused[j] && gf->nodes[j]->op != GGML_OP_RESHAPE \
|
||||||
&& gf->nodes[j]->op != GGML_OP_VIEW \
|
&& gf->nodes[j]->op != GGML_OP_VIEW \
|
||||||
@ -416,9 +435,9 @@ void ggml_metal_graph_find_concurrency(
|
|||||||
if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
|
if (((int64_t)gf->nodes[j]->data) >= data_start + length || \
|
||||||
((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
|
((int64_t)gf->nodes[j]->data) + (int64_t) ggml_nbytes(gf->nodes[j]) <= data_start) {
|
||||||
continue;
|
continue;
|
||||||
} else {
|
|
||||||
exe_flag = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
exe_flag = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -435,11 +454,13 @@ void ggml_metal_graph_find_concurrency(
|
|||||||
ctx->concur_list[level_pos + concurrency] = -1;
|
ctx->concur_list[level_pos + concurrency] = -1;
|
||||||
ctx->concur_list_len++;
|
ctx->concur_list_len++;
|
||||||
// jump all sorted nodes at nodes_bak
|
// jump all sorted nodes at nodes_bak
|
||||||
while (!nodes_unused[n_start]) {n_start++;}
|
while (!nodes_unused[n_start]) {
|
||||||
|
n_start++;
|
||||||
|
}
|
||||||
level_pos += concurrency + 1;
|
level_pos += concurrency + 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->concur_list_len > GGML_MAX_NODES) {
|
if (ctx->concur_list_len > GGML_MAX_CONCUR) {
|
||||||
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
|
fprintf(stderr, "%s: too many elements for metal ctx->concur_list!\n", __func__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -453,7 +474,7 @@ void ggml_metal_graph_compute(
|
|||||||
// else fallback to serial dispatch
|
// else fallback to serial dispatch
|
||||||
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
|
MTLComputePassDescriptor * edesc = MTLComputePassDescriptor.computePassDescriptor;
|
||||||
|
|
||||||
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_NODES;
|
const bool has_concur = ctx->concur_list_len && ctx->concur_list_len <= GGML_MAX_CONCUR;
|
||||||
|
|
||||||
const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
|
const int n_nodes = has_concur ? ctx->concur_list_len : gf->n_nodes;
|
||||||
edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
|
edesc.dispatchType = has_concur ? MTLDispatchTypeConcurrent : MTLDispatchTypeSerial;
|
||||||
@ -718,7 +739,8 @@ void ggml_metal_graph_compute(
|
|||||||
// TODO: needs to be updated after PR: https://github.com/ggerganov/ggml/pull/224
|
// TODO: needs to be updated after PR: https://github.com/ggerganov/ggml/pull/224
|
||||||
|
|
||||||
GGML_ASSERT(ne00 == ne10);
|
GGML_ASSERT(ne00 == ne10);
|
||||||
GGML_ASSERT(ne02 == ne12);
|
// GGML_ASSERT(ne02 == ne12); // Should be checked on individual data types until broadcast is implemented everywhere
|
||||||
|
GGML_ASSERT(ne03 == ne13);
|
||||||
|
|
||||||
if (ggml_is_contiguous(src0) &&
|
if (ggml_is_contiguous(src0) &&
|
||||||
ggml_is_contiguous(src1) &&
|
ggml_is_contiguous(src1) &&
|
||||||
@ -746,11 +768,11 @@ void ggml_metal_graph_compute(
|
|||||||
initWithDevice:ctx->device transposeLeft:false transposeRight:true
|
initWithDevice:ctx->device transposeLeft:false transposeRight:true
|
||||||
resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0];
|
resultRows:ne11 resultColumns:ne01 interiorColumns:ne00 alpha:1.0 beta:0.0];
|
||||||
|
|
||||||
// we need to do ne02 multiplications
|
// we need to do ne12 multiplications
|
||||||
// TODO: is there a way to do this in parallel - currently very slow ..
|
// TODO: is there a way to do this in parallel - currently very slow ..
|
||||||
// TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS
|
// TODO: might be possible to offload part of the computation to ANE using Accelerate's CBLAS
|
||||||
for (int64_t i02 = 0; i02 < ne02; ++i02) {
|
for (int64_t i02 = 0; i02 < ne12; ++i02) {
|
||||||
size_t offs_src0_cur = offs_src0 + i02*nb02;
|
size_t offs_src0_cur = offs_src0 + i02/(ne12/ne02)*nb02; // gqa not used for now
|
||||||
size_t offs_src1_cur = offs_src1 + i02*nb12;
|
size_t offs_src1_cur = offs_src1 + i02*nb12;
|
||||||
size_t offs_dst_cur = offs_dst + i02*nb2;
|
size_t offs_dst_cur = offs_dst + i02*nb2;
|
||||||
|
|
||||||
@ -772,8 +794,6 @@ void ggml_metal_graph_compute(
|
|||||||
switch (src0t) {
|
switch (src0t) {
|
||||||
case GGML_TYPE_F16:
|
case GGML_TYPE_F16:
|
||||||
{
|
{
|
||||||
GGML_ASSERT(ne02 == ne12);
|
|
||||||
|
|
||||||
nth0 = 64;
|
nth0 = 64;
|
||||||
nth1 = 1;
|
nth1 = 1;
|
||||||
[encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32];
|
[encoder setComputePipelineState:ctx->pipeline_mul_mat_f16_f32];
|
||||||
@ -853,16 +873,18 @@ void ggml_metal_graph_compute(
|
|||||||
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
|
[encoder setBuffer:id_dst offset:offs_dst atIndex:2];
|
||||||
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
|
[encoder setBytes:&ne00 length:sizeof(ne00) atIndex:3];
|
||||||
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];
|
[encoder setBytes:&ne01 length:sizeof(ne01) atIndex:4];
|
||||||
[encoder setBytes:&nb00 length:sizeof(nb00) atIndex:5];
|
[encoder setBytes:&ne02 length:sizeof(ne02) atIndex:5];
|
||||||
[encoder setBytes:&nb01 length:sizeof(nb01) atIndex:6];
|
[encoder setBytes:&nb00 length:sizeof(nb00) atIndex:6];
|
||||||
[encoder setBytes:&nb02 length:sizeof(nb02) atIndex:7];
|
[encoder setBytes:&nb01 length:sizeof(nb01) atIndex:7];
|
||||||
[encoder setBytes:&ne10 length:sizeof(ne10) atIndex:8];
|
[encoder setBytes:&nb02 length:sizeof(nb02) atIndex:8];
|
||||||
[encoder setBytes:&ne11 length:sizeof(ne11) atIndex:9];
|
[encoder setBytes:&ne10 length:sizeof(ne10) atIndex:9];
|
||||||
[encoder setBytes:&nb10 length:sizeof(nb10) atIndex:10];
|
[encoder setBytes:&ne11 length:sizeof(ne11) atIndex:10];
|
||||||
[encoder setBytes:&nb11 length:sizeof(nb11) atIndex:11];
|
[encoder setBytes:&ne12 length:sizeof(ne12) atIndex:11];
|
||||||
[encoder setBytes:&nb12 length:sizeof(nb12) atIndex:12];
|
[encoder setBytes:&nb10 length:sizeof(nb10) atIndex:12];
|
||||||
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:13];
|
[encoder setBytes:&nb11 length:sizeof(nb11) atIndex:13];
|
||||||
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:14];
|
[encoder setBytes:&nb12 length:sizeof(nb12) atIndex:14];
|
||||||
|
[encoder setBytes:&ne0 length:sizeof(ne0) atIndex:15];
|
||||||
|
[encoder setBytes:&ne1 length:sizeof(ne1) atIndex:16];
|
||||||
|
|
||||||
if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
|
if (src0t == GGML_TYPE_Q4_0 || src0t == GGML_TYPE_Q4_1 ||
|
||||||
src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) {
|
src0t == GGML_TYPE_Q2_K || src0t == GGML_TYPE_Q4_K) {
|
||||||
|
@ -509,11 +509,13 @@ kernel void kernel_mul_mat_f16_f32(
|
|||||||
device float * dst,
|
device float * dst,
|
||||||
constant int64_t & ne00,
|
constant int64_t & ne00,
|
||||||
constant int64_t & ne01,
|
constant int64_t & ne01,
|
||||||
|
constant int64_t & ne02,
|
||||||
constant uint64_t & nb00,
|
constant uint64_t & nb00,
|
||||||
constant uint64_t & nb01,
|
constant uint64_t & nb01,
|
||||||
constant uint64_t & nb02,
|
constant uint64_t & nb02,
|
||||||
constant int64_t & ne10,
|
constant int64_t & ne10,
|
||||||
constant int64_t & ne11,
|
constant int64_t & ne11,
|
||||||
|
constant int64_t & ne12,
|
||||||
constant uint64_t & nb10,
|
constant uint64_t & nb10,
|
||||||
constant uint64_t & nb11,
|
constant uint64_t & nb11,
|
||||||
constant uint64_t & nb12,
|
constant uint64_t & nb12,
|
||||||
@ -529,7 +531,7 @@ kernel void kernel_mul_mat_f16_f32(
|
|||||||
const int64_t r1 = tgpig.y;
|
const int64_t r1 = tgpig.y;
|
||||||
const int64_t im = tgpig.z;
|
const int64_t im = tgpig.z;
|
||||||
|
|
||||||
device const half * x = (device const half *) (src0 + r0*nb01 + im*nb02);
|
device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
|
||||||
device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12);
|
device const float * y = (device const float *) (src1 + r1*nb11 + im*nb12);
|
||||||
|
|
||||||
sum[tpitg.x] = 0.0f;
|
sum[tpitg.x] = 0.0f;
|
||||||
@ -552,6 +554,7 @@ kernel void kernel_mul_mat_f16_f32(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
kernel void kernel_alibi_f32(
|
kernel void kernel_alibi_f32(
|
||||||
device const float * src0,
|
device const float * src0,
|
||||||
device float * dst,
|
device float * dst,
|
||||||
|
626
ggml.c
626
ggml.c
@ -195,8 +195,8 @@ typedef void * thread_ret_t;
|
|||||||
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
#define GGML_ALIGNED_MALLOC(size) _aligned_malloc(size, GGML_MEM_ALIGN)
|
||||||
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
#define GGML_ALIGNED_FREE(ptr) _aligned_free(ptr)
|
||||||
#else
|
#else
|
||||||
inline static void* ggml_aligned_malloc(size_t size) {
|
inline static void * ggml_aligned_malloc(size_t size) {
|
||||||
void* aligned_memory = NULL;
|
void * aligned_memory = NULL;
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
int result = posix_memalign(&aligned_memory, getpagesize(), size);
|
int result = posix_memalign(&aligned_memory, getpagesize(), size);
|
||||||
#else
|
#else
|
||||||
@ -3810,7 +3810,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
|
|||||||
"CROSS_ENTROPY_LOSS_BACK",
|
"CROSS_ENTROPY_LOSS_BACK",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
|
||||||
|
|
||||||
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
||||||
"none",
|
"none",
|
||||||
@ -3882,7 +3882,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
|
|||||||
"cross_entropy_loss_back(x,y)",
|
"cross_entropy_loss_back(x,y)",
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(GGML_OP_COUNT == 59, "GGML_OP_COUNT != 59");
|
static_assert(GGML_OP_COUNT == 62, "GGML_OP_COUNT != 62");
|
||||||
|
|
||||||
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2");
|
||||||
|
|
||||||
@ -4109,7 +4109,7 @@ size_t ggml_nbytes(const struct ggml_tensor * tensor) {
|
|||||||
//
|
//
|
||||||
// is enough, but just in case, adding the second part
|
// is enough, but just in case, adding the second part
|
||||||
|
|
||||||
return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]);
|
return GGML_PAD(MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*GGML_TYPE_SIZE[tensor->type])/GGML_BLCK_SIZE[tensor->type]), GGML_MEM_ALIGN);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
|
size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) {
|
||||||
@ -4252,7 +4252,7 @@ static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) {
|
|||||||
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1) {
|
||||||
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
|
||||||
|
|
||||||
return
|
return
|
||||||
@ -4556,10 +4556,12 @@ static struct ggml_object * ggml_new_object(struct ggml_context * ctx, enum ggml
|
|||||||
|
|
||||||
static struct ggml_tensor * ggml_new_tensor_impl(
|
static struct ggml_tensor * ggml_new_tensor_impl(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
enum ggml_type type,
|
enum ggml_type type,
|
||||||
int n_dims,
|
int n_dims,
|
||||||
const int64_t* ne,
|
const int64_t * ne,
|
||||||
void* data) {
|
void * data) {
|
||||||
|
|
||||||
|
assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
|
||||||
|
|
||||||
size_t data_size = 0;
|
size_t data_size = 0;
|
||||||
|
|
||||||
@ -4599,7 +4601,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
/*.ne =*/ { 1, 1, 1, 1 },
|
/*.ne =*/ { 1, 1, 1, 1 },
|
||||||
/*.nb =*/ { 0, 0, 0, 0 },
|
/*.nb =*/ { 0, 0, 0, 0 },
|
||||||
/*.op =*/ GGML_OP_NONE,
|
/*.op =*/ GGML_OP_NONE,
|
||||||
/*.op_params =*/ {0},
|
/*.op_params =*/ { 0 },
|
||||||
/*.is_param =*/ false,
|
/*.is_param =*/ false,
|
||||||
/*.grad =*/ NULL,
|
/*.grad =*/ NULL,
|
||||||
/*.src =*/ { NULL },
|
/*.src =*/ { NULL },
|
||||||
@ -4631,6 +4633,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
|
|||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
|
||||||
|
GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
|
||||||
assert(params_size <= GGML_MAX_OP_PARAMS);
|
assert(params_size <= GGML_MAX_OP_PARAMS);
|
||||||
memcpy(tensor->op_params, params, params_size);
|
memcpy(tensor->op_params, params, params_size);
|
||||||
}
|
}
|
||||||
@ -4647,22 +4650,22 @@ static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int3
|
|||||||
|
|
||||||
struct ggml_tensor * ggml_new_tensor(
|
struct ggml_tensor * ggml_new_tensor(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
enum ggml_type type,
|
enum ggml_type type,
|
||||||
int n_dims,
|
int n_dims,
|
||||||
const int64_t * ne) {
|
const int64_t * ne) {
|
||||||
return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
|
return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ggml_new_tensor_1d(
|
struct ggml_tensor * ggml_new_tensor_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
enum ggml_type type,
|
enum ggml_type type,
|
||||||
int64_t ne0) {
|
int64_t ne0) {
|
||||||
return ggml_new_tensor(ctx, type, 1, &ne0);
|
return ggml_new_tensor(ctx, type, 1, &ne0);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ggml_new_tensor_2d(
|
struct ggml_tensor * ggml_new_tensor_2d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
enum ggml_type type,
|
enum ggml_type type,
|
||||||
int64_t ne0,
|
int64_t ne0,
|
||||||
int64_t ne1) {
|
int64_t ne1) {
|
||||||
const int64_t ne[2] = { ne0, ne1 };
|
const int64_t ne[2] = { ne0, ne1 };
|
||||||
@ -4671,7 +4674,7 @@ struct ggml_tensor * ggml_new_tensor_2d(
|
|||||||
|
|
||||||
struct ggml_tensor * ggml_new_tensor_3d(
|
struct ggml_tensor * ggml_new_tensor_3d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
enum ggml_type type,
|
enum ggml_type type,
|
||||||
int64_t ne0,
|
int64_t ne0,
|
||||||
int64_t ne1,
|
int64_t ne1,
|
||||||
int64_t ne2) {
|
int64_t ne2) {
|
||||||
@ -4981,11 +4984,6 @@ enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
|
|||||||
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
|
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void ggml_set_unary_op(struct ggml_tensor * tensor, enum ggml_unary_op op) {
|
|
||||||
GGML_ASSERT(tensor->op = GGML_OP_UNARY);
|
|
||||||
ggml_set_op_params_i32(tensor, 0, (int32_t) op);
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
const char * ggml_get_name(const struct ggml_tensor * tensor) {
|
||||||
return tensor->name;
|
return tensor->name;
|
||||||
}
|
}
|
||||||
@ -6242,6 +6240,27 @@ struct ggml_tensor * ggml_reshape_4d(
|
|||||||
|
|
||||||
// ggml_view_1d
|
// ggml_view_1d
|
||||||
|
|
||||||
|
static struct ggml_tensor * ggml_view_tensor_offset(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
int n_dims,
|
||||||
|
const int64_t * ne,
|
||||||
|
size_t offset) {
|
||||||
|
// don't calculate an offset from an unallocated tensor
|
||||||
|
void * data = NULL;
|
||||||
|
if (a->data != NULL) {
|
||||||
|
data = (char *) a->data + offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data);
|
||||||
|
|
||||||
|
ggml_format_name(result, "%s (view)", a->name);
|
||||||
|
|
||||||
|
ggml_set_op_params(result, &offset, sizeof(offset));
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ggml_view_1d(
|
struct ggml_tensor * ggml_view_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
@ -6254,10 +6273,7 @@ struct ggml_tensor * ggml_view_1d(
|
|||||||
is_node = true;
|
is_node = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, &ne0, (char *) a->data + offset);
|
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset);
|
||||||
ggml_format_name(result, "%s (view)", a->name);
|
|
||||||
|
|
||||||
ggml_set_op_params(result, &offset, sizeof(offset));
|
|
||||||
|
|
||||||
result->op = GGML_OP_VIEW;
|
result->op = GGML_OP_VIEW;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6284,10 +6300,7 @@ struct ggml_tensor * ggml_view_2d(
|
|||||||
|
|
||||||
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, (char *) a->data + offset);
|
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
|
||||||
ggml_format_name(result, "%s (view)", a->name);
|
|
||||||
|
|
||||||
ggml_set_op_params(result, &offset, sizeof(offset));
|
|
||||||
|
|
||||||
result->nb[1] = nb1;
|
result->nb[1] = nb1;
|
||||||
result->nb[2] = result->nb[1]*ne1;
|
result->nb[2] = result->nb[1]*ne1;
|
||||||
@ -6320,10 +6333,7 @@ struct ggml_tensor * ggml_view_3d(
|
|||||||
|
|
||||||
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, (char *) a->data + offset);
|
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
|
||||||
ggml_format_name(result, "%s (view)", a->name);
|
|
||||||
|
|
||||||
ggml_set_op_params(result, &offset, sizeof(offset));
|
|
||||||
|
|
||||||
result->nb[1] = nb1;
|
result->nb[1] = nb1;
|
||||||
result->nb[2] = nb2;
|
result->nb[2] = nb2;
|
||||||
@ -6358,10 +6368,7 @@ struct ggml_tensor * ggml_view_4d(
|
|||||||
|
|
||||||
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
|
const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
|
||||||
|
|
||||||
struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, (char *) a->data + offset);
|
struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
|
||||||
ggml_format_name(result, "%s (view)", a->name);
|
|
||||||
|
|
||||||
ggml_set_op_params(result, &offset, sizeof(offset));
|
|
||||||
|
|
||||||
result->nb[1] = nb1;
|
result->nb[1] = nb1;
|
||||||
result->nb[2] = nb2;
|
result->nb[2] = nb2;
|
||||||
@ -6432,7 +6439,7 @@ struct ggml_tensor * ggml_permute(
|
|||||||
result->src[0] = a;
|
result->src[0] = a;
|
||||||
|
|
||||||
int32_t params[] = { axis0, axis1, axis2, axis3 };
|
int32_t params[] = { axis0, axis1, axis2, axis3 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -6558,7 +6565,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
|
|||||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_DIAG_MASK_INF;
|
result->op = GGML_OP_DIAG_MASK_INF;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6598,7 +6605,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
|
|||||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
int32_t params[] = { n_past, inplace ? 1 : 0 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_DIAG_MASK_ZERO;
|
result->op = GGML_OP_DIAG_MASK_ZERO;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6714,9 +6721,9 @@ static struct ggml_tensor * ggml_rope_impl(
|
|||||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
int32_t params[6] = { n_past, n_dims, mode, n_ctx };
|
int32_t params[6] = { n_past, n_dims, mode, n_ctx };
|
||||||
memcpy(params + 4, &freq_base, sizeof(float));
|
memcpy(params + 4, &freq_base, sizeof(float));
|
||||||
memcpy(params + 5, &freq_scale, sizeof(float));
|
memcpy(params + 5, &freq_scale, sizeof(float));
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_ROPE;
|
result->op = GGML_OP_ROPE;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6745,6 +6752,18 @@ struct ggml_tensor * ggml_rope_inplace(
|
|||||||
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
|
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, 10000.0f, 1.0f, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_rope_custom(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
int n_past,
|
||||||
|
int n_dims,
|
||||||
|
int mode,
|
||||||
|
int n_ctx,
|
||||||
|
float freq_base,
|
||||||
|
float freq_scale) {
|
||||||
|
return ggml_rope_impl(ctx, a, n_past, n_dims, mode, n_ctx, freq_base, freq_scale, false);
|
||||||
|
}
|
||||||
|
|
||||||
struct ggml_tensor * ggml_rope_custom_inplace(
|
struct ggml_tensor * ggml_rope_custom_inplace(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
@ -6778,7 +6797,7 @@ struct ggml_tensor * ggml_rope_back(
|
|||||||
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
struct ggml_tensor * result = ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
int32_t params[] = { n_past, n_dims, mode, n_ctx };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_ROPE_BACK;
|
result->op = GGML_OP_ROPE_BACK;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6809,7 +6828,7 @@ struct ggml_tensor * ggml_alibi(
|
|||||||
|
|
||||||
int32_t op_params[3] = { n_past, n_head };
|
int32_t op_params[3] = { n_past, n_head };
|
||||||
memcpy(op_params + 2, &bias_max, sizeof(float));
|
memcpy(op_params + 2, &bias_max, sizeof(float));
|
||||||
ggml_set_op_params(result, &op_params, sizeof(op_params));
|
ggml_set_op_params(result, op_params, sizeof(op_params));
|
||||||
|
|
||||||
result->op = GGML_OP_ALIBI;
|
result->op = GGML_OP_ALIBI;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6836,7 +6855,7 @@ struct ggml_tensor * ggml_clamp(
|
|||||||
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
struct ggml_tensor * result = ggml_view_tensor(ctx, a);
|
||||||
|
|
||||||
float params[] = { min, max };
|
float params[] = { min, max };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_CLAMP;
|
result->op = GGML_OP_CLAMP;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6871,10 +6890,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|||||||
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
|
ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
|
||||||
a->ne[2], 1, 1,
|
a->ne[2], 1, 1,
|
||||||
};
|
};
|
||||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||||
|
|
||||||
int32_t params[] = { s0, p0, d0 };
|
int32_t params[] = { s0, p0, d0 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_CONV_1D;
|
result->op = GGML_OP_CONV_1D;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6886,10 +6905,10 @@ GGML_API struct ggml_tensor * ggml_conv_1d(
|
|||||||
|
|
||||||
// ggml_conv_2d
|
// ggml_conv_2d
|
||||||
|
|
||||||
struct ggml_tensor* ggml_conv_2d(
|
struct ggml_tensor * ggml_conv_2d(
|
||||||
struct ggml_context* ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
int s0,
|
int s0,
|
||||||
int s1,
|
int s1,
|
||||||
int p0,
|
int p0,
|
||||||
@ -6910,10 +6929,10 @@ struct ggml_tensor* ggml_conv_2d(
|
|||||||
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
|
ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
|
||||||
a->ne[3], b->ne[3],
|
a->ne[3], b->ne[3],
|
||||||
};
|
};
|
||||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
int32_t params[] = { s0, s1, p0, p1, d0, d1 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_CONV_2D;
|
result->op = GGML_OP_CONV_2D;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6926,7 +6945,7 @@ struct ggml_tensor* ggml_conv_2d(
|
|||||||
|
|
||||||
// ggml_conv_1d_ph
|
// ggml_conv_1d_ph
|
||||||
|
|
||||||
struct ggml_tensor* ggml_conv_1d_ph(
|
struct ggml_tensor * ggml_conv_1d_ph(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
@ -6944,7 +6963,7 @@ static int64_t ggml_calc_pool_output_size(int64_t ins, int ks, int s, int p) {
|
|||||||
|
|
||||||
// ggml_pool_1d
|
// ggml_pool_1d
|
||||||
|
|
||||||
struct ggml_tensor* ggml_pool_1d(
|
struct ggml_tensor * ggml_pool_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
enum ggml_op_pool op,
|
enum ggml_op_pool op,
|
||||||
@ -6963,10 +6982,10 @@ struct ggml_tensor* ggml_pool_1d(
|
|||||||
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
ggml_calc_pool_output_size(a->ne[0], k0, s0, p0),
|
||||||
a->ne[1],
|
a->ne[1],
|
||||||
};
|
};
|
||||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
|
||||||
|
|
||||||
int32_t params[] = { op, k0, s0, p0 };
|
int32_t params[] = { op, k0, s0, p0 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_POOL_1D;
|
result->op = GGML_OP_POOL_1D;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -6977,7 +6996,7 @@ struct ggml_tensor* ggml_pool_1d(
|
|||||||
|
|
||||||
// ggml_pool_2d
|
// ggml_pool_2d
|
||||||
|
|
||||||
struct ggml_tensor* ggml_pool_2d(
|
struct ggml_tensor * ggml_pool_2d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
enum ggml_op_pool op,
|
enum ggml_op_pool op,
|
||||||
@ -7000,10 +7019,10 @@ struct ggml_tensor* ggml_pool_2d(
|
|||||||
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
|
ggml_calc_pool_output_size(a->ne[1], k1, s1, p1),
|
||||||
a->ne[2],
|
a->ne[2],
|
||||||
};
|
};
|
||||||
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||||
|
|
||||||
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
int32_t params[] = { op, k0, k1, s0, s1, p0, p1 };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_POOL_2D;
|
result->op = GGML_OP_POOL_2D;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -7171,7 +7190,7 @@ struct ggml_tensor * ggml_win_part(
|
|||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
|
||||||
|
|
||||||
int32_t params[] = { npx, npy, w };
|
int32_t params[] = { npx, npy, w };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_WIN_PART;
|
result->op = GGML_OP_WIN_PART;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -7201,7 +7220,7 @@ struct ggml_tensor * ggml_win_unpart(
|
|||||||
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 3, ne);
|
||||||
|
|
||||||
int32_t params[] = { w };
|
int32_t params[] = { w };
|
||||||
ggml_set_op_params(result, &params, sizeof(params));
|
ggml_set_op_params(result, params, sizeof(params));
|
||||||
|
|
||||||
result->op = GGML_OP_WIN_UNPART;
|
result->op = GGML_OP_WIN_UNPART;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -7225,7 +7244,7 @@ static struct ggml_tensor * ggml_unary_impl(
|
|||||||
|
|
||||||
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
ggml_set_unary_op(result, op);
|
ggml_set_op_params_i32(result, 0, (int32_t) op);
|
||||||
|
|
||||||
result->op = GGML_OP_UNARY;
|
result->op = GGML_OP_UNARY;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
@ -7330,7 +7349,7 @@ struct ggml_tensor * ggml_map_binary_inplace_f32(
|
|||||||
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
|
return ggml_map_binary_impl_f32(ctx, a, b, fun, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_map_custom1
|
// ggml_map_custom1_f32
|
||||||
|
|
||||||
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
@ -7347,7 +7366,7 @@ static struct ggml_tensor * ggml_map_custom1_impl_f32(
|
|||||||
|
|
||||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||||
|
|
||||||
result->op = GGML_OP_MAP_CUSTOM1;
|
result->op = GGML_OP_MAP_CUSTOM1_F32;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
result->src[0] = a;
|
result->src[0] = a;
|
||||||
|
|
||||||
@ -7368,7 +7387,7 @@ struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
|||||||
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
|
return ggml_map_custom1_impl_f32(ctx, a, fun, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_map_custom2
|
// ggml_map_custom2_f32
|
||||||
|
|
||||||
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
@ -7386,7 +7405,7 @@ static struct ggml_tensor * ggml_map_custom2_impl_f32(
|
|||||||
|
|
||||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||||
|
|
||||||
result->op = GGML_OP_MAP_CUSTOM2;
|
result->op = GGML_OP_MAP_CUSTOM2_F32;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
result->src[0] = a;
|
result->src[0] = a;
|
||||||
result->src[1] = b;
|
result->src[1] = b;
|
||||||
@ -7410,7 +7429,7 @@ struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
|||||||
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
|
return ggml_map_custom2_impl_f32(ctx, a, b, fun, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_map_custom3
|
// ggml_map_custom3_f32
|
||||||
|
|
||||||
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
@ -7429,7 +7448,7 @@ static struct ggml_tensor * ggml_map_custom3_impl_f32(
|
|||||||
|
|
||||||
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
ggml_set_op_params(result, (const void *) &fun, sizeof(fun));
|
||||||
|
|
||||||
result->op = GGML_OP_MAP_CUSTOM3;
|
result->op = GGML_OP_MAP_CUSTOM3_F32;
|
||||||
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
result->src[0] = a;
|
result->src[0] = a;
|
||||||
result->src[1] = b;
|
result->src[1] = b;
|
||||||
@ -7456,6 +7475,190 @@ struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
|||||||
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
return ggml_map_custom3_impl_f32(ctx, a, b, c, fun, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml_map_custom1
|
||||||
|
struct ggml_map_custom1_op_params {
|
||||||
|
ggml_custom1_op_t fun;
|
||||||
|
int n_tasks;
|
||||||
|
void * userdata;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct ggml_tensor * ggml_map_custom1_impl(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
const ggml_custom1_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata,
|
||||||
|
bool inplace) {
|
||||||
|
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||||
|
|
||||||
|
bool is_node = false;
|
||||||
|
|
||||||
|
if (!inplace && a->grad) {
|
||||||
|
is_node = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
|
struct ggml_map_custom1_op_params params = {
|
||||||
|
/*.fun =*/ fun,
|
||||||
|
/*.n_tasks =*/ n_tasks,
|
||||||
|
/*.userdata =*/ userdata
|
||||||
|
};
|
||||||
|
ggml_set_op_params(result, (const void *) &params, sizeof(params));
|
||||||
|
|
||||||
|
result->op = GGML_OP_MAP_CUSTOM1;
|
||||||
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
|
result->src[0] = a;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom1(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
const ggml_custom1_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom1_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
const ggml_custom1_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom1_impl(ctx, a, fun, n_tasks, userdata, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_map_custom2
|
||||||
|
|
||||||
|
struct ggml_map_custom2_op_params {
|
||||||
|
ggml_custom2_op_t fun;
|
||||||
|
int n_tasks;
|
||||||
|
void * userdata;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct ggml_tensor * ggml_map_custom2_impl(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
const ggml_custom2_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata,
|
||||||
|
bool inplace) {
|
||||||
|
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||||
|
|
||||||
|
bool is_node = false;
|
||||||
|
|
||||||
|
if (!inplace && (a->grad || b->grad)) {
|
||||||
|
is_node = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
|
struct ggml_map_custom2_op_params params = {
|
||||||
|
/*.fun =*/ fun,
|
||||||
|
/*.n_tasks =*/ n_tasks,
|
||||||
|
/*.userdata =*/ userdata
|
||||||
|
};
|
||||||
|
ggml_set_op_params(result, (const void *) &params, sizeof(params));
|
||||||
|
|
||||||
|
result->op = GGML_OP_MAP_CUSTOM2;
|
||||||
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
|
result->src[0] = a;
|
||||||
|
result->src[1] = b;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom2(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
const ggml_custom2_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom2_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
const ggml_custom2_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom2_impl(ctx, a, b, fun, n_tasks, userdata, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_map_custom3
|
||||||
|
|
||||||
|
struct ggml_map_custom3_op_params {
|
||||||
|
ggml_custom3_op_t fun;
|
||||||
|
int n_tasks;
|
||||||
|
void * userdata;
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct ggml_tensor * ggml_map_custom3_impl(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
const ggml_custom3_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata,
|
||||||
|
bool inplace) {
|
||||||
|
GGML_ASSERT(n_tasks == GGML_N_TASKS_MAX || n_tasks > 0);
|
||||||
|
|
||||||
|
bool is_node = false;
|
||||||
|
|
||||||
|
if (!inplace && (a->grad || b->grad || c->grad)) {
|
||||||
|
is_node = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
|
||||||
|
|
||||||
|
struct ggml_map_custom3_op_params params = {
|
||||||
|
/*.fun =*/ fun,
|
||||||
|
/*.n_tasks =*/ n_tasks,
|
||||||
|
/*.userdata =*/ userdata
|
||||||
|
};
|
||||||
|
ggml_set_op_params(result, (const void *) &params, sizeof(params));
|
||||||
|
|
||||||
|
result->op = GGML_OP_MAP_CUSTOM3;
|
||||||
|
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
|
||||||
|
result->src[0] = a;
|
||||||
|
result->src[1] = b;
|
||||||
|
result->src[2] = c;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom3(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
const ggml_custom3_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_tensor * ggml_map_custom3_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
const ggml_custom3_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata) {
|
||||||
|
return ggml_map_custom3_impl(ctx, a, b, c, fun, n_tasks, userdata, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// ggml_cross_entropy_loss
|
// ggml_cross_entropy_loss
|
||||||
|
|
||||||
struct ggml_tensor * ggml_cross_entropy_loss(
|
struct ggml_tensor * ggml_cross_entropy_loss(
|
||||||
@ -9264,8 +9467,8 @@ static void ggml_compute_forward_sum_rows_f32(
|
|||||||
for (int64_t i3 = 0; i3 < ne03; i3++) {
|
for (int64_t i3 = 0; i3 < ne03; i3++) {
|
||||||
for (int64_t i2 = 0; i2 < ne02; i2++) {
|
for (int64_t i2 = 0; i2 < ne02; i2++) {
|
||||||
for (int64_t i1 = 0; i1 < ne01; i1++) {
|
for (int64_t i1 = 0; i1 < ne01; i1++) {
|
||||||
float* src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
float * src_row = (float *) ((char *) src0->data + i1*nb01 + i2*nb02 + i3*nb03);
|
||||||
float* dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
float * dst_row = (float *) ((char *) dst->data + i1*nb1 + i2*nb2 + i3*nb3);
|
||||||
float row_sum = 0;
|
float row_sum = 0;
|
||||||
ggml_vec_sum_f32(ne00, &row_sum, src_row);
|
ggml_vec_sum_f32(ne00, &row_sum, src_row);
|
||||||
dst_row[0] = row_sum;
|
dst_row[0] = row_sum;
|
||||||
@ -10527,72 +10730,96 @@ static void ggml_compute_forward_mul_mat(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// parallelize by src0 rows
|
|
||||||
const int64_t dr = (ne01 + nth - 1)/nth;
|
|
||||||
|
|
||||||
const int64_t ir10 = dr*ith;
|
|
||||||
const int64_t ir11 = MIN(ir10 + dr, ne01);
|
|
||||||
|
|
||||||
// src1 rows
|
|
||||||
const int64_t nr1 = ne11*ne12*ne13;
|
|
||||||
|
|
||||||
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
const void * wdata = (src1->type == vec_dot_type) ? src1->data : params->wdata;
|
||||||
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
|
const size_t row_size = ne10*GGML_TYPE_SIZE[vec_dot_type]/GGML_BLCK_SIZE[vec_dot_type];
|
||||||
|
|
||||||
for (int64_t ir1 = 0; ir1 < nr1; ++ir1) {
|
const int64_t nr0 = ne01; // src0 rows
|
||||||
const int64_t i13 = (ir1/(ne12*ne11));
|
const int64_t nr1 = ne11*ne12*ne13; // src1 rows
|
||||||
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
|
|
||||||
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
|
|
||||||
|
|
||||||
const int64_t ir0 = (ir1/ne11)%(ne02*ne03);
|
//printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
|
||||||
const int64_t i03 = (ir0/(ne02));
|
|
||||||
// Hack for "Falcon multi-query-attention key stutter" / alternative to ggml_repeat2.
|
|
||||||
// See https://github.com/ggerganov/llama.cpp/issues/1602#issuecomment-1606087470:
|
|
||||||
// GG: this is likely the correct way to broadcast, though need some more thought
|
|
||||||
// therefore leaving the comments to remind us for now
|
|
||||||
const int64_t i02 = (i12 / (ne12 / ne02));
|
|
||||||
// Original from PR/224 (and also essential/correct for non-broadcast matmuls in Falcon)
|
|
||||||
// const int64_t i02 = (ir0 - i03*ne02);
|
|
||||||
|
|
||||||
const int64_t i1 = i11;
|
// distribute the thread work across the inner or outer loop based on which one is larger
|
||||||
const int64_t i2 = i12;
|
|
||||||
const int64_t i3 = i13;
|
|
||||||
|
|
||||||
const char * src0_row = (const char *) src0->data + ( 0 + i02*nb02 + i03*nb03 );
|
const int64_t nth0 = nr0 > nr1 ? nth : 1; // parallelize by src0 rows
|
||||||
|
const int64_t nth1 = nr0 > nr1 ? 1 : nth; // parallelize by src1 rows
|
||||||
|
|
||||||
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
const int64_t ith0 = ith % nth0;
|
||||||
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
const int64_t ith1 = ith / nth0;
|
||||||
// the original src1 data pointer, so we should index using the indices directly
|
|
||||||
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
|
||||||
const char * src1_col = (const char *) wdata +
|
|
||||||
(src1_cont || src1->type != vec_dot_type
|
|
||||||
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
|
||||||
: (i11*nb11 + i12*nb12 + i13*nb13));
|
|
||||||
|
|
||||||
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
const int64_t dr0 = (nr0 + nth0 - 1)/nth0;
|
||||||
|
const int64_t dr1 = (nr1 + nth1 - 1)/nth1;
|
||||||
|
|
||||||
for (int64_t ir = ir10; ir < ir11; ++ir) {
|
const int64_t ir010 = dr0*ith0;
|
||||||
vec_dot(ne00, &dst_col[ir], src0_row + ir*nb01, src1_col);
|
const int64_t ir011 = MIN(ir010 + dr0, nr0);
|
||||||
}
|
|
||||||
|
const int64_t ir110 = dr1*ith1;
|
||||||
|
const int64_t ir111 = MIN(ir110 + dr1, nr1);
|
||||||
|
|
||||||
|
//printf("ir010 = %6lld, ir011 = %6lld, ir110 = %6lld, ir111 = %6lld\n", ir010, ir011, ir110, ir111);
|
||||||
|
|
||||||
|
// threads with no work simply yield (not sure if it helps)
|
||||||
|
if (ir010 >= ir011 || ir110 >= ir111) {
|
||||||
|
sched_yield();
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
//int64_t t1 = ggml_time_us();
|
assert(ne12 % ne02 == 0);
|
||||||
//static int64_t acc = 0;
|
assert(ne13 % ne03 == 0);
|
||||||
//acc += t1 - t0;
|
|
||||||
//if (t1 - t0 > 10) {
|
|
||||||
// printf("\n");
|
|
||||||
// printf("ne00 = %5d, ne01 = %5d, ne02 = %5d, ne03 = %5d\n", ne00, ne01, ne02, ne03);
|
|
||||||
// printf("nb00 = %5d, nb01 = %5d, nb02 = %5d, nb03 = %5d\n", nb00, nb01, nb02, nb03);
|
|
||||||
// printf("ne10 = %5d, ne11 = %5d, ne12 = %5d, ne13 = %5d\n", ne10, ne11, ne12, ne13);
|
|
||||||
|
|
||||||
// printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX task %d/%d: %d us, acc = %d\n", ith, nth, (int) (t1 - t0), (int) acc);
|
// broadcast factors
|
||||||
//}
|
const int64_t r2 = ne12/ne02;
|
||||||
|
const int64_t r3 = ne13/ne03;
|
||||||
|
|
||||||
|
// block-tiling attempt
|
||||||
|
const int64_t blck_0 = 16;
|
||||||
|
const int64_t blck_1 = 16;
|
||||||
|
|
||||||
|
// attempt to reduce false-sharing (does not seem to make a difference)
|
||||||
|
float tmp[16];
|
||||||
|
|
||||||
|
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
|
||||||
|
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
|
||||||
|
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
|
||||||
|
const int64_t i13 = (ir1/(ne12*ne11));
|
||||||
|
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
|
||||||
|
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
|
||||||
|
|
||||||
|
// broadcast src0 into src1
|
||||||
|
const int64_t i03 = i13/r3;
|
||||||
|
const int64_t i02 = i12/r2;
|
||||||
|
|
||||||
|
const int64_t i1 = i11;
|
||||||
|
const int64_t i2 = i12;
|
||||||
|
const int64_t i3 = i13;
|
||||||
|
|
||||||
|
const char * src0_row = (const char *) src0->data + (0 + i02*nb02 + i03*nb03);
|
||||||
|
|
||||||
|
// desc: when src1 is not a contiguous memory block we have to calculate the offset using the strides
|
||||||
|
// if it is, then we have either copied the data to params->wdata and made it contiguous or we are using
|
||||||
|
// the original src1 data pointer, so we should index using the indices directly
|
||||||
|
// TODO: this is a bit of a hack, we should probably have a better way to handle this
|
||||||
|
const char * src1_col = (const char *) wdata +
|
||||||
|
(src1_cont || src1->type != vec_dot_type
|
||||||
|
? (i11 + i12*ne11 + i13*ne12*ne11)*row_size
|
||||||
|
: (i11*nb11 + i12*nb12 + i13*nb13));
|
||||||
|
|
||||||
|
float * dst_col = (float *) ((char *) dst->data + (i1*nb1 + i2*nb2 + i3*nb3));
|
||||||
|
|
||||||
|
//for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||||
|
// vec_dot(ne00, &dst_col[ir0], src0_row + ir0*nb01, src1_col);
|
||||||
|
//}
|
||||||
|
|
||||||
|
for (int64_t ir0 = iir0; ir0 < iir0 + blck_0 && ir0 < ir011; ++ir0) {
|
||||||
|
vec_dot(ne00, &tmp[ir0 - iir0], src0_row + ir0*nb01, src1_col);
|
||||||
|
}
|
||||||
|
memcpy(&dst_col[iir0], tmp, (MIN(iir0 + blck_0, ir011) - iir0)*sizeof(float));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ggml_compute_forward_out_prod
|
// ggml_compute_forward_out_prod
|
||||||
|
|
||||||
|
|
||||||
static void ggml_compute_forward_out_prod_f32(
|
static void ggml_compute_forward_out_prod_f32(
|
||||||
const struct ggml_compute_params * params,
|
const struct ggml_compute_params * params,
|
||||||
const struct ggml_tensor * src0,
|
const struct ggml_tensor * src0,
|
||||||
@ -12875,7 +13102,7 @@ static void ggml_compute_forward_pool_1d(
|
|||||||
const struct ggml_tensor * src0,
|
const struct ggml_tensor * src0,
|
||||||
struct ggml_tensor * dst) {
|
struct ggml_tensor * dst) {
|
||||||
|
|
||||||
const int32_t* opts = (const int32_t*)dst->op_params;
|
const int32_t * opts = (const int32_t *)dst->op_params;
|
||||||
enum ggml_op_pool op = opts[0];
|
enum ggml_op_pool op = opts[0];
|
||||||
const int k0 = opts[1];
|
const int k0 = opts[1];
|
||||||
const int s0 = opts[2];
|
const int s0 = opts[2];
|
||||||
@ -14208,24 +14435,6 @@ static void ggml_compute_forward_map_custom1_f32(
|
|||||||
fun(dst, a);
|
fun(dst, a);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void ggml_compute_forward_map_custom1(
|
|
||||||
const struct ggml_compute_params * params,
|
|
||||||
const struct ggml_tensor * a,
|
|
||||||
struct ggml_tensor * dst,
|
|
||||||
const ggml_custom1_op_f32_t fun) {
|
|
||||||
switch (a->type) {
|
|
||||||
case GGML_TYPE_F32:
|
|
||||||
{
|
|
||||||
ggml_compute_forward_map_custom1_f32(params, a, dst, fun);
|
|
||||||
} break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(false);
|
|
||||||
} break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ggml_compute_forward_map_custom2
|
// ggml_compute_forward_map_custom2
|
||||||
|
|
||||||
static void ggml_compute_forward_map_custom2_f32(
|
static void ggml_compute_forward_map_custom2_f32(
|
||||||
@ -14244,24 +14453,6 @@ static void ggml_compute_forward_map_custom2_f32(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static void ggml_compute_forward_map_custom2(
|
|
||||||
const struct ggml_compute_params * params,
|
|
||||||
const struct ggml_tensor * a,
|
|
||||||
const struct ggml_tensor * b,
|
|
||||||
struct ggml_tensor * dst,
|
|
||||||
const ggml_custom2_op_f32_t fun) {
|
|
||||||
switch (a->type) {
|
|
||||||
case GGML_TYPE_F32:
|
|
||||||
{
|
|
||||||
ggml_compute_forward_map_custom2_f32(params, a, b, dst, fun);
|
|
||||||
} break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(false);
|
|
||||||
} break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// ggml_compute_forward_map_custom3
|
// ggml_compute_forward_map_custom3
|
||||||
|
|
||||||
static void ggml_compute_forward_map_custom3_f32(
|
static void ggml_compute_forward_map_custom3_f32(
|
||||||
@ -14280,24 +14471,52 @@ static void ggml_compute_forward_map_custom3_f32(
|
|||||||
fun(dst, a, b, c);
|
fun(dst, a, b, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ggml_compute_forward_map_custom1
|
||||||
|
|
||||||
|
static void ggml_compute_forward_map_custom1(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) dst->op_params;
|
||||||
|
|
||||||
|
p->fun(dst, a, params->ith, params->nth, p->userdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_compute_forward_map_custom2
|
||||||
|
|
||||||
|
static void ggml_compute_forward_map_custom2(
|
||||||
|
const struct ggml_compute_params * params,
|
||||||
|
const struct ggml_tensor * a,
|
||||||
|
const struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * dst) {
|
||||||
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) dst->op_params;
|
||||||
|
|
||||||
|
p->fun(dst, a, b, params->ith, params->nth, p->userdata);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ggml_compute_forward_map_custom3
|
||||||
|
|
||||||
static void ggml_compute_forward_map_custom3(
|
static void ggml_compute_forward_map_custom3(
|
||||||
const struct ggml_compute_params * params,
|
const struct ggml_compute_params * params,
|
||||||
const struct ggml_tensor * a,
|
const struct ggml_tensor * a,
|
||||||
const struct ggml_tensor * b,
|
const struct ggml_tensor * b,
|
||||||
const struct ggml_tensor * c,
|
const struct ggml_tensor * c,
|
||||||
struct ggml_tensor * dst,
|
struct ggml_tensor * dst) {
|
||||||
const ggml_custom3_op_f32_t fun) {
|
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||||
switch (a->type) {
|
return;
|
||||||
case GGML_TYPE_F32:
|
|
||||||
{
|
|
||||||
ggml_compute_forward_map_custom3_f32(params, a, b, c, dst, fun);
|
|
||||||
} break;
|
|
||||||
default:
|
|
||||||
{
|
|
||||||
GGML_ASSERT(false);
|
|
||||||
} break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) dst->op_params;
|
||||||
|
|
||||||
|
p->fun(dst, a, b, c, params->ith, params->nth, p->userdata);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ggml_compute_forward_cross_entropy_loss
|
// ggml_compute_forward_cross_entropy_loss
|
||||||
@ -14819,25 +15038,40 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||||||
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
ggml_compute_forward_map_binary(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MAP_CUSTOM1:
|
case GGML_OP_MAP_CUSTOM1_F32:
|
||||||
{
|
{
|
||||||
ggml_custom1_op_f32_t fun;
|
ggml_custom1_op_f32_t fun;
|
||||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||||
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor, fun);
|
ggml_compute_forward_map_custom1_f32(params, tensor->src[0], tensor, fun);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case GGML_OP_MAP_CUSTOM2_F32:
|
||||||
|
{
|
||||||
|
ggml_custom2_op_f32_t fun;
|
||||||
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||||
|
ggml_compute_forward_map_custom2_f32(params, tensor->src[0], tensor->src[1], tensor, fun);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case GGML_OP_MAP_CUSTOM3_F32:
|
||||||
|
{
|
||||||
|
ggml_custom3_op_f32_t fun;
|
||||||
|
memcpy(&fun, tensor->op_params, sizeof(fun));
|
||||||
|
ggml_compute_forward_map_custom3_f32(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case GGML_OP_MAP_CUSTOM1:
|
||||||
|
{
|
||||||
|
ggml_compute_forward_map_custom1(params, tensor->src[0], tensor);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MAP_CUSTOM2:
|
case GGML_OP_MAP_CUSTOM2:
|
||||||
{
|
{
|
||||||
ggml_custom2_op_f32_t fun;
|
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor);
|
||||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
||||||
ggml_compute_forward_map_custom2(params, tensor->src[0], tensor->src[1], tensor, fun);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GGML_OP_MAP_CUSTOM3:
|
case GGML_OP_MAP_CUSTOM3:
|
||||||
{
|
{
|
||||||
ggml_custom3_op_f32_t fun;
|
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
|
||||||
memcpy(&fun, tensor->op_params, sizeof(fun));
|
|
||||||
ggml_compute_forward_map_custom3(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor, fun);
|
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||||
@ -15645,6 +15879,9 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
|
|||||||
} break;
|
} break;
|
||||||
case GGML_OP_MAP_UNARY:
|
case GGML_OP_MAP_UNARY:
|
||||||
case GGML_OP_MAP_BINARY:
|
case GGML_OP_MAP_BINARY:
|
||||||
|
case GGML_OP_MAP_CUSTOM1_F32:
|
||||||
|
case GGML_OP_MAP_CUSTOM2_F32:
|
||||||
|
case GGML_OP_MAP_CUSTOM3_F32:
|
||||||
case GGML_OP_MAP_CUSTOM1:
|
case GGML_OP_MAP_CUSTOM1:
|
||||||
case GGML_OP_MAP_CUSTOM2:
|
case GGML_OP_MAP_CUSTOM2:
|
||||||
case GGML_OP_MAP_CUSTOM3:
|
case GGML_OP_MAP_CUSTOM3:
|
||||||
@ -16430,12 +16667,39 @@ struct ggml_cplan ggml_graph_plan(struct ggml_cgraph * cgraph, int n_threads) {
|
|||||||
case GGML_OP_WIN_UNPART:
|
case GGML_OP_WIN_UNPART:
|
||||||
case GGML_OP_MAP_UNARY:
|
case GGML_OP_MAP_UNARY:
|
||||||
case GGML_OP_MAP_BINARY:
|
case GGML_OP_MAP_BINARY:
|
||||||
case GGML_OP_MAP_CUSTOM1:
|
case GGML_OP_MAP_CUSTOM1_F32:
|
||||||
case GGML_OP_MAP_CUSTOM2:
|
case GGML_OP_MAP_CUSTOM2_F32:
|
||||||
case GGML_OP_MAP_CUSTOM3:
|
case GGML_OP_MAP_CUSTOM3_F32:
|
||||||
{
|
{
|
||||||
n_tasks = 1;
|
n_tasks = 1;
|
||||||
} break;
|
} break;
|
||||||
|
case GGML_OP_MAP_CUSTOM1:
|
||||||
|
{
|
||||||
|
struct ggml_map_custom1_op_params * p = (struct ggml_map_custom1_op_params *) node->op_params;
|
||||||
|
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||||
|
n_tasks = n_threads;
|
||||||
|
} else {
|
||||||
|
n_tasks = MIN(p->n_tasks, n_threads);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_OP_MAP_CUSTOM2:
|
||||||
|
{
|
||||||
|
struct ggml_map_custom2_op_params * p = (struct ggml_map_custom2_op_params *) node->op_params;
|
||||||
|
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||||
|
n_tasks = n_threads;
|
||||||
|
} else {
|
||||||
|
n_tasks = MIN(p->n_tasks, n_threads);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
|
case GGML_OP_MAP_CUSTOM3:
|
||||||
|
{
|
||||||
|
struct ggml_map_custom3_op_params * p = (struct ggml_map_custom3_op_params *) node->op_params;
|
||||||
|
if (p->n_tasks == GGML_N_TASKS_MAX) {
|
||||||
|
n_tasks = n_threads;
|
||||||
|
} else {
|
||||||
|
n_tasks = MIN(p->n_tasks, n_threads);
|
||||||
|
}
|
||||||
|
} break;
|
||||||
case GGML_OP_CROSS_ENTROPY_LOSS:
|
case GGML_OP_CROSS_ENTROPY_LOSS:
|
||||||
{
|
{
|
||||||
n_tasks = n_threads;
|
n_tasks = n_threads;
|
||||||
|
158
ggml.h
158
ggml.h
@ -183,6 +183,15 @@
|
|||||||
# define GGML_API
|
# define GGML_API
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// TODO: support for clang
|
||||||
|
#ifdef __GNUC__
|
||||||
|
# define GGML_DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
|
||||||
|
#elif defined(_MSC_VER)
|
||||||
|
# define GGML_DEPRECATED(func, hint) __declspec(deprecated(hint)) func
|
||||||
|
#else
|
||||||
|
# define GGML_DEPRECATED(func, hint) func
|
||||||
|
#endif
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
@ -378,6 +387,10 @@ extern "C" {
|
|||||||
GGML_OP_MAP_UNARY,
|
GGML_OP_MAP_UNARY,
|
||||||
GGML_OP_MAP_BINARY,
|
GGML_OP_MAP_BINARY,
|
||||||
|
|
||||||
|
GGML_OP_MAP_CUSTOM1_F32,
|
||||||
|
GGML_OP_MAP_CUSTOM2_F32,
|
||||||
|
GGML_OP_MAP_CUSTOM3_F32,
|
||||||
|
|
||||||
GGML_OP_MAP_CUSTOM1,
|
GGML_OP_MAP_CUSTOM1,
|
||||||
GGML_OP_MAP_CUSTOM2,
|
GGML_OP_MAP_CUSTOM2,
|
||||||
GGML_OP_MAP_CUSTOM3,
|
GGML_OP_MAP_CUSTOM3,
|
||||||
@ -574,6 +587,8 @@ extern "C" {
|
|||||||
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
GGML_API bool ggml_is_contiguous(const struct ggml_tensor * tensor);
|
||||||
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
GGML_API bool ggml_is_permuted (const struct ggml_tensor * tensor);
|
||||||
|
|
||||||
|
GGML_API bool ggml_are_same_shape(const struct ggml_tensor * t0, const struct ggml_tensor * t1);
|
||||||
|
|
||||||
// use this to compute the memory overhead of a tensor
|
// use this to compute the memory overhead of a tensor
|
||||||
GGML_API size_t ggml_tensor_overhead(void);
|
GGML_API size_t ggml_tensor_overhead(void);
|
||||||
|
|
||||||
@ -1174,7 +1189,18 @@ extern "C" {
|
|||||||
int mode,
|
int mode,
|
||||||
int n_ctx);
|
int n_ctx);
|
||||||
|
|
||||||
// custom RoPE, in-place, returns view(a)
|
// custom RoPE
|
||||||
|
GGML_API struct ggml_tensor * ggml_rope_custom(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
int n_past,
|
||||||
|
int n_dims,
|
||||||
|
int mode,
|
||||||
|
int n_ctx,
|
||||||
|
float freq_base,
|
||||||
|
float freq_scale);
|
||||||
|
|
||||||
|
// in-place, returns view(a)
|
||||||
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
@ -1233,7 +1259,7 @@ extern "C" {
|
|||||||
|
|
||||||
// conv_1d with padding = half
|
// conv_1d with padding = half
|
||||||
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
// alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
|
||||||
GGML_API struct ggml_tensor* ggml_conv_1d_ph(
|
GGML_API struct ggml_tensor * ggml_conv_1d_ph(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
@ -1246,7 +1272,7 @@ extern "C" {
|
|||||||
GGML_OP_POOL_COUNT,
|
GGML_OP_POOL_COUNT,
|
||||||
};
|
};
|
||||||
|
|
||||||
GGML_API struct ggml_tensor* ggml_pool_1d(
|
GGML_API struct ggml_tensor * ggml_pool_1d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
enum ggml_op_pool op,
|
enum ggml_op_pool op,
|
||||||
@ -1254,7 +1280,7 @@ extern "C" {
|
|||||||
int s0, // stride
|
int s0, // stride
|
||||||
int p0); // padding
|
int p0); // padding
|
||||||
|
|
||||||
GGML_API struct ggml_tensor* ggml_pool_2d(
|
GGML_API struct ggml_tensor * ggml_pool_2d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
enum ggml_op_pool op,
|
enum ggml_op_pool op,
|
||||||
@ -1308,15 +1334,6 @@ extern "C" {
|
|||||||
int h0,
|
int h0,
|
||||||
int w);
|
int w);
|
||||||
|
|
||||||
// custom operators
|
|
||||||
|
|
||||||
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
|
||||||
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
|
||||||
|
|
||||||
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
|
||||||
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
||||||
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_unary(
|
GGML_API struct ggml_tensor * ggml_unary(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
@ -1327,63 +1344,138 @@ extern "C" {
|
|||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
enum ggml_unary_op op);
|
enum ggml_unary_op op);
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
// custom operators
|
||||||
|
|
||||||
|
typedef void (*ggml_unary_op_f32_t) (const int, float *, const float *);
|
||||||
|
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
|
||||||
|
|
||||||
|
typedef void (*ggml_custom1_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *);
|
||||||
|
typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||||
|
typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
|
||||||
|
|
||||||
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
ggml_unary_op_f32_t fun);
|
ggml_unary_op_f32_t fun),
|
||||||
|
"use ggml_map_custom1 instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_unary_inplace_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
ggml_unary_op_f32_t fun);
|
ggml_unary_op_f32_t fun),
|
||||||
|
"use ggml_map_custom1_inplace instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
ggml_binary_op_f32_t fun);
|
ggml_binary_op_f32_t fun),
|
||||||
|
"use ggml_map_custom2 instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_binary_inplace_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
ggml_binary_op_f32_t fun);
|
ggml_binary_op_f32_t fun),
|
||||||
|
"use ggml_map_custom2_inplace instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
ggml_custom1_op_f32_t fun);
|
ggml_custom1_op_f32_t fun),
|
||||||
|
"use ggml_map_custom1 instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom1_inplace_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
ggml_custom1_op_f32_t fun);
|
ggml_custom1_op_f32_t fun),
|
||||||
|
"use ggml_map_custom1_inplace instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
ggml_custom2_op_f32_t fun);
|
ggml_custom2_op_f32_t fun),
|
||||||
|
"use ggml_map_custom2 instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom2_inplace_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
ggml_custom2_op_f32_t fun);
|
ggml_custom2_op_f32_t fun),
|
||||||
|
"use ggml_map_custom2_inplace instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
struct ggml_tensor * c,
|
struct ggml_tensor * c,
|
||||||
ggml_custom3_op_f32_t fun);
|
ggml_custom3_op_f32_t fun),
|
||||||
|
"use ggml_map_custom3 instead");
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_map_custom3_inplace_f32(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
struct ggml_tensor * b,
|
struct ggml_tensor * b,
|
||||||
struct ggml_tensor * c,
|
struct ggml_tensor * c,
|
||||||
ggml_custom3_op_f32_t fun);
|
ggml_custom3_op_f32_t fun),
|
||||||
|
"use ggml_map_custom3_inplace instead");
|
||||||
|
|
||||||
|
// custom operators v2
|
||||||
|
|
||||||
|
typedef void (*ggml_custom1_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, int ith, int nth, void * userdata);
|
||||||
|
typedef void (*ggml_custom2_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, int ith, int nth, void * userdata);
|
||||||
|
typedef void (*ggml_custom3_op_t)(struct ggml_tensor * dst , const struct ggml_tensor * a, const struct ggml_tensor * b, const struct ggml_tensor * c, int ith, int nth, void * userdata);
|
||||||
|
|
||||||
|
#define GGML_N_TASKS_MAX -1
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom1(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
ggml_custom1_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom1_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
ggml_custom1_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom2(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
ggml_custom2_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom2_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
ggml_custom2_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom3(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
ggml_custom3_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_map_custom3_inplace(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
struct ggml_tensor * c,
|
||||||
|
ggml_custom3_op_t fun,
|
||||||
|
int n_tasks,
|
||||||
|
void * userdata);
|
||||||
|
|
||||||
// loss function
|
// loss function
|
||||||
|
|
||||||
|
@ -1,29 +1,25 @@
|
|||||||
# Grammar for subset of JSON - doesn't support full string or number syntax
|
root ::= object
|
||||||
|
value ::= object | array | string | number | ("true" | "false" | "null") ws
|
||||||
root ::= object
|
|
||||||
value ::= object | array | string | number | boolean | "null"
|
|
||||||
|
|
||||||
object ::=
|
object ::=
|
||||||
"{" ws (
|
"{" ws (
|
||||||
string ":" ws value
|
string ":" ws value
|
||||||
("," ws string ":" ws value)*
|
("," ws string ":" ws value)*
|
||||||
)? "}"
|
)? "}" ws
|
||||||
|
|
||||||
array ::=
|
array ::=
|
||||||
"[" ws (
|
"[" ws (
|
||||||
value
|
value
|
||||||
("," ws value)*
|
("," ws value)*
|
||||||
)? "]"
|
)? "]" ws
|
||||||
|
|
||||||
string ::=
|
string ::=
|
||||||
"\"" (
|
"\"" (
|
||||||
[^"\\] |
|
[^"\\] |
|
||||||
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) # escapes
|
||||||
)* "\"" ws
|
)* "\"" ws
|
||||||
|
|
||||||
# Only plain integers currently
|
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? ws
|
||||||
number ::= "-"? [0-9]+ ws
|
|
||||||
boolean ::= ("true" | "false") ws
|
|
||||||
|
|
||||||
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
# Optional space: by convention, applied in this grammar after literal chars when allowed
|
||||||
ws ::= ([ \t\n] ws)?
|
ws ::= ([ \t\n] ws)?
|
||||||
|
62
k_quants.c
62
k_quants.c
@ -39,6 +39,8 @@
|
|||||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
||||||
|
|
||||||
|
#define MM256_SET_M128I(a, b) _mm256_insertf128_si256(_mm256_castsi128_si256(b), (a), 1)
|
||||||
|
|
||||||
//
|
//
|
||||||
// 2-6 bit quantization in super-blocks
|
// 2-6 bit quantization in super-blocks
|
||||||
//
|
//
|
||||||
@ -1353,7 +1355,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m256i all_scales = _mm256_cvtepi8_epi16(scales8);
|
const __m256i all_scales = _mm256_cvtepi8_epi16(scales8);
|
||||||
const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0);
|
const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0);
|
||||||
const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1);
|
const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1);
|
||||||
const __m256i scales[2] = {_mm256_set_m128i(l_scales, l_scales), _mm256_set_m128i(h_scales, h_scales)};
|
const __m256i scales[2] = {MM256_SET_M128I(l_scales, l_scales), MM256_SET_M128I(h_scales, h_scales)};
|
||||||
|
|
||||||
__m256i sumi = _mm256_setzero_si256();
|
__m256i sumi = _mm256_setzero_si256();
|
||||||
|
|
||||||
@ -1421,7 +1423,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m128i summs_1 = _mm_madd_epi16(mins_1, _mm_loadu_si128((const __m128i*)&y[i].bsums[8]));
|
const __m128i summs_1 = _mm_madd_epi16(mins_1, _mm_loadu_si128((const __m128i*)&y[i].bsums[8]));
|
||||||
|
|
||||||
// sumf += -dmin * summs in 32bits*8
|
// sumf += -dmin * summs in 32bits*8
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&dmin), _mm256_cvtepi32_ps(_mm256_set_m128i(summs_1, summs_0))), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&dmin), _mm256_cvtepi32_ps(MM256_SET_M128I(summs_1, summs_0))), acc);
|
||||||
|
|
||||||
const __m128i scales_0 = _mm_cvtepi8_epi16(scales16);
|
const __m128i scales_0 = _mm_cvtepi8_epi16(scales16);
|
||||||
const __m128i scales_1 = _mm_cvtepi8_epi16(_mm_unpackhi_epi64(scales16, scales16));
|
const __m128i scales_1 = _mm_cvtepi8_epi16(_mm_unpackhi_epi64(scales16, scales16));
|
||||||
@ -1493,7 +1495,7 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// sumf += dall * isum - dmin * summs in 32bits
|
// sumf += dall * isum - dmin * summs in 32bits
|
||||||
__m256i sumi = _mm256_set_m128i(sumi_1, sumi_0);
|
__m256i sumi = MM256_SET_M128I(sumi_1, sumi_0);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&dall), _mm256_cvtepi32_ps(sumi)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&dall), _mm256_cvtepi32_ps(sumi)), acc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1644,8 +1646,8 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
summs += dmin * smin;
|
summs += dmin * smin;
|
||||||
|
|
||||||
const __m128i q2bits = _mm_loadu_si128((const __m128i*)q2);
|
const __m128i q2bits = _mm_loadu_si128((const __m128i*)q2);
|
||||||
const __m256i q2_0 = _mm256_and_si256(_mm256_set_m128i(_mm_srli_epi16(q2bits, 2), q2bits), m3);
|
const __m256i q2_0 = _mm256_and_si256(MM256_SET_M128I(_mm_srli_epi16(q2bits, 2), q2bits), m3);
|
||||||
const __m256i q2_1 = _mm256_and_si256(_mm256_set_m128i(_mm_srli_epi16(q2bits, 6), _mm_srli_epi16(q2bits, 4)), m3);
|
const __m256i q2_1 = _mm256_and_si256(MM256_SET_M128I(_mm_srli_epi16(q2bits, 6), _mm_srli_epi16(q2bits, 4)), m3);
|
||||||
|
|
||||||
const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)(q8+ 0));
|
const __m256i q8_0 = _mm256_loadu_si256((const __m256i*)(q8+ 0));
|
||||||
const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)(q8+32));
|
const __m256i q8_1 = _mm256_loadu_si256((const __m256i*)(q8+32));
|
||||||
@ -1709,10 +1711,10 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m128i p2 = _mm_maddubs_epi16(q2_2, _mm256_extractf128_si256(q8_1, 0));
|
const __m128i p2 = _mm_maddubs_epi16(q2_2, _mm256_extractf128_si256(q8_1, 0));
|
||||||
const __m128i p3 = _mm_maddubs_epi16(q2_3, _mm256_extractf128_si256(q8_1, 1));
|
const __m128i p3 = _mm_maddubs_epi16(q2_3, _mm256_extractf128_si256(q8_1, 1));
|
||||||
|
|
||||||
const __m256i p_0 = _mm256_set_m128i(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p0, p0)), _mm_cvtepi16_epi32(p0));
|
const __m256i p_0 = MM256_SET_M128I(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p0, p0)), _mm_cvtepi16_epi32(p0));
|
||||||
const __m256i p_1 = _mm256_set_m128i(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p1, p1)), _mm_cvtepi16_epi32(p1));
|
const __m256i p_1 = MM256_SET_M128I(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p1, p1)), _mm_cvtepi16_epi32(p1));
|
||||||
const __m256i p_2 = _mm256_set_m128i(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p2, p2)), _mm_cvtepi16_epi32(p2));
|
const __m256i p_2 = MM256_SET_M128I(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p2, p2)), _mm_cvtepi16_epi32(p2));
|
||||||
const __m256i p_3 = _mm256_set_m128i(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p3, p3)), _mm_cvtepi16_epi32(p3));
|
const __m256i p_3 = MM256_SET_M128I(_mm_cvtepi16_epi32(_mm_unpackhi_epi64(p3, p3)), _mm_cvtepi16_epi32(p3));
|
||||||
|
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d * db[0]), _mm256_cvtepi32_ps(p_0)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d * db[0]), _mm256_cvtepi32_ps(p_0)), acc);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d * db[1]), _mm256_cvtepi32_ps(p_1)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d * db[1]), _mm256_cvtepi32_ps(p_1)), acc);
|
||||||
@ -1917,7 +1919,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m256i all_scales = _mm256_cvtepi8_epi16(scales128);
|
const __m256i all_scales = _mm256_cvtepi8_epi16(scales128);
|
||||||
const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0);
|
const __m128i l_scales = _mm256_extracti128_si256(all_scales, 0);
|
||||||
const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1);
|
const __m128i h_scales = _mm256_extracti128_si256(all_scales, 1);
|
||||||
const __m256i scales[2] = {_mm256_set_m128i(l_scales, l_scales), _mm256_set_m128i(h_scales, h_scales)};
|
const __m256i scales[2] = {MM256_SET_M128I(l_scales, l_scales), MM256_SET_M128I(h_scales, h_scales)};
|
||||||
|
|
||||||
// high bit
|
// high bit
|
||||||
const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].hmask);
|
const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].hmask);
|
||||||
@ -2128,7 +2130,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
}
|
}
|
||||||
|
|
||||||
// multiply with block scale and accumulate
|
// multiply with block scale and accumulate
|
||||||
__m256i sumi = _mm256_set_m128i(sumi_1, sumi_0);
|
__m256i sumi = MM256_SET_M128I(sumi_1, sumi_0);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi)), acc);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -2303,13 +2305,13 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
aux16[0] = a & 0x0f0f;
|
aux16[0] = a & 0x0f0f;
|
||||||
aux16[1] = (a >> 4) & 0x0f0f;
|
aux16[1] = (a >> 4) & 0x0f0f;
|
||||||
|
|
||||||
const __m256i scale_0 = _mm256_set_m128i(_mm_set1_epi16(aux8[2] - 8), _mm_set1_epi16(aux8[0] - 8));
|
const __m256i scale_0 = MM256_SET_M128I(_mm_set1_epi16(aux8[2] - 8), _mm_set1_epi16(aux8[0] - 8));
|
||||||
const __m256i scale_1 = _mm256_set_m128i(_mm_set1_epi16(aux8[3] - 8), _mm_set1_epi16(aux8[1] - 8));
|
const __m256i scale_1 = MM256_SET_M128I(_mm_set1_epi16(aux8[3] - 8), _mm_set1_epi16(aux8[1] - 8));
|
||||||
|
|
||||||
memcpy(&aux64, x[i].hmask, 8);
|
memcpy(&aux64, x[i].hmask, 8);
|
||||||
|
|
||||||
const __m128i haux = _mm_set_epi64x(aux64 >> 1, aux64 >> 0);
|
const __m128i haux = _mm_set_epi64x(aux64 >> 1, aux64 >> 0);
|
||||||
__m256i q3h_0 = _mm256_set_m128i(_mm_srli_epi16(haux, 2), haux);
|
__m256i q3h_0 = MM256_SET_M128I(_mm_srli_epi16(haux, 2), haux);
|
||||||
__m256i q3h_1 = _mm256_srli_epi16(q3h_0, 4);
|
__m256i q3h_1 = _mm256_srli_epi16(q3h_0, 4);
|
||||||
q3h_0 = _mm256_slli_epi16(_mm256_andnot_si256(q3h_0, m1), 2);
|
q3h_0 = _mm256_slli_epi16(_mm256_andnot_si256(q3h_0, m1), 2);
|
||||||
q3h_1 = _mm256_slli_epi16(_mm256_andnot_si256(q3h_1, m1), 2);
|
q3h_1 = _mm256_slli_epi16(_mm256_andnot_si256(q3h_1, m1), 2);
|
||||||
@ -2318,7 +2320,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m128i q3bits = _mm_loadu_si128((const __m128i*)q3);
|
const __m128i q3bits = _mm_loadu_si128((const __m128i*)q3);
|
||||||
|
|
||||||
// prepare low and high bits
|
// prepare low and high bits
|
||||||
const __m256i q3aux = _mm256_set_m128i(_mm_srli_epi16(q3bits, 2), q3bits);
|
const __m256i q3aux = MM256_SET_M128I(_mm_srli_epi16(q3bits, 2), q3bits);
|
||||||
const __m256i q3l_0 = _mm256_and_si256(q3aux, m3);
|
const __m256i q3l_0 = _mm256_and_si256(q3aux, m3);
|
||||||
const __m256i q3l_1 = _mm256_and_si256(_mm256_srli_epi16(q3aux, 4), m3);
|
const __m256i q3l_1 = _mm256_and_si256(_mm256_srli_epi16(q3aux, 4), m3);
|
||||||
|
|
||||||
@ -2429,7 +2431,7 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
p16_0 = _mm_add_epi32(p16_0, p16_2);
|
p16_0 = _mm_add_epi32(p16_0, p16_2);
|
||||||
p16_1 = _mm_add_epi32(p16_1, p16_3);
|
p16_1 = _mm_add_epi32(p16_1, p16_3);
|
||||||
__m256i p16 = _mm256_set_m128i(p16_1, p16_0);
|
__m256i p16 = MM256_SET_M128I(p16_1, p16_0);
|
||||||
|
|
||||||
// multiply with block scale and accumulate
|
// multiply with block scale and accumulate
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(p16)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(p16)), acc);
|
||||||
@ -2620,7 +2622,7 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
acc_m = _mm_fmadd_ps(_mm_set1_ps(dmin), _mm_cvtepi32_ps(prod), acc_m);
|
acc_m = _mm_fmadd_ps(_mm_set1_ps(dmin), _mm_cvtepi32_ps(prod), acc_m);
|
||||||
|
|
||||||
const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0);
|
const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0);
|
||||||
const __m256i scales = _mm256_set_m128i(sc128, sc128);
|
const __m256i scales = MM256_SET_M128I(sc128, sc128);
|
||||||
|
|
||||||
__m256i sumi = _mm256_setzero_si256();
|
__m256i sumi = _mm256_setzero_si256();
|
||||||
|
|
||||||
@ -2727,7 +2729,7 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
}
|
}
|
||||||
|
|
||||||
__m256 vd = _mm256_set1_ps(d);
|
__m256 vd = _mm256_set1_ps(d);
|
||||||
__m256i sumi = _mm256_set_m128i(sumi_1, sumi_0);
|
__m256i sumi = MM256_SET_M128I(sumi_1, sumi_0);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(sumi)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(sumi)), acc);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -2968,11 +2970,11 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
const __m128i p32_0 = _mm_madd_epi16(_mm_set1_epi16(scales[0]), p16_0);
|
const __m128i p32_0 = _mm_madd_epi16(_mm_set1_epi16(scales[0]), p16_0);
|
||||||
const __m128i p32_1 = _mm_madd_epi16(_mm_set1_epi16(scales[0]), p16_1);
|
const __m128i p32_1 = _mm_madd_epi16(_mm_set1_epi16(scales[0]), p16_1);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(_mm256_set_m128i(p32_1, p32_0))), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(MM256_SET_M128I(p32_1, p32_0))), acc);
|
||||||
|
|
||||||
const __m128i p32_2 = _mm_madd_epi16(_mm_set1_epi16(scales[1]), p16_2);
|
const __m128i p32_2 = _mm_madd_epi16(_mm_set1_epi16(scales[1]), p16_2);
|
||||||
const __m128i p32_3 = _mm_madd_epi16(_mm_set1_epi16(scales[1]), p16_3);
|
const __m128i p32_3 = _mm_madd_epi16(_mm_set1_epi16(scales[1]), p16_3);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(_mm256_set_m128i(p32_3, p32_2))), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(MM256_SET_M128I(p32_3, p32_2))), acc);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3160,7 +3162,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
summs += dmin * _mm_extract_epi32(hsum, 0);
|
summs += dmin * _mm_extract_epi32(hsum, 0);
|
||||||
|
|
||||||
const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0);
|
const __m128i sc128 = _mm256_extracti128_si256(mins_and_scales, 0);
|
||||||
const __m256i scales = _mm256_set_m128i(sc128, sc128);
|
const __m256i scales = MM256_SET_M128I(sc128, sc128);
|
||||||
|
|
||||||
const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].qh);
|
const __m256i hbits = _mm256_loadu_si256((const __m256i*)x[i].qh);
|
||||||
__m256i hmask = mone;
|
__m256i hmask = mone;
|
||||||
@ -3299,7 +3301,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
}
|
}
|
||||||
|
|
||||||
__m256 vd = _mm256_set1_ps(d);
|
__m256 vd = _mm256_set1_ps(d);
|
||||||
__m256i sumi = _mm256_set_m128i(sumi_1, sumi_0);
|
__m256i sumi = MM256_SET_M128I(sumi_1, sumi_0);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(sumi)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(vd, _mm256_cvtepi32_ps(sumi)), acc);
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -3462,13 +3464,13 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
const __m256i q5bits = _mm256_loadu_si256((const __m256i*)q5);
|
const __m256i q5bits = _mm256_loadu_si256((const __m256i*)q5);
|
||||||
|
|
||||||
const __m256i scale_l = _mm256_set_m128i(_mm_set1_epi16(x[i].scales[1]), _mm_set1_epi16(x[i].scales[0]));
|
const __m256i scale_l = MM256_SET_M128I(_mm_set1_epi16(x[i].scales[1]), _mm_set1_epi16(x[i].scales[0]));
|
||||||
const __m256i scale_h = _mm256_set_m128i(_mm_set1_epi16(x[i].scales[3]), _mm_set1_epi16(x[i].scales[2]));
|
const __m256i scale_h = MM256_SET_M128I(_mm_set1_epi16(x[i].scales[3]), _mm_set1_epi16(x[i].scales[2]));
|
||||||
|
|
||||||
int64_t aux64;
|
int64_t aux64;
|
||||||
memcpy(&aux64, x[i].qh, 8);
|
memcpy(&aux64, x[i].qh, 8);
|
||||||
const __m128i haux128 = _mm_set_epi64x(aux64 >> 1, aux64);
|
const __m128i haux128 = _mm_set_epi64x(aux64 >> 1, aux64);
|
||||||
const __m256i haux256 = _mm256_set_m128i(_mm_srli_epi16(haux128, 2), haux128);
|
const __m256i haux256 = MM256_SET_M128I(_mm_srli_epi16(haux128, 2), haux128);
|
||||||
|
|
||||||
const __m256i q5h_0 = _mm256_slli_epi16(_mm256_andnot_si256(haux256, mone), 4);
|
const __m256i q5h_0 = _mm256_slli_epi16(_mm256_andnot_si256(haux256, mone), 4);
|
||||||
const __m256i q5h_1 = _mm256_slli_epi16(_mm256_andnot_si256(_mm256_srli_epi16(haux256, 4), mone), 4);
|
const __m256i q5h_1 = _mm256_slli_epi16(_mm256_andnot_si256(_mm256_srli_epi16(haux256, 4), mone), 4);
|
||||||
@ -3543,7 +3545,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m128i dot_0 = _mm_sub_epi32(_mm_add_epi32(p16_0, p16_2), _mm_add_epi32(s16_0, s16_2));
|
const __m128i dot_0 = _mm_sub_epi32(_mm_add_epi32(p16_0, p16_2), _mm_add_epi32(s16_0, s16_2));
|
||||||
const __m128i dot_1 = _mm_sub_epi32(_mm_add_epi32(p16_1, p16_3), _mm_add_epi32(s16_1, s16_3));
|
const __m128i dot_1 = _mm_sub_epi32(_mm_add_epi32(p16_1, p16_3), _mm_add_epi32(s16_1, s16_3));
|
||||||
|
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(_mm256_set_m128i(dot_1, dot_0))), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(MM256_SET_M128I(dot_1, dot_0))), acc);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3925,7 +3927,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
__m256i sumi = _mm256_set_m128i(sumi_1, sumi_0);
|
__m256i sumi = MM256_SET_M128I(sumi_1, sumi_0);
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi)), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(sumi)), acc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -4083,8 +4085,8 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
const __m256i q4bits1 = _mm256_loadu_si256((const __m256i*)q4);
|
const __m256i q4bits1 = _mm256_loadu_si256((const __m256i*)q4);
|
||||||
const __m128i q4bitsH = _mm_loadu_si128((const __m128i*)qh);
|
const __m128i q4bitsH = _mm_loadu_si128((const __m128i*)qh);
|
||||||
|
|
||||||
const __m256i q4h_0 = _mm256_slli_epi16(_mm256_and_si256(_mm256_set_m128i(_mm_srli_epi16(q4bitsH, 2), q4bitsH), m2), 4);
|
const __m256i q4h_0 = _mm256_slli_epi16(_mm256_and_si256(MM256_SET_M128I(_mm_srli_epi16(q4bitsH, 2), q4bitsH), m2), 4);
|
||||||
const __m256i q4h_1 = _mm256_slli_epi16(_mm256_and_si256(_mm256_set_m128i(_mm_srli_epi16(q4bitsH, 6), _mm_srli_epi16(q4bitsH, 4)), m2), 4);
|
const __m256i q4h_1 = _mm256_slli_epi16(_mm256_and_si256(MM256_SET_M128I(_mm_srli_epi16(q4bitsH, 6), _mm_srli_epi16(q4bitsH, 4)), m2), 4);
|
||||||
|
|
||||||
const __m256i q4_0 = _mm256_or_si256(_mm256_and_si256(q4bits1, m4), q4h_0);
|
const __m256i q4_0 = _mm256_or_si256(_mm256_and_si256(q4bits1, m4), q4h_0);
|
||||||
const __m256i q4_1 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi16(q4bits1, 4), m4), q4h_1);
|
const __m256i q4_1 = _mm256_or_si256(_mm256_and_si256(_mm256_srli_epi16(q4bits1, 4), m4), q4h_1);
|
||||||
@ -4177,7 +4179,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
sumi_0 = _mm_add_epi32(sumi_0, _mm_add_epi32(p16_0, p16_2));
|
sumi_0 = _mm_add_epi32(sumi_0, _mm_add_epi32(p16_0, p16_2));
|
||||||
sumi_1 = _mm_add_epi32(sumi_1, _mm_add_epi32(p16_1, p16_3));
|
sumi_1 = _mm_add_epi32(sumi_1, _mm_add_epi32(p16_1, p16_3));
|
||||||
|
|
||||||
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(_mm256_set_m128i(sumi_1, sumi_0))), acc);
|
acc = _mm256_add_ps(_mm256_mul_ps(_mm256_broadcast_ss(&d), _mm256_cvtepi32_ps(MM256_SET_M128I(sumi_1, sumi_0))), acc);
|
||||||
}
|
}
|
||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
42
llama-util.h
42
llama-util.h
@ -149,6 +149,46 @@ struct llama_file {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// llama_context_data
|
||||||
|
struct llama_data_context {
|
||||||
|
virtual void write(const void * src, size_t size) = 0;
|
||||||
|
virtual size_t get_size_written() = 0;
|
||||||
|
virtual ~llama_data_context() = default;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_data_buffer_context : llama_data_context {
|
||||||
|
uint8_t* ptr;
|
||||||
|
size_t size_written = 0;
|
||||||
|
|
||||||
|
llama_data_buffer_context(uint8_t * p) : ptr(p) {}
|
||||||
|
|
||||||
|
void write(const void * src, size_t size) override {
|
||||||
|
memcpy(ptr, src, size);
|
||||||
|
ptr += size;
|
||||||
|
size_written += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_size_written() override {
|
||||||
|
return size_written;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
struct llama_data_file_context : llama_data_context {
|
||||||
|
llama_file* file;
|
||||||
|
size_t size_written = 0;
|
||||||
|
|
||||||
|
llama_data_file_context(llama_file * f) : file(f) {}
|
||||||
|
|
||||||
|
void write(const void * src, size_t size) override {
|
||||||
|
file->write_raw(src, size);
|
||||||
|
size_written += size;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t get_size_written() override {
|
||||||
|
return size_written;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
static std::string llama_format_win_err(DWORD err) {
|
static std::string llama_format_win_err(DWORD err) {
|
||||||
LPSTR buf;
|
LPSTR buf;
|
||||||
@ -179,7 +219,7 @@ struct llama_mmap {
|
|||||||
// prefetch/readahead impairs performance on NUMA systems
|
// prefetch/readahead impairs performance on NUMA systems
|
||||||
if (numa) { prefetch = 0; }
|
if (numa) { prefetch = 0; }
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
if (prefetch) { flags |= MAP_POPULATE; }
|
if (prefetch >= file->size) { flags |= MAP_POPULATE; }
|
||||||
#endif
|
#endif
|
||||||
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
|
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
|
||||||
if (addr == MAP_FAILED) {
|
if (addr == MAP_FAILED) {
|
||||||
|
361
llama.cpp
361
llama.cpp
@ -56,8 +56,14 @@
|
|||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(GGML_USE_CUBLAS) && !defined(GGML_USE_METAL)
|
||||||
|
#include "ggml-alloc.h"
|
||||||
|
#define LLAMA_USE_ALLOCATOR
|
||||||
|
#else
|
||||||
#define LLAMA_USE_SCRATCH
|
#define LLAMA_USE_SCRATCH
|
||||||
#define LLAMA_MAX_SCRATCH_BUFFERS 16
|
#define LLAMA_MAX_SCRATCH_BUFFERS 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// available llama models
|
// available llama models
|
||||||
enum e_model {
|
enum e_model {
|
||||||
@ -327,13 +333,22 @@ struct llama_model {
|
|||||||
|
|
||||||
struct llama_context {
|
struct llama_context {
|
||||||
llama_context(const llama_model & model) : model(model), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
|
llama_context(const llama_model & model) : model(model), t_load_us(model.t_load_us), t_start_us(model.t_start_us) {}
|
||||||
#ifdef GGML_USE_METAL
|
|
||||||
~llama_context() {
|
~llama_context() {
|
||||||
|
if (model_owner) {
|
||||||
|
delete &model;
|
||||||
|
}
|
||||||
|
#ifdef GGML_USE_METAL
|
||||||
if (ctx_metal) {
|
if (ctx_metal) {
|
||||||
ggml_metal_free(ctx_metal);
|
ggml_metal_free(ctx_metal);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
if (alloc) {
|
||||||
|
ggml_allocr_free(alloc);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
std::mt19937 rng;
|
std::mt19937 rng;
|
||||||
|
|
||||||
bool has_evaluated_once = false;
|
bool has_evaluated_once = false;
|
||||||
@ -371,7 +386,17 @@ struct llama_context {
|
|||||||
// memory buffers used to evaluate the model
|
// memory buffers used to evaluate the model
|
||||||
// TODO: move in llama_state
|
// TODO: move in llama_state
|
||||||
llama_ctx_buffer buf_compute;
|
llama_ctx_buffer buf_compute;
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
llama_ctx_buffer buf_alloc;
|
||||||
|
ggml_allocr * alloc = NULL;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_SCRATCH
|
||||||
llama_ctx_buffer buf_scratch[LLAMA_MAX_SCRATCH_BUFFERS];
|
llama_ctx_buffer buf_scratch[LLAMA_MAX_SCRATCH_BUFFERS];
|
||||||
|
int buf_last = 0;
|
||||||
|
size_t buf_max_size[LLAMA_MAX_SCRATCH_BUFFERS] = { 0 };
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
ggml_metal_context * ctx_metal = NULL;
|
ggml_metal_context * ctx_metal = NULL;
|
||||||
@ -381,9 +406,6 @@ struct llama_context {
|
|||||||
ggml_mpi_context * ctx_mpi = NULL;
|
ggml_mpi_context * ctx_mpi = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
int buf_last = 0;
|
|
||||||
size_t buf_max_size[LLAMA_MAX_SCRATCH_BUFFERS] = { 0 };
|
|
||||||
|
|
||||||
void use_buf(struct ggml_context * ctx, int i) {
|
void use_buf(struct ggml_context * ctx, int i) {
|
||||||
#if defined(LLAMA_USE_SCRATCH)
|
#if defined(LLAMA_USE_SCRATCH)
|
||||||
size_t last_size = 0;
|
size_t last_size = 0;
|
||||||
@ -725,12 +747,12 @@ struct llama_model_loader {
|
|||||||
|
|
||||||
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
|
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
|
||||||
size_t data_size = 0;
|
size_t data_size = 0;
|
||||||
size_t prefetch_size = 0;
|
size_t prefetch_size = file_loader->file.size;
|
||||||
size_t lock_size = 0;
|
size_t lock_size = 0;
|
||||||
for (const llama_load_tensor & lt : tensors_map.tensors) {
|
for (const llama_load_tensor & lt : tensors_map.tensors) {
|
||||||
data_size += lt.size;
|
data_size += lt.size;
|
||||||
if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
|
if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
|
||||||
prefetch_size += lt.size;
|
prefetch_size -= lt.size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -879,6 +901,7 @@ struct llama_context_params llama_context_default_params() {
|
|||||||
/*.progress_callback =*/ nullptr,
|
/*.progress_callback =*/ nullptr,
|
||||||
/*.progress_callback_user_data =*/ nullptr,
|
/*.progress_callback_user_data =*/ nullptr,
|
||||||
/*.low_vram =*/ false,
|
/*.low_vram =*/ false,
|
||||||
|
/*.mul_mat_q =*/ false,
|
||||||
/*.f16_kv =*/ true,
|
/*.f16_kv =*/ true,
|
||||||
/*.logits_all =*/ false,
|
/*.logits_all =*/ false,
|
||||||
/*.vocab_only =*/ false,
|
/*.vocab_only =*/ false,
|
||||||
@ -1006,6 +1029,7 @@ static void llama_model_load_internal(
|
|||||||
int n_gpu_layers,
|
int n_gpu_layers,
|
||||||
int main_gpu,
|
int main_gpu,
|
||||||
const float * tensor_split,
|
const float * tensor_split,
|
||||||
|
const bool mul_mat_q,
|
||||||
float rope_freq_base,
|
float rope_freq_base,
|
||||||
float rope_freq_scale,
|
float rope_freq_scale,
|
||||||
bool low_vram,
|
bool low_vram,
|
||||||
@ -1134,9 +1158,11 @@ static void llama_model_load_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
(void) main_gpu;
|
(void) main_gpu;
|
||||||
|
(void) mul_mat_q;
|
||||||
#if defined(GGML_USE_CUBLAS)
|
#if defined(GGML_USE_CUBLAS)
|
||||||
fprintf(stderr, "%s: using CUDA for GPU acceleration\n", __func__);
|
fprintf(stderr, "%s: using CUDA for GPU acceleration\n", __func__);
|
||||||
ggml_cuda_set_main_device(main_gpu);
|
ggml_cuda_set_main_device(main_gpu);
|
||||||
|
ggml_cuda_set_mul_mat_q(mul_mat_q);
|
||||||
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
|
#define LLAMA_BACKEND_OFFLOAD GGML_BACKEND_GPU
|
||||||
#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT
|
#define LLAMA_BACKEND_OFFLOAD_SPLIT GGML_BACKEND_GPU_SPLIT
|
||||||
#elif defined(GGML_USE_CLBLAST)
|
#elif defined(GGML_USE_CLBLAST)
|
||||||
@ -1230,12 +1256,16 @@ static void llama_model_load_internal(
|
|||||||
const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1;
|
const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1;
|
||||||
|
|
||||||
// this is the total memory required to run the inference
|
// this is the total memory required to run the inference
|
||||||
const size_t mem_required =
|
size_t mem_required =
|
||||||
ctx_size +
|
ctx_size +
|
||||||
mmapped_size - vram_weights + // weights in VRAM not in memory
|
mmapped_size - vram_weights; // weights in VRAM not in memory
|
||||||
|
|
||||||
|
#ifndef LLAMA_USE_ALLOCATOR
|
||||||
|
mem_required +=
|
||||||
MEM_REQ_SCRATCH0(hparams.n_ctx).at(model.type) +
|
MEM_REQ_SCRATCH0(hparams.n_ctx).at(model.type) +
|
||||||
MEM_REQ_SCRATCH1().at(model.type) +
|
MEM_REQ_SCRATCH1().at(model.type) +
|
||||||
MEM_REQ_EVAL().at(model.type);
|
MEM_REQ_EVAL().at(model.type);
|
||||||
|
#endif
|
||||||
|
|
||||||
// this is the memory required by one llama_state
|
// this is the memory required by one llama_state
|
||||||
const size_t mem_required_state =
|
const size_t mem_required_state =
|
||||||
@ -1341,6 +1371,7 @@ static bool llama_model_load(
|
|||||||
int n_gpu_layers,
|
int n_gpu_layers,
|
||||||
int main_gpu,
|
int main_gpu,
|
||||||
const float * tensor_split,
|
const float * tensor_split,
|
||||||
|
const bool mul_mat_q,
|
||||||
float rope_freq_base,
|
float rope_freq_base,
|
||||||
float rope_freq_scale,
|
float rope_freq_scale,
|
||||||
bool low_vram,
|
bool low_vram,
|
||||||
@ -1351,7 +1382,8 @@ static bool llama_model_load(
|
|||||||
llama_progress_callback progress_callback,
|
llama_progress_callback progress_callback,
|
||||||
void *progress_callback_user_data) {
|
void *progress_callback_user_data) {
|
||||||
try {
|
try {
|
||||||
llama_model_load_internal(fname, model, vocab, n_ctx, n_batch, n_gqa, rms_norm_eps, n_gpu_layers, main_gpu, tensor_split, rope_freq_base, rope_freq_scale, low_vram, memory_type,
|
llama_model_load_internal(fname, model, vocab, n_ctx, n_batch, n_gqa, rms_norm_eps, n_gpu_layers,
|
||||||
|
main_gpu, tensor_split, mul_mat_q, rope_freq_base, rope_freq_scale, low_vram, memory_type,
|
||||||
use_mmap, use_mlock, vocab_only, progress_callback, progress_callback_user_data);
|
use_mmap, use_mlock, vocab_only, progress_callback, progress_callback_user_data);
|
||||||
return true;
|
return true;
|
||||||
} catch (const std::exception & err) {
|
} catch (const std::exception & err) {
|
||||||
@ -1360,32 +1392,15 @@ static bool llama_model_load(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// evaluate the transformer
|
static struct ggml_cgraph * llama_build_graph(
|
||||||
//
|
|
||||||
// - lctx: llama context
|
|
||||||
// - tokens: new batch of tokens to process
|
|
||||||
// - embd embeddings input
|
|
||||||
// - n_tokens number of tokens
|
|
||||||
// - n_past: the context size so far
|
|
||||||
// - n_threads: number of threads to use
|
|
||||||
//
|
|
||||||
static bool llama_eval_internal(
|
|
||||||
llama_context & lctx,
|
llama_context & lctx,
|
||||||
const llama_token * tokens,
|
const llama_token * tokens,
|
||||||
const float * embd,
|
const float * embd,
|
||||||
int n_tokens,
|
int n_tokens,
|
||||||
int n_past,
|
int n_past) {
|
||||||
int n_threads,
|
|
||||||
const char * cgraph_fname) {
|
|
||||||
|
|
||||||
LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
|
LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
|
||||||
|
|
||||||
#ifdef GGML_USE_MPI
|
|
||||||
ggml_mpi_eval_init(lctx.ctx_mpi, &n_tokens, &n_past, &n_threads);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const int64_t t_start_us = ggml_time_us();
|
|
||||||
|
|
||||||
const int N = n_tokens;
|
const int N = n_tokens;
|
||||||
|
|
||||||
const auto & model = lctx.model;
|
const auto & model = lctx.model;
|
||||||
@ -1401,10 +1416,8 @@ static bool llama_eval_internal(
|
|||||||
const int64_t n_head = hparams.n_head;
|
const int64_t n_head = hparams.n_head;
|
||||||
const int64_t n_head_kv = hparams.n_head_kv;
|
const int64_t n_head_kv = hparams.n_head_kv;
|
||||||
const int64_t n_embd_head = hparams.n_embd_head();
|
const int64_t n_embd_head = hparams.n_embd_head();
|
||||||
const int64_t n_vocab = hparams.n_vocab;
|
|
||||||
const int64_t n_embd_gqa = hparams.n_embd_gqa();
|
const int64_t n_embd_gqa = hparams.n_embd_gqa();
|
||||||
|
|
||||||
|
|
||||||
LLAMA_ASSERT(n_embd_head == hparams.n_rot);
|
LLAMA_ASSERT(n_embd_head == hparams.n_rot);
|
||||||
|
|
||||||
const float freq_base = hparams.rope_freq_base;
|
const float freq_base = hparams.rope_freq_base;
|
||||||
@ -1416,26 +1429,35 @@ static bool llama_eval_internal(
|
|||||||
auto & mem_per_token = lctx.mem_per_token;
|
auto & mem_per_token = lctx.mem_per_token;
|
||||||
auto & buf_compute = lctx.buf_compute;
|
auto & buf_compute = lctx.buf_compute;
|
||||||
|
|
||||||
|
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
/*.mem_size =*/ buf_compute.size,
|
/*.mem_size =*/ buf_compute.size,
|
||||||
/*.mem_buffer =*/ buf_compute.addr,
|
/*.mem_buffer =*/ buf_compute.addr,
|
||||||
/*.no_alloc =*/ false,
|
/*.no_alloc =*/ false,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
params.no_alloc = true;
|
||||||
|
#endif
|
||||||
|
|
||||||
struct ggml_context * ctx0 = ggml_init(params);
|
struct ggml_context * ctx0 = ggml_init(params);
|
||||||
|
|
||||||
ggml_cgraph * gf = ggml_new_graph(ctx0);
|
ggml_cgraph * gf = ggml_new_graph(ctx0);
|
||||||
|
|
||||||
// for big prompts, if BLAS is enabled, it is better to use only one thread
|
|
||||||
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
|
|
||||||
n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL;
|
struct ggml_tensor * inpL;
|
||||||
|
|
||||||
if (tokens) {
|
if (tokens) {
|
||||||
struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
struct ggml_tensor * inp_tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
ggml_allocr_alloc(lctx.alloc, inp_tokens);
|
||||||
|
if (!ggml_allocr_is_measure(lctx.alloc)) {
|
||||||
|
memcpy(inp_tokens->data, tokens, N*ggml_element_size(inp_tokens));
|
||||||
|
}
|
||||||
|
#else
|
||||||
memcpy(inp_tokens->data, tokens, N*ggml_element_size(inp_tokens));
|
memcpy(inp_tokens->data, tokens, N*ggml_element_size(inp_tokens));
|
||||||
|
#endif
|
||||||
ggml_set_name(inp_tokens, "inp_tokens");
|
ggml_set_name(inp_tokens, "inp_tokens");
|
||||||
|
|
||||||
inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
|
inpL = ggml_get_rows(ctx0, model.tok_embeddings, inp_tokens);
|
||||||
@ -1445,7 +1467,15 @@ static bool llama_eval_internal(
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
|
inpL = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, N);
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
ggml_allocr_alloc(lctx.alloc, inpL);
|
||||||
|
if (!ggml_allocr_is_measure(lctx.alloc)) {
|
||||||
|
memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
|
||||||
|
}
|
||||||
|
#else
|
||||||
memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
|
memcpy(inpL->data, embd, N * n_embd * ggml_element_size(inpL));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
const int i_gpu_start = n_layer - n_gpu_layers;
|
const int i_gpu_start = n_layer - n_gpu_layers;
|
||||||
@ -1472,6 +1502,17 @@ static bool llama_eval_internal(
|
|||||||
}
|
}
|
||||||
#endif // GGML_USE_CUBLAS
|
#endif // GGML_USE_CUBLAS
|
||||||
|
|
||||||
|
struct ggml_tensor * KQ_scale = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, 1);
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
ggml_allocr_alloc(lctx.alloc, KQ_scale);
|
||||||
|
if (!ggml_allocr_is_measure(lctx.alloc)) {
|
||||||
|
ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd)/n_head));
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ggml_set_f32(KQ_scale, 1.0f/sqrtf(float(n_embd)/n_head));
|
||||||
|
#endif
|
||||||
|
ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
ggml_format_name(inpL, "layer_inp_%d", il);
|
ggml_format_name(inpL, "layer_inp_%d", il);
|
||||||
|
|
||||||
@ -1567,9 +1608,6 @@ static bool llama_eval_internal(
|
|||||||
ggml_set_name(KQ, "KQ");
|
ggml_set_name(KQ, "KQ");
|
||||||
|
|
||||||
// KQ_scaled = KQ / sqrt(n_embd_head)
|
// KQ_scaled = KQ / sqrt(n_embd_head)
|
||||||
struct ggml_tensor * KQ_scale = ggml_new_f32(ctx0, 1.0f/sqrtf(float(n_embd)/n_head));
|
|
||||||
ggml_set_name(KQ_scale, "1/sqrt(n_embd_head)");
|
|
||||||
|
|
||||||
// KQ_scaled shape [n_past + N, N, n_head, 1]
|
// KQ_scaled shape [n_past + N, N, n_head, 1]
|
||||||
struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, KQ_scale);
|
struct ggml_tensor * KQ_scaled = ggml_scale_inplace(ctx0, KQ, KQ_scale);
|
||||||
offload_func_kq(KQ_scaled);
|
offload_func_kq(KQ_scaled);
|
||||||
@ -1685,9 +1723,6 @@ static bool llama_eval_internal(
|
|||||||
|
|
||||||
lctx.use_buf(ctx0, 0);
|
lctx.use_buf(ctx0, 0);
|
||||||
|
|
||||||
// used at the end to optionally extract the embeddings
|
|
||||||
struct ggml_tensor * embeddings = NULL;
|
|
||||||
|
|
||||||
// norm
|
// norm
|
||||||
{
|
{
|
||||||
cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
|
||||||
@ -1698,8 +1733,6 @@ static bool llama_eval_internal(
|
|||||||
cur = ggml_mul(ctx0, cur, model.norm);
|
cur = ggml_mul(ctx0, cur, model.norm);
|
||||||
// offload_func_nr(cur); // TODO CPU + GPU mirrored backend
|
// offload_func_nr(cur); // TODO CPU + GPU mirrored backend
|
||||||
ggml_set_name(cur, "result_norm");
|
ggml_set_name(cur, "result_norm");
|
||||||
|
|
||||||
embeddings = cur;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// lm_head
|
// lm_head
|
||||||
@ -1711,23 +1744,103 @@ static bool llama_eval_internal(
|
|||||||
// logits -> probs
|
// logits -> probs
|
||||||
//cur = ggml_soft_max_inplace(ctx0, cur);
|
//cur = ggml_soft_max_inplace(ctx0, cur);
|
||||||
|
|
||||||
// run the computation
|
|
||||||
ggml_build_forward_expand(gf, cur);
|
ggml_build_forward_expand(gf, cur);
|
||||||
|
|
||||||
// fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf.n_nodes, gf.n_leafs);
|
if (mem_per_token == 0) {
|
||||||
|
mem_per_token = ggml_used_mem(ctx0)/N;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
printf("\n%s: used_mem: eval ctx %.3f MB, scratch %.3f MB %.3f MB, work buf %.3f MB, n_past = %d, N = %d\n", __func__,
|
||||||
|
ggml_used_mem(ctx0)/1024.0/1024.0,
|
||||||
|
lctx.get_buf_max_mem(0)/1024.0/1024.0,
|
||||||
|
lctx.get_buf_max_mem(1)/1024.0/1024.0,
|
||||||
|
lctx.work_buffer.size()/1024.0/1024.0,
|
||||||
|
n_past, N);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ggml_free(ctx0);
|
||||||
|
|
||||||
|
return gf;
|
||||||
|
}
|
||||||
|
|
||||||
|
// evaluate the transformer
|
||||||
|
//
|
||||||
|
// - lctx: llama context
|
||||||
|
// - tokens: new batch of tokens to process
|
||||||
|
// - embd embeddings input
|
||||||
|
// - n_tokens number of tokens
|
||||||
|
// - n_past: the context size so far
|
||||||
|
// - n_threads: number of threads to use
|
||||||
|
//
|
||||||
|
static bool llama_eval_internal(
|
||||||
|
llama_context & lctx,
|
||||||
|
const llama_token * tokens,
|
||||||
|
const float * embd,
|
||||||
|
int n_tokens,
|
||||||
|
int n_past,
|
||||||
|
int n_threads,
|
||||||
|
const char * cgraph_fname) {
|
||||||
|
|
||||||
|
LLAMA_ASSERT((!tokens && embd) || (tokens && !embd));
|
||||||
|
|
||||||
|
const int64_t t_start_us = ggml_time_us();
|
||||||
|
|
||||||
|
#ifdef GGML_USE_MPI
|
||||||
|
ggml_mpi_eval_init(lctx.ctx_mpi, &n_tokens, &n_past, &n_threads);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const int N = n_tokens;
|
||||||
|
|
||||||
|
const auto & model = lctx.model;
|
||||||
|
const auto & hparams = model.hparams;
|
||||||
|
|
||||||
|
const auto & kv_self = lctx.kv_self;
|
||||||
|
|
||||||
|
LLAMA_ASSERT(!!kv_self.ctx);
|
||||||
|
|
||||||
|
const int64_t n_embd = hparams.n_embd;
|
||||||
|
const int64_t n_vocab = hparams.n_vocab;
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
ggml_allocr_reset(lctx.alloc);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ggml_cgraph * gf = llama_build_graph(lctx, tokens, embd, n_tokens, n_past);
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
ggml_allocr_alloc_graph(lctx.alloc, gf);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// fprintf(stderr, "graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
|
||||||
|
|
||||||
|
// for big prompts, if BLAS is enabled, it is better to use only one thread
|
||||||
|
// otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
|
||||||
|
n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
|
||||||
|
|
||||||
|
struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
|
||||||
|
struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
|
||||||
|
|
||||||
|
LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
|
||||||
|
LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);
|
||||||
|
|
||||||
#if GGML_USE_MPI
|
#if GGML_USE_MPI
|
||||||
|
const int64_t n_layer = hparams.n_layer;
|
||||||
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
|
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
if (lctx.ctx_metal && N == 1) {
|
if (lctx.ctx_metal && N == 1) {
|
||||||
if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
|
// TODO: disabled until #2413 is resolved
|
||||||
ggml_metal_graph_find_concurrency(lctx.ctx_metal, gf);
|
//if (!ggml_metal_if_optimized(lctx.ctx_metal)) {
|
||||||
}
|
// ggml_metal_graph_find_concurrency(lctx.ctx_metal, gf);
|
||||||
|
//}
|
||||||
ggml_metal_set_n_cb (lctx.ctx_metal, n_threads);
|
ggml_metal_set_n_cb (lctx.ctx_metal, n_threads);
|
||||||
ggml_metal_graph_compute(lctx.ctx_metal, gf);
|
ggml_metal_graph_compute(lctx.ctx_metal, gf);
|
||||||
ggml_metal_get_tensor (lctx.ctx_metal, cur);
|
ggml_metal_get_tensor (lctx.ctx_metal, res);
|
||||||
|
if (!lctx.embedding.empty()) {
|
||||||
|
ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
// IMPORTANT:
|
// IMPORTANT:
|
||||||
// Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
|
// Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
|
||||||
@ -1758,8 +1871,6 @@ static bool llama_eval_internal(
|
|||||||
// update kv token count
|
// update kv token count
|
||||||
lctx.kv_self.n = n_past + N;
|
lctx.kv_self.n = n_past + N;
|
||||||
|
|
||||||
struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
|
|
||||||
|
|
||||||
if (cgraph_fname) {
|
if (cgraph_fname) {
|
||||||
ggml_graph_export(gf, cgraph_fname);
|
ggml_graph_export(gf, cgraph_fname);
|
||||||
}
|
}
|
||||||
@ -1797,21 +1908,6 @@ static bool llama_eval_internal(
|
|||||||
memcpy(embedding_out.data(), (float *) ggml_get_data(embeddings) + (n_embd*(N - 1)), sizeof(float)*n_embd);
|
memcpy(embedding_out.data(), (float *) ggml_get_data(embeddings) + (n_embd*(N - 1)), sizeof(float)*n_embd);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mem_per_token == 0) {
|
|
||||||
mem_per_token = ggml_used_mem(ctx0)/N;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
printf("\n%s: used_mem: eval ctx %.3f MB, scratch %.3f MB %.3f MB, work buf %.3f MB, n_past = %d, N = %d\n", __func__,
|
|
||||||
ggml_used_mem(ctx0)/1024.0/1024.0,
|
|
||||||
lctx.get_buf_max_mem(0)/1024.0/1024.0,
|
|
||||||
lctx.get_buf_max_mem(1)/1024.0/1024.0,
|
|
||||||
lctx.work_buffer.size()/1024.0/1024.0,
|
|
||||||
n_past, N);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
ggml_free(ctx0);
|
|
||||||
|
|
||||||
// measure the performance only for the single-token evals
|
// measure the performance only for the single-token evals
|
||||||
if (N == 1) {
|
if (N == 1) {
|
||||||
lctx.t_eval_us += ggml_time_us() - t_start_us;
|
lctx.t_eval_us += ggml_time_us() - t_start_us;
|
||||||
@ -1923,7 +2019,9 @@ struct llama_tokenizer {
|
|||||||
if (token == vocab_.token_to_id.end()) {
|
if (token == vocab_.token_to_id.end()) {
|
||||||
// output any symbols that did not form tokens as bytes.
|
// output any symbols that did not form tokens as bytes.
|
||||||
for (int j = 0; j < (int) symbol.n; ++j) {
|
for (int j = 0; j < (int) symbol.n; ++j) {
|
||||||
llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3;
|
// NOTE: old version, before #2420 - not sure what are the implications of this
|
||||||
|
//llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3;
|
||||||
|
llama_vocab::id token_id = vocab_.token_to_id.at(std::string(1, symbol.text[j]));
|
||||||
output.push_back(token_id);
|
output.push_back(token_id);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -3100,7 +3198,7 @@ struct llama_model * llama_load_model_from_file(
|
|||||||
ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
ggml_type memory_type = params.f16_kv ? GGML_TYPE_F16 : GGML_TYPE_F32;
|
||||||
|
|
||||||
if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.n_batch, params.n_gqa, params.rms_norm_eps, params.n_gpu_layers,
|
if (!llama_model_load(path_model, *model, model->vocab, params.n_ctx, params.n_batch, params.n_gqa, params.rms_norm_eps, params.n_gpu_layers,
|
||||||
params.main_gpu, params.tensor_split, params.rope_freq_base, params.rope_freq_scale,params.low_vram,
|
params.main_gpu, params.tensor_split, params.mul_mat_q, params.rope_freq_base, params.rope_freq_scale,params.low_vram,
|
||||||
memory_type, params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
|
memory_type, params.use_mmap, params.use_mlock, params.vocab_only, params.progress_callback,
|
||||||
params.progress_callback_user_data)) {
|
params.progress_callback_user_data)) {
|
||||||
delete model;
|
delete model;
|
||||||
@ -3177,10 +3275,47 @@ struct llama_context * llama_new_context_with_model(
|
|||||||
ctx->embedding.resize(hparams.n_embd);
|
ctx->embedding.resize(hparams.n_embd);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type) + ggml_graph_overhead());
|
#ifdef LLAMA_USE_ALLOCATOR
|
||||||
|
{
|
||||||
|
static const size_t tensor_alignment = 32;
|
||||||
|
// the compute buffer is used to store the tensor and graph structs, while the allocator buffer is used for the tensor data
|
||||||
|
ctx->buf_compute.resize(ggml_tensor_overhead()*GGML_MAX_NODES + ggml_graph_overhead());
|
||||||
|
|
||||||
|
// create measure allocator
|
||||||
|
ctx->alloc = ggml_allocr_new_measure(tensor_alignment);
|
||||||
|
|
||||||
|
// build worst-case graph
|
||||||
|
int n_tokens = std::min((int)hparams.n_ctx, params.n_batch);
|
||||||
|
int n_past = hparams.n_ctx - n_tokens;
|
||||||
|
llama_token token = llama_token_bos(); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph
|
||||||
|
ggml_cgraph * gf = llama_build_graph(*ctx, &token, NULL, n_tokens, n_past);
|
||||||
|
|
||||||
|
// measure memory requirements for the graph
|
||||||
|
size_t alloc_size = ggml_allocr_alloc_graph(ctx->alloc, gf) + tensor_alignment;
|
||||||
|
|
||||||
|
fprintf(stderr, "%s: compute buffer total size = %7.2f MB\n", __func__, (ctx->buf_compute.size + alloc_size) / 1024.0 / 1024.0);
|
||||||
|
|
||||||
|
// debug - for comparison with scratch buffer
|
||||||
|
//size_t prev_req =
|
||||||
|
// MEM_REQ_SCRATCH0(hparams.n_ctx).at(ctx->model.type) +
|
||||||
|
// MEM_REQ_SCRATCH1().at(ctx->model.type) +
|
||||||
|
// MEM_REQ_EVAL().at(ctx->model.type);
|
||||||
|
//fprintf(stderr, "%s: (debug) equivalent with scratch buffer = %7.2f MB\n", __func__, prev_req / 1024.0 / 1024.0);
|
||||||
|
|
||||||
|
// recreate allocator with exact memory requirements
|
||||||
|
ggml_allocr_free(ctx->alloc);
|
||||||
|
|
||||||
|
ctx->buf_alloc.resize(alloc_size);
|
||||||
|
ctx->alloc = ggml_allocr_new(ctx->buf_alloc.addr, ctx->buf_alloc.size, tensor_alignment);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
ctx->buf_compute.resize(MEM_REQ_EVAL().at(ctx->model.type) + ggml_graph_overhead());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef LLAMA_USE_SCRATCH
|
||||||
ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0(hparams.n_ctx).at(ctx->model.type));
|
ctx->buf_scratch[0].resize(MEM_REQ_SCRATCH0(hparams.n_ctx).at(ctx->model.type));
|
||||||
ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type));
|
ctx->buf_scratch[1].resize(MEM_REQ_SCRATCH1().at(ctx->model.type));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
#ifdef GGML_USE_METAL
|
||||||
@ -3250,9 +3385,6 @@ struct llama_context * llama_init_from_file(
|
|||||||
}
|
}
|
||||||
|
|
||||||
void llama_free(struct llama_context * ctx) {
|
void llama_free(struct llama_context * ctx) {
|
||||||
if (ctx->model_owner) {
|
|
||||||
delete &ctx->model;
|
|
||||||
}
|
|
||||||
delete ctx;
|
delete ctx;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3611,10 +3743,20 @@ size_t llama_get_state_size(const struct llama_context * ctx) {
|
|||||||
return s_total;
|
return s_total;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copies the state to the specified destination address
|
/** copy state data into either a buffer or file depending on the passed in context
|
||||||
size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
*
|
||||||
uint8_t * out = dst;
|
* file context:
|
||||||
|
* llama_file file("/path", "wb");
|
||||||
|
* llama_data_file_context data_ctx(&file);
|
||||||
|
* llama_copy_state_data(ctx, &data_ctx);
|
||||||
|
*
|
||||||
|
* buffer context:
|
||||||
|
* std::vector<uint8_t> buf(max_size, 0);
|
||||||
|
* llama_data_buffer_context data_ctx(&buf.data());
|
||||||
|
* llama_copy_state_data(ctx, &data_ctx);
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
void llama_copy_state_data_internal(struct llama_context * ctx, llama_data_context * data_ctx) {
|
||||||
// copy rng
|
// copy rng
|
||||||
{
|
{
|
||||||
std::stringstream rng_ss;
|
std::stringstream rng_ss;
|
||||||
@ -3626,8 +3768,8 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
|||||||
memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE);
|
memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE);
|
||||||
memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size());
|
memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size());
|
||||||
|
|
||||||
memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size);
|
data_ctx->write(&rng_size, sizeof(rng_size));
|
||||||
memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE;
|
data_ctx->write(&rng_buf[0], LLAMA_MAX_RNG_STATE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy logits
|
// copy logits
|
||||||
@ -3635,25 +3777,29 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
|||||||
const size_t logits_cap = ctx->logits.capacity();
|
const size_t logits_cap = ctx->logits.capacity();
|
||||||
const size_t logits_size = ctx->logits.size();
|
const size_t logits_size = ctx->logits.size();
|
||||||
|
|
||||||
memcpy(out, &logits_cap, sizeof(logits_cap)); out += sizeof(logits_cap);
|
data_ctx->write(&logits_cap, sizeof(logits_cap));
|
||||||
memcpy(out, &logits_size, sizeof(logits_size)); out += sizeof(logits_size);
|
data_ctx->write(&logits_size, sizeof(logits_size));
|
||||||
|
|
||||||
if (logits_size) {
|
if (logits_size) {
|
||||||
memcpy(out, ctx->logits.data(), logits_size * sizeof(float));
|
data_ctx->write(ctx->logits.data(), logits_size * sizeof(float));
|
||||||
}
|
}
|
||||||
|
|
||||||
out += logits_cap * sizeof(float);
|
// If there is a gap between the size and the capacity, write padding
|
||||||
|
size_t padding_size = (logits_cap - logits_size) * sizeof(float);
|
||||||
|
if (padding_size > 0) {
|
||||||
|
std::vector<uint8_t> padding(padding_size, 0); // Create a buffer filled with zeros
|
||||||
|
data_ctx->write(padding.data(), padding_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// copy embeddings
|
// copy embeddings
|
||||||
{
|
{
|
||||||
const size_t embedding_size = ctx->embedding.size();
|
const size_t embedding_size = ctx->embedding.size();
|
||||||
|
|
||||||
memcpy(out, &embedding_size, sizeof(embedding_size)); out += sizeof(embedding_size);
|
data_ctx->write(&embedding_size, sizeof(embedding_size));
|
||||||
|
|
||||||
if (embedding_size) {
|
if (embedding_size) {
|
||||||
memcpy(out, ctx->embedding.data(), embedding_size * sizeof(float));
|
data_ctx->write(ctx->embedding.data(), embedding_size * sizeof(float));
|
||||||
out += embedding_size * sizeof(float);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3662,14 +3808,14 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
|||||||
const auto & kv_self = ctx->kv_self;
|
const auto & kv_self = ctx->kv_self;
|
||||||
const auto & hparams = ctx->model.hparams;
|
const auto & hparams = ctx->model.hparams;
|
||||||
const int n_layer = hparams.n_layer;
|
const int n_layer = hparams.n_layer;
|
||||||
const int n_embd = hparams.n_embd;
|
const int n_embd = hparams.n_embd_gqa();
|
||||||
const int n_ctx = hparams.n_ctx;
|
const int n_ctx = hparams.n_ctx;
|
||||||
|
|
||||||
const size_t kv_size = kv_self.buf.size;
|
const size_t kv_size = kv_self.buf.size;
|
||||||
const int kv_ntok = llama_get_kv_cache_token_count(ctx);
|
const int kv_ntok = llama_get_kv_cache_token_count(ctx);
|
||||||
|
|
||||||
memcpy(out, &kv_size, sizeof(kv_size)); out += sizeof(kv_size);
|
data_ctx->write(&kv_size, sizeof(kv_size));
|
||||||
memcpy(out, &kv_ntok, sizeof(kv_ntok)); out += sizeof(kv_ntok);
|
data_ctx->write(&kv_ntok, sizeof(kv_ntok));
|
||||||
|
|
||||||
if (kv_size) {
|
if (kv_size) {
|
||||||
const size_t elt_size = ggml_element_size(kv_self.k);
|
const size_t elt_size = ggml_element_size(kv_self.k);
|
||||||
@ -3678,12 +3824,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
|||||||
ggml_cgraph gf{};
|
ggml_cgraph gf{};
|
||||||
|
|
||||||
ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer);
|
ggml_tensor * kout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.k->type, n_embd, kv_ntok, n_layer);
|
||||||
kout3d->data = out;
|
std::vector<uint8_t> kout3d_data(ggml_nbytes(kout3d), 0);
|
||||||
out += ggml_nbytes(kout3d);
|
kout3d->data = kout3d_data.data();
|
||||||
|
|
||||||
ggml_tensor * vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, n_embd, n_layer);
|
ggml_tensor * vout3d = ggml_new_tensor_3d(cpy_ctx, kv_self.v->type, kv_ntok, n_embd, n_layer);
|
||||||
vout3d->data = out;
|
std::vector<uint8_t> vout3d_data(ggml_nbytes(vout3d), 0);
|
||||||
out += ggml_nbytes(vout3d);
|
vout3d->data = vout3d_data.data();
|
||||||
|
|
||||||
ggml_tensor * k3d = ggml_view_3d(cpy_ctx, kv_self.k,
|
ggml_tensor * k3d = ggml_view_3d(cpy_ctx, kv_self.k,
|
||||||
n_embd, kv_ntok, n_layer,
|
n_embd, kv_ntok, n_layer,
|
||||||
@ -3698,15 +3844,20 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
|||||||
ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
|
ggml_graph_compute_helper(ctx->work_buffer, &gf, /*n_threads*/ 1);
|
||||||
|
|
||||||
ggml_free(cpy_ctx);
|
ggml_free(cpy_ctx);
|
||||||
|
|
||||||
|
// our data is now in the kout3d_data and vout3d_data buffers
|
||||||
|
// write them to file
|
||||||
|
data_ctx->write(kout3d_data.data(), kout3d_data.size());
|
||||||
|
data_ctx->write(vout3d_data.data(), vout3d_data.size());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const size_t written = out - dst;
|
size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
|
||||||
const size_t max_size = llama_get_state_size(ctx);
|
llama_data_buffer_context data_ctx(dst);
|
||||||
|
llama_copy_state_data_internal(ctx, &data_ctx);
|
||||||
|
|
||||||
LLAMA_ASSERT(written <= max_size);
|
return data_ctx.get_size_written();
|
||||||
|
|
||||||
return written;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sets the state reading from the specified source address
|
// Sets the state reading from the specified source address
|
||||||
@ -3765,7 +3916,7 @@ size_t llama_set_state_data(struct llama_context * ctx, uint8_t * src) {
|
|||||||
const auto & kv_self = ctx->kv_self;
|
const auto & kv_self = ctx->kv_self;
|
||||||
const auto & hparams = ctx->model.hparams;
|
const auto & hparams = ctx->model.hparams;
|
||||||
const int n_layer = hparams.n_layer;
|
const int n_layer = hparams.n_layer;
|
||||||
const int n_embd = hparams.n_embd;
|
const int n_embd = hparams.n_embd_gqa();
|
||||||
const int n_ctx = hparams.n_ctx;
|
const int n_ctx = hparams.n_ctx;
|
||||||
|
|
||||||
size_t kv_size;
|
size_t kv_size;
|
||||||
@ -3891,15 +4042,9 @@ bool llama_save_session_file(struct llama_context * ctx, const char * path_sessi
|
|||||||
file.write_u32((uint32_t) n_token_count);
|
file.write_u32((uint32_t) n_token_count);
|
||||||
file.write_raw(tokens, sizeof(llama_token) * n_token_count);
|
file.write_raw(tokens, sizeof(llama_token) * n_token_count);
|
||||||
|
|
||||||
// save the context state
|
// save the context state using stream saving
|
||||||
{
|
llama_data_file_context data_ctx(&file);
|
||||||
const size_t n_state_size_max = llama_get_state_size(ctx);
|
llama_copy_state_data_internal(ctx, &data_ctx);
|
||||||
|
|
||||||
std::vector<uint8_t> state_data(n_state_size_max);
|
|
||||||
const size_t n_state_size_cur = llama_copy_state_data(ctx, state_data.data());
|
|
||||||
|
|
||||||
file.write_raw(state_data.data(), n_state_size_cur);
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
1
llama.h
1
llama.h
@ -108,6 +108,7 @@ extern "C" {
|
|||||||
|
|
||||||
// Keep the booleans together to avoid misalignment during copy-by-value.
|
// Keep the booleans together to avoid misalignment during copy-by-value.
|
||||||
bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
bool low_vram; // if true, reduce VRAM usage at the cost of performance
|
||||||
|
bool mul_mat_q; // if true, use experimental mul_mat_q kernels
|
||||||
bool f16_kv; // use fp16 for KV cache
|
bool f16_kv; // use fp16 for KV cache
|
||||||
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
bool logits_all; // the llama_eval() call computes all logits, not just the last one
|
||||||
bool vocab_only; // only load the vocabulary, no weights
|
bool vocab_only; // only load the vocabulary, no weights
|
||||||
|
@ -10,5 +10,5 @@ cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
|
|||||||
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
|
cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
|
||||||
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
|
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
|
||||||
|
|
||||||
cp -rpv ../ggml/tests/test-opt.c ./tests/test-opt.c
|
cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
|
||||||
cp -rpv ../ggml/tests/test-grad0.c ./tests/test-grad0.c
|
cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
|
||||||
|
@ -6,10 +6,10 @@ function(llama_add_test source)
|
|||||||
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
# llama_add_test(test-double-float.c) # SLOW
|
# llama_add_test(test-double-float.cpp) # SLOW
|
||||||
llama_add_test(test-quantize-fns.cpp)
|
llama_add_test(test-quantize-fns.cpp)
|
||||||
llama_add_test(test-quantize-perf.cpp)
|
llama_add_test(test-quantize-perf.cpp)
|
||||||
llama_add_test(test-sampling.cpp)
|
llama_add_test(test-sampling.cpp)
|
||||||
llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
|
llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
|
||||||
llama_add_test(test-grad0.c) # SLOW
|
llama_add_test(test-grad0.cpp) # SLOW
|
||||||
# llama_add_test(test-opt.c) # SLOW
|
# llama_add_test(test-opt.cpp) # SLOW
|
||||||
|
@ -3,10 +3,11 @@
|
|||||||
// This is done by checking all finite (non-NaN, non-infinite) floats.
|
// This is done by checking all finite (non-NaN, non-infinite) floats.
|
||||||
|
|
||||||
#undef NDEBUG
|
#undef NDEBUG
|
||||||
#include <assert.h>
|
#include <cassert>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include <math.h>
|
#include <cmath>
|
||||||
#include <stdint.h>
|
#include <cstdint>
|
||||||
|
#include <cstring>
|
||||||
|
|
||||||
#pragma GCC diagnostic push
|
#pragma GCC diagnostic push
|
||||||
#pragma GCC diagnostic ignored "-Wdouble-promotion"
|
#pragma GCC diagnostic ignored "-Wdouble-promotion"
|
||||||
@ -32,8 +33,9 @@ inline static float silu_float(float x) {
|
|||||||
int main(void) {
|
int main(void) {
|
||||||
uint32_t x = UINT32_MAX;
|
uint32_t x = UINT32_MAX;
|
||||||
do {
|
do {
|
||||||
float f = *(float *)&x;
|
float f;
|
||||||
assert(!isfinite(f) || (round_orig(f) == round_float(f)));
|
memcpy(&f, &x, sizeof(x));
|
||||||
|
assert(!std::isfinite(f) || (round_orig(f) == round_float(f)));
|
||||||
} while (x--);
|
} while (x--);
|
||||||
|
|
||||||
#ifdef __F16C__
|
#ifdef __F16C__
|
@ -1,10 +1,10 @@
|
|||||||
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
|
#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
|
||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include <cmath>
|
||||||
#include <stdio.h>
|
#include <cstdio>
|
||||||
#include <stdlib.h>
|
#include <cstdlib>
|
||||||
#include <assert.h>
|
#include <cassert>
|
||||||
|
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
@ -47,16 +47,16 @@
|
|||||||
|
|
||||||
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
#define GGML_PRINT(...) printf(__VA_ARGS__)
|
||||||
|
|
||||||
float frand(void) {
|
static float frand(void) {
|
||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
int irand(int n) {
|
static int irand(int n) {
|
||||||
if (n == 0) return 0;
|
if (n == 0) return 0;
|
||||||
return rand()%n;
|
return rand()%n;
|
||||||
}
|
}
|
||||||
|
|
||||||
void get_random_dims(int64_t * dims, int ndims) {
|
static void get_random_dims(int64_t * dims, int ndims) {
|
||||||
dims[0] = dims[1] = dims[2] = dims[3] = 1;
|
dims[0] = dims[1] = dims[2] = dims[3] = 1;
|
||||||
|
|
||||||
for (int i = 0; i < ndims; i++) {
|
for (int i = 0; i < ndims; i++) {
|
||||||
@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * get_random_tensor_f32(
|
static struct ggml_tensor * get_random_tensor_f32(
|
||||||
struct ggml_context * ctx0,
|
struct ggml_context * ctx0,
|
||||||
int ndims,
|
int ndims,
|
||||||
int64_t ne[],
|
int64_t ne[],
|
||||||
@ -112,7 +112,7 @@ struct ggml_tensor * get_random_tensor_f32(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * get_random_tensor_f16(
|
static struct ggml_tensor * get_random_tensor_f16(
|
||||||
struct ggml_context * ctx0,
|
struct ggml_context * ctx0,
|
||||||
int ndims,
|
int ndims,
|
||||||
int64_t ne[],
|
int64_t ne[],
|
||||||
@ -160,7 +160,7 @@ struct ggml_tensor * get_random_tensor_f16(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor * get_random_tensor_i32(
|
static struct ggml_tensor * get_random_tensor_i32(
|
||||||
struct ggml_context * ctx0,
|
struct ggml_context * ctx0,
|
||||||
int ndims,
|
int ndims,
|
||||||
int64_t ne[],
|
int64_t ne[],
|
||||||
@ -208,7 +208,7 @@ struct ggml_tensor * get_random_tensor_i32(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_elements(const char* label, const struct ggml_tensor * t) {
|
static void print_elements(const char* label, const struct ggml_tensor * t) {
|
||||||
if (!t) {
|
if (!t) {
|
||||||
printf("%s: %s = null\n", __func__, label);
|
printf("%s: %s = null\n", __func__, label);
|
||||||
return;
|
return;
|
||||||
@ -228,7 +228,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool check_gradient(
|
static bool check_gradient(
|
||||||
const char * op_name,
|
const char * op_name,
|
||||||
struct ggml_context * ctx0,
|
struct ggml_context * ctx0,
|
||||||
struct ggml_tensor * x[],
|
struct ggml_tensor * x[],
|
||||||
@ -310,7 +310,7 @@ bool check_gradient(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: clean-up this ..
|
// TODO: clean-up this ..
|
||||||
bool check_mat_mul(
|
static bool check_mat_mul(
|
||||||
const struct ggml_tensor * y,
|
const struct ggml_tensor * y,
|
||||||
const struct ggml_tensor * x0,
|
const struct ggml_tensor * x0,
|
||||||
const struct ggml_tensor * x1) {
|
const struct ggml_tensor * x1) {
|
||||||
@ -373,9 +373,9 @@ bool check_mat_mul(
|
|||||||
|
|
||||||
int main(int argc, const char ** argv) {
|
int main(int argc, const char ** argv) {
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
.mem_size = 128*1024*1024,
|
/* .mem_size = */ 128*1024*1024,
|
||||||
.mem_buffer = NULL,
|
/* .mem_buffer = */ NULL,
|
||||||
.no_alloc = false,
|
/* .no_alloc = */ false,
|
||||||
};
|
};
|
||||||
|
|
||||||
int64_t ne[4];
|
int64_t ne[4];
|
@ -1,9 +1,9 @@
|
|||||||
#include "ggml.h"
|
#include "ggml.h"
|
||||||
|
|
||||||
#include <math.h>
|
#include <cmath>
|
||||||
#include <stdio.h>
|
#include <cstdio>
|
||||||
#include <stdlib.h>
|
#include <cstdlib>
|
||||||
#include <assert.h>
|
#include <cassert>
|
||||||
|
|
||||||
#define MAX_NARGS 2
|
#define MAX_NARGS 2
|
||||||
|
|
||||||
@ -119,10 +119,11 @@ void set_element(struct ggml_tensor * t, int idx, float value) {
|
|||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
.mem_size = 1024*1024*1024,
|
/* .mem_size = */ 1024*1024*1024,
|
||||||
.mem_buffer = NULL,
|
/* .mem_buffer = */ NULL,
|
||||||
.no_alloc = false,
|
/* .no_alloc = */ false,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_context * ctx = ggml_init(params);
|
struct ggml_context * ctx = ggml_init(params);
|
||||||
|
|
||||||
int64_t ne1[4] = {4, 128, 1, 1};
|
int64_t ne1[4] = {4, 128, 1, 1};
|
Loading…
Reference in New Issue
Block a user