mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 12:21:40 +01:00
Merge branch 'ggerganov:master' into master
This commit is contained in:
commit
8bd24b2e5c
@ -1,6 +1,9 @@
|
|||||||
*.o
|
*.o
|
||||||
*.a
|
*.a
|
||||||
.cache/
|
.cache/
|
||||||
|
.git/
|
||||||
|
.github/
|
||||||
|
.gitignore
|
||||||
.vs/
|
.vs/
|
||||||
.vscode/
|
.vscode/
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
43
.github/workflows/build.yml
vendored
43
.github/workflows/build.yml
vendored
@ -10,10 +10,10 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- master
|
- master
|
||||||
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
|
paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
|
||||||
pull_request:
|
pull_request:
|
||||||
types: [opened, synchronize, reopened]
|
types: [opened, synchronize, reopened]
|
||||||
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu']
|
paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift']
|
||||||
|
|
||||||
env:
|
env:
|
||||||
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
||||||
@ -188,7 +188,7 @@ jobs:
|
|||||||
sysctl -a
|
sysctl -a
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF ..
|
cmake ..
|
||||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
@ -253,6 +253,29 @@ jobs:
|
|||||||
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
|
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
|
||||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
||||||
|
|
||||||
|
macOS-latest-swift:
|
||||||
|
runs-on: macos-latest
|
||||||
|
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Clone
|
||||||
|
id: checkout
|
||||||
|
uses: actions/checkout@v1
|
||||||
|
|
||||||
|
- name: Dependencies
|
||||||
|
id: depends
|
||||||
|
continue-on-error: true
|
||||||
|
run: |
|
||||||
|
brew update
|
||||||
|
|
||||||
|
- name: xcodebuild for swift package
|
||||||
|
id: xcodebuild
|
||||||
|
run: |
|
||||||
|
xcodebuild -scheme llama -destination "${{ matrix.destination }}"
|
||||||
|
|
||||||
windows-latest-cmake:
|
windows-latest-cmake:
|
||||||
runs-on: windows-latest
|
runs-on: windows-latest
|
||||||
|
|
||||||
@ -265,17 +288,17 @@ jobs:
|
|||||||
matrix:
|
matrix:
|
||||||
include:
|
include:
|
||||||
- build: 'noavx'
|
- build: 'noavx'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'avx2'
|
- build: 'avx2'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'avx'
|
- build: 'avx'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'avx512'
|
- build: 'avx512'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
||||||
- build: 'clblast'
|
- build: 'clblast'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CLBLAST=ON -DBUILD_SHARED_LIBS=ON -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/clblast"'
|
||||||
- build: 'openblas'
|
- build: 'openblas'
|
||||||
defines: '-DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Clone
|
- name: Clone
|
||||||
@ -414,7 +437,7 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
|
cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUBLAS=ON -DBUILD_SHARED_LIBS=ON
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
||||||
|
|
||||||
- name: Determine tag name
|
- name: Determine tag name
|
||||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -91,4 +91,5 @@ tests/test-quantize-perf
|
|||||||
tests/test-sampling
|
tests/test-sampling
|
||||||
tests/test-tokenizer-0-llama
|
tests/test-tokenizer-0-llama
|
||||||
tests/test-tokenizer-0-falcon
|
tests/test-tokenizer-0-falcon
|
||||||
tests/test-tokenizer-1
|
tests/test-tokenizer-1-llama
|
||||||
|
tests/test-tokenizer-1-bpe
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason
|
cmake_minimum_required(VERSION 3.13) # for add_link_options
|
||||||
project("llama.cpp" C CXX)
|
project("llama.cpp" C CXX)
|
||||||
|
|
||||||
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
@ -44,7 +44,7 @@ endif()
|
|||||||
|
|
||||||
# general
|
# general
|
||||||
option(LLAMA_STATIC "llama: static link libraries" OFF)
|
option(LLAMA_STATIC "llama: static link libraries" OFF)
|
||||||
option(LLAMA_NATIVE "llama: enable -march=native flag" OFF)
|
option(LLAMA_NATIVE "llama: enable -march=native flag" ON)
|
||||||
option(LLAMA_LTO "llama: enable link time optimization" OFF)
|
option(LLAMA_LTO "llama: enable link time optimization" OFF)
|
||||||
|
|
||||||
# debug
|
# debug
|
||||||
@ -58,15 +58,21 @@ option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer"
|
|||||||
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)
|
||||||
|
|
||||||
# instruction set specific
|
# instruction set specific
|
||||||
option(LLAMA_AVX "llama: enable AVX" ON)
|
if (LLAMA_NATIVE)
|
||||||
option(LLAMA_AVX2 "llama: enable AVX2" ON)
|
set(INS_ENB OFF)
|
||||||
|
else()
|
||||||
|
set(INS_ENB ON)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(LLAMA_AVX "llama: enable AVX" ${INS_ENB})
|
||||||
|
option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB})
|
||||||
option(LLAMA_AVX512 "llama: enable AVX512" OFF)
|
option(LLAMA_AVX512 "llama: enable AVX512" OFF)
|
||||||
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
|
option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF)
|
||||||
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
|
option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF)
|
||||||
option(LLAMA_FMA "llama: enable FMA" ON)
|
option(LLAMA_FMA "llama: enable FMA" ${INS_ENB})
|
||||||
# in MSVC F16C is implied with AVX2/AVX512
|
# in MSVC F16C is implied with AVX2/AVX512
|
||||||
if (NOT MSVC)
|
if (NOT MSVC)
|
||||||
option(LLAMA_F16C "llama: enable F16C" ON)
|
option(LLAMA_F16C "llama: enable F16C" ${INS_ENB})
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# 3rd party libs
|
# 3rd party libs
|
||||||
@ -343,8 +349,9 @@ if (LLAMA_MPI)
|
|||||||
set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
|
set(GGML_SOURCES_MPI ggml-mpi.c ggml-mpi.h)
|
||||||
add_compile_definitions(GGML_USE_MPI)
|
add_compile_definitions(GGML_USE_MPI)
|
||||||
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
|
add_compile_definitions(${MPI_C_COMPILE_DEFINITIONS})
|
||||||
set(cxx_flags ${cxx_flags} -Wno-cast-qual)
|
if (NOT MSVC)
|
||||||
set(c_flags ${c_flags} -Wno-cast-qual)
|
add_compile_options(-Wno-cast-qual)
|
||||||
|
endif()
|
||||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
|
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${MPI_C_LIBRARIES})
|
||||||
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
|
set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${MPI_C_INCLUDE_DIRS})
|
||||||
# Even if you're only using the C header, C++ programs may bring in MPI
|
# Even if you're only using the C header, C++ programs may bring in MPI
|
||||||
@ -418,10 +425,11 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
|
set(c_flags -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int
|
||||||
-Werror=implicit-function-declaration)
|
-Werror=implicit-function-declaration)
|
||||||
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
set(cxx_flags -Wmissing-declarations -Wmissing-noreturn)
|
||||||
|
set(host_cxx_flags "")
|
||||||
|
|
||||||
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
if (CMAKE_C_COMPILER_ID MATCHES "Clang")
|
||||||
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
|
set(warning_flags ${warning_flags} -Wunreachable-code-break -Wunreachable-code-return)
|
||||||
set(cxx_flags ${cxx_flags} -Wmissing-prototypes -Wextra-semi)
|
set(host_cxx_flags ${host_cxx_flags} -Wmissing-prototypes -Wextra-semi)
|
||||||
|
|
||||||
if (
|
if (
|
||||||
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
|
(CMAKE_C_COMPILER_ID STREQUAL "Clang" AND CMAKE_C_COMPILER_VERSION VERSION_GREATER_EQUAL 3.8.0) OR
|
||||||
@ -431,27 +439,38 @@ if (LLAMA_ALL_WARNINGS)
|
|||||||
endif()
|
endif()
|
||||||
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
elseif (CMAKE_C_COMPILER_ID STREQUAL "GNU")
|
||||||
set(c_flags ${c_flags} -Wdouble-promotion)
|
set(c_flags ${c_flags} -Wdouble-promotion)
|
||||||
set(cxx_flags ${cxx_flags} -Wno-array-bounds)
|
set(host_cxx_flags ${host_cxx_flags} -Wno-array-bounds)
|
||||||
|
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 7.1.0)
|
||||||
set(cxx_flags ${cxx_flags} -Wno-format-truncation)
|
set(host_cxx_flags ${host_cxx_flags} -Wno-format-truncation)
|
||||||
endif()
|
endif()
|
||||||
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
|
if (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL 8.1.0)
|
||||||
set(cxx_flags ${cxx_flags} -Wextra-semi)
|
set(host_cxx_flags ${host_cxx_flags} -Wextra-semi)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
# todo : msvc
|
# todo : msvc
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_compile_options(
|
set(c_flags ${c_flags} ${warning_flags})
|
||||||
${warning_flags}
|
set(cxx_flags ${cxx_flags} ${warning_flags})
|
||||||
"$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${c_flags}>"
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags}>"
|
"$<$<COMPILE_LANGUAGE:CXX>:${cxx_flags} ${host_cxx_flags}>")
|
||||||
)
|
|
||||||
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if (NOT MSVC)
|
||||||
|
set(cuda_flags -Wno-pedantic)
|
||||||
|
endif()
|
||||||
|
set(cuda_flags ${cxx_flags} -use_fast_math ${cuda_flags})
|
||||||
|
|
||||||
|
list(JOIN host_cxx_flags " " cuda_host_flags) # pass host compiler flags as a single argument
|
||||||
|
if (NOT cuda_host_flags STREQUAL "")
|
||||||
|
set(cuda_flags ${cuda_flags} -Xcompiler ${cuda_host_flags})
|
||||||
|
endif()
|
||||||
|
|
||||||
|
add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${cuda_flags}>")
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
|
||||||
|
|
||||||
@ -491,9 +510,6 @@ if (NOT MSVC)
|
|||||||
if (LLAMA_GPROF)
|
if (LLAMA_GPROF)
|
||||||
add_compile_options(-pg)
|
add_compile_options(-pg)
|
||||||
endif()
|
endif()
|
||||||
if (LLAMA_NATIVE)
|
|
||||||
add_compile_options(-march=native)
|
|
||||||
endif()
|
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
|
if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") OR (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") OR ("${CMAKE_GENERATOR_PLATFORM_LWR}" MATCHES "arm64"))
|
||||||
@ -548,6 +564,9 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$" OR "${CMAKE_GE
|
|||||||
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
|
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
|
||||||
endif()
|
endif()
|
||||||
else()
|
else()
|
||||||
|
if (LLAMA_NATIVE)
|
||||||
|
add_compile_options(-march=native)
|
||||||
|
endif()
|
||||||
if (LLAMA_F16C)
|
if (LLAMA_F16C)
|
||||||
add_compile_options(-mf16c)
|
add_compile_options(-mf16c)
|
||||||
endif()
|
endif()
|
||||||
@ -705,6 +724,7 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR}
|
|||||||
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
|
set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
|
||||||
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
|
set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
|
||||||
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
|
set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
|
||||||
|
get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
|
||||||
|
|
||||||
configure_package_config_file(
|
configure_package_config_file(
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
|
${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in
|
||||||
|
9
Makefile
9
Makefile
@ -2,7 +2,7 @@
|
|||||||
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
|
BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot q8dot train-text-from-scratch convert-llama2c-to-ggml simple batched save-load-state server embd-input-test gguf llama-bench baby-llama beam-search speculative infill benchmark-matmult parallel finetune export-lora tests/test-c.o
|
||||||
|
|
||||||
# Binaries only useful for tests
|
# Binaries only useful for tests
|
||||||
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama
|
TEST_TARGETS = tests/test-llama-grammar tests/test-grammar-parser tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0-llama tests/test-tokenizer-0-falcon tests/test-tokenizer-1-llama tests/test-tokenizer-1-bpe
|
||||||
|
|
||||||
# Code coverage output files
|
# Code coverage output files
|
||||||
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
|
COV_TARGETS = *.gcno tests/*.gcno *.gcda tests/*.gcda *.gcov tests/*.gcov lcov-report gcovr-report
|
||||||
@ -62,9 +62,11 @@ test: $(TEST_TARGETS)
|
|||||||
if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
|
if [ "$$test_target" = "tests/test-tokenizer-0-llama" ]; then \
|
||||||
./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
|
./$$test_target $(CURDIR)/models/ggml-vocab-llama.gguf; \
|
||||||
elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
|
elif [ "$$test_target" = "tests/test-tokenizer-0-falcon" ]; then \
|
||||||
continue; \
|
./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
|
||||||
elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
|
elif [ "$$test_target" = "tests/test-tokenizer-1-llama" ]; then \
|
||||||
continue; \
|
continue; \
|
||||||
|
elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
|
||||||
|
continue; \
|
||||||
else \
|
else \
|
||||||
echo "Running test $$test_target..."; \
|
echo "Running test $$test_target..."; \
|
||||||
./$$test_target; \
|
./$$test_target; \
|
||||||
@ -670,6 +672,9 @@ tests/test-tokenizer-0-falcon: tests/test-tokenizer-0-falcon.cpp build-info.h gg
|
|||||||
tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-tokenizer-0-llama: tests/test-tokenizer-0-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
tests/test-tokenizer-1-llama: tests/test-tokenizer-1-llama.cpp build-info.h ggml.o llama.o common.o $(OBJS)
|
||||||
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
@ -44,9 +44,12 @@ let package = Package(
|
|||||||
cSettings: [
|
cSettings: [
|
||||||
.unsafeFlags(["-Wno-shorten-64-to-32"]),
|
.unsafeFlags(["-Wno-shorten-64-to-32"]),
|
||||||
.define("GGML_USE_K_QUANTS"),
|
.define("GGML_USE_K_QUANTS"),
|
||||||
.define("GGML_USE_ACCELERATE"),
|
.define("GGML_USE_ACCELERATE")
|
||||||
.define("ACCELERATE_NEW_LAPACK"),
|
// NOTE: NEW_LAPACK will require iOS version 16.4+
|
||||||
.define("ACCELERATE_LAPACK_ILP64")
|
// We should consider adding this in the future when we drop support for iOS 14
|
||||||
|
// (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
|
||||||
|
// .define("ACCELERATE_NEW_LAPACK"),
|
||||||
|
// .define("ACCELERATE_LAPACK_ILP64")
|
||||||
] + additionalSettings,
|
] + additionalSettings,
|
||||||
linkerSettings: [
|
linkerSettings: [
|
||||||
.linkedFramework("Accelerate")
|
.linkedFramework("Accelerate")
|
||||||
|
@ -5,7 +5,7 @@
|
|||||||
[![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions)
|
[![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions)
|
||||||
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
|
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
|
||||||
|
|
||||||
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
|
[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml)
|
||||||
|
|
||||||
Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
|
Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++
|
||||||
|
|
||||||
|
@ -167,6 +167,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// store the external file name in params
|
||||||
|
params.prompt_file = argv[i];
|
||||||
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
|
std::copy(std::istreambuf_iterator<char>(file), std::istreambuf_iterator<char>(), back_inserter(params.prompt));
|
||||||
if (params.prompt.back() == '\n') {
|
if (params.prompt.back() == '\n') {
|
||||||
params.prompt.pop_back();
|
params.prompt.pop_back();
|
||||||
@ -361,7 +363,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.push_back({argv[i], 1.0f});
|
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
} else if (arg == "--lora-scaled") {
|
} else if (arg == "--lora-scaled") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
@ -373,7 +375,7 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.push_back({lora_adapter, std::stof(argv[i])});
|
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
} else if (arg == "--lora-base") {
|
} else if (arg == "--lora-base") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
@ -616,6 +618,9 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
process_escapes(params.prompt);
|
process_escapes(params.prompt);
|
||||||
process_escapes(params.input_prefix);
|
process_escapes(params.input_prefix);
|
||||||
process_escapes(params.input_suffix);
|
process_escapes(params.input_suffix);
|
||||||
|
for (auto & antiprompt : params.antiprompt) {
|
||||||
|
process_escapes(antiprompt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
@ -923,6 +928,7 @@ std::string llama_detokenize_bpe(llama_context * ctx, const std::vector<llama_to
|
|||||||
result += piece;
|
result += piece;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE: the original tokenizer decodes bytes after collecting the pieces.
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1016,10 +1022,11 @@ llama_token llama_sample_token(
|
|||||||
id = llama_sample_token_mirostat_v2(ctx, &cur_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
id = llama_sample_token_mirostat_v2(ctx, &cur_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
||||||
} else {
|
} else {
|
||||||
// Temperature sampling
|
// Temperature sampling
|
||||||
llama_sample_top_k (ctx, &cur_p, top_k, 1);
|
size_t min_keep = std::max(1, params.n_probs);
|
||||||
llama_sample_tail_free (ctx, &cur_p, tfs_z, 1);
|
llama_sample_top_k (ctx, &cur_p, top_k, min_keep);
|
||||||
llama_sample_typical (ctx, &cur_p, typical_p, 1);
|
llama_sample_tail_free (ctx, &cur_p, tfs_z, min_keep);
|
||||||
llama_sample_top_p (ctx, &cur_p, top_p, 1);
|
llama_sample_typical (ctx, &cur_p, typical_p, min_keep);
|
||||||
|
llama_sample_top_p (ctx, &cur_p, top_p, min_keep);
|
||||||
llama_sample_temp(ctx, &cur_p, temp);
|
llama_sample_temp(ctx, &cur_p, temp);
|
||||||
|
|
||||||
{
|
{
|
||||||
|
@ -79,6 +79,7 @@ struct gpt_params {
|
|||||||
std::string model_draft = ""; // draft model for speculative decoding
|
std::string model_draft = ""; // draft model for speculative decoding
|
||||||
std::string model_alias = "unknown"; // model alias
|
std::string model_alias = "unknown"; // model alias
|
||||||
std::string prompt = "";
|
std::string prompt = "";
|
||||||
|
std::string prompt_file = ""; // store the external prompt file name
|
||||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
||||||
std::string input_prefix = ""; // string to prefix user inputs with
|
std::string input_prefix = ""; // string to prefix user inputs with
|
||||||
std::string input_suffix = ""; // string to suffix user inputs with
|
std::string input_suffix = ""; // string to suffix user inputs with
|
||||||
|
@ -11,11 +11,14 @@ import sys
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Any
|
from typing import TYPE_CHECKING, Any
|
||||||
import itertools
|
import itertools
|
||||||
import gguf
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
from sentencepiece import SentencePieceProcessor # type: ignore[import]
|
from sentencepiece import SentencePieceProcessor # type: ignore[import]
|
||||||
|
|
||||||
|
if 'NO_LOCAL_GGUF' not in os.environ:
|
||||||
|
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
|
||||||
|
import gguf
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from typing import TypeAlias
|
from typing import TypeAlias
|
||||||
@ -174,8 +177,11 @@ if not tokenizer_model_file.is_file():
|
|||||||
print("gguf: get sentencepiece tokenizer vocab, scores and token types")
|
print("gguf: get sentencepiece tokenizer vocab, scores and token types")
|
||||||
|
|
||||||
tokenizer = SentencePieceProcessor(str(tokenizer_model_file))
|
tokenizer = SentencePieceProcessor(str(tokenizer_model_file))
|
||||||
|
vocab_size = hparams.get('vocab_size')
|
||||||
|
if vocab_size is None:
|
||||||
|
vocab_size = tokenizer.vocab_size()
|
||||||
|
|
||||||
for i in range(tokenizer.vocab_size()):
|
for i in range(vocab_size):
|
||||||
text: bytes
|
text: bytes
|
||||||
score: float
|
score: float
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import contextlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import struct
|
import struct
|
||||||
@ -20,32 +21,10 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
|||||||
import gguf
|
import gguf
|
||||||
|
|
||||||
|
|
||||||
def bytes_to_unicode():
|
def count_model_parts(dir_model: Path, prefix: str) -> int:
|
||||||
# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
|
|
||||||
"""
|
|
||||||
Returns list of utf-8 byte and a corresponding list of unicode strings.
|
|
||||||
The reversible bpe codes work on unicode strings.
|
|
||||||
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
|
|
||||||
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
|
|
||||||
This is a significant percentage of your normal, say, 32K bpe vocab.
|
|
||||||
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
|
|
||||||
And avoids mapping to whitespace/control characters the bpe code barfs on.
|
|
||||||
"""
|
|
||||||
bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
|
|
||||||
cs = bs[:]
|
|
||||||
n = 0
|
|
||||||
for b in range(2**8):
|
|
||||||
if b not in bs:
|
|
||||||
bs.append(b)
|
|
||||||
cs.append(2**8+n)
|
|
||||||
n += 1
|
|
||||||
return dict(zip(bs, (chr(n) for n in cs)))
|
|
||||||
|
|
||||||
|
|
||||||
def count_model_parts(dir_model: Path) -> int:
|
|
||||||
num_parts = 0
|
num_parts = 0
|
||||||
for filename in os.listdir(dir_model):
|
for filename in os.listdir(dir_model):
|
||||||
if filename.startswith("pytorch_model-"):
|
if filename.startswith(prefix):
|
||||||
num_parts += 1
|
num_parts += 1
|
||||||
|
|
||||||
if num_parts > 0:
|
if num_parts > 0:
|
||||||
@ -99,20 +78,26 @@ print("gguf: loading model "+dir_model.name)
|
|||||||
with open(dir_model / "config.json", "r", encoding="utf-8") as f:
|
with open(dir_model / "config.json", "r", encoding="utf-8") as f:
|
||||||
hparams = json.load(f)
|
hparams = json.load(f)
|
||||||
|
|
||||||
if hparams["architectures"][0] != "RWForCausalLM":
|
if hparams["architectures"][0] != "FalconForCausalLM":
|
||||||
print("Model architecture not supported: " + hparams["architectures"][0])
|
print("Model architecture not supported: " + hparams["architectures"][0])
|
||||||
|
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# get number of model parts
|
# get number of model parts
|
||||||
num_parts = count_model_parts(dir_model)
|
num_parts = count_model_parts(dir_model, "model-00")
|
||||||
|
if num_parts:
|
||||||
|
is_safetensors = True
|
||||||
|
from safetensors import safe_open
|
||||||
|
else:
|
||||||
|
is_safetensors = False
|
||||||
|
num_parts = count_model_parts(dir_model, "pytorch_model-")
|
||||||
|
|
||||||
ARCH=gguf.MODEL_ARCH.FALCON
|
ARCH=gguf.MODEL_ARCH.FALCON
|
||||||
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
||||||
|
|
||||||
print("gguf: get model metadata")
|
print("gguf: get model metadata")
|
||||||
|
|
||||||
block_count = hparams["n_layer"]
|
block_count = hparams["num_hidden_layers"]
|
||||||
|
|
||||||
gguf_writer.add_name("Falcon")
|
gguf_writer.add_name("Falcon")
|
||||||
gguf_writer.add_context_length(2048) # not in config.json
|
gguf_writer.add_context_length(2048) # not in config.json
|
||||||
@ -120,9 +105,9 @@ gguf_writer.add_tensor_data_layout("jploski") # qkv tensor transform
|
|||||||
gguf_writer.add_embedding_length(hparams["hidden_size"])
|
gguf_writer.add_embedding_length(hparams["hidden_size"])
|
||||||
gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
|
gguf_writer.add_feed_forward_length(4 * hparams["hidden_size"])
|
||||||
gguf_writer.add_block_count(block_count)
|
gguf_writer.add_block_count(block_count)
|
||||||
gguf_writer.add_head_count(hparams["n_head"])
|
gguf_writer.add_head_count(hparams["num_attention_heads"])
|
||||||
if "n_head_kv" in hparams:
|
if "num_kv_heads" in hparams:
|
||||||
gguf_writer.add_head_count_kv(hparams["n_head_kv"])
|
gguf_writer.add_head_count_kv(hparams["num_kv_heads"])
|
||||||
else:
|
else:
|
||||||
gguf_writer.add_head_count_kv(1)
|
gguf_writer.add_head_count_kv(1)
|
||||||
gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
|
gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
|
||||||
@ -133,50 +118,32 @@ gguf_writer.add_file_type(ftype)
|
|||||||
print("gguf: get tokenizer metadata")
|
print("gguf: get tokenizer metadata")
|
||||||
|
|
||||||
tokens: list[bytearray] = []
|
tokens: list[bytearray] = []
|
||||||
|
scores: list[float] = []
|
||||||
tokenizer_json_file = dir_model / 'tokenizer.json'
|
toktypes: list[int] = []
|
||||||
if not tokenizer_json_file.is_file():
|
|
||||||
print(f'Error: Missing {tokenizer_json_file}', file = sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# gpt2 tokenizer
|
# gpt2 tokenizer
|
||||||
gguf_writer.add_tokenizer_model("gpt2")
|
gguf_writer.add_tokenizer_model("gpt2")
|
||||||
|
|
||||||
with open(tokenizer_json_file, "r", encoding="utf-8") as f:
|
|
||||||
tokenizer_json = json.load(f)
|
|
||||||
|
|
||||||
print("gguf: get gpt2 tokenizer vocab")
|
print("gguf: get gpt2 tokenizer vocab")
|
||||||
|
|
||||||
# The number of tokens in tokenizer.json can differ from the expected vocab size.
|
|
||||||
# This causes downstream issues with mismatched tensor sizes when running the inference
|
|
||||||
vocab_size = hparams["vocab_size"] if "vocab_size" in hparams else len(tokenizer_json["model"]["vocab"])
|
|
||||||
|
|
||||||
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||||
|
|
||||||
|
# The number of tokens in tokenizer.json can differ from the expected vocab size.
|
||||||
|
# This causes downstream issues with mismatched tensor sizes when running the inference
|
||||||
|
vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
|
||||||
|
assert max(tokenizer.vocab.values()) < vocab_size
|
||||||
|
|
||||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
||||||
byte_encoder = bytes_to_unicode()
|
|
||||||
byte_decoder = {v: k for k, v in byte_encoder.items()}
|
|
||||||
|
|
||||||
for i in range(vocab_size):
|
for i in range(vocab_size):
|
||||||
if i in reverse_vocab:
|
tokens.append(reverse_vocab[i])
|
||||||
try:
|
scores.append(0.0) # dummy
|
||||||
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
|
toktypes.append(gguf.TokenType.NORMAL)
|
||||||
except KeyError:
|
|
||||||
text = bytearray()
|
|
||||||
for c in reverse_vocab[i]:
|
|
||||||
if ord(c) < 256: # single byte character
|
|
||||||
text.append(byte_decoder[ord(c)])
|
|
||||||
else: # multibyte special token character
|
|
||||||
text.extend(c.encode('utf-8'))
|
|
||||||
else:
|
|
||||||
print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
|
|
||||||
pad_token = f"[PAD{i}]".encode("utf8")
|
|
||||||
text = bytearray(pad_token)
|
|
||||||
|
|
||||||
tokens.append(text)
|
|
||||||
|
|
||||||
gguf_writer.add_token_list(tokens)
|
gguf_writer.add_token_list(tokens)
|
||||||
|
gguf_writer.add_token_scores(scores)
|
||||||
|
gguf_writer.add_token_types(toktypes)
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
||||||
special_vocab.add_to_gguf(gguf_writer)
|
special_vocab.add_to_gguf(gguf_writer)
|
||||||
@ -186,8 +153,8 @@ special_vocab.add_to_gguf(gguf_writer)
|
|||||||
tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
|
tensor_map = gguf.get_tensor_name_map(ARCH,block_count)
|
||||||
|
|
||||||
# params for qkv transform
|
# params for qkv transform
|
||||||
n_head = hparams["n_head"]
|
n_head = hparams["num_attention_heads"]
|
||||||
n_head_kv = hparams["n_head_kv"] if "n_head_kv" in hparams else 1
|
n_head_kv = hparams["num_kv_heads"] if "num_kv_heads" in hparams else 1
|
||||||
|
|
||||||
head_dim = hparams["hidden_size"] // n_head
|
head_dim = hparams["hidden_size"] // n_head
|
||||||
|
|
||||||
@ -196,6 +163,10 @@ print("gguf: get tensor metadata")
|
|||||||
|
|
||||||
if num_parts == 0:
|
if num_parts == 0:
|
||||||
part_names = iter(("pytorch_model.bin",))
|
part_names = iter(("pytorch_model.bin",))
|
||||||
|
elif is_safetensors:
|
||||||
|
part_names = (
|
||||||
|
f"model-{n:05}-of-{num_parts:05}.safetensors" for n in range(1, num_parts + 1)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
part_names = (
|
part_names = (
|
||||||
f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
|
f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
|
||||||
@ -205,10 +176,14 @@ for part_name in part_names:
|
|||||||
if args.vocab_only:
|
if args.vocab_only:
|
||||||
break
|
break
|
||||||
print("gguf: loading model part '" + part_name + "'")
|
print("gguf: loading model part '" + part_name + "'")
|
||||||
model_part = torch.load(dir_model / part_name, map_location="cpu")
|
if is_safetensors:
|
||||||
|
ctx = safe_open(dir_model / part_name, framework="pt", device="cpu")
|
||||||
|
else:
|
||||||
|
ctx = contextlib.nullcontext(torch.load(dir_model / part_name, map_location="cpu"))
|
||||||
|
|
||||||
|
with ctx as model_part:
|
||||||
for name in model_part.keys():
|
for name in model_part.keys():
|
||||||
data = model_part[name]
|
data = model_part.get_tensor(name) if is_safetensors else model_part[name]
|
||||||
|
|
||||||
old_dtype = data.dtype
|
old_dtype = data.dtype
|
||||||
|
|
||||||
|
@ -19,29 +19,6 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
|||||||
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
|
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
|
||||||
import gguf
|
import gguf
|
||||||
|
|
||||||
# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
|
|
||||||
|
|
||||||
|
|
||||||
def bytes_to_unicode():
|
|
||||||
"""
|
|
||||||
Returns list of utf-8 byte and a corresponding list of unicode strings.
|
|
||||||
The reversible bpe codes work on unicode strings.
|
|
||||||
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
|
|
||||||
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
|
|
||||||
This is a significant percentage of your normal, say, 32K bpe vocab.
|
|
||||||
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
|
|
||||||
And avoids mapping to whitespace/control characters the bpe code barfs on.
|
|
||||||
"""
|
|
||||||
bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
|
|
||||||
cs = bs[:]
|
|
||||||
n = 0
|
|
||||||
for b in range(2**8):
|
|
||||||
if b not in bs:
|
|
||||||
bs.append(b)
|
|
||||||
cs.append(2**8+n)
|
|
||||||
n += 1
|
|
||||||
return dict(zip(bs, (chr(n) for n in cs)))
|
|
||||||
|
|
||||||
|
|
||||||
def count_model_parts(dir_model: Path) -> int:
|
def count_model_parts(dir_model: Path) -> int:
|
||||||
num_parts = 0
|
num_parts = 0
|
||||||
@ -130,48 +107,32 @@ gguf_writer.add_layer_norm_eps(hparams["layer_norm_eps"])
|
|||||||
print("gguf: get tokenizer metadata")
|
print("gguf: get tokenizer metadata")
|
||||||
|
|
||||||
tokens: list[bytearray] = []
|
tokens: list[bytearray] = []
|
||||||
|
scores: list[float] = []
|
||||||
tokenizer_json_file = dir_model / 'tokenizer.json'
|
toktypes: list[int] = []
|
||||||
if not tokenizer_json_file.is_file():
|
|
||||||
print(f'Error: Missing {tokenizer_json_file}', file = sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# gpt2 tokenizer
|
# gpt2 tokenizer
|
||||||
gguf_writer.add_tokenizer_model("gpt2")
|
gguf_writer.add_tokenizer_model("gpt2")
|
||||||
|
|
||||||
with open(tokenizer_json_file, "r", encoding="utf-8") as f:
|
|
||||||
tokenizer_json = json.load(f)
|
|
||||||
|
|
||||||
print("gguf: get gpt2 tokenizer vocab")
|
print("gguf: get gpt2 tokenizer vocab")
|
||||||
|
|
||||||
vocab_size = len(tokenizer_json["model"]["vocab"])
|
|
||||||
|
|
||||||
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||||
|
|
||||||
|
# The number of tokens in tokenizer.json can differ from the expected vocab size.
|
||||||
|
# This causes downstream issues with mismatched tensor sizes when running the inference
|
||||||
|
vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
|
||||||
|
assert max(tokenizer.vocab.values()) < vocab_size
|
||||||
|
|
||||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
||||||
byte_encoder = bytes_to_unicode()
|
|
||||||
byte_decoder = {v: k for k, v in byte_encoder.items()}
|
|
||||||
|
|
||||||
for i in range(vocab_size):
|
for i in range(vocab_size):
|
||||||
if i in reverse_vocab:
|
tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
|
||||||
try:
|
scores.append(0.0) # dummy
|
||||||
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
|
toktypes.append(gguf.TokenType.NORMAL)
|
||||||
except KeyError:
|
|
||||||
text = bytearray()
|
|
||||||
for c in reverse_vocab[i]:
|
|
||||||
if ord(c) < 256: # single byte character
|
|
||||||
text.append(byte_decoder[ord(c)])
|
|
||||||
else: # multibyte special token character
|
|
||||||
text.extend(c.encode('utf-8'))
|
|
||||||
else:
|
|
||||||
print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
|
|
||||||
pad_token = f"[PAD{i}]".encode("utf8")
|
|
||||||
text = bytearray(pad_token)
|
|
||||||
|
|
||||||
tokens.append(text)
|
|
||||||
|
|
||||||
gguf_writer.add_token_list(tokens)
|
gguf_writer.add_token_list(tokens)
|
||||||
|
gguf_writer.add_token_scores(scores)
|
||||||
|
gguf_writer.add_token_types(toktypes)
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
||||||
special_vocab.add_to_gguf(gguf_writer)
|
special_vocab.add_to_gguf(gguf_writer)
|
||||||
|
318
convert-refact-hf-to-gguf.py
Executable file
318
convert-refact-hf-to-gguf.py
Executable file
@ -0,0 +1,318 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
# HF Refact --> GGUF conversion
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer # type: ignore[import]
|
||||||
|
|
||||||
|
if "NO_LOCAL_GGUF" not in os.environ:
|
||||||
|
sys.path.insert(1, str(Path(__file__).parent / "gguf-py" / "gguf"))
|
||||||
|
import gguf
|
||||||
|
|
||||||
|
|
||||||
|
def bytes_to_unicode():
    """
    Build the GPT-2 byte -> unicode-character lookup table.

    Returns a dict that maps every byte value (0..255) to a distinct
    one-character unicode string.

    The reversible bpe codes work on unicode strings.
    This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
    When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
    This is a significant percentage of your normal, say, 32K bpe vocab.
    To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
    And avoids mapping to whitespace/control characters the bpe code barfs on.

    ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
    """
    # Bytes in these three ranges are mapped to themselves.
    bs = (
        list(range(ord("!"), ord("~") + 1))
        + list(range(ord("¡"), ord("¬") + 1))
        + list(range(ord("®"), ord("ÿ") + 1))
    )
    cs = bs[:]
    # Each byte is visited exactly once below, so testing against the initial
    # self-mapped set is equivalent to testing against the growing list.
    self_mapped = set(bs)
    n = 0
    for b in range(2**8):
        if b not in self_mapped:
            # Remaining bytes get the code points 256, 257, ... in byte order.
            bs.append(b)
            cs.append(2**8 + n)
            n += 1
    return dict(zip(bs, (chr(c) for c in cs)))
|
||||||
|
|
||||||
|
|
||||||
|
def count_model_parts(dir_model: Path) -> int:
    """
    Count the checkpoint shards found directly inside ``dir_model``.

    A directory entry counts as a shard when its name starts with
    ``pytorch_model-``. When at least one shard is found, the count is
    printed.

    Returns the number of matching entries (0 if there are none).
    """
    num_parts = 0
    for filename in os.listdir(dir_model):
        if filename.startswith("pytorch_model-"):
            num_parts += 1

    if num_parts > 0:
        print("gguf: found " + str(num_parts) + " model parts")
    return num_parts
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """
    Parse the command line of the Refact converter.

    Accepted arguments:
      --vocab-only   flag; extract only the vocab
      --outfile      optional output path (Path)
      model          positional input path (Path)
      ftype          optional positional int, 0 (float32) or 1 (float16);
                     defaults to 1 when omitted

    Returns the populated argparse namespace.
    """
    parser = argparse.ArgumentParser(
        description="Convert a Refact model to a GGML compatible file"
    )
    parser.add_argument(
        "--vocab-only",
        action="store_true",
        help="extract only the vocab",
    )
    parser.add_argument(
        "--outfile",
        type=Path,
        help="path to write to; default: based on input",
    )
    parser.add_argument(
        "model",
        type=Path,
        help="directory containing model file, or model file itself (*.bin)",
    )
    parser.add_argument(
        "ftype",
        type=int,
        choices=[0, 1],
        default=1,
        nargs="?",  # makes the positional optional so the default applies
        help="output format - use 0 for float32, 1 for float16",
    )
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
# Script setup: read the command line, validate the model directory,
# pick the output file name, load config.json and create the GGUF writer.
args = parse_args()

dir_model = args.model
ftype = args.ftype
if not dir_model.is_dir():
    print(f"Error: {args.model} is not a directory", file=sys.stderr)
    sys.exit(1)

# possible tensor data types
#   ftype == 0 -> float32
#   ftype == 1 -> float16

# map from ftype to string
ftype_str = ["f32", "f16"]

if args.outfile is not None:
    fname_out = args.outfile
else:
    # output in the same directory as the model by default
    fname_out = dir_model / f"ggml-model-{ftype_str[ftype]}.gguf"

print("gguf: loading model " + dir_model.name)

with open(dir_model / "config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

# Only the first entry of "architectures" is checked.
if hparams["architectures"][0] != "GPTRefactForCausalLM":
    print("Model architecture not supported: " + hparams["architectures"][0])

    sys.exit(1)

# get number of model parts
num_parts = count_model_parts(dir_model)

ARCH = gguf.MODEL_ARCH.REFACT
gguf_writer = gguf.GGUFWriter(fname_out, gguf.MODEL_ARCH_NAMES[ARCH])
|
||||||
|
|
||||||
|
print("gguf: get model metadata")

# Get refact feed forward dimension.
# hidden_dim starts as the embedding width and is then overwritten with
# two thirds of the 4x inner width; ff_dim is that value rounded up to the
# next multiple of `multiple_of`.
hidden_dim = hparams["n_embd"]
inner_dim = 4 * hidden_dim
hidden_dim = int(2 * inner_dim / 3)
multiple_of = 256
ff_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of)

block_count = hparams["n_layer"]

gguf_writer.add_name("Refact")
# refact uses Alibi. So this is from config.json which might be used by training.
gguf_writer.add_context_length(hparams["n_positions"])
gguf_writer.add_embedding_length(hparams["n_embd"])

gguf_writer.add_feed_forward_length(ff_dim)
gguf_writer.add_block_count(block_count)
gguf_writer.add_head_count(hparams["n_head"])
# A single key/value head is written unconditionally.
gguf_writer.add_head_count_kv(1)
gguf_writer.add_layer_norm_rms_eps(hparams["layer_norm_epsilon"])
gguf_writer.add_file_type(ftype)
|
||||||
|
|
||||||
|
# TOKENIZATION
#
# Writes the token list (with dummy scores and token types) followed by the
# special-vocab data to the GGUF writer.

print("gguf: get tokenizer metadata")

tokens: list[bytearray] = []
scores: list[float] = []
toktypes: list[int] = []

tokenizer_json_file = dir_model / "tokenizer.json"
if not tokenizer_json_file.is_file():
    print(f"Error: Missing {tokenizer_json_file}", file=sys.stderr)
    sys.exit(1)

# gpt2 tokenizer
gguf_writer.add_tokenizer_model("gpt2")

with open(tokenizer_json_file, "r", encoding="utf-8") as f:
    tokenizer_json = json.load(f)

print("gguf: get gpt2 tokenizer vocab")

# The number of tokens in tokenizer.json can differ from the expected vocab size.
# This causes downstream issues with mismatched tensor sizes when running the inference
vocab_size = (
    hparams["vocab_size"]
    if "vocab_size" in hparams
    else len(tokenizer_json["model"]["vocab"])
)

tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)

reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
byte_encoder = bytes_to_unicode()
# byte_decoder is keyed by the one-character strings produced by
# bytes_to_unicode(), not by integer code points.
byte_decoder = {v: k for k, v in byte_encoder.items()}

for i in range(vocab_size):
    if i in reverse_vocab:
        try:
            text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
        except KeyError:
            # At least one character is not part of the byte table; decode
            # the token character by character instead.
            text = bytearray()
            for c in reverse_vocab[i]:
                if c in byte_decoder:  # character that encodes a single byte
                    text.append(byte_decoder[c])
                else:  # multibyte special token character
                    text.extend(c.encode("utf-8"))
    else:
        print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
        pad_token = f"[PAD{i}]".encode("utf8")
        text = bytearray(pad_token)

    tokens.append(text)
    scores.append(0.0)  # dummy
    toktypes.append(gguf.TokenType.NORMAL)  # dummy

gguf_writer.add_token_list(tokens)
gguf_writer.add_token_scores(scores)
gguf_writer.add_token_types(toktypes)

special_vocab = gguf.SpecialVocab(dir_model, load_merges=True)
special_vocab.add_to_gguf(gguf_writer)
|
||||||
|
|
||||||
|
# TENSORS
#
# Loads each checkpoint part, splits the fused kv / gate_up weights into
# separately named tensors, converts dtypes according to `ftype`, and hands
# every tensor to the GGUF writer.

tensor_map = gguf.get_tensor_name_map(ARCH, block_count)

# params for qkv transform
n_head = hparams["n_head"]
n_head_kv = 1

head_dim = hparams["n_embd"] // n_head

# tensor info
print("gguf: get tensor metadata")

if num_parts == 0:
    part_names = iter(("pytorch_model.bin",))
else:
    part_names = (
        f"pytorch_model-{n:05}-of-{num_parts:05}.bin" for n in range(1, num_parts + 1)
    )
for part_name in part_names:
    if args.vocab_only:
        break
    print("gguf: loading model part '" + part_name + "'")
    model_part = torch.load(dir_model / part_name, map_location="cpu")

    for i in range(block_count):
        if f"transformer.h.{i}.attn.kv.weight" in model_part:
            # Fused kv weight: the first n_head_kv * head_dim rows become
            # k_proj, the remaining rows become v_proj.
            data = model_part[f"transformer.h.{i}.attn.kv.weight"]
            model_part[f"model.layers.{i}.self_attn.k_proj.weight"] = data[
                : n_head_kv * head_dim
            ]
            model_part[f"model.layers.{i}.self_attn.v_proj.weight"] = data[
                n_head_kv * head_dim :
            ]
            del model_part[f"transformer.h.{i}.attn.kv.weight"]
        if f"transformer.h.{i}.attn.q.weight" in model_part:
            model_part[f"model.layers.{i}.self_attn.q_proj.weight"] = model_part[
                f"transformer.h.{i}.attn.q.weight"
            ]
            del model_part[f"transformer.h.{i}.attn.q.weight"]
        if f"transformer.h.{i}.mlp.gate_up_proj.weight" in model_part:
            # Fused gate/up weight: the first ff_dim rows become gate_proj,
            # the remaining rows become up_proj.
            data = model_part[f"transformer.h.{i}.mlp.gate_up_proj.weight"]
            model_part[f"model.layers.{i}.mlp.gate_proj.weight"] = data[:ff_dim]
            model_part[f"model.layers.{i}.mlp.up_proj.weight"] = data[ff_dim:]
            del model_part[f"transformer.h.{i}.mlp.gate_up_proj.weight"]

    for name in model_part.keys():
        data = model_part[name]

        old_dtype = data.dtype

        # convert any unsupported data types to float32
        if data.dtype != torch.float16 and data.dtype != torch.float32:
            data = data.to(torch.float32)

        data = data.squeeze().numpy()

        # map tensor names
        new_name = tensor_map.get_name(name, try_suffixes=(".weight",))
        if new_name is None:
            print("Can not map tensor '" + name + "'")
            # Exit with a non-zero status so callers can detect the failure.
            sys.exit(1)

        n_dims = len(data.shape)
        data_dtype = data.dtype

        # if f32 desired, convert any float16 to float32
        if ftype == 0 and data_dtype == np.float16:
            data = data.astype(np.float32)

        # TODO: Why can't we use these float16 as-is? There should be no reason to store float16 as float32
        if ftype == 1 and data_dtype == np.float16 and n_dims == 1:
            data = data.astype(np.float32)

        # if f16 desired, convert any float32 2-dim weight tensors to float16
        if (
            ftype == 1
            and data_dtype == np.float32
            and name.endswith(".weight")
            and n_dims == 2
        ):
            data = data.astype(np.float16)

        print(
            new_name
            + ", n_dims = "
            + str(n_dims)
            + ", "
            + str(old_dtype)
            + " --> "
            + str(data.dtype)
        )

        gguf_writer.add_tensor(new_name, data)
|
||||||
|
|
||||||
|
|
||||||
|
# Write the output file: header first, then key/value metadata, then
# (unless --vocab-only was given) the tensor data.
print("gguf: write header")
gguf_writer.write_header_to_file()
print("gguf: write metadata")
gguf_writer.write_kv_data_to_file()
if not args.vocab_only:
    print("gguf: write tensors")
    gguf_writer.write_tensors_to_file()

gguf_writer.close()

print(f"gguf: model successfully exported to '{fname_out}'")
print("")
|
@ -20,28 +20,6 @@ if 'NO_LOCAL_GGUF' not in os.environ:
|
|||||||
import gguf
|
import gguf
|
||||||
|
|
||||||
|
|
||||||
def bytes_to_unicode():
|
|
||||||
# ref: https://github.com/openai/gpt-2/blob/master/src/encoder.py
|
|
||||||
"""
|
|
||||||
Returns list of utf-8 byte and a corresponding list of unicode strings.
|
|
||||||
The reversible bpe codes work on unicode strings.
|
|
||||||
This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
|
|
||||||
When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
|
|
||||||
This is a significant percentage of your normal, say, 32K bpe vocab.
|
|
||||||
To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
|
|
||||||
And avoids mapping to whitespace/control characters the bpe code barfs on.
|
|
||||||
"""
|
|
||||||
bs = list(range(ord("!"), ord("~")+1))+list(range(ord("¡"), ord("¬")+1))+list(range(ord("®"), ord("ÿ")+1))
|
|
||||||
cs = bs[:]
|
|
||||||
n = 0
|
|
||||||
for b in range(2**8):
|
|
||||||
if b not in bs:
|
|
||||||
bs.append(b)
|
|
||||||
cs.append(2**8+n)
|
|
||||||
n += 1
|
|
||||||
return dict(zip(bs, (chr(n) for n in cs)))
|
|
||||||
|
|
||||||
|
|
||||||
def count_model_parts(dir_model: Path) -> int:
|
def count_model_parts(dir_model: Path) -> int:
|
||||||
num_parts = 0
|
num_parts = 0
|
||||||
for filename in os.listdir(dir_model):
|
for filename in os.listdir(dir_model):
|
||||||
@ -117,50 +95,32 @@ gguf_writer.add_file_type(ftype)
|
|||||||
print("gguf: get tokenizer metadata")
|
print("gguf: get tokenizer metadata")
|
||||||
|
|
||||||
tokens: list[bytearray] = []
|
tokens: list[bytearray] = []
|
||||||
|
scores: list[float] = []
|
||||||
tokenizer_json_file = dir_model / 'tokenizer.json'
|
toktypes: list[int] = []
|
||||||
if not tokenizer_json_file.is_file():
|
|
||||||
print(f'Error: Missing {tokenizer_json_file}', file = sys.stderr)
|
|
||||||
sys.exit(1)
|
|
||||||
|
|
||||||
# gpt2 tokenizer
|
# gpt2 tokenizer
|
||||||
gguf_writer.add_tokenizer_model("gpt2")
|
gguf_writer.add_tokenizer_model("gpt2")
|
||||||
|
|
||||||
with open(tokenizer_json_file, "r", encoding="utf-8") as f:
|
|
||||||
tokenizer_json = json.load(f)
|
|
||||||
|
|
||||||
print("gguf: get gpt2 tokenizer vocab")
|
print("gguf: get gpt2 tokenizer vocab")
|
||||||
|
|
||||||
# The number of tokens in tokenizer.json can differ from the expected vocab size.
|
|
||||||
# This causes downstream issues with mismatched tensor sizes when running the inference
|
|
||||||
vocab_size = hparams["vocab_size"] if "vocab_size" in hparams else len(tokenizer_json["model"]["vocab"])
|
|
||||||
|
|
||||||
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
# ref: https://github.com/cmp-nct/ggllm.cpp/blob/master/falcon_convert.py
|
||||||
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
tokenizer = AutoTokenizer.from_pretrained(dir_model)
|
||||||
|
|
||||||
|
# The number of tokens in tokenizer.json can differ from the expected vocab size.
|
||||||
|
# This causes downstream issues with mismatched tensor sizes when running the inference
|
||||||
|
vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
|
||||||
|
assert max(tokenizer.vocab.values()) < vocab_size
|
||||||
|
|
||||||
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.vocab.items()}
|
||||||
byte_encoder = bytes_to_unicode()
|
|
||||||
byte_decoder = {v: k for k, v in byte_encoder.items()}
|
|
||||||
|
|
||||||
for i in range(vocab_size):
|
for i in range(vocab_size):
|
||||||
if i in reverse_vocab:
|
tokens.append(reverse_vocab[i] if i in reverse_vocab else f"[PAD{i}]")
|
||||||
try:
|
scores.append(0.0) # dummy
|
||||||
text = bytearray([byte_decoder[c] for c in reverse_vocab[i]])
|
toktypes.append(gguf.TokenType.NORMAL)
|
||||||
except KeyError:
|
|
||||||
text = bytearray()
|
|
||||||
for c in reverse_vocab[i]:
|
|
||||||
if ord(c) < 256: # single byte character
|
|
||||||
text.append(byte_decoder[ord(c)])
|
|
||||||
else: # multibyte special token character
|
|
||||||
text.extend(c.encode('utf-8'))
|
|
||||||
else:
|
|
||||||
print(f"Key {i} not in tokenizer vocabulary. Padding with an arbitrary token.")
|
|
||||||
pad_token = f"[PAD{i}]".encode("utf8")
|
|
||||||
text = bytearray(pad_token)
|
|
||||||
|
|
||||||
tokens.append(text)
|
|
||||||
|
|
||||||
gguf_writer.add_token_list(tokens)
|
gguf_writer.add_token_list(tokens)
|
||||||
|
gguf_writer.add_token_scores(scores)
|
||||||
|
gguf_writer.add_token_types(toktypes)
|
||||||
|
|
||||||
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
|
||||||
special_vocab.add_to_gguf(gguf_writer)
|
special_vocab.add_to_gguf(gguf_writer)
|
||||||
|
27
convert.py
27
convert.py
@ -42,7 +42,6 @@ if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
|
|||||||
NDArray: TypeAlias = 'np.ndarray[Any, Any]'
|
NDArray: TypeAlias = 'np.ndarray[Any, Any]'
|
||||||
|
|
||||||
ARCH = gguf.MODEL_ARCH.LLAMA
|
ARCH = gguf.MODEL_ARCH.LLAMA
|
||||||
NAMES=gguf.MODEL_TENSOR_NAMES[ARCH]
|
|
||||||
|
|
||||||
DEFAULT_CONCURRENCY = 8
|
DEFAULT_CONCURRENCY = 8
|
||||||
#
|
#
|
||||||
@ -339,29 +338,15 @@ class BpeVocab:
|
|||||||
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
def bpe_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||||
tokenizer = self.bpe_tokenizer
|
tokenizer = self.bpe_tokenizer
|
||||||
from transformers.models.gpt2 import tokenization_gpt2 # type: ignore[import]
|
from transformers.models.gpt2 import tokenization_gpt2 # type: ignore[import]
|
||||||
byte_encoder = tokenization_gpt2.bytes_to_unicode()
|
reverse_vocab = {id: encoded_tok for encoded_tok, id in tokenizer.items()}
|
||||||
byte_decoder = {v: k for k, v in byte_encoder.items()}
|
|
||||||
score = 0.0
|
for i, _ in enumerate(tokenizer):
|
||||||
for i, item in enumerate(tokenizer):
|
yield reverse_vocab[i], 0.0, gguf.TokenType.NORMAL
|
||||||
text: bytes = item.encode("utf-8")
|
|
||||||
# FIXME: These shouldn't be hardcoded, but it's probably better than the current behavior?
|
|
||||||
if i <= 258 and text.startswith(b'<') and text.endswith(b'>'):
|
|
||||||
if i == 0 and text == b'<unk>':
|
|
||||||
toktype = gguf.TokenType.UNKNOWN
|
|
||||||
elif i == 1 or i == 2:
|
|
||||||
toktype = gguf.TokenType.CONTROL
|
|
||||||
elif i >= 3 and text.startswith(b'<0x'):
|
|
||||||
toktype = gguf.TokenType.BYTE
|
|
||||||
else:
|
|
||||||
toktype = gguf.TokenType.NORMAL
|
|
||||||
else:
|
|
||||||
toktype = gguf.TokenType.NORMAL
|
|
||||||
yield text, score, toktype
|
|
||||||
|
|
||||||
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
def added_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||||
for text in self.added_tokens_list:
|
for text in self.added_tokens_list:
|
||||||
score = -1000.0
|
score = -1000.0
|
||||||
yield text.encode("utf-8"), score, gguf.TokenType.USER_DEFINED
|
yield text.encode("utf-8"), score, gguf.TokenType.CONTROL
|
||||||
|
|
||||||
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
def all_tokens(self) -> Iterable[tuple[bytes, float, gguf.TokenType]]:
|
||||||
yield from self.bpe_tokens()
|
yield from self.bpe_tokens()
|
||||||
@ -953,7 +938,7 @@ class OutputFile:
|
|||||||
of.close()
|
of.close()
|
||||||
|
|
||||||
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
|
def pick_output_type(model: LazyModel, output_type_str: str | None) -> GGMLFileType:
|
||||||
wq_type = model[NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
|
wq_type = model[gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ATTN_Q].format(bid=0)+".weight"].data_type
|
||||||
|
|
||||||
if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
|
if output_type_str == "f32" or (output_type_str is None and wq_type == DT_F32):
|
||||||
return GGMLFileType.AllF32
|
return GGMLFileType.AllF32
|
||||||
|
@ -9,7 +9,7 @@ if [[ -z "${PROMPT_CACHE_FILE+x}" || -z "${CHAT_SAVE_DIR+x}" ]]; then
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
MODEL="${MODEL:-./models/13B/ggml-model-q4_0.bin}"
|
MODEL="${MODEL:-./models/llama-13b/ggml-model-q4_0.gguf}"
|
||||||
PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
|
PROMPT_TEMPLATE="${PROMPT_TEMPLATE:-./prompts/chat.txt}"
|
||||||
USER_NAME="${USER_NAME:-User}"
|
USER_NAME="${USER_NAME:-User}"
|
||||||
AI_NAME="${AI_NAME:-ChatLLaMa}"
|
AI_NAME="${AI_NAME:-ChatLLaMa}"
|
||||||
@ -61,9 +61,9 @@ fi
|
|||||||
|
|
||||||
if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
|
if [[ ! -e "$PROMPT_CACHE_FILE" ]]; then
|
||||||
echo 'Prompt cache does not exist, building...'
|
echo 'Prompt cache does not exist, building...'
|
||||||
# Default batch_size to 8 here for better user feedback during initial prompt processing
|
# Default batch_size to 64 here for better user feedback during initial prompt processing
|
||||||
./main 2>>"$LOG" \
|
./main 2>>"$LOG" \
|
||||||
--batch_size 8 \
|
--batch_size 64 \
|
||||||
"${OPTS[@]}" \
|
"${OPTS[@]}" \
|
||||||
--prompt-cache "$PROMPT_CACHE_FILE" \
|
--prompt-cache "$PROMPT_CACHE_FILE" \
|
||||||
--file "$CUR_PROMPT_FILE" \
|
--file "$CUR_PROMPT_FILE" \
|
||||||
|
@ -61,7 +61,7 @@ For example to apply 40% of the 'shakespeare' LORA adapter, 80% of the 'bible' L
|
|||||||
--lora lora-open-llama-3b-v2-q8_0-yet-another-one-LATEST.bin
|
--lora lora-open-llama-3b-v2-q8_0-yet-another-one-LATEST.bin
|
||||||
```
|
```
|
||||||
|
|
||||||
The scale numbers don't need to add up to one, and you can also use numbers creater than 1 to further increase the influence of an adapter. But making the values to big will sometimes result in worse output. Play around to find good values.
|
The scale numbers don't need to add up to one, and you can also use numbers greater than 1 to further increase the influence of an adapter. But making the values too big will sometimes result in worse output. Play around to find good values.
|
||||||
|
|
||||||
Gradient checkpointing reduces the memory requirements by ~50% but increases the runtime.
|
Gradient checkpointing reduces the memory requirements by ~50% but increases the runtime.
|
||||||
If you have enough RAM, you can make finetuning a bit faster by disabling checkpointing with `--no-checkpointing`.
|
If you have enough RAM, you can make finetuning a bit faster by disabling checkpointing with `--no-checkpointing`.
|
||||||
|
@ -313,7 +313,7 @@ class ModelParams:
|
|||||||
gguf_writer.add_feed_forward_length(self.get_n_ff())
|
gguf_writer.add_feed_forward_length(self.get_n_ff())
|
||||||
|
|
||||||
def tensor_name(key, bid=None, suffix=".weight"):
|
def tensor_name(key, bid=None, suffix=".weight"):
|
||||||
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + suffix
|
return gguf.TENSOR_NAMES[key].format(bid=bid) + suffix
|
||||||
|
|
||||||
class Layer:
|
class Layer:
|
||||||
def __init__(self, params, lora_params, bid):
|
def __init__(self, params, lora_params, bid):
|
||||||
|
@ -332,8 +332,8 @@ static void init_model(struct llama_model * input, struct my_llama_model * model
|
|||||||
|
|
||||||
assert_shape_1d(layer.attention_norm, hparams.n_embd);
|
assert_shape_1d(layer.attention_norm, hparams.n_embd);
|
||||||
assert_shape_2d(layer.wq, hparams.n_embd, hparams.n_embd);
|
assert_shape_2d(layer.wq, hparams.n_embd, hparams.n_embd);
|
||||||
assert_shape_2d(layer.wk, hparams.n_embd, hparams.n_embd);
|
assert_shape_2d(layer.wk, hparams.n_embd, hparams.n_embd_gqa());
|
||||||
assert_shape_2d(layer.wv, hparams.n_embd, hparams.n_embd);
|
assert_shape_2d(layer.wv, hparams.n_embd, hparams.n_embd_gqa());
|
||||||
assert_shape_2d(layer.wo, hparams.n_embd, hparams.n_embd);
|
assert_shape_2d(layer.wo, hparams.n_embd, hparams.n_embd);
|
||||||
assert_shape_1d(layer.ffn_norm, hparams.n_embd);
|
assert_shape_1d(layer.ffn_norm, hparams.n_embd);
|
||||||
assert_shape_2d(layer.w1, hparams.n_embd, hparams.n_ff);
|
assert_shape_2d(layer.w1, hparams.n_embd, hparams.n_ff);
|
||||||
|
@ -28,6 +28,16 @@ configure_file(${_common_path}/../build-info.h
|
|||||||
target_include_directories(common PUBLIC ${LLAMA_INCLUDE_DIR}
|
target_include_directories(common PUBLIC ${LLAMA_INCLUDE_DIR}
|
||||||
${CMAKE_CURRENT_BINARY_DIR})
|
${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
|
||||||
|
# If the common project was part of "main-cmake-pkg" the transient
|
||||||
|
# defines would automatically be attached. Because the common func-
|
||||||
|
# tionality is separate, but dependent upon the defines, it must be
|
||||||
|
# explicitly extracted from the "llama" target.
|
||||||
|
#
|
||||||
|
get_target_property(_llama_transient_defines llama
|
||||||
|
INTERFACE_COMPILE_DEFINITIONS)
|
||||||
|
|
||||||
|
target_compile_definitions(common PRIVATE "${_llama_transient_defines}")
|
||||||
|
|
||||||
add_executable(${TARGET} ${CMAKE_CURRENT_LIST_DIR}/../main/main.cpp)
|
add_executable(${TARGET} ${CMAKE_CURRENT_LIST_DIR}/../main/main.cpp)
|
||||||
target_include_directories(${TARGET} PRIVATE ${_common_path})
|
target_include_directories(${TARGET} PRIVATE ${_common_path})
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
|
@ -543,6 +543,9 @@ int main(int argc, char ** argv) {
|
|||||||
if (i > 0) {
|
if (i > 0) {
|
||||||
embd.erase(embd.begin(), embd.begin() + i);
|
embd.erase(embd.begin(), embd.begin() + i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// remove any "future" tokens that we might have inherited from the session from the KV cache
|
||||||
|
llama_kv_cache_tokens_rm(ctx, n_past, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// evaluate tokens in batches
|
// evaluate tokens in batches
|
||||||
@ -667,7 +670,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
// reset color to default if we there is no pending user input
|
// reset color to default if there is no pending user input
|
||||||
if (input_echo && (int) embd_inp.size() == n_consumed) {
|
if (input_echo && (int) embd_inp.size() == n_consumed) {
|
||||||
console::set_display(console::reset);
|
console::set_display(console::reset);
|
||||||
}
|
}
|
||||||
@ -694,10 +697,8 @@ int main(int argc, char ** argv) {
|
|||||||
if (last_output.find(antiprompt, search_start_pos) != std::string::npos) {
|
if (last_output.find(antiprompt, search_start_pos) != std::string::npos) {
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
console::set_display(console::user_input);
|
|
||||||
}
|
}
|
||||||
is_antiprompt = true;
|
is_antiprompt = true;
|
||||||
fflush(stdout);
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -721,8 +722,6 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
printf("\n");
|
printf("\n");
|
||||||
console::set_display(console::user_input);
|
|
||||||
fflush(stdout);
|
|
||||||
} else if (params.instruct) {
|
} else if (params.instruct) {
|
||||||
is_interacting = true;
|
is_interacting = true;
|
||||||
}
|
}
|
||||||
@ -747,6 +746,9 @@ int main(int argc, char ** argv) {
|
|||||||
printf("%s", buffer.c_str());
|
printf("%s", buffer.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// color user input only
|
||||||
|
console::set_display(console::user_input);
|
||||||
|
|
||||||
std::string line;
|
std::string line;
|
||||||
bool another_line = true;
|
bool another_line = true;
|
||||||
do {
|
do {
|
||||||
|
@ -10,6 +10,7 @@
|
|||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <ctime>
|
||||||
|
|
||||||
// trim whitespace from the beginning and end of a string
|
// trim whitespace from the beginning and end of a string
|
||||||
static std::string trim(const std::string & str) {
|
static std::string trim(const std::string & str) {
|
||||||
@ -70,6 +71,26 @@ struct client {
|
|||||||
std::vector<llama_token> tokens_prev;
|
std::vector<llama_token> tokens_prev;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void print_date_time() {
|
||||||
|
std::time_t current_time = std::time(nullptr);
|
||||||
|
std::tm* local_time = std::localtime(&current_time);
|
||||||
|
char buffer[80];
|
||||||
|
strftime(buffer, sizeof(buffer), "%Y-%m-%d %H:%M:%S", local_time);
|
||||||
|
|
||||||
|
printf("\n\033[35mrun parameters as at %s\033[0m\n", buffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Define a split string function to ...
|
||||||
|
static std::vector<std::string> split_string(const std::string& input, char delimiter) {
|
||||||
|
std::vector<std::string> tokens;
|
||||||
|
std::istringstream stream(input);
|
||||||
|
std::string token;
|
||||||
|
while (std::getline(stream, token, delimiter)) {
|
||||||
|
tokens.push_back(token);
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
int main(int argc, char ** argv) {
|
int main(int argc, char ** argv) {
|
||||||
srand(1234);
|
srand(1234);
|
||||||
|
|
||||||
@ -104,6 +125,23 @@ int main(int argc, char ** argv) {
|
|||||||
params.logits_all = true;
|
params.logits_all = true;
|
||||||
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
std::tie(model, ctx) = llama_init_from_gpt_params(params);
|
||||||
|
|
||||||
|
// load the prompts from an external file if there are any
|
||||||
|
if (params.prompt.empty()) {
|
||||||
|
printf("\n\033[32mNo new questions so proceed with build-in defaults.\033[0m\n");
|
||||||
|
} else {
|
||||||
|
// Output each line of the input params.prompts vector and copy to k_prompts
|
||||||
|
int index = 0;
|
||||||
|
printf("\n\033[32mNow printing the external prompt file %s\033[0m\n\n", params.prompt_file.c_str());
|
||||||
|
|
||||||
|
std::vector<std::string> prompts = split_string(params.prompt, '\n');
|
||||||
|
for (const auto& prompt : prompts) {
|
||||||
|
k_prompts.resize(index + 1);
|
||||||
|
k_prompts[index] = prompt;
|
||||||
|
index++;
|
||||||
|
printf("%3d prompt: %s\n", index, prompt.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fprintf(stderr, "\n\n");
|
fprintf(stderr, "\n\n");
|
||||||
fflush(stderr);
|
fflush(stderr);
|
||||||
|
|
||||||
@ -233,7 +271,7 @@ int main(int argc, char ** argv) {
|
|||||||
client.n_decoded = 0;
|
client.n_decoded = 0;
|
||||||
client.i_batch = batch.n_tokens - 1;
|
client.i_batch = batch.n_tokens - 1;
|
||||||
|
|
||||||
LOG_TEE("\033[1mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
|
LOG_TEE("\033[31mClient %3d, seq %4d, started decoding ...\033[0m\n", client.id, client.seq_id);
|
||||||
|
|
||||||
g_seq_id += 1;
|
g_seq_id += 1;
|
||||||
|
|
||||||
@ -332,12 +370,12 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// delete only the generated part of the sequence, i.e. keep the system prompt in the cache
|
// delete only the generated part of the sequence, i.e. keep the system prompt in the cache
|
||||||
llama_kv_cache_seq_rm(ctx, client.id, n_tokens_system, n_ctx);
|
llama_kv_cache_seq_rm(ctx, client.id, n_tokens_system, -1);
|
||||||
|
|
||||||
const auto t_main_end = ggml_time_us();
|
const auto t_main_end = ggml_time_us();
|
||||||
|
|
||||||
LOG_TEE("\033[1mClient %3d, seq %4d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \n\nInput: %s\nResponse: %s\n\n",
|
LOG_TEE("\033[31mClient %3d, seq %3d/%3d, prompt %4d t, response %4d t, time %5.2f s, speed %5.2f t/s, cache miss %d \033[0m \nInput: %s\n\033[35mResponse: %s\033[0m\n\n",
|
||||||
client.id, client.seq_id, client.n_prompt, client.n_decoded,
|
client.id, client.seq_id, n_seq, client.n_prompt, client.n_decoded,
|
||||||
(t_main_end - client.t_start_prompt) / 1e6,
|
(t_main_end - client.t_start_prompt) / 1e6,
|
||||||
(double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6,
|
(double) (client.n_prompt + client.n_decoded) / (t_main_end - client.t_start_prompt) * 1e6,
|
||||||
n_cache_miss,
|
n_cache_miss,
|
||||||
@ -357,13 +395,21 @@ int main(int argc, char ** argv) {
|
|||||||
|
|
||||||
const auto t_main_end = ggml_time_us();
|
const auto t_main_end = ggml_time_us();
|
||||||
|
|
||||||
LOG_TEE("\n\n");
|
print_date_time();
|
||||||
|
|
||||||
|
LOG_TEE("\n%s: n_parallel = %d, n_sequences = %d, cont_batching = %d, system tokens = %d\n", __func__, n_clients, n_seq, cont_batching, n_tokens_system);
|
||||||
|
if (params.prompt_file.empty()) {
|
||||||
|
params.prompt_file = "used built-in defaults";
|
||||||
|
}
|
||||||
|
LOG_TEE("External prompt file: \033[32m%s\033[0m\n", params.prompt_file.c_str());
|
||||||
|
LOG_TEE("Model and path used: \033[32m%s\033[0m\n\n", params.model.c_str());
|
||||||
|
|
||||||
LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
|
LOG_TEE("Total prompt tokens: %6d, speed: %5.2f t/s\n", n_total_prompt, (double) (n_total_prompt ) / (t_main_end - t_main_start) * 1e6);
|
||||||
LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);
|
LOG_TEE("Total gen tokens: %6d, speed: %5.2f t/s\n", n_total_gen, (double) (n_total_gen ) / (t_main_end - t_main_start) * 1e6);
|
||||||
LOG_TEE("Total speed (AVG): %6s speed: %5.2f t/s\n", "", (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6);
|
LOG_TEE("Total speed (AVG): %6s speed: %5.2f t/s\n", "", (double) (n_total_prompt + n_total_gen) / (t_main_end - t_main_start) * 1e6);
|
||||||
LOG_TEE("Cache misses: %6d\n", n_cache_miss);
|
LOG_TEE("Cache misses: %6d\n", n_cache_miss);
|
||||||
|
|
||||||
LOG_TEE("\n\n");
|
LOG_TEE("\n");
|
||||||
|
|
||||||
llama_print_timings(ctx);
|
llama_print_timings(ctx);
|
||||||
|
|
||||||
|
@ -448,7 +448,7 @@ struct llama_server_context
|
|||||||
n_past = common_part(embd, prompt_tokens);
|
n_past = common_part(embd, prompt_tokens);
|
||||||
|
|
||||||
// since #3228 we now have to manually manage the KV cache
|
// since #3228 we now have to manually manage the KV cache
|
||||||
llama_kv_cache_seq_rm(ctx, 0, n_past, params.n_ctx);
|
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
|
||||||
|
|
||||||
embd = prompt_tokens;
|
embd = prompt_tokens;
|
||||||
if (n_past == num_prompt_tokens)
|
if (n_past == num_prompt_tokens)
|
||||||
@ -504,9 +504,11 @@ struct llama_server_context
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool tg = true;
|
||||||
while (n_past < embd.size())
|
while (n_past < embd.size())
|
||||||
{
|
{
|
||||||
int n_eval = (int)embd.size() - n_past;
|
int n_eval = (int)embd.size() - n_past;
|
||||||
|
tg = n_eval == 1;
|
||||||
if (n_eval > params.n_batch)
|
if (n_eval > params.n_batch)
|
||||||
{
|
{
|
||||||
n_eval = params.n_batch;
|
n_eval = params.n_batch;
|
||||||
@ -532,99 +534,21 @@ struct llama_server_context
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
// out of user input, sample next token
|
// out of user input, sample next token
|
||||||
const float temp = params.temp;
|
|
||||||
const int32_t top_k = params.top_k <= 0 ? llama_n_vocab(model) : params.top_k;
|
|
||||||
const float top_p = params.top_p;
|
|
||||||
const float tfs_z = params.tfs_z;
|
|
||||||
const float typical_p = params.typical_p;
|
|
||||||
const int32_t repeat_last_n = params.repeat_last_n < 0 ? n_ctx : params.repeat_last_n;
|
|
||||||
const float repeat_penalty = params.repeat_penalty;
|
|
||||||
const float alpha_presence = params.presence_penalty;
|
|
||||||
const float alpha_frequency = params.frequency_penalty;
|
|
||||||
const int mirostat = params.mirostat;
|
|
||||||
const float mirostat_tau = params.mirostat_tau;
|
|
||||||
const float mirostat_eta = params.mirostat_eta;
|
|
||||||
const bool penalize_nl = params.penalize_nl;
|
|
||||||
const int32_t n_probs = params.n_probs;
|
|
||||||
|
|
||||||
{
|
|
||||||
auto *logits = llama_get_logits(ctx);
|
|
||||||
auto n_vocab = llama_n_vocab(model);
|
|
||||||
|
|
||||||
// Apply params.logit_bias map
|
|
||||||
for (const auto &it : params.logit_bias)
|
|
||||||
{
|
|
||||||
logits[it.first] += it.second;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<llama_token_data> candidates;
|
std::vector<llama_token_data> candidates;
|
||||||
candidates.reserve(n_vocab);
|
candidates.reserve(llama_n_vocab(model));
|
||||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++)
|
|
||||||
{
|
result.tok = llama_sample_token(ctx, NULL, grammar, params, last_n_tokens, candidates);
|
||||||
candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
|
||||||
}
|
|
||||||
|
|
||||||
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
|
||||||
|
|
||||||
// Apply penalties
|
const int32_t n_probs = params.n_probs;
|
||||||
float nl_logit = logits[llama_token_nl(ctx)];
|
if (params.temp <= 0 && n_probs > 0)
|
||||||
auto last_n_repeat = std::min(std::min((int)last_n_tokens.size(), repeat_last_n), n_ctx);
|
|
||||||
llama_sample_repetition_penalty(ctx, &candidates_p,
|
|
||||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
|
||||||
last_n_repeat, repeat_penalty);
|
|
||||||
llama_sample_frequency_and_presence_penalties(ctx, &candidates_p,
|
|
||||||
last_n_tokens.data() + last_n_tokens.size() - last_n_repeat,
|
|
||||||
last_n_repeat, alpha_frequency, alpha_presence);
|
|
||||||
if (!penalize_nl)
|
|
||||||
{
|
|
||||||
logits[llama_token_nl(ctx)] = nl_logit;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (grammar != nullptr) {
|
|
||||||
llama_sample_grammar(ctx, &candidates_p, grammar);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (temp <= 0)
|
|
||||||
{
|
|
||||||
// Greedy sampling
|
|
||||||
result.tok = llama_sample_token_greedy(ctx, &candidates_p);
|
|
||||||
if (n_probs > 0)
|
|
||||||
{
|
{
|
||||||
|
// For llama_sample_token_greedy we need to sort candidates
|
||||||
llama_sample_softmax(ctx, &candidates_p);
|
llama_sample_softmax(ctx, &candidates_p);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (mirostat == 1)
|
|
||||||
{
|
|
||||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
|
||||||
const int mirostat_m = 100;
|
|
||||||
llama_sample_temp(ctx, &candidates_p, temp);
|
|
||||||
result.tok = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
|
|
||||||
}
|
|
||||||
else if (mirostat == 2)
|
|
||||||
{
|
|
||||||
static float mirostat_mu = 2.0f * mirostat_tau;
|
|
||||||
llama_sample_temp(ctx, &candidates_p, temp);
|
|
||||||
result.tok = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Temperature sampling
|
|
||||||
size_t min_keep = std::max(1, n_probs);
|
|
||||||
llama_sample_top_k(ctx, &candidates_p, top_k, min_keep);
|
|
||||||
llama_sample_tail_free(ctx, &candidates_p, tfs_z, min_keep);
|
|
||||||
llama_sample_typical(ctx, &candidates_p, typical_p, min_keep);
|
|
||||||
llama_sample_top_p(ctx, &candidates_p, top_p, min_keep);
|
|
||||||
llama_sample_temp(ctx, &candidates_p, temp);
|
|
||||||
result.tok = llama_sample_token(ctx, &candidates_p);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (grammar != nullptr) {
|
|
||||||
llama_grammar_accept_token(ctx, grammar, result.tok);
|
|
||||||
}
|
|
||||||
|
|
||||||
for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i)
|
for (size_t i = 0; i < std::min(candidates_p.size, (size_t)n_probs); ++i)
|
||||||
{
|
{
|
||||||
@ -633,8 +557,10 @@ struct llama_server_context
|
|||||||
|
|
||||||
last_n_tokens.erase(last_n_tokens.begin());
|
last_n_tokens.erase(last_n_tokens.begin());
|
||||||
last_n_tokens.push_back(result.tok);
|
last_n_tokens.push_back(result.tok);
|
||||||
|
if (tg) {
|
||||||
num_tokens_predicted++;
|
num_tokens_predicted++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// add it to the context
|
// add it to the context
|
||||||
embd.push_back(result.tok);
|
embd.push_back(result.tok);
|
||||||
@ -1011,7 +937,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.push_back({argv[i], 1.0f});
|
params.lora_adapter.push_back(std::make_tuple(argv[i], 1.0f));
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
}
|
}
|
||||||
else if (arg == "--lora-scaled")
|
else if (arg == "--lora-scaled")
|
||||||
@ -1027,7 +953,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
|
|||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
params.lora_adapter.push_back({lora_adapter, std::stof(argv[i])});
|
params.lora_adapter.push_back(std::make_tuple(lora_adapter, std::stof(argv[i])));
|
||||||
params.use_mmap = false;
|
params.use_mmap = false;
|
||||||
}
|
}
|
||||||
else if (arg == "--lora-base")
|
else if (arg == "--lora-base")
|
||||||
@ -1124,8 +1050,6 @@ static json format_timings(llama_server_context &llama)
|
|||||||
{
|
{
|
||||||
const auto timings = llama_get_timings(llama.ctx);
|
const auto timings = llama_get_timings(llama.ctx);
|
||||||
|
|
||||||
assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
|
|
||||||
|
|
||||||
return json{
|
return json{
|
||||||
{"prompt_n", timings.n_p_eval},
|
{"prompt_n", timings.n_p_eval},
|
||||||
{"prompt_ms", timings.t_p_eval_ms},
|
{"prompt_ms", timings.t_p_eval_ms},
|
||||||
|
@ -172,7 +172,7 @@ int main(int argc, char ** argv) {
|
|||||||
LOG("out of drafted tokens\n");
|
LOG("out of drafted tokens\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
llama_kv_cache_seq_rm(ctx_dft, 0, n_past_dft, n_ctx);
|
llama_kv_cache_seq_rm(ctx_dft, 0, n_past_dft, -1);
|
||||||
llama_decode(ctx_dft, llama_batch_get_one(&id, 1, n_past_dft, 0));
|
llama_decode(ctx_dft, llama_batch_get_one(&id, 1, n_past_dft, 0));
|
||||||
++n_past_dft;
|
++n_past_dft;
|
||||||
|
|
||||||
@ -257,7 +257,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// evaluate the drafted token on the draft model
|
// evaluate the drafted token on the draft model
|
||||||
llama_kv_cache_seq_rm(ctx_dft, 0, n_past_cur, n_ctx);
|
llama_kv_cache_seq_rm(ctx_dft, 0, n_past_cur, -1);
|
||||||
llama_decode(ctx_dft, llama_batch_get_one(&drafted.back(), 1, n_past_cur, 0));
|
llama_decode(ctx_dft, llama_batch_get_one(&drafted.back(), 1, n_past_cur, 0));
|
||||||
++n_past_cur;
|
++n_past_cur;
|
||||||
|
|
||||||
@ -267,7 +267,7 @@ int main(int argc, char ** argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// evaluate the target model on the drafted tokens
|
// evaluate the target model on the drafted tokens
|
||||||
llama_kv_cache_seq_rm(ctx_tgt, 0, n_past_tgt, n_ctx);
|
llama_kv_cache_seq_rm(ctx_tgt, 0, n_past_tgt, -1);
|
||||||
llama_decode(ctx_tgt, llama_batch_get_one(drafted.data(), drafted.size(), n_past_tgt, 0));
|
llama_decode(ctx_tgt, llama_batch_get_one(drafted.data(), drafted.size(), n_past_tgt, 0));
|
||||||
++n_past_tgt;
|
++n_past_tgt;
|
||||||
|
|
||||||
|
@ -364,7 +364,7 @@ class ModelParams:
|
|||||||
gguf_writer.add_feed_forward_length(self.get_n_ff())
|
gguf_writer.add_feed_forward_length(self.get_n_ff())
|
||||||
|
|
||||||
def tensor_name(key, bid=None):
|
def tensor_name(key, bid=None):
|
||||||
return gguf.MODEL_TENSOR_NAMES[gguf.MODEL_ARCH.LLAMA][key].format(bid=bid) + ".weight"
|
return gguf.TENSOR_NAMES[key].format(bid=bid) + ".weight"
|
||||||
|
|
||||||
class Layer:
|
class Layer:
|
||||||
def __init__(self, params, bid):
|
def __init__(self, params, bid):
|
||||||
|
@ -62,7 +62,7 @@
|
|||||||
mkdir -p $out/include
|
mkdir -p $out/include
|
||||||
cp ${src}/llama.h $out/include/
|
cp ${src}/llama.h $out/include/
|
||||||
'';
|
'';
|
||||||
cmakeFlags = [ "-DLLAMA_BUILD_SERVER=ON" "-DLLAMA_MPI=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
|
cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
|
||||||
in
|
in
|
||||||
{
|
{
|
||||||
packages.default = pkgs.stdenv.mkDerivation {
|
packages.default = pkgs.stdenv.mkDerivation {
|
||||||
|
@ -1213,12 +1213,9 @@ void ggml_metal_graph_compute(
|
|||||||
float max_bias;
|
float max_bias;
|
||||||
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
memcpy(&max_bias, (int32_t *) dst->op_params + 2, sizeof(float));
|
||||||
|
|
||||||
if (__builtin_popcount(n_head) != 1) {
|
|
||||||
GGML_ASSERT(false && "only power-of-two n_head implemented");
|
|
||||||
}
|
|
||||||
|
|
||||||
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
const int n_heads_log2_floor = 1 << (int) floor(log2(n_head));
|
||||||
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
const float m0 = powf(2.0f, -(max_bias) / n_heads_log2_floor);
|
||||||
|
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_heads_log2_floor);
|
||||||
|
|
||||||
[encoder setComputePipelineState:ctx->pipeline_alibi_f32];
|
[encoder setComputePipelineState:ctx->pipeline_alibi_f32];
|
||||||
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
[encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
|
||||||
@ -1240,6 +1237,8 @@ void ggml_metal_graph_compute(
|
|||||||
[encoder setBytes:&nb2 length:sizeof(uint64_t) atIndex:16];
|
[encoder setBytes:&nb2 length:sizeof(uint64_t) atIndex:16];
|
||||||
[encoder setBytes:&nb3 length:sizeof(uint64_t) atIndex:17];
|
[encoder setBytes:&nb3 length:sizeof(uint64_t) atIndex:17];
|
||||||
[encoder setBytes:&m0 length:sizeof( float) atIndex:18];
|
[encoder setBytes:&m0 length:sizeof( float) atIndex:18];
|
||||||
|
[encoder setBytes:&m1 length:sizeof( float) atIndex:19];
|
||||||
|
[encoder setBytes:&n_heads_log2_floor length:sizeof(int) atIndex:20];
|
||||||
|
|
||||||
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
|
||||||
} break;
|
} break;
|
||||||
|
@ -831,6 +831,8 @@ kernel void kernel_alibi_f32(
|
|||||||
constant uint64_t & nb2,
|
constant uint64_t & nb2,
|
||||||
constant uint64_t & nb3,
|
constant uint64_t & nb3,
|
||||||
constant float & m0,
|
constant float & m0,
|
||||||
|
constant float & m1,
|
||||||
|
constant int & n_heads_log2_floor,
|
||||||
uint3 tgpig[[threadgroup_position_in_grid]],
|
uint3 tgpig[[threadgroup_position_in_grid]],
|
||||||
uint3 tpitg[[thread_position_in_threadgroup]],
|
uint3 tpitg[[thread_position_in_threadgroup]],
|
||||||
uint3 ntg[[threads_per_threadgroup]]) {
|
uint3 ntg[[threads_per_threadgroup]]) {
|
||||||
@ -846,7 +848,12 @@ kernel void kernel_alibi_f32(
|
|||||||
const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0);
|
const int64_t i0 = (n - i3*ne2*ne1*ne0 - i2*ne1*ne0 - i1*ne0);
|
||||||
|
|
||||||
device float * dst_data = (device float *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
device float * dst_data = (device float *) ((device char *) dst + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0);
|
||||||
float m_k = pow(m0, i2 + 1);
|
float m_k;
|
||||||
|
if (i2 < n_heads_log2_floor) {
|
||||||
|
m_k = pow(m0, i2 + 1);
|
||||||
|
} else {
|
||||||
|
m_k = pow(m1, 2 * (i2 - n_heads_log2_floor) + 1);
|
||||||
|
}
|
||||||
for (int64_t i00 = tpitg.x; i00 < ne00; i00 += ntg.x) {
|
for (int64_t i00 = tpitg.x; i00 < ne00; i00 += ntg.x) {
|
||||||
device const float * src = (device float *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
|
device const float * src = (device float *)((device char *) src0 + i03*nb03 + i02*nb02 + i01*nb01 + i00*nb00);
|
||||||
dst_data[i00] = src[0] + m_k * (i00 - ne00 + 1);
|
dst_data[i00] = src[0] + m_k * (i00 - ne00 + 1);
|
||||||
|
168
ggml-opencl.cpp
168
ggml-opencl.cpp
@ -202,14 +202,14 @@ inline void get_scale_min_k4(int j, const __global uint8_t *q, uint8_t *d, uint8
|
|||||||
|
|
||||||
__kernel void dequantize_block_q2_K(__global const struct block_q2_K *x, __global float *yy)
|
__kernel void dequantize_block_q2_K(__global const struct block_q2_K *x, __global float *yy)
|
||||||
{
|
{
|
||||||
const int i = get_group_id(0);
|
const int i = get_group_id(0) + get_global_offset(0);
|
||||||
const int tid = get_local_id(0);
|
const int tid = get_local_id(0);
|
||||||
const int n = tid / 32;
|
const int n = tid / 32;
|
||||||
const int l = tid - 32 * n;
|
const int l = tid - 32 * n;
|
||||||
const int is = 8 * n + l / 16;
|
const int is = 8 * n + l / 16;
|
||||||
|
|
||||||
const uint8_t q = x[i].qs[32 * n + l];
|
const uint8_t q = x[i].qs[32 * n + l];
|
||||||
__global float *y = yy + i * QK_K + 128 * n;
|
__global float *y = yy + get_group_id(0) * QK_K + 128 * n;
|
||||||
|
|
||||||
const float dall = vload_half(0, &x[i].d);
|
const float dall = vload_half(0, &x[i].d);
|
||||||
const float dmin = vload_half(0, &x[i].dmin);
|
const float dmin = vload_half(0, &x[i].dmin);
|
||||||
@ -223,7 +223,7 @@ __kernel void dequantize_block_q2_K(__global const struct block_q2_K *x, __globa
|
|||||||
__kernel void dequantize_block_q3_K(__global const struct block_q3_K *x, __global float *yy)
|
__kernel void dequantize_block_q3_K(__global const struct block_q3_K *x, __global float *yy)
|
||||||
{
|
{
|
||||||
int r = get_local_id(0) / 4;
|
int r = get_local_id(0) / 4;
|
||||||
int i = get_group_id(0);
|
int i = get_group_id(0) + get_global_offset(0);
|
||||||
int tid = r / 2;
|
int tid = r / 2;
|
||||||
int is0 = r % 2;
|
int is0 = r % 2;
|
||||||
int l0 = 16 * is0 + 4 * (get_local_id(0) % 4);
|
int l0 = 16 * is0 + 4 * (get_local_id(0) % 4);
|
||||||
@ -241,7 +241,7 @@ __kernel void dequantize_block_q3_K(__global const struct block_q3_K *x, __globa
|
|||||||
float d_all = vload_half(0, &x[i].d);
|
float d_all = vload_half(0, &x[i].d);
|
||||||
float dl = d_all * (us - 32);
|
float dl = d_all * (us - 32);
|
||||||
|
|
||||||
__global float *y = yy + i * QK_K + 128 * n + 32 * j;
|
__global float *y = yy + get_group_id(0) * QK_K + 128 * n + 32 * j;
|
||||||
const __global uint8_t *q = x[i].qs + 32 * n;
|
const __global uint8_t *q = x[i].qs + 32 * n;
|
||||||
const __global uint8_t *hm = x[i].hmask;
|
const __global uint8_t *hm = x[i].hmask;
|
||||||
|
|
||||||
@ -251,14 +251,14 @@ __kernel void dequantize_block_q3_K(__global const struct block_q3_K *x, __globa
|
|||||||
|
|
||||||
__kernel void dequantize_block_q4_K(__global const struct block_q4_K *x, __global float *yy)
|
__kernel void dequantize_block_q4_K(__global const struct block_q4_K *x, __global float *yy)
|
||||||
{
|
{
|
||||||
const int i = get_group_id(0);
|
const int i = get_group_id(0) + get_global_offset(0);
|
||||||
const int tid = get_local_id(0);
|
const int tid = get_local_id(0);
|
||||||
const int il = tid / 8;
|
const int il = tid / 8;
|
||||||
const int ir = tid % 8;
|
const int ir = tid % 8;
|
||||||
const int is = 2 * il;
|
const int is = 2 * il;
|
||||||
const int n = 4;
|
const int n = 4;
|
||||||
|
|
||||||
__global float *y = yy + i * QK_K + 64 * il + n * ir;
|
__global float *y = yy + get_group_id(0) * QK_K + 64 * il + n * ir;
|
||||||
|
|
||||||
const float dall = vload_half(0, &x[i].d);
|
const float dall = vload_half(0, &x[i].d);
|
||||||
const float dmin = vload_half(0, &x[i].dmin);
|
const float dmin = vload_half(0, &x[i].dmin);
|
||||||
@ -281,13 +281,13 @@ __kernel void dequantize_block_q4_K(__global const struct block_q4_K *x, __globa
|
|||||||
|
|
||||||
__kernel void dequantize_block_q5_K(__global const struct block_q5_K *x, __global float *yy)
|
__kernel void dequantize_block_q5_K(__global const struct block_q5_K *x, __global float *yy)
|
||||||
{
|
{
|
||||||
const int i = get_group_id(0);
|
const int i = get_group_id(0) + get_global_offset(0);
|
||||||
const int tid = get_local_id(0);
|
const int tid = get_local_id(0);
|
||||||
const int il = tid / 16;
|
const int il = tid / 16;
|
||||||
const int ir = tid % 16;
|
const int ir = tid % 16;
|
||||||
const int is = 2 * il;
|
const int is = 2 * il;
|
||||||
|
|
||||||
__global float *y = yy + i * QK_K + 64 * il + 2 * ir;
|
__global float *y = yy + get_group_id(0) * QK_K + 64 * il + 2 * ir;
|
||||||
|
|
||||||
const float dall = vload_half(0, &x[i].d);
|
const float dall = vload_half(0, &x[i].d);
|
||||||
const float dmin = vload_half(0, &x[i].dmin);
|
const float dmin = vload_half(0, &x[i].dmin);
|
||||||
@ -313,13 +313,13 @@ __kernel void dequantize_block_q5_K(__global const struct block_q5_K *x, __globa
|
|||||||
|
|
||||||
__kernel void dequantize_block_q6_K(__global const struct block_q6_K *x, __global float *yy)
|
__kernel void dequantize_block_q6_K(__global const struct block_q6_K *x, __global float *yy)
|
||||||
{
|
{
|
||||||
const int i = get_group_id(0);
|
const int i = get_group_id(0) + get_global_offset(0);
|
||||||
const int tid = get_local_id(0);
|
const int tid = get_local_id(0);
|
||||||
const int ip = tid / 32;
|
const int ip = tid / 32;
|
||||||
const int il = tid - 32 * ip;
|
const int il = tid - 32 * ip;
|
||||||
const int is = 8 * ip + il / 16;
|
const int is = 8 * ip + il / 16;
|
||||||
|
|
||||||
__global float *y = yy + i * QK_K + 128 * ip + il;
|
__global float *y = yy + get_group_id(0) * QK_K + 128 * ip + il;
|
||||||
|
|
||||||
const float d = vload_half(0, &x[i].d);
|
const float d = vload_half(0, &x[i].d);
|
||||||
|
|
||||||
@ -730,7 +730,7 @@ __kernel void KERNEL_NAME(__global X_TYPE* x, __global float* y) {
|
|||||||
const uint qk = QUANT_K;
|
const uint qk = QUANT_K;
|
||||||
const uint qr = QUANT_R;
|
const uint qr = QUANT_R;
|
||||||
|
|
||||||
const int ib = i/qk; // block index
|
const int ib = i/qk + get_global_offset(0); // block index
|
||||||
const int iqs = (i%qk)/qr; // quant index
|
const int iqs = (i%qk)/qr; // quant index
|
||||||
const int iybs = i - i%qk; // y block start index
|
const int iybs = i - i%qk; // y block start index
|
||||||
const int y_offset = qr == 1 ? 1 : qk/2;
|
const int y_offset = qr == 1 ? 1 : qk/2;
|
||||||
@ -1349,31 +1349,43 @@ static cl_int ggml_cl_h2d_tensor_2d(cl_command_queue queue, cl_mem dst, size_t o
|
|||||||
const enum ggml_type type = src->type;
|
const enum ggml_type type = src->type;
|
||||||
const size_t ts = ggml_type_size(type);
|
const size_t ts = ggml_type_size(type);
|
||||||
const size_t bs = ggml_blck_size(type);
|
const size_t bs = ggml_blck_size(type);
|
||||||
|
const uint64_t row_size = ts*ne0/bs;
|
||||||
|
|
||||||
const void * x = (const void *) ((const char *) src->data + i2*nb2 + i3*nb3);
|
const char * x = (const char *) src->data + i2*nb2 + i3*nb3;
|
||||||
if (nb0 == ts && nb1 == ts*ne0/bs) {
|
if (nb0 == ts && nb1 == row_size) {
|
||||||
err = clEnqueueWriteBuffer(queue, dst, CL_FALSE, offset, ne1*nb1, x, 0, NULL, ev);
|
return clEnqueueWriteBuffer(queue, dst, CL_FALSE, offset, ne1*row_size, x, 0, NULL, ev);
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
if (nb0 == ts) {
|
if (nb0 == ts) {
|
||||||
const size_t buffer_origin[3] = { offset, 0, 0 };
|
const size_t buffer_origin[3] = { offset, 0, 0 };
|
||||||
const size_t host_origin[3] = { 0, 0, 0 };
|
const size_t host_origin[3] = { 0, 0, 0 };
|
||||||
const size_t region[3] = { ts*ne0/bs, ne1, 1 };
|
const size_t region[3] = { row_size, ne1, 1 };
|
||||||
err = clEnqueueWriteBufferRect(queue, dst, CL_FALSE, buffer_origin, host_origin, region, ts*ne0/bs, 0, nb1, 0, x, 0, NULL, ev);
|
return clEnqueueWriteBufferRect(queue, dst, CL_FALSE, buffer_origin, host_origin, region, row_size, 0, nb1, 0, x, 0, NULL, ev);
|
||||||
return err;
|
|
||||||
}
|
}
|
||||||
|
std::vector<cl_event> events;
|
||||||
|
if (ev && ne1>1) events.reserve(ne1-1);
|
||||||
for (uint64_t i1 = 0; i1 < ne1; i1++) {
|
for (uint64_t i1 = 0; i1 < ne1; i1++) {
|
||||||
// pretend the row is a matrix with cols=1
|
// pretend the row is a matrix with cols=1
|
||||||
const size_t buffer_origin[3] = { offset, i1, 0 };
|
const size_t buffer_origin[3] = { offset + i1*row_size, 0, 0 };
|
||||||
const size_t host_origin[3] = { 0, 0, 0 };
|
const size_t host_origin[3] = { 0, 0, 0 };
|
||||||
const size_t region[3] = { ts/bs, ne0, 1 };
|
const size_t region[3] = { ts, ne0/bs, 1 };
|
||||||
err = clEnqueueWriteBufferRect(queue, dst, CL_FALSE, buffer_origin, host_origin, region, 0, 0, nb0, 0, ((const char *)x) + i1*nb0, 0, NULL, ev);
|
// if an event is requested, make the last write wait for all previous writes to complete
|
||||||
if (err != CL_SUCCESS) {
|
if (ev && i1) {
|
||||||
break;
|
events.push_back(*ev);
|
||||||
}
|
}
|
||||||
|
cl_uint nevents = i1 == ne1-1 ? events.size() : 0U;
|
||||||
|
err = clEnqueueWriteBufferRect(queue, dst, CL_FALSE, buffer_origin, host_origin, region, ts, 0, nb0, 0, x + i1*nb1, nevents, nevents ? events.data() : nullptr, ev);
|
||||||
|
if (err != CL_SUCCESS) {
|
||||||
|
for (auto event : events) {
|
||||||
|
clReleaseEvent(event);
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
for (auto event : events) {
|
||||||
|
CL_CHECK(clReleaseEvent(event));
|
||||||
|
}
|
||||||
|
return CL_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||||
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
||||||
@ -1476,10 +1488,15 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
|
|
||||||
const int64_t ne10 = src1->ne[0];
|
const int64_t ne10 = src1->ne[0];
|
||||||
const int64_t ne11 = src1->ne[1];
|
const int64_t ne11 = src1->ne[1];
|
||||||
|
const int64_t ne12 = src1->ne[2];
|
||||||
|
const int64_t ne13 = src1->ne[3];
|
||||||
|
|
||||||
const int nb2 = dst->nb[2];
|
const int nb2 = dst->nb[2];
|
||||||
const int nb3 = dst->nb[3];
|
const int nb3 = dst->nb[3];
|
||||||
|
|
||||||
|
const int64_t r2 = ne12 / ne02;
|
||||||
|
const int64_t r3 = ne13 / ne03;
|
||||||
|
|
||||||
const float alpha = 1.0f;
|
const float alpha = 1.0f;
|
||||||
const float beta = 0.0f;
|
const float beta = 0.0f;
|
||||||
const int x_ne = ne01 * ne00;
|
const int x_ne = ne01 * ne00;
|
||||||
@ -1498,13 +1515,25 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
cl_mem d_Y = ggml_cl_pool_malloc(sizeof(float) * y_ne, &y_size);
|
||||||
cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
cl_mem d_D = ggml_cl_pool_malloc(sizeof(float) * d_ne, &d_size);
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
size_t x_offset = 0;
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
int64_t pi02 = -1;
|
||||||
|
int64_t pi03 = -1;
|
||||||
|
|
||||||
|
for (int64_t i13 = 0; i13 < ne13; i13++) {
|
||||||
|
int64_t i03 = i13 / r3;
|
||||||
|
|
||||||
|
for (int64_t i12 = 0; i12 < ne12; i12++) {
|
||||||
|
int64_t i02 = i12 / r2;
|
||||||
|
|
||||||
// copy data to device
|
// copy data to device
|
||||||
if (src0->backend != GGML_BACKEND_GPU) {
|
if (src0->backend == GGML_BACKEND_GPU) {
|
||||||
|
x_offset = (i03 * ne02 + i02) * x_ne;
|
||||||
|
} else if (i02 != pi02 || i03 != pi03) {
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02, NULL));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02, NULL));
|
||||||
|
pi02 = i02;
|
||||||
|
pi03 = i03;
|
||||||
}
|
}
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i03, i02, NULL));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
|
||||||
|
|
||||||
CL_CHECK(clFinish(queue));
|
CL_CHECK(clFinish(queue));
|
||||||
|
|
||||||
@ -1514,7 +1543,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
clblast::Transpose::kYes, clblast::Transpose::kNo,
|
clblast::Transpose::kYes, clblast::Transpose::kNo,
|
||||||
ne01, ne11, ne10,
|
ne01, ne11, ne10,
|
||||||
alpha,
|
alpha,
|
||||||
d_X, 0, ne00,
|
d_X, x_offset, ne00,
|
||||||
d_Y, 0, ne10,
|
d_Y, 0, ne10,
|
||||||
beta,
|
beta,
|
||||||
d_D, 0, ne01,
|
d_D, 0, ne01,
|
||||||
@ -1525,7 +1554,7 @@ static void ggml_cl_mul_mat_f32(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
}
|
}
|
||||||
|
|
||||||
// copy dst to host
|
// copy dst to host
|
||||||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &ev_sgemm, NULL));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1547,6 +1576,8 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
|
|
||||||
const int64_t ne10 = src1->ne[0];
|
const int64_t ne10 = src1->ne[0];
|
||||||
const int64_t ne11 = src1->ne[1];
|
const int64_t ne11 = src1->ne[1];
|
||||||
|
const int64_t ne12 = src1->ne[2];
|
||||||
|
const int64_t ne13 = src1->ne[3];
|
||||||
|
|
||||||
const int nb10 = src1->nb[0];
|
const int nb10 = src1->nb[0];
|
||||||
const int nb11 = src1->nb[1];
|
const int nb11 = src1->nb[1];
|
||||||
@ -1556,6 +1587,9 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
const int nb2 = dst->nb[2];
|
const int nb2 = dst->nb[2];
|
||||||
const int nb3 = dst->nb[3];
|
const int nb3 = dst->nb[3];
|
||||||
|
|
||||||
|
const int64_t r2 = ne12 / ne02;
|
||||||
|
const int64_t r3 = ne13 / ne03;
|
||||||
|
|
||||||
const ggml_fp16_t alpha = ggml_fp32_to_fp16(1.0f);
|
const ggml_fp16_t alpha = ggml_fp32_to_fp16(1.0f);
|
||||||
const ggml_fp16_t beta = ggml_fp32_to_fp16(0.0f);
|
const ggml_fp16_t beta = ggml_fp32_to_fp16(0.0f);
|
||||||
const int x_ne = ne01 * ne00;
|
const int x_ne = ne01 * ne00;
|
||||||
@ -1577,32 +1611,44 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
bool src1_cont_rows = nb10 == sizeof(float);
|
bool src1_cont_rows = nb10 == sizeof(float);
|
||||||
bool src1_cont_cols = (size_t)nb11 == ne11*sizeof(float);
|
bool src1_cont_cols = (size_t)nb11 == ne11*sizeof(float);
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
size_t x_offset = 0;
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
int64_t pi02 = -1;
|
||||||
|
int64_t pi03 = -1;
|
||||||
|
|
||||||
|
for (int64_t i13 = 0; i13 < ne13; i13++) {
|
||||||
|
int64_t i03 = i13 / r3;
|
||||||
|
|
||||||
|
for (int64_t i12 = 0; i12 < ne12; i12++) {
|
||||||
|
int64_t i02 = i12 / r2;
|
||||||
|
|
||||||
// copy src0 to device
|
// copy src0 to device
|
||||||
if (src0->backend != GGML_BACKEND_GPU) {
|
if (src0->backend == GGML_BACKEND_GPU) {
|
||||||
|
x_offset = (i03 * ne02 + i02) * x_ne;
|
||||||
|
} else if (i02 != pi02 || i03 != pi03) {
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02, NULL));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_X, 0, src0, i03, i02, NULL));
|
||||||
|
pi02 = i02;
|
||||||
|
pi03 = i03;
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert src1 to fp16
|
// convert src1 to fp16
|
||||||
// TODO: use multiple threads
|
// TODO: use multiple threads
|
||||||
ggml_fp16_t * const tmp = (ggml_fp16_t *) wdata + (ne11 * ne10) * (i03 * ne02 + i02);
|
ggml_fp16_t * const tmp = (ggml_fp16_t *) wdata + (ne11 * ne10) * (i13 * ne12 + i12);
|
||||||
char * src1i = (char *) src1->data + i03*nb13 + i02*nb12;
|
char * src1i = (char *) src1->data + i13*nb13 + i12*nb12;
|
||||||
if (src1_cont_rows) {
|
if (src1_cont_rows) {
|
||||||
if (src1_cont_cols) {
|
if (src1_cont_cols) {
|
||||||
ggml_fp32_to_fp16_row((float *) src1i, tmp, ne10*ne11);
|
ggml_fp32_to_fp16_row((float *) src1i, tmp, ne10*ne11);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int64_t i01 = 0; i01 < ne11; i01++) {
|
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
||||||
ggml_fp32_to_fp16_row((float *) (src1i + i01*nb11), tmp + i01*ne10, ne10);
|
ggml_fp32_to_fp16_row((float *) (src1i + i11*nb11), tmp + i11*ne10, ne10);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
for (int64_t i01 = 0; i01 < ne11; i01++) {
|
for (int64_t i11 = 0; i11 < ne11; i11++) {
|
||||||
for (int64_t i00 = 0; i00 < ne10; i00++) {
|
for (int64_t i10 = 0; i10 < ne10; i10++) {
|
||||||
// very slow due to no inlining
|
// very slow due to no inlining
|
||||||
tmp[i01*ne10 + i00] = ggml_fp32_to_fp16(*(float *) (src1i + i01*nb11 + i00*nb10));
|
tmp[i11*ne10 + i10] = ggml_fp32_to_fp16(*(float *) (src1i + i11*nb11 + i10*nb10));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -1618,7 +1664,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
clblast::Transpose::kYes, clblast::Transpose::kNo,
|
clblast::Transpose::kYes, clblast::Transpose::kNo,
|
||||||
ne01, ne11, ne10,
|
ne01, ne11, ne10,
|
||||||
alpha,
|
alpha,
|
||||||
d_X, 0, ne00,
|
d_X, x_offset, ne00,
|
||||||
d_Y, 0, ne10,
|
d_Y, 0, ne10,
|
||||||
beta,
|
beta,
|
||||||
d_D, 0, ne01,
|
d_D, 0, ne01,
|
||||||
@ -1631,7 +1677,7 @@ static void ggml_cl_mul_mat_f16(const ggml_tensor * src0, const ggml_tensor * sr
|
|||||||
// copy dst to host, then convert to float
|
// copy dst to host, then convert to float
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(ggml_fp16_t) * d_ne, tmp, 1, &ev_sgemm, NULL));
|
||||||
|
|
||||||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||||
|
|
||||||
ggml_fp16_to_fp32_row(tmp, d, d_ne);
|
ggml_fp16_to_fp32_row(tmp, d, d_ne);
|
||||||
}
|
}
|
||||||
@ -1652,18 +1698,24 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||||||
|
|
||||||
const int64_t ne10 = src1->ne[0];
|
const int64_t ne10 = src1->ne[0];
|
||||||
const int64_t ne11 = src1->ne[1];
|
const int64_t ne11 = src1->ne[1];
|
||||||
|
const int64_t ne12 = src1->ne[2];
|
||||||
|
const int64_t ne13 = src1->ne[3];
|
||||||
|
|
||||||
const int nb2 = dst->nb[2];
|
const int nb2 = dst->nb[2];
|
||||||
const int nb3 = dst->nb[3];
|
const int nb3 = dst->nb[3];
|
||||||
const ggml_type type = src0->type;
|
const ggml_type type = src0->type;
|
||||||
const bool mul_mat_vec = ne11 == 1;
|
const bool mul_mat_vec = ne11 == 1;
|
||||||
|
|
||||||
|
const int64_t r2 = ne12 / ne02;
|
||||||
|
const int64_t r3 = ne13 / ne03;
|
||||||
|
|
||||||
const float alpha = 1.0f;
|
const float alpha = 1.0f;
|
||||||
const float beta = 0.0f;
|
const float beta = 0.0f;
|
||||||
const int x_ne = ne01 * ne00;
|
const int x_ne = ne01 * ne00;
|
||||||
const int y_ne = ne11 * ne10;
|
const int y_ne = ne11 * ne10;
|
||||||
const int d_ne = ne11 * ne01;
|
const int d_ne = ne11 * ne01;
|
||||||
const size_t q_sz = ggml_type_size(type) * x_ne / ggml_blck_size(type);
|
const int x_bps = x_ne / ggml_blck_size(type); // blocks per 2D slice
|
||||||
|
const size_t q_sz = ggml_type_size(type) * x_bps;
|
||||||
|
|
||||||
size_t x_size;
|
size_t x_size;
|
||||||
size_t y_size;
|
size_t y_size;
|
||||||
@ -1690,12 +1742,23 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||||||
size_t ev_idx = 0;
|
size_t ev_idx = 0;
|
||||||
std::vector<cl_event> events;
|
std::vector<cl_event> events;
|
||||||
|
|
||||||
for (int64_t i03 = 0; i03 < ne03; i03++) {
|
int64_t pi02 = -1;
|
||||||
for (int64_t i02 = 0; i02 < ne02; i02++) {
|
int64_t pi03 = -1;
|
||||||
|
|
||||||
|
for (int64_t i13 = 0; i13 < ne13; i13++) {
|
||||||
|
int64_t i03 = i13 / r3;
|
||||||
|
|
||||||
|
for (int64_t i12 = 0; i12 < ne12; i12++) {
|
||||||
|
int64_t i02 = i12 / r2;
|
||||||
|
|
||||||
// copy src0 to device if necessary
|
// copy src0 to device if necessary
|
||||||
if (src0->backend == GGML_BACKEND_CPU) {
|
if (src0->backend == GGML_BACKEND_CPU) {
|
||||||
|
if (i02 != pi02 || i03 != pi03) {
|
||||||
events.emplace_back();
|
events.emplace_back();
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Q, 0, src0, i03, i02, events.data() + ev_idx++));
|
||||||
|
pi02 = i02;
|
||||||
|
pi03 = i03;
|
||||||
|
}
|
||||||
} else if (src0->backend == GGML_BACKEND_GPU) {
|
} else if (src0->backend == GGML_BACKEND_GPU) {
|
||||||
d_Q = (cl_mem) src0->extra;
|
d_Q = (cl_mem) src0->extra;
|
||||||
} else {
|
} else {
|
||||||
@ -1704,7 +1767,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||||||
if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
|
if (mul_mat_vec) { // specialized dequantize_mul_mat_vec kernel
|
||||||
// copy src1 to device
|
// copy src1 to device
|
||||||
events.emplace_back();
|
events.emplace_back();
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i03, i02, events.data() + ev_idx++));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, events.data() + ev_idx++));
|
||||||
|
|
||||||
// compute
|
// compute
|
||||||
const size_t global = ne01 * CL_DMMV_BLOCK_SIZE;
|
const size_t global = ne01 * CL_DMMV_BLOCK_SIZE;
|
||||||
@ -1720,12 +1783,13 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||||||
} else { // general dequantization kernel + CLBlast matrix matrix multiplication
|
} else { // general dequantization kernel + CLBlast matrix matrix multiplication
|
||||||
// convert src0 to fp32 on device
|
// convert src0 to fp32 on device
|
||||||
const size_t global = x_ne / global_denom;
|
const size_t global = x_ne / global_denom;
|
||||||
|
const size_t offset = src0->backend == GGML_BACKEND_GPU ? (i03 * ne02 + i02) * x_bps : 0;
|
||||||
CL_CHECK(clSetKernelArg(*to_fp32_cl, 0, sizeof(cl_mem), &d_Q));
|
CL_CHECK(clSetKernelArg(*to_fp32_cl, 0, sizeof(cl_mem), &d_Q));
|
||||||
CL_CHECK(clSetKernelArg(*to_fp32_cl, 1, sizeof(cl_mem), &d_X));
|
CL_CHECK(clSetKernelArg(*to_fp32_cl, 1, sizeof(cl_mem), &d_X));
|
||||||
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, NULL, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
|
CL_CHECK(clEnqueueNDRangeKernel(queue, *to_fp32_cl, 1, offset > 0 ? &offset : NULL, &global, local > 0 ? &local : NULL, events.size(), !events.empty() ? events.data() : NULL, NULL));
|
||||||
|
|
||||||
// copy src1 to device
|
// copy src1 to device
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i03, i02, NULL));
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, d_Y, 0, src1, i13, i12, NULL));
|
||||||
|
|
||||||
events.emplace_back();
|
events.emplace_back();
|
||||||
|
|
||||||
@ -1749,7 +1813,7 @@ static void ggml_cl_mul_mat_q_f32(const ggml_tensor * src0, const ggml_tensor *
|
|||||||
}
|
}
|
||||||
|
|
||||||
// copy dst to host
|
// copy dst to host
|
||||||
float * d = (float *) ((char *) dst->data + i02*nb2 + i03*nb3);
|
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||||
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &events[events.size() - 1], NULL));
|
CL_CHECK(clEnqueueReadBuffer(queue, d_D, true, 0, sizeof(float) * d_ne, d, 1, &events[events.size() - 1], NULL));
|
||||||
for (auto *event : events) {
|
for (auto *event : events) {
|
||||||
clReleaseEvent(event);
|
clReleaseEvent(event);
|
||||||
@ -1844,17 +1908,19 @@ void ggml_cl_transform_tensor(void * data, ggml_tensor * tensor) {
|
|||||||
const int64_t ne3 = tensor->ne[3];
|
const int64_t ne3 = tensor->ne[3];
|
||||||
|
|
||||||
const ggml_type type = tensor->type;
|
const ggml_type type = tensor->type;
|
||||||
const size_t q_sz = ggml_type_size(type) * ne0 * ne1 * ne2 * ne3 / ggml_blck_size(type);
|
const size_t s_sz = ggml_type_size(type) * (size_t) (ne0 * ne1 / ggml_blck_size(type));
|
||||||
|
const size_t q_sz = s_sz * (size_t) (ne2 * ne3);
|
||||||
|
|
||||||
size_t q_size;
|
size_t q_size;
|
||||||
cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
|
cl_mem dst = ggml_cl_pool_malloc(q_sz, &q_size);
|
||||||
|
|
||||||
tensor->data = data;
|
tensor->data = data;
|
||||||
// copy tensor to device
|
// copy tensor to device
|
||||||
|
size_t offset = 0;
|
||||||
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
for (int64_t i3 = 0; i3 < ne3; i3++) {
|
||||||
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
for (int64_t i2 = 0; i2 < ne2; i2++) {
|
||||||
int i = i3*ne2 + i2;
|
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, dst, offset, tensor, i3, i2, NULL));
|
||||||
CL_CHECK(ggml_cl_h2d_tensor_2d(queue, dst, i*ne0*ne1, tensor, i3, i2, NULL));
|
offset += s_sz;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
13
ggml.h
13
ggml.h
@ -401,10 +401,14 @@ extern "C" {
|
|||||||
GGML_OP_CLAMP,
|
GGML_OP_CLAMP,
|
||||||
GGML_OP_CONV_1D,
|
GGML_OP_CONV_1D,
|
||||||
GGML_OP_CONV_2D,
|
GGML_OP_CONV_2D,
|
||||||
|
GGML_OP_CONV_TRANSPOSE_1D,
|
||||||
GGML_OP_CONV_TRANSPOSE_2D,
|
GGML_OP_CONV_TRANSPOSE_2D,
|
||||||
GGML_OP_POOL_1D,
|
GGML_OP_POOL_1D,
|
||||||
GGML_OP_POOL_2D,
|
GGML_OP_POOL_2D,
|
||||||
|
|
||||||
|
GGML_OP_CONV_1D_STAGE_0, // internal
|
||||||
|
GGML_OP_CONV_1D_STAGE_1, // internal
|
||||||
|
|
||||||
GGML_OP_UPSCALE, // nearest interpolate
|
GGML_OP_UPSCALE, // nearest interpolate
|
||||||
|
|
||||||
GGML_OP_FLASH_ATTN,
|
GGML_OP_FLASH_ATTN,
|
||||||
@ -1386,6 +1390,14 @@ extern "C" {
|
|||||||
int s,
|
int s,
|
||||||
int d);
|
int d);
|
||||||
|
|
||||||
|
GGML_API struct ggml_tensor * ggml_conv_transpose_1d(
|
||||||
|
struct ggml_context * ctx,
|
||||||
|
struct ggml_tensor * a,
|
||||||
|
struct ggml_tensor * b,
|
||||||
|
int s0,
|
||||||
|
int p0,
|
||||||
|
int d0);
|
||||||
|
|
||||||
GGML_API struct ggml_tensor * ggml_conv_2d(
|
GGML_API struct ggml_tensor * ggml_conv_2d(
|
||||||
struct ggml_context * ctx,
|
struct ggml_context * ctx,
|
||||||
struct ggml_tensor * a,
|
struct ggml_tensor * a,
|
||||||
@ -1759,6 +1771,7 @@ extern "C" {
|
|||||||
GGML_OPT_NO_CONTEXT,
|
GGML_OPT_NO_CONTEXT,
|
||||||
GGML_OPT_INVALID_WOLFE,
|
GGML_OPT_INVALID_WOLFE,
|
||||||
GGML_OPT_FAIL,
|
GGML_OPT_FAIL,
|
||||||
|
GGML_OPT_CANCEL,
|
||||||
|
|
||||||
GGML_LINESEARCH_FAIL = -128,
|
GGML_LINESEARCH_FAIL = -128,
|
||||||
GGML_LINESEARCH_MINIMUM_STEP,
|
GGML_LINESEARCH_MINIMUM_STEP,
|
||||||
|
@ -85,10 +85,13 @@ class MODEL_ARCH(IntEnum):
|
|||||||
GPTNEOX : int = auto()
|
GPTNEOX : int = auto()
|
||||||
MPT : int = auto()
|
MPT : int = auto()
|
||||||
STARCODER : int = auto()
|
STARCODER : int = auto()
|
||||||
|
REFACT : int = auto()
|
||||||
|
BERT : int = auto()
|
||||||
|
|
||||||
|
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
TOKEN_EMBD : int = auto()
|
TOKEN_EMBD : int = auto()
|
||||||
|
TOKEN_TYPES : int = auto()
|
||||||
POS_EMBD : int = auto()
|
POS_EMBD : int = auto()
|
||||||
OUTPUT : int = auto()
|
OUTPUT : int = auto()
|
||||||
OUTPUT_NORM : int = auto()
|
OUTPUT_NORM : int = auto()
|
||||||
@ -116,78 +119,153 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|||||||
MODEL_ARCH.GPTNEOX: "gptneox",
|
MODEL_ARCH.GPTNEOX: "gptneox",
|
||||||
MODEL_ARCH.MPT: "mpt",
|
MODEL_ARCH.MPT: "mpt",
|
||||||
MODEL_ARCH.STARCODER: "starcoder",
|
MODEL_ARCH.STARCODER: "starcoder",
|
||||||
|
MODEL_ARCH.REFACT: "refact",
|
||||||
|
MODEL_ARCH.BERT: "bert",
|
||||||
}
|
}
|
||||||
|
|
||||||
MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_ARCH.LLAMA: {
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
|
||||||
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
|
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
|
||||||
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
|
|
||||||
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
|
|
||||||
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
|
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
|
||||||
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
|
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
|
||||||
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
|
||||||
},
|
|
||||||
MODEL_ARCH.GPTNEOX: {
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
|
||||||
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
|
||||||
},
|
|
||||||
MODEL_ARCH.FALCON: {
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
|
||||||
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
|
|
||||||
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
|
||||||
},
|
|
||||||
MODEL_ARCH.BAICHUAN: {
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
|
||||||
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
|
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
|
||||||
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
|
|
||||||
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
|
|
||||||
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
|
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
|
||||||
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
|
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
|
||||||
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
|
||||||
},
|
|
||||||
MODEL_ARCH.STARCODER: {
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
||||||
|
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
||||||
MODEL_TENSOR.POS_EMBD: "position_embd",
|
MODEL_TENSOR.POS_EMBD: "position_embd",
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
||||||
MODEL_TENSOR.OUTPUT: "output",
|
MODEL_TENSOR.OUTPUT: "output",
|
||||||
|
MODEL_TENSOR.ROPE_FREQS: "rope_freqs",
|
||||||
|
|
||||||
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
|
||||||
|
MODEL_TENSOR.ATTN_NORM_2: "blk.{bid}.attn_norm_2",
|
||||||
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
MODEL_TENSOR.ATTN_QKV: "blk.{bid}.attn_qkv",
|
||||||
|
MODEL_TENSOR.ATTN_Q: "blk.{bid}.attn_q",
|
||||||
|
MODEL_TENSOR.ATTN_K: "blk.{bid}.attn_k",
|
||||||
|
MODEL_TENSOR.ATTN_V: "blk.{bid}.attn_v",
|
||||||
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
MODEL_TENSOR.ATTN_OUT: "blk.{bid}.attn_output",
|
||||||
|
MODEL_TENSOR.ATTN_ROT_EMBD: "blk.{bid}.attn_rot_embd",
|
||||||
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
MODEL_TENSOR.FFN_NORM: "blk.{bid}.ffn_norm",
|
||||||
|
MODEL_TENSOR.FFN_GATE: "blk.{bid}.ffn_gate",
|
||||||
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
MODEL_TENSOR.FFN_DOWN: "blk.{bid}.ffn_down",
|
||||||
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
|
||||||
},
|
}
|
||||||
MODEL_ARCH.GPT2: {
|
|
||||||
|
MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
|
MODEL_ARCH.LLAMA: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.GPTNEOX: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_QKV,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.FALCON: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_NORM_2,
|
||||||
|
MODEL_TENSOR.ATTN_QKV,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.BAICHUAN: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ROPE_FREQS,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.ATTN_ROT_EMBD,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.STARCODER: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.POS_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_QKV,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.BERT: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.TOKEN_TYPES,
|
||||||
|
MODEL_TENSOR.POS_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.MPT: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_QKV,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.GPTJ: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.REFACT: [
|
||||||
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
|
MODEL_TENSOR.OUTPUT_NORM,
|
||||||
|
MODEL_TENSOR.OUTPUT,
|
||||||
|
MODEL_TENSOR.ATTN_NORM,
|
||||||
|
MODEL_TENSOR.ATTN_Q,
|
||||||
|
MODEL_TENSOR.ATTN_K,
|
||||||
|
MODEL_TENSOR.ATTN_V,
|
||||||
|
MODEL_TENSOR.ATTN_OUT,
|
||||||
|
MODEL_TENSOR.FFN_NORM,
|
||||||
|
MODEL_TENSOR.FFN_GATE,
|
||||||
|
MODEL_TENSOR.FFN_DOWN,
|
||||||
|
MODEL_TENSOR.FFN_UP,
|
||||||
|
],
|
||||||
|
MODEL_ARCH.GPT2: [
|
||||||
# TODO
|
# TODO
|
||||||
},
|
],
|
||||||
# TODO
|
# TODO
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -209,30 +287,40 @@ class TensorNameMap:
|
|||||||
# Token embeddings
|
# Token embeddings
|
||||||
MODEL_TENSOR.TOKEN_EMBD: (
|
MODEL_TENSOR.TOKEN_EMBD: (
|
||||||
"gpt_neox.embed_in", # gptneox
|
"gpt_neox.embed_in", # gptneox
|
||||||
"transformer.wte", # gpt2 mpt
|
"transformer.wte", # gpt2 gpt-j mpt refact
|
||||||
"transformer.word_embeddings", # falcon
|
"transformer.word_embeddings", # falcon
|
||||||
"model.embed_tokens", # llama-hf
|
"model.embed_tokens", # llama-hf
|
||||||
"tok_embeddings", # llama-pth
|
"tok_embeddings", # llama-pth
|
||||||
|
"embeddings.word_embeddings", # bert
|
||||||
|
),
|
||||||
|
|
||||||
|
# Token type embeddings
|
||||||
|
MODEL_TENSOR.TOKEN_TYPES: (
|
||||||
|
"embeddings.token_type_embeddings", # bert
|
||||||
),
|
),
|
||||||
|
|
||||||
# Position embeddings
|
# Position embeddings
|
||||||
MODEL_TENSOR.POS_EMBD: (
|
MODEL_TENSOR.POS_EMBD: (
|
||||||
"transformer.wpe", # gpt2
|
"transformer.wpe", # gpt2
|
||||||
|
"embeddings.position_embeddings", # bert
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output
|
# Output
|
||||||
MODEL_TENSOR.OUTPUT: (
|
MODEL_TENSOR.OUTPUT: (
|
||||||
"embed_out", # gptneox
|
"embed_out", # gptneox
|
||||||
"lm_head", # gpt2 mpt falcon llama-hf baichuan
|
"lm_head", # gpt2 gpt-j mpt falcon llama-hf baichuan
|
||||||
"output", # llama-pth
|
"output", # llama-pth
|
||||||
),
|
),
|
||||||
|
|
||||||
# Output norm
|
# Output norm
|
||||||
MODEL_TENSOR.OUTPUT_NORM: (
|
MODEL_TENSOR.OUTPUT_NORM: (
|
||||||
"gpt_neox.final_layer_norm", # gptneox
|
"gpt_neox.final_layer_norm", # gptneox
|
||||||
"transformer.ln_f", # gpt2 falcon
|
"transformer.ln_f", # gpt2 gpt-j falcon
|
||||||
"model.norm", # llama-hf baichuan
|
"model.norm", # llama-hf baichuan
|
||||||
"norm", # llama-pth
|
"norm", # llama-pth
|
||||||
|
"embeddings.LayerNorm", # bert
|
||||||
|
"transformer.norm_f", # mpt
|
||||||
|
"ln_f", # refact
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rope frequencies
|
# Rope frequencies
|
||||||
@ -245,12 +333,13 @@ class TensorNameMap:
|
|||||||
# Attention norm
|
# Attention norm
|
||||||
MODEL_TENSOR.ATTN_NORM: (
|
MODEL_TENSOR.ATTN_NORM: (
|
||||||
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
|
||||||
"transformer.h.{bid}.ln_1", # gpt2
|
"transformer.h.{bid}.ln_1", # gpt2 gpt-j refact
|
||||||
"transformer.blocks.{bid}.norm_1", # mpt
|
"transformer.blocks.{bid}.norm_1", # mpt
|
||||||
"transformer.h.{bid}.input_layernorm", # falcon7b
|
"transformer.h.{bid}.input_layernorm", # falcon7b
|
||||||
"transformer.h.{bid}.ln_mlp", # falcon40b
|
"transformer.h.{bid}.ln_mlp", # falcon40b
|
||||||
"model.layers.{bid}.input_layernorm", # llama-hf
|
"model.layers.{bid}.input_layernorm", # llama-hf
|
||||||
"layers.{bid}.attention_norm", # llama-pth
|
"layers.{bid}.attention_norm", # llama-pth
|
||||||
|
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention norm 2
|
# Attention norm 2
|
||||||
@ -270,28 +359,36 @@ class TensorNameMap:
|
|||||||
MODEL_TENSOR.ATTN_Q: (
|
MODEL_TENSOR.ATTN_Q: (
|
||||||
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
"model.layers.{bid}.self_attn.q_proj", # llama-hf
|
||||||
"layers.{bid}.attention.wq", # llama-pth
|
"layers.{bid}.attention.wq", # llama-pth
|
||||||
|
"encoder.layer.{bid}.attention.self.query", # bert
|
||||||
|
"transformer.h.{bid}.attn.q_proj", # gpt-j
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention key
|
# Attention key
|
||||||
MODEL_TENSOR.ATTN_K: (
|
MODEL_TENSOR.ATTN_K: (
|
||||||
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
"model.layers.{bid}.self_attn.k_proj", # llama-hf
|
||||||
"layers.{bid}.attention.wk", # llama-pth
|
"layers.{bid}.attention.wk", # llama-pth
|
||||||
|
"encoder.layer.{bid}.attention.self.key", # bert
|
||||||
|
"transformer.h.{bid}.attn.k_proj", # gpt-j
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention value
|
# Attention value
|
||||||
MODEL_TENSOR.ATTN_V: (
|
MODEL_TENSOR.ATTN_V: (
|
||||||
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
"model.layers.{bid}.self_attn.v_proj", # llama-hf
|
||||||
"layers.{bid}.attention.wv", # llama-pth
|
"layers.{bid}.attention.wv", # llama-pth
|
||||||
|
"encoder.layer.{bid}.attention.self.value", # bert
|
||||||
|
"transformer.h.{bid}.attn.v_proj", # gpt-j
|
||||||
),
|
),
|
||||||
|
|
||||||
# Attention output
|
# Attention output
|
||||||
MODEL_TENSOR.ATTN_OUT: (
|
MODEL_TENSOR.ATTN_OUT: (
|
||||||
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
"gpt_neox.layers.{bid}.attention.dense", # gptneox
|
||||||
"transformer.h.{bid}.attn.c_proj", # gpt2
|
"transformer.h.{bid}.attn.c_proj", # gpt2 refact
|
||||||
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
"transformer.blocks.{bid}.attn.out_proj", # mpt
|
||||||
"transformer.h.{bid}.self_attention.dense", # falcon
|
"transformer.h.{bid}.self_attention.dense", # falcon
|
||||||
"model.layers.{bid}.self_attn.o_proj", # llama-hf
|
"model.layers.{bid}.self_attn.o_proj", # llama-hf
|
||||||
"layers.{bid}.attention.wo", # llama-pth
|
"layers.{bid}.attention.wo", # llama-pth
|
||||||
|
"encoder.layer.{bid}.attention.output.dense", # bert
|
||||||
|
"transformer.h.{bid}.attn.out_proj", # gpt-j
|
||||||
),
|
),
|
||||||
|
|
||||||
# Rotary embeddings
|
# Rotary embeddings
|
||||||
@ -303,10 +400,11 @@ class TensorNameMap:
|
|||||||
# Feed-forward norm
|
# Feed-forward norm
|
||||||
MODEL_TENSOR.FFN_NORM: (
|
MODEL_TENSOR.FFN_NORM: (
|
||||||
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
|
||||||
"transformer.h.{bid}.ln_2", # gpt2
|
"transformer.h.{bid}.ln_2", # gpt2 refact
|
||||||
"transformer.blocks.{bid}.norm_2", # mpt
|
"transformer.blocks.{bid}.norm_2", # mpt
|
||||||
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
"model.layers.{bid}.post_attention_layernorm", # llama-hf
|
||||||
"layers.{bid}.ffn_norm", # llama-pth
|
"layers.{bid}.ffn_norm", # llama-pth
|
||||||
|
"encoder.layer.{bid}.output.LayerNorm", # bert
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward up
|
# Feed-forward up
|
||||||
@ -315,51 +413,51 @@ class TensorNameMap:
|
|||||||
"transformer.h.{bid}.mlp.c_fc", # gpt2
|
"transformer.h.{bid}.mlp.c_fc", # gpt2
|
||||||
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
"transformer.blocks.{bid}.ffn.up_proj", # mpt
|
||||||
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
|
||||||
"model.layers.{bid}.mlp.up_proj", # llama-hf
|
"model.layers.{bid}.mlp.up_proj", # llama-hf refact
|
||||||
"layers.{bid}.feed_forward.w3", # llama-pth
|
"layers.{bid}.feed_forward.w3", # llama-pth
|
||||||
|
"encoder.layer.{bid}.intermediate.dense", # bert
|
||||||
|
"transformer.h.{bid}.mlp.fc_in", # gpt-j
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward gate
|
# Feed-forward gate
|
||||||
MODEL_TENSOR.FFN_GATE: (
|
MODEL_TENSOR.FFN_GATE: (
|
||||||
"model.layers.{bid}.mlp.gate_proj", # llama-hf
|
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
|
||||||
"layers.{bid}.feed_forward.w1", # llama-pth
|
"layers.{bid}.feed_forward.w1", # llama-pth
|
||||||
),
|
),
|
||||||
|
|
||||||
# Feed-forward down
|
# Feed-forward down
|
||||||
MODEL_TENSOR.FFN_DOWN: (
|
MODEL_TENSOR.FFN_DOWN: (
|
||||||
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
|
||||||
"transformer.h.{bid}.mlp.c_proj", # gpt2
|
"transformer.h.{bid}.mlp.c_proj", # gpt2 refact
|
||||||
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
"transformer.blocks.{bid}.ffn.down_proj", # mpt
|
||||||
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
|
||||||
"model.layers.{bid}.mlp.down_proj", # llama-hf
|
"model.layers.{bid}.mlp.down_proj", # llama-hf
|
||||||
"layers.{bid}.feed_forward.w2", # llama-pth
|
"layers.{bid}.feed_forward.w2", # llama-pth
|
||||||
|
"encoder.layer.{bid}.output.dense", # bert
|
||||||
|
"transformer.h.{bid}.mlp.fc_out", # gpt-j
|
||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
mapping: dict[str, tuple[MODEL_TENSOR, str]]
|
||||||
|
|
||||||
tensor_names: dict[MODEL_TENSOR, str]
|
|
||||||
|
|
||||||
def __init__(self, arch: MODEL_ARCH, n_blocks: int):
|
def __init__(self, arch: MODEL_ARCH, n_blocks: int):
|
||||||
mapping = self.mapping = {}
|
self.mapping = {}
|
||||||
tensor_names = self.tensor_names = MODEL_TENSOR_NAMES[arch]
|
|
||||||
for tensor, keys in self.mappings_cfg.items():
|
for tensor, keys in self.mappings_cfg.items():
|
||||||
tensor_name = tensor_names.get(tensor)
|
if tensor not in MODEL_TENSORS[arch]:
|
||||||
if tensor_name is None:
|
|
||||||
continue
|
continue
|
||||||
mapping[tensor_name] = (tensor, tensor_name)
|
tensor_name = TENSOR_NAMES[tensor]
|
||||||
|
self.mapping[tensor_name] = (tensor, tensor_name)
|
||||||
for key in keys:
|
for key in keys:
|
||||||
mapping[key] = (tensor, tensor_name)
|
self.mapping[key] = (tensor, tensor_name)
|
||||||
for bid in range(n_blocks):
|
for bid in range(n_blocks):
|
||||||
for tensor, keys in self.block_mappings_cfg.items():
|
for tensor, keys in self.block_mappings_cfg.items():
|
||||||
tensor_name = tensor_names.get(tensor)
|
if tensor not in MODEL_TENSORS[arch]:
|
||||||
if tensor_name is None:
|
|
||||||
continue
|
continue
|
||||||
tensor_name = tensor_name.format(bid = bid)
|
tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
|
||||||
mapping[tensor_name] = (tensor, tensor_name)
|
self.mapping[tensor_name] = (tensor, tensor_name)
|
||||||
for key in keys:
|
for key in keys:
|
||||||
key = key.format(bid = bid)
|
key = key.format(bid = bid)
|
||||||
mapping[key] = (tensor, tensor_name)
|
self.mapping[key] = (tensor, tensor_name)
|
||||||
|
|
||||||
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
|
||||||
result = self.mapping.get(key)
|
result = self.mapping.get(key)
|
||||||
@ -800,22 +898,25 @@ class SpecialVocab:
|
|||||||
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
|
special_token_types: tuple[str, ...] = ('bos', 'eos', 'unk', 'sep', 'pad')
|
||||||
special_token_ids: dict[str, int] = {}
|
special_token_ids: dict[str, int] = {}
|
||||||
|
|
||||||
def __init__(self, path: Path, load_merges: bool = False, special_token_types: tuple[str, ...] | None = None):
|
def __init__(
|
||||||
|
self, path: str | os.PathLike[str], load_merges: bool = False,
|
||||||
|
special_token_types: tuple[str, ...] | None = None,
|
||||||
|
):
|
||||||
self.special_token_ids = {}
|
self.special_token_ids = {}
|
||||||
self.load_merges = load_merges
|
self.load_merges = load_merges
|
||||||
if special_token_types is not None:
|
if special_token_types is not None:
|
||||||
self.special_token_types = special_token_types
|
self.special_token_types = special_token_types
|
||||||
self.load(path)
|
self._load(Path(path))
|
||||||
|
|
||||||
def load(self, path: Path):
|
def _load(self, path: Path) -> None:
|
||||||
if not self.try_load_from_tokenizer_json(path):
|
if not self._try_load_from_tokenizer_json(path):
|
||||||
self.try_load_from_config_json(path)
|
self._try_load_from_config_json(path)
|
||||||
|
|
||||||
def try_load_from_tokenizer_json(self, path: Path) -> bool:
|
def _try_load_from_tokenizer_json(self, path: Path) -> bool:
|
||||||
tokenizer_file = path / 'tokenizer.json'
|
tokenizer_file = path / 'tokenizer.json'
|
||||||
if not tokenizer_file.is_file():
|
if not tokenizer_file.is_file():
|
||||||
return False
|
return False
|
||||||
with open(tokenizer_file, 'r', encoding = 'utf-8') as f:
|
with open(tokenizer_file, encoding = 'utf-8') as f:
|
||||||
tokenizer = json.load(f)
|
tokenizer = json.load(f)
|
||||||
if self.load_merges:
|
if self.load_merges:
|
||||||
merges = tokenizer.get('model', {}).get('merges')
|
merges = tokenizer.get('model', {}).get('merges')
|
||||||
@ -825,7 +926,7 @@ class SpecialVocab:
|
|||||||
added_tokens = tokenizer.get('added_tokens')
|
added_tokens = tokenizer.get('added_tokens')
|
||||||
if added_tokens is None or not tokenizer_config_file.is_file():
|
if added_tokens is None or not tokenizer_config_file.is_file():
|
||||||
return True
|
return True
|
||||||
with open(tokenizer_config_file, 'r', encoding = 'utf-8') as f:
|
with open(tokenizer_config_file, encoding = 'utf-8') as f:
|
||||||
tokenizer_config = json.load(f)
|
tokenizer_config = json.load(f)
|
||||||
for typ in self.special_token_types:
|
for typ in self.special_token_types:
|
||||||
entry = tokenizer_config.get(f'{typ}_token')
|
entry = tokenizer_config.get(f'{typ}_token')
|
||||||
@ -844,11 +945,11 @@ class SpecialVocab:
|
|||||||
break
|
break
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def try_load_from_config_json(self, path: Path) -> bool:
|
def _try_load_from_config_json(self, path: Path) -> bool:
|
||||||
config_file = path / 'config.json'
|
config_file = path / 'config.json'
|
||||||
if not config_file.is_file():
|
if not config_file.is_file():
|
||||||
return False
|
return False
|
||||||
with open(config_file, 'r', encoding = 'utf-8') as f:
|
with open(config_file, encoding = 'utf-8') as f:
|
||||||
config = json.load(f)
|
config = json.load(f)
|
||||||
for typ in self.special_token_types:
|
for typ in self.special_token_types:
|
||||||
maybe_token_id = config.get(f'{typ}_token_id')
|
maybe_token_id = config.get(f'{typ}_token_id')
|
||||||
@ -856,7 +957,7 @@ class SpecialVocab:
|
|||||||
self.special_token_ids[typ] = maybe_token_id
|
self.special_token_ids[typ] = maybe_token_id
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def add_to_gguf(self, gw: GGUFWriter):
|
def add_to_gguf(self, gw: GGUFWriter) -> None:
|
||||||
if len(self.merges) > 0:
|
if len(self.merges) > 0:
|
||||||
print(f'gguf: Adding {len(self.merges)} merge(s).')
|
print(f'gguf: Adding {len(self.merges)} merge(s).')
|
||||||
gw.add_token_merges(self.merges)
|
gw.add_token_merges(self.merges)
|
||||||
@ -868,8 +969,8 @@ class SpecialVocab:
|
|||||||
print(f'gguf: Setting special token type {typ} to {tokid}')
|
print(f'gguf: Setting special token type {typ} to {tokid}')
|
||||||
handler(tokid)
|
handler(tokid)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self) -> str:
|
||||||
return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids if self.special_token_ids else "unset"}>'
|
return f'<SpecialVocab with {len(self.merges)} merges and special tokens {self.special_token_ids or "unset"}>'
|
||||||
|
|
||||||
|
|
||||||
# Example usage:
|
# Example usage:
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
[tool.poetry]
|
[tool.poetry]
|
||||||
name = "gguf"
|
name = "gguf"
|
||||||
version = "0.3.3"
|
version = "0.4.0"
|
||||||
description = "Write ML models in GGUF for GGML"
|
description = "Write ML models in GGUF for GGML"
|
||||||
authors = ["GGML <ggml@ggml.ai>"]
|
authors = ["GGML <ggml@ggml.ai>"]
|
||||||
packages = [
|
packages = [
|
||||||
|
746
k_quants.c
746
k_quants.c
@ -54,6 +54,10 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
|
|||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __riscv_v_intrinsic
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#undef MIN
|
#undef MIN
|
||||||
#undef MAX
|
#undef MAX
|
||||||
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
#define MIN(a, b) ((a) < (b) ? (a) : (b))
|
||||||
@ -65,7 +69,6 @@ inline static int32_t vaddvq_s32(int32x4_t v) {
|
|||||||
// 2-6 bit quantization in super-blocks
|
// 2-6 bit quantization in super-blocks
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// ===================== Helper functions
|
// ===================== Helper functions
|
||||||
//
|
//
|
||||||
@ -344,7 +347,6 @@ void quantize_row_q2_K_reference(const float * restrict x, block_q2_K * restrict
|
|||||||
const float q4scale = 15.f;
|
const float q4scale = 15.f;
|
||||||
|
|
||||||
for (int i = 0; i < nb; i++) {
|
for (int i = 0; i < nb; i++) {
|
||||||
|
|
||||||
float max_scale = 0; // as we are deducting the min, scales are always positive
|
float max_scale = 0; // as we are deducting the min, scales are always positive
|
||||||
float max_min = 0;
|
float max_min = 0;
|
||||||
for (int j = 0; j < QK_K/16; ++j) {
|
for (int j = 0; j < QK_K/16; ++j) {
|
||||||
@ -1582,6 +1584,90 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
uint8_t temp_01[32] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const uint8_t * q2 = x[i].qs;
|
||||||
|
const int8_t * q8 = y[i].qs;
|
||||||
|
const uint8_t * sc = x[i].scales;
|
||||||
|
|
||||||
|
const float dall = y[i].d * ggml_fp16_to_fp32(x[i].d);
|
||||||
|
const float dmin = -y[i].d * ggml_fp16_to_fp32(x[i].dmin);
|
||||||
|
|
||||||
|
size_t vl = 16;
|
||||||
|
|
||||||
|
vuint8m1_t scales = __riscv_vle8_v_u8m1(sc, vl);
|
||||||
|
vuint8m1_t aux = __riscv_vand_vx_u8m1(scales, 0x0F, vl);
|
||||||
|
|
||||||
|
vint16m1_t q8sums = __riscv_vle16_v_i16m1(y[i].bsums, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t scales_2 = __riscv_vle8_v_u8mf2(sc, vl);
|
||||||
|
vuint8mf2_t mins8 = __riscv_vsrl_vx_u8mf2(scales_2, 0x4, vl);
|
||||||
|
vint16m1_t mins = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vzext_vf2_u16m1(mins8, vl));
|
||||||
|
vint32m2_t prod = __riscv_vwmul_vv_i32m2(q8sums, mins, vl);
|
||||||
|
vint32m1_t vsums = __riscv_vredsum_vs_i32m2_i32m1(prod, __riscv_vmv_v_x_i32m1(0, 1), vl);
|
||||||
|
|
||||||
|
sumf += dmin * __riscv_vmv_x_s_i32m1_i32(vsums);
|
||||||
|
|
||||||
|
vl = 32;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
vuint8m1_t v_b = __riscv_vle8_v_u8m1(temp_01, vl);
|
||||||
|
|
||||||
|
uint8_t is=0;
|
||||||
|
int isum=0;
|
||||||
|
|
||||||
|
for (int j = 0; j < QK_K/128; ++j) {
|
||||||
|
// load Q2
|
||||||
|
vuint8m1_t q2_x = __riscv_vle8_v_u8m1(q2, vl);
|
||||||
|
|
||||||
|
vuint8m1_t q2_0 = __riscv_vand_vx_u8m1(q2_x, 0x03, vl);
|
||||||
|
vuint8m1_t q2_1 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q2_x, 0x2, vl), 0x03 , vl);
|
||||||
|
vuint8m1_t q2_2 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q2_x, 0x4, vl), 0x03 , vl);
|
||||||
|
vuint8m1_t q2_3 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q2_x, 0x6, vl), 0x03 , vl);
|
||||||
|
|
||||||
|
// duplicate scale elements for product
|
||||||
|
vuint8m1_t sc0 = __riscv_vrgather_vv_u8m1(aux, __riscv_vadd_vx_u8m1(v_b, 0+is, vl), vl);
|
||||||
|
vuint8m1_t sc1 = __riscv_vrgather_vv_u8m1(aux, __riscv_vadd_vx_u8m1(v_b, 2+is, vl), vl);
|
||||||
|
vuint8m1_t sc2 = __riscv_vrgather_vv_u8m1(aux, __riscv_vadd_vx_u8m1(v_b, 4+is, vl), vl);
|
||||||
|
vuint8m1_t sc3 = __riscv_vrgather_vv_u8m1(aux, __riscv_vadd_vx_u8m1(v_b, 6+is, vl), vl);
|
||||||
|
|
||||||
|
vint16m2_t p0 = __riscv_vreinterpret_v_u16m2_i16m2(__riscv_vwmulu_vv_u16m2(q2_0, sc0, vl));
|
||||||
|
vint16m2_t p1 = __riscv_vreinterpret_v_u16m2_i16m2(__riscv_vwmulu_vv_u16m2(q2_1, sc1, vl));
|
||||||
|
vint16m2_t p2 = __riscv_vreinterpret_v_u16m2_i16m2(__riscv_vwmulu_vv_u16m2(q2_2, sc2, vl));
|
||||||
|
vint16m2_t p3 = __riscv_vreinterpret_v_u16m2_i16m2(__riscv_vwmulu_vv_u16m2(q2_3, sc3, vl));
|
||||||
|
|
||||||
|
// load Q8
|
||||||
|
vint8m1_t q8_0 = __riscv_vle8_v_i8m1(q8, vl);
|
||||||
|
vint8m1_t q8_1 = __riscv_vle8_v_i8m1(q8+32, vl);
|
||||||
|
vint8m1_t q8_2 = __riscv_vle8_v_i8m1(q8+64, vl);
|
||||||
|
vint8m1_t q8_3 = __riscv_vle8_v_i8m1(q8+96, vl);
|
||||||
|
|
||||||
|
vint32m4_t s0 = __riscv_vwmul_vv_i32m4(p0, __riscv_vwcvt_x_x_v_i16m2(q8_0, vl), vl);
|
||||||
|
vint32m4_t s1 = __riscv_vwmul_vv_i32m4(p1, __riscv_vwcvt_x_x_v_i16m2(q8_1, vl), vl);
|
||||||
|
vint32m4_t s2 = __riscv_vwmul_vv_i32m4(p2, __riscv_vwcvt_x_x_v_i16m2(q8_2, vl), vl);
|
||||||
|
vint32m4_t s3 = __riscv_vwmul_vv_i32m4(p3, __riscv_vwcvt_x_x_v_i16m2(q8_3, vl), vl);
|
||||||
|
|
||||||
|
vint32m1_t isum0 = __riscv_vredsum_vs_i32m4_i32m1(__riscv_vadd_vv_i32m4(s0, s1, vl), vzero, vl);
|
||||||
|
vint32m1_t isum1 = __riscv_vredsum_vs_i32m4_i32m1(__riscv_vadd_vv_i32m4(s2, s3, vl), isum0, vl);
|
||||||
|
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(isum1);
|
||||||
|
|
||||||
|
q2+=32; q8+=128; is=8;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sumf += dall * isum;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
float sumf = 0;
|
float sumf = 0;
|
||||||
@ -1807,6 +1893,64 @@ void ggml_vec_dot_q2_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc) + summs;
|
*s = hsum_float_8(acc) + summs;
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
uint32_t aux32[2];
|
||||||
|
const uint8_t * scales = (const uint8_t *)aux32;
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const float d = y[i].d * (float)x[i].d;
|
||||||
|
const float dmin = -y[i].d * (float)x[i].dmin;
|
||||||
|
|
||||||
|
const uint8_t * restrict q2 = x[i].qs;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
const uint32_t * restrict sc = (const uint32_t *)x[i].scales;
|
||||||
|
|
||||||
|
aux32[0] = sc[0] & 0x0f0f0f0f;
|
||||||
|
aux32[1] = (sc[0] >> 4) & 0x0f0f0f0f;
|
||||||
|
|
||||||
|
sumf += dmin * (scales[4] * y[i].bsums[0] + scales[5] * y[i].bsums[1] + scales[6] * y[i].bsums[2] + scales[7] * y[i].bsums[3]);
|
||||||
|
|
||||||
|
int isum1 = 0;
|
||||||
|
int isum2 = 0;
|
||||||
|
|
||||||
|
size_t vl = 16;
|
||||||
|
|
||||||
|
vint16m1_t vzero = __riscv_vmv_v_x_i16m1(0, 1);
|
||||||
|
|
||||||
|
// load Q2
|
||||||
|
vuint8mf2_t q2_x = __riscv_vle8_v_u8mf2(q2, vl);
|
||||||
|
|
||||||
|
vint8mf2_t q2_0 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(q2_x, 0x03, vl));
|
||||||
|
vint8mf2_t q2_1 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(q2_x, 0x2, vl), 0x03 , vl));
|
||||||
|
vint8mf2_t q2_2 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(q2_x, 0x4, vl), 0x03 , vl));
|
||||||
|
vint8mf2_t q2_3 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(q2_x, 0x6, vl), 0x03 , vl));
|
||||||
|
|
||||||
|
// load Q8, and take product with Q2
|
||||||
|
vint16m1_t p0 = __riscv_vwmul_vv_i16m1(q2_0, __riscv_vle8_v_i8mf2(q8, vl), vl);
|
||||||
|
vint16m1_t p1 = __riscv_vwmul_vv_i16m1(q2_1, __riscv_vle8_v_i8mf2(q8+16, vl), vl);
|
||||||
|
vint16m1_t p2 = __riscv_vwmul_vv_i16m1(q2_2, __riscv_vle8_v_i8mf2(q8+32, vl), vl);
|
||||||
|
vint16m1_t p3 = __riscv_vwmul_vv_i16m1(q2_3, __riscv_vle8_v_i8mf2(q8+48, vl), vl);
|
||||||
|
|
||||||
|
vint16m1_t vs_0 = __riscv_vredsum_vs_i16m1_i16m1(p0, vzero, vl);
|
||||||
|
vint16m1_t vs_1 = __riscv_vredsum_vs_i16m1_i16m1(p1, vzero, vl);
|
||||||
|
vint16m1_t vs_2 = __riscv_vredsum_vs_i16m1_i16m1(p2, vzero, vl);
|
||||||
|
vint16m1_t vs_3 = __riscv_vredsum_vs_i16m1_i16m1(p3, vzero, vl);
|
||||||
|
|
||||||
|
isum1 += __riscv_vmv_x_s_i16m1_i16(vs_0) * scales[0];
|
||||||
|
isum2 += __riscv_vmv_x_s_i16m1_i16(vs_1) * scales[1];
|
||||||
|
isum1 += __riscv_vmv_x_s_i16m1_i16(vs_2) * scales[2];
|
||||||
|
isum2 += __riscv_vmv_x_s_i16m1_i16(vs_3) * scales[3];
|
||||||
|
|
||||||
|
sumf += d * (isum1 + isum2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
float sumf = 0;
|
float sumf = 0;
|
||||||
@ -2220,6 +2364,106 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
uint32_t aux[3];
|
||||||
|
uint32_t utmp[4];
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const uint8_t * restrict q3 = x[i].qs;
|
||||||
|
const uint8_t * restrict qh = x[i].hmask;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
memcpy(aux, x[i].scales, 12);
|
||||||
|
utmp[3] = ((aux[1] >> 4) & kmask2) | (((aux[2] >> 6) & kmask1) << 4);
|
||||||
|
utmp[2] = ((aux[0] >> 4) & kmask2) | (((aux[2] >> 4) & kmask1) << 4);
|
||||||
|
utmp[1] = (aux[1] & kmask2) | (((aux[2] >> 2) & kmask1) << 4);
|
||||||
|
utmp[0] = (aux[0] & kmask2) | (((aux[2] >> 0) & kmask1) << 4);
|
||||||
|
|
||||||
|
int8_t * scale = (int8_t *)utmp;
|
||||||
|
for (int j = 0; j < 16; ++j) scale[j] -= 32;
|
||||||
|
|
||||||
|
|
||||||
|
size_t vl = 32;
|
||||||
|
uint8_t m = 1;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
vuint8m1_t vqh = __riscv_vle8_v_u8m1(qh, vl);
|
||||||
|
|
||||||
|
int sum_t = 0;
|
||||||
|
|
||||||
|
for (int j = 0; j < QK_K; j += 128) {
|
||||||
|
|
||||||
|
vl = 32;
|
||||||
|
|
||||||
|
// load Q3
|
||||||
|
vuint8m1_t q3_x = __riscv_vle8_v_u8m1(q3, vl);
|
||||||
|
|
||||||
|
vint8m1_t q3_0 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q3_x, 0x03, vl));
|
||||||
|
vint8m1_t q3_1 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q3_x, 0x2, vl), 0x03 , vl));
|
||||||
|
vint8m1_t q3_2 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q3_x, 0x4, vl), 0x03 , vl));
|
||||||
|
vint8m1_t q3_3 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(q3_x, 0x6, vl), 0x03 , vl));
|
||||||
|
|
||||||
|
// compute mask for subtraction
|
||||||
|
vuint8m1_t qh_m0 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_0 = __riscv_vmseq_vx_u8m1_b8(qh_m0, 0, vl);
|
||||||
|
vint8m1_t q3_m0 = __riscv_vsub_vx_i8m1_m(vmask_0, q3_0, 0x4, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_1 = __riscv_vmseq_vx_u8m1_b8(qh_m1, 0, vl);
|
||||||
|
vint8m1_t q3_m1 = __riscv_vsub_vx_i8m1_m(vmask_1, q3_1, 0x4, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_2 = __riscv_vmseq_vx_u8m1_b8(qh_m2, 0, vl);
|
||||||
|
vint8m1_t q3_m2 = __riscv_vsub_vx_i8m1_m(vmask_2, q3_2, 0x4, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
vuint8m1_t qh_m3 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_3 = __riscv_vmseq_vx_u8m1_b8(qh_m3, 0, vl);
|
||||||
|
vint8m1_t q3_m3 = __riscv_vsub_vx_i8m1_m(vmask_3, q3_3, 0x4, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
// load Q8 and take product with Q3
|
||||||
|
vint16m2_t a0 = __riscv_vwmul_vv_i16m2(q3_m0, __riscv_vle8_v_i8m1(q8, vl), vl);
|
||||||
|
vint16m2_t a1 = __riscv_vwmul_vv_i16m2(q3_m1, __riscv_vle8_v_i8m1(q8+32, vl), vl);
|
||||||
|
vint16m2_t a2 = __riscv_vwmul_vv_i16m2(q3_m2, __riscv_vle8_v_i8m1(q8+64, vl), vl);
|
||||||
|
vint16m2_t a3 = __riscv_vwmul_vv_i16m2(q3_m3, __riscv_vle8_v_i8m1(q8+96, vl), vl);
|
||||||
|
|
||||||
|
vl = 16;
|
||||||
|
|
||||||
|
// retrieve lane to multiply with scale
|
||||||
|
vint32m2_t aux0_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 0), (scale[0]), vl);
|
||||||
|
vint32m2_t aux0_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a0, 1), (scale[1]), vl);
|
||||||
|
vint32m2_t aux1_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 0), (scale[2]), vl);
|
||||||
|
vint32m2_t aux1_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a1, 1), (scale[3]), vl);
|
||||||
|
vint32m2_t aux2_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a2, 0), (scale[4]), vl);
|
||||||
|
vint32m2_t aux2_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a2, 1), (scale[5]), vl);
|
||||||
|
vint32m2_t aux3_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a3, 0), (scale[6]), vl);
|
||||||
|
vint32m2_t aux3_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(a3, 1), (scale[7]), vl);
|
||||||
|
|
||||||
|
vint32m1_t isum0 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(aux0_0, aux0_1, vl), vzero, vl);
|
||||||
|
vint32m1_t isum1 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(aux1_0, aux1_1, vl), isum0, vl);
|
||||||
|
vint32m1_t isum2 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(aux2_0, aux2_1, vl), isum1, vl);
|
||||||
|
vint32m1_t isum3 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(aux3_0, aux3_1, vl), isum2, vl);
|
||||||
|
|
||||||
|
sum_t += __riscv_vmv_x_s_i32m1_i32(isum3);
|
||||||
|
|
||||||
|
q3 += 32; q8 += 128; scale += 8;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
|
||||||
|
|
||||||
|
sumf += d*sum_t;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
// scalar version
|
// scalar version
|
||||||
// This function is written like this so the compiler can manage to vectorize most of it
|
// This function is written like this so the compiler can manage to vectorize most of it
|
||||||
@ -2523,6 +2767,79 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
uint16_t aux16[2];
|
||||||
|
int8_t * scales = (int8_t *)aux16;
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const uint8_t * restrict q3 = x[i].qs;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
const uint16_t a = *(const uint16_t *)x[i].scales;
|
||||||
|
aux16[0] = a & 0x0f0f;
|
||||||
|
aux16[1] = (a >> 4) & 0x0f0f;
|
||||||
|
|
||||||
|
for (int j = 0; j < 4; ++j) scales[j] -= 8;
|
||||||
|
|
||||||
|
int32_t isum = -4*(scales[0] * y[i].bsums[0] + scales[2] * y[i].bsums[1] + scales[1] * y[i].bsums[2] + scales[3] * y[i].bsums[3]);
|
||||||
|
|
||||||
|
const float d = y[i].d * (float)x[i].d;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
|
||||||
|
// load qh
|
||||||
|
vuint8mf4_t qh_x1 = __riscv_vle8_v_u8mf4(x[i].hmask, 8);
|
||||||
|
vuint8mf2_t qh_x2 = __riscv_vlmul_ext_v_u8mf4_u8mf2(__riscv_vsrl_vx_u8mf4(qh_x1, 1, 8));
|
||||||
|
|
||||||
|
size_t vl = 16;
|
||||||
|
|
||||||
|
// extend and combine both qh_x1 and qh_x2
|
||||||
|
vuint8mf2_t qh_x = __riscv_vslideup_vx_u8mf2(__riscv_vlmul_ext_v_u8mf4_u8mf2(qh_x1), qh_x2, vl/2, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t qh_0 = __riscv_vand_vx_u8mf2(__riscv_vsll_vx_u8mf2(qh_x, 0x2, vl), 0x4, vl);
|
||||||
|
vuint8mf2_t qh_1 = __riscv_vand_vx_u8mf2(qh_x, 0x4, vl);
|
||||||
|
vuint8mf2_t qh_2 = __riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(qh_x, 0x2, vl), 0x4, vl);
|
||||||
|
vuint8mf2_t qh_3 = __riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(qh_x, 0x4, vl), 0x4, vl);
|
||||||
|
|
||||||
|
// load Q3
|
||||||
|
vuint8mf2_t q3_x = __riscv_vle8_v_u8mf2(q3, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t q3h_0 = __riscv_vor_vv_u8mf2(__riscv_vand_vx_u8mf2(q3_x, 0x3, vl), qh_0, vl);
|
||||||
|
vuint8mf2_t q3h_1 = __riscv_vor_vv_u8mf2(__riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(q3_x, 2, vl), 0x3, vl), qh_1, vl);
|
||||||
|
vuint8mf2_t q3h_2 = __riscv_vor_vv_u8mf2(__riscv_vand_vx_u8mf2(__riscv_vsrl_vx_u8mf2(q3_x, 4, vl), 0x3, vl), qh_2, vl);
|
||||||
|
vuint8mf2_t q3h_3 = __riscv_vor_vv_u8mf2(__riscv_vsrl_vx_u8mf2(q3_x, 0x6, vl), qh_3, vl);
|
||||||
|
|
||||||
|
vint8mf2_t q3_0 = __riscv_vreinterpret_v_u8mf2_i8mf2(q3h_0);
|
||||||
|
vint8mf2_t q3_1 = __riscv_vreinterpret_v_u8mf2_i8mf2(q3h_1);
|
||||||
|
vint8mf2_t q3_2 = __riscv_vreinterpret_v_u8mf2_i8mf2(q3h_2);
|
||||||
|
vint8mf2_t q3_3 = __riscv_vreinterpret_v_u8mf2_i8mf2(q3h_3);
|
||||||
|
|
||||||
|
// load Q8 and take product with Q3
|
||||||
|
vint16m1_t p0 = __riscv_vwmul_vv_i16m1(q3_0, __riscv_vle8_v_i8mf2(q8, vl), vl);
|
||||||
|
vint16m1_t p1 = __riscv_vwmul_vv_i16m1(q3_1, __riscv_vle8_v_i8mf2(q8+16, vl), vl);
|
||||||
|
vint16m1_t p2 = __riscv_vwmul_vv_i16m1(q3_2, __riscv_vle8_v_i8mf2(q8+32, vl), vl);
|
||||||
|
vint16m1_t p3 = __riscv_vwmul_vv_i16m1(q3_3, __riscv_vle8_v_i8mf2(q8+48, vl), vl);
|
||||||
|
|
||||||
|
vint32m1_t vs_0 = __riscv_vwredsum_vs_i16m1_i32m1(p0, vzero, vl);
|
||||||
|
vint32m1_t vs_1 = __riscv_vwredsum_vs_i16m1_i32m1(p1, vzero, vl);
|
||||||
|
vint32m1_t vs_2 = __riscv_vwredsum_vs_i16m1_i32m1(p2, vzero, vl);
|
||||||
|
vint32m1_t vs_3 = __riscv_vwredsum_vs_i16m1_i32m1(p3, vzero, vl);
|
||||||
|
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_0) * scales[0];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_1) * scales[2];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_2) * scales[1];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_3) * scales[3];
|
||||||
|
|
||||||
|
sumf += d * isum;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int8_t aux8[QK_K];
|
int8_t aux8[QK_K];
|
||||||
@ -2823,6 +3140,78 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc) + _mm_cvtss_f32(acc_m);
|
*s = hsum_float_8(acc) + _mm_cvtss_f32(acc_m);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
||||||
|
const uint8_t * mins = (const uint8_t*)&utmp[2];
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
size_t vl = 8;
|
||||||
|
|
||||||
|
const float d = y[i].d * ggml_fp16_to_fp32(x[i].d);
|
||||||
|
const float dmin = y[i].d * ggml_fp16_to_fp32(x[i].dmin);
|
||||||
|
|
||||||
|
vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2(y[i].bsums, 4, vl);
|
||||||
|
vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2(y[i].bsums+1, 4, vl);
|
||||||
|
vint16mf2_t q8sums = __riscv_vadd_vv_i16mf2(q8sums_0, q8sums_1, vl);
|
||||||
|
|
||||||
|
memcpy(utmp, x[i].scales, 12);
|
||||||
|
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
|
||||||
|
const uint32_t uaux = utmp[1] & kmask1;
|
||||||
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
||||||
|
utmp[2] = uaux;
|
||||||
|
utmp[0] &= kmask1;
|
||||||
|
|
||||||
|
vuint8mf4_t mins8 = __riscv_vle8_v_u8mf4(mins, vl);
|
||||||
|
vint16mf2_t v_mins = __riscv_vreinterpret_v_u16mf2_i16mf2(__riscv_vzext_vf2_u16mf2(mins8, vl));
|
||||||
|
vint32m1_t prod = __riscv_vwmul_vv_i32m1(q8sums, v_mins, vl);
|
||||||
|
|
||||||
|
vint32m1_t sumi = __riscv_vredsum_vs_i32m1_i32m1(prod, __riscv_vmv_v_x_i32m1(0, 1), vl);
|
||||||
|
sumf -= dmin * __riscv_vmv_x_s_i32m1_i32(sumi);
|
||||||
|
|
||||||
|
const uint8_t * restrict q4 = x[i].qs;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
vl = 32;
|
||||||
|
|
||||||
|
int32_t sum_1 = 0;
|
||||||
|
int32_t sum_2 = 0;
|
||||||
|
|
||||||
|
vint16m1_t vzero = __riscv_vmv_v_x_i16m1(0, 1);
|
||||||
|
|
||||||
|
for (int j = 0; j < QK_K/64; ++j) {
|
||||||
|
// load Q4
|
||||||
|
vuint8m1_t q4_x = __riscv_vle8_v_u8m1(q4, vl);
|
||||||
|
|
||||||
|
// load Q8 and multiply it with lower Q4 nibble
|
||||||
|
vint8m1_t q8_0 = __riscv_vle8_v_i8m1(q8, vl);
|
||||||
|
vint8m1_t q4_0 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q4_x, 0x0F, vl));
|
||||||
|
vint16m2_t qv_0 = __riscv_vwmul_vv_i16m2(q4_0, q8_0, vl);
|
||||||
|
vint16m1_t vs_0 = __riscv_vredsum_vs_i16m2_i16m1(qv_0, vzero, vl);
|
||||||
|
|
||||||
|
sum_1 += __riscv_vmv_x_s_i16m1_i16(vs_0) * scales[2*j+0];
|
||||||
|
|
||||||
|
// load Q8 and multiply it with upper Q4 nibble
|
||||||
|
vint8m1_t q8_1 = __riscv_vle8_v_i8m1(q8+32, vl);
|
||||||
|
vint8m1_t q4_1 = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q4_x, 0x04, vl));
|
||||||
|
vint16m2_t qv_1 = __riscv_vwmul_vv_i16m2(q4_1, q8_1, vl);
|
||||||
|
vint16m1_t vs_1 = __riscv_vredsum_vs_i16m2_i16m1(qv_1, vzero, vl);
|
||||||
|
|
||||||
|
sum_2 += __riscv_vmv_x_s_i16m1_i16(vs_1) * scales[2*j+1];
|
||||||
|
|
||||||
|
q4 += 32; q8 += 64;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sumf += d*(sum_1 + sum_2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
|
||||||
@ -3064,6 +3453,50 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc) - summs;
|
*s = hsum_float_8(acc) - summs;
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
uint16_t s16[2];
|
||||||
|
const uint8_t * restrict scales = (const uint8_t *)s16;
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const uint8_t * restrict q4 = x[i].qs;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
const uint16_t * restrict b = (const uint16_t *)x[i].scales;
|
||||||
|
s16[0] = b[0] & 0x0f0f;
|
||||||
|
s16[1] = (b[0] >> 4) & 0x0f0f;
|
||||||
|
|
||||||
|
sumf -= y[i].d * ggml_fp16_to_fp32(x[i].d[1]) * (scales[2] * (y[i].bsums[0] + y[i].bsums[1]) + scales[3] * (y[i].bsums[2] + y[i].bsums[3]));
|
||||||
|
const float d = y[i].d * ggml_fp16_to_fp32(x[i].d[0]);
|
||||||
|
|
||||||
|
size_t vl = 32;
|
||||||
|
|
||||||
|
vint16m1_t vzero = __riscv_vmv_v_x_i16m1(0, 1);
|
||||||
|
|
||||||
|
// load Q4
|
||||||
|
vuint8m1_t q4_x = __riscv_vle8_v_u8m1(q4, vl);
|
||||||
|
|
||||||
|
// load Q8 and multiply it with lower Q4 nibble
|
||||||
|
vint8m1_t q4_a = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q4_x, 0x0F, vl));
|
||||||
|
vint16m2_t va_0 = __riscv_vwmul_vv_i16m2(q4_a, __riscv_vle8_v_i8m1(q8, vl), vl);
|
||||||
|
vint16m1_t aux1 = __riscv_vredsum_vs_i16m2_i16m1(va_0, vzero, vl);
|
||||||
|
|
||||||
|
sumf += d*scales[0]*__riscv_vmv_x_s_i16m1_i16(aux1);
|
||||||
|
|
||||||
|
// load Q8 and multiply it with upper Q4 nibble
|
||||||
|
vint8m1_t q4_s = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q4_x, 0x04, vl));
|
||||||
|
vint16m2_t va_1 = __riscv_vwmul_vv_i16m2(q4_s, __riscv_vle8_v_i8m1(q8+32, vl), vl);
|
||||||
|
vint16m1_t aux2 = __riscv_vredsum_vs_i16m2_i16m1(va_1, vzero, vl);
|
||||||
|
|
||||||
|
sumf += d*scales[1]*__riscv_vmv_x_s_i16m1_i16(aux2);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
uint8_t aux8[QK_K];
|
uint8_t aux8[QK_K];
|
||||||
@ -3394,6 +3827,93 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc) + summs;
|
*s = hsum_float_8(acc) + summs;
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
||||||
|
const uint8_t * mins = (const uint8_t*)&utmp[2];
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
float sums = 0.0;
|
||||||
|
|
||||||
|
size_t vl;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
vl = 8;
|
||||||
|
|
||||||
|
const uint8_t * restrict q5 = x[i].qs;
|
||||||
|
const uint8_t * restrict hm = x[i].qh;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
|
||||||
|
const float dmin = ggml_fp16_to_fp32(x[i].dmin) * y[i].d;
|
||||||
|
|
||||||
|
vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2(y[i].bsums, 4, vl);
|
||||||
|
vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2(y[i].bsums+1, 4, vl);
|
||||||
|
vint16mf2_t q8sums = __riscv_vadd_vv_i16mf2(q8sums_0, q8sums_1, vl);
|
||||||
|
|
||||||
|
memcpy(utmp, x[i].scales, 12);
|
||||||
|
utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4);
|
||||||
|
const uint32_t uaux = utmp[1] & kmask1;
|
||||||
|
utmp[1] = (utmp[2] & kmask2) | (((utmp[0] >> 6) & kmask3) << 4);
|
||||||
|
utmp[2] = uaux;
|
||||||
|
utmp[0] &= kmask1;
|
||||||
|
|
||||||
|
vuint8mf4_t mins8 = __riscv_vle8_v_u8mf4(mins, vl);
|
||||||
|
vint16mf2_t v_mins = __riscv_vreinterpret_v_u16mf2_i16mf2(__riscv_vzext_vf2_u16mf2(mins8, vl));
|
||||||
|
vint32m1_t prod = __riscv_vwmul_vv_i32m1(q8sums, v_mins, vl);
|
||||||
|
|
||||||
|
vint32m1_t sumi = __riscv_vredsum_vs_i32m1_i32m1(prod, __riscv_vmv_v_x_i32m1(0, 1), vl);
|
||||||
|
sumf -= dmin * __riscv_vmv_x_s_i32m1_i32(sumi);
|
||||||
|
|
||||||
|
vl = 32;
|
||||||
|
int32_t aux32 = 0;
|
||||||
|
int is = 0;
|
||||||
|
|
||||||
|
uint8_t m = 1;
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
vuint8m1_t vqh = __riscv_vle8_v_u8m1(hm, vl);
|
||||||
|
|
||||||
|
for (int j = 0; j < QK_K/64; ++j) {
|
||||||
|
// load Q5 and Q8
|
||||||
|
vuint8m1_t q5_x = __riscv_vle8_v_u8m1(q5, vl);
|
||||||
|
vint8m1_t q8_y1 = __riscv_vle8_v_i8m1(q8, vl);
|
||||||
|
vint8m1_t q8_y2 = __riscv_vle8_v_i8m1(q8+32, vl);
|
||||||
|
|
||||||
|
// compute mask for addition
|
||||||
|
vint8m1_t q5_a = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vand_vx_u8m1(q5_x, 0x0F, vl));
|
||||||
|
vuint8m1_t qh_m1 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_1 = __riscv_vmsne_vx_u8m1_b8(qh_m1, 0, vl);
|
||||||
|
vint8m1_t q5_m1 = __riscv_vadd_vx_i8m1_m(vmask_1, q5_a, 16, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
vint8m1_t q5_l = __riscv_vreinterpret_v_u8m1_i8m1(__riscv_vsrl_vx_u8m1(q5_x, 0x04, vl));
|
||||||
|
vuint8m1_t qh_m2 = __riscv_vand_vx_u8m1(vqh, m, vl);
|
||||||
|
vbool8_t vmask_2 = __riscv_vmsne_vx_u8m1_b8(qh_m2, 0, vl);
|
||||||
|
vint8m1_t q5_m2 = __riscv_vadd_vx_i8m1_m(vmask_2, q5_l, 16, vl);
|
||||||
|
m <<= 1;
|
||||||
|
|
||||||
|
vint16m2_t v0 = __riscv_vwmul_vv_i16m2(q5_m1, q8_y1, vl);
|
||||||
|
vint16m2_t v1 = __riscv_vwmul_vv_i16m2(q5_m2, q8_y2, vl);
|
||||||
|
|
||||||
|
vint32m4_t vs1 = __riscv_vwmul_vx_i32m4(v0, scales[is++], vl);
|
||||||
|
vint32m4_t vs2 = __riscv_vwmul_vx_i32m4(v1, scales[is++], vl);
|
||||||
|
|
||||||
|
vint32m1_t vacc1 = __riscv_vredsum_vs_i32m4_i32m1(vs1, vzero, vl);
|
||||||
|
vint32m1_t vacc2 = __riscv_vredsum_vs_i32m4_i32m1(vs2, vzero, vl);
|
||||||
|
|
||||||
|
aux32 += __riscv_vmv_x_s_i32m1_i32(vacc1) + __riscv_vmv_x_s_i32m1_i32(vacc2);
|
||||||
|
q5 += 32; q8 += 64;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
vfloat32m1_t vaux = __riscv_vfmul_vf_f32m1(__riscv_vfmv_v_f_f32m1(aux32, 1), d, 1);
|
||||||
|
sums += __riscv_vfmv_f_s_f32m1_f32(vaux);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf+sums;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
const uint8_t * scales = (const uint8_t*)&utmp[0];
|
||||||
@ -3639,6 +4159,76 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const float d = y[i].d * (float)x[i].d;
|
||||||
|
const int8_t * sc = x[i].scales;
|
||||||
|
|
||||||
|
const uint8_t * restrict q5 = x[i].qs;
|
||||||
|
const uint8_t * restrict qh = x[i].qh;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
|
||||||
|
// load qh
|
||||||
|
vuint8mf4_t qh_x1 = __riscv_vle8_v_u8mf4(qh, 8);
|
||||||
|
vuint8mf2_t qh_x2 = __riscv_vlmul_ext_v_u8mf4_u8mf2(__riscv_vsrl_vx_u8mf4(qh_x1, 1, 8));
|
||||||
|
|
||||||
|
size_t vl = 16;
|
||||||
|
|
||||||
|
// combine both qh_1 and qh_2
|
||||||
|
vuint8mf2_t qh_x = __riscv_vslideup_vx_u8mf2(__riscv_vlmul_ext_v_u8mf4_u8mf2(qh_x1), qh_x2, vl/2, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t qh_h0 = __riscv_vand_vx_u8mf2(__riscv_vnot_v_u8mf2(__riscv_vsll_vx_u8mf2(qh_x, 0x4, vl), vl), 16, vl);
|
||||||
|
vuint8mf2_t qh_h1 = __riscv_vand_vx_u8mf2(__riscv_vnot_v_u8mf2(__riscv_vsll_vx_u8mf2(qh_x, 0x2, vl), vl), 16, vl);
|
||||||
|
vuint8mf2_t qh_h2 = __riscv_vand_vx_u8mf2(__riscv_vnot_v_u8mf2(qh_x, vl), 16, vl);
|
||||||
|
vuint8mf2_t qh_h3 = __riscv_vand_vx_u8mf2(__riscv_vnot_v_u8mf2(__riscv_vsrl_vx_u8mf2(qh_x, 0x4, vl), vl), 16, vl);
|
||||||
|
|
||||||
|
vint8mf2_t qh_0 = __riscv_vreinterpret_v_u8mf2_i8mf2(qh_h0);
|
||||||
|
vint8mf2_t qh_1 = __riscv_vreinterpret_v_u8mf2_i8mf2(qh_h1);
|
||||||
|
vint8mf2_t qh_2 = __riscv_vreinterpret_v_u8mf2_i8mf2(qh_h2);
|
||||||
|
vint8mf2_t qh_3 = __riscv_vreinterpret_v_u8mf2_i8mf2(qh_h3);
|
||||||
|
|
||||||
|
// load q5
|
||||||
|
vuint8mf2_t q5_x1 = __riscv_vle8_v_u8mf2(q5, vl);
|
||||||
|
vuint8mf2_t q5_x2 = __riscv_vle8_v_u8mf2(q5+16, vl);
|
||||||
|
|
||||||
|
vint8mf2_t q5s_0 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(q5_x1, 0xF, vl));
|
||||||
|
vint8mf2_t q5s_1 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vand_vx_u8mf2(q5_x2, 0xF, vl));
|
||||||
|
vint8mf2_t q5s_2 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vsrl_vx_u8mf2(q5_x1, 0x4, vl));
|
||||||
|
vint8mf2_t q5s_3 = __riscv_vreinterpret_v_u8mf2_i8mf2(__riscv_vsrl_vx_u8mf2(q5_x2, 0x4, vl));
|
||||||
|
|
||||||
|
vint8mf2_t q5_0 = __riscv_vsub_vv_i8mf2(q5s_0, qh_0, vl);
|
||||||
|
vint8mf2_t q5_1 = __riscv_vsub_vv_i8mf2(q5s_1, qh_1, vl);
|
||||||
|
vint8mf2_t q5_2 = __riscv_vsub_vv_i8mf2(q5s_2, qh_2, vl);
|
||||||
|
vint8mf2_t q5_3 = __riscv_vsub_vv_i8mf2(q5s_3, qh_3, vl);
|
||||||
|
|
||||||
|
// load Q8 and multiply it with Q5
|
||||||
|
vint16m1_t p0 = __riscv_vwmul_vv_i16m1(q5_0, __riscv_vle8_v_i8mf2(q8, vl), vl);
|
||||||
|
vint16m1_t p1 = __riscv_vwmul_vv_i16m1(q5_1, __riscv_vle8_v_i8mf2(q8+16, vl), vl);
|
||||||
|
vint16m1_t p2 = __riscv_vwmul_vv_i16m1(q5_2, __riscv_vle8_v_i8mf2(q8+32, vl), vl);
|
||||||
|
vint16m1_t p3 = __riscv_vwmul_vv_i16m1(q5_3, __riscv_vle8_v_i8mf2(q8+48, vl), vl);
|
||||||
|
|
||||||
|
vint32m1_t vs_0 = __riscv_vwredsum_vs_i16m1_i32m1(p0, vzero, vl);
|
||||||
|
vint32m1_t vs_1 = __riscv_vwredsum_vs_i16m1_i32m1(p1, vzero, vl);
|
||||||
|
vint32m1_t vs_2 = __riscv_vwredsum_vs_i16m1_i32m1(p2, vzero, vl);
|
||||||
|
vint32m1_t vs_3 = __riscv_vwredsum_vs_i16m1_i32m1(p3, vzero, vl);
|
||||||
|
|
||||||
|
int32_t sumi1 = sc[0] * __riscv_vmv_x_s_i32m1_i32(vs_0);
|
||||||
|
int32_t sumi2 = sc[1] * __riscv_vmv_x_s_i32m1_i32(vs_1);
|
||||||
|
int32_t sumi3 = sc[2] * __riscv_vmv_x_s_i32m1_i32(vs_2);
|
||||||
|
int32_t sumi4 = sc[3] * __riscv_vmv_x_s_i32m1_i32(vs_3);
|
||||||
|
|
||||||
|
sumf += d * (sumi1 + sumi2 + sumi3 + sumi4);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int8_t aux8[QK_K];
|
int8_t aux8[QK_K];
|
||||||
@ -4023,6 +4613,91 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const float d = ggml_fp16_to_fp32(x[i].d) * y[i].d;
|
||||||
|
|
||||||
|
const uint8_t * restrict q6 = x[i].ql;
|
||||||
|
const uint8_t * restrict qh = x[i].qh;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
const int8_t * restrict scale = x[i].scales;
|
||||||
|
|
||||||
|
size_t vl;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
|
||||||
|
int sum_t = 0;
|
||||||
|
int is = 0;
|
||||||
|
|
||||||
|
for (int j = 0; j < QK_K/128; ++j) {
|
||||||
|
|
||||||
|
vl = 32;
|
||||||
|
|
||||||
|
// load qh
|
||||||
|
vuint8m1_t qh_x = __riscv_vle8_v_u8m1(qh, vl);
|
||||||
|
|
||||||
|
// load Q6
|
||||||
|
vuint8m1_t q6_0 = __riscv_vle8_v_u8m1(q6, vl);
|
||||||
|
vuint8m1_t q6_1 = __riscv_vle8_v_u8m1(q6+32, vl);
|
||||||
|
|
||||||
|
vuint8m1_t q6a_0 = __riscv_vand_vx_u8m1(q6_0, 0x0F, vl);
|
||||||
|
vuint8m1_t q6a_1 = __riscv_vand_vx_u8m1(q6_1, 0x0F, vl);
|
||||||
|
vuint8m1_t q6s_0 = __riscv_vsrl_vx_u8m1(q6_0, 0x04, vl);
|
||||||
|
vuint8m1_t q6s_1 = __riscv_vsrl_vx_u8m1(q6_1, 0x04, vl);
|
||||||
|
|
||||||
|
vuint8m1_t qh_0 = __riscv_vand_vx_u8m1(qh_x, 0x03, vl);
|
||||||
|
vuint8m1_t qh_1 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(qh_x, 0x2, vl), 0x03 , vl);
|
||||||
|
vuint8m1_t qh_2 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(qh_x, 0x4, vl), 0x03 , vl);
|
||||||
|
vuint8m1_t qh_3 = __riscv_vand_vx_u8m1(__riscv_vsrl_vx_u8m1(qh_x, 0x6, vl), 0x03 , vl);
|
||||||
|
|
||||||
|
vuint8m1_t qhi_0 = __riscv_vor_vv_u8m1(q6a_0, __riscv_vsll_vx_u8m1(qh_0, 0x04, vl), vl);
|
||||||
|
vuint8m1_t qhi_1 = __riscv_vor_vv_u8m1(q6a_1, __riscv_vsll_vx_u8m1(qh_1, 0x04, vl), vl);
|
||||||
|
vuint8m1_t qhi_2 = __riscv_vor_vv_u8m1(q6s_0, __riscv_vsll_vx_u8m1(qh_2, 0x04, vl), vl);
|
||||||
|
vuint8m1_t qhi_3 = __riscv_vor_vv_u8m1(q6s_1, __riscv_vsll_vx_u8m1(qh_3, 0x04, vl), vl);
|
||||||
|
|
||||||
|
vint8m1_t a_0 = __riscv_vsub_vx_i8m1(__riscv_vreinterpret_v_u8m1_i8m1(qhi_0), 32, vl);
|
||||||
|
vint8m1_t a_1 = __riscv_vsub_vx_i8m1(__riscv_vreinterpret_v_u8m1_i8m1(qhi_1), 32, vl);
|
||||||
|
vint8m1_t a_2 = __riscv_vsub_vx_i8m1(__riscv_vreinterpret_v_u8m1_i8m1(qhi_2), 32, vl);
|
||||||
|
vint8m1_t a_3 = __riscv_vsub_vx_i8m1(__riscv_vreinterpret_v_u8m1_i8m1(qhi_3), 32, vl);
|
||||||
|
|
||||||
|
// load Q8 and take product
|
||||||
|
vint16m2_t va_q_0 = __riscv_vwmul_vv_i16m2(a_0, __riscv_vle8_v_i8m1(q8, vl), vl);
|
||||||
|
vint16m2_t va_q_1 = __riscv_vwmul_vv_i16m2(a_1, __riscv_vle8_v_i8m1(q8+32, vl), vl);
|
||||||
|
vint16m2_t va_q_2 = __riscv_vwmul_vv_i16m2(a_2, __riscv_vle8_v_i8m1(q8+64, vl), vl);
|
||||||
|
vint16m2_t va_q_3 = __riscv_vwmul_vv_i16m2(a_3, __riscv_vle8_v_i8m1(q8+96, vl), vl);
|
||||||
|
|
||||||
|
vl = 16;
|
||||||
|
|
||||||
|
vint32m2_t vaux_0 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_0, 0), scale[is+0], vl);
|
||||||
|
vint32m2_t vaux_1 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_0, 1), scale[is+1], vl);
|
||||||
|
vint32m2_t vaux_2 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_1, 0), scale[is+2], vl);
|
||||||
|
vint32m2_t vaux_3 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_1, 1), scale[is+3], vl);
|
||||||
|
vint32m2_t vaux_4 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_2, 0), scale[is+4], vl);
|
||||||
|
vint32m2_t vaux_5 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_2, 1), scale[is+5], vl);
|
||||||
|
vint32m2_t vaux_6 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_3, 0), scale[is+6], vl);
|
||||||
|
vint32m2_t vaux_7 = __riscv_vwmul_vx_i32m2(__riscv_vget_v_i16m2_i16m1(va_q_3, 1), scale[is+7], vl);
|
||||||
|
|
||||||
|
vint32m1_t isum0 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(vaux_0, vaux_1, vl), vzero, vl);
|
||||||
|
vint32m1_t isum1 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(vaux_2, vaux_3, vl), isum0, vl);
|
||||||
|
vint32m1_t isum2 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(vaux_4, vaux_5, vl), isum1, vl);
|
||||||
|
vint32m1_t isum3 = __riscv_vredsum_vs_i32m2_i32m1(__riscv_vadd_vv_i32m2(vaux_6, vaux_7, vl), isum2, vl);
|
||||||
|
|
||||||
|
sum_t += __riscv_vmv_x_s_i32m1_i32(isum3);
|
||||||
|
|
||||||
|
q6 += 64; qh += 32; q8 += 128; is=8;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
sumf += d * sum_t;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int8_t aux8[QK_K];
|
int8_t aux8[QK_K];
|
||||||
@ -4276,6 +4951,73 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
|
|||||||
|
|
||||||
*s = hsum_float_8(acc);
|
*s = hsum_float_8(acc);
|
||||||
|
|
||||||
|
#elif defined __riscv_v_intrinsic
|
||||||
|
|
||||||
|
float sumf = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < nb; ++i) {
|
||||||
|
|
||||||
|
const float d_all = (float)x[i].d;
|
||||||
|
|
||||||
|
const uint8_t * restrict q6 = x[i].ql;
|
||||||
|
const uint8_t * restrict qh = x[i].qh;
|
||||||
|
const int8_t * restrict q8 = y[i].qs;
|
||||||
|
|
||||||
|
const int8_t * restrict scale = x[i].scales;
|
||||||
|
|
||||||
|
int32_t isum = 0;
|
||||||
|
|
||||||
|
size_t vl = 16;
|
||||||
|
|
||||||
|
vint32m1_t vzero = __riscv_vmv_v_x_i32m1(0, 1);
|
||||||
|
|
||||||
|
// load Q6
|
||||||
|
vuint8mf2_t q6_0 = __riscv_vle8_v_u8mf2(q6, vl);
|
||||||
|
vuint8mf2_t q6_1 = __riscv_vle8_v_u8mf2(q6+16, vl);
|
||||||
|
|
||||||
|
// load qh
|
||||||
|
vuint8mf2_t qh_x = __riscv_vle8_v_u8mf2(qh, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t qh0 = __riscv_vsll_vx_u8mf2(__riscv_vand_vx_u8mf2(qh_x, 0x3, vl), 0x4, vl);
|
||||||
|
qh_x = __riscv_vsrl_vx_u8mf2(qh_x, 0x2, vl);
|
||||||
|
vuint8mf2_t qh1 = __riscv_vsll_vx_u8mf2(__riscv_vand_vx_u8mf2(qh_x, 0x3, vl), 0x4, vl);
|
||||||
|
qh_x = __riscv_vsrl_vx_u8mf2(qh_x, 0x2, vl);
|
||||||
|
vuint8mf2_t qh2 = __riscv_vsll_vx_u8mf2(__riscv_vand_vx_u8mf2(qh_x, 0x3, vl), 0x4, vl);
|
||||||
|
qh_x = __riscv_vsrl_vx_u8mf2(qh_x, 0x2, vl);
|
||||||
|
vuint8mf2_t qh3 = __riscv_vsll_vx_u8mf2(__riscv_vand_vx_u8mf2(qh_x, 0x3, vl), 0x4, vl);
|
||||||
|
|
||||||
|
vuint8mf2_t q6h_0 = __riscv_vor_vv_u8mf2(__riscv_vand_vx_u8mf2(q6_0, 0xF, vl), qh0, vl);
|
||||||
|
vuint8mf2_t q6h_1 = __riscv_vor_vv_u8mf2(__riscv_vand_vx_u8mf2(q6_1, 0xF, vl), qh1, vl);
|
||||||
|
vuint8mf2_t q6h_2 = __riscv_vor_vv_u8mf2(__riscv_vsrl_vx_u8mf2(q6_0, 0x4, vl), qh2, vl);
|
||||||
|
vuint8mf2_t q6h_3 = __riscv_vor_vv_u8mf2(__riscv_vsrl_vx_u8mf2(q6_1, 0x4, vl), qh3, vl);
|
||||||
|
|
||||||
|
vint8mf2_t q6v_0 = __riscv_vsub_vx_i8mf2(__riscv_vreinterpret_v_u8mf2_i8mf2(q6h_0), 32, vl);
|
||||||
|
vint8mf2_t q6v_1 = __riscv_vsub_vx_i8mf2(__riscv_vreinterpret_v_u8mf2_i8mf2(q6h_1), 32, vl);
|
||||||
|
vint8mf2_t q6v_2 = __riscv_vsub_vx_i8mf2(__riscv_vreinterpret_v_u8mf2_i8mf2(q6h_2), 32, vl);
|
||||||
|
vint8mf2_t q6v_3 = __riscv_vsub_vx_i8mf2(__riscv_vreinterpret_v_u8mf2_i8mf2(q6h_3), 32, vl);
|
||||||
|
|
||||||
|
// load Q8 and take product
|
||||||
|
vint16m1_t p0 = __riscv_vwmul_vv_i16m1(q6v_0, __riscv_vle8_v_i8mf2(q8, vl), vl);
|
||||||
|
vint16m1_t p1 = __riscv_vwmul_vv_i16m1(q6v_1, __riscv_vle8_v_i8mf2(q8+16, vl), vl);
|
||||||
|
vint16m1_t p2 = __riscv_vwmul_vv_i16m1(q6v_2, __riscv_vle8_v_i8mf2(q8+32, vl), vl);
|
||||||
|
vint16m1_t p3 = __riscv_vwmul_vv_i16m1(q6v_3, __riscv_vle8_v_i8mf2(q8+48, vl), vl);
|
||||||
|
|
||||||
|
vint32m1_t vs_0 = __riscv_vwredsum_vs_i16m1_i32m1(p0, vzero, vl);
|
||||||
|
vint32m1_t vs_1 = __riscv_vwredsum_vs_i16m1_i32m1(p1, vzero, vl);
|
||||||
|
vint32m1_t vs_2 = __riscv_vwredsum_vs_i16m1_i32m1(p2, vzero, vl);
|
||||||
|
vint32m1_t vs_3 = __riscv_vwredsum_vs_i16m1_i32m1(p3, vzero, vl);
|
||||||
|
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_0) * scale[0];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_1) * scale[1];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_2) * scale[2];
|
||||||
|
isum += __riscv_vmv_x_s_i32m1_i32(vs_3) * scale[3];
|
||||||
|
|
||||||
|
sumf += isum * d_all * y[i].d;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
*s = sumf;
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
int8_t aux8[QK_K];
|
int8_t aux8[QK_K];
|
||||||
|
13
llama.h
13
llama.h
@ -42,7 +42,7 @@
|
|||||||
#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
|
#define LLAMA_FILE_MAGIC_GGSN 0x6767736eu // 'ggsn'
|
||||||
|
|
||||||
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
|
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
|
||||||
#define LLAMA_SESSION_VERSION 1
|
#define LLAMA_SESSION_VERSION 2
|
||||||
|
|
||||||
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
|
#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
|
||||||
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
|
||||||
@ -282,6 +282,9 @@ extern "C" {
|
|||||||
LLAMA_API int llama_n_ctx_train(const struct llama_model * model);
|
LLAMA_API int llama_n_ctx_train(const struct llama_model * model);
|
||||||
LLAMA_API int llama_n_embd (const struct llama_model * model);
|
LLAMA_API int llama_n_embd (const struct llama_model * model);
|
||||||
|
|
||||||
|
// Get the model's RoPE frequency scaling factor
|
||||||
|
LLAMA_API float llama_rope_freq_scale_train(const struct llama_model * model);
|
||||||
|
|
||||||
// Get a string describing the model type
|
// Get a string describing the model type
|
||||||
LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
|
LLAMA_API int llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size);
|
||||||
|
|
||||||
@ -330,12 +333,16 @@ extern "C" {
|
|||||||
"avoid using this, it will be removed in the future, instead - count the tokens in user code");
|
"avoid using this, it will be removed in the future, instead - count the tokens in user code");
|
||||||
|
|
||||||
// Remove all tokens data of cells in [c0, c1)
|
// Remove all tokens data of cells in [c0, c1)
|
||||||
|
// c0 < 0 : [0, c1)
|
||||||
|
// c1 < 0 : [c0, inf)
|
||||||
LLAMA_API void llama_kv_cache_tokens_rm(
|
LLAMA_API void llama_kv_cache_tokens_rm(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
int32_t c0,
|
int32_t c0,
|
||||||
int32_t c1);
|
int32_t c1);
|
||||||
|
|
||||||
// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
// Removes all tokens that belong to the specified sequence and have positions in [p0, p1)
|
||||||
|
// p0 < 0 : [0, p1)
|
||||||
|
// p1 < 0 : [p0, inf)
|
||||||
LLAMA_API void llama_kv_cache_seq_rm(
|
LLAMA_API void llama_kv_cache_seq_rm(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
llama_seq_id seq_id,
|
llama_seq_id seq_id,
|
||||||
@ -344,6 +351,8 @@ extern "C" {
|
|||||||
|
|
||||||
// Copy all tokens that belong to the specified sequence to another sequence
|
// Copy all tokens that belong to the specified sequence to another sequence
|
||||||
// Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
|
// Note that this does not allocate extra KV cache memory - it simply assigns the tokens to the new sequence
|
||||||
|
// p0 < 0 : [0, p1)
|
||||||
|
// p1 < 0 : [p0, inf)
|
||||||
LLAMA_API void llama_kv_cache_seq_cp(
|
LLAMA_API void llama_kv_cache_seq_cp(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
llama_seq_id seq_id_src,
|
llama_seq_id seq_id_src,
|
||||||
@ -358,6 +367,8 @@ extern "C" {
|
|||||||
|
|
||||||
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
// Adds relative position "delta" to all tokens that belong to the specified sequence and have positions in [p0, p1)
|
||||||
// If the KV cache is RoPEd, the KV data is updated accordingly
|
// If the KV cache is RoPEd, the KV data is updated accordingly
|
||||||
|
// p0 < 0 : [0, p1)
|
||||||
|
// p1 < 0 : [p0, inf)
|
||||||
LLAMA_API void llama_kv_cache_seq_shift(
|
LLAMA_API void llama_kv_cache_seq_shift(
|
||||||
struct llama_context * ctx,
|
struct llama_context * ctx,
|
||||||
llama_seq_id seq_id,
|
llama_seq_id seq_id,
|
||||||
|
BIN
models/ggml-vocab-aquila.gguf
Normal file
BIN
models/ggml-vocab-aquila.gguf
Normal file
Binary file not shown.
BIN
models/ggml-vocab-falcon.gguf
Normal file
BIN
models/ggml-vocab-falcon.gguf
Normal file
Binary file not shown.
49
prompts/LLM-questions.txt
Normal file
49
prompts/LLM-questions.txt
Normal file
@ -0,0 +1,49 @@
|
|||||||
|
In the context of LLMs, what is "Attention"?
|
||||||
|
In the context of LLMs, what is a completion?
|
||||||
|
In the context of LLMs, what is a prompt?
|
||||||
|
In the context of LLMs, what is GELU?
|
||||||
|
In the context of LLMs, what is ReLU?
|
||||||
|
In the context of LLMs, what is softmax?
|
||||||
|
In the context of LLMs, what is decoding?
|
||||||
|
In the context of LLMs, what is encoding?
|
||||||
|
In the context of LLMs, what is tokenizing?
|
||||||
|
In the context of LLMs, what is an embedding?
|
||||||
|
In the context of LLMs, what is quantization?
|
||||||
|
In the context of LLMs, what is a tensor?
|
||||||
|
In the context of LLMs, what is a sparse tensor?
|
||||||
|
In the context of LLMs, what is a vector?
|
||||||
|
In the context of LLMs, how is attention implemented?
|
||||||
|
In the context of LLMs, why is attention all you need?
|
||||||
|
In the context of LLMs, what is "RoPE" and what is it used for?
|
||||||
|
In the context of LLMs, what is "LoRA" and what is it used for?
|
||||||
|
In the context of LLMs, what are weights?
|
||||||
|
In the context of LLMs, what are biases?
|
||||||
|
In the context of LLMs, what are checkpoints?
|
||||||
|
In the context of LLMs, what is "perplexity"?
|
||||||
|
In the context of LLMs, what are models?
|
||||||
|
In the context of machine-learning, what is "catastrophic forgetting"?
|
||||||
|
In the context of machine-learning, what is "elastic weight consolidation (EWC)"?
|
||||||
|
In the context of neural nets, what is a hidden layer?
|
||||||
|
In the context of neural nets, what is a convolution?
|
||||||
|
In the context of neural nets, what is dropout?
|
||||||
|
In the context of neural nets, what is cross-entropy?
|
||||||
|
In the context of neural nets, what is over-fitting?
|
||||||
|
In the context of neural nets, what is under-fitting?
|
||||||
|
What is the difference between an interpreted computer language and a compiled computer language?
|
||||||
|
In the context of software development, what is a debugger?
|
||||||
|
When processing using a GPU, what is off-loading?
|
||||||
|
When processing using a GPU, what is a batch?
|
||||||
|
When processing using a GPU, what is a block?
|
||||||
|
When processing using a GPU, what is the difference between a batch and a block?
|
||||||
|
When processing using a GPU, what is a scratch tensor?
|
||||||
|
When processing using a GPU, what is a layer?
|
||||||
|
When processing using a GPU, what is a cache?
|
||||||
|
When processing using a GPU, what is unified memory?
|
||||||
|
When processing using a GPU, what is VRAM?
|
||||||
|
When processing using a GPU, what is a kernel?
|
||||||
|
When processing using a GPU, what is "metal"?
|
||||||
|
In the context of LLMs, what are "Zero-Shot", "One-Shot" and "Few-Shot" learning models?
|
||||||
|
In the context of LLMs, what is the "Transformer-model" architecture?
|
||||||
|
In the context of LLMs, what is "Multi-Head Attention"?
|
||||||
|
In the context of LLMs, what is "Self-Attention"?
|
||||||
|
In the context of transformer-model architectures, how do attention mechanisms use masks?
|
43
prompts/parallel-questions.txt
Normal file
43
prompts/parallel-questions.txt
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
What do you know about Hobbits?
|
||||||
|
What is quantum field theory?
|
||||||
|
Why did the chicken cross the road?
|
||||||
|
Who is the president of the United States?
|
||||||
|
How do I run CMake on macOS?
|
||||||
|
Do you agree that C++ is a really finicky language compared with Python3?
|
||||||
|
Is it a good idea to invest in technology?
|
||||||
|
Do you like Wagner's Ring?
|
||||||
|
Do you think this file input option is really neat?
|
||||||
|
What should we all do about climate change?
|
||||||
|
Is time-travel possible within the laws of current physics?
|
||||||
|
Is it like anything to be a bat?
|
||||||
|
Once the chicken has crossed the road, does it try to go back?
|
||||||
|
Who is the greatest of all musical composers?
|
||||||
|
What is art?
|
||||||
|
Is there life elsewhere in the universe?
|
||||||
|
What is intelligence?
|
||||||
|
What is the difference between knowledge and intelligence?
|
||||||
|
Will religion ever die?
|
||||||
|
Do we understand ourselves?
|
||||||
|
What is the best way to cook eggs?
|
||||||
|
If you cannot see things, on what basis do you evaluate them?
|
||||||
|
Explain the role of the p-n junction in photovoltaic cells?
|
||||||
|
Is professional sport a good or bad influence on human behaviour?
|
||||||
|
Is capital punishment immoral?
|
||||||
|
Should we care about other people?
|
||||||
|
Who are you?
|
||||||
|
Which sense would you surrender if you could?
|
||||||
|
Was Henry Ford a hero or a villain?
|
||||||
|
Do we need leaders?
|
||||||
|
What is nucleosynthesis?
|
||||||
|
Who is the greatest scientist of all time?
|
||||||
|
Who first observed what came to be known as the photovoltaic effect?
|
||||||
|
What is nuclear fusion and why does it release energy?
|
||||||
|
Can you know that you exist?
|
||||||
|
What is an exoplanet?
|
||||||
|
Do you like cream?
|
||||||
|
What is the difference?
|
||||||
|
Can I know that I exist while I'm dreaming that I'm Descartes?
|
||||||
|
Who said "I didn't know I thought that until I heard myself saying it"?
|
||||||
|
Does anything really matter?
|
||||||
|
Can you explain the unreasonable effectiveness of mathematics?
|
||||||
|
|
@ -56,11 +56,13 @@ find_library(llama_LIBRARY llama
|
|||||||
HINTS ${LLAMA_LIB_DIR})
|
HINTS ${LLAMA_LIB_DIR})
|
||||||
|
|
||||||
set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
|
set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
|
||||||
|
set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")
|
||||||
add_library(llama UNKNOWN IMPORTED)
|
add_library(llama UNKNOWN IMPORTED)
|
||||||
set_target_properties(llama
|
set_target_properties(llama
|
||||||
PROPERTIES
|
PROPERTIES
|
||||||
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
|
||||||
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
|
||||||
|
INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
|
||||||
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
|
||||||
IMPORTED_LOCATION "${llama_LIBRARY}"
|
IMPORTED_LOCATION "${llama_LIBRARY}"
|
||||||
INTERFACE_COMPILE_FEATURES cxx_std_11
|
INTERFACE_COMPILE_FEATURES cxx_std_11
|
||||||
|
@ -7,9 +7,6 @@ endfunction()
|
|||||||
|
|
||||||
function(llama_test_executable name source)
|
function(llama_test_executable name source)
|
||||||
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
get_filename_component(TEST_TARGET ${source} NAME_WE)
|
||||||
# add_executable(${TEST_TARGET} ${source})
|
|
||||||
# install(TARGETS ${TEST_TARGET} RUNTIME)
|
|
||||||
# target_link_libraries(${TEST_TARGET} PRIVATE llama)
|
|
||||||
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
add_test(NAME ${name} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
|
||||||
endfunction()
|
endfunction()
|
||||||
|
|
||||||
@ -28,10 +25,12 @@ llama_build_and_test_executable(test-sampling.cpp)
|
|||||||
llama_build_executable(test-tokenizer-0-llama.cpp)
|
llama_build_executable(test-tokenizer-0-llama.cpp)
|
||||||
llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
llama_test_executable (test-tokenizer-0-llama test-tokenizer-0-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||||
llama_build_executable(test-tokenizer-0-falcon.cpp)
|
llama_build_executable(test-tokenizer-0-falcon.cpp)
|
||||||
#llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
llama_test_executable (test-tokenizer-0-falcon test-tokenizer-0-falcon.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||||
llama_build_executable(test-tokenizer-1-llama.cpp)
|
llama_build_executable(test-tokenizer-1-llama.cpp)
|
||||||
llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
llama_test_executable (test-tokenizer-1-llama test-tokenizer-1-llama.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-llama.gguf)
|
||||||
#llama_test_executable(test-tokenizer-1.aquila test-tokenizer-1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
llama_build_executable(test-tokenizer-1-bpe.cpp)
|
||||||
|
llama_test_executable (test-tokenizer-1-falcon test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-falcon.gguf)
|
||||||
|
llama_test_executable(test-tokenizer-1-aquila test-tokenizer-1-bpe.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab-aquila.gguf)
|
||||||
llama_build_and_test_executable(test-grammar-parser.cpp)
|
llama_build_and_test_executable(test-grammar-parser.cpp)
|
||||||
llama_build_and_test_executable(test-llama-grammar.cpp)
|
llama_build_and_test_executable(test-llama-grammar.cpp)
|
||||||
llama_build_and_test_executable(test-grad0.cpp) # SLOW
|
llama_build_and_test_executable(test-grad0.cpp) # SLOW
|
||||||
|
@ -208,26 +208,6 @@ static struct ggml_tensor * get_random_tensor_i32(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void print_elements(const char* label, const struct ggml_tensor * t) {
|
|
||||||
if (!t) {
|
|
||||||
printf("%s: %s = null\n", __func__, label);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const int nelements = ggml_nelements(t);
|
|
||||||
printf("%s: %s = [", __func__, label);
|
|
||||||
for (int k = 0; k < nelements; ++k) {
|
|
||||||
if (k > 0) { printf(", "); }
|
|
||||||
printf("%.5f", ggml_get_f32_1d(t, k));
|
|
||||||
}
|
|
||||||
printf("] shape: [");
|
|
||||||
for (int k = 0; k < t->n_dims; ++k) {
|
|
||||||
if (k > 0) { printf(", "); }
|
|
||||||
printf("%d", (int)t->ne[k]);
|
|
||||||
}
|
|
||||||
printf("]\n");
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool check_gradient(
|
static bool check_gradient(
|
||||||
const char * op_name,
|
const char * op_name,
|
||||||
struct ggml_context * ctx0,
|
struct ggml_context * ctx0,
|
||||||
|
@ -40,27 +40,6 @@ static float frand(void) {
|
|||||||
return (float)rand()/(float)RAND_MAX;
|
return (float)rand()/(float)RAND_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int irand(int n) {
|
|
||||||
return rand()%n;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_random_dims(int64_t * dims, int ndims) {
|
|
||||||
dims[0] = dims[1] = dims[2] = dims[3] = 1;
|
|
||||||
|
|
||||||
for (int i = 0; i < ndims; i++) {
|
|
||||||
dims[i] = 1 + irand(4);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
|
|
||||||
dims[0] = dims[1] = dims[2] = dims[3] = 1;
|
|
||||||
|
|
||||||
for (int i = 0; i < ndims; i++) {
|
|
||||||
dims[i] = min + irand(max-min);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
static struct ggml_tensor * get_random_tensor(
|
static struct ggml_tensor * get_random_tensor(
|
||||||
struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax
|
struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax
|
||||||
) {
|
) {
|
||||||
@ -106,14 +85,6 @@ static struct ggml_tensor * get_random_tensor(
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float get_element(const struct ggml_tensor * t, int idx) {
|
|
||||||
return ((float *)t->data)[idx];
|
|
||||||
}
|
|
||||||
|
|
||||||
static void set_element(struct ggml_tensor * t, int idx, float value) {
|
|
||||||
((float *)t->data)[idx] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
struct ggml_init_params params = {
|
struct ggml_init_params params = {
|
||||||
/* .mem_size = */ 1024*1024*1024,
|
/* .mem_size = */ 1024*1024*1024,
|
||||||
|
@ -76,22 +76,21 @@ static void * align_with_offset(void * ptr, int offset) {
|
|||||||
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
|
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<size_t(void)> & function) {
|
static void benchmark_function(size_t size, size_t q_size, int64_t iterations, const std::function<float(void)> & func) {
|
||||||
int64_t min_time_us = INT64_MAX;
|
int64_t min_time_us = INT64_MAX;
|
||||||
int64_t total_time_us = 0;
|
int64_t total_time_us = 0;
|
||||||
int64_t min_time_cycles = INT64_MAX;
|
int64_t min_time_cycles = INT64_MAX;
|
||||||
int64_t total_time_cycles = 0;
|
int64_t total_time_cycles = 0;
|
||||||
|
|
||||||
for (int i = 0; i < WARMUP; i++) {
|
for (int i = 0; i < WARMUP; i++) {
|
||||||
function();
|
func();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
for (int i = 0; i < iterations; i++) {
|
for (int i = 0; i < iterations; i++) {
|
||||||
const int64_t start_time = ggml_time_us();
|
const int64_t start_time = ggml_time_us();
|
||||||
const int64_t start_cycles = cpu_cycles();
|
const int64_t start_cycles = cpu_cycles();
|
||||||
|
|
||||||
function();
|
func();
|
||||||
|
|
||||||
const int64_t end_cycles = cpu_cycles();
|
const int64_t end_cycles = cpu_cycles();
|
||||||
const int64_t end_time = ggml_time_us();
|
const int64_t end_time = ggml_time_us();
|
||||||
@ -283,7 +282,7 @@ int main(int argc, char * argv[]) {
|
|||||||
printf(" quantize_row_q_reference\n");
|
printf(" quantize_row_q_reference\n");
|
||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void ) {
|
auto quantize_fn = [&](void) -> float {
|
||||||
qfns.from_float_reference(test_data1, test_q1, size);
|
qfns.from_float_reference(test_data1, test_q1, size);
|
||||||
return test_q1[0];
|
return test_q1[0];
|
||||||
};
|
};
|
||||||
@ -297,7 +296,7 @@ int main(int argc, char * argv[]) {
|
|||||||
printf(" quantize_row_q\n");
|
printf(" quantize_row_q\n");
|
||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void ) {
|
auto quantize_fn = [&](void) -> float {
|
||||||
qfns.from_float(test_data1, test_q1, size);
|
qfns.from_float(test_data1, test_q1, size);
|
||||||
return test_q1[0];
|
return test_q1[0];
|
||||||
};
|
};
|
||||||
@ -312,7 +311,7 @@ int main(int argc, char * argv[]) {
|
|||||||
qfns.from_float(test_data1, test_q1, largest);
|
qfns.from_float(test_data1, test_q1, largest);
|
||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void ) {
|
auto quantize_fn = [&](void) -> float {
|
||||||
qfns.to_float(test_q1, test_out, size);
|
qfns.to_float(test_q1, test_out, size);
|
||||||
return test_out[0];
|
return test_out[0];
|
||||||
};
|
};
|
||||||
@ -326,7 +325,7 @@ int main(int argc, char * argv[]) {
|
|||||||
printf(" quantize_row_q_dot\n");
|
printf(" quantize_row_q_dot\n");
|
||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void ) {
|
auto quantize_fn = [&](void) -> float {
|
||||||
auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
|
auto vdot = ggml_internal_get_type_traits(qfns.vec_dot_type);
|
||||||
vdot.from_float(test_data1, test_q1, size);
|
vdot.from_float(test_data1, test_q1, size);
|
||||||
return test_q1[0];
|
return test_q1[0];
|
||||||
@ -343,7 +342,7 @@ int main(int argc, char * argv[]) {
|
|||||||
qfns.from_float(test_data2, test_q2, largest);
|
qfns.from_float(test_data2, test_q2, largest);
|
||||||
for (size_t size : params.test_sizes) {
|
for (size_t size : params.test_sizes) {
|
||||||
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
printf(" %zu values (%.2f MB)\n", size, 4*size/(float)(1024*1024));
|
||||||
auto quantize_fn = [&](void ) {
|
auto quantize_fn = [&](void) -> float {
|
||||||
float result;
|
float result;
|
||||||
qfns.vec_dot(size, &result, test_q1, test_q2);
|
qfns.vec_dot(size, &result, test_q1, test_q2);
|
||||||
return result;
|
return result;
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "console.h"
|
||||||
|
|
||||||
#include <cstdio>
|
#include <cstdio>
|
||||||
#include <string>
|
#include <string>
|
||||||
@ -85,12 +86,18 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
|
if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
|
||||||
fprintf(stderr, "%s : error: vocab type is not SPM\n", __func__);
|
fprintf(stderr, "%s : error: vocab type is not BPE\n", __func__);
|
||||||
llama_free_model(model);
|
llama_free_model(model);
|
||||||
llama_free(ctx);
|
llama_free(ctx);
|
||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// We need this for unicode console support
|
||||||
|
console::init(false, false);
|
||||||
|
atexit([]() { console::cleanup(); });
|
||||||
|
#endif
|
||||||
|
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
for (const auto & test_kv : k_tests()) {
|
for (const auto & test_kv : k_tests()) {
|
||||||
|
113
tests/test-tokenizer-1-bpe.cpp
Normal file
113
tests/test-tokenizer-1-bpe.cpp
Normal file
@ -0,0 +1,113 @@
|
|||||||
|
#include "llama.h"
|
||||||
|
#include "common.h"
|
||||||
|
#include "unicode.h"
|
||||||
|
#include "console.h"
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdio>
|
||||||
|
#include <cstring>
|
||||||
|
#include <string>
|
||||||
|
#include <codecvt>
|
||||||
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
#include <locale>
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
if (argc < 2) {
|
||||||
|
fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string fname = argv[1];
|
||||||
|
|
||||||
|
fprintf(stderr, "%s : reading vocab from: '%s'\n", __func__, fname.c_str());
|
||||||
|
|
||||||
|
llama_model * model;
|
||||||
|
llama_context * ctx;
|
||||||
|
|
||||||
|
llama_backend_init(false);
|
||||||
|
|
||||||
|
// load the vocab
|
||||||
|
{
|
||||||
|
auto mparams = llama_model_default_params();
|
||||||
|
|
||||||
|
mparams.vocab_only = true;
|
||||||
|
|
||||||
|
model = llama_load_model_from_file(fname.c_str(), mparams);
|
||||||
|
|
||||||
|
if (model == NULL) {
|
||||||
|
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto cparams = llama_context_default_params();
|
||||||
|
|
||||||
|
ctx = llama_new_context_with_model(model, cparams);
|
||||||
|
|
||||||
|
if (ctx == NULL) {
|
||||||
|
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||||
|
llama_free_model(model);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
|
||||||
|
|
||||||
|
#ifdef _WIN32
|
||||||
|
// We need this for unicode console support
|
||||||
|
console::init(false, false);
|
||||||
|
atexit([]() { console::cleanup(); });
|
||||||
|
#endif
|
||||||
|
|
||||||
|
const int n_vocab = llama_n_vocab(model);
|
||||||
|
|
||||||
|
for (int i = 0; i < n_vocab; ++i) {
|
||||||
|
std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
|
||||||
|
try {
|
||||||
|
auto cps = codepoints_from_utf8(str);
|
||||||
|
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||||
|
std::string check = llama_detokenize_bpe(ctx, tokens);
|
||||||
|
if (check != str) {
|
||||||
|
fprintf(stderr, "%s : error: token %d detokenizes to '%s'(%zu) but tokenization of this detokenizes to '%s'(%zu)\n",
|
||||||
|
__func__, i, str.c_str(), str.length(), check.c_str(), check.length());
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (const std::invalid_argument &) {
|
||||||
|
fprintf(stderr, "%s : info: utf8 conversion %d '%s'\n", __func__, i, str.c_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (uint32_t cp = 0x0000; cp < 0xffff; ++cp) {
|
||||||
|
// NOTE: these exceptions seem to be necessary, because the GPT2 tokenizer doesn't want to interfere with some ASCII control characters
|
||||||
|
if ((cp < 0x03 || cp > 0x05) && cp != 0x0b && cp != 0x11 && (cp < 0x13 || cp > 0x17) && cp != 0x19 && (cp < 0x1c || cp > 0x1e) && (cp < 0xd800 || cp > 0xdfff)) {
|
||||||
|
std::string str = " " + codepoint_to_utf8(cp);
|
||||||
|
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||||
|
std::string check = llama_detokenize_bpe(ctx, tokens);
|
||||||
|
if (str != check) {
|
||||||
|
fprintf(stderr, "%s : error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
|
||||||
|
__func__, cp, check.c_str(), check.length(), str.c_str(), str.length());
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// TODO: why doesn't this work for the full range of Unicodes?
|
||||||
|
// for (uint32_t cp = 0x10000; cp < 0x0010ffff; ++cp) {
|
||||||
|
for (uint32_t cp = 0x10000; cp < 0x00080000; ++cp) {
|
||||||
|
std::string str = codepoint_to_utf8(cp);
|
||||||
|
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||||
|
std::string check = llama_detokenize_bpe(ctx, tokens);
|
||||||
|
if (str != check) {
|
||||||
|
fprintf(stderr, "%s : error: codepoint %x detokenizes to '%s'(%zu) instead of '%s'(%zu)\n",
|
||||||
|
__func__, cp, check.c_str(), check.length(), str.c_str(), str.length());
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
llama_free_model(model);
|
||||||
|
llama_free(ctx);
|
||||||
|
|
||||||
|
llama_backend_free();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
#include "llama.h"
|
#include "llama.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "unicode.h"
|
||||||
#include "console.h"
|
#include "console.h"
|
||||||
|
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
@ -11,30 +12,6 @@
|
|||||||
#include <vector>
|
#include <vector>
|
||||||
#include <locale>
|
#include <locale>
|
||||||
|
|
||||||
typedef int codepoint;
|
|
||||||
|
|
||||||
static std::string codepoint_to_utf8(codepoint cp) {
|
|
||||||
std::string result;
|
|
||||||
if (0x00 <= cp && cp <= 0x7f) {
|
|
||||||
result.push_back(cp);
|
|
||||||
} else if (0x80 <= cp && cp <= 0x7ff) {
|
|
||||||
result.push_back(0xc0 | ((cp >> 6) & 0x1f));
|
|
||||||
result.push_back(0x80 | (cp & 0x3f));
|
|
||||||
} else if (0x800 <= cp && cp <= 0xffff) {
|
|
||||||
result.push_back(0xe0 | ((cp >> 12) & 0x0f));
|
|
||||||
result.push_back(0x80 | ((cp >> 6) & 0x3f));
|
|
||||||
result.push_back(0x80 | (cp & 0x3f));
|
|
||||||
} else if (0x10000 <= cp && cp <= 0x10ffff) {
|
|
||||||
result.push_back(0xf0 | ((cp >> 18) & 0x07));
|
|
||||||
result.push_back(0x80 | ((cp >> 12) & 0x3f));
|
|
||||||
result.push_back(0x80 | ((cp >> 6) & 0x3f));
|
|
||||||
result.push_back(0x80 | (cp & 0x3f));
|
|
||||||
} else {
|
|
||||||
throw std::invalid_argument("invalid codepoint");
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
fprintf(stderr, "Usage: %s <vocab-file>\n", argv[0]);
|
||||||
@ -95,7 +72,7 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (codepoint cp = 0x0000; cp < 0xffff; ++cp) {
|
for (uint32_t cp = 0x0000; cp < 0xffff; ++cp) {
|
||||||
if (cp < 0xd800 || cp > 0xdfff) {
|
if (cp < 0xd800 || cp > 0xdfff) {
|
||||||
std::string str = codepoint_to_utf8(cp);
|
std::string str = codepoint_to_utf8(cp);
|
||||||
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||||
@ -107,7 +84,7 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (codepoint cp = 0x10000; cp < 0x0010ffff; ++cp) {
|
for (uint32_t cp = 0x10000; cp < 0x0010ffff; ++cp) {
|
||||||
std::string str = codepoint_to_utf8(cp);
|
std::string str = codepoint_to_utf8(cp);
|
||||||
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||||
std::string check = llama_detokenize_spm(ctx, tokens);
|
std::string check = llama_detokenize_spm(ctx, tokens);
|
||||||
|
462
unicode.h
Normal file
462
unicode.h
Normal file
@ -0,0 +1,462 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cassert>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <vector>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
|
||||||
|
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
|
||||||
|
{0xCE6, 0xCEF}, {0xD66, 0xD6F}, {0xDE6, 0xDEF}, {0xE50, 0xE59}, {0xED0, 0xED9}, {0xF20, 0xF29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x1369, 0x1371}, {0x17E0, 0x17E9}, {0x1810, 0x1819}, {0x1946, 0x194F},
|
||||||
|
{0x19D0, 0x19DA}, {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1B50, 0x1B59}, {0x1BB0, 0x1BB9}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2460, 0x2468},
|
||||||
|
{0x2474, 0x247C}, {0x2488, 0x2490}, {0x24EA, 0x24EA}, {0x24F5, 0x24FD}, {0x24FF, 0x24FF}, {0x2776, 0x277E}, {0x2780, 0x2788}, {0x278A, 0x2792}, {0xA620, 0xA629}, {0xA8D0, 0xA8D9}, {0xA900, 0xA909},
|
||||||
|
{0xA9D0, 0xA9D9}, {0xA9F0, 0xA9F9}, {0xAA50, 0xAA59}, {0xABF0, 0xABF9}, {0xFF10, 0xFF19}, {0x104A0, 0x104A9}, {0x10A40, 0x10A43}, {0x10D30, 0x10D39}, {0x10E60, 0x10E68}, {0x11052, 0x1105A},
|
||||||
|
{0x11066, 0x1106F}, {0x110F0, 0x110F9}, {0x11136, 0x1113F}, {0x111D0, 0x111D9}, {0x112F0, 0x112F9}, {0x11450, 0x11459}, {0x114D0, 0x114D9}, {0x11650, 0x11659}, {0x116C0, 0x116C9}, {0x11730, 0x11739},
|
||||||
|
{0x118E0, 0x118E9}, {0x11950, 0x11959}, {0x11C50, 0x11C59}, {0x11D50, 0x11D59}, {0x11DA0, 0x11DA9}, {0x16A60, 0x16A69}, {0x16B50, 0x16B59}, {0x1D7CE, 0x1D7FF}, {0x1E140, 0x1E149}, {0x1E2F0, 0x1E2F9},
|
||||||
|
{0x1E950, 0x1E959}, {0x1F100, 0x1F10A}, {0x1FBF0, 0x1FBF9},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> letter_ranges = {
|
||||||
|
{0x41, 0x5A}, {0x61, 0x7A}, {0xAA, 0xAA}, {0xB5, 0xB5}, {0xBA, 0xBA}, {0xC0, 0xD6}, {0xD8, 0xF6}, {0xF8, 0x2C1}, {0x2C6, 0x2D1}, {0x2E0, 0x2E4}, {0x2EC, 0x2EC}, {0x2EE, 0x2EE}, {0x370, 0x374},
|
||||||
|
{0x376, 0x377}, {0x37A, 0x37D}, {0x37F, 0x37F}, {0x386, 0x386}, {0x388, 0x38A}, {0x38C, 0x38C}, {0x38E, 0x3A1}, {0x3A3, 0x3F5}, {0x3F7, 0x481}, {0x48A, 0x52F}, {0x531, 0x556}, {0x559, 0x559},
|
||||||
|
{0x560, 0x588}, {0x5D0, 0x5EA}, {0x5EF, 0x5F2}, {0x620, 0x64A}, {0x66E, 0x66F}, {0x671, 0x6D3}, {0x6D5, 0x6D5}, {0x6E5, 0x6E6}, {0x6EE, 0x6EF}, {0x6FA, 0x6FC}, {0x6FF, 0x6FF}, {0x710, 0x710},
|
||||||
|
{0x712, 0x72F}, {0x74D, 0x7A5}, {0x7B1, 0x7B1}, {0x7CA, 0x7EA}, {0x7F4, 0x7F5}, {0x7FA, 0x7FA}, {0x800, 0x815}, {0x81A, 0x81A}, {0x824, 0x824}, {0x828, 0x828}, {0x840, 0x858}, {0x860, 0x86A},
|
||||||
|
{0x8A0, 0x8B4}, {0x8B6, 0x8C7}, {0x904, 0x939}, {0x93D, 0x93D}, {0x950, 0x950}, {0x958, 0x961}, {0x971, 0x980}, {0x985, 0x98C}, {0x98F, 0x990}, {0x993, 0x9A8}, {0x9AA, 0x9B0}, {0x9B2, 0x9B2},
|
||||||
|
{0x9B6, 0x9B9}, {0x9BD, 0x9BD}, {0x9CE, 0x9CE}, {0x9DC, 0x9DD}, {0x9DF, 0x9E1}, {0x9F0, 0x9F1}, {0x9FC, 0x9FC}, {0xA05, 0xA0A}, {0xA0F, 0xA10}, {0xA13, 0xA28}, {0xA2A, 0xA30}, {0xA32, 0xA33},
|
||||||
|
{0xA35, 0xA36}, {0xA38, 0xA39}, {0xA59, 0xA5C}, {0xA5E, 0xA5E}, {0xA72, 0xA74}, {0xA85, 0xA8D}, {0xA8F, 0xA91}, {0xA93, 0xAA8}, {0xAAA, 0xAB0}, {0xAB2, 0xAB3}, {0xAB5, 0xAB9}, {0xABD, 0xABD},
|
||||||
|
{0xAD0, 0xAD0}, {0xAE0, 0xAE1}, {0xAF9, 0xAF9}, {0xB05, 0xB0C}, {0xB0F, 0xB10}, {0xB13, 0xB28}, {0xB2A, 0xB30}, {0xB32, 0xB33}, {0xB35, 0xB39}, {0xB3D, 0xB3D}, {0xB5C, 0xB5D}, {0xB5F, 0xB61},
|
||||||
|
{0xB71, 0xB71}, {0xB83, 0xB83}, {0xB85, 0xB8A}, {0xB8E, 0xB90}, {0xB92, 0xB95}, {0xB99, 0xB9A}, {0xB9C, 0xB9C}, {0xB9E, 0xB9F}, {0xBA3, 0xBA4}, {0xBA8, 0xBAA}, {0xBAE, 0xBB9}, {0xBD0, 0xBD0},
|
||||||
|
{0xC05, 0xC0C}, {0xC0E, 0xC10}, {0xC12, 0xC28}, {0xC2A, 0xC39}, {0xC3D, 0xC3D}, {0xC58, 0xC5A}, {0xC60, 0xC61}, {0xC80, 0xC80}, {0xC85, 0xC8C}, {0xC8E, 0xC90}, {0xC92, 0xCA8}, {0xCAA, 0xCB3},
|
||||||
|
{0xCB5, 0xCB9}, {0xCBD, 0xCBD}, {0xCDE, 0xCDE}, {0xCE0, 0xCE1}, {0xCF1, 0xCF2}, {0xD04, 0xD0C}, {0xD0E, 0xD10}, {0xD12, 0xD3A}, {0xD3D, 0xD3D}, {0xD4E, 0xD4E}, {0xD54, 0xD56}, {0xD5F, 0xD61},
|
||||||
|
{0xD7A, 0xD7F}, {0xD85, 0xD96}, {0xD9A, 0xDB1}, {0xDB3, 0xDBB}, {0xDBD, 0xDBD}, {0xDC0, 0xDC6}, {0xE01, 0xE30}, {0xE32, 0xE33}, {0xE40, 0xE46}, {0xE81, 0xE82}, {0xE84, 0xE84}, {0xE86, 0xE8A},
|
||||||
|
{0xE8C, 0xEA3}, {0xEA5, 0xEA5}, {0xEA7, 0xEB0}, {0xEB2, 0xEB3}, {0xEBD, 0xEBD}, {0xEC0, 0xEC4}, {0xEC6, 0xEC6}, {0xEDC, 0xEDF}, {0xF00, 0xF00}, {0xF40, 0xF47}, {0xF49, 0xF6C}, {0xF88, 0xF8C},
|
||||||
|
{0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7},
|
||||||
|
{0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
|
||||||
|
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C},
|
||||||
|
{0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3},
|
||||||
|
{0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, {0x1880, 0x1884}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
|
||||||
|
{0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F},
|
||||||
|
{0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D},
|
||||||
|
{0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4},
|
||||||
|
{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107},
|
||||||
|
{0x210A, 0x2113}, {0x2115, 0x2115}, {0x2119, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212F, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E},
|
||||||
|
{0x2183, 0x2184}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F},
|
||||||
|
{0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2E2F, 0x2E2F}, {0x3005, 0x3006},
|
||||||
|
{0x3031, 0x3035}, {0x303B, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF},
|
||||||
|
{0x4E00, 0x9FFC}, {0xA000, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA717, 0xA71F}, {0xA722, 0xA788},
|
||||||
|
{0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE},
|
||||||
|
{0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B},
|
||||||
|
{0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4},
|
||||||
|
{0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
|
||||||
|
{0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
|
||||||
|
{0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFFBE}, {0xFFC2, 0xFFC7},
|
||||||
|
{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
|
||||||
|
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x10400, 0x1049D},
|
||||||
|
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
|
||||||
|
{0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7},
|
||||||
|
{0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35},
|
||||||
|
{0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C},
|
||||||
|
{0x10F27, 0x10F27}, {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147},
|
||||||
|
{0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},
|
||||||
|
{0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
|
||||||
|
{0x1133D, 0x1133D}, {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE},
|
||||||
|
{0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909},
|
||||||
|
{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00},
|
||||||
|
{0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},
|
||||||
|
{0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0},
|
||||||
|
{0x12000, 0x12399}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77},
|
||||||
|
{0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
|
||||||
|
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
|
||||||
|
{0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
|
||||||
|
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
|
||||||
|
{0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D},
|
||||||
|
{0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
|
||||||
|
{0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
|
||||||
|
{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
|
||||||
|
{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734},
|
||||||
|
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Codepoint ranges classified as whitespace. Each pair is {first, last};
// codepoint_type_map() walks every pair with `i <= p.second`, so both ends are
// inclusive, and tags each covered codepoint as CODEPOINT_TYPE_WHITESPACE.
static const std::vector<std::pair<uint32_t, uint32_t>> whitespace_ranges = {
|
||||||
|
{0x9, 0xD}, {0x1C, 0x20}, {0x85, 0x85}, {0xA0, 0xA0}, {0x1680, 0x1680}, {0x2000, 0x200A}, {0x2028, 0x2029}, {0x202F, 0x202F}, {0x205F, 0x205F}, {0x3000, 0x3000},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> accent_mark_ranges = {
|
||||||
|
{0x300, 0x36F}, {0x483, 0x489}, {0x591, 0x5BD}, {0x5BF, 0x5BF}, {0x5C1, 0x5C2}, {0x5C4, 0x5C5}, {0x5C7, 0x5C7}, {0x610, 0x61A}, {0x64B, 0x65F}, {0x670, 0x670}, {0x6D6, 0x6DC}, {0x6DF, 0x6E4},
|
||||||
|
{0x6E7, 0x6E8}, {0x6EA, 0x6ED}, {0x711, 0x711}, {0x730, 0x74A}, {0x7A6, 0x7B0}, {0x7EB, 0x7F3}, {0x7FD, 0x7FD}, {0x816, 0x819}, {0x81B, 0x823}, {0x825, 0x827}, {0x829, 0x82D}, {0x859, 0x85B},
|
||||||
|
{0x8D3, 0x8E1}, {0x8E3, 0x903}, {0x93A, 0x93C}, {0x93E, 0x94F}, {0x951, 0x957}, {0x962, 0x963}, {0x981, 0x983}, {0x9BC, 0x9BC}, {0x9BE, 0x9C4}, {0x9C7, 0x9C8}, {0x9CB, 0x9CD}, {0x9D7, 0x9D7},
|
||||||
|
{0x9E2, 0x9E3}, {0x9FE, 0x9FE}, {0xA01, 0xA03}, {0xA3C, 0xA3C}, {0xA3E, 0xA42}, {0xA47, 0xA48}, {0xA4B, 0xA4D}, {0xA51, 0xA51}, {0xA70, 0xA71}, {0xA75, 0xA75}, {0xA81, 0xA83}, {0xABC, 0xABC},
|
||||||
|
{0xABE, 0xAC5}, {0xAC7, 0xAC9}, {0xACB, 0xACD}, {0xAE2, 0xAE3}, {0xAFA, 0xAFF}, {0xB01, 0xB03}, {0xB3C, 0xB3C}, {0xB3E, 0xB44}, {0xB47, 0xB48}, {0xB4B, 0xB4D}, {0xB55, 0xB57}, {0xB62, 0xB63},
|
||||||
|
{0xB82, 0xB82}, {0xBBE, 0xBC2}, {0xBC6, 0xBC8}, {0xBCA, 0xBCD}, {0xBD7, 0xBD7}, {0xC00, 0xC04}, {0xC3E, 0xC44}, {0xC46, 0xC48}, {0xC4A, 0xC4D}, {0xC55, 0xC56}, {0xC62, 0xC63}, {0xC81, 0xC83},
|
||||||
|
{0xCBC, 0xCBC}, {0xCBE, 0xCC4}, {0xCC6, 0xCC8}, {0xCCA, 0xCCD}, {0xCD5, 0xCD6}, {0xCE2, 0xCE3}, {0xD00, 0xD03}, {0xD3B, 0xD3C}, {0xD3E, 0xD44}, {0xD46, 0xD48}, {0xD4A, 0xD4D}, {0xD57, 0xD57},
|
||||||
|
{0xD62, 0xD63}, {0xD81, 0xD83}, {0xDCA, 0xDCA}, {0xDCF, 0xDD4}, {0xDD6, 0xDD6}, {0xDD8, 0xDDF}, {0xDF2, 0xDF3}, {0xE31, 0xE31}, {0xE34, 0xE3A}, {0xE47, 0xE4E}, {0xEB1, 0xEB1}, {0xEB4, 0xEBC},
|
||||||
|
{0xEC8, 0xECD}, {0xF18, 0xF19}, {0xF35, 0xF35}, {0xF37, 0xF37}, {0xF39, 0xF39}, {0xF3E, 0xF3F}, {0xF71, 0xF84}, {0xF86, 0xF87}, {0xF8D, 0xF97}, {0xF99, 0xFBC}, {0xFC6, 0xFC6}, {0x102B, 0x103E},
|
||||||
|
{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, {0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1734},
|
||||||
|
{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E},
|
||||||
|
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2},
|
||||||
|
{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
|
||||||
|
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
|
||||||
|
{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D},
|
||||||
|
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E},
|
||||||
|
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
|
||||||
|
{0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x11100, 0x11102}, {0x11127, 0x11134},
|
||||||
|
{0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, {0x111CE, 0x111CF}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x11300, 0x11303},
|
||||||
|
{0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, {0x1145E, 0x1145E},
|
||||||
|
{0x114B0, 0x114C3}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x1182C, 0x1183A}, {0x11930, 0x11935}, {0x11937, 0x11938},
|
||||||
|
{0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
|
||||||
|
{0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
|
||||||
|
{0x11D47, 0x11D47}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11EF3, 0x11EF6}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, {0x16F8F, 0x16F92},
|
||||||
|
{0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36},
|
||||||
|
{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
|
||||||
|
{0x1E130, 0x1E136}, {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0xE0100, 0xE01EF},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> punctuation_ranges = {
|
||||||
|
{0x21, 0x23}, {0x25, 0x2A}, {0x2C, 0x2F}, {0x3A, 0x3B}, {0x3F, 0x40}, {0x5B, 0x5D}, {0x5F, 0x5F}, {0x7B, 0x7B}, {0x7D, 0x7D}, {0xA1, 0xA1}, {0xA7, 0xA7}, {0xAB, 0xAB}, {0xB6, 0xB7}, {0xBB, 0xBB},
|
||||||
|
{0xBF, 0xBF}, {0x37E, 0x37E}, {0x387, 0x387}, {0x55A, 0x55F}, {0x589, 0x58A}, {0x5BE, 0x5BE}, {0x5C0, 0x5C0}, {0x5C3, 0x5C3}, {0x5C6, 0x5C6}, {0x5F3, 0x5F4}, {0x609, 0x60A}, {0x60C, 0x60D},
|
||||||
|
{0x61B, 0x61B}, {0x61E, 0x61F}, {0x66A, 0x66D}, {0x6D4, 0x6D4}, {0x700, 0x70D}, {0x7F7, 0x7F9}, {0x830, 0x83E}, {0x85E, 0x85E}, {0x964, 0x965}, {0x970, 0x970}, {0x9FD, 0x9FD}, {0xA76, 0xA76},
|
||||||
|
{0xAF0, 0xAF0}, {0xC77, 0xC77}, {0xC84, 0xC84}, {0xDF4, 0xDF4}, {0xE4F, 0xE4F}, {0xE5A, 0xE5B}, {0xF04, 0xF12}, {0xF14, 0xF14}, {0xF3A, 0xF3D}, {0xF85, 0xF85}, {0xFD0, 0xFD4}, {0xFD9, 0xFDA},
|
||||||
|
{0x104A, 0x104F}, {0x10FB, 0x10FB}, {0x1360, 0x1368}, {0x1400, 0x1400}, {0x166E, 0x166E}, {0x169B, 0x169C}, {0x16EB, 0x16ED}, {0x1735, 0x1736}, {0x17D4, 0x17D6}, {0x17D8, 0x17DA}, {0x1800, 0x180A},
|
||||||
|
{0x1944, 0x1945}, {0x1A1E, 0x1A1F}, {0x1AA0, 0x1AA6}, {0x1AA8, 0x1AAD}, {0x1B5A, 0x1B60}, {0x1BFC, 0x1BFF}, {0x1C3B, 0x1C3F}, {0x1C7E, 0x1C7F}, {0x1CC0, 0x1CC7}, {0x1CD3, 0x1CD3}, {0x2010, 0x2027},
|
||||||
|
{0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x205E}, {0x207D, 0x207E}, {0x208D, 0x208E}, {0x2308, 0x230B}, {0x2329, 0x232A}, {0x2768, 0x2775}, {0x27C5, 0x27C6}, {0x27E6, 0x27EF}, {0x2983, 0x2998},
|
||||||
|
{0x29D8, 0x29DB}, {0x29FC, 0x29FD}, {0x2CF9, 0x2CFC}, {0x2CFE, 0x2CFF}, {0x2D70, 0x2D70}, {0x2E00, 0x2E2E}, {0x2E30, 0x2E4F}, {0x2E52, 0x2E52}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301F},
|
||||||
|
{0x3030, 0x3030}, {0x303D, 0x303D}, {0x30A0, 0x30A0}, {0x30FB, 0x30FB}, {0xA4FE, 0xA4FF}, {0xA60D, 0xA60F}, {0xA673, 0xA673}, {0xA67E, 0xA67E}, {0xA6F2, 0xA6F7}, {0xA874, 0xA877}, {0xA8CE, 0xA8CF},
|
||||||
|
{0xA8F8, 0xA8FA}, {0xA8FC, 0xA8FC}, {0xA92E, 0xA92F}, {0xA95F, 0xA95F}, {0xA9C1, 0xA9CD}, {0xA9DE, 0xA9DF}, {0xAA5C, 0xAA5F}, {0xAADE, 0xAADF}, {0xAAF0, 0xAAF1}, {0xABEB, 0xABEB}, {0xFD3E, 0xFD3F},
|
||||||
|
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE61}, {0xFE63, 0xFE63}, {0xFE68, 0xFE68}, {0xFE6A, 0xFE6B}, {0xFF01, 0xFF03}, {0xFF05, 0xFF0A}, {0xFF0C, 0xFF0F}, {0xFF1A, 0xFF1B}, {0xFF1F, 0xFF20},
|
||||||
|
{0xFF3B, 0xFF3D}, {0xFF3F, 0xFF3F}, {0xFF5B, 0xFF5B}, {0xFF5D, 0xFF5D}, {0xFF5F, 0xFF65}, {0x10100, 0x10102}, {0x1039F, 0x1039F}, {0x103D0, 0x103D0}, {0x1056F, 0x1056F}, {0x10857, 0x10857},
|
||||||
|
{0x1091F, 0x1091F}, {0x1093F, 0x1093F}, {0x10A50, 0x10A58}, {0x10A7F, 0x10A7F}, {0x10AF0, 0x10AF6}, {0x10B39, 0x10B3F}, {0x10B99, 0x10B9C}, {0x10EAD, 0x10EAD}, {0x10F55, 0x10F59}, {0x11047, 0x1104D},
|
||||||
|
{0x110BB, 0x110BC}, {0x110BE, 0x110C1}, {0x11140, 0x11143}, {0x11174, 0x11175}, {0x111C5, 0x111C8}, {0x111CD, 0x111CD}, {0x111DB, 0x111DB}, {0x111DD, 0x111DF}, {0x11238, 0x1123D}, {0x112A9, 0x112A9},
|
||||||
|
{0x1144B, 0x1144F}, {0x1145A, 0x1145B}, {0x1145D, 0x1145D}, {0x114C6, 0x114C6}, {0x115C1, 0x115D7}, {0x11641, 0x11643}, {0x11660, 0x1166C}, {0x1173C, 0x1173E}, {0x1183B, 0x1183B}, {0x11944, 0x11946},
|
||||||
|
{0x119E2, 0x119E2}, {0x11A3F, 0x11A46}, {0x11A9A, 0x11A9C}, {0x11A9E, 0x11AA2}, {0x11C41, 0x11C45}, {0x11C70, 0x11C71}, {0x11EF7, 0x11EF8}, {0x11FFF, 0x11FFF}, {0x12470, 0x12474}, {0x16A6E, 0x16A6F},
|
||||||
|
{0x16AF5, 0x16AF5}, {0x16B37, 0x16B3B}, {0x16B44, 0x16B44}, {0x16E97, 0x16E9A}, {0x16FE2, 0x16FE2}, {0x1BC9F, 0x1BC9F}, {0x1DA87, 0x1DA8B}, {0x1E95E, 0x1E95F},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> symbol_ranges = {
|
||||||
|
{0x24, 0x24}, {0x2B, 0x2B}, {0x3C, 0x3E}, {0x5E, 0x5E}, {0x60, 0x60}, {0x7C, 0x7C}, {0x7E, 0x7E}, {0xA2, 0xA6}, {0xA8, 0xA9}, {0xAC, 0xAC}, {0xAE, 0xB1}, {0xB4, 0xB4}, {0xB8, 0xB8}, {0xD7, 0xD7},
|
||||||
|
{0xF7, 0xF7}, {0x2C2, 0x2C5}, {0x2D2, 0x2DF}, {0x2E5, 0x2EB}, {0x2ED, 0x2ED}, {0x2EF, 0x2FF}, {0x375, 0x375}, {0x384, 0x385}, {0x3F6, 0x3F6}, {0x482, 0x482}, {0x58D, 0x58F}, {0x606, 0x608},
|
||||||
|
{0x60B, 0x60B}, {0x60E, 0x60F}, {0x6DE, 0x6DE}, {0x6E9, 0x6E9}, {0x6FD, 0x6FE}, {0x7F6, 0x7F6}, {0x7FE, 0x7FF}, {0x9F2, 0x9F3}, {0x9FA, 0x9FB}, {0xAF1, 0xAF1}, {0xB70, 0xB70}, {0xBF3, 0xBFA},
|
||||||
|
{0xC7F, 0xC7F}, {0xD4F, 0xD4F}, {0xD79, 0xD79}, {0xE3F, 0xE3F}, {0xF01, 0xF03}, {0xF13, 0xF13}, {0xF15, 0xF17}, {0xF1A, 0xF1F}, {0xF34, 0xF34}, {0xF36, 0xF36}, {0xF38, 0xF38}, {0xFBE, 0xFC5},
|
||||||
|
{0xFC7, 0xFCC}, {0xFCE, 0xFCF}, {0xFD5, 0xFD8}, {0x109E, 0x109F}, {0x1390, 0x1399}, {0x166D, 0x166D}, {0x17DB, 0x17DB}, {0x1940, 0x1940}, {0x19DE, 0x19FF}, {0x1B61, 0x1B6A}, {0x1B74, 0x1B7C},
|
||||||
|
{0x1FBD, 0x1FBD}, {0x1FBF, 0x1FC1}, {0x1FCD, 0x1FCF}, {0x1FDD, 0x1FDF}, {0x1FED, 0x1FEF}, {0x1FFD, 0x1FFE}, {0x2044, 0x2044}, {0x2052, 0x2052}, {0x207A, 0x207C}, {0x208A, 0x208C}, {0x20A0, 0x20BF},
|
||||||
|
{0x2100, 0x2101}, {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211E, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x213A, 0x213B},
|
||||||
|
{0x2140, 0x2144}, {0x214A, 0x214D}, {0x214F, 0x214F}, {0x218A, 0x218B}, {0x2190, 0x2307}, {0x230C, 0x2328}, {0x232B, 0x2426}, {0x2440, 0x244A}, {0x249C, 0x24E9}, {0x2500, 0x2767}, {0x2794, 0x27C4},
|
||||||
|
{0x27C7, 0x27E5}, {0x27F0, 0x2982}, {0x2999, 0x29D7}, {0x29DC, 0x29FB}, {0x29FE, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2BFF}, {0x2CE5, 0x2CEA}, {0x2E50, 0x2E51}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
|
||||||
|
{0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303E, 0x303F}, {0x309B, 0x309C}, {0x3190, 0x3191}, {0x3196, 0x319F}, {0x31C0, 0x31E3},
|
||||||
|
{0x3200, 0x321E}, {0x322A, 0x3247}, {0x3250, 0x3250}, {0x3260, 0x327F}, {0x328A, 0x32B0}, {0x32C0, 0x33FF}, {0x4DC0, 0x4DFF}, {0xA490, 0xA4C6}, {0xA700, 0xA716}, {0xA720, 0xA721}, {0xA789, 0xA78A},
|
||||||
|
{0xA828, 0xA82B}, {0xA836, 0xA839}, {0xAA77, 0xAA79}, {0xAB5B, 0xAB5B}, {0xAB6A, 0xAB6B}, {0xFB29, 0xFB29}, {0xFBB2, 0xFBC1}, {0xFDFC, 0xFDFD}, {0xFE62, 0xFE62}, {0xFE64, 0xFE66}, {0xFE69, 0xFE69},
|
||||||
|
{0xFF04, 0xFF04}, {0xFF0B, 0xFF0B}, {0xFF1C, 0xFF1E}, {0xFF3E, 0xFF3E}, {0xFF40, 0xFF40}, {0xFF5C, 0xFF5C}, {0xFF5E, 0xFF5E}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}, {0x10137, 0x1013F},
|
||||||
|
{0x10179, 0x10189}, {0x1018C, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FC}, {0x10877, 0x10878}, {0x10AC8, 0x10AC8}, {0x1173F, 0x1173F}, {0x11FD5, 0x11FF1}, {0x16B3C, 0x16B3F},
|
||||||
|
{0x16B45, 0x16B45}, {0x1BC9C, 0x1BC9C}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D164}, {0x1D16A, 0x1D16C}, {0x1D183, 0x1D184}, {0x1D18C, 0x1D1A9}, {0x1D1AE, 0x1D1E8}, {0x1D200, 0x1D241},
|
||||||
|
{0x1D245, 0x1D245}, {0x1D300, 0x1D356}, {0x1D6C1, 0x1D6C1}, {0x1D6DB, 0x1D6DB}, {0x1D6FB, 0x1D6FB}, {0x1D715, 0x1D715}, {0x1D735, 0x1D735}, {0x1D74F, 0x1D74F}, {0x1D76F, 0x1D76F}, {0x1D789, 0x1D789},
|
||||||
|
{0x1D7A9, 0x1D7A9}, {0x1D7C3, 0x1D7C3}, {0x1D800, 0x1D9FF}, {0x1DA37, 0x1DA3A}, {0x1DA6D, 0x1DA74}, {0x1DA76, 0x1DA83}, {0x1DA85, 0x1DA86}, {0x1E14F, 0x1E14F}, {0x1E2FF, 0x1E2FF}, {0x1ECAC, 0x1ECAC},
|
||||||
|
{0x1ECB0, 0x1ECB0}, {0x1ED2E, 0x1ED2E}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F10D, 0x1F1AD},
|
||||||
|
{0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, {0x1F6E0, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8},
|
||||||
|
{0x1F7E0, 0x1F7EB}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F978}, {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1FA53},
|
||||||
|
{0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA},
|
||||||
|
};
|
||||||
|
|
||||||
|
static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
|
||||||
|
{0x0, 0x8}, {0xE, 0x1B}, {0x7F, 0x84}, {0x86, 0x9F}, {0xAD, 0xAD}, {0x378, 0x379}, {0x380, 0x383}, {0x38B, 0x38B}, {0x38D, 0x38D}, {0x3A2, 0x3A2}, {0x530, 0x530}, {0x557, 0x558}, {0x58B, 0x58C},
|
||||||
|
{0x590, 0x590}, {0x5C8, 0x5CF}, {0x5EB, 0x5EE}, {0x5F5, 0x605}, {0x61C, 0x61D}, {0x6DD, 0x6DD}, {0x70E, 0x70F}, {0x74B, 0x74C}, {0x7B2, 0x7BF}, {0x7FB, 0x7FC}, {0x82E, 0x82F}, {0x83F, 0x83F},
|
||||||
|
{0x85C, 0x85D}, {0x85F, 0x85F}, {0x86B, 0x89F}, {0x8B5, 0x8B5}, {0x8C8, 0x8D2}, {0x8E2, 0x8E2}, {0x984, 0x984}, {0x98D, 0x98E}, {0x991, 0x992}, {0x9A9, 0x9A9}, {0x9B1, 0x9B1}, {0x9B3, 0x9B5},
|
||||||
|
{0x9BA, 0x9BB}, {0x9C5, 0x9C6}, {0x9C9, 0x9CA}, {0x9CF, 0x9D6}, {0x9D8, 0x9DB}, {0x9DE, 0x9DE}, {0x9E4, 0x9E5}, {0x9FF, 0xA00}, {0xA04, 0xA04}, {0xA0B, 0xA0E}, {0xA11, 0xA12}, {0xA29, 0xA29},
|
||||||
|
{0xA31, 0xA31}, {0xA34, 0xA34}, {0xA37, 0xA37}, {0xA3A, 0xA3B}, {0xA3D, 0xA3D}, {0xA43, 0xA46}, {0xA49, 0xA4A}, {0xA4E, 0xA50}, {0xA52, 0xA58}, {0xA5D, 0xA5D}, {0xA5F, 0xA65}, {0xA77, 0xA80},
|
||||||
|
{0xA84, 0xA84}, {0xA8E, 0xA8E}, {0xA92, 0xA92}, {0xAA9, 0xAA9}, {0xAB1, 0xAB1}, {0xAB4, 0xAB4}, {0xABA, 0xABB}, {0xAC6, 0xAC6}, {0xACA, 0xACA}, {0xACE, 0xACF}, {0xAD1, 0xADF}, {0xAE4, 0xAE5},
|
||||||
|
{0xAF2, 0xAF8}, {0xB00, 0xB00}, {0xB04, 0xB04}, {0xB0D, 0xB0E}, {0xB11, 0xB12}, {0xB29, 0xB29}, {0xB31, 0xB31}, {0xB34, 0xB34}, {0xB3A, 0xB3B}, {0xB45, 0xB46}, {0xB49, 0xB4A}, {0xB4E, 0xB54},
|
||||||
|
{0xB58, 0xB5B}, {0xB5E, 0xB5E}, {0xB64, 0xB65}, {0xB78, 0xB81}, {0xB84, 0xB84}, {0xB8B, 0xB8D}, {0xB91, 0xB91}, {0xB96, 0xB98}, {0xB9B, 0xB9B}, {0xB9D, 0xB9D}, {0xBA0, 0xBA2}, {0xBA5, 0xBA7},
|
||||||
|
{0xBAB, 0xBAD}, {0xBBA, 0xBBD}, {0xBC3, 0xBC5}, {0xBC9, 0xBC9}, {0xBCE, 0xBCF}, {0xBD1, 0xBD6}, {0xBD8, 0xBE5}, {0xBFB, 0xBFF}, {0xC0D, 0xC0D}, {0xC11, 0xC11}, {0xC29, 0xC29}, {0xC3A, 0xC3C},
|
||||||
|
{0xC45, 0xC45}, {0xC49, 0xC49}, {0xC4E, 0xC54}, {0xC57, 0xC57}, {0xC5B, 0xC5F}, {0xC64, 0xC65}, {0xC70, 0xC76}, {0xC8D, 0xC8D}, {0xC91, 0xC91}, {0xCA9, 0xCA9}, {0xCB4, 0xCB4}, {0xCBA, 0xCBB},
|
||||||
|
{0xCC5, 0xCC5}, {0xCC9, 0xCC9}, {0xCCE, 0xCD4}, {0xCD7, 0xCDD}, {0xCDF, 0xCDF}, {0xCE4, 0xCE5}, {0xCF0, 0xCF0}, {0xCF3, 0xCFF}, {0xD0D, 0xD0D}, {0xD11, 0xD11}, {0xD45, 0xD45}, {0xD49, 0xD49},
|
||||||
|
{0xD50, 0xD53}, {0xD64, 0xD65}, {0xD80, 0xD80}, {0xD84, 0xD84}, {0xD97, 0xD99}, {0xDB2, 0xDB2}, {0xDBC, 0xDBC}, {0xDBE, 0xDBF}, {0xDC7, 0xDC9}, {0xDCB, 0xDCE}, {0xDD5, 0xDD5}, {0xDD7, 0xDD7},
|
||||||
|
{0xDE0, 0xDE5}, {0xDF0, 0xDF1}, {0xDF5, 0xE00}, {0xE3B, 0xE3E}, {0xE5C, 0xE80}, {0xE83, 0xE83}, {0xE85, 0xE85}, {0xE8B, 0xE8B}, {0xEA4, 0xEA4}, {0xEA6, 0xEA6}, {0xEBE, 0xEBF}, {0xEC5, 0xEC5},
|
||||||
|
{0xEC7, 0xEC7}, {0xECE, 0xECF}, {0xEDA, 0xEDB}, {0xEE0, 0xEFF}, {0xF48, 0xF48}, {0xF6D, 0xF70}, {0xF98, 0xF98}, {0xFBD, 0xFBD}, {0xFCD, 0xFCD}, {0xFDB, 0xFFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC},
|
||||||
|
{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F}, {0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F}, {0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1}, {0x12B6, 0x12B7}, {0x12BF, 0x12BF},
|
||||||
|
{0x12C1, 0x12C1}, {0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311}, {0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F}, {0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF}, {0x169D, 0x169F},
|
||||||
|
{0x16F9, 0x16FF}, {0x170D, 0x170D}, {0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F}, {0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F}, {0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF},
|
||||||
|
{0x180E, 0x180F}, {0x181A, 0x181F}, {0x1879, 0x187F}, {0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F}, {0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943}, {0x196E, 0x196F}, {0x1975, 0x197F},
|
||||||
|
{0x19AC, 0x19AF}, {0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D}, {0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F}, {0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1AC1, 0x1AFF}, {0x1B4C, 0x1B4F},
|
||||||
|
{0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB}, {0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1C8F}, {0x1CBB, 0x1CBC}, {0x1CC8, 0x1CCF}, {0x1CFB, 0x1CFF}, {0x1DFA, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F},
|
||||||
|
{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58}, {0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E}, {0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5}, {0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC},
|
||||||
|
{0x1FF0, 0x1FF1}, {0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x206F}, {0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F}, {0x20C0, 0x20CF}, {0x20F1, 0x20FF},
|
||||||
|
{0x218C, 0x218F}, {0x2427, 0x243F}, {0x244B, 0x245F}, {0x2B74, 0x2B75}, {0x2B96, 0x2B96}, {0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8}, {0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F},
|
||||||
|
{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F}, {0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7}, {0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF}, {0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF},
|
||||||
|
{0x2E53, 0x2E7F}, {0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF}, {0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098}, {0x3100, 0x3104}, {0x3130, 0x3130}, {0x318F, 0x318F}, {0x31E4, 0x31EF},
|
||||||
|
{0x321F, 0x321F}, {0x9FFD, 0x9FFF}, {0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F}, {0xA6F8, 0xA6FF}, {0xA7C0, 0xA7C1}, {0xA7CB, 0xA7F4}, {0xA82D, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F},
|
||||||
|
{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE}, {0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F}, {0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA},
|
||||||
|
{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10}, {0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F}, {0xAB6C, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF}, {0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA},
|
||||||
|
{0xD7FC, 0xF8FF}, {0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12}, {0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D}, {0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45}, {0xFBC2, 0xFBD2},
|
||||||
|
{0xFD40, 0xFD4F}, {0xFD90, 0xFD91}, {0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F}, {0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F}, {0xFE75, 0xFE75}, {0xFEFD, 0xFF00}, {0xFFBF, 0xFFC1},
|
||||||
|
{0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1}, {0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7}, {0xFFEF, 0xFFFB}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C}, {0x10027, 0x10027}, {0x1003B, 0x1003B},
|
||||||
|
{0x1003E, 0x1003E}, {0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF}, {0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F}, {0x1019D, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F},
|
||||||
|
{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF}, {0x10324, 0x1032C}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F}, {0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF}, {0x1049E, 0x1049F},
|
||||||
|
{0x104AA, 0x104AF}, {0x104D4, 0x104D7}, {0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E}, {0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F}, {0x10768, 0x107FF}, {0x10806, 0x10807},
|
||||||
|
{0x10809, 0x10809}, {0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E}, {0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF}, {0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E},
|
||||||
|
{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB}, {0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B}, {0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A36, 0x10A37}, {0x10A3B, 0x10A3E},
|
||||||
|
{0x10A49, 0x10A4F}, {0x10A59, 0x10A5F}, {0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF}, {0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77}, {0x10B92, 0x10B98}, {0x10B9D, 0x10BA8},
|
||||||
|
{0x10BB0, 0x10BFF}, {0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9}, {0x10D28, 0x10D2F}, {0x10D3A, 0x10E5F}, {0x10E7F, 0x10E7F}, {0x10EAA, 0x10EAA}, {0x10EAE, 0x10EAF}, {0x10EB2, 0x10EFF},
|
||||||
|
{0x10F28, 0x10F2F}, {0x10F5A, 0x10FAF}, {0x10FCC, 0x10FDF}, {0x10FF7, 0x10FFF}, {0x1104E, 0x11051}, {0x11070, 0x1107E}, {0x110BD, 0x110BD}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF}, {0x110FA, 0x110FF},
|
||||||
|
{0x11135, 0x11135}, {0x11148, 0x1114F}, {0x11177, 0x1117F}, {0x111E0, 0x111E0}, {0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F}, {0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E},
|
||||||
|
{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF}, {0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E}, {0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331}, {0x11334, 0x11334},
|
||||||
|
{0x1133A, 0x1133A}, {0x11345, 0x11346}, {0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356}, {0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F}, {0x11375, 0x113FF}, {0x1145C, 0x1145C},
|
||||||
|
{0x11462, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F}, {0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F}, {0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B9, 0x116BF}, {0x116CA, 0x116FF},
|
||||||
|
{0x1171B, 0x1171C}, {0x1172C, 0x1172F}, {0x11740, 0x117FF}, {0x1183C, 0x1189F}, {0x118F3, 0x118FE}, {0x11907, 0x11908}, {0x1190A, 0x1190B}, {0x11914, 0x11914}, {0x11917, 0x11917}, {0x11936, 0x11936},
|
||||||
|
{0x11939, 0x1193A}, {0x11947, 0x1194F}, {0x1195A, 0x1199F}, {0x119A8, 0x119A9}, {0x119D8, 0x119D9}, {0x119E5, 0x119FF}, {0x11A48, 0x11A4F}, {0x11AA3, 0x11ABF}, {0x11AF9, 0x11BFF}, {0x11C09, 0x11C09},
|
||||||
|
{0x11C37, 0x11C37}, {0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91}, {0x11CA8, 0x11CA8}, {0x11CB7, 0x11CFF}, {0x11D07, 0x11D07}, {0x11D0A, 0x11D0A}, {0x11D37, 0x11D39}, {0x11D3B, 0x11D3B},
|
||||||
|
{0x11D3E, 0x11D3E}, {0x11D48, 0x11D4F}, {0x11D5A, 0x11D5F}, {0x11D66, 0x11D66}, {0x11D69, 0x11D69}, {0x11D8F, 0x11D8F}, {0x11D92, 0x11D92}, {0x11D99, 0x11D9F}, {0x11DAA, 0x11EDF}, {0x11EF9, 0x11FAF},
|
||||||
|
{0x11FB1, 0x11FBF}, {0x11FF2, 0x11FFE}, {0x1239A, 0x123FF}, {0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF}, {0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F}, {0x16A5F, 0x16A5F},
|
||||||
|
{0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF}, {0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F}, {0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C}, {0x16B90, 0x16E3F}, {0x16E9B, 0x16EFF},
|
||||||
|
{0x16F4B, 0x16F4E}, {0x16F88, 0x16F8E}, {0x16FA0, 0x16FDF}, {0x16FE5, 0x16FEF}, {0x16FF2, 0x16FFF}, {0x187F8, 0x187FF}, {0x18CD6, 0x18CFF}, {0x18D09, 0x1AFFF}, {0x1B11F, 0x1B14F}, {0x1B153, 0x1B163},
|
||||||
|
{0x1B168, 0x1B16F}, {0x1B2FC, 0x1BBFF}, {0x1BC6B, 0x1BC6F}, {0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B}, {0x1BCA0, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128}, {0x1D173, 0x1D17A},
|
||||||
|
{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2DF}, {0x1D2F4, 0x1D2FF}, {0x1D357, 0x1D35F}, {0x1D379, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D}, {0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8},
|
||||||
|
{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC}, {0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C}, {0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A}, {0x1D53F, 0x1D53F},
|
||||||
|
{0x1D545, 0x1D545}, {0x1D547, 0x1D549}, {0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD}, {0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF}, {0x1E007, 0x1E007}, {0x1E019, 0x1E01A},
|
||||||
|
{0x1E022, 0x1E022}, {0x1E025, 0x1E025}, {0x1E02B, 0x1E0FF}, {0x1E12D, 0x1E12F}, {0x1E13E, 0x1E13F}, {0x1E14A, 0x1E14D}, {0x1E150, 0x1E2BF}, {0x1E2FA, 0x1E2FE}, {0x1E300, 0x1E7FF}, {0x1E8C5, 0x1E8C6},
|
||||||
|
{0x1E8D7, 0x1E8FF}, {0x1E94C, 0x1E94F}, {0x1E95A, 0x1E95D}, {0x1E960, 0x1EC70}, {0x1ECB5, 0x1ED00}, {0x1ED3E, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20}, {0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26},
|
||||||
|
{0x1EE28, 0x1EE28}, {0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A}, {0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48}, {0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50},
|
||||||
|
{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58}, {0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E}, {0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66}, {0x1EE6B, 0x1EE6B},
|
||||||
|
{0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78}, {0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A}, {0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA}, {0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF},
|
||||||
|
{0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F1AE, 0x1F1E5}, {0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F},
|
||||||
|
{0x1F252, 0x1F25F}, {0x1F266, 0x1F2FF}, {0x1F6D8, 0x1F6DF}, {0x1F6ED, 0x1F6EF}, {0x1F6FD, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D9, 0x1F7DF}, {0x1F7EC, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
|
||||||
|
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8B2, 0x1F8FF}, {0x1F979, 0x1F979}, {0x1F9CC, 0x1F9CC}, {0x1FA54, 0x1FA5F}, {0x1FA6E, 0x1FA6F}, {0x1FA75, 0x1FA77}, {0x1FA7B, 0x1FA7F},
|
||||||
|
{0x1FA87, 0x1FA8F}, {0x1FAA9, 0x1FAAF}, {0x1FAB7, 0x1FABF}, {0x1FAC3, 0x1FACF}, {0x1FAD7, 0x1FAFF}, {0x1FB93, 0x1FB93}, {0x1FBCB, 0x1FBEF}, {0x1FBFA, 0x1FFFF}, {0x2A6DE, 0x2A6FF}, {0x2B735, 0x2B73F},
|
||||||
|
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
|
||||||
|
};
|
||||||
|
|
||||||
|
// Encodes one codepoint as a UTF-8 byte sequence of 1 to 4 bytes.
//
// cp: the codepoint to encode; values up to 0x10ffff are accepted.
// Returns the encoded bytes as a std::string.
// Throws std::invalid_argument("invalid codepoint") when cp > 0x10ffff.
static std::string codepoint_to_utf8(uint32_t cp) {
    if (cp > 0x10ffff) {
        throw std::invalid_argument("invalid codepoint");
    }

    // Builds a continuation byte (10xxxxxx) from the 6 bits starting at `shift`.
    const auto trail = [cp](int shift) {
        return static_cast<char>(0x80 | ((cp >> shift) & 0x3f));
    };

    std::string encoded;
    if (cp < 0x80) {
        // 0xxxxxxx
        encoded += static_cast<char>(cp);
    } else if (cp < 0x800) {
        // 110xxxxx 10xxxxxx
        encoded += static_cast<char>(0xc0 | ((cp >> 6) & 0x1f));
        encoded += trail(0);
    } else if (cp < 0x10000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        encoded += static_cast<char>(0xe0 | ((cp >> 12) & 0x0f));
        encoded += trail(6);
        encoded += trail(0);
    } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        encoded += static_cast<char>(0xf0 | ((cp >> 18) & 0x07));
        encoded += trail(12);
        encoded += trail(6);
        encoded += trail(0);
    }
    return encoded;
}
|
||||||
|
|
||||||
|
static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
|
||||||
|
std::string result;
|
||||||
|
for (size_t i = 0; i < cps.size(); ++i) {
|
||||||
|
result.append(codepoint_to_utf8(cps[i]));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the UTF-8 sequence that starts at utf8[offset] and advances offset
// past it.
//
// utf8:   the byte string to read from.
// offset: in/out position of the lead byte; must be < utf8.size() (asserted).
//         It is only advanced when decoding succeeds.
// Returns the decoded codepoint.
// Throws std::invalid_argument("invalid character") when the lead byte is a
// continuation byte, the sequence is truncated, or a continuation byte is
// malformed; throws std::invalid_argument("invalid string") when the lead byte
// has its top five bits set (0xf8 and above).
static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
    assert(offset < utf8.size());

    const unsigned char lead = static_cast<unsigned char>(utf8[offset]);

    // 0xxxxxxx: plain ASCII, one byte.
    if (lead < 0x80) {
        offset += 1;
        return lead;
    }

    // 10xxxxxx: a continuation byte cannot start a sequence.
    if (lead < 0xc0) {
        throw std::invalid_argument("invalid character");
    }

    // Classify the lead byte: how many continuation bytes follow and which
    // payload bits the lead byte itself contributes.
    size_t   n_trailing = 0;
    uint32_t codepoint  = 0;
    if (lead < 0xe0) {
        n_trailing = 1;
        codepoint  = lead & 0x1f;
    } else if (lead < 0xf0) {
        n_trailing = 2;
        codepoint  = lead & 0x0f;
    } else if (lead < 0xf8) {
        n_trailing = 3;
        codepoint  = lead & 0x07;
    } else {
        throw std::invalid_argument("invalid string");
    }

    // The whole sequence must fit inside the string.
    if (offset + n_trailing >= utf8.size()) {
        throw std::invalid_argument("invalid character");
    }

    // Fold in 6 payload bits per continuation byte.
    for (size_t k = 1; k <= n_trailing; ++k) {
        const unsigned char next = static_cast<unsigned char>(utf8[offset + k]);
        if ((next & 0xc0) != 0x80) {
            throw std::invalid_argument("invalid character");
        }
        codepoint = (codepoint << 6) | (next & 0x3f);
    }

    offset += 1 + n_trailing;
    return codepoint;
}
|
||||||
|
|
||||||
|
static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
|
||||||
|
std::vector<uint32_t> result;
|
||||||
|
size_t offset = 0;
|
||||||
|
while (offset < utf8.size()) {
|
||||||
|
result.push_back(codepoint_from_utf8(utf8, offset));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Encodes one codepoint as UTF-16 code units.
//
// cp: the codepoint to encode; values up to 0x10ffff are accepted.
// Returns one unit for cp <= 0xffff, otherwise a high/low surrogate pair.
// Throws std::invalid_argument("invalid codepoint") when cp > 0x10ffff.
static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
    if (cp > 0x10ffff) {
        throw std::invalid_argument("invalid codepoint");
    }

    if (cp <= 0xffff) {
        return { static_cast<uint16_t>(cp) };
    }

    // Supplementary plane: split the 20-bit value (cp - 0x10000) into a
    // 10-bit high half and a 10-bit low half.
    const uint32_t reduced = cp - 0x10000;
    return {
        static_cast<uint16_t>(0xd800 | (reduced >> 10)),
        static_cast<uint16_t>(0xdc00 | (reduced & 0x03ff)),
    };
}
|
||||||
|
|
||||||
|
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
|
||||||
|
std::vector<uint16_t> result;
|
||||||
|
for (size_t i = 0; i < cps.size(); ++i) {
|
||||||
|
auto temp = codepoint_to_utf16(cps[i]);
|
||||||
|
result.insert(result.end(), temp.begin(), temp.end());
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the UTF-16 code unit (or surrogate pair) that starts at
// utf16[offset] and advances offset past it.
//
// utf16:  the code units to read from.
// offset: in/out position of the first unit; must be < utf16.size() (asserted).
//         It is only advanced when decoding succeeds.
// Returns the decoded codepoint. A unit that is not a high surrogate is
// returned as-is (this includes an unpaired low surrogate).
// Throws std::invalid_argument("invalid character") when a high surrogate is
// the last unit or is not followed by a low surrogate (0xdc00..0xdfff).
static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
    assert(offset < utf16.size());

    // Read relative to `offset`, not from the start of the buffer; otherwise
    // every call after the first would look at the wrong code units.
    const uint16_t lead = utf16[offset];

    // Anything outside 0xd800..0xdbff stands for itself.
    if ((lead & 0xfc00) != 0xd800) {
        offset += 1;
        return lead;
    }

    // A high surrogate must be followed by a low surrogate. The mask is 0xfc00
    // so that units in 0xfc00..0xffff are not mistaken for low surrogates.
    if (offset + 1 >= utf16.size() || (utf16[offset + 1] & 0xfc00) != 0xdc00) {
        throw std::invalid_argument("invalid character");
    }

    const uint32_t high_bits = static_cast<uint32_t>(lead & 0x03ff);
    const uint32_t low_bits  = static_cast<uint32_t>(utf16[offset + 1] & 0x03ff);

    offset += 2;
    return 0x10000 + ((high_bits << 10) | low_bits);
}
|
||||||
|
|
||||||
|
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
||||||
|
std::vector<uint32_t> result;
|
||||||
|
size_t offset = 0;
|
||||||
|
while (offset < utf16.size())
|
||||||
|
result.push_back(codepoint_from_utf16(utf16, offset));
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Codepoint category for codepoints that appear in none of the range tables;
// codepoint_type() also returns it for an empty string.
#define CODEPOINT_TYPE_UNIDENTIFIED 0
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by digit_ranges.
#define CODEPOINT_TYPE_DIGIT 1
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by letter_ranges.
#define CODEPOINT_TYPE_LETTER 2
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by whitespace_ranges.
#define CODEPOINT_TYPE_WHITESPACE 3
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by accent_mark_ranges.
#define CODEPOINT_TYPE_ACCENT_MARK 4
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by punctuation_ranges.
#define CODEPOINT_TYPE_PUNCTUATION 5
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by symbol_ranges.
#define CODEPOINT_TYPE_SYMBOL 6
|
||||||
|
// Category assigned by codepoint_type_map() to codepoints covered by control_ranges.
#define CODEPOINT_TYPE_CONTROL 7
|
||||||
|
|
||||||
|
static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
||||||
|
std::unordered_map<uint32_t, int> codepoint_types;
|
||||||
|
for (auto p : digit_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
|
||||||
|
}
|
||||||
|
for(auto p : letter_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
|
||||||
|
}
|
||||||
|
for(auto p : whitespace_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
|
||||||
|
}
|
||||||
|
for(auto p : accent_mark_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
|
||||||
|
}
|
||||||
|
for(auto p : punctuation_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
|
||||||
|
}
|
||||||
|
for (auto p : symbol_ranges) {
|
||||||
|
for (auto i = p.first; i <= p.second; ++i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
|
||||||
|
}
|
||||||
|
for(auto p : control_ranges) {
|
||||||
|
for(auto i = p.first; i <= p.second; ++ i)
|
||||||
|
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
|
||||||
|
}
|
||||||
|
return codepoint_types;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int codepoint_type(uint32_t cp) {
|
||||||
|
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
||||||
|
return codepoint_types[cp];
|
||||||
|
}
|
||||||
|
|
||||||
|
static int codepoint_type(const std::string & utf8) {
|
||||||
|
if (utf8.length() == 0)
|
||||||
|
return CODEPOINT_TYPE_UNIDENTIFIED;
|
||||||
|
size_t offset = 0;
|
||||||
|
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unordered_map<uint8_t, std::string> bytes_to_unicode_map_bpe() {
|
||||||
|
std::unordered_map<uint8_t, std::string> map;
|
||||||
|
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
|
}
|
||||||
|
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
|
}
|
||||||
|
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[ch] = codepoint_to_utf8(ch);
|
||||||
|
}
|
||||||
|
auto n = 0;
|
||||||
|
for (int ch = 0; ch < 256; ++ch) {
|
||||||
|
if (map.find(ch) == map.end()) {
|
||||||
|
map[ch] = codepoint_to_utf8(256 + n);
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::string bytes_to_unicode_bpe(uint8_t byte) {
|
||||||
|
static std::unordered_map<uint8_t, std::string> map = bytes_to_unicode_map_bpe();
|
||||||
|
return map.at(byte);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unordered_map<std::string, uint8_t> unicode_to_bytes_map_bpe() {
|
||||||
|
std::unordered_map<std::string, uint8_t> map;
|
||||||
|
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
|
}
|
||||||
|
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
|
}
|
||||||
|
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||||
|
assert(0 <= ch && ch < 256);
|
||||||
|
map[codepoint_to_utf8(ch)] = ch;
|
||||||
|
}
|
||||||
|
auto n = 0;
|
||||||
|
for (int ch = 0; ch < 256; ++ch) {
|
||||||
|
if (map.find(codepoint_to_utf8(ch)) == map.end()) {
|
||||||
|
map[codepoint_to_utf8(256 + n)] = ch;
|
||||||
|
++n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return map;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint8_t unicode_to_bytes_bpe(const std::string & utf8) {
|
||||||
|
static std::unordered_map<std::string, uint8_t> map = unicode_to_bytes_map_bpe();
|
||||||
|
return map.at(utf8);
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user