2023-07-18 13:24:43 +02:00
#!/bin/bash
2023-07-22 10:48:22 +02:00
#
# sample usage:
#
# mkdir tmp
#
# # CPU-only build
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
# # with CUDA support
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
ggml : add unified SYCL backend for Intel GPUs (#2690)
* first update for migration
* update init_cublas
* add debug functio, commit all help code
* step 1
* step 2
* step3 add fp16, slower 31->28
* add GGML_LIST_DEVICE function
* step 5 format device and print
* step6, enhance error check, remove CUDA macro, enhance device id to fix none-zero id issue
* support main device is non-zero
* step7 add debug for code path, rm log
* step 8, rename all macro & func from cuda by sycl
* fix error of select non-zero device, format device list
* ren ggml-sycl.hpp -> ggml-sycl.h
* clear CMAKE to rm unused lib and options
* correct queue: rm dtct:get_queue
* add print tensor function to debug
* fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481
* summary dpct definition in one header file to replace folder:dpct
* refactor device log
* mv dpct definition from folder dpct to ggml-sycl.h
* update readme, refactor build script
* fix build with sycl
* set nthread=1 when sycl, increase performance
* add run script, comment debug code
* add ls-sycl-device tool
* add ls-sycl-device, rm unused files
* rm rear space
* dos2unix
* Update README_sycl.md
* fix return type
* remove sycl version from include path
* restore rm code to fix hang issue
* add syc and link for sycl readme
* rm original sycl code before refactor
* fix code err
* add know issue for pvc hang issue
* enable SYCL_F16 support
* align pr4766
* check for sycl blas, better performance
* cleanup 1
* remove extra endif
* add build&run script, clean CMakefile, update guide by review comments
* rename macro to intel hardware
* editor config format
* format fixes
* format fixes
* editor format fix
* Remove unused headers
* skip build sycl tool for other code path
* replace tab by space
* fix blas matmul function
* fix mac build
* restore hip dependency
* fix conflict
* ren as review comments
* mv internal function to .cpp file
* export funciton print_sycl_devices(), mv class dpct definition to source file
* update CI/action for sycl code, fix CI error of repeat/dup
* fix action ID format issue
* rm unused strategy
* enable llama_f16 in ci
* fix conflict
* fix build break on MacOS, due to CI of MacOS depend on external ggml, instead of internal ggml
* fix ci cases for unsupported data type
* revert unrelated changed in cuda cmake
remove useless nommq
fix typo of GGML_USE_CLBLAS_SYCL
* revert hip cmake changes
* fix indent
* add prefix in func name
* revert no mmq
* rm cpu blas duplicate
* fix no_new_line
* fix src1->type==F16 bug.
* pass batch offset for F16 src1
* fix batch error
* fix wrong code
* revert sycl checking in test-sampling
* pass void as arguments of ggml_backend_sycl_print_sycl_devices
* remove extra blank line in test-sampling
* revert setting n_threads in sycl
* implement std::isinf for icpx with fast math.
* Update ci/run.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* add copyright and MIT license declare
* update the cmd example
---------
Co-authored-by: jianyuzh <jianyu.zhang@intel.com>
Co-authored-by: luoyu-intel <yu.luo@intel.com>
Co-authored-by: Meng, Hengyu <hengyu.meng@intel.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-01-28 16:56:23 +01:00
# # with SYCL support
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
#
2023-07-18 13:24:43 +02:00
# Both positional arguments are required: $1 is the output directory and $2 is
# the mount directory. Checking $2 alone is enough, since it can only be set
# when $1 is set as well.
if [ -z "${2:-}" ]; then
    echo "usage: $0 <output-dir> <mnt-dir>"
    exit 1
fi

mkdir -p "$1"
mkdir -p "$2"

# Absolute paths, because the script changes directory several times later on.
OUT=$(realpath "$1")
MNT=$(realpath "$2")
2024-01-26 13:18:00 +01:00
# Remove results left over from a previous run. The glob has to stay outside
# the quotes: a quoted "*.log" is a literal file name and matches nothing.
# ${OUT:?} aborts the script instead of expanding to /*.log if OUT is empty.
rm -f "${OUT:?}"/*.log
rm -f "${OUT:?}"/*.exit
rm -f "${OUT:?}"/*.md
2023-07-18 13:24:43 +02:00
# SRC is the parent of the directory that contains this script.
sd=$(dirname "$0")
cd "$sd/../" || exit 1
SRC=$(pwd)
2024-02-17 22:03:14 +01:00
# Extra cmake flags shared by every build in this script. The value is a
# space-separated string that is later expanded unquoted on the cmake command
# line, so individual flags must not contain whitespace.
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"

if [ -n "${GG_BUILD_METAL:-}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_METAL_SHADER_DEBUG=ON"
fi

if [ -n "${GG_BUILD_CUDA:-}" ]; then
    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_CUDA=1"
fi
ggml : add unified SYCL backend for Intel GPUs (#2690)
* first update for migration
* update init_cublas
* add debug functio, commit all help code
* step 1
* step 2
* step3 add fp16, slower 31->28
* add GGML_LIST_DEVICE function
* step 5 format device and print
* step6, enhance error check, remove CUDA macro, enhance device id to fix none-zero id issue
* support main device is non-zero
* step7 add debug for code path, rm log
* step 8, rename all macro & func from cuda by sycl
* fix error of select non-zero device, format device list
* ren ggml-sycl.hpp -> ggml-sycl.h
* clear CMAKE to rm unused lib and options
* correct queue: rm dtct:get_queue
* add print tensor function to debug
* fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481
* summary dpct definition in one header file to replace folder:dpct
* refactor device log
* mv dpct definition from folder dpct to ggml-sycl.h
* update readme, refactor build script
* fix build with sycl
* set nthread=1 when sycl, increase performance
* add run script, comment debug code
* add ls-sycl-device tool
* add ls-sycl-device, rm unused files
* rm rear space
* dos2unix
* Update README_sycl.md
* fix return type
* remove sycl version from include path
* restore rm code to fix hang issue
* add syc and link for sycl readme
* rm original sycl code before refactor
* fix code err
* add know issue for pvc hang issue
* enable SYCL_F16 support
* align pr4766
* check for sycl blas, better performance
* cleanup 1
* remove extra endif
* add build&run script, clean CMakefile, update guide by review comments
* rename macro to intel hardware
* editor config format
* format fixes
* format fixes
* editor format fix
* Remove unused headers
* skip build sycl tool for other code path
* replace tab by space
* fix blas matmul function
* fix mac build
* restore hip dependency
* fix conflict
* ren as review comments
* mv internal function to .cpp file
* export funciton print_sycl_devices(), mv class dpct definition to source file
* update CI/action for sycl code, fix CI error of repeat/dup
* fix action ID format issue
* rm unused strategy
* enable llama_f16 in ci
* fix conflict
* fix build break on MacOS, due to CI of MacOS depend on external ggml, instead of internal ggml
* fix ci cases for unsupported data type
* revert unrelated changed in cuda cmake
remove useless nommq
fix typo of GGML_USE_CLBLAS_SYCL
* revert hip cmake changes
* fix indent
* add prefix in func name
* revert no mmq
* rm cpu blas duplicate
* fix no_new_line
* fix src1->type==F16 bug.
* pass batch offset for F16 src1
* fix batch error
* fix wrong code
* revert sycl checking in test-sampling
* pass void as arguments of ggml_backend_sycl_print_sycl_devices
* remove extra blank line in test-sampling
* revert setting n_threads in sycl
* implement std::isinf for icpx with fast math.
* Update ci/run.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update examples/sycl/run-llama2.sh
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Update CMakeLists.txt
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* add copyright and MIT license declare
* update the cmd example
---------
Co-authored-by: jianyuzh <jianyu.zhang@intel.com>
Co-authored-by: luoyu-intel <yu.luo@intel.com>
Co-authored-by: Meng, Hengyu <hengyu.meng@intel.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2024-01-28 16:56:23 +01:00
# SYCL builds are refused unless ONEAPI_ROOT is set, and are compiled with
# icx / icpx.
if [ -n "${GG_BUILD_SYCL:-}" ]; then
    if [ -z "${ONEAPI_ROOT:-}" ]; then
        echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:"
        echo "source /opt/intel/oneapi/setvars.sh"
        exit 1
    fi

    # Every cache entry needs its own leading -D; without it cmake does not
    # treat CMAKE_C_COMPILER=icx as a definition.
    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON"
fi
2023-07-18 13:24:43 +02:00
## helpers
# download a file if it does not exist or if it is outdated
# Download a URL into a directory, creating the directory if needed.
#   $1 - destination directory
#   $2 - URL to fetch
# Returns the status of mkdir, cd or wget, whichever fails first.
function gg_wget {
    local out=$1
    local url=$2

    mkdir -p "$out" || return

    # should not re-download if file is the same
    # The download runs in a subshell so the caller's working directory is
    # left alone even when cd or wget fails.
    (cd "$out" && wget -nv -N "$url")
}
# printf wrapper that appends to $OUT/README.md.
# The first argument is the printf format, the rest are its arguments; the
# leading "--" lets a format start with "-".
function gg_printf {
    printf -- "$@" >> "$OUT/README.md"
}
# Run one CI step and record its outcome.
#   $1 - step name; gg_run_<name> and gg_sum_<name> must both exist
# Globals written:
#   ci  - current step name, read by the gg_run_* / gg_sum_* functions
#   cur - exit status of the step
#   ret - bitwise OR of every step's exit status so far
# Files written: $OUT/<name>.log (step output) and $OUT/<name>.exit (status).
function gg_run {
    ci=$1

    # pipefail makes $? reflect the step rather than tee.
    set -o pipefail
    set -x

    "gg_run_$ci" | tee "$OUT/$ci.log"
    cur=$?
    echo "$cur" > "$OUT/$ci.exit"

    set +x
    set +o pipefail

    "gg_sum_$ci"

    ret=$((ret | cur))
}
## ci
# ctest_debug
# Configure and build a fresh Debug tree in $SRC/build-ci-debug, then run the
# ctest tests labelled "main", excluding test-opt.
# Reads the globals SRC, OUT, ci and CMAKE_EXTRA; writes
# $OUT/<ci>-{cmake,make,ctest}.log.
function gg_run_ctest_debug {
    cd "${SRC}"

    rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug

    set -e

    # CMAKE_EXTRA is deliberately unquoted: it holds several space-separated
    # flags that must reach cmake as separate words.
    (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} ..) 2>&1 | tee -a "$OUT/${ci}-cmake.log"
    (time make -j) 2>&1 | tee -a "$OUT/${ci}-make.log"

    (time ctest --output-on-failure -L main -E test-opt) 2>&1 | tee -a "$OUT/${ci}-ctest.log"

    set +e
}
# Append the summary of the debug ctest step to the README via gg_printf:
# a heading, the recorded exit status and the ctest log in a fenced block.
# Reads the globals OUT and ci.
function gg_sum_ctest_debug {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in debug mode\n'
    gg_printf '- status: %s\n' "$(cat "$OUT/${ci}.exit")"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat "$OUT/${ci}-ctest.log")"
    gg_printf '```\n'
    gg_printf '\n'
}
# ctest_release
# Configure and build a fresh Release tree in $SRC/build-ci-release, then run
# the ctest tests labelled "main". When GG_BUILD_LOW_PERF is set, test-opt is
# excluded.
# Reads the globals SRC, OUT, ci, CMAKE_EXTRA and GG_BUILD_LOW_PERF; writes
# $OUT/<ci>-{cmake,make,ctest}.log.
function gg_run_ctest_release {
    cd "${SRC}"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    # CMAKE_EXTRA is deliberately unquoted: it holds several space-separated
    # flags that must reach cmake as separate words.
    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} ..) 2>&1 | tee -a "$OUT/${ci}-cmake.log"
    (time make -j) 2>&1 | tee -a "$OUT/${ci}-make.log"

    local ctest_args=(--output-on-failure -L main)
    if [ -n "${GG_BUILD_LOW_PERF:-}" ]; then
        ctest_args+=(-E test-opt)
    fi

    (time ctest "${ctest_args[@]}") 2>&1 | tee -a "$OUT/${ci}-ctest.log"

    set +e
}
# Append the summary of the release ctest step to the README via gg_printf:
# a heading, the recorded exit status and the ctest log in a fenced block.
# Reads the globals OUT and ci.
function gg_sum_ctest_release {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest in release mode\n'
    gg_printf '- status: %s\n' "$(cat "$OUT/${ci}.exit")"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat "$OUT/${ci}-ctest.log")"
    gg_printf '```\n'
}
2024-01-26 13:18:00 +01:00
# Print the path of the first non-empty f16 GGUF model found under $MNT,
# preferring the 3B model over the 7B one. No trailing newline is written.
# If neither file exists (or both are empty) a message goes to stderr and the
# current shell exits with status 1, so call this from a command substitution.
function gg_get_model {
    local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf"
    local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf"
    local candidate

    for candidate in "$gguf_3b" "$gguf_7b"; do
        if [[ -s "$candidate" ]]; then
            printf '%s' "$candidate"
            return 0
        fi
    done

    echo >&2 "No model found. Can't run gg_run_ctest_with_model."
    exit 1
}
# Run the ctest tests labelled "model" inside the existing build-ci-debug
# tree, passing the model path through LLAMACPP_TEST_MODELFILE.
# Reads the globals SRC, OUT and ci; appends to $OUT/<ci>-ctest.log.
# Returns non-zero without running ctest when no model file is available.
function gg_run_ctest_with_model_debug {
    cd "${SRC}"

    # gg_get_model signals "no model" by exiting, which only ends the command
    # substitution subshell, so the failure is propagated explicitly.
    local model
    model=$(gg_get_model) || return

    cd build-ci-debug
    set -e
    # The variable is exported inside the subshell rather than written as a
    # "VAR=value time ctest" prefix: with such a prefix bash no longer treats
    # `time` as its keyword and looks for an external `time` command instead.
    (export LLAMACPP_TEST_MODELFILE="$model"; time ctest --output-on-failure -L model) 2>&1 | tee -a "$OUT/${ci}-ctest.log"
    set +e
    cd ..
}
# Run the ctest tests labelled "model" inside the existing build-ci-release
# tree, passing the model path through LLAMACPP_TEST_MODELFILE.
# Reads the globals SRC, OUT and ci; appends to $OUT/<ci>-ctest.log.
# Returns non-zero without running ctest when no model file is available.
function gg_run_ctest_with_model_release {
    cd "${SRC}"

    # gg_get_model signals "no model" by exiting, which only ends the command
    # substitution subshell, so the failure is propagated explicitly.
    local model
    model=$(gg_get_model) || return

    cd build-ci-release
    set -e
    # The variable is exported inside the subshell rather than written as a
    # "VAR=value time ctest" prefix: with such a prefix bash no longer treats
    # `time` as its keyword and looks for an external `time` command instead.
    (export LLAMACPP_TEST_MODELFILE="$model"; time ctest --output-on-failure -L model) 2>&1 | tee -a "$OUT/${ci}-ctest.log"
    set +e
    cd ..
}
# Append the summary of the debug "ctest with model" step to the README via
# gg_printf: a heading, the recorded exit status and the ctest log in a fenced
# block. Reads the globals OUT and ci.
function gg_sum_ctest_with_model_debug {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest with model files in debug mode\n'
    gg_printf '- status: %s\n' "$(cat "$OUT/${ci}.exit")"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat "$OUT/${ci}-ctest.log")"
    gg_printf '```\n'
}
# Append the summary of the release "ctest with model" step to the README via
# gg_printf: a heading, the recorded exit status and the ctest log in a fenced
# block. Reads the globals OUT and ci.
function gg_sum_ctest_with_model_release {
    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'Runs ctest with model files in release mode\n'
    gg_printf '- status: %s\n' "$(cat "$OUT/${ci}.exit")"
    gg_printf '```\n'
    gg_printf '%s\n' "$(cat "$OUT/${ci}-ctest.log")"
    gg_printf '```\n'
}
2023-07-18 13:24:43 +02:00
# open_llama_3b_v2
# End-to-end check with OpenLLaMA 3B-v2: download the model and test data,
# build a Release tree, convert and quantize the model, then run text
# generation, perplexity, imatrix, save-load-state and LoRA perplexity checks.
# Reads the globals SRC, OUT, ci and CMAKE_EXTRA; every tool's output is
# appended to a $OUT/<ci>-*.log file.
# Also (re)defines the helper functions check_ppl and compare_ppl.
function gg_run_open_llama_3b_v2 {
    cd "${SRC}"

    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/config.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/tokenizer.model
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/tokenizer_config.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/special_tokens_map.json
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/resolve/main/pytorch_model.bin
    gg_wget models-mnt/open-llama/3B-v2/ https://huggingface.co/openlm-research/open_llama_3b_v2/raw/main/generation_config.json

    gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
    unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
    # Only the first 60 lines of the test set are used by the checks below.
    head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw

    # Paths are relative to the build directory entered just below.
    path_models="../models-mnt/open-llama/3B-v2"
    path_wiki="../models-mnt/wikitext/wikitext-2-raw"

    rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release

    set -e

    # CMAKE_EXTRA is deliberately unquoted: it holds several space-separated
    # flags that must reach cmake as separate words.
    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_QKK_64=1 ..) 2>&1 | tee -a "$OUT/${ci}-cmake.log"
    (time make -j) 2>&1 | tee -a "$OUT/${ci}-make.log"

    python3 ../convert.py "${path_models}"

    # Quantized variants derived from the f16 model, in processing order.
    local quants=(q8_0 q4_0 q4_1 q5_0 q5_1 q2_k q3_k q4_k q5_k q6_k)
    local q

    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"
    model_q4_0="${path_models}/ggml-model-q4_0.gguf"

    wiki_test_60="${path_wiki}/wiki.test-60.raw"

    for q in "${quants[@]}"; do
        ./bin/quantize "${model_f16}" "${path_models}/ggml-model-${q}.gguf" "${q}"
    done

    # Text generation for every variant.
    for q in f16 "${quants[@]}"; do
        (time ./bin/main --model "${path_models}/ggml-model-${q}.gguf" -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is") 2>&1 | tee -a "$OUT/${ci}-tg-${q}.log"
    done

    # Perplexity output is appended to the same per-variant log.
    for q in f16 "${quants[@]}"; do
        (time ./bin/perplexity --model "${path_models}/ggml-model-${q}.gguf" -f "${wiki_test_60}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-tg-${q}.log"
    done

    (time ./bin/imatrix --model "${model_f16}" -f "${wiki_test_60}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-imatrix.log"

    (time ./bin/save-load-state --model "${model_q4_0}") 2>&1 | tee -a "$OUT/${ci}-save-load-state.log"

    # check_ppl <label> <text>
    # Takes the last decimal number in <text> as the perplexity and returns 20
    # when it is above 20.0, 0 otherwise. One result line goes to stdout.
    function check_ppl {
        local qnt="$1"
        local ppl
        ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        if [ "$(echo "$ppl > 20.0" | bc)" -eq 1 ]; then
            printf '  - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
            return 20
        fi

        printf '  - %s @ %s OK\n' "$qnt" "$ppl"
        return 0
    }

    for q in f16 "${quants[@]}"; do
        check_ppl "${q}" "$(grep "^\[1\]" "$OUT/${ci}-tg-${q}.log")" | tee -a "$OUT/${ci}-ppl.log"
    done

    grep "Final" "$OUT/${ci}-imatrix.log" >> "$OUT/${ci}-imatrix-sum.log"

    # lora

    # compare_ppl <label> <base-text> <lora-text>
    # Takes the last decimal number of each text as a perplexity and returns 20
    # when the base value is lower than the LoRA value, 0 otherwise.
    function compare_ppl {
        local qnt="$1"
        local ppl1 ppl2
        ppl1=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
        ppl2=$(echo "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)

        if [ "$(echo "$ppl1 < $ppl2" | bc)" -eq 1 ]; then
            # Report the base value, then the inequality that actually held
            # (LoRA value > base value).
            printf '  - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl1" "$ppl2" "$ppl1"
            return 20
        fi

        printf '  - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
        return 0
    }

    path_lora="../models-mnt/open-llama/3B-v2/lora"
    path_shakespeare="../models-mnt/shakespeare"

    shakespeare="${path_shakespeare}/shakespeare.txt"
    lora_shakespeare="${path_lora}/ggml-adapter-model.bin"

    gg_wget "${path_lora}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_config.json
    gg_wget "${path_lora}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/adapter_model.bin
    gg_wget "${path_shakespeare}" https://huggingface.co/slaren/open_llama_3b_v2_shakespeare_lora/resolve/main/shakespeare.txt

    python3 ../convert-lora-to-ggml.py "${path_lora}"

    # f16
    (time ./bin/perplexity --model "${model_f16}" -f "${shakespeare}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-f16.log"
    (time ./bin/perplexity --model "${model_f16}" -f "${shakespeare}" --lora "${lora_shakespeare}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-lora-f16.log"
    compare_ppl "f16 shakespeare" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-f16.log")" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-lora-f16.log")" | tee -a "$OUT/${ci}-lora-ppl.log"

    # q8_0
    (time ./bin/perplexity --model "${model_q8_0}" -f "${shakespeare}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-q8_0.log"
    (time ./bin/perplexity --model "${model_q8_0}" -f "${shakespeare}" --lora "${lora_shakespeare}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-lora-q8_0.log"
    compare_ppl "q8_0 shakespeare" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-q8_0.log")" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-lora-q8_0.log")" | tee -a "$OUT/${ci}-lora-ppl.log"

    # q8_0 + f16 lora-base
    (time ./bin/perplexity --model "${model_q8_0}" -f "${shakespeare}" --lora "${lora_shakespeare}" --lora-base "${model_f16}" -c 128 -b 128 --chunks 1) 2>&1 | tee -a "$OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log"
    compare_ppl "q8_0 / f16 base shakespeare" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-q8_0.log")" "$(grep "^\[1\]" "$OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log")" | tee -a "$OUT/${ci}-lora-ppl.log"

    set +e
}
# Append the OpenLLaMA 3B-v2 summary to the README via gg_printf: exit status,
# perplexity / imatrix / lora summaries, then the raw log of every variant and
# of the save-load-state and shakespeare perplexity runs, each in a fenced
# block. Reads the globals OUT and ci.
function gg_sum_open_llama_3b_v2 {
    local q

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'OpenLLaMA 3B-v2:\n'
    gg_printf '- status: %s\n' "$(cat "$OUT/${ci}.exit")"
    gg_printf '- perplexity:\n%s\n' "$(cat "$OUT/${ci}-ppl.log")"
    gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat "$OUT/${ci}-imatrix-sum.log")"
    gg_printf '- lora:\n%s\n' "$(cat "$OUT/${ci}-lora-ppl.log")"

    # The f16 and save-load-state headings carry a trailing space after the
    # colon, so they are written out separately from the loop.
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat "$OUT/${ci}-tg-f16.log")"
    for q in q8_0 q4_0 q4_1 q5_0 q5_1 q2_k q3_k q4_k q5_k q6_k; do
        gg_printf '- %s:\n```\n%s\n```\n' "${q}" "$(cat "$OUT/${ci}-tg-${q}.log")"
    done
    gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat "$OUT/${ci}-save-load-state.log")"

    gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat "$OUT/${ci}-ppl-shakespeare-f16.log")"
    gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat "$OUT/${ci}-ppl-shakespeare-lora-f16.log")"
    gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat "$OUT/${ci}-ppl-shakespeare-q8_0.log")"
    gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat "$OUT/${ci}-ppl-shakespeare-lora-q8_0.log")"
    gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat "$OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log")"
}
# open_llama_7b_v2
# requires: GG_BUILD_CUDA
function gg_run_open_llama_7b_v2 {
cd ${ SRC }
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json
2024-02-18 21:39:30 +01:00
gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip
2023-07-22 10:48:22 +02:00
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
path_models = "../models-mnt/open-llama/7B-v2"
path_wiki = "../models-mnt/wikitext/wikitext-2-raw"
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
set -e
2024-03-26 01:16:01 +01:00
( time cmake -DCMAKE_BUILD_TYPE= Release ${ CMAKE_EXTRA } -DLLAMA_CUDA= 1 .. ) 2>& 1 | tee -a $OUT /${ ci } -cmake.log
( time make -j ) 2>& 1 | tee -a $OUT /${ ci } -make.log
2023-07-22 10:48:22 +02:00
python3 ../convert.py ${ path_models }
2023-08-21 22:07:43 +02:00
model_f16 = " ${ path_models } /ggml-model-f16.gguf "
model_q8_0 = " ${ path_models } /ggml-model-q8_0.gguf "
model_q4_0 = " ${ path_models } /ggml-model-q4_0.gguf "
model_q4_1 = " ${ path_models } /ggml-model-q4_1.gguf "
model_q5_0 = " ${ path_models } /ggml-model-q5_0.gguf "
model_q5_1 = " ${ path_models } /ggml-model-q5_1.gguf "
model_q2_k = " ${ path_models } /ggml-model-q2_k.gguf "
model_q3_k = " ${ path_models } /ggml-model-q3_k.gguf "
model_q4_k = " ${ path_models } /ggml-model-q4_k.gguf "
model_q5_k = " ${ path_models } /ggml-model-q5_k.gguf "
model_q6_k = " ${ path_models } /ggml-model-q6_k.gguf "
2023-07-22 10:48:22 +02:00
wiki_test = " ${ path_wiki } /wiki.test.raw "
./bin/quantize ${ model_f16 } ${ model_q8_0 } q8_0
./bin/quantize ${ model_f16 } ${ model_q4_0 } q4_0
./bin/quantize ${ model_f16 } ${ model_q4_1 } q4_1
./bin/quantize ${ model_f16 } ${ model_q5_0 } q5_0
./bin/quantize ${ model_f16 } ${ model_q5_1 } q5_1
./bin/quantize ${ model_f16 } ${ model_q2_k } q2_k
./bin/quantize ${ model_f16 } ${ model_q3_k } q3_k
./bin/quantize ${ model_f16 } ${ model_q4_k } q4_k
./bin/quantize ${ model_f16 } ${ model_q5_k } q5_k
./bin/quantize ${ model_f16 } ${ model_q6_k } q6_k
2023-08-27 09:03:27 +02:00
( time ./bin/main --model ${ model_f16 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-f16.log
( time ./bin/main --model ${ model_q8_0 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q8_0.log
( time ./bin/main --model ${ model_q4_0 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_0.log
( time ./bin/main --model ${ model_q4_1 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_1.log
( time ./bin/main --model ${ model_q5_0 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_0.log
( time ./bin/main --model ${ model_q5_1 } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_1.log
( time ./bin/main --model ${ model_q2_k } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q2_k.log
( time ./bin/main --model ${ model_q3_k } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q3_k.log
( time ./bin/main --model ${ model_q4_k } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_k.log
( time ./bin/main --model ${ model_q5_k } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_k.log
( time ./bin/main --model ${ model_q6_k } -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>& 1 | tee -a $OUT /${ ci } -tg-q6_k.log
2023-07-22 10:48:22 +02:00
( time ./bin/perplexity --model ${ model_f16 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-f16.log
( time ./bin/perplexity --model ${ model_q8_0 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q8_0.log
( time ./bin/perplexity --model ${ model_q4_0 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_0.log
( time ./bin/perplexity --model ${ model_q4_1 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_1.log
( time ./bin/perplexity --model ${ model_q5_0 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_0.log
( time ./bin/perplexity --model ${ model_q5_1 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_1.log
( time ./bin/perplexity --model ${ model_q2_k } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q2_k.log
( time ./bin/perplexity --model ${ model_q3_k } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q3_k.log
( time ./bin/perplexity --model ${ model_q4_k } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q4_k.log
( time ./bin/perplexity --model ${ model_q5_k } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q5_k.log
( time ./bin/perplexity --model ${ model_q6_k } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -tg-q6_k.log
2024-01-17 17:46:30 +01:00
( time ./bin/imatrix --model ${ model_f16 } -f ${ wiki_test } -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>& 1 | tee -a $OUT /${ ci } -imatrix.log
2023-10-17 18:12:46 +02:00
( time ./bin/save-load-state --model ${ model_q4_0 } ) 2>& 1 | tee -a $OUT /${ ci } -save-load-state.log
2023-07-22 10:48:22 +02:00
# Report whether a perplexity value is within the accepted bound of 20.0.
#
# Arguments:
#   $1 - label printed in the report line (e.g. the quantization name)
#   $2 - text to scan; the LAST decimal number found in it is taken as the
#        perplexity value
# Outputs:
#   exactly one report line on stdout
# Returns:
#   0  - a value was found and it is <= 20.0
#   20 - the value is > 20.0, or no decimal number could be found in $2
function check_ppl {
    local qnt="$1"
    local ppl

    # `|| ppl=""` keeps a non-matching grep from aborting the function when the
    # caller runs with `set -e` and `pipefail`; the empty case is reported below.
    ppl=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || ppl=""

    # Without this guard an empty value would make the numeric test below
    # error out and the function would fall through to the "OK" branch.
    if [ -z "$ppl" ]; then
        printf ' - %s @ %s (FAIL: no perplexity value found)\n' "$qnt" "n/a"
        return 20
    fi

    # awk does the floating-point comparison; it exits 0 when ppl > 20.0.
    if awk -v ppl="$ppl" 'BEGIN { exit !(ppl + 0 > 20.0) }'; then
        printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
        return 20
    fi

    printf ' - %s @ %s OK\n' "$qnt" "$ppl"
    return 0
}
check_ppl "f16" " $( cat $OUT /${ ci } -tg-f16.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q8_0" " $( cat $OUT /${ ci } -tg-q8_0.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q4_0" " $( cat $OUT /${ ci } -tg-q4_0.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q4_1" " $( cat $OUT /${ ci } -tg-q4_1.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q5_0" " $( cat $OUT /${ ci } -tg-q5_0.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q5_1" " $( cat $OUT /${ ci } -tg-q5_1.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q2_k" " $( cat $OUT /${ ci } -tg-q2_k.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q3_k" " $( cat $OUT /${ ci } -tg-q3_k.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q4_k" " $( cat $OUT /${ ci } -tg-q4_k.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q5_k" " $( cat $OUT /${ ci } -tg-q5_k.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
check_ppl "q6_k" " $( cat $OUT /${ ci } -tg-q6_k.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -ppl.log
2024-01-17 17:46:30 +01:00
cat $OUT /${ ci } -imatrix.log | grep "Final" >> $OUT /${ ci } -imatrix-sum.log
2023-08-27 09:03:27 +02:00
# lora
# Compare two perplexity results and fail when the first is lower than the
# second.
#
# Arguments:
#   $1 - label printed in the report line
#   $2 - text holding the first result; its LAST decimal number is used
#   $3 - text holding the second result; its LAST decimal number is used
# Outputs:
#   exactly one report line on stdout
# Returns:
#   0  - both values were found and the first is >= the second
#   20 - the first value is lower than the second, or a value is missing
function compare_ppl {
    local qnt="$1"
    local ppl1 ppl2

    # `|| pplN=""` keeps a non-matching grep from aborting the function when
    # the caller runs with `set -e` and `pipefail`.
    ppl1=$(printf '%s\n' "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || ppl1=""
    ppl2=$(printf '%s\n' "$3" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) || ppl2=""

    # A missing value cannot be compared; report it instead of printing "OK".
    if [ -z "$ppl1" ] || [ -z "$ppl2" ]; then
        printf ' - %s @ %s %s (FAIL: missing perplexity value)\n' \
            "$qnt" "${ppl1:-n/a}" "${ppl2:-n/a}"
        return 20
    fi

    # Numeric (not lexical) comparison; awk exits 0 when ppl1 < ppl2.
    if awk -v a="$ppl1" -v b="$ppl2" 'BEGIN { exit !(a + 0 < b + 0) }'; then
        # The failing condition is ppl1 < ppl2, so the true statement to print
        # is "ppl2 > ppl1".
        printf ' - %s @ %s (FAIL: %s > %s)\n' "$qnt" "$ppl1" "$ppl2" "$ppl1"
        return 20
    fi

    printf ' - %s @ %s %s OK\n' "$qnt" "$ppl1" "$ppl2"
    return 0
}
path_lora = "../models-mnt/open-llama/7B-v2/lora"
path_shakespeare = "../models-mnt/shakespeare"
shakespeare = " ${ path_shakespeare } /shakespeare.txt "
lora_shakespeare = " ${ path_lora } /ggml-adapter-model.bin "
gg_wget ${ path_lora } https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_config.json
gg_wget ${ path_lora } https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/adapter_model.bin
gg_wget ${ path_shakespeare } https://huggingface.co/slaren/open_llama_7b_v2_shakespeare_lora/resolve/main/shakespeare.txt
python3 ../convert-lora-to-ggml.py ${ path_lora }
# f16
( time ./bin/perplexity --model ${ model_f16 } -f ${ shakespeare } -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>& 1 | tee -a $OUT /${ ci } -ppl-shakespeare-f16.log
( time ./bin/perplexity --model ${ model_f16 } -f ${ shakespeare } --lora ${ lora_shakespeare } -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>& 1 | tee -a $OUT /${ ci } -ppl-shakespeare-lora-f16.log
compare_ppl "f16 shakespeare" " $( cat $OUT /${ ci } -ppl-shakespeare-f16.log | grep "^\[1\]" ) " " $( cat $OUT /${ ci } -ppl-shakespeare-lora-f16.log | grep "^\[1\]" ) " | tee -a $OUT /${ ci } -lora-ppl.log
# currently not supported by the CUDA backend
# q8_0
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-q8_0.log
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0.log
#compare_ppl "q8_0 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
# q8_0 + f16 lora-base
#(time ./bin/perplexity --model ${model_q8_0} -f ${shakespeare} --lora ${lora_shakespeare} --lora-base ${model_f16} -t 1 -ngl 999 -c 2048 -b 512 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log
#compare_ppl "q8_0 / f16 shakespeare" "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log | grep "^\[1\]")" "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-lora-ppl.log
2023-07-22 10:48:22 +02:00
set +e
}
# Emit the markdown summary of the OpenLLaMA 7B-v2 run through gg_printf.
#
# Globals:
#   OUT (read) - directory containing the log files of the run
#   ci  (read) - name of the run; every log file name starts with it
# Outputs:
#   one gg_printf call per section (gg_printf is defined elsewhere in this
#   file and is called with a printf-style format followed by its arguments)
function gg_sum_open_llama_7b_v2 {
    # Quoted once here so that a results directory containing whitespace or
    # glob characters does not break the `cat` calls below.
    local log_prefix="${OUT}/${ci}"
    local quant_name

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'OpenLLaMA 7B-v2:\n'
    gg_printf '- status: %s\n' "$(cat "${log_prefix}.exit")"
    gg_printf '- perplexity:\n%s\n' "$(cat "${log_prefix}-ppl.log")"
    gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat "${log_prefix}-imatrix-sum.log")"
    gg_printf '- lora:\n%s\n' "$(cat "${log_prefix}-lora-ppl.log")"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat "${log_prefix}-tg-f16.log")"

    # The quantized sections differ only by name; the resulting format string
    # for each one is "- <name>:\n```\n%s\n```\n".
    for quant_name in q8_0 q4_0 q4_1 q5_0 q5_1 q2_k q3_k q4_k q5_k q6_k; do
        gg_printf "- ${quant_name}:"'\n```\n%s\n```\n' "$(cat "${log_prefix}-tg-${quant_name}.log")"
    done

    gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat "${log_prefix}-save-load-state.log")"
    gg_printf '- shakespeare (f16):\n```\n%s\n```\n' "$(cat "${log_prefix}-ppl-shakespeare-f16.log")"
    gg_printf '- shakespeare (f16 lora):\n```\n%s\n```\n' "$(cat "${log_prefix}-ppl-shakespeare-lora-f16.log")"
    #gg_printf '- shakespeare (q8_0):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-q8_0.log)"
    #gg_printf '- shakespeare (q8_0 lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0.log)"
    #gg_printf '- shakespeare (q8_0 / f16 base lora):\n```\n%s\n```\n' "$(cat $OUT/${ci}-ppl-shakespeare-lora-q8_0-f16.log)"
}
2024-02-13 12:01:29 +01:00
# bge-small

# Fetch the BGE Small model files, build the release binaries, convert the
# model to GGUF, quantize it to q8_0 and run the embedding example on both.
#
# Globals:
#   SRC         (read)    - source tree root; the function changes into it
#   OUT         (read)    - directory receiving the log files
#   ci          (read)    - name of the run; prefix of every log file
#   CMAKE_EXTRA (read)    - optional extra cmake arguments
#   path_models, model_f16, model_q8_0 (written)
# Returns:
#   1 if the source or build directory cannot be entered. After `set -e` any
#   failing command terminates the shell that runs this function.
function gg_run_embd_bge_small {
    # Everything below uses relative paths (including `rm -rf`), so a failed
    # cd must stop the function instead of continuing in the wrong directory.
    cd "${SRC}" || return 1

    # gg_wget is defined elsewhere in this file; presumably it downloads the
    # URL (2nd argument) into the directory (1st argument).
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/tokenizer.model
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt
    gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json

    gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json

    # Relative to the build directory entered below.
    path_models="../models-mnt/bge-small"

    # These run before `set -e`, so each step is checked explicitly.
    rm -rf build-ci-release || return 1
    mkdir build-ci-release || return 1
    cd build-ci-release || return 1

    set -e

    # CMAKE_EXTRA is left unquoted on purpose: it may expand to several cmake
    # arguments, or to nothing at all.
    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a "$OUT/${ci}-cmake.log"
    (time make -j ) 2>&1 | tee -a "$OUT/${ci}-make.log"

    python3 ../convert-hf-to-gguf.py "${path_models}"

    model_f16="${path_models}/ggml-model-f16.gguf"
    model_q8_0="${path_models}/ggml-model-q8_0.gguf"

    ./bin/quantize "${model_f16}" "${model_q8_0}" q8_0

    (time ./bin/embedding --model "${model_f16}" -p "I believe the meaning of life is" ) 2>&1 | tee -a "$OUT/${ci}-tg-f16.log"
    (time ./bin/embedding --model "${model_q8_0}" -p "I believe the meaning of life is" ) 2>&1 | tee -a "$OUT/${ci}-tg-q8_0.log"

    set +e
}
# Emit the markdown summary of the BGE Small embedding run through gg_printf.
#
# Globals:
#   OUT (read) - directory containing the log files of the run
#   ci  (read) - name of the run; every log file name starts with it
# Outputs:
#   one gg_printf call per section (gg_printf is defined elsewhere in this
#   file and is called with a printf-style format followed by its arguments)
function gg_sum_embd_bge_small {
    # Quoted once here so that a results directory containing whitespace or
    # glob characters does not break the `cat` calls below.
    local log_prefix="${OUT}/${ci}"

    gg_printf '### %s\n\n' "${ci}"

    gg_printf 'BGE Small (BERT):\n'
    gg_printf '- status: %s\n' "$(cat "${log_prefix}.exit")"
    gg_printf '- f16: \n```\n%s\n```\n' "$(cat "${log_prefix}-tg-f16.log")"
    gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat "${log_prefix}-tg-q8_0.log")"
}
2023-07-18 13:24:43 +02:00
## main
#
# Environment switches read below:
#   GG_BUILD_LOW_PERF - when non-empty, skip the model setup and all
#                       model-based runs
#   GG_BUILD_VRAM_GB  - when set, the large-model runs need a value >= 8
#   GG_BUILD_CUDA     - when non-empty, run the 7B model instead of the 3B one

if [ -z "${GG_BUILD_LOW_PERF}" ]; then
    # Create symlink: ${SRC}/models-mnt -> ${MNT}/models
    # `:?` aborts the script instead of letting an empty SRC/MNT turn these
    # into operations on paths directly under `/`.
    rm -rf "${SRC:?}/models-mnt"
    mnt_models="${MNT:?}/models"
    mkdir -p "${mnt_models}"
    ln -sfn "${mnt_models}" "${SRC}/models-mnt"

    # Create a fresh python3 venv and enter it
    python3 -m venv "$MNT/venv"
    source "$MNT/venv/bin/activate"

    pip install -r "${SRC}/requirements.txt" --disable-pip-version-check
    # NOTE(review): `gguf-py` is a relative path, so this presumably relies on
    # the current directory being the source tree root - confirm.
    pip install --editable gguf-py --disable-pip-version-check
fi

# Each step below runs only while ret is still 0. gg_run is defined elsewhere
# in this file and presumably updates ret with the outcome of the run.
ret=0

test $ret -eq 0 && gg_run ctest_debug
test $ret -eq 0 && gg_run ctest_release

if [ -z "${GG_BUILD_LOW_PERF}" ]; then
    test $ret -eq 0 && gg_run embd_bge_small

    if [ -z "${GG_BUILD_VRAM_GB}" ] || [ "${GG_BUILD_VRAM_GB}" -ge 8 ]; then
        if [ -z "${GG_BUILD_CUDA}" ]; then
            test $ret -eq 0 && gg_run open_llama_3b_v2
        else
            test $ret -eq 0 && gg_run open_llama_7b_v2
        fi
        test $ret -eq 0 && gg_run ctest_with_model_debug
        test $ret -eq 0 && gg_run ctest_with_model_release
    fi
fi

exit $ret