llama.cpp/ggml/include/ggml-sycl.h

//
//  MIT license
//  Copyright (C) 2024 Intel Corporation
//  SPDX-License-Identifier: MIT
//

#pragma once

#include "ggml.h"
#include "ggml-backend.h"

#define GGML_SYCL_NAME "SYCL"
#define GGML_SYCL_MAX_DEVICES 48

#ifdef  __cplusplus
extern "C" {
#endif

// backend API
GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);

GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);

// devide buffer
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);

// split tensor buffer that splits matrices by rows across multiple devices
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);

// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);

GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);
GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);
GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,
                                                       char *description,
                                                       size_t description_size);
GGML_BACKEND_API int  ggml_backend_sycl_get_device_count();
GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);

// SYCL doesn't support registering host memory, keep here for reference
// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);

GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);

#ifdef  __cplusplus
}
#endif
format license text, restore apache license by legal suggestion (#5233) 2024-01-31 14:04:46 +01:00			`//`
			`// MIT license`
			`// Copyright (C) 2024 Intel Corporation`
			`// SPDX-License-Identifier: MIT`
			`//`
ggml : add unified SYCL backend for Intel GPUs (#2690) * first update for migration * update init_cublas * add debug functio, commit all help code * step 1 * step 2 * step3 add fp16, slower 31->28 * add GGML_LIST_DEVICE function * step 5 format device and print * step6, enhance error check, remove CUDA macro, enhance device id to fix none-zero id issue * support main device is non-zero * step7 add debug for code path, rm log * step 8, rename all macro & func from cuda by sycl * fix error of select non-zero device, format device list * ren ggml-sycl.hpp -> ggml-sycl.h * clear CMAKE to rm unused lib and options * correct queue: rm dtct:get_queue * add print tensor function to debug * fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481 * summary dpct definition in one header file to replace folder:dpct * refactor device log * mv dpct definition from folder dpct to ggml-sycl.h * update readme, refactor build script * fix build with sycl * set nthread=1 when sycl, increase performance * add run script, comment debug code * add ls-sycl-device tool * add ls-sycl-device, rm unused files * rm rear space * dos2unix * Update README_sycl.md * fix return type * remove sycl version from include path * restore rm code to fix hang issue * add syc and link for sycl readme * rm original sycl code before refactor * fix code err * add know issue for pvc hang issue * enable SYCL_F16 support * align pr4766 * check for sycl blas, better performance * cleanup 1 * remove extra endif * add build&run script, clean CMakefile, update guide by review comments * rename macro to intel hardware * editor config format * format fixes * format fixes * editor format fix * Remove unused headers * skip build sycl tool for other code path * replace tab by space * fix blas matmul function * fix mac build * restore hip dependency * fix conflict * ren as review comments * mv internal function to .cpp file * export funciton print_sycl_devices(), mv class dpct definition to source file * update CI/action for sycl code, fix CI error of repeat/dup * fix action ID format issue * rm unused strategy * enable llama_f16 in ci * fix conflict * fix build break on MacOS, due to CI of MacOS depend on external ggml, instead of internal ggml * fix ci cases for unsupported data type * revert unrelated changed in cuda cmake remove useless nommq fix typo of GGML_USE_CLBLAS_SYCL * revert hip cmake changes * fix indent * add prefix in func name * revert no mmq * rm cpu blas duplicate * fix no_new_line * fix src1->type==F16 bug. * pass batch offset for F16 src1 * fix batch error * fix wrong code * revert sycl checking in test-sampling * pass void as arguments of ggml_backend_sycl_print_sycl_devices * remove extra blank line in test-sampling * revert setting n_threads in sycl * implement std::isinf for icpx with fast math. * Update ci/run.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * add copyright and MIT license declare * update the cmd example --------- Co-authored-by: jianyuzh <jianyu.zhang@intel.com> Co-authored-by: luoyu-intel <yu.luo@intel.com> Co-authored-by: Meng, Hengyu <hengyu.meng@intel.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-01-28 16:56:23 +01:00
			`#pragma once`

			`#include "ggml.h"`
			`#include "ggml-backend.h"`
llama : reorganize source code + improve CMake (#8006) * scripts : update sync [no ci] * files : relocate [no ci] * ci : disable kompute build [no ci] * cmake : fixes [no ci] * server : fix mingw build ggml-ci * cmake : minor [no ci] * cmake : link math library [no ci] * cmake : build normal ggml library (not object library) [no ci] * cmake : fix kompute build ggml-ci * make,cmake : fix LLAMA_CUDA + replace GGML_CDEF_PRIVATE ggml-ci * move public backend headers to the public include directory (#8122) * move public backend headers to the public include directory * nix test * spm : fix metal header --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * scripts : fix sync paths [no ci] * scripts : sync ggml-blas.h [no ci] --------- Co-authored-by: slaren <slarengh@gmail.com> 2024-06-26 17:33:02 +02:00
			`#define GGML_SYCL_NAME "SYCL"`
			`#define GGML_SYCL_MAX_DEVICES 48`
ggml : add unified SYCL backend for Intel GPUs (#2690) * first update for migration * update init_cublas * add debug functio, commit all help code * step 1 * step 2 * step3 add fp16, slower 31->28 * add GGML_LIST_DEVICE function * step 5 format device and print * step6, enhance error check, remove CUDA macro, enhance device id to fix none-zero id issue * support main device is non-zero * step7 add debug for code path, rm log * step 8, rename all macro & func from cuda by sycl * fix error of select non-zero device, format device list * ren ggml-sycl.hpp -> ggml-sycl.h * clear CMAKE to rm unused lib and options * correct queue: rm dtct:get_queue * add print tensor function to debug * fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481 * summary dpct definition in one header file to replace folder:dpct * refactor device log * mv dpct definition from folder dpct to ggml-sycl.h * update readme, refactor build script * fix build with sycl * set nthread=1 when sycl, increase performance * add run script, comment debug code * add ls-sycl-device tool * add ls-sycl-device, rm unused files * rm rear space * dos2unix * Update README_sycl.md * fix return type * remove sycl version from include path * restore rm code to fix hang issue * add syc and link for sycl readme * rm original sycl code before refactor * fix code err * add know issue for pvc hang issue * enable SYCL_F16 support * align pr4766 * check for sycl blas, better performance * cleanup 1 * remove extra endif * add build&run script, clean CMakefile, update guide by review comments * rename macro to intel hardware * editor config format * format fixes * format fixes * editor format fix * Remove unused headers * skip build sycl tool for other code path * replace tab by space * fix blas matmul function * fix mac build * restore hip dependency * fix conflict * ren as review comments * mv internal function to .cpp file * export funciton print_sycl_devices(), mv class dpct definition to source file * update CI/action for sycl code, fix CI error of repeat/dup * fix action ID format issue * rm unused strategy * enable llama_f16 in ci * fix conflict * fix build break on MacOS, due to CI of MacOS depend on external ggml, instead of internal ggml * fix ci cases for unsupported data type * revert unrelated changed in cuda cmake remove useless nommq fix typo of GGML_USE_CLBLAS_SYCL * revert hip cmake changes * fix indent * add prefix in func name * revert no mmq * rm cpu blas duplicate * fix no_new_line * fix src1->type==F16 bug. * pass batch offset for F16 src1 * fix batch error * fix wrong code * revert sycl checking in test-sampling * pass void as arguments of ggml_backend_sycl_print_sycl_devices * remove extra blank line in test-sampling * revert setting n_threads in sycl * implement std::isinf for icpx with fast math. * Update ci/run.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * add copyright and MIT license declare * update the cmd example --------- Co-authored-by: jianyuzh <jianyu.zhang@intel.com> Co-authored-by: luoyu-intel <yu.luo@intel.com> Co-authored-by: Meng, Hengyu <hengyu.meng@intel.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-01-28 16:56:23 +01:00
			`#ifdef __cplusplus`
			`extern "C" {`
			`#endif`

[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00			`// backend API`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API ggml_backend_t ggml_backend_sycl_init(int device);`
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API bool ggml_backend_is_sycl(ggml_backend_t backend);`
[SYCL] Add SYCL Backend registry, device and Event Interfaces (#9705) * implemented missing SYCL event APIs * sycl : Added device and backend reg interfaces * Restructured ggml-sycl.cpp 2024-10-18 07:46:16 +02:00
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00			`// devide buffer`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);`
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00
			`// split tensor buffer that splits matrices by rows across multiple devices`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);`
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00
			`// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);`
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API void ggml_backend_sycl_print_sycl_devices(void);`
			`GGML_BACKEND_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len);`
			`GGML_BACKEND_API void ggml_backend_sycl_get_device_description(int device,`
[SYCL] Add SYCL Backend registry, device and Event Interfaces (#9705) * implemented missing SYCL event APIs * sycl : Added device and backend reg interfaces * Restructured ggml-sycl.cpp 2024-10-18 07:46:16 +02:00			`char *description,`
			`size_t description_size);`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API int ggml_backend_sycl_get_device_count();`
			`GGML_BACKEND_API void ggml_backend_sycl_get_device_memory(int device, size_t free, size_t total);`
[SYCL] offload op (#6217) * remove no USM methods * leave the schedule to ggml_backend_sched entirely 2024-03-24 05:04:25 +01:00
			`// SYCL doesn't support registering host memory, keep here for reference`
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`// GGML_BACKEND_API bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);`
			`// GGML_BACKEND_API void ggml_backend_sycl_unregister_host_buffer(void * buffer);`
[SYCL] Add SYCL Backend registry, device and Event Interfaces (#9705) * implemented missing SYCL event APIs * sycl : Added device and backend reg interfaces * Restructured ggml-sycl.cpp 2024-10-18 07:46:16 +02:00
ggml : build backends as libraries (#10256) * ggml : build backends as libraries --------- Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com> 2024-11-14 18:04:35 +01:00			`GGML_BACKEND_API ggml_backend_reg_t ggml_backend_sycl_reg(void);`
[SYCL] Add SYCL Backend registry, device and Event Interfaces (#9705) * implemented missing SYCL event APIs * sycl : Added device and backend reg interfaces * Restructured ggml-sycl.cpp 2024-10-18 07:46:16 +02:00
ggml : add unified SYCL backend for Intel GPUs (#2690) * first update for migration * update init_cublas * add debug functio, commit all help code * step 1 * step 2 * step3 add fp16, slower 31->28 * add GGML_LIST_DEVICE function * step 5 format device and print * step6, enhance error check, remove CUDA macro, enhance device id to fix none-zero id issue * support main device is non-zero * step7 add debug for code path, rm log * step 8, rename all macro & func from cuda by sycl * fix error of select non-zero device, format device list * ren ggml-sycl.hpp -> ggml-sycl.h * clear CMAKE to rm unused lib and options * correct queue: rm dtct:get_queue * add print tensor function to debug * fix error: wrong result in 658746bb26702e50f2c59c0e4ada8e9da6010481 * summary dpct definition in one header file to replace folder:dpct * refactor device log * mv dpct definition from folder dpct to ggml-sycl.h * update readme, refactor build script * fix build with sycl * set nthread=1 when sycl, increase performance * add run script, comment debug code * add ls-sycl-device tool * add ls-sycl-device, rm unused files * rm rear space * dos2unix * Update README_sycl.md * fix return type * remove sycl version from include path * restore rm code to fix hang issue * add syc and link for sycl readme * rm original sycl code before refactor * fix code err * add know issue for pvc hang issue * enable SYCL_F16 support * align pr4766 * check for sycl blas, better performance * cleanup 1 * remove extra endif * add build&run script, clean CMakefile, update guide by review comments * rename macro to intel hardware * editor config format * format fixes * format fixes * editor format fix * Remove unused headers * skip build sycl tool for other code path * replace tab by space * fix blas matmul function * fix mac build * restore hip dependency * fix conflict * ren as review comments * mv internal function to .cpp file * export funciton print_sycl_devices(), mv class dpct definition to source file * update CI/action for sycl code, fix CI error of repeat/dup * fix action ID format issue * rm unused strategy * enable llama_f16 in ci * fix conflict * fix build break on MacOS, due to CI of MacOS depend on external ggml, instead of internal ggml * fix ci cases for unsupported data type * revert unrelated changed in cuda cmake remove useless nommq fix typo of GGML_USE_CLBLAS_SYCL * revert hip cmake changes * fix indent * add prefix in func name * revert no mmq * rm cpu blas duplicate * fix no_new_line * fix src1->type==F16 bug. * pass batch offset for F16 src1 * fix batch error * fix wrong code * revert sycl checking in test-sampling * pass void as arguments of ggml_backend_sycl_print_sycl_devices * remove extra blank line in test-sampling * revert setting n_threads in sycl * implement std::isinf for icpx with fast math. * Update ci/run.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update examples/sycl/run-llama2.sh Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update CMakeLists.txt Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * add copyright and MIT license declare * update the cmd example --------- Co-authored-by: jianyuzh <jianyu.zhang@intel.com> Co-authored-by: luoyu-intel <yu.luo@intel.com> Co-authored-by: Meng, Hengyu <hengyu.meng@intel.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> 2024-01-28 16:56:23 +01:00			`#ifdef __cplusplus`
			`}`
			`#endif`