Tidy ggml-sycl (#5261)

* Tidy some code in ggml-sycl

* Remove blank space

* Remove std::printf comments

---------

Co-authored-by: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
AidanBeltonS 2024-02-02 08:39:48 +00:00 committed by GitHub
parent 6b91b1e0a9
commit b05102fe8c

@@ -1366,6 +1366,7 @@ namespace dpct
         }
 #else
         return q.memcpy(to_ptr, from_ptr, size, dep_events);
+        GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
     }
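The `GGML_UNUSED` lines added throughout this diff mark parameters that a given build configuration never reads, silencing unused-parameter warnings. A minimal sketch of the idiom, assuming the usual cast-to-void definition of the macro (`copy_async` here is a hypothetical stand-in for the dpct memcpy wrapper above, whose USM branch does not need `direction`):

    #include <cstddef>

    // Conventional unused-parameter idiom: referencing the name through a
    // cast-to-void counts as a "use", so -Wunused-parameter stays quiet.
    #define GGML_UNUSED(x) (void)(x)

    // Hypothetical stand-in for the wrapper above: in the USM build the
    // copy direction is inferred by the runtime, so `direction` is only
    // consulted by the non-USM branch.
    static void *copy_async(void *to_ptr, const void *from_ptr, size_t size,
                            int direction) {
        GGML_UNUSED(direction);
        // ... enqueue the actual q.memcpy(to_ptr, from_ptr, size) here ...
        return to_ptr;
    }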
@@ -1667,7 +1668,7 @@ namespace dpct
         using Ty = typename DataType<T>::T2;
         Ty s_h;
         if (get_pointer_attribute(q, s) == pointer_access_attribute::device_only)
-            detail::dpct_memcpy(q, (void *)&s_h, (void *)s, sizeof(T), device_to_host)
+            detail::dpct_memcpy(q, (void *)&s_h, (const void *)s, sizeof(T), device_to_host)
                 .wait();
         else
             s_h = *reinterpret_cast<const Ty *>(s);
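The change above only adjusts a cast: the source pointer `s` is const, so casting it to `(const void *)` rather than `(void *)` avoids stripping constness on the way into `dpct_memcpy`. A minimal sketch of the const-correct pattern, with `copy_scalar` as a hypothetical example name:

    #include <cstring>

    // Taking the source as `const void *` lets const inputs be passed
    // without a const-removing cast at the call site.
    static void copy_scalar(void *dst, const void *src, size_t n) {
        std::memcpy(dst, src, n);
    }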
@@ -1691,6 +1692,20 @@ namespace dpct
                           int ldb, const void *beta, void *c, int ldc)
     {
 #ifndef __INTEL_MKL__
+        GGML_UNUSED(q);
+        GGML_UNUSED(a_trans);
+        GGML_UNUSED(b_trans);
+        GGML_UNUSED(m);
+        GGML_UNUSED(n);
+        GGML_UNUSED(k);
+        GGML_UNUSED(alpha);
+        GGML_UNUSED(a);
+        GGML_UNUSED(lda);
+        GGML_UNUSED(b);
+        GGML_UNUSED(ldb);
+        GGML_UNUSED(beta);
+        GGML_UNUSED(c);
+        GGML_UNUSED(ldc);
         throw std::runtime_error("The oneAPI Math Kernel Library (oneMKL) Interfaces "
                                  "Project does not support this API.");
 #else
@@ -1830,7 +1845,7 @@ namespace dpct
     template <typename T>
     T permute_sub_group_by_xor(sycl::sub_group g, T x, unsigned int mask,
-                               int logical_sub_group_size = 32)
+                               unsigned int logical_sub_group_size = 32)
     {
         unsigned int id = g.get_local_linear_id();
         unsigned int start_index =
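Switching `logical_sub_group_size` to `unsigned int` keeps the index arithmetic that follows in a single arithmetic domain, since `id`, `start_index`, and `mask` are already unsigned, and so avoids implicit sign conversions. A sketch of that index computation, reconstructed from the lines visible in this hunk (the rest of the body is elided in the diff, so this is an assumption about its shape):

    // XOR-based sub-group permutation index: lanes are grouped into
    // logical sub-groups of `logical_sub_group_size`, and each lane
    // exchanges with the lane whose in-group offset differs by `mask`.
    unsigned int permute_index(unsigned int id, unsigned int mask,
                               unsigned int logical_sub_group_size = 32) {
        unsigned int start_index =
            id / logical_sub_group_size * logical_sub_group_size;
        unsigned int target_offset = (id % logical_sub_group_size) ^ mask;
        return start_index + target_offset;
    }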
@@ -2160,6 +2175,7 @@ namespace dpct
         }
 #else
         return q.memcpy(to_ptr, from_ptr, size, dep_events);
+        GGML_UNUSED(direction);
 #endif // DPCT_USM_LEVEL_NONE
     }
@@ -3302,7 +3318,7 @@ void log_ggml_var_device(const char*name, float *src, size_t total_elements, boo
     std::ofstream logfile;
     logfile.open(filename);
     // printf("local buf element %d\n", total_elements);
-    for(int i=0; i<total_elements; i++){
+    for(size_t i=0; i<total_elements; i++){
         if((i+1)%20 ==0) logfile <<std::endl;
         else logfile << local_buf[i] <<" ";
     }
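The loop-counter change above matters because `total_elements` is a `size_t`: comparing it against a signed `int` triggers -Wsign-compare, and an `int` counter cannot index buffers beyond INT_MAX elements. A minimal illustration of the fixed pattern (`dump_elements` is a hypothetical name; the real function also handles host/device buffers):

    #include <cstddef>
    #include <fstream>

    // The counter type matches the element count, so the comparison is
    // unsigned-vs-unsigned and very large buffers index correctly.
    static void dump_elements(std::ofstream &logfile, const float *buf,
                              size_t total_elements) {
        for (size_t i = 0; i < total_elements; i++) {
            if ((i + 1) % 20 == 0) logfile << std::endl;
            else logfile << buf[i] << " ";
        }
    }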
@@ -3396,6 +3412,7 @@ static __dpct_inline__ float warp_reduce_max(float x,
 static __dpct_inline__ float op_repeat(const float a, const float b) {
     return b;
+    GGML_UNUSED(a);
 }

 static __dpct_inline__ float op_add(const float a, const float b) {
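Note that `GGML_UNUSED(a)` lands after the `return`: the statement is never executed, but the compiler still sees `a` referenced, which is all the unused-parameter diagnostic checks for. Assuming the usual cast-to-void definition of the macro, the pattern reduces to:

    // The statement after `return` is unreachable, yet it still counts
    // as a use of `a` for -Wunused-parameter purposes.
    static float op_repeat(const float a, const float b) {
        return b;
        (void)a;
    }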
@@ -11156,10 +11173,10 @@ DPCT1082:64: Migration of CUmemGenericAllocationHandle type is not supported.
 //     g_sycl_pool_handles[GGML_SYCL_MAX_DEVICES];
 static dpct::device_ptr g_sycl_pool_addr[GGML_SYCL_MAX_DEVICES] = {0};
 static size_t g_sycl_pool_used[GGML_SYCL_MAX_DEVICES] = {0};
-static const size_t SYCL_POOL_VMM_MAX_SIZE = 1ull << 36; // 64 GB

 static void *ggml_sycl_pool_malloc_vmm(size_t size, size_t *actual_size) try {
+    GGML_UNUSED(size);
+    GGML_UNUSED(actual_size);
     return NULL;
 }
 catch (sycl::exception const &exc) {
@@ -11349,9 +11366,8 @@ void ggml_init_sycl() try {
             if(id!=user_device_id) continue;

             device_inx++;
-            int device_vmm = 0;

-            g_device_caps[device_inx].vmm = !!device_vmm;
+            g_device_caps[device_inx].vmm = 0;
             g_device_caps[device_inx].device_id = id;
             g_sycl_device_id2index[id].index = device_inx;
@@ -11359,18 +11375,12 @@ void ggml_init_sycl() try {
             SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
                 prop, dpct::dev_mgr::instance().get_device(id))));
-            // fprintf(stderr,
-            //         "  Device %d: %s, compute capability %d.%d, VMM: %s\n", id,
-            //         prop.get_name(), prop.get_major_version(),
-            //         prop.get_minor_version(), device_vmm ? "yes" : "no");

             g_tensor_split[device_inx] = total_vram;
             total_vram += prop.get_global_mem_size();

             g_device_caps[device_inx].cc =
                 100 * prop.get_major_version() + 10 * prop.get_minor_version();
-            // printf("g_device_caps[%d].cc=%d\n", device_inx, g_device_caps[device_inx].cc);
         }

         device_inx = -1;
         for (int id = 0; id < g_all_sycl_device_count; ++id) {
@@ -12206,7 +12216,6 @@ inline void ggml_sycl_op_mul_mat_sycl(
     // ldc == nrows of the matrix that cuBLAS writes into
     int ldc = dst->backend == GGML_BACKEND_GPU && device_id == g_main_device ? ne0 : row_diff;
-    const int compute_capability = g_device_caps[id].cc;
 #ifdef GGML_SYCL_F16
     bool use_fp16 = true; // TODO(Yu) SYCL capability check
 #else
@@ -12691,7 +12700,7 @@ static void ggml_sycl_set_peer_access(const int n_tokens) {
                 continue;
             }

-            int can_access_peer;
+            // int can_access_peer;
             // SYCL_CHECK(syclDeviceCanAccessPeer(&can_access_peer, id, id_other));
             // if (can_access_peer) {
             //     if (enable_peer_access) {
@@ -12716,7 +12725,6 @@ static void ggml_sycl_op_mul_mat(const ggml_tensor *src0,
     const int64_t ne01 = src0->ne[1];
     const int64_t ne02 = src0->ne[2];
     const int64_t ne03 = src0->ne[3];
-    const int64_t nrows0 = ggml_nrows(src0);

     const int64_t ne10 = src1->ne[0];
     const int64_t ne11 = src1->ne[1];
@@ -13812,13 +13820,6 @@ static void ggml_sycl_mul_mat_id(const ggml_tensor *src0,
         src1_row_extra.data_device[g_main_device_index] = src1_contiguous.get();
         dst_row_extra.data_device[g_main_device_index] = dst_contiguous.get();

-        const dpct::memcpy_direction src1_kind =
-            src1->backend == GGML_BACKEND_CPU ? dpct::host_to_device
-                                              : dpct::device_to_device;
-        const dpct::memcpy_direction dst_kind = dst->backend == GGML_BACKEND_CPU
-                                                    ? dpct::device_to_host
-                                                    : dpct::device_to_device;
-
         for (int32_t row_id = 0; row_id < n_as; ++row_id) {
             const struct ggml_tensor * src0_row = dst->src[row_id + 2];