SYCL: Refactor ggml_sycl_compute_forward (#11121)

* SYCL: refactor ggml_sycl_compute_forward

* SYCL: add back GGML_UNUSED(dst) to ggml_sycl_cpy

* SYCL: add function name to no-op debug output

* SYCL: refactor some device info printing and add details of XMX availability
Akarshan Biswas 2025-01-10 05:43:03 +05:30 committed by GitHub
parent 1204f97270
commit c6860cc734
15 changed files with 223 additions and 230 deletions
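
The refactor follows one pattern across all of these files: each SYCL op wrapper loses its explicit src0/src1 parameters and instead reads its inputs from dst->src[], which lets ggml_sycl_compute_forward call the wrappers directly rather than going through the removed ggml_sycl_func_t pointer. A condensed sketch of that pattern (simplified from the diff below, not a literal excerpt):

    // Before: handlers took (ctx, src0, src1, dst) and were dispatched via a function pointer.
    // After:  handlers take (ctx, dst) and pull their inputs from dst->src[].
    void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
        GGML_SYCL_DEBUG("call %s\n", __func__);
        // src0 and src1 are now looked up from the destination tensor's source list.
        ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
        GGML_SYCL_DEBUG("call %s done\n", __func__);
    }

    bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
        switch (dst->op) {
            case GGML_OP_ADD:
                ggml_sycl_add(ctx, dst);   // direct call, no ggml_sycl_func_t indirection
                return true;
            default:
                return false;
        }
    }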


@@ -51,6 +51,10 @@ void ggml_sycl_host_free(void* ptr) try {
     std::exit(1);
 }
 
+bool gpu_has_xmx(sycl::device &dev) {
+    return dev.has(sycl::aspect::ext_intel_matrix);
+}
+
 int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block_size) {
     const int64_t max_range = std::numeric_limits<int>::max();
     int64_t sycl_down_blk_size = block_size;
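
The new gpu_has_xmx() helper is just a query of the DPC++ ext_intel_matrix device aspect. As a standalone illustration (not part of this patch; assumes a oneAPI/DPC++ compiler), the same aspect can be checked for every visible GPU:

    #include <sycl/sycl.hpp>
    #include <iostream>

    int main() {
        // List every GPU the SYCL runtime can see and whether it reports XMX (matrix) units.
        for (auto & dev : sycl::device::get_devices(sycl::info::device_type::gpu)) {
            const bool xmx = dev.has(sycl::aspect::ext_intel_matrix);
            std::cout << dev.get_info<sycl::info::device::name>()
                      << ": XMX " << (xmx ? "yes" : "no") << "\n";
        }
        return 0;
    }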


@@ -662,6 +662,7 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
     }
 }
 
+bool gpu_has_xmx(sycl::device &dev);
 
 void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                           const ggml_tensor *src1, ggml_tensor *dst,


@@ -158,8 +158,9 @@ static void concat_f32_sycl_non_cont(
         });
 }
 
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                         const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     queue_ptr stream = ctx.stream();
 
     const int32_t dim = ((int32_t *)dst->op_params)[0];


@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                         const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_CONCAT_HPP


@@ -71,8 +71,9 @@ static void conv_transpose_1d_f32_f32_sycl(
         });
 }
 
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                    const ggml_tensor *src1, ggml_tensor *dst) {
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
     const float * src1_d = (const float *)src1->data;


@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                    const ggml_tensor *src1, ggml_tensor *dst);
+void ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst);
 
 #endif // GGML_SYCL_CONV_HPP


@@ -882,149 +882,149 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor
 }
 
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqrt);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sin);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_cos);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_acc);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_silu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_gelu_quick);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_tanh);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_relu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sigmoid);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardsigmoid);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_hardswish);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_exp);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_log);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_neg);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_step);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_leaky_relu);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sqr);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_upscale);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pad);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_add);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sub);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_mul);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_div);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }


@@ -25,52 +25,52 @@ static __dpct_inline__ float op_div(const float a, const float b) {
 }
 
-void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
-void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
+void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_acc(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_add(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_sub(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_mul(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
+void ggml_sycl_div(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 #endif // GGML_SYCL_ELEMENTWISE_HPP


@@ -54,18 +54,12 @@ static ggml_sycl_device_info ggml_sycl_init() {
     GGML_ASSERT(info.device_count <= GGML_SYCL_MAX_DEVICES);
 
     int64_t total_vram = 0;
-#if defined(GGML_SYCL_FORCE_MMQ)
-    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: GGML_SYCL_FORCE_MMQ: no\n", __func__);
-#endif
-#if defined(SYCL_USE_XMX)
-    GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
-#else
-    GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
-#endif
-    GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);
+/* This is a bit misleading; reserved for later */
+// #if defined(SYCL_USE_XMX)
+//     GGML_LOG_INFO("%s: SYCL_USE_XMX: yes\n", __func__);
+// #else
+//     GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
+// #endif
     for (int i = 0; i < info.device_count; ++i) {
         info.devices[i].vmm = 0;
         dpct::device_info prop;
@@ -109,11 +103,11 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
     name = std::regex_replace(name, std::regex("\\(TM\\)"), "");
 
     auto global_mem_size = prop.get_global_mem_size()/1000000;
+    std::string xmx = gpu_has_xmx(device) ? "yes" : "no";
 
-    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|\n", id, device_type.c_str(),
+    GGML_LOG_INFO("|%2d|%19s|%39s|%7s|%7d|%8d|%5d|%6luM|%21s|%14s|\n", id, device_type.c_str(),
                   name.c_str(), version.c_str(), prop.get_max_compute_units(),
                   prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
-                  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str());
+                  global_mem_size, device.get_info<sycl::info::device::driver_version>().c_str(), xmx.c_str());
 }
 
 void ggml_backend_sycl_print_sycl_devices() {
@@ -124,16 +118,16 @@ void ggml_backend_sycl_print_sycl_devices() {
     GGML_LOG_INFO(
         "| |                   |                                       |       "
-        "        |Max    |        |Max  |Global |              |\n");
+        "        |Max    |        |Max  |Global |              | XMX  |\n");
     GGML_LOG_INFO(
         "| |                   |                                       |       "
-        "        |compute|Max work|sub  |mem    |              |\n");
+        "        |compute|Max work|sub  |mem    |              |  or  |\n");
     GGML_LOG_INFO(
         "|ID|        Device Type|                                           "
-        "Name|Version|units  |group   |group|size   |       Driver version|\n");
+        "Name|Version|units  |group   |group|size   |       Driver version| Tensor Cores |\n");
     GGML_LOG_INFO(
         "|--|-------------------|---------------------------------------|------"
-        "-|-------|--------|-----|-------|---------------------|\n");
+        "-|-------|--------|-----|-------|---------------------|--------------|\n");
 
     for (int id = 0; id < device_count; ++id) {
         sycl::device device = dpct::dev_mgr::instance().get_device(id);
@@ -164,14 +158,18 @@ static void ggml_check_sycl() try {
     static bool initialized = false;
 
     if (!initialized) {
-        GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
+        GGML_SYCL_DEBUG("[SYCL] call ggml_check_sycl\n");
         g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
-        GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
-
-#if defined(GGML_SYCL_F16)
-        GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
+        GGML_LOG_INFO("GGML_SYCL_DEBUG: %d\n", g_ggml_sycl_debug);
+#if defined(GGML_SYCL_FORCE_MMQ)
+        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: yes\n");
 #else
-        GGML_LOG_INFO("%s: GGML_SYCL_F16: no\n", __func__);
+        GGML_LOG_INFO("GGML_SYCL_FORCE_MMQ: no\n");
+#endif
+#if defined(GGML_SYCL_F16)
+        GGML_LOG_INFO("GGML_SYCL_F16: yes\n");
+#else
+        GGML_LOG_INFO("GGML_SYCL_F16: no\n");
 #endif
 
 /* NOT REMOVE, keep it for next optimize for XMX.
@@ -1189,7 +1187,6 @@ std::unique_ptr<ggml_sycl_pool> ggml_backend_sycl_context::new_pool_for_device(q
 /// kernels
 
 typedef void (*cpy_kernel_t)(const char * cx, char * cdst);
-typedef void (*ggml_sycl_func_t)(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst);
 typedef void (*ggml_sycl_op_mul_mat_t)(
     ggml_backend_sycl_context & ctx,
     const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst,
@@ -3171,33 +3168,33 @@ catch (sycl::exception const &exc) {
 }
 
-static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_repeat);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_get_rows);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_norm);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rms_norm);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
 
-static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     GGML_SYCL_DEBUG("call %s\n", __func__);
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm);
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_group_norm);
     GGML_SYCL_DEBUG("call %s done\n", __func__);
 }
@@ -3572,9 +3569,10 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
     }
 }
 
-static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                 const ggml_tensor *src1,
+static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx,
                                  ggml_tensor *dst) try {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");
 
     const ggml_tensor *ids = dst->src[2];
@@ -3740,12 +3738,12 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale);
+static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_scale);
 }
 
-static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp);
+static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_clamp);
 }
 
 static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -3787,7 +3785,6 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
                 ggml_type_name(src0->type), ggml_type_name(src1->type));
         GGML_ABORT("fatal error");
     }
-    GGML_UNUSED(dst);
 }
 catch (sycl::exception const &exc) {
@@ -3796,59 +3793,52 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
     // TODO: why do we pass dst as src1 here?
-    ggml_sycl_cpy(ctx, src0, dst, nullptr);
-    GGML_UNUSED(src1);
+    ggml_sycl_cpy(ctx, dst->src[0], dst, nullptr);
 }
 
-static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf);
+static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_diag_mask_inf);
 }
 
-static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max);
+static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_soft_max);
 }
 
-static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope);
+static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0])); // TODO: this restriction is temporary until non-cont support is implemented
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_rope);
 }
 
-static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d);
+static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_pool2d);
 }
 
-static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col);
+static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_im2col);
 }
 
-static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum);
+static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum);
 }
 
-static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows);
+static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_sum_rows);
 }
 
-static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort);
+static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argsort);
 }
 
-static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_ASSERT(ggml_is_contiguous(src0));
-    ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax);
+static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    GGML_ASSERT(ggml_is_contiguous(dst->src[0]));
+    ggml_sycl_op_flatten(ctx, dst->src[0], dst->src[1], dst, ggml_sycl_op_argmax);
 }
 
-static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_UNUSED(src0);
-    GGML_UNUSED(src1);
-    GGML_UNUSED(dst);
-    GGML_UNUSED(ctx);
-}
-
 void ggml_sycl_set_main_device(const int main_device) try {
     if (dpct::get_current_device_id() == static_cast<unsigned int> (main_device)) {
@@ -3871,191 +3861,189 @@ catch (sycl::exception const &exc) {
     std::exit(1);
 }
 
-bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * tensor) {
+bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tensor * dst) {
     if (!g_sycl_loaded) return false;
 
-    ggml_sycl_func_t func;
+    if (dst->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(dst->src[0]->buffer)) {
+        ggml_sycl_set_peer_access(dst->src[1]->ne[1], ctx.device);
+    }
 
-    switch (tensor->op) {
+    switch (dst->op) {
         case GGML_OP_ARGMAX:
-            func = ggml_sycl_argmax;
+            ggml_sycl_argmax(ctx, dst);
             break;
         case GGML_OP_CONV_TRANSPOSE_1D:
-            func = ggml_sycl_op_conv_transpose_1d;
+            ggml_sycl_op_conv_transpose_1d(ctx, dst);
             break;
         case GGML_OP_REPEAT:
-            func = ggml_sycl_repeat;
+            ggml_sycl_repeat(ctx, dst);
            break;
         case GGML_OP_GET_ROWS:
-            func = ggml_sycl_get_rows;
+            ggml_sycl_get_rows(ctx, dst);
            break;
         case GGML_OP_DUP:
-            func = ggml_sycl_dup;
+            ggml_sycl_dup(ctx, dst);
            break;
         case GGML_OP_ADD:
         case GGML_OP_ADD1: // TODO: more efficient implementation
-            func = ggml_sycl_add;
+            ggml_sycl_add(ctx, dst);
            break;
         case GGML_OP_SUB:
-            func = ggml_sycl_sub;
+            ggml_sycl_sub(ctx, dst);
            break;
         case GGML_OP_ACC:
-            func = ggml_sycl_acc;
+            ggml_sycl_acc(ctx, dst);
            break;
         case GGML_OP_MUL:
-            func = ggml_sycl_mul;
+            ggml_sycl_mul(ctx, dst);
            break;
         case GGML_OP_LOG:
-            func = ggml_sycl_log;
+            ggml_sycl_log(ctx, dst);
            break;
         case GGML_OP_DIV:
-            func = ggml_sycl_div;
+            ggml_sycl_div(ctx, dst);
            break;
         case GGML_OP_UNARY:
-            switch (ggml_get_unary_op(tensor)) {
+            switch (ggml_get_unary_op(dst)) {
                 case GGML_UNARY_OP_NEG:
-                    func = ggml_sycl_neg;
+                    ggml_sycl_neg(ctx, dst);
                    break;
                 case GGML_UNARY_OP_STEP:
-                    func = ggml_sycl_step;
+                    ggml_sycl_step(ctx, dst);
                    break;
                 case GGML_UNARY_OP_GELU:
-                    func = ggml_sycl_gelu;
+                    ggml_sycl_gelu(ctx, dst);
                    break;
                 case GGML_UNARY_OP_SILU:
-                    func = ggml_sycl_silu;
+                    ggml_sycl_silu(ctx, dst);
                    break;
                 case GGML_UNARY_OP_GELU_QUICK:
-                    func = ggml_sycl_gelu_quick;
+                    ggml_sycl_gelu_quick(ctx, dst);
                    break;
                 case GGML_UNARY_OP_TANH:
-                    func = ggml_sycl_tanh;
+                    ggml_sycl_tanh(ctx, dst);
                    break;
                 case GGML_UNARY_OP_RELU:
-                    func = ggml_sycl_relu;
+                    ggml_sycl_relu(ctx, dst);
                    break;
                 case GGML_UNARY_OP_SIGMOID:
-                    func = ggml_sycl_sigmoid;
+                    ggml_sycl_sigmoid(ctx, dst);
                    break;
                 case GGML_UNARY_OP_HARDSIGMOID:
-                    func = ggml_sycl_hardsigmoid;
+                    ggml_sycl_hardsigmoid(ctx, dst);
                    break;
                 case GGML_UNARY_OP_HARDSWISH:
-                    func = ggml_sycl_hardswish;
+                    ggml_sycl_hardswish(ctx, dst);
                    break;
                 case GGML_UNARY_OP_EXP:
-                    func = ggml_sycl_exp;
+                    ggml_sycl_exp(ctx, dst);
                    break;
                 default:
                     return false;
             }
             break;
         case GGML_OP_NORM:
-            func = ggml_sycl_norm;
+            ggml_sycl_norm(ctx, dst);
            break;
         case GGML_OP_GROUP_NORM:
-            func = ggml_sycl_group_norm;
+            ggml_sycl_group_norm(ctx, dst);
            break;
         case GGML_OP_CONCAT:
-            func = ggml_sycl_op_concat;
+            ggml_sycl_op_concat(ctx, dst);
            break;
         case GGML_OP_UPSCALE:
-            func = ggml_sycl_upscale;
+            ggml_sycl_upscale(ctx, dst);
            break;
         case GGML_OP_PAD:
-            func = ggml_sycl_pad;
+            ggml_sycl_pad(ctx, dst);
            break;
         case GGML_OP_LEAKY_RELU:
-            func = ggml_sycl_leaky_relu;
+            ggml_sycl_leaky_relu(ctx, dst);
            break;
         case GGML_OP_RMS_NORM:
-            func = ggml_sycl_rms_norm;
+            ggml_sycl_rms_norm(ctx, dst);
            break;
         case GGML_OP_MUL_MAT:
-            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                 return false;
             }
-            func = ggml_sycl_mul_mat;
+            /* ggml_sycl_mul_mat_id is dependent on ggml_sycl_mul_mat */
+            ggml_sycl_mul_mat(ctx, dst->src[0], dst->src[1], dst);
            break;
         case GGML_OP_MUL_MAT_ID:
-            if (tensor->src[0]->ne[3] != tensor->src[1]->ne[3]) {
+            if (dst->src[0]->ne[3] != dst->src[1]->ne[3]) {
                 return false;
             }
-            func = ggml_sycl_mul_mat_id;
+            ggml_sycl_mul_mat_id(ctx, dst);
            break;
         case GGML_OP_OUT_PROD:
-            func = ggml_sycl_op_out_prod;
+            ggml_sycl_op_out_prod(ctx, dst);
            break;
         case GGML_OP_SCALE:
-            func = ggml_sycl_scale;
+            ggml_sycl_scale(ctx, dst);
            break;
         case GGML_OP_SQR:
-            func = ggml_sycl_sqr;
+            ggml_sycl_sqr(ctx, dst);
            break;
         case GGML_OP_SQRT:
-            func = ggml_sycl_sqrt;
+            ggml_sycl_sqrt(ctx, dst);
            break;
         case GGML_OP_SIN:
-            func = ggml_sycl_sin;
+            ggml_sycl_sin(ctx, dst);
            break;
         case GGML_OP_COS:
-            func = ggml_sycl_cos;
+            ggml_sycl_cos(ctx, dst);
            break;
         case GGML_OP_CLAMP:
-            func = ggml_sycl_clamp;
+            ggml_sycl_clamp(ctx, dst);
            break;
         case GGML_OP_CPY:
-            func = ggml_sycl_cpy;
+            ggml_sycl_cpy(ctx, dst->src[0], dst->src[1], dst);
            break;
         case GGML_OP_CONT:
-            func = ggml_sycl_dup;
+            ggml_sycl_dup(ctx, dst);
            break;
         case GGML_OP_NONE:
         case GGML_OP_RESHAPE:
         case GGML_OP_VIEW:
         case GGML_OP_PERMUTE:
         case GGML_OP_TRANSPOSE:
-            func = ggml_sycl_nop;
+            GGML_SYCL_DEBUG("%s: Tensor NO-OP\n", __func__);
            break;
         case GGML_OP_DIAG_MASK_INF:
-            func = ggml_sycl_diag_mask_inf;
+            ggml_sycl_diag_mask_inf(ctx, dst);
            break;
         case GGML_OP_SOFT_MAX:
-            func = ggml_sycl_soft_max;
+            ggml_sycl_soft_max(ctx, dst);
            break;
         case GGML_OP_ROPE:
-            func = ggml_sycl_rope;
+            ggml_sycl_rope(ctx, dst);
            break;
         case GGML_OP_IM2COL:
-            func = ggml_sycl_im2col;
+            ggml_sycl_im2col(ctx, dst);
            break;
         case GGML_OP_POOL_2D:
-            func = ggml_sycl_pool2d;
+            ggml_sycl_pool2d(ctx, dst);
            break;
         case GGML_OP_SUM:
-            func = ggml_sycl_sum;
+            ggml_sycl_sum(ctx, dst);
            break;
         case GGML_OP_SUM_ROWS:
-            func = ggml_sycl_sum_rows;
+            ggml_sycl_sum_rows(ctx, dst);
            break;
         case GGML_OP_ARGSORT:
-            func = ggml_sycl_argsort;
+            ggml_sycl_argsort(ctx, dst);
            break;
         case GGML_OP_TIMESTEP_EMBEDDING:
-            func = ggml_sycl_op_timestep_embedding;
+            ggml_sycl_op_timestep_embedding(ctx, dst);
            break;
         case GGML_OP_RWKV_WKV6:
-            func = ggml_sycl_op_rwkv_wkv6;
+            ggml_sycl_op_rwkv_wkv6(ctx, dst);
            break;
         default:
             return false;
     }
 
-    if (tensor->src[0] != nullptr && ggml_backend_buffer_is_sycl_split(tensor->src[0]->buffer)) {
-        ggml_sycl_set_peer_access(tensor->src[1]->ne[1], ctx.device);
-    }
-
-    func(ctx, tensor->src[0], tensor->src[1], tensor);
-
     return true;
 }


@@ -3,9 +3,9 @@
 #include "outprod.hpp"
 
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst) {
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
 
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);


@@ -3,8 +3,7 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst);
+void ggml_sycl_op_out_prod(ggml_backend_sycl_context& ctx, ggml_tensor* dst);
 
 #endif // GGML_SYCL_OUTPROD_HPP


@@ -55,8 +55,9 @@ static void timestep_embedding_f32_sycl(
         });
 }
 
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                     const ggml_tensor *src1, ggml_tensor * dst) {
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
     const float * src0_d = (const float *)src0->data;
     float * dst_d = (float *)dst->data;
     dpct::queue_ptr stream = ctx.stream();


@@ -15,7 +15,6 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-                                     const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 #endif // GGML_SYCL_TSEMBD_HPP


@@ -95,8 +95,10 @@ static void rwkv_wkv_f32_kernel(
     }
 }
 
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, const ggml_tensor* src0,
-    const ggml_tensor* src1, ggml_tensor* dst) {
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) {
+
+    const ggml_tensor *src0 = dst->src[0];
+    const ggml_tensor *src1 = dst->src[1];
 
     const float* k_d = (const float*)dst->src[0]->data;
     const float* v_d = (const float*)dst->src[1]->data;


@@ -3,8 +3,7 @@
 
 #include "common.hpp"
 
-void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
-    const ggml_tensor *src1, ggml_tensor * dst);
+void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
 
 #endif // GGML_SYCL_WKV6_HPP