SYCL: Integrate debug logs with GGML_LOG and other fixes
parent 19ce4b64b7
commit 2607b7de0f
@@ -82,8 +82,8 @@ void ggml_sycl_op_flatten(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
     ggml_sycl_set_device(ctx.device);
     queue_ptr main_stream = ctx.stream();
-    // GGML_SYCL_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
+    // GGML_LOG_DEBUG("ctx.device=%d, main_stream=%p src0_on_device=%d, src1_on_device=%d, dst_on_device=%d\n",
     //       ctx.device, main_stream, src0_on_device, src1_on_device, dst_on_device);

     // do the computation
     op(ctx, src0, src1, dst, src0_ddf, src1_ddf, dst_ddf, main_stream);

@@ -31,17 +31,11 @@
 #pragma clang diagnostic ignored "-Wnested-anon-types"
 #include "ggml-common.h"
 #pragma clang diagnostic pop
+#include "ggml-impl.h"

 void* ggml_sycl_host_malloc(size_t size);
 void ggml_sycl_host_free(void* ptr);

-static int g_ggml_sycl_debug = 0;
-#define GGML_SYCL_DEBUG(...)        \
-  do {                              \
-    if (g_ggml_sycl_debug)          \
-      fprintf(stderr, __VA_ARGS__); \
-  } while (0)
-
 #define CHECK_TRY_ERROR(expr) \
   [&]() {                     \
     try {                     \
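Note: GGML_LOG_DEBUG and the other GGML_LOG_* macros used throughout this commit come from ggml-impl.h (hence the include added above); they route messages through ggml's logging callback instead of printing straight to stderr the way the removed GGML_SYCL_DEBUG macro did. A minimal sketch of how an application can capture these messages — it assumes the ggml_log_set()/ggml_log_callback API declared in ggml.h, which is not part of this diff:

    // send every ggml log message, including the backend's debug traces, to stderr
    #include <cstdio>
    #include "ggml.h"

    static void log_to_stderr(enum ggml_log_level level, const char * text, void * user_data) {
        (void) level;
        (void) user_data;
        fputs(text, stderr);   // the messages in this commit already end in '\n'
    }

    int main(void) {
        ggml_log_set(log_to_stderr, /*user_data=*/nullptr);
        // ... create the SYCL backend and run as usual ...
        return 0;
    }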
@@ -167,8 +161,7 @@ inline dpct::err0 ggml_sycl_set_device(const int device) try {
   int current_device_id;
   SYCL_CHECK(CHECK_TRY_ERROR(current_device_id = get_current_device_id()));

-  // GGML_SYCL_DEBUG("ggml_sycl_set_device device_id=%d,
-  // current_device_id=%d\n", device, current_device);
+  GGML_LOG_DEBUG("ggml_sycl_set_device device_id=%d,current_device_id=%d\n", device, current_device_id);
   if (device == current_device_id) {
     return 0;
   }
@@ -1,5 +1,6 @@
 #include "common.hpp"
 #include "element_wise.hpp"
+#include "ggml-impl.h"

 void acc_f32(const float * x, const float * y, float * dst, const int ne,
              const int ne10, const int ne11, const int ne12,
@@ -883,148 +884,148 @@ inline void ggml_sycl_op_div(ggml_backend_sycl_context & ctx, const ggml_tensor


 void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqrt);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sin(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sin);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_cos(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_cos);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_acc(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_acc);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_gelu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_silu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_silu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_gelu_quick(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_gelu_quick);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_tanh(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_tanh);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_relu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sigmoid);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_hardsigmoid(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardsigmoid);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_hardswish);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }


 void ggml_sycl_exp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_exp);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_log(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_log);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_neg(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_neg);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_step(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_step);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_leaky_relu(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_leaky_relu);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sqr(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sqr);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_upscale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_upscale);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_pad(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pad);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }



 void ggml_sycl_add(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_add);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_sub(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sub);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_mul(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_mul);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 void ggml_sycl_div(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_div);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }
@@ -64,7 +64,6 @@ static ggml_sycl_device_info ggml_sycl_init() {
 #else
     GGML_LOG_INFO("%s: SYCL_USE_XMX: no\n", __func__);
 #endif
-    GGML_LOG_INFO("%s: found %d %s devices:\n", __func__, info.device_count, GGML_SYCL_NAME);

     for (int i = 0; i < info.device_count; ++i) {
         info.devices[i].vmm = 0;
@@ -117,7 +116,7 @@ void print_device_detail(int id, sycl::device &device, std::string device_type)
 }

 void ggml_backend_sycl_print_sycl_devices() {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_print_sycl_devices\n");
     int device_count = dpct::dev_mgr::instance().device_count();
     std::map<std::string, size_t> DeviceNums;
     GGML_LOG_INFO("Found %d SYCL devices:\n", device_count);
@@ -146,27 +145,11 @@ void ggml_backend_sycl_print_sycl_devices() {
     }
 }

-static inline int get_sycl_env(const char *env_name, int default_val) {
-    char *user_device_string = getenv(env_name);
-    int user_number = default_val;
-
-    unsigned n;
-    if (user_device_string != NULL &&
-        sscanf(user_device_string, " %u", &n) == 1) {
-        user_number = (int)n;
-    } else {
-        user_number = default_val;
-    }
-    return user_number;
-}
-
 static void ggml_check_sycl() try {
     static bool initialized = false;

     if (!initialized) {
-        GGML_LOG_INFO("[SYCL] call ggml_check_sycl\n");
-        g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
-        GGML_LOG_INFO("%s: GGML_SYCL_DEBUG: %d\n", __func__, g_ggml_sycl_debug);
+        GGML_LOG_DEBUG("[SYCL] call ggml_check_sycl\n");

 #if defined(GGML_SYCL_F16)
         GGML_LOG_INFO("%s: GGML_SYCL_F16: yes\n", __func__);
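Note: with g_ggml_sycl_debug and the get_sycl_env("GGML_SYCL_DEBUG", 0) lookup removed, the environment variable no longer gates these traces inside the backend; whether debug messages are shown is now decided by the log callback the application installs. A sketch of a callback that keeps an opt-in environment switch — the helper and the MYAPP_SYCL_DEBUG variable are illustrative; only ggml_log_set() and the ggml_log_level enum from ggml.h are assumed:

    #include <cstdio>
    #include <cstdlib>
    #include "ggml.h"

    // drop GGML_LOG_LEVEL_DEBUG messages unless MYAPP_SYCL_DEBUG is set in the environment
    static void filtered_log(enum ggml_log_level level, const char * text, void * user_data) {
        (void) user_data;
        static const bool debug_enabled = std::getenv("MYAPP_SYCL_DEBUG") != nullptr;
        if (level == GGML_LOG_LEVEL_DEBUG && !debug_enabled) {
            return;
        }
        fputs(text, stderr);
    }

    // usage: ggml_log_set(filtered_log, nullptr); before initializing the backend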
@@ -221,7 +204,7 @@ inline void check_allow_gpu_index(const int device_index) {
 }

 GGML_API void ggml_backend_sycl_get_gpu_list(int *id_list, int max_len) try {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_gpu_list\n");
     for(int i=0;i<max_len;i++) id_list[i] = -1;

     for (int i=0;i< ggml_sycl_info().device_count;i++){
@@ -532,12 +515,12 @@ ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device) {
     static std::mutex mutex;
     std::lock_guard<std::mutex> lock(mutex);

-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");

     auto dev_count = ggml_backend_sycl_get_device_count();

     if (device>=dev_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
             device, dev_count-1);
         GGML_ASSERT(device<dev_count);
     }
@@ -561,11 +544,11 @@
 }

 ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(ggml_backend_sycl_context * ctx) {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_buffer_type\n");

     int device = ctx->device;
     if (device>=ggml_sycl_info().device_count or device<0) {
-        printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
+        GGML_LOG_ERROR("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
             device, ggml_sycl_info().device_count-1);
         GGML_ASSERT(device<ggml_sycl_info().device_count);
     }
@@ -990,7 +973,7 @@ ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * ten
     static std::mutex mutex;
     std::lock_guard<std::mutex> lock(mutex);

-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_split_buffer_type\n");
     ggml_check_sycl();
     // FIXME: this is not thread safe
     static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
@@ -1055,7 +1038,7 @@ static ggml_backend_buffer_t ggml_backend_sycl_host_buffer_type_alloc_buffer(ggm
 }

 ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_host_buffer_type\n");
     static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_type_host = {
         /* .iface    = */ {
             /* .get_name    = */ ggml_backend_sycl_host_buffer_type_name,
@@ -1156,7 +1139,7 @@ struct ggml_sycl_pool_leg : public ggml_sycl_pool {
                      (uint32_t)(max_size/1024/1024), (uint32_t)(g_sycl_pool_size[id]/1024/1024), (uint32_t)(size/1024/1024));
 #endif

-        // GGML_SYCL_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
+        GGML_LOG_DEBUG("ggml_sycl_pool_malloc_leg look_ahead_size=%lu, return %p\n", look_ahead_size, ptr);
         return ptr;
     }

@@ -2348,41 +2331,31 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,
     char * src_ptr;
     if (ggml_backend_buffer_is_host(src->buffer)) {
         kind = dpct::host_to_device;
+        GGML_LOG_DEBUG("%s: Host buffer type src tensor: %p\n", __func__, src_ptr);
         src_ptr = (char *) src->data;
-        // GGML_SYCL_DEBUG("ggml_sycl_cpy_tensor_2d  GGML_BACKEND_TYPE_CPU src_ptr %p\n", src_ptr);
     } else if (ggml_backend_buffer_is_sycl(src->buffer) || ggml_backend_buffer_is_sycl_split(src->buffer)) {
         if (!ggml_backend_buffer_is_sycl_split(src->buffer)){
-            // If buffer is not a SYCL split buffer
-            /*
-                What memcpy_direction kind we need here?
-                Refer: dpct/helper.hpp:
-                enum memcpy_direction
-                {
-                    host_to_host,
-                    host_to_device,
-                    device_to_host,
-                    device_to_device,
-                    automatic
-                };
-            */
+            // If buffer is not single GPU SYCL buffer
+            GGML_LOG_DEBUG("%s: SYCL buffer type src tensor: %p\n", __func__, src->data);
             kind = dpct::device_to_device;
             src_ptr = (char *) src->data;
         }
         else {
             /*
                 If buffer is a SYCL split buffer
             */
+            GGML_LOG_DEBUG("%s: Split buffer type src tensor\n", __func__);
             GGML_ASSERT(i1_low == 0 && i1_high == src->ne[1]);
             kind = dpct::device_to_device;
             ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) src->extra;
             int id;
             SYCL_CHECK(CHECK_TRY_ERROR(
                 id = get_current_device_id()));
-            // GGML_SYCL_DEBUG("current device index %d\n", id);
+            GGML_LOG_DEBUG("current device index %d\n", id);
             src_ptr = (char *) extra->data_device[id];
         }
     } else {
-        // GGML_SYCL_DEBUG("GGML_ABORT("fatal error")\n");
+        GGML_LOG_DEBUG("%s: GGML_ABORT(\"fatal error\")\n", __func__);
         GGML_ABORT("fatal error");
     }
     char * dst_ptr = (char *) dst;
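Note: the question comment deleted above asked which dpct memcpy_direction to use; the code keeps dpct::host_to_device for host-resident sources and dpct::device_to_device for SYCL buffers. For reference, the enum as quoted by that removed comment (citing dpct/helper.hpp):

    enum memcpy_direction {
        host_to_host,
        host_to_device,
        device_to_host,
        device_to_device,
        automatic
    };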
@@ -2396,7 +2369,7 @@ static dpct::err0 ggml_sycl_cpy_tensor_2d(void *dst,

         const char * x = src_ptr + i1_low*nb1 + i2*nb2 + i3*nb3;
         if (nb0 == ts && nb1 == ts*ne0/bs) {
-            // GGML_SYCL_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1);
+            GGML_LOG_DEBUG("stream->memcpy: dst_ptr=%p, x=%p, size=%lu\n", dst_ptr, x, i1_diff * nb1);
             // return CHECK_TRY_ERROR(stream->memcpy(dst_ptr, x, i1_diff * nb1));
             return CHECK_TRY_ERROR(dpct::async_dpct_memcpy(dst_ptr, x, i1_diff * nb1,
                                    kind, *stream));
@@ -2526,7 +2499,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
         use_fp16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1] &&
         dst->op_params[0] == GGML_PREC_DEFAULT) {

-        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
+        GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp16 path\n");
         ggml_sycl_pool_alloc<sycl::half> src0_as_f16(ctx.pool());
         if (src0->type != GGML_TYPE_F16) {
             const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src0->type);
@@ -2573,7 +2546,7 @@ inline void ggml_sycl_op_mul_mat_sycl(
 #endif
     }
     else {
-        // GGML_SYCL_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
+        GGML_LOG_DEBUG("ggml_sycl_op_mul_mat_sycl - fp32 path\n");
         ggml_sycl_pool_alloc<float> src0_ddq_as_f32(ctx.pool());
         ggml_sycl_pool_alloc<float> src1_ddq_as_f32(ctx.pool());
         if (src0->type != GGML_TYPE_F32) {
@@ -3184,33 +3157,33 @@ catch (sycl::exception const &exc) {


 static void ggml_sycl_repeat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_repeat);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_get_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_get_rows);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_norm);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_rms_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rms_norm);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_group_norm(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
-    GGML_SYCL_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_group_norm);
-    GGML_SYCL_DEBUG("call %s done\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_mul_mat_vec_p021(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
@@ -3448,6 +3421,7 @@ bool ggml_sycl_supports_dmmv(enum ggml_type type) {
 }

 static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("[SYCL]: call %s\n", __func__);
     const bool split = ggml_backend_buffer_is_sycl_split(src0->buffer);
     int64_t min_compute_capability = INT_MAX;

@@ -3587,6 +3561,7 @@ __dpct_inline__ static void k_copy_dst_from_contiguous(
 static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, const ggml_tensor *src0,
                                  const ggml_tensor *src1,
                                  ggml_tensor *dst) try {
+    GGML_LOG_DEBUG("SYCL call %s\n", __func__);
     GGML_ASSERT(!ggml_backend_buffer_is_sycl_split(src0->buffer) && "mul_mat_id does not support split buffers");

     const ggml_tensor *ids = dst->src[2];
@@ -3753,11 +3728,15 @@ catch (sycl::exception const &exc) {
 }

 static void ggml_sycl_scale(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_scale);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_clamp(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_clamp);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1,
@@ -3810,52 +3789,74 @@ catch (sycl::exception const &exc) {

 static void ggml_sycl_dup(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     // TODO: why do we pass dst as src1 here?
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_cpy(ctx, src0, dst, nullptr);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
     GGML_UNUSED(src1);
 }

 static void ggml_sycl_diag_mask_inf(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_diag_mask_inf);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_soft_max(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_soft_max);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_rope(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(src0)); // TODO: this restriction is temporary until non-cont support is implemented
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_rope);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_pool2d);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_im2col);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_sum(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_sum_rows(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_sum_rows);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_argsort(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argsort);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_argmax(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_ASSERT(ggml_is_contiguous(src0));
+    GGML_LOG_DEBUG("call %s\n", __func__);
     ggml_sycl_op_flatten(ctx, src0, src1, dst, ggml_sycl_op_argmax);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
 }

 static void ggml_sycl_nop(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+    GGML_LOG_DEBUG("call %s\n", __func__);
+    GGML_LOG_DEBUG("call %s done\n", __func__);
     GGML_UNUSED(src0);
     GGML_UNUSED(src1);
     GGML_UNUSED(dst);
@@ -3869,13 +3870,11 @@ void ggml_sycl_set_main_device(const int main_device) try {
     check_allow_gpu_index(main_device);
     dpct::select_device(main_device);

-    if (g_ggml_sycl_debug) {
-        dpct::device_info prop;
-        SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
-            prop, dpct::dev_mgr::instance().get_device(main_device))));
-        GGML_LOG_INFO("Using device %d (%s) as main device\n",
-                main_device, prop.get_name());
-    }
+    dpct::device_info prop;
+    SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
+        prop, dpct::dev_mgr::instance().get_device(main_device))));
+    GGML_LOG_DEBUG("Using device %d (%s) as main device\n",
+        main_device, prop.get_name());
 }
 catch (sycl::exception const &exc) {
     std::cerr << exc.what() << "Exception caught at file:" << __FILE__
@@ -4073,7 +4072,7 @@ bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct ggml_tens

 GGML_API void ggml_backend_sycl_get_device_description(int device, char *description,
                                                        size_t description_size) try {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_description\n");
     dpct::device_info prop;
     SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
         prop, dpct::dev_mgr::instance().get_device(device))));
@@ -4087,7 +4086,7 @@ catch (sycl::exception const &exc) {

 void ggml_backend_sycl_get_device_memory(int device, size_t *free,
                                          size_t *total) try {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_memory\n");
     ggml_sycl_set_device(device);

     /*
@@ -4289,7 +4288,7 @@ bool ggml_backend_is_sycl(ggml_backend_t backend) {
 }

 int ggml_backend_sycl_get_device_count() {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_get_device_count\n");
     return ggml_sycl_info().device_count;
 }

@@ -4646,17 +4645,14 @@ static ggml_backend_dev_t ggml_backend_sycl_reg_get_device(ggml_backend_reg_t re
 }

 static void *ggml_backend_sycl_reg_get_proc_address(ggml_backend_reg_t reg, const char *name) {
-    GGML_UNUSED(reg);
-
-    // TODO: update to the current function signature
-    //if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
-    //    return (void *)ggml_backend_sycl_split_buffer_type;
-    //}
+    if (strcmp(name, "ggml_backend_split_buffer_type") == 0) {
+        return (void *)ggml_backend_sycl_split_buffer_type;
+    }

     // SYCL doesn't support registering host memory, left here for reference
     // "ggml_backend_register_host_buffer"
     // "ggml_backend_unregister_host_buffer"
-    GGML_UNUSED(name);
+    GGML_UNUSED(reg);
     return nullptr;
 }

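Note: with the TODO block above resolved, the SYCL registry now answers get_proc_address for "ggml_backend_split_buffer_type", so callers can resolve the split buffer type by name instead of linking the symbol directly. A sketch of such a lookup; the function-pointer typedef mirrors the ggml_backend_sycl_split_buffer_type(const float *) signature seen earlier in this diff, and ggml_backend_reg_get_proc_address()/ggml_backend_sycl_reg() are taken from the ggml-backend / ggml-sycl headers — treat the exact signatures as assumptions:

    #include "ggml-backend.h"
    #include "ggml-sycl.h"

    typedef ggml_backend_buffer_type_t (*split_buft_fn)(const float * tensor_split);

    // resolve the SYCL split buffer type through the backend registry
    static ggml_backend_buffer_type_t sycl_split_buft(const float * tensor_split) {
        ggml_backend_reg_t reg = ggml_backend_sycl_reg();
        split_buft_fn fn = (split_buft_fn) ggml_backend_reg_get_proc_address(reg, "ggml_backend_split_buffer_type");
        if (fn == nullptr) {
            return nullptr;   // this build does not expose the hook
        }
        return fn(tensor_split);
    }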
@@ -4715,7 +4711,7 @@ ggml_backend_reg_t ggml_backend_sycl_reg() {
 }

 ggml_backend_t ggml_backend_sycl_init(int device) {
-    GGML_SYCL_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
+    GGML_LOG_DEBUG("[SYCL] call ggml_backend_sycl_init\n");
     ggml_check_sycl();

     check_allow_gpu_index(device);
|
Loading…
Reference in New Issue
Block a user