diff --git a/ggml-cuda.cu b/ggml-cuda.cu index af10f21a0..c6bc3f64c 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -2740,7 +2740,7 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-cuda/unary.cu b/ggml-cuda/unary.cu index ac03d5c6f..a5ff96320 100644 --- a/ggml-cuda/unary.cu +++ b/ggml-cuda/unary.cu @@ -148,6 +148,8 @@ void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -160,6 +162,8 @@ void ggml_cuda_op_silu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -172,6 +176,8 @@ void ggml_cuda_op_gelu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -184,6 +190,8 @@ void ggml_cuda_op_tanh(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -196,6 +204,8 @@ void ggml_cuda_op_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -208,6 +218,8 @@ void ggml_cuda_op_sigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -220,6 +232,8 @@ void ggml_cuda_op_hardsigmoid(ggml_backend_cuda_context & ctx, ggml_tensor * dst float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -232,6 +246,8 @@ void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -244,6 +260,8 @@ void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -259,6 +277,8 @@ void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { float * dst_d = (float *)dst->data; cudaStream_t stream = ctx.stream(); + GGML_ASSERT(ggml_is_contiguous(src0)); + GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp index 5592741be..18c6f4a10 100644 --- a/ggml-kompute.cpp +++ b/ggml-kompute.cpp @@ -1340,7 +1340,7 @@ static bool ggml_vk_supports_op(const struct ggml_tensor * op) { case GGML_UNARY_OP_RELU: case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: - return true; + return ggml_is_contiguous(op->src[0]); default: ; } diff --git a/ggml-metal.m b/ggml-metal.m index 946f11813..b5c287347 100644 --- a/ggml-metal.m +++ b/ggml-metal.m @@ -744,7 +744,7 @@ static bool ggml_metal_supports_op(const struct ggml_metal_context * ctx, const case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_SILU: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp index 42fc0df20..e7d260bd4 100644 --- a/ggml-sycl.cpp +++ b/ggml-sycl.cpp @@ -17190,7 +17190,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons case GGML_UNARY_OP_HARDSWISH: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml-vulkan.cpp b/ggml-vulkan.cpp index 06ba23313..5b9280491 100644 --- a/ggml-vulkan.cpp +++ b/ggml-vulkan.cpp @@ -6439,7 +6439,7 @@ GGML_CALL static bool ggml_backend_vk_supports_op(ggml_backend_t backend, const case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_RELU: - return true; + return ggml_is_contiguous(op->src[0]); default: return false; } diff --git a/ggml.c b/ggml.c index 5fb9e9a32..2ea1d7677 100644 --- a/ggml.c +++ b/ggml.c @@ -7345,6 +7345,8 @@ static struct ggml_tensor * ggml_unary_impl( struct ggml_tensor * a, enum ggml_unary_op op, bool inplace) { + GGML_ASSERT(ggml_is_contiguous_1(a)); + bool is_node = false; if (!inplace && (a->grad)) { @@ -11009,6 +11011,8 @@ static void ggml_compute_forward_abs_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11018,9 +11022,6 @@ static void ggml_compute_forward_abs_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_abs_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11055,6 +11056,8 @@ static void ggml_compute_forward_sgn_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11064,9 +11067,6 @@ static void ggml_compute_forward_sgn_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_sgn_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11101,6 +11101,8 @@ static void ggml_compute_forward_neg_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11110,9 +11112,6 @@ static void ggml_compute_forward_neg_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_neg_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11147,6 +11146,8 @@ static void ggml_compute_forward_step_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11156,9 +11157,6 @@ static void ggml_compute_forward_step_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_step_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11193,6 +11191,8 @@ static void ggml_compute_forward_tanh_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11202,9 +11202,6 @@ static void ggml_compute_forward_tanh_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_tanh_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11239,6 +11236,8 @@ static void ggml_compute_forward_elu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11248,9 +11247,6 @@ static void ggml_compute_forward_elu_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_elu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11285,6 +11281,8 @@ static void ggml_compute_forward_relu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11294,9 +11292,6 @@ static void ggml_compute_forward_relu_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_relu_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11331,6 +11326,8 @@ static void ggml_compute_forward_sigmoid_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11340,9 +11337,6 @@ static void ggml_compute_forward_sigmoid_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_sigmoid_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11376,9 +11370,9 @@ static void ggml_compute_forward_gelu_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11439,9 +11433,9 @@ static void ggml_compute_forward_gelu_quick_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11502,9 +11496,9 @@ static void ggml_compute_forward_silu_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11565,6 +11559,8 @@ static void ggml_compute_forward_leaky_relu_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11614,11 +11610,11 @@ static void ggml_compute_forward_silu_back_f32( const struct ggml_tensor * src0 = dst->src[0]; const struct ggml_tensor * grad = dst->src[1]; - GGML_ASSERT(ggml_is_contiguous_1(grad)); - GGML_ASSERT(ggml_is_contiguous_1(src0)); - GGML_ASSERT(ggml_is_contiguous_1(dst)); - GGML_ASSERT(ggml_are_same_shape(src0, dst)); - GGML_ASSERT(ggml_are_same_shape(src0, grad)); + assert(ggml_is_contiguous_1(grad)); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); + assert(ggml_are_same_shape(src0, grad)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -11680,6 +11676,8 @@ static void ggml_compute_forward_hardswish_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11689,9 +11687,6 @@ static void ggml_compute_forward_hardswish_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_hardswish_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -11723,6 +11718,8 @@ static void ggml_compute_forward_hardsigmoid_f32( const struct ggml_tensor * src0 = dst->src[0]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -11732,9 +11729,6 @@ static void ggml_compute_forward_hardsigmoid_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert(dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { ggml_vec_hardsigmoid_f32(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -16681,7 +16675,10 @@ static void ggml_compute_forward_map_unary_f32( const struct ggml_tensor * src0 = dst->src[0]; - GGML_ASSERT(ggml_are_same_shape(src0, dst)); + assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(dst)); + assert(ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { return; @@ -16690,9 +16687,6 @@ static void ggml_compute_forward_map_unary_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert( dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), @@ -16730,6 +16724,9 @@ static void ggml_compute_forward_map_binary_f32( const struct ggml_tensor * src1 = dst->src[1]; assert(params->ith == 0); + assert(ggml_is_contiguous_1(src0)); + assert(ggml_is_contiguous_1(src1)); + assert(ggml_is_contiguous_1(dst)); assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst)); if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) { @@ -16739,10 +16736,6 @@ static void ggml_compute_forward_map_binary_f32( const int n = ggml_nrows(src0); const int nc = src0->ne[0]; - assert( dst->nb[0] == sizeof(float)); - assert(src0->nb[0] == sizeof(float)); - assert(src1->nb[0] == sizeof(float)); - for (int i = 0; i < n; i++) { fun(nc, (float *) ((char *) dst->data + i*( dst->nb[1])), diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index ce406a8af..2b48e623e 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -642,20 +642,29 @@ struct test_case { struct test_unary : public test_case { const ggml_unary_op op; const ggml_type type; - const std::array ne; + const std::array ne_a; + int v; // view (1 : non-contiguous a) std::string vars() override { - return VARS_TO_STR2(type, ne); + return VARS_TO_STR3(type, ne_a, v); } test_unary(ggml_unary_op op, ggml_type type = GGML_TYPE_F32, - std::array ne = {128, 10, 10, 10}) - : op(op), type(type), ne(ne) {} + std::array ne_a = {128, 10, 10, 10}, + int v = 0) + : op(op), type(type), ne_a(ne_a), v(v) {} ggml_tensor * build_graph(ggml_context * ctx) override { - ggml_tensor * in = ggml_new_tensor(ctx, type, 4, ne.data()); - ggml_tensor * out = ggml_unary(ctx, in, op); + ggml_tensor * a; + if (v & 1) { + auto ne = ne_a; ne[0] *= 3; + a = ggml_new_tensor(ctx, type, 4, ne.data()); + a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0); + } else { + a = ggml_new_tensor(ctx, type, 4, ne_a.data()); + } + ggml_tensor * out = ggml_unary(ctx, a, op); return out; } @@ -2016,9 +2025,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op }; // unary ops - for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) { - test_cases.emplace_back(new test_unary((ggml_unary_op) op)); - test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 7, 13, 19, 23 })); + for (int v : {0, 1}) { + for (int op = 0; op < GGML_UNARY_OP_COUNT; op++) { + test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 128, 10, 10, 10 }, v)); + test_cases.emplace_back(new test_unary((ggml_unary_op) op, GGML_TYPE_F32, { 7, 13, 19, 23 }, v)); + } } test_cases.emplace_back(new test_get_rows(GGML_TYPE_F32, 1, 8, 2, 1, false));