From c513c5ca5f1b7b8e85fbf58396bc11967a48e558 Mon Sep 17 00:00:00 2001 From: shani-f Date: Sun, 9 Nov 2025 19:42:22 +0200 Subject: [PATCH 1/5] SYCL: add full ABS operator support --- ggml/src/ggml-sycl/element_wise.cpp | 52 ++++++++++++++++++++++++++--- ggml/src/ggml-sycl/ggml-sycl.cpp | 27 ++++++++++++--- 2 files changed, 70 insertions(+), 9 deletions(-) diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index aeeb387595017..efc7ae409b0e9 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -158,9 +158,30 @@ static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::n } template -static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { +static void unary_op_abs_kernel( + const T * x, + T * dst, + const int k, + const int64_t ne0, const int64_t ne1, const int64_t ne2, const int64_t ne3, + const size_t nb0, const size_t nb1, const size_t nb2, const size_t nb3, + const size_t nbd0, const size_t nbd1, const size_t nbd2, const size_t nbd3, + const sycl::nd_item<1> & item_ct1) { + + (void) ne3; + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { - dst[i] = op_abs(x[i]); + const int64_t i0 = i % ne0; + const int64_t i1 = (i / ne0) % ne1; + const int64_t i2 = (i / (ne0*ne1)) % ne2; + const int64_t i3 = i / (ne0*ne1*ne2); + + const char * src_base = (const char *) x; + char * dst_base = (char *) dst; + + const T * srcp = (const T *)(src_base + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3); + T * dstp = (T *)(const_cast(dst_base) + i0*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3); + + *dstp = op_abs(*srcp); } } @@ -583,14 +604,37 @@ static inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor } static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_tensor * src0 = dst->src[0]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = src0->nb[0]; + const size_t nb1 = src0->nb[1]; + const size_t nb2 = src0->nb[2]; + const size_t nb3 = src0->nb[3]; + + const size_t nbd0 = dst->nb[0]; + const size_t nbd1 = dst->nb[1]; + const size_t nbd2 = dst->nb[2]; + const size_t nbd3 = dst->nb[3]; + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, - [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + [=](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { const int num_blocks = ceil_div(k_elements, 256); stream->parallel_for( sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), sycl::range<1>(256)), [=](sycl::nd_item<1> item_ct1) { - unary_op_abs_kernel(src, dst_ptr, k_elements, item_ct1); + unary_op_abs_kernel( + src, dst_ptr, k_elements, + ne0, ne1, ne2, ne3, + nb0, nb1, nb2, nb3, + nbd0, nbd1, nbd2, nbd3, + item_ct1 + ); }); }); } diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index f3407a813d731..50c507cb07b73 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4253,16 +4253,33 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_SGN: - case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_ELU: -#if defined (GGML_SYCL_F16) + case GGML_UNARY_OP_TRUNC: + #if defined(GGML_SYCL_F16) return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type); -#else - return ggml_is_contiguous(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) && (op->type == op->src[0]->type); -#endif + #else + return ggml_is_contiguous(op->src[0]) && + (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) && + (op->type == op->src[0]->type); + #endif + + case GGML_UNARY_OP_ABS: + { + ggml_type src0_type = op->src[0]->type; + #if defined(GGML_SYCL_F16) + return (op->type == src0_type) && + (src0_type == GGML_TYPE_F32 || src0_type == GGML_TYPE_F16); + #else + return (op->type == src0_type) && + (src0_type == GGML_TYPE_F32); + #endif + } + default: return false; } + break; + case GGML_OP_GLU: switch (ggml_get_glu_op(op)) { case GGML_GLU_OP_REGLU: From 0f3f565f631322198c1c54dc5f90add77e3c4021 Mon Sep 17 00:00:00 2001 From: shani-f Date: Sun, 9 Nov 2025 20:40:01 +0200 Subject: [PATCH 2/5] docs: add ABS to supported SYCL unary ops --- docs/ops.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/ops.md b/docs/ops.md index 775b938bd12b9..7c1d4233857c8 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -14,7 +14,7 @@ Legend: | Operation | BLAS | CANN | CPU | CUDA | Metal | OpenCL | SYCL | Vulkan | zDNN | |-----------|------|------|------|------|------|------|------|------|------| -| ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | +| ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ❌ | | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | From 8a33154b7a8a34d2e2263437ba07c92216884550 Mon Sep 17 00:00:00 2001 From: shani-f Date: Sun, 9 Nov 2025 21:28:47 +0200 Subject: [PATCH 3/5] docs: update SYCL.csv for ABS v1 support --- docs/ops/SYCL.csv | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index d7e71990a88cc..c77d2eb88ccb0 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -1,6 +1,6 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" -"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -39,8 +39,8 @@ "SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" +"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" @@ -71,8 +71,8 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -111,8 +111,8 @@ "SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" +"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" From 7a06f5fc9664da95b4da91af702f5335bd35bff6 Mon Sep 17 00:00:00 2001 From: shani-f Date: Mon, 10 Nov 2025 01:10:08 +0200 Subject: [PATCH 4/5] Remove redundant void cast in element_wise.cpp --- ggml/src/ggml-sycl/element_wise.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index adc8fae1df8bc..a2d8c55ea24d1 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -187,7 +187,7 @@ static void unary_op_abs_kernel( const size_t nbd0, const size_t nbd1, const size_t nbd2, const size_t nbd3, const sycl::nd_item<1> & item_ct1) { - (void) ne3; + (void) ne3; SYCL_GLOBAL_ID_LOOP(k, item_ct1) { const int64_t i0 = i % ne0; @@ -695,7 +695,7 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor const size_t nbd1 = dst->nb[1]; const size_t nbd2 = dst->nb[2]; const size_t nbd3 = dst->nb[3]; - + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, [=](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { const int num_blocks = ceil_div(k_elements, 256); From fda5c31d11202e7c2874825960991af0535ab620 Mon Sep 17 00:00:00 2001 From: shani-f Date: Mon, 10 Nov 2025 01:25:50 +0200 Subject: [PATCH 5/5] Simplify unary operation dispatch in element_wise.cpp --- ggml/src/ggml-sycl/element_wise.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index a2d8c55ea24d1..e5b95d5c5599e 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -695,8 +695,7 @@ static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor const size_t nbd1 = dst->nb[1]; const size_t nbd2 = dst->nb[2]; const size_t nbd3 = dst->nb[3]; - - ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, +ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, [=](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { const int num_blocks = ceil_div(k_elements, 256); stream->parallel_for(