diff --git a/docs/ops.md b/docs/ops.md index 775b938bd12b9..7c1d4233857c8 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -14,7 +14,7 @@ Legend: | Operation | BLAS | CANN | CPU | CUDA | Metal | OpenCL | SYCL | Vulkan | zDNN | |-----------|------|------|------|------|------|------|------|------|------| -| ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | 🟡 | ❌ | ❌ | +| ABS | ❌ | ✅ | ✅ | 🟡 | 🟡 | ❌ | ✅ | ❌ | ❌ | | ACC | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | | ADD | ❌ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | | ADD1 | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index d7e71990a88cc..c77d2eb88ccb0 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -1,6 +1,6 @@ "backend_name","op_name","op_params","test_mode","supported","error_message","backend_reg_name" -"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -39,8 +39,8 @@ "SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" +"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" @@ -71,8 +71,8 @@ "SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","XIELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" @@ -111,8 +111,8 @@ "SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL" "SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL" -"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" -"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" +"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL" +"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL" "SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" "SYCL0","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL" "SYCL0","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL" diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 810995d0cbf74..e5b95d5c5599e 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -178,9 +178,30 @@ static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::n } template -static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { +static void unary_op_abs_kernel( + const T * x, + T * dst, + const int k, + const int64_t ne0, const int64_t ne1, const int64_t ne2, const int64_t ne3, + const size_t nb0, const size_t nb1, const size_t nb2, const size_t nb3, + const size_t nbd0, const size_t nbd1, const size_t nbd2, const size_t nbd3, + const sycl::nd_item<1> & item_ct1) { + + (void) ne3; + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { - dst[i] = op_abs(x[i]); + const int64_t i0 = i % ne0; + const int64_t i1 = (i / ne0) % ne1; + const int64_t i2 = (i / (ne0*ne1)) % ne2; + const int64_t i3 = i / (ne0*ne1*ne2); + + const char * src_base = (const char *) x; + char * dst_base = (char *) dst; + + const T * srcp = (const T *)(src_base + i0*nb0 + i1*nb1 + i2*nb2 + i3*nb3); + T * dstp = (T *)(const_cast(dst_base) + i0*nbd0 + i1*nbd1 + i2*nbd2 + i3*nbd3); + + *dstp = op_abs(*srcp); } } @@ -658,14 +679,36 @@ static inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor } static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, - [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + ggml_tensor * src0 = dst->src[0]; + + const int64_t ne0 = dst->ne[0]; + const int64_t ne1 = dst->ne[1]; + const int64_t ne2 = dst->ne[2]; + const int64_t ne3 = dst->ne[3]; + + const size_t nb0 = src0->nb[0]; + const size_t nb1 = src0->nb[1]; + const size_t nb2 = src0->nb[2]; + const size_t nb3 = src0->nb[3]; + + const size_t nbd0 = dst->nb[0]; + const size_t nbd1 = dst->nb[1]; + const size_t nbd2 = dst->nb[2]; + const size_t nbd3 = dst->nb[3]; +ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [=](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { const int num_blocks = ceil_div(k_elements, 256); stream->parallel_for( sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), sycl::range<1>(256)), [=](sycl::nd_item<1> item_ct1) { - unary_op_abs_kernel(src, dst_ptr, k_elements, item_ct1); + unary_op_abs_kernel( + src, dst_ptr, k_elements, + ne0, ne1, ne2, ne3, + nb0, nb1, nb2, nb3, + nbd0, nbd1, nbd2, nbd3, + item_ct1 + ); }); }); } diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index f3b3e365740b4..25ee05e330cdc 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -4372,20 +4372,36 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_EXP: case GGML_UNARY_OP_SGN: - case GGML_UNARY_OP_ABS: case GGML_UNARY_OP_ELU: case GGML_UNARY_OP_FLOOR: case GGML_UNARY_OP_CEIL: case GGML_UNARY_OP_ROUND: case GGML_UNARY_OP_TRUNC: -#if defined (GGML_SYCL_F16) + #if defined(GGML_SYCL_F16) return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type); -#else - return ggml_is_contiguous(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) && (op->type == op->src[0]->type); -#endif + #else + return ggml_is_contiguous(op->src[0]) && + (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) && + (op->type == op->src[0]->type); + #endif + + case GGML_UNARY_OP_ABS: + { + ggml_type src0_type = op->src[0]->type; + #if defined(GGML_SYCL_F16) + return (op->type == src0_type) && + (src0_type == GGML_TYPE_F32 || src0_type == GGML_TYPE_F16); + #else + return (op->type == src0_type) && + (src0_type == GGML_TYPE_F32); + #endif + } + default: return false; } + break; + case GGML_OP_GLU: switch (ggml_get_glu_op(op)) { case GGML_GLU_OP_REGLU: