Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions filelist.json
Original file line number Diff line number Diff line change
Expand Up @@ -1807,6 +1807,7 @@
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sme1_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp",
Expand Down
1 change: 1 addition & 0 deletions src/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ filegroup(
"core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp",
"core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp",
"core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp",
"core/NEON/kernels/arm_gemm/kernels/sme1_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp",
"core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp",
"core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp",
"core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp",
Expand Down
3 changes: 2 additions & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ target_sources(
core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_1VLx4VL/generic.cpp
core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_2VLx2VL/generic.cpp
core/NEON/kernels/arm_gemm/kernels/sme2_interleaved_nomerge_u8q_mopa_4VLx1VL/generic.cpp
core/NEON/kernels/arm_gemm/kernels/sme1_interleaved_nomerge_fp32_mopa_2VLx2VL/generic.cpp
core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_bf16fp32_mmla_6x4VL/generic.cpp
core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/a64fx.cpp
core/NEON/kernels/arm_gemm/kernels/sve_ffhybrid_fp16_mla_6x4VL/generic.cpp
Expand Down Expand Up @@ -1081,4 +1082,4 @@ target_sources(
cpu/kernels/select/generic/neon/fp16.cpp
cpu/kernels/softmax/generic/neon/fp16.cpp
cpu/kernels/sub/neon/fp16.cpp
)
)
15 changes: 15 additions & 0 deletions src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,11 @@
#include "kernels/sve_ffinterleaved_fp32_mla_8x3VL.hpp"
#include "kernels/sve_ffinterleaved_bf16fp32_mmla_8x3VL.hpp"
#endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS

#ifdef ARM_COMPUTE_ENABLE_SME
#include "kernels/sme1_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp"
#endif // ARM_COMPUTE_ENABLE_SME

#ifdef ARM_COMPUTE_ENABLE_SME2
#include "kernels/sme2_gemv_fp32_mla_16VL.hpp"
#include "kernels/sme2_gemv_fp32bf16fp32_dot_16VL.hpp"
Expand Down Expand Up @@ -118,6 +123,15 @@ GemmImplementation<float, float, float>::with_estimate(
[](const GemmArgs &args) { return new GemmHybridIndirect<cls_a64_hybrid_fp32bf16fp32_mmla_4x24, float, float, float>(args); }
),
#endif // ARM_COMPUTE_ENABLE_BF16
#ifdef ARM_COMPUTE_ENABLE_SME
{
GemmMethod::GEMM_INTERLEAVED,
"sme1_interleaved_nomerge_fp32_mopa_2VLx2VL",
[](const GemmArgs &args) { return args._ci->has_sme() && !args._accumulate; },
nullptr,
[](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme1_interleaved_nomerge_fp32_mopa_2VLx2VL, float, float>(args); }
},
#endif // ARM_COMPUTE_ENABLE_SME
#ifdef ARM_COMPUTE_ENABLE_SVE
#ifdef ARM_COMPUTE_ENABLE_SME2
// SME kernels
Expand Down Expand Up @@ -187,6 +201,7 @@ GemmImplementation<float, float, float>::with_estimate(
nullptr,
[](const GemmArgs &args) { return new GemmInterleavedNoMerge<cls_sme2_interleaved_nomerge_fp32_mopa_2VLx2VL, float, float>(args); }
},

#endif // ARM_COMPUTE_ENABLE_SME2
#ifdef ARM_COMPUTE_ENABLE_BF16
GemmImplementation<float, float, float>::with_estimate(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
/*
* Copyright (c) 2022-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// + Changes from Qualcomm Technologies, Inc. are provided under the following license:
// + Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
// + SPDX-License-Identifier: MIT
//


#pragma once

#ifdef ARM_COMPUTE_ENABLE_SME


#include "../std_transforms_sme.hpp"

namespace arm_gemm
{

// Kernel entry point. Only the declaration lives in this header; the
// definition is provided elsewhere (not visible from here).
// NOTE(review): parameter roles are not documented here - presumably A/B are
// the input operands, C the output with leading dimension ldc, and M/N/K the
// problem dimensions; confirm against the kernel implementation.
void sme1_interleaved_nomerge_fp32_mopa_2VLx2VL(
    const float *const A,
    const float *const B,
    float *const       C,
    int                ldc,
    const int          M,
    const int          N,
    const int          K,
    const float *const bias,
    const Activation   act,
    bool               accumulate,
    float *const       accumulator_buffer);

/**
 * Strategy descriptor for the sme1_interleaved_nomerge_fp32_mopa_2VLx2VL kernel.
 *
 * Groups together the operand/result element types, the blocking queries,
 * the capability flags, the kernel entry point and the transforms object
 * associated with this kernel.
 */
class cls_sme1_interleaved_nomerge_fp32_mopa_2VLx2VL
{
public:
    // Element types: both operands and the result are float.
    using lhs_operand_type = float;
    using rhs_operand_type = float;
    using result_type      = float;

    // Function-pointer type matching the kernel entry point's signature.
    using kern_type = void (*)(const float *const A, const float *const B, float *const C, int ldc, const int M, const int N, const int K, const float *const bias, const Activation act, bool accumulate, float *const accumulator_buffer);

    /* Kernel blocking parameters */

    // Twice the float vector length reported by sme::get_vector_length.
    static unsigned int out_height()
    {
        return 2 * sme::get_vector_length<float>();
    }

    // Twice the float vector length reported by sme::get_vector_length.
    static unsigned int out_width()
    {
        return 2 * sme::get_vector_length<float>();
    }

    // Unroll factor along K: fixed at 1.
    static constexpr unsigned int k_unroll()
    {
        return 1;
    }

    /* Capability flags - every one of them reports true for this kernel */

    static constexpr bool supports_accumulate()
    {
        return true;
    }

    static constexpr bool supports_bias()
    {
        return true;
    }

    static constexpr bool supports_activation()
    {
        return true;
    }

    static constexpr bool is_sme()
    {
        return true;
    }

    // Kernel to invoke; starts out pointing at the generic implementation.
    kern_type kernel = sme1_interleaved_nomerge_fp32_mopa_2VLx2VL;

    // Transforms object, instantiated with template arguments <2, 2, 1>.
    StdTransformsSME<lhs_operand_type, result_type, 2, 2, 1> transforms = {};

    // The CPUInfo pointer is accepted but not used by this constructor.
    cls_sme1_interleaved_nomerge_fp32_mopa_2VLx2VL(const CPUInfo *)
    {
    }
};

} // namespace arm_gemm

#endif // ARM_COMPUTE_ENABLE_SME
Loading