Skip to content

Commit 1eafda1

Browse files
committed
Merge branch 'develop' of https://github.com/OpenMathLib/OpenBLAS into develop
2 parents dd7a1d6 + e939c6c commit 1eafda1

File tree

11 files changed

+357
-212
lines changed

11 files changed

+357
-212
lines changed

common_level3.h

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,19 @@ void sgemm_direct_alpha_beta(BLASLONG M, BLASLONG N, BLASLONG K,
5959
float beta,
6060
float * R, BLASLONG strideR);
6161

62+
void ssymm_direct_alpha_betaLU(BLASLONG M, BLASLONG N,
63+
float alpha,
64+
float * A, BLASLONG strideA,
65+
float * B, BLASLONG strideB,
66+
float beta,
67+
float * R, BLASLONG strideR);
68+
void ssymm_direct_alpha_betaLL(BLASLONG M, BLASLONG N,
69+
float alpha,
70+
float * A, BLASLONG strideA,
71+
float * B, BLASLONG strideB,
72+
float beta,
73+
float * R, BLASLONG strideR);
74+
6275
int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
6376

6477
int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,

common_param.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,8 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
257257
#ifdef ARCH_ARM64
258258
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
259259
void (*sgemm_direct_alpha_beta) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);
260+
void (*ssymm_direct_alpha_betaLU) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);
261+
void (*ssymm_direct_alpha_betaLL) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float, float * , BLASLONG);
260262
#endif
261263

262264

common_s.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@
5050
#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant
5151
#define SGEMM_DIRECT sgemm_direct
5252
#define SGEMM_DIRECT_ALPHA_BETA sgemm_direct_alpha_beta
/* Direct SSYMM kernels: these macros name the kernel symbols directly. */
#define	SSYMM_DIRECT_ALPHA_BETA_LU	ssymm_direct_alpha_betaLU
#define	SSYMM_DIRECT_ALPHA_BETA_LL	ssymm_direct_alpha_betaLL
5355

5456
#define SGEMM_ONCOPY sgemm_oncopy
5557
#define SGEMM_OTCOPY sgemm_otcopy
@@ -220,6 +222,8 @@
220222
#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant
221223
#define SGEMM_DIRECT gotoblas -> sgemm_direct
222224
#define SGEMM_DIRECT_ALPHA_BETA gotoblas -> sgemm_direct_alpha_beta
/* Direct SSYMM kernels: these macros go through the gotoblas function table. */
#define	SSYMM_DIRECT_ALPHA_BETA_LU	gotoblas -> ssymm_direct_alpha_betaLU
#define	SSYMM_DIRECT_ALPHA_BETA_LL	gotoblas -> ssymm_direct_alpha_betaLL
223227
#endif
224228

225229
#define SGEMM_ONCOPY gotoblas -> sgemm_oncopy

f_check

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,11 @@ nofortran=0
3030
shift 2
3131
compiler="$*"
3232
compiler_bin="$1"
# Drop the compiler binary from the positional parameters; whatever is left
# is kept as the compiler's extra arguments.
shift
compiler_args="$*"

# f77 is too ambiguous: clear the full compiler command when the binary
# itself is plain "f77" (extra arguments do not affect this comparison).
if [ "$compiler_bin" = "f77" ]; then
    compiler=''
fi
3638

3739
path=`split "$PATH" ':'`
3840

@@ -50,7 +52,7 @@ if [ -z "$compiler" ]; then
5052
for list in $lists; do
5153
for p in $path; do
5254
if [ -x "$p/$list" ]; then
53-
compiler=$list
55+
compiler="$list $compiler_args"
5456
compiler_bin=$list
5557
break 2
5658
fi

interface/symm.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -371,6 +371,24 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
371371
return;
372372
}
373373

#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && !defined(HFLOAT16)
#if defined(ARCH_ARM64) && (defined(USE_SSYMM_KERNEL_DIRECT)||defined(DYNAMIC_ARCH))
  /*
   * Single-precision real fast path: hand row-major, left-sided SYMM calls
   * whose leading dimensions match the matrix sizes to the direct kernels.
   *
   * With DYNAMIC_ARCH the whole fast path must sit under the runtime
   * support_sme1() check. Without the braces the check would guard only the
   * empty-size early return, and the direct kernels would be called even
   * when support_sme1() reports false.
   */
#if defined(DYNAMIC_ARCH)
  if (support_sme1())
#endif
  {
    /* Nothing to compute for an empty result matrix. */
    if (args.m == 0 || args.n == 0) return;

    if (order == CblasRowMajor && m == lda && n == ldb && n == ldc)
    {
      if (Side == CblasLeft && Uplo == CblasUpper) {
        SSYMM_DIRECT_ALPHA_BETA_LU(m, n, alpha, a, lda, b, ldb, beta, c, ldc);
        return;
      }
      else if (Side == CblasLeft && Uplo == CblasLower) {
        SSYMM_DIRECT_ALPHA_BETA_LL(m, n, alpha, a, lda, b, ldb, beta, c, ldc);
        return;
      }
      /* Any other Side/Uplo combination falls through to the generic path. */
    }
  }
#endif
#endif
391+
374392
#endif
375393

376394
if (args.m == 0 || args.n == 0) return;

kernel/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -241,6 +241,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
241241
if (X86_64 OR ARM64)
242242
set(USE_DIRECT_SGEMM true)
243243
endif()
# The direct SSYMM kernels are only enabled for ARM64 builds.
if (ARM64)
  set(USE_DIRECT_SSYMM true)
else ()
  set(USE_DIRECT_SSYMM false)
endif ()
244248
if (UC_TARGET_CORE MATCHES ARMV9SME)
245249
set (HAVE_SME true)
246250
endif ()
@@ -267,6 +271,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
267271
endif ()
268272
endif()
269273

# Build the two direct SSYMM kernel objects from the same source file.
# kernel/Makefile.L3 compiles the LU object with -DLEFT -DUPPER and the LL
# object with -DLEFT -DLOWER; pass the same define lists here so the CMake
# and Makefile builds produce matching objects.
if (USE_DIRECT_SSYMM)
  if (ARM64)
    set (SSYMMDIRECTKERNEL_ALPHA_BETA ssymm_direct_alpha_beta_arm64_sme1.c)
    GenerateNamedObjects("${KERNELDIR}/${SSYMMDIRECTKERNEL_ALPHA_BETA}" "LEFT;UPPER" "symm_direct_alpha_betaLU" false "" "" false SINGLE)
    GenerateNamedObjects("${KERNELDIR}/${SSYMMDIRECTKERNEL_ALPHA_BETA}" "LEFT;LOWER" "symm_direct_alpha_betaLL" false "" "" false SINGLE)
  endif ()
endif ()
281+
270282
foreach (float_type SINGLE DOUBLE)
271283
string(SUBSTRING ${float_type} 0 1 float_char)
272284
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})

kernel/Makefile.L3

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ endif
5252
ifeq ($(ARCH), arm64)
5353
USE_TRMM = 1
5454
USE_DIRECT_SGEMM = 1
55+
USE_DIRECT_SSYMM = 1
5556
endif
5657

5758
ifeq ($(ARCH), riscv64)
@@ -137,6 +138,17 @@ endif
137138
endif
138139
endif
139140

# Default source for the direct SSYMM kernel on arm64. It is only chosen when
# SSYMMDIRECTKERNEL_ALPHA_BETA has not been set already.
ifdef USE_DIRECT_SSYMM
ifndef SSYMMDIRECTKERNEL_ALPHA_BETA
ifeq ($(ARCH), arm64)
SSYMMDIRECTKERNEL_ALPHA_BETA = ssymm_direct_alpha_beta_arm64_sme1.c
# The ARMV9SME target core additionally sets HAVE_SME.
ifeq ($(TARGET_CORE), ARMV9SME)
HAVE_SME = 1
endif
endif
endif
endif
151+
140152
ifeq ($(BUILD_BFLOAT16), 1)
141153
ifndef BGEMMKERNEL
142154
BGEMM_BETA = ../generic/gemm_beta.c
@@ -220,6 +232,14 @@ endif
220232
endif
221233
endif
222234

# Add the direct SSYMM kernel objects (LU and LL variants) on arm64.
ifdef USE_DIRECT_SSYMM
ifeq ($(ARCH), arm64)
SKERNELOBJS += ssymm_direct_alpha_betaLU$(TSUFFIX).$(SUFFIX)
SKERNELOBJS += ssymm_direct_alpha_betaLL$(TSUFFIX).$(SUFFIX)
endif
endif
242+
223243
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
224244
DKERNELOBJS += \
225245
dgemm_beta$(TSUFFIX).$(SUFFIX) \
@@ -982,6 +1002,15 @@ endif
9821002
endif
9831003
endif
9841004

# Compile rules for the direct SSYMM kernels on arm64. Both objects come from
# $(SSYMMDIRECTKERNEL_ALPHA_BETA); only the UPPER/LOWER define differs.
ifeq ($(ARCH), arm64)
ifdef USE_DIRECT_SSYMM

# LU variant: built with -DLEFT -DUPPER.
$(KDIR)ssymm_direct_alpha_betaLU$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMDIRECTKERNEL_ALPHA_BETA)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DLEFT -DUPPER $< -o $@

# LL variant: built with -DLEFT -DLOWER.
$(KDIR)ssymm_direct_alpha_betaLL$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMDIRECTKERNEL_ALPHA_BETA)
	$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX -DLEFT -DLOWER $< -o $@

endif
endif
1013+
9851014
ifeq ($(BUILD_BFLOAT16), 1)
9861015
$(KDIR)bgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(BGEMMKERNEL)
9871016
$(CC) $(CFLAGS) -c -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX $< -o $@

0 commit comments

Comments
 (0)