Skip to content

Commit ba143f3

Browse files
committed
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
2 parents 1eafda1 + de43ccc commit ba143f3

32 files changed, with 427 additions and 101 deletions.

.cirrus.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,7 +127,7 @@ task:
127127
FreeBSD_task:
128128
name: FreeBSD-gcc
129129
freebsd_instance:
130-
image_family: freebsd-14-2
130+
image_family: freebsd-14-3
131131
install_script:
132132
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
133133
compile_script:
@@ -138,7 +138,7 @@ FreeBSD_task:
138138
FreeBSD_task:
139139
name: freebsd-gcc-ilp64
140140
freebsd_instance:
141-
image_family: freebsd-14-2
141+
image_family: freebsd-14-3
142142
install_script:
143143
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
144144
compile_script:
@@ -148,7 +148,7 @@ FreeBSD_task:
148148
FreeBSD_task:
149149
name: FreeBSD-clang-openmp
150150
freebsd_instance:
151-
image_family: freebsd-14-2
151+
image_family: freebsd-14-3
152152
install_script:
153153
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
154154
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -80,6 +80,7 @@ test/SBLAT3_3M.SUMM
8080
test/ZBLAT2.SUMM
8181
test/ZBLAT3.SUMM
8282
test/ZBLAT3_3M.SUMM
83+
test/SHBLAT2.SUMM
8384
test/SHBLAT3.SUMM
8485
test/SBBLAT2.SUMM
8586
test/SBBLAT3.SUMM
@@ -98,6 +99,7 @@ test/sblat2
9899
test/sblat3
99100
test/sblat3_3m
100101
test/test_shgemm
102+
test/test_shgemv
101103
test/test_sbgemm
102104
test/test_sbgemv
103105
test/test_bgemm

cmake/kernel.cmake

Lines changed: 18 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -175,6 +175,10 @@ if (BUILD_BFLOAT16)
175175
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
176176
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
177177
endif ()
178+
if (BUILD_HFLOAT16)
179+
SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c)
180+
SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c)
181+
endif ()
178182
endmacro ()
179183

180184
macro(SetDefaultL2)
@@ -226,6 +230,8 @@ macro(SetDefaultL2)
226230
if (BUILD_BFLOAT16)
227231
SetFallback(BGEMVNKERNEL ../generic/gemv_n.c)
228232
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
233+
SetFallback(SHGEMVNKERNEL ../generic/gemv_n.c)
234+
SetFallback(SHGEMVTKERNEL ../generic/gemv_t.c)
229235
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
230236
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
231237
SetFallback(SHGERKERNEL ../generic/ger.c)
@@ -260,5 +266,16 @@ if (BUILD_BFLOAT16)
260266
SetFallback(SBGEMMONCOPYOBJ sbgemm_oncopy.o)
261267
SetFallback(SBGEMMOTCOPYOBJ sbgemm_otcopy.o)
262268
endif ()
263-
269+
if (BUILD_HFLOAT16)
270+
SetFallback(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
271+
SetFallback(SHGEMM_BETA ../generic/gemm_beta.c)
272+
SetFallback(SHGEMMINCOPY ../generic/gemm_ncopy_2.c)
273+
SetFallback(SHGEMMITCOPY ../generic/gemm_tcopy_2.c)
274+
SetFallback(SHGEMMONCOPY ../generic/gemm_ncopy_2.c)
275+
SetFallback(SHGEMMOTCOPY ../generic/gemm_tcopy_2.c)
276+
SetFallback(SHGEMMINCOPYOBJ shgemm_incopy.o)
277+
SetFallback(SHGEMMITCOPYOBJ shgemm_itcopy.o)
278+
SetFallback(SHGEMMONCOPYOBJ shgemm_oncopy.o)
279+
SetFallback(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
280+
endif ()
264281
endmacro ()

cmake/utils.cmake

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -375,9 +375,12 @@ function(GenerateNamedObjects sources_in)
375375
if (NOT no_float_type)
376376
string(SUBSTRING ${float_type} 0 1 float_char)
377377
string(TOLOWER ${float_char} float_char)
378-
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
379-
set (float_char "sb")
380-
endif ()
378+
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
379+
set (float_char "sb")
380+
endif ()
381+
if (${float_type} STREQUAL "HFLOAT16" AND NOT "${defines_in}" MATCHES "HGEM")
382+
set (float_char "sh")
383+
endif ()
381384
endif ()
382385

383386
if (NOT name_in)

common_interface.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -261,6 +261,8 @@ void BLASFUNC(bgemv)(char *, blasint *, blasint *, bfloat16 *, bfloat16 *, blas
261261
bfloat16 *, blasint *, bfloat16 *, bfloat16 *, blasint *);
262262
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
263263
bfloat16 *, blasint *, float *, float *, blasint *);
264+
void BLASFUNC(shgemv)(char *, blasint *, blasint *, float *, hfloat16 *, blasint *,
265+
hfloat16 *, blasint *, float *, float *, blasint *);
264266
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
265267
float *, blasint *, float *, float *, blasint *);
266268
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,

common_level2.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,10 @@ int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLO
5454
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
5555
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
5656
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
57+
int shgemv_n(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
58+
int shgemv_t(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
59+
int shgemv_thread_n(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG, int);
60+
int shgemv_thread_t(BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG, int);
5761
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
5862
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
5963
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);

common_macro.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -703,6 +703,9 @@
703703
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
704704
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
705705

706+
#define SCAL_K SSCAL_K
707+
#define GEMV_N SHGEMV_N_K
708+
#define GEMV_T SHGEMV_T_K
706709

707710
#elif defined(BFLOAT16) && defined(BGEMM)
708711
#define SCAL_K BSCAL_K

common_param.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,7 +60,8 @@ int (*shgemm_itcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
6060
int (*shgemm_oncopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
6161
int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
6262

63-
63+
int (*shgemv_n) (BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
64+
int (*shgemv_t) (BLASLONG, BLASLONG, float, hfloat16 *, BLASLONG, hfloat16 *, BLASLONG, float, float *, BLASLONG);
6465
#endif
6566

6667

common_sh.h

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,31 @@
1+
/***************************************************************************
2+
* Copyright (c) 2025, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
129
#ifndef COMMON_SH_H
230
#define COMMON_SH_H
331

@@ -17,6 +45,9 @@
1745
#define SHGEMM_BETA shgemm_beta
1846
#define SHGEMM_KERNEL shgemm_kernel
1947

48+
#define SHGEMV_N_K shgemv_n
49+
#define SHGEMV_T_K shgemv_t
50+
2051

2152
#else // #DYNAMIC_ARCH
2253

@@ -32,6 +63,10 @@
3263

3364
#define SHGEMM_BETA gotoblas -> shgemm_beta
3465
#define SHGEMM_KERNEL gotoblas -> shgemm_kernel
66+
67+
#define SHGEMV_N_K gotoblas->shgemv_n
68+
#define SHGEMV_T_K gotoblas->shgemv_t
69+
3570
#endif // #DYNAMIC_ARCH
3671

3772
#define SHGEMM_NN shgemm_nn

driver/level2/Makefile

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -450,6 +450,12 @@ XBLASOBJS += \
450450
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
451451
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)
452452

453+
454+
ifeq ($(BUILD_HFLOAT16),1)
455+
SHBLASOBJS += \
456+
shgemv_thread_n$(TSUFFIX).$(SUFFIX) \
457+
shgemv_thread_t$(TSUFFIX).$(SUFFIX)
458+
endif
453459
ifeq ($(BUILD_BFLOAT16),1)
454460
BBLASOBJS += \
455461
bgemv_thread_n$(TSUFFIX).$(SUFFIX) \
@@ -3737,6 +3743,13 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
37373743
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
37383744
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
37393745

3746+
ifeq ($(BUILD_HFLOAT16),1)
3747+
shgemv_thread_n.$(SUFFIX) shgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
3748+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
3749+
shgemv_thread_t.$(SUFFIX) shgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
3750+
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
3751+
endif
3752+
37403753
ifeq ($(BUILD_BFLOAT16),1)
37413754
bgemv_thread_n.$(SUFFIX) bgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
37423755
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

0 commit comments

Comments
 (0)