Skip to content

Commit ac1604b

Browse files
committed
Merge remote-tracking branch 'refs/remotes/origin/develop' into develop
2 parents ba143f3 + c3ce473 commit ac1604b

File tree

6 files changed

+293
-19
lines changed

6 files changed

+293
-19
lines changed

interface/gemm.c

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -587,8 +587,16 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
587587
args.m, args.n, args.k, args.lda, args.ldb, args.ldc);
588588
#endif
589589

590-
#define BFLOAT16_GEMM_GEMV_FORWARD (!defined(BFLOAT16) || (!defined(BGEMM) && defined(SBGEMM_GEMV_FORWARD)) || (defined(BGEMM) && defined(BGEMM_GEMV_FORWARD)))
591-
#define HFLOAT16_GEMM_GEMV_FORWARD (!defined(HFLOAT16) || (!defined(HGEMM) && defined(SHGEMM_GEMV_FORWARD)) || (defined(HGEMM) && defined(HGEMM_GEMV_FORWARD)))
590+
/*
 * Gate for forwarding GEMM to GEMV for the 16-bit float builds.
 *
 * Each predicate is evaluated directly in #if directives and published as a
 * literal 0 or 1.  The macros are later used inside another #if; a macro
 * whose replacement list itself contains `defined` has undefined behaviour
 * when expanded there (C11 6.10.1p4), hence the literal constants.
 *
 * BFLOAT16_GEMM_GEMV_FORWARD is 1 when
 *   - BFLOAT16 is not defined, or
 *   - BGEMM is defined and BGEMM_GEMV_FORWARD is defined, or
 *   - BGEMM is not defined and SBGEMM_GEMV_FORWARD is defined;
 * otherwise it is 0.
 */
#if !defined(BFLOAT16)
#define BFLOAT16_GEMM_GEMV_FORWARD 1
#elif defined(BGEMM) && defined(BGEMM_GEMV_FORWARD)
#define BFLOAT16_GEMM_GEMV_FORWARD 1
#elif !defined(BGEMM) && defined(SBGEMM_GEMV_FORWARD)
#define BFLOAT16_GEMM_GEMV_FORWARD 1
#else
#define BFLOAT16_GEMM_GEMV_FORWARD 0
#endif

/* Same rule for the HFLOAT16 build, keyed on HGEMM / SHGEMM_GEMV_FORWARD. */
#if !defined(HFLOAT16)
#define HFLOAT16_GEMM_GEMV_FORWARD 1
#elif defined(HGEMM) && defined(HGEMM_GEMV_FORWARD)
#define HFLOAT16_GEMM_GEMV_FORWARD 1
#elif !defined(HGEMM) && defined(SHGEMM_GEMV_FORWARD)
#define HFLOAT16_GEMM_GEMV_FORWARD 1
#else
#define HFLOAT16_GEMM_GEMV_FORWARD 0
#endif
592600

593601
#if defined(GEMM_GEMV_FORWARD) && !defined(GEMM3M) && !defined(COMPLEX) && HFLOAT16_GEMM_GEMV_FORWARD && BFLOAT16_GEMM_GEMV_FORWARD
594602
#if defined(ARCH_ARM64)

kernel/power/dgemv_n_microk_power10.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -466,7 +466,7 @@ static void dgemv_kernel_4x8 (long n, double *ap, long lda, double *x, double *y
466466
"=b" (tmp)
467467
:
468468
"m" (*(double (*)[4]) x),
469-
"m" (*(double (*)[]) ap),
469+
"m" (*(double (*)[4]) ap),
470470
"d" (alpha), // 14
471471
"r" (x), // 15
472472
"3" (ap), // 16

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,8 @@ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
283283
ifndef SBGEMM_BETA
284284
SBGEMM_BETA = gemm_beta_rvv.c
285285
endif
286+
SBGEMVNKERNEL = sbgemv_n_vector.c
287+
SBGEMVTKERNEL = sbgemv_t_vector.c
286288
endif
287289

288290
SAXPBYKERNEL = axpby_vector_v2.c

kernel/riscv64/sbgemv_n_vector.c

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
/***************************************************************************
2+
Copyright (c) 2020, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
/*
 * The vector length is requested for 16-bit elements at LMUL=4.  f32 at
 * LMUL=8 has the same SEW/LMUL ratio, so the vl returned here is also used
 * for the widened FLOAT_V_T operations below.
 */
#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)

/* FLOAT (f32, LMUL=8) side: type, unit-stride / strided load and store. */
#define FLOAT_V_T     vfloat32m8_t
#define VLEV_FLOAT    RISCV_RVV(vle32_v_f32m8)
#define VSEV_FLOAT    RISCV_RVV(vse32_v_f32m8)
#define VLSEV_FLOAT   RISCV_RVV(vlse32_v_f32m8)
#define VSSEV_FLOAT   RISCV_RVV(vsse32_v_f32m8)

/* FLOAT arithmetic: multiply by scalar, broadcast scalar. */
#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8)
#define VFMVVF_FLOAT  RISCV_RVV(vfmv_v_f_f32m8)

/*
 * IFLOAT (16-bit, LMUL=4) side: type, unit-stride load, and the widening
 * scalar-times-vector multiply-accumulate into an f32m8 accumulator.
 * HFLOAT16 selects the f16 variants; otherwise the bf16 variants are used.
 */
#ifdef HFLOAT16
#define IFLOAT_V_T     vfloat16m4_t
#define VLEV_IFLOAT    RISCV_RVV(vle16_v_f16m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfwmacc_vf_f32m8)
#else
#define IFLOAT_V_T     vbfloat16m4_t
#define VLEV_IFLOAT    RISCV_RVV(vle16_v_bf16m4)
#define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8)
#endif
49+
50+
/*
 * Non-transposed GEMV with 16-bit inputs and FLOAT output:
 *
 *     y := beta * y + alpha * A * x
 *
 * m, n    : y has m entries; n columns of A (and n entries of x) are consumed.
 * alpha   : scalar applied to every x entry before it is used.
 * a, lda  : column j of A starts at a + j * lda and is read as m contiguous
 *           IFLOAT values.
 * x, inc_x: entry j of x is read from x[j * inc_x].
 * beta    : 0 overwrites y with zeros (y is not read), 1 leaves y as is,
 *           any other value scales y in place.
 * y, inc_y: entry i of y lives at y[i * inc_y]; inc_y == 1 uses unit-stride
 *           vector loads/stores, anything else uses strided ones.
 *
 * Returns 0 in all cases.  A negative n returns immediately without touching
 * y; n == 0 still applies the beta scaling.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
    if (n < 0) {
        return 0;
    }

    IFLOAT *col;            /* cursor inside the current column of A */
    IFLOAT scaled_x;        /* alpha * x[j], rounded to the 16-bit input type */
    FLOAT *out;             /* cursor inside y */
    BLASLONG rows, c, vl;
    IFLOAT_V_T a_vec;
    FLOAT_V_T y_vec;

    if (inc_y == 1) {
        /* ---- contiguous y: unit-stride loads and stores ---- */
        out = y;
        if (beta == 0.0) {
            /* Overwrite y with zeros; its previous contents are never read. */
            rows = m;
            while (rows > 0) {
                vl = VSETVL(rows);
                y_vec = VFMVVF_FLOAT(0.0, vl);
                VSEV_FLOAT(out, y_vec, vl);
                out += vl;
                rows -= vl;
            }
        } else if (beta != 1.0) {
            rows = m;
            while (rows > 0) {
                vl = VSETVL(rows);
                y_vec = VLEV_FLOAT(out, vl);
                y_vec = VFMULVF_FLOAT(y_vec, beta, vl);
                VSEV_FLOAT(out, y_vec, vl);
                out += vl;
                rows -= vl;
            }
        }

        /* One pass over y per column: y += (alpha * x[j]) * A(:, j). */
        for (c = 0; c < n; c++) {
            scaled_x = (IFLOAT)(alpha * (FLOAT)(x[0]));
            out = y;
            col = a;
            for (rows = m; rows > 0; rows -= vl) {
                vl = VSETVL(rows);
                y_vec = VLEV_FLOAT(out, vl);
                a_vec = VLEV_IFLOAT(col, vl);
                y_vec = VFMACCVF_FLOAT(y_vec, scaled_x, a_vec, vl);
                VSEV_FLOAT(out, y_vec, vl);
                out += vl;
                col += vl;
            }
            x += inc_x;
            a += lda;
        }
        return 0;
    }

    /* ---- strided y: the strided intrinsics take the stride in bytes ---- */
    BLASLONG y_stride_bytes = inc_y * sizeof(FLOAT);

    out = y;
    if (beta == 0.0) {
        /* Overwrite y with zeros; its previous contents are never read. */
        rows = m;
        while (rows > 0) {
            vl = VSETVL(rows);
            y_vec = VFMVVF_FLOAT(0.0, vl);
            VSSEV_FLOAT(out, y_stride_bytes, y_vec, vl);
            out += vl * inc_y;
            rows -= vl;
        }
    } else if (beta != 1.0) {
        rows = m;
        while (rows > 0) {
            vl = VSETVL(rows);
            y_vec = VLSEV_FLOAT(out, y_stride_bytes, vl);
            y_vec = VFMULVF_FLOAT(y_vec, beta, vl);
            VSSEV_FLOAT(out, y_stride_bytes, y_vec, vl);
            out += vl * inc_y;
            rows -= vl;
        }
    }

    /* One pass over y per column: y += (alpha * x[j]) * A(:, j). */
    for (c = 0; c < n; c++) {
        scaled_x = (IFLOAT)(alpha * (FLOAT)(x[0]));
        out = y;
        col = a;
        for (rows = m; rows > 0; rows -= vl) {
            vl = VSETVL(rows);
            y_vec = VLSEV_FLOAT(out, y_stride_bytes, vl);
            a_vec = VLEV_IFLOAT(col, vl);
            y_vec = VFMACCVF_FLOAT(y_vec, scaled_x, a_vec, vl);
            VSSEV_FLOAT(out, y_stride_bytes, y_vec, vl);
            out += vl * inc_y;
            col += vl;
        }
        x += inc_x;
        a += lda;
    }

    return 0;
}

kernel/riscv64/sbgemv_t_vector.c

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
30+
/*
 * The vector length is requested for 16-bit elements at LMUL=4.  f32 at
 * LMUL=8 has the same SEW/LMUL ratio, so the vl returned here is also used
 * for the widened FLOAT_V_T operations below.
 */
#define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n)

/* FLOAT side: f32 at LMUL=8 for products, LMUL=1 for the reduction result. */
#define FLOAT_V_T       vfloat32m8_t
#define FLOAT_V_T_M1    vfloat32m1_t
#define VLEV_FLOAT      RISCV_RVV(vle32_v_f32m8)
#define VLSEV_FLOAT     RISCV_RVV(vlse32_v_f32m8)
#define VFMVVF_FLOAT    RISCV_RVV(vfmv_v_f_f32m8)
#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)

/*
 * IFLOAT (16-bit, LMUL=4) side: type, unit-stride and strided loads, and the
 * element-wise widening product used for the dot product.
 *
 * In the HFLOAT16 build VFMACCVV_FLOAT is a plain widening multiply and its
 * first (accumulator) argument is discarded by the macro; in the bf16 build
 * it is a widening multiply-accumulate onto that first argument.
 */
#ifdef HFLOAT16
#define IFLOAT_V_T   vfloat16m4_t
#define VLEV_IFLOAT  RISCV_RVV(vle16_v_f16m4)
#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_f16m4)
#define VFMACCVV_FLOAT(a,b,c,d) RISCV_RVV(vfwmul_vv_f32m8)(b,c,d)
#else
#define IFLOAT_V_T   vbfloat16m4_t
#define VLEV_IFLOAT  RISCV_RVV(vle16_v_bf16m4)
#define VLSEV_IFLOAT RISCV_RVV(vlse16_v_bf16m4)
#define VFMACCVV_FLOAT RISCV_RVV(vfwmaccbf16_vv_f32m8)
#endif

/*
 * Unordered sum reduction of an f32m8 vector into element 0 of an f32m1.
 * With RISCV_0p10_INTRINSICS the intrinsic takes an extra leading destination
 * operand; the macro supplies a variable named `v_res`, which must therefore
 * exist at every call site.
 */
#ifdef RISCV_0p10_INTRINSICS
#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m8_f32m1(v_res, va, vb, gvl)
#else
#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1)
#endif
56+
57+
/*
 * Transposed GEMV with 16-bit inputs and FLOAT output.  For i = 0 .. n-1:
 *
 *     y[i * inc_y] := beta * y[i * inc_y] + alpha * dot(A(:, i), x)
 *
 * m, n    : each dot product runs over m entries; n results are written.
 * alpha   : scalar applied to each dot product.
 * a, lda  : column i of A starts at a + i * lda and is read as m contiguous
 *           IFLOAT values.
 * x, inc_x: entry j of x is read from x[j * inc_x]; inc_x == 1 uses
 *           unit-stride vector loads, anything else uses strided loads.
 * beta    : when beta == 0 the previous contents of y are not read, so
 *           NaN/Inf in an unset y cannot leak into the result.
 * y, inc_y: result i is stored at y[i * inc_y].
 *
 * m <= 0 yields an empty dot product (0), i.e. y := beta * y.
 * Returns 0 in all cases.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
    BLASLONG i, j, k;
    BLASLONG ix, iy = 0;
    IFLOAT *a_ptr = a;
    FLOAT temp;

    IFLOAT_V_T va, vx;
#if !defined(HFLOAT16)
    FLOAT_V_T vz;
#endif
    FLOAT_V_T vr;
    BLASLONG gvl;
    /* Must keep this name: VFREDSUM_FLOAT refers to `v_res` directly when
     * RISCV_0p10_INTRINSICS is defined. */
    FLOAT_V_T_M1 v_res;

    /*
     * Loop-invariant chunking.  vsetvl returns 0 for m == 0, so the chunk
     * count must not be computed as m / vl unconditionally (division by
     * zero).  For m < 0 the quotient is <= 0 and no chunk is processed.
     */
    const BLASLONG vl_full = VSETVL(m);
    const BLASLONG full_chunks = (vl_full > 0) ? m / vl_full : 0;

#if !defined(HFLOAT16)
    /* Zero accumulator fed to the bf16 widening multiply-accumulate. */
    vz = VFMVVF_FLOAT(0, vl_full);
#endif

    if (inc_x == 1) {
        for (i = 0; i < n; i++) {
            v_res = VFMVVF_FLOAT_M1(0, 1);
            j = 0;
            for (k = 0; k < full_chunks; k++) {
                va = VLEV_IFLOAT(&a_ptr[j], vl_full);
                vx = VLEV_IFLOAT(&x[j], vl_full);
                /* Could accumulate here and reduce once outside the loop,
                 * but that reordering diverges far enough from the scalar
                 * path to make tests fail, so reduce per chunk. */
                vr = VFMACCVV_FLOAT(vz, va, vx, vl_full);
                v_res = VFREDSUM_FLOAT(vr, v_res, vl_full);
                j += vl_full;
            }
            if (j < m) {
                /* Remainder shorter than one full chunk. */
                gvl = VSETVL(m - j);
                va = VLEV_IFLOAT(&a_ptr[j], gvl);
                vx = VLEV_IFLOAT(&x[j], gvl);
                vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
                v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
            }
            temp = (FLOAT)EXTRACT_FLOAT(v_res);
            if (beta == 0.0) {
                y[iy] = alpha * temp;
            } else {
                y[iy] = y[iy] * beta + alpha * temp;
            }

            iy += inc_y;
            a_ptr += lda;
        }
    } else {
        /* The strided load intrinsic takes the stride in bytes. */
        BLASLONG stride_x = inc_x * sizeof(IFLOAT);
        for (i = 0; i < n; i++) {
            v_res = VFMVVF_FLOAT_M1(0, 1);
            j = 0;
            ix = 0;
            for (k = 0; k < full_chunks; k++) {
                va = VLEV_IFLOAT(&a_ptr[j], vl_full);
                vx = VLSEV_IFLOAT(&x[ix], stride_x, vl_full);
                vr = VFMACCVV_FLOAT(vz, va, vx, vl_full);
                v_res = VFREDSUM_FLOAT(vr, v_res, vl_full);
                j += vl_full;
                ix += inc_x * vl_full;
            }
            if (j < m) {
                /* Remainder shorter than one full chunk. */
                gvl = VSETVL(m - j);
                va = VLEV_IFLOAT(&a_ptr[j], gvl);
                vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
                vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
                v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
            }
            temp = (FLOAT)EXTRACT_FLOAT(v_res);
            if (beta == 0.0) {
                y[iy] = alpha * temp;
            } else {
                y[iy] = y[iy] * beta + alpha * temp;
            }

            iy += inc_y;
            a_ptr += lda;
        }
    }

    return 0;
}

0 commit comments

Comments
 (0)