@@ -51,11 +51,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
5151
/*
 * Names of the RVV types and intrinsics used by this kernel.  Each one is
 * assembled by token pasting from ELEN and LMUL, so the same source serves
 * every ELEN/LMUL combination the file is built with.  JOIN, JOIN2,
 * RISCV_RVV, ELEN and LMUL are expected to be defined before this point.
 */
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)

/* Vector value types: the LMUL-grouped type and its m1 counterpart. */
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define FLOAT_V_M1_T JOIN(vfloat, ELEN, m1, _t, _)

/* Loads and stores (vle/vse) and their strided forms (vlse/vsse). */
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)

/* vfmacc with a scalar operand, and vfmv from a scalar (LMUL and m1 forms). */
#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)

/*
 * vfredusum reduction into an m1 vector.  With RISCV_0p10_INTRINSICS the
 * wrapper passes an extra leading argument, and it takes that argument from
 * a variable literally named v_res, so callers must have v_res in scope.
 * The parameter list has to follow the macro name with no space in between;
 * otherwise the preprocessor defines an object-like macro and the parameters
 * are never substituted.
 */
#ifdef RISCV_0p10_INTRINSICS
#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2(ELEN, m1))(v_res, va, vb, gvl)
#else
#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2(ELEN, m1))
#endif
5968
6069int CNAME (BLASLONG n , BLASLONG dummy0 , BLASLONG dummy1 , FLOAT da , FLOAT * x , BLASLONG inc_x , FLOAT * y , BLASLONG inc_y , FLOAT * dummy , BLASLONG dummy2 )
6170{
@@ -123,7 +132,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
123132 VSEV_FLOAT (& y [j ], vy0 , gvl );
124133 j += gvl ;
125134 }
126- } else if ( inc_x == 1 ) {
135+ } else if ( 1 == inc_x && 0 != inc_y ) {
127136 stride_y = inc_y * sizeof (FLOAT );
128137 gvl = VSETVL (n );
129138 if (gvl <= n /2 ){
@@ -151,6 +160,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
151160 VSSEV_FLOAT (& y [j * inc_y ], stride_y , vy0 , gvl );
152161 j += gvl ;
153162 }
163+ } else if ( 0 == inc_y ) {
164+ BLASLONG stride_x = inc_x * sizeof (FLOAT );
165+ size_t in_vl = VSETVL (n );
166+ vy0 = VFMVVF_FLOAT ( y [0 ], in_vl );
167+
168+ for (size_t vl ; n > 0 ; n -= vl , x += vl * inc_x ) {
169+ vl = VSETVL (n );
170+ vx0 = VLSEV_FLOAT (x , stride_x , vl );
171+ vy0 = VFMACCVF_FLOAT (vy0 , da , vx0 , vl );
172+ }
173+ FLOAT_V_M1_T v_res = VFMVVF_FLOAT_M1 ( 0.0f , 1 );
174+ v_res = VFREDSUMVS_FLOAT ( vy0 , v_res , in_vl );
175+ y [0 ] = EXTRACT_FLOAT (v_res );
154176 }else {
155177 stride_x = inc_x * sizeof (FLOAT );
156178 stride_y = inc_y * sizeof (FLOAT );
0 commit comments