@@ -69,13 +69,12 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
6969 FLOAT * a2_ptr = a + lda * width * 2 ;
7070
7171 for (j = 0 ; j < width ; j ++ ) {
72- for (i = 0 ; (i + sve_size - 1 ) < m ; i += sve_size ) {
73- ix = j * inc_x ;
74-
75- SV_TYPE x0_vec = SV_DUP (alpha * x [ix + (inc_x * width * 0 )]);
76- SV_TYPE x1_vec = SV_DUP (alpha * x [ix + (inc_x * width * 1 )]);
77- SV_TYPE x2_vec = SV_DUP (alpha * x [ix + (inc_x * width * 2 )]);
72+ ix = j * inc_x ;
7873
74+ SV_TYPE x0_vec = SV_DUP (alpha * x [ix + (inc_x * width * 0 )]);
75+ SV_TYPE x1_vec = SV_DUP (alpha * x [ix + (inc_x * width * 1 )]);
76+ SV_TYPE x2_vec = SV_DUP (alpha * x [ix + (inc_x * width * 2 )]);
77+ for (i = 0 ; (i + sve_size - 1 ) < m ; i += sve_size ) {
7978 SV_TYPE a00_vec = svld1 (pg_true , a0_ptr + i );
8079 SV_TYPE a01_vec = svld1 (pg_true , a1_ptr + i );
8180 SV_TYPE a02_vec = svld1 (pg_true , a2_ptr + i );
@@ -89,10 +88,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
8988 }
9089
9190 if (i < m ) {
92- SV_TYPE x0_vec = SV_DUP (alpha * x [ix + (inc_x * width * 0 )]);
93- SV_TYPE x1_vec = SV_DUP (alpha * x [ix + (inc_x * width * 1 )]);
94- SV_TYPE x2_vec = SV_DUP (alpha * x [ix + (inc_x * width * 2 )]);
95-
9691 SV_TYPE a00_vec = svld1 (pg , a0_ptr + i );
9792 SV_TYPE a01_vec = svld1 (pg , a1_ptr + i );
9893 SV_TYPE a02_vec = svld1 (pg , a2_ptr + i );
@@ -115,17 +110,16 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
115110 a_ptr = a2_ptr ;
116111 for (j = width * 3 ; j < n ; j ++ ) {
117112 ix = j * inc_x ;
113+ SV_TYPE x_vec = SV_DUP (alpha * x [(ix )]);
118114 for (i = 0 ; (i + sve_size - 1 ) < m ; i += sve_size ) {
119115 SV_TYPE y_vec = svld1 (pg_true , y + i );
120- SV_TYPE x_vec = SV_DUP (alpha * x [(ix )]);
121116 SV_TYPE a_vec = svld1 (pg_true , a_ptr + i );
122117 y_vec = svmla_x (pg_true , y_vec , a_vec , x_vec );
123118 svst1 (pg_true , y + i , y_vec );
124119 }
125120
126121 if (i < m ) {
127122 SV_TYPE y_vec = svld1 (pg , y + i );
128- SV_TYPE x_vec = SV_DUP (alpha * x [(ix )]);
129123 SV_TYPE a_vec = svld1 (pg , a_ptr + i );
130124 y_vec = svmla_m (pg , y_vec , a_vec , x_vec );
131125 svst1 (pg , y + i , y_vec );
0 commit comments