1+ /***************************************************************************
2+ Copyright (c) 2013, The OpenBLAS Project
3+ All rights reserved.
4+ Redistribution and use in source and binary forms, with or without
5+ modification, are permitted provided that the following conditions are
6+ met:
7+ 1. Redistributions of source code must retain the above copyright
8+ notice, this list of conditions and the following disclaimer.
9+ 2. Redistributions in binary form must reproduce the above copyright
10+ notice, this list of conditions and the following disclaimer in
11+ the documentation and/or other materials provided with the
12+ distribution.
13+ 3. Neither the name of the OpenBLAS project nor the names of
14+ its contributors may be used to endorse or promote products
15+ derived from this software without specific prior written permission.
16+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+ ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+ *****************************************************************************/
27+
28+ #include "common.h"
29+ #include <stdio.h>
30+
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * Without DOUBLE the 32-bit float (e32/f32) variants are selected, with
 * DOUBLE the 64-bit (e64/f64) ones; both use the m8 register grouping.
 *
 * VSETVL must be a function-like macro: there may be no whitespace between
 * the macro name and its parameter list, otherwise the preprocessor treats
 * "(n) ..." as the replacement text of an object-like macro.
 */
#if !defined(DOUBLE)
#define VSETVL_MAX      __riscv_vsetvlmax_e32m8()
#define VSETVL(n)       __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T       vfloat32m8_t
#define VLEV_FLOAT      __riscv_vle32_v_f32m8
#define VSEV_FLOAT      __riscv_vse32_v_f32m8
#define VLSEV_FLOAT     __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT     __riscv_vsse32_v_f32m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f32m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f32m8
#else
#define VSETVL_MAX      __riscv_vsetvlmax_e64m8()
#define VSETVL(n)       __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T       vfloat64m8_t
#define VLEV_FLOAT      __riscv_vle64_v_f64m8
#define VSEV_FLOAT      __riscv_vse64_v_f64m8
#define VLSEV_FLOAT     __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT     __riscv_vsse64_v_f64m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f64m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f64m8
#endif
52+
/*****************************************************
 * Order: ColMajor
 * Trans: transposed copy, with RVV optimization
 *
 ******************************************************/
58+
59+ int CNAME (BLASLONG rows , BLASLONG cols , FLOAT alpha , FLOAT * a , BLASLONG lda , FLOAT * b , BLASLONG ldb )
60+ {
61+ BLASLONG i , j ;
62+ FLOAT * aptr , * bptr ;
63+ size_t vl ;
64+ FLOAT_V_T va , vb ;
65+
66+ if (rows <= 0 ) return (0 );
67+ if (cols <= 0 ) return (0 );
68+
69+ aptr = a ;
70+
71+ if (alpha == 0.0 )
72+ {
73+ vl = VSETVL_MAX ;
74+ va = VFMVVF_FLOAT (0 , vl );
75+ for (i = 0 ; i < cols ; i ++ )
76+ {
77+ bptr = & b [i ];
78+ for (j = 0 ; j < rows ; j += vl )
79+ {
80+ vl = VSETVL (rows - j );
81+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
82+ }
83+ }
84+ return (0 );
85+ }
86+
87+ if (alpha == 1.0 )
88+ {
89+ for (i = 0 ; i < cols ; i ++ )
90+ {
91+ bptr = & b [i ];
92+ for (j = 0 ; j < rows ; j += vl )
93+ {
94+ vl = VSETVL (rows - j );
95+ va = VLEV_FLOAT (aptr + j , vl );
96+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
97+ }
98+ aptr += lda ;
99+ }
100+ return (0 );
101+ }
102+
103+ // General case with alpha scaling
104+ for (i = 0 ; i < cols ; i ++ )
105+ {
106+ bptr = & b [i ];
107+ for (j = 0 ; j < rows ; j += vl )
108+ {
109+ vl = VSETVL (rows - j );
110+ va = VLEV_FLOAT (aptr + j , vl );
111+ va = VFMULVF_FLOAT (va , alpha , vl );
112+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
113+ }
114+ aptr += lda ;
115+ }
116+
117+ return (0 );
118+ }
0 commit comments