1+ /***************************************************************************
2+ Copyright (c) 2013, The OpenBLAS Project
3+ All rights reserved.
4+ Redistribution and use in source and binary forms, with or without
5+ modification, are permitted provided that the following conditions are
6+ met:
7+ 1. Redistributions of source code must retain the above copyright
8+ notice, this list of conditions and the following disclaimer.
9+ 2. Redistributions in binary form must reproduce the above copyright
10+ notice, this list of conditions and the following disclaimer in
11+ the documentation and/or other materials provided with the
12+ distribution.
13+ 3. Neither the name of the OpenBLAS project nor the names of
14+ its contributors may be used to endorse or promote products
15+ derived from this software without specific prior written permission.
16+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+ *****************************************************************************/
27+
28+ #include "common.h"
29+ #include <stdio.h>
30+
/*
 * Precision-dependent aliases for the RVV intrinsics used by this kernel.
 * The 32-bit element (e32 / f32) variants are selected unless DOUBLE is
 * defined, in which case the 64-bit (e64 / f64) variants are used. Both
 * sets use the m8 intrinsic variants.
 *
 * VSETVL is a function-like macro: no whitespace may separate the macro
 * name from its '(' in the definition, otherwise the preprocessor treats it
 * as an object-like macro whose replacement text starts with "(n)".
 */
#if !defined(DOUBLE)
#define VSETVL_MAX      __riscv_vsetvlmax_e32m8()
#define VSETVL(n)       __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T       vfloat32m8_t
#define VLEV_FLOAT      __riscv_vle32_v_f32m8
#define VSEV_FLOAT      __riscv_vse32_v_f32m8
#define VSSEV_FLOAT     __riscv_vsse32_v_f32m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f32m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f32m8
#else
#define VSETVL_MAX      __riscv_vsetvlmax_e64m8()
#define VSETVL(n)       __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T       vfloat64m8_t
#define VLEV_FLOAT      __riscv_vle64_v_f64m8
#define VSEV_FLOAT      __riscv_vse64_v_f64m8
#define VSSEV_FLOAT     __riscv_vsse64_v_f64m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f64m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f64m8
#endif
50+
/*****************************************************
 * Order: ColMajor
 * Trans: transposed copy with scaling, B = alpha * A^T
 * Vectorized with the RISC-V Vector extension (RVV)
 ******************************************************/
55+
56+ int CNAME (BLASLONG rows , BLASLONG cols , FLOAT alpha , FLOAT * a , BLASLONG lda , FLOAT * b , BLASLONG ldb )
57+ {
58+ BLASLONG i , j ;
59+ FLOAT * aptr , * bptr ;
60+ size_t vl ;
61+
62+ FLOAT_V_T va ;
63+ if (rows <= 0 ) return (0 );
64+ if (cols <= 0 ) return (0 );
65+
66+ aptr = a ;
67+
68+ if (alpha == 0.0 )
69+ {
70+ vl = VSETVL_MAX ;
71+ va = VFMVVF_FLOAT (0 , vl );
72+ for (i = 0 ; i < cols ; i ++ )
73+ {
74+ bptr = & b [i ];
75+ for (j = 0 ; j < rows ; j += vl )
76+ {
77+ vl = VSETVL (rows - j );
78+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
79+ }
80+ }
81+ return (0 );
82+ }
83+
84+ if (alpha == 1.0 )
85+ {
86+ for (i = 0 ; i < cols ; i ++ )
87+ {
88+ bptr = & b [i ];
89+ for (j = 0 ; j < rows ; j += vl )
90+ {
91+ vl = VSETVL (rows - j );
92+ va = VLEV_FLOAT (aptr + j , vl );
93+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
94+ }
95+ aptr += lda ;
96+ }
97+ return (0 );
98+ }
99+
100+ for (i = 0 ; i < cols ; i ++ )
101+ {
102+ bptr = & b [i ];
103+ for (j = 0 ; j < rows ; j += vl )
104+ {
105+ vl = VSETVL (rows - j );
106+ va = VLEV_FLOAT (aptr + j , vl );
107+ va = VFMULVF_FLOAT (va , alpha , vl );
108+ VSSEV_FLOAT (bptr + j * ldb , sizeof (FLOAT ) * ldb , va , vl );
109+ }
110+ aptr += lda ;
111+ }
112+
113+ return (0 );
114+ }
0 commit comments