Skip to content

Commit 708d586

Browse files
Add OMATCOPY_CT performance test with RVV optimization
Co-authored-by: gong-flying <gongxiaofei24@iscas.ac.cn>
1 parent 2953c7d commit 708d586

File tree

7 files changed

+180
-16
lines changed

7 files changed

+180
-16
lines changed

build_and_test.sh

Lines changed: 56 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器"
88
echo
99

1010
# 检查编译器
11-
if ! command -v gcc &> /dev/null; then
12-
echo "错误: 未找到 GCC 编译器"
11+
if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then
12+
CC="riscv64-unknown-linux-gnu-gcc"
13+
echo "使用 RISC-V 交叉编译器"
14+
elif command -v gcc &> /dev/null; then
15+
CC="gcc"
16+
echo "使用系统 GCC 编译器"
17+
else
18+
echo "错误: 未找到合适的编译器"
1319
exit 1
1420
fi
1521

16-
# 显示 GCC 版本
17-
echo "GCC 版本:"
18-
gcc --version | head -1
22+
# 显示编译器版本
23+
echo "编译器版本:"
24+
$CC --version | head -1
1925
echo
2026

2127
# 编译标准版本(无RVV)
2228
echo "[1/3] 编译标准版本(标量优化)..."
23-
gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
29+
if [[ "$CC" == *"riscv64"* ]]; then
30+
$CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static
31+
else
32+
$CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
33+
fi
2434
if [ $? -eq 0 ]; then
2535
echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar"
2636
else
@@ -30,7 +40,11 @@ fi
3040

3141
# 编译RVV版本
3242
echo "[2/3] 编译RVV优化版本..."
33-
gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
43+
if [[ "$CC" == *"riscv64"* ]]; then
44+
$CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static
45+
else
46+
$CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
47+
fi
3448
if [ $? -eq 0 ]; then
3549
echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv"
3650
else
@@ -62,17 +76,33 @@ echo
6276
echo "=== 开始性能测试 ==="
6377
echo
6478

65-
if [ -f "test_omatcopy_ct_rvv" ]; then
66-
echo "运行 RVV 优化版本测试:"
67-
echo "----------------------------------------"
68-
./test_omatcopy_ct_rvv
79+
# 如果是交叉编译,提示用户需要在目标平台运行
80+
if [[ "$CC" == *"riscv64"* ]]; then
81+
echo "⚠ 检测到交叉编译环境,生成的可执行文件需要在 RISC-V 平台上运行"
82+
echo "请将以下文件传输到目标 RISC-V 系统:"
83+
echo " - test_omatcopy_ct_scalar (标量版本)"
84+
if [ -f "test_omatcopy_ct_rvv" ]; then
85+
echo " - test_omatcopy_ct_rvv (RVV优化版本)"
86+
fi
6987
echo
88+
echo "在目标系统上运行:"
89+
echo " ./test_omatcopy_ct_scalar # 运行标量版本"
90+
if [ -f "test_omatcopy_ct_rvv" ]; then
91+
echo " ./test_omatcopy_ct_rvv # 运行RVV版本"
92+
fi
93+
else
94+
if [ -f "test_omatcopy_ct_rvv" ]; then
95+
echo "运行 RVV 优化版本测试:"
96+
echo "----------------------------------------"
97+
./test_omatcopy_ct_rvv
98+
echo
99+
fi
100+
101+
echo "运行标量版本测试:"
102+
echo "----------------------------------------"
103+
./test_omatcopy_ct_scalar
70104
fi
71105

72-
echo "运行标量版本测试:"
73-
echo "----------------------------------------"
74-
./test_omatcopy_ct_scalar
75-
76106
echo
77107
echo "=== 测试完成 ==="
78108
echo "文件说明:"
@@ -81,4 +111,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then
81111
echo " test_omatcopy_ct_rvv - RVV向量化版本"
82112
fi
83113
echo " test_omatcopy_ct.c - 源代码文件"
84-
echo " build_and_test.sh - 本编译脚本"
114+
echo " build_and_test.sh - 本编译脚本"
115+
echo
116+
echo "编译器信息:"
117+
echo " 使用编译器: $CC"
118+
if [[ "$CC" == *"riscv64"* ]]; then
119+
echo " 目标架构: RISC-V 64位"
120+
echo " 编译模式: 交叉编译 (静态链接)"
121+
else
122+
echo " 目标架构: 本机架构"
123+
echo " 编译模式: 本地编译"
124+
fi

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,3 +266,6 @@ ifndef SHGEMM_BETA
266266
SHGEMM_BETA = gemm_beta_rvv.c
267267
endif
268268
endif
269+
270+
DOMATCOPY_CT = omatcopy_ct_rvv.c
271+
SOMATCOPY_CT = omatcopy_ct_rvv.c

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
219219
DOMATCOPY_CN = omatcopy_cn_vector.c
220220
SOMATCOPY_CN = omatcopy_cn_vector.c
221221

222+
DOMATCOPY_CT = omatcopy_ct_rvv.c
223+
SOMATCOPY_CT = omatcopy_ct_rvv.c
224+
222225

223226
ifeq ($(BUILD_BFLOAT16), 1)
224227
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c

kernel/riscv64/omatcopy_ct_rvv.c

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
/***************************************************************************
2+
Copyright (c) 2013, The OpenBLAS Project
3+
All rights reserved.
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are
6+
met:
7+
1. Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
2. Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
3. Neither the name of the OpenBLAS project nor the names of
14+
its contributors may be used to endorse or promote products
15+
derived from this software without specific prior written permission.
16+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
25+
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
*****************************************************************************/
27+
28+
#include "common.h"
29+
#include <stdio.h>
30+
31+
/*
 * Element-width-specific aliases for the RVV intrinsics used by this kernel.
 * Every alias selects the LMUL=8 (m8) variant of the intrinsic.
 */
#if defined(DOUBLE)
/* DOUBLE defined: 64-bit (e64 / f64) element intrinsics. */
#define VSETVL_MAX __riscv_vsetvlmax_e64m8()
#define VSETVL(n) __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T vfloat64m8_t
#define VLEV_FLOAT __riscv_vle64_v_f64m8
#define VSEV_FLOAT __riscv_vse64_v_f64m8
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
#else
/* DOUBLE not defined: 32-bit (e32 / f32) element intrinsics. */
#define VSETVL_MAX __riscv_vsetvlmax_e32m8()
#define VSETVL(n) __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T vfloat32m8_t
#define VLEV_FLOAT __riscv_vle32_v_f32m8
#define VSEV_FLOAT __riscv_vse32_v_f32m8
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
#endif
52+
53+
/*****************************************************
54+
* Order ColMajor
55+
* Trans with RVV optimization
56+
*
57+
******************************************************/
58+
59+
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
60+
{
61+
BLASLONG i, j;
62+
FLOAT *aptr, *bptr;
63+
size_t vl;
64+
FLOAT_V_T va, vb;
65+
66+
if (rows <= 0) return(0);
67+
if (cols <= 0) return(0);
68+
69+
aptr = a;
70+
71+
if (alpha == 0.0)
72+
{
73+
vl = VSETVL_MAX;
74+
va = VFMVVF_FLOAT(0, vl);
75+
for (i = 0; i < cols; i++)
76+
{
77+
bptr = &b[i];
78+
for (j = 0; j < rows; j += vl)
79+
{
80+
vl = VSETVL(rows - j);
81+
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
82+
}
83+
}
84+
return(0);
85+
}
86+
87+
if (alpha == 1.0)
88+
{
89+
for (i = 0; i < cols; i++)
90+
{
91+
bptr = &b[i];
92+
for (j = 0; j < rows; j += vl)
93+
{
94+
vl = VSETVL(rows - j);
95+
va = VLEV_FLOAT(aptr + j, vl);
96+
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
97+
}
98+
aptr += lda;
99+
}
100+
return(0);
101+
}
102+
103+
// General case with alpha scaling
104+
for (i = 0; i < cols; i++)
105+
{
106+
bptr = &b[i];
107+
for (j = 0; j < rows; j += vl)
108+
{
109+
vl = VSETVL(rows - j);
110+
va = VLEV_FLOAT(aptr + j, vl);
111+
va = VFMULVF_FLOAT(va, alpha, vl);
112+
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
113+
}
114+
aptr += lda;
115+
}
116+
117+
return(0);
118+
}

test_omatcopy_ct

16.2 KB
Binary file not shown.

test_omatcopy_ct_rvv

723 KB
Binary file not shown.

test_omatcopy_ct_scalar

723 KB
Binary file not shown.

0 commit comments

Comments
 (0)