OpenMathLib
diff --git a/‎build_and_test.sh‎
Lines changed: 56 additions & 16 deletions b/‎build_and_test.sh‎
Lines changed: 56 additions & 16 deletions
diff --git a/‎kernel/riscv64/KERNEL.RISCV64_ZVL128B‎
Lines changed: 3 additions & 0 deletions b/‎kernel/riscv64/KERNEL.RISCV64_ZVL128B‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎kernel/riscv64/KERNEL.RISCV64_ZVL256B‎
Lines changed: 3 additions & 0 deletions b/‎kernel/riscv64/KERNEL.RISCV64_ZVL256B‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎kernel/riscv64/omatcopy_ct_rvv.c‎
Lines changed: 118 additions & 0 deletions b/‎kernel/riscv64/omatcopy_ct_rvv.c‎
Lines changed: 118 additions & 0 deletions
diff --git a/‎test_omatcopy_ct‎
16.2 KB b/‎test_omatcopy_ct‎
16.2 KB
diff --git a/‎test_omatcopy_ct_rvv‎
723 KB b/‎test_omatcopy_ct_rvv‎
723 KB
diff --git a/‎test_omatcopy_ct_scalar‎
723 KB b/‎test_omatcopy_ct_scalar‎
723 KB
@@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器"
 echo
 
 # 检查编译器
-if ! command -v gcc &> /dev/null; then
-    echo "错误: 未找到 GCC 编译器"
+if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then
+    CC="riscv64-unknown-linux-gnu-gcc"
+    echo "使用 RISC-V 交叉编译器"
+elif command -v gcc &> /dev/null; then
+    CC="gcc"
+    echo "使用系统 GCC 编译器"
+else
+    echo "错误: 未找到合适的编译器"
     exit 1
 fi
 
-# 显示 GCC 版本
-echo "GCC 版本:"
-gcc --version | head -1
+# 显示编译器版本
+echo "编译器版本:"
+$CC --version | head -1
 echo
 
 # 编译标准版本（无RVV）
 echo "[1/3] 编译标准版本（标量优化）..."
-gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
+if [[ "$CC" == *"riscv64"* ]]; then
+    $CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static
+else
+    $CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
+fi
 if [ $? -eq 0 ]; then
     echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar"
 else
@@ -30,7 +40,11 @@ fi
 
 # 编译RVV版本
 echo "[2/3] 编译RVV优化版本..."
-gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
+if [[ "$CC" == *"riscv64"* ]]; then
+    $CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static
+else
+    $CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
+fi
 if [ $? -eq 0 ]; then
     echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv"
 else
@@ -62,17 +76,33 @@ echo
 echo "=== 开始性能测试 ==="
 echo
 
-if [ -f "test_omatcopy_ct_rvv" ]; then
-    echo "运行 RVV 优化版本测试:"
-    echo "----------------------------------------"
-    ./test_omatcopy_ct_rvv
+# 如果是交叉编译，提示用户需要在目标平台运行
+if [[ "$CC" == *"riscv64"* ]]; then
+    echo "⚠ 检测到交叉编译环境，生成的可执行文件需要在 RISC-V 平台上运行"
+    echo "请将以下文件传输到目标 RISC-V 系统:"
+    echo "  - test_omatcopy_ct_scalar (标量版本)"
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo "  - test_omatcopy_ct_rvv (RVV优化版本)"
+    fi
     echo
+    echo "在目标系统上运行:"
+    echo "  ./test_omatcopy_ct_scalar  # 运行标量版本"
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo "  ./test_omatcopy_ct_rvv     # 运行RVV版本"
+    fi
+else
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo "运行 RVV 优化版本测试:"
+        echo "----------------------------------------"
+        ./test_omatcopy_ct_rvv
+        echo
+    fi
+    
+    echo "运行标量版本测试:"
+    echo "----------------------------------------"
+    ./test_omatcopy_ct_scalar
 fi
 
-echo "运行标量版本测试:"
-echo "----------------------------------------"
-./test_omatcopy_ct_scalar
-
 echo
 echo "=== 测试完成 ==="
 echo "文件说明:"
@@ -81,4 +111,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then
     echo "  test_omatcopy_ct_rvv    - RVV向量化版本"
 fi
 echo "  test_omatcopy_ct.c      - 源代码文件"
-echo "  build_and_test.sh       - 本编译脚本"
+echo "  build_and_test.sh       - 本编译脚本"
+echo
+echo "编译器信息:"
+echo "  使用编译器: $CC"
+if [[ "$CC" == *"riscv64"* ]]; then
+    echo "  目标架构: RISC-V 64位"
+    echo "  编译模式: 交叉编译 (静态链接)"
+else
+    echo "  目标架构: 本机架构"
+    echo "  编译模式: 本地编译"
+fi
@@ -266,3 +266,6 @@ ifndef SHGEMM_BETA
 SHGEMM_BETA =  gemm_beta_rvv.c
 endif
 endif
+
+DOMATCOPY_CT = omatcopy_ct_rvv.c
+SOMATCOPY_CT = omatcopy_ct_rvv.c
@@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
 DOMATCOPY_CN = omatcopy_cn_vector.c
 SOMATCOPY_CN = omatcopy_cn_vector.c
 
+DOMATCOPY_CT = omatcopy_ct_rvv.c
+SOMATCOPY_CT = omatcopy_ct_rvv.c
+
 
 ifeq ($(BUILD_BFLOAT16), 1)
 SHGEMMKERNEL    =  shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c
 
@@ -0,0 +1,118 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+#include <stdio.h>
+
+#if !defined(DOUBLE) /* DOUBLE undefined: bind the generic names to the 32-bit float, LMUL=8 intrinsics */
+#define VSETVL_MAX				__riscv_vsetvlmax_e32m8() /* largest vl available for e32/m8 */
+#define VSETVL(n)               __riscv_vsetvl_e32m8(n) /* vl granted for n remaining elements */
+#define FLOAT_V_T               vfloat32m8_t /* vector register-group type */
+#define VLEV_FLOAT              __riscv_vle32_v_f32m8 /* unit-stride load */
+#define VSEV_FLOAT              __riscv_vse32_v_f32m8 /* unit-stride store (not referenced by the kernel in this file) */
+#define VLSEV_FLOAT             __riscv_vlse32_v_f32m8 /* strided load (not referenced by the kernel in this file) */
+#define VSSEV_FLOAT             __riscv_vsse32_v_f32m8 /* strided store; stride argument is in bytes */
+#define VFMULVF_FLOAT           __riscv_vfmul_vf_f32m8 /* vector * scalar multiply */
+#define VFMVVF_FLOAT            __riscv_vfmv_v_f_f32m8 /* broadcast a scalar to every element */
+#else /* DOUBLE defined: same names bound to the 64-bit float, LMUL=8 intrinsics */
+#define VSETVL_MAX				__riscv_vsetvlmax_e64m8() /* largest vl available for e64/m8 */
+#define VSETVL(n)               __riscv_vsetvl_e64m8(n) /* vl granted for n remaining elements */
+#define FLOAT_V_T               vfloat64m8_t /* vector register-group type */
+#define VLEV_FLOAT              __riscv_vle64_v_f64m8 /* unit-stride load */
+#define VSEV_FLOAT              __riscv_vse64_v_f64m8 /* unit-stride store (not referenced by the kernel in this file) */
+#define VLSEV_FLOAT             __riscv_vlse64_v_f64m8 /* strided load (not referenced by the kernel in this file) */
+#define VSSEV_FLOAT             __riscv_vsse64_v_f64m8 /* strided store; stride argument is in bytes */
+#define VFMULVF_FLOAT           __riscv_vfmul_vf_f64m8 /* vector * scalar multiply */
+#define VFMVVF_FLOAT            __riscv_vfmv_v_f_f64m8 /* broadcast a scalar to every element */
+#endif
+
+/*****************************************************
+ * Order ColMajor
+ * Trans with RVV optimization
+ *
+ * Scaled out-of-place transpose:
+ *
+ *     b[i + j * ldb] = alpha * a[j + i * lda]
+ *
+ * for 0 <= i < cols and 0 <= j < rows.
+ *
+ * Parameters:
+ *   rows  - number of contiguous elements taken from each column of a
+ *   cols  - number of columns of a that are processed
+ *   alpha - scale factor; 0.0 and 1.0 take dedicated paths
+ *   a     - source matrix; consecutive columns are lda elements apart
+ *   lda   - column stride of a, in elements
+ *   b     - destination; element (i, j) is written to b[i + j * ldb]
+ *   ldb   - distance in b between consecutive j, in elements
+ *
+ * Returns 0 in every case. When rows <= 0 or cols <= 0 nothing is
+ * read or written.
+ *
+ * Each column of a is processed in strips of vl elements: a
+ * unit-stride load from a followed by a strided store into b whose
+ * byte stride is sizeof(FLOAT) * ldb.
+ *
+******************************************************/
+
+int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
+{
+	BLASLONG i, j;
+	FLOAT *aptr, *bptr;
+	size_t vl;
+	FLOAT_V_T va;
+
+	/* Empty matrix: nothing to do. */
+	if (rows <= 0) return(0);
+	if (cols <= 0) return(0);
+
+	aptr = a;
+
+	/* alpha == 0: b is filled with zeros and a is never read. */
+	if (alpha == 0.0)
+	{
+		/* Build the zero vector once at maximum length; every later
+		 * store uses a vl that is no larger than this. */
+		vl = VSETVL_MAX;
+		va = VFMVVF_FLOAT(0, vl);
+		for (i = 0; i < cols; i++)
+		{
+			bptr = &b[i];
+			/* vl is assigned in the loop body before the increment
+			 * expression runs, so j advances by the strip just stored. */
+			for (j = 0; j < rows; j += vl)
+			{
+				vl = VSETVL(rows - j);
+				VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+			}
+		}
+		return(0);
+	}
+
+	/* alpha == 1: plain transpose copy, no multiply. */
+	if (alpha == 1.0)
+	{
+		for (i = 0; i < cols; i++)
+		{
+			bptr = &b[i];
+			for (j = 0; j < rows; j += vl)
+			{
+				vl = VSETVL(rows - j);
+				/* Contiguous read of a[j .. j+vl) from the current column. */
+				va = VLEV_FLOAT(aptr + j, vl);
+				/* Scatter to b[i + j*ldb], b[i + (j+1)*ldb], ... */
+				VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+			}
+			aptr += lda;
+		}
+		return(0);
+	}
+
+	// General case with alpha scaling
+	for (i = 0; i < cols; i++)
+	{
+		bptr = &b[i];
+		for (j = 0; j < rows; j += vl)
+		{
+			vl = VSETVL(rows - j);
+			va = VLEV_FLOAT(aptr + j, vl);
+			va = VFMULVF_FLOAT(va, alpha, vl);
+			VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+		}
+		aptr += lda;
+	}
+
+	return(0);
+}