Add LSX support for S8S8 and S8U8 GEMM kernels (microsoft#24397)

wszqkzqk · web-flow · commit 8267066d9a2c · 2025-10-26T20:17:11.000-07:00
### Description

- Add missing support for S8S8/S8U8 in the LSX GEMM kernels
- Add new dispatch entries for S8S8 and S8U8 GEMM operations in mlasi.h
- Extend the MLAS_PLATFORM struct to include S8S8 and S8U8 dispatch pointers for LSX

### Motivation and Context

To fix the build error reported in lcpu-club/loongarch-packages#526 (comment) on loong64:

```
error: ‘struct MLAS_PLATFORM’ has no member named ‘GemmS8S8Dispatch’
```

### Test status

Tested on Arch Linux for Loong64. Here is the build log:

* [onnxruntime-1.20.2-7.1-loong64-build.log](https://github.com/user-attachments/files/19710083/onnxruntime-1.20.2-7.1-loong64-build.log)

Signed-off-by: Zhou Qiankang <wszqkzqk@qq.com>
diff --git a/onnxruntime/core/mlas/lib/mlasi.h b/onnxruntime/core/mlas/lib/mlasi.h
@@ -1153,6 +1153,8 @@ struct MLAS_GEMM_QUANT_DISPATCH;
 
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchSse;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8X8DispatchLSX;
+extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8S8DispatchLSX;
+extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmS8U8DispatchLSX;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8S8DispatchSse41;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8S8DispatchAvx2;
 extern const MLAS_GEMM_QUANT_DISPATCH MlasGemmU8U8DispatchAvx2;
@@ -1337,6 +1339,8 @@ struct MLAS_PLATFORM {
 #if defined(MLAS_TARGET_LARCH64)
     const MLAS_GEMM_QUANT_DISPATCH* GemmU8S8Dispatch;
     const MLAS_GEMM_QUANT_DISPATCH* GemmU8U8Dispatch;
+    const MLAS_GEMM_QUANT_DISPATCH* GemmS8S8Dispatch;
+    const MLAS_GEMM_QUANT_DISPATCH* GemmS8U8Dispatch;
     MLAS_GEMM_FLOAT_KERNEL* GemmFloatKernel;
     MLAS_GEMM_DOUBLE_KERNEL* GemmDoubleKernel;
     MLAS_CONV_FLOAT_KERNEL* ConvNchwFloatKernel;
diff --git a/onnxruntime/core/mlas/lib/platform.cpp b/onnxruntime/core/mlas/lib/platform.cpp
@@ -747,10 +747,14 @@ Return Value:
 
         this->GemmU8S8Dispatch = &MlasGemmU8X8DispatchLSX;
         this->GemmU8U8Dispatch = &MlasGemmU8X8DispatchLSX;
+        this->GemmS8S8Dispatch = &MlasGemmS8S8DispatchLSX;
+        this->GemmS8U8Dispatch = &MlasGemmS8U8DispatchLSX;
     }else if( cap_lsx ){
         this->GemmFloatKernel = MlasGemmFloatKernelLSX;
         this->GemmU8S8Dispatch = &MlasGemmU8X8DispatchLSX;
         this->GemmU8U8Dispatch = &MlasGemmU8X8DispatchLSX;
+        this->GemmS8S8Dispatch = &MlasGemmS8S8DispatchLSX;
+        this->GemmS8U8Dispatch = &MlasGemmS8U8DispatchLSX;
         this->TransposePackB16x4Routine = MlasSgemmTransposePackB16x4LSX;
         this->GemmDoubleKernel = MlasGemmDoubleKernelLSX;
         this->ConvNchwFloatKernel = MlasConvNchwFloatKernelLSX;
diff --git a/onnxruntime/core/mlas/lib/qgemm.h b/onnxruntime/core/mlas/lib/qgemm.h
@@ -905,7 +905,10 @@ MlasGemmQuantGetDispatch(
         GemmQuantDispatch = GetMlasPlatform().GemmU8X8Dispatch;
     }
 #elif defined(MLAS_TARGET_LARCH64)
-    if (!AIsSigned) {
+    if (AIsSigned) {
+        GemmQuantDispatch =
+            BIsSigned ? GetMlasPlatform().GemmS8S8Dispatch : GetMlasPlatform().GemmS8U8Dispatch;
+    } else { // !AIsSigned
         GemmQuantDispatch =
             BIsSigned ? GetMlasPlatform().GemmU8S8Dispatch : GetMlasPlatform().GemmU8U8Dispatch;
     }
diff --git a/onnxruntime/core/mlas/lib/qgemm_kernel_lsx.cpp b/onnxruntime/core/mlas/lib/qgemm_kernel_lsx.cpp

Original file line number	Diff line number	Diff line change
`@@ -905,7 +905,10 @@ MlasGemmQuantGetDispatch(`
`905`	`905`	`GemmQuantDispatch = GetMlasPlatform().GemmU8X8Dispatch;`
`906`	`906`	`}`
`907`	`907`	`#elif defined(MLAS_TARGET_LARCH64)`
`908`		`- if (!AIsSigned) {`
	`908`	`+ if (AIsSigned) {`
	`909`	`+ GemmQuantDispatch =`
	`910`	`+ BIsSigned ? GetMlasPlatform().GemmS8S8Dispatch : GetMlasPlatform().GemmS8U8Dispatch;`
	`911`	`+ } else { // !AIsSigned`
`909`	`912`	`GemmQuantDispatch =`
`910`	`913`	`BIsSigned ? GetMlasPlatform().GemmU8S8Dispatch : GetMlasPlatform().GemmU8U8Dispatch;`
`911`	`914`	`}`