@@ -383,79 +383,85 @@ class multi_layer_perceptron_t {
383383 bool implementable = true ;
384384 if (gemm_layer1_t ::msg_type_a != msg_type::unaligned_2d) {
385385 if (gemm_layer1_t ::msg_type_a == msg_type::block_2d) {
386- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_a>::check_tensor (
387- (uint64_t )(args.matA_base .base ),
388- args.matrix_k_layer1 ,
389- args.matrix_m_layer1 ,
390- args.matA_ld );
386+ implementable &=
387+ kernel::block_2d<gpu_arch::XeHpc, dtype_a>::check_tensor (
388+ (uint64_t )(args.matA_base .base ),
389+ args.matrix_k_layer1 ,
390+ args.matrix_m_layer1 ,
391+ args.matA_ld );
391392 } else {
392393 implementable &=
393- kernel::general_1d<gpu_arch::Xe , dtype_a>::check_alignment (
394+ kernel::general_1d<gpu_arch::XeHpc , dtype_a>::check_alignment (
394395 args.matA_base .base , args.matA_ld );
395396 }
396397 }
397398 if (gemm_layer1_t ::msg_type_b != msg_type::unaligned_2d) {
398399 if (gemm_layer1_t ::msg_type_b == msg_type::block_2d) {
399- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_w>::check_tensor (
400- (uint64_t )(args.matW_base .base ),
401- args.matrix_n_layer1 ,
402- args.matrix_k_layer1 ,
403- args.matW_ld );
400+ implementable &=
401+ kernel::block_2d<gpu_arch::XeHpc, dtype_w>::check_tensor (
402+ (uint64_t )(args.matW_base .base ),
403+ args.matrix_n_layer1 ,
404+ args.matrix_k_layer1 ,
405+ args.matW_ld );
404406 } else {
405407 implementable &=
406- kernel::general_1d<gpu_arch::Xe , dtype_w>::check_alignment (
408+ kernel::general_1d<gpu_arch::XeHpc , dtype_w>::check_alignment (
407409 args.matW_base .base , args.matW_ld );
408410 }
409411 }
410412 if (epilogue_layer1_t ::msg_type_c != msg_type::unaligned_2d) {
411413 if (epilogue_layer1_t ::msg_type_c == msg_type::block_2d) {
412- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_b>::check_tensor (
413- (uint64_t )(args.matB_base .base ),
414- args.matrix_n_layer1 ,
415- args.matrix_m_layer1 ,
416- args.matB_ld );
414+ implementable &=
415+ kernel::block_2d<gpu_arch::XeHpc, dtype_b>::check_tensor (
416+ (uint64_t )(args.matB_base .base ),
417+ args.matrix_n_layer1 ,
418+ args.matrix_m_layer1 ,
419+ args.matB_ld );
417420 } else {
418421 implementable &=
419- kernel::general_1d<gpu_arch::Xe , dtype_b>::check_alignment (
422+ kernel::general_1d<gpu_arch::XeHpc , dtype_b>::check_alignment (
420423 args.matB_base .base , args.matB_ld );
421424 }
422425 }
423426 if (gemm_layer2_t ::msg_type_a != msg_type::unaligned_2d) {
424427 if (gemm_layer2_t ::msg_type_a == msg_type::block_2d) {
425- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_b>::check_tensor (
426- (uint64_t )(args.matB_base .base ),
427- args.matrix_k_layer2 ,
428- args.matrix_m_layer2 ,
429- args.matB_ld );
428+ implementable &=
429+ kernel::block_2d<gpu_arch::XeHpc, dtype_b>::check_tensor (
430+ (uint64_t )(args.matB_base .base ),
431+ args.matrix_k_layer2 ,
432+ args.matrix_m_layer2 ,
433+ args.matB_ld );
430434 } else {
431435 implementable &=
432- kernel::general_1d<gpu_arch::Xe , dtype_a>::check_alignment (
436+ kernel::general_1d<gpu_arch::XeHpc , dtype_a>::check_alignment (
433437 args.matB_base .base , args.matB_ld );
434438 }
435439 }
436440 if (gemm_layer2_t ::msg_type_b != msg_type::unaligned_2d) {
437441 if (gemm_layer2_t ::msg_type_b == msg_type::block_2d) {
438- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_v>::check_tensor (
439- (uint64_t )(args.matV_base .base ),
440- args.matrix_n_layer2 ,
441- args.matrix_k_layer2 ,
442- args.matV_ld );
442+ implementable &=
443+ kernel::block_2d<gpu_arch::XeHpc, dtype_v>::check_tensor (
444+ (uint64_t )(args.matV_base .base ),
445+ args.matrix_n_layer2 ,
446+ args.matrix_k_layer2 ,
447+ args.matV_ld );
443448 } else {
444449 implementable &=
445- kernel::general_1d<gpu_arch::Xe , dtype_v>::check_alignment (
450+ kernel::general_1d<gpu_arch::XeHpc , dtype_v>::check_alignment (
446451 args.matV_base .base , args.matV_ld );
447452 }
448453 }
449454 if (epilogue_layer2_t ::msg_type_c != msg_type::unaligned_2d) {
450455 if (epilogue_layer2_t ::msg_type_c == msg_type::block_2d) {
451- implementable &= kernel::block_2d<gpu_arch::Xe, dtype_c>::check_tensor (
452- (uint64_t )(args.matC_base .base ),
453- args.matrix_n_layer2 ,
454- args.matrix_m_layer2 ,
455- args.matC_ld );
456+ implementable &=
457+ kernel::block_2d<gpu_arch::XeHpc, dtype_c>::check_tensor (
458+ (uint64_t )(args.matC_base .base ),
459+ args.matrix_n_layer2 ,
460+ args.matrix_m_layer2 ,
461+ args.matC_ld );
456462 } else {
457463 implementable &=
458- kernel::general_1d<gpu_arch::Xe , dtype_c>::check_alignment (
464+ kernel::general_1d<gpu_arch::XeHpc , dtype_c>::check_alignment (
459465 args.matC_base .base , args.matC_ld );
460466 }
461467 }
@@ -557,7 +563,7 @@ class multi_layer_perceptron_t {
557563 xetla_nbarrier_t <
558564 work_group_layer2_t ::size,
559565 work_group_layer2_t ::size,
560- gpu_arch::Xe >
566+ gpu_arch::XeHpc >
561567 nbarrier_global;
562568 nbarrier_global.init_nbarrier (
563569 global_nbarr_base, nbarrier_role::producer_consumer);
0 commit comments