Skip to content

Commit a398a41

Browse files
author
H. Peter Anvin (Intel)
committed
travis: update AMX test
Update AMX test per previous commit from IntLatx64. Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
1 parent 019615f commit a398a41

File tree

2 files changed

+102
-21
lines changed

2 files changed

+102
-21
lines changed

travis/test/amx.asm

Lines changed: 102 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,36 +1,117 @@
11
bits 64
22

3-
; amx -- emit one copy of every AMX test instruction for a chosen set of
;        tile registers.
;
; NOTE: this text is a rendered diff. A bare "N-" gutter line precedes a
; removed (old) line, "N+" precedes an added (new) line, and a fused pair
; of numbers precedes an unchanged line.
;
; Old form: %macro amx 1  -- %1 selects the single tile register used for
;           every operand.
; New form: %macro amx 3  -- %1, %2, %3 are register numbers:
;             treg  = tmm%1
;             treg2 = tmm%2
;             treg3 = tmm%3
;             zreg  = zmm%1
;
; Memory operands use rax as base, either alone, with rdx as index, or
; with rdx*2 as index.
; The tcvtrow* / tilemovrow instructions are each emitted twice: once
; with eax and once with the literal value of %1 as the third operand.
; The trailing ";AMX..." tag on each new line presumably names the ISA
; extension the instruction belongs to -- TODO confirm.
;
; NOTE(review): in the new version the tileloadd* lines were rewritten
; from the "[rax,rdx]" comma form to "[rax+rdx]", but the new tilestored
; lines still use "[rax,rdx]" / "[rax,rdx*2]". Confirm whether this is
; intentional.
%macro amx 1
3+
%macro amx 3
44
%define treg tmm %+ %1
5+
%define treg2 tmm %+ %2
6+
%define treg3 tmm %+ %3
7+
%define zreg zmm %+ %1
58

6-
ldtilecfg [rsi]
7-
sttilecfg [rdi]
9+
ldtilecfg [rsi] ;AMX_TILE
10+
sttilecfg [rdi] ;AMX_TILE
811

9-
tilezero treg
12+
tilezero treg ;AMX_TILE
1013

11-
tileloadd treg, [rax]
12-
tileloadd treg, [rax,rdx]
13-
tileloadd treg, [rax,rdx*2]
14+
tileloadd treg, [rax] ;AMX_TILE
15+
tileloadd treg, [rax+rdx] ;AMX_TILE
16+
tileloadd treg, [rax+rdx*2] ;AMX_TILE
1417

15-
tileloaddt1 treg, [rax]
16-
tileloaddt1 treg, [rax,rdx]
17-
tileloaddt1 treg, [rax,rdx*2]
18+
tileloaddt1 treg, [rax] ;AMX_TILE
19+
tileloaddt1 treg, [rax+rdx] ;AMX_TILE
20+
tileloaddt1 treg, [rax+rdx*2] ;AMX_TILE
1821

19-
tdpbf16ps treg, treg, treg
20-
tdpbssd treg, treg, treg
21-
tdpbusd treg, treg, treg
22-
tdpbsud treg, treg, treg
23-
tdpbuud treg, treg, treg
22+
tileloaddrs treg, [rax] ;AMX-MOVRS
23+
tileloaddrs treg, [rax+rdx] ;AMX-MOVRS
24+
tileloaddrs treg, [rax+rdx*2] ;AMX-MOVRS
2425

25-
tilestored [rax], treg
26-
tilestored [rax,rdx], treg
27-
tilestored [rax,rdx*2], treg
26+
tileloaddrst1 treg, [rax] ;AMX-MOVRS
27+
tileloaddrst1 treg, [rax+rdx] ;AMX-MOVRS
28+
tileloaddrst1 treg, [rax+rdx*2] ;AMX-MOVRS
2829

29-
tilerelease
30+
tdpbf16ps treg, treg2, treg3 ;AMX-BF16
31+
tdpbssd treg, treg2, treg3 ;AMX_INT8
32+
tdpbusd treg, treg2, treg3 ;AMX_INT8
33+
tdpbsud treg, treg2, treg3 ;AMX_INT8
34+
tdpbuud treg, treg2, treg3 ;AMX_INT8
35+
tdpfp16ps treg, treg2, treg3 ;AMX-FP16
36+
tcmmimfp16ps treg, treg2, treg3 ;AMX-COMPLEX
37+
tcmmrlfp16ps treg, treg2, treg3 ;AMX-COMPLEX
38+
39+
tmmultf32ps treg, treg2, treg3 ;AMX_TF32
40+
41+
tdpbf8ps treg, treg2, treg3 ;AMX-FP8
42+
tdpbhf8ps treg, treg2, treg3 ;AMX-FP8
43+
tdphbf8ps treg, treg2, treg3 ;AMX-FP8
44+
tdphf8ps treg, treg2, treg3 ;AMX-FP8
45+
46+
tcvtrowd2ps zreg, treg, eax ;AMX-AVX512
47+
tcvtrowd2ps zreg, treg, %1 ;AMX-AVX512
48+
tcvtrowps2bf16h zreg, treg, eax ;AMX-AVX512
49+
tcvtrowps2bf16h zreg, treg, %1 ;AMX-AVX512
50+
tcvtrowps2bf16l zreg, treg, eax ;AMX-AVX512
51+
tcvtrowps2bf16l zreg, treg, %1 ;AMX-AVX512
52+
tcvtrowps2phh zreg, treg, eax ;AMX-AVX512
53+
tcvtrowps2phh zreg, treg, %1 ;AMX-AVX512
54+
tcvtrowps2phl zreg, treg, eax ;AMX-AVX512
55+
tcvtrowps2phl zreg, treg, %1 ;AMX-AVX512
56+
tilemovrow zreg, treg, eax ;AMX-AVX512
57+
tilemovrow zreg, treg, %1 ;AMX-AVX512
58+
59+
t2rpntlvwz0 treg, [rax] ;AMX-TRANSPOSE
60+
t2rpntlvwz0 treg, [rax+rdx] ;AMX-TRANSPOSE
61+
t2rpntlvwz0 treg, [rax+rdx*2] ;AMX-TRANSPOSE
62+
63+
t2rpntlvwz0t1 treg, [rax] ;AMX-TRANSPOSE
64+
t2rpntlvwz0t1 treg, [rax+rdx] ;AMX-TRANSPOSE
65+
t2rpntlvwz0t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
66+
67+
t2rpntlvwz1 treg, [rax] ;AMX-TRANSPOSE
68+
t2rpntlvwz1 treg, [rax+rdx] ;AMX-TRANSPOSE
69+
t2rpntlvwz1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
70+
71+
t2rpntlvwz1t1 treg, [rax] ;AMX-TRANSPOSE
72+
t2rpntlvwz1t1 treg, [rax+rdx] ;AMX-TRANSPOSE
73+
t2rpntlvwz1t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
74+
75+
ttransposed treg, treg ;AMX-TRANSPOSE
76+
77+
t2rpntlvwz0rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
78+
t2rpntlvwz0rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
79+
t2rpntlvwz0rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
80+
81+
t2rpntlvwz0rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
82+
t2rpntlvwz0rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
83+
t2rpntlvwz0rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
84+
85+
t2rpntlvwz1rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
86+
t2rpntlvwz1rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
87+
t2rpntlvwz1rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
88+
89+
t2rpntlvwz1rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
90+
t2rpntlvwz1rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
91+
t2rpntlvwz1rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
92+
93+
ttdpbf16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-BF16
94+
ttdpfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-FP16
95+
ttcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
96+
ttcmmrlfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
97+
tconjtcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
98+
tconjtfp16 treg, treg ;AMX-TRANSPOSE + AMX-COMPLEX
99+
100+
ttmmultf32ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX_TF32
101+
102+
tilestored [rax], treg ;AMX_TILE
103+
tilestored [rax,rdx], treg ;AMX_TILE
104+
tilestored [rax,rdx*2], treg ;AMX_TILE
105+
106+
tilerelease ;AMX_TILE
30107
%endmacro
31108

32109
; Driver: expand the amx macro 8 times.
;
; Old form: "amx n" with n stepping 0, 1, 2, ... (plain n+1).
; New form: "amx n, m, l" with n, m, l starting at 0, 1, 2. Each is
; advanced by one modulo 8 after every expansion, so the three arguments
; stay in the range 0..7 and are always distinct from one another.
%assign n 0
110+
%assign m 1
111+
%assign l 2
33112
%rep 8
34-
amx n
35-
%assign n n+1
113+
amx n, m, l
114+
%assign n ((n+1) % 8)
115+
%assign m ((m+1) % 8)
116+
%assign l ((l+1) % 8)
36117
%endrep

travis/test/amx.bin.t

2.64 KB
Binary file not shown.

0 commit comments

Comments
 (0)