Skip to content

Commit 019615f

Browse files
InstLatx64 authored and H. Peter Anvin (Intel) committed
AMX fix
-- AMX instruction fixes
-- AMX test file extension with new levels according to Intel ISA Extension Guide 58th edition

Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
1 parent 292d852 commit 019615f

File tree

3 files changed

+117
-40
lines changed

3 files changed

+117
-40
lines changed

test/amx.asm

Lines changed: 93 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -4,34 +4,106 @@
44
%define treg tmm %+ %1
55
%define treg2 tmm %+ %2
66
%define treg3 tmm %+ %3
7+
%define zreg zmm %+ %1
78

8-
ldtilecfg [rsi]
9-
sttilecfg [rdi]
9+
ldtilecfg [rsi] ;AMX_TILE
10+
sttilecfg [rdi] ;AMX_TILE
1011

11-
tilezero treg
12+
tilezero treg ;AMX_TILE
1213

13-
tileloadd treg, [rax]
14-
tileloadd treg, [rax,rdx]
15-
tileloadd treg, [rax,rdx*2]
14+
tileloadd treg, [rax] ;AMX_TILE
15+
tileloadd treg, [rax+rdx] ;AMX_TILE
16+
tileloadd treg, [rax+rdx*2] ;AMX_TILE
1617

17-
tileloaddt1 treg, [rax]
18-
tileloaddt1 treg, [rax,rdx]
19-
tileloaddt1 treg, [rax,rdx*2]
18+
tileloaddt1 treg, [rax] ;AMX_TILE
19+
tileloaddt1 treg, [rax+rdx] ;AMX_TILE
20+
tileloaddt1 treg, [rax+rdx*2] ;AMX_TILE
2021

21-
tdpbf16ps treg, treg2, treg3
22-
tdpbssd treg, treg2, treg3
23-
tdpbusd treg, treg2, treg3
24-
tdpbsud treg, treg2, treg3
25-
tdpbuud treg, treg2, treg3
26-
tdpfp16ps treg, treg2, treg3
27-
tcmmimfp16ps treg, treg2, treg3
28-
tcmmrlfp16ps treg, treg2, treg3
22+
tileloaddrs treg, [rax] ;AMX-MOVRS
23+
tileloaddrs treg, [rax+rdx] ;AMX-MOVRS
24+
tileloaddrs treg, [rax+rdx*2] ;AMX-MOVRS
2925

30-
tilestored [rax], treg
31-
tilestored [rax,rdx], treg
32-
tilestored [rax,rdx*2], treg
26+
tileloaddrst1 treg, [rax] ;AMX-MOVRS
27+
tileloaddrst1 treg, [rax+rdx] ;AMX-MOVRS
28+
tileloaddrst1 treg, [rax+rdx*2] ;AMX-MOVRS
3329

34-
tilerelease
30+
tdpbf16ps treg, treg2, treg3 ;AMX-BF16
31+
tdpbssd treg, treg2, treg3 ;AMX_INT8
32+
tdpbusd treg, treg2, treg3 ;AMX_INT8
33+
tdpbsud treg, treg2, treg3 ;AMX_INT8
34+
tdpbuud treg, treg2, treg3 ;AMX_INT8
35+
tdpfp16ps treg, treg2, treg3 ;AMX-FP16
36+
tcmmimfp16ps treg, treg2, treg3 ;AMX-COMPLEX
37+
tcmmrlfp16ps treg, treg2, treg3 ;AMX-COMPLEX
38+
39+
tmmultf32ps treg, treg2, treg3 ;AMX_TF32
40+
41+
tdpbf8ps treg, treg2, treg3 ;AMX-FP8
42+
tdpbhf8ps treg, treg2, treg3 ;AMX-FP8
43+
tdphbf8ps treg, treg2, treg3 ;AMX-FP8
44+
tdphf8ps treg, treg2, treg3 ;AMX-FP8
45+
46+
tcvtrowd2ps zreg, treg, eax ;AMX-AVX512
47+
tcvtrowd2ps zreg, treg, %1 ;AMX-AVX512
48+
tcvtrowps2bf16h zreg, treg, eax ;AMX-AVX512
49+
tcvtrowps2bf16h zreg, treg, %1 ;AMX-AVX512
50+
tcvtrowps2bf16l zreg, treg, eax ;AMX-AVX512
51+
tcvtrowps2bf16l zreg, treg, %1 ;AMX-AVX512
52+
tcvtrowps2phh zreg, treg, eax ;AMX-AVX512
53+
tcvtrowps2phh zreg, treg, %1 ;AMX-AVX512
54+
tcvtrowps2phl zreg, treg, eax ;AMX-AVX512
55+
tcvtrowps2phl zreg, treg, %1 ;AMX-AVX512
56+
tilemovrow zreg, treg, eax ;AMX-AVX512
57+
tilemovrow zreg, treg, %1 ;AMX-AVX512
58+
59+
t2rpntlvwz0 treg, [rax] ;AMX-TRANSPOSE
60+
t2rpntlvwz0 treg, [rax+rdx] ;AMX-TRANSPOSE
61+
t2rpntlvwz0 treg, [rax+rdx*2] ;AMX-TRANSPOSE
62+
63+
t2rpntlvwz0t1 treg, [rax] ;AMX-TRANSPOSE
64+
t2rpntlvwz0t1 treg, [rax+rdx] ;AMX-TRANSPOSE
65+
t2rpntlvwz0t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
66+
67+
t2rpntlvwz1 treg, [rax] ;AMX-TRANSPOSE
68+
t2rpntlvwz1 treg, [rax+rdx] ;AMX-TRANSPOSE
69+
t2rpntlvwz1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
70+
71+
t2rpntlvwz1t1 treg, [rax] ;AMX-TRANSPOSE
72+
t2rpntlvwz1t1 treg, [rax+rdx] ;AMX-TRANSPOSE
73+
t2rpntlvwz1t1 treg, [rax+rdx*2] ;AMX-TRANSPOSE
74+
75+
ttransposed treg, treg ;AMX-TRANSPOSE
76+
77+
t2rpntlvwz0rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
78+
t2rpntlvwz0rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
79+
t2rpntlvwz0rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
80+
81+
t2rpntlvwz0rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
82+
t2rpntlvwz0rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
83+
t2rpntlvwz0rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
84+
85+
t2rpntlvwz1rs treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
86+
t2rpntlvwz1rs treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
87+
t2rpntlvwz1rs treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
88+
89+
t2rpntlvwz1rst1 treg, [rax] ;AMX-TRANSPOSE + AMX-MOVRS
90+
t2rpntlvwz1rst1 treg, [rax+rdx] ;AMX-TRANSPOSE + AMX-MOVRS
91+
t2rpntlvwz1rst1 treg, [rax+rdx*2] ;AMX-TRANSPOSE + AMX-MOVRS
92+
93+
ttdpbf16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-BF16
94+
ttdpfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-FP16
95+
ttcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
96+
ttcmmrlfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
97+
tconjtcmmimfp16ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX-COMPLEX
98+
tconjtfp16 treg, treg ;AMX-TRANSPOSE + AMX-COMPLEX
99+
100+
ttmmultf32ps treg, treg2, treg3 ;AMX-TRANSPOSE + AMX_TF32
101+
102+
tilestored [rax], treg ;AMX_TILE
103+
tilestored [rax,rdx], treg ;AMX_TILE
104+
tilestored [rax,rdx*2], treg ;AMX_TILE
105+
106+
tilerelease ;AMX_TILE
35107
%endmacro
36108

37109
%assign n 0

x86/iflags.ph

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -156,8 +156,13 @@ if_("AVX512VP2INTERSECT", "AVX-512 VP2INTERSECT instructions");
156156
if_("AMXTILE", "AMX tile configuration instructions");
157157
if_("AMXBF16", "AMX bfloat16 multiplication");
158158
if_("AMXFP16", "AMX FP16 multiplication");
159+
if_("AMXFP8", "AMX FP8 instructions");
160+
if_("AMXTF32", "AMX TF32 multiplication");
159161
if_("AMXINT8", "AMX 8-bit integer multiplication");
160162
if_("AMXCOMPLEX", "AMX float16 complex multiplication");
163+
if_("AMXAVX512", "EVEX zmm<-tmm conversion instructions");
164+
if_("AMXMOVRS", "AMX loads with MOVRS hint");
165+
if_("AMXTRANSPOSE", "AMX transpose instructions");
161166
if_("FRED", "Flexible Return and Exception Delivery (FRED)");
162167
if_("RAOINT", "Remote atomic operations (RAO-INT)");
163168
if_("UINTR", "User interrupts");

x86/insns.dat

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -5398,8 +5398,8 @@ TILERELEASE void [ vex.128.np.0f38.w0 49 c0] AMXTILE,LONG
53985398
TILESTORED mem,tmmreg [mr: vex+.128.f3.0f38.w0 4b /r] APX,AMXTILE,MIB,SIB,ANYSIZE,AR0,LONG
53995399
TILEZERO tmmreg [r: vex.128.f2.0f38.w0 49 /3r0] AMXTILE,LONG
54005400

5401-
TILELOADDRS tmmreg,mem [rm: vex+.128.f2.0f38.w0 4a /r] APX,AMXTILE,FUTURE,MIB,SIB,ANYSIZE,AR1,LONG
5402-
TILELOADDRST1 tmmreg,mem [rm: vex+.128.66.0f38.w0 4a /r] APX,AMXTILE,FUTURE,MIB,SIB,ANYSIZE,AR1,LONG
5401+
TILELOADDRS tmmreg,mem [rm: vex+.128.f2.0f38.w0 4a /r] AMXMOVRS,APX,AMXTILE,FUTURE,MIB,SIB,ANYSIZE,AR1,LONG
5402+
TILELOADDRST1 tmmreg,mem [rm: vex+.128.66.0f38.w0 4a /r] AMXMOVRS,APX,AMXTILE,FUTURE,MIB,SIB,ANYSIZE,AR1,LONG
54035403

54045404
T2RPNTLVWZ0 tmmreg,mem [rm: vex.128.np.0f38.w0 6e /r] FUTURE,SIB
54055405
T2RPNTLVWZ0T1 tmmreg,mem [rm: vex.128.np.0f38.w0 6f /r] FUTURE,SIB
@@ -5412,23 +5412,23 @@ T2RPNTLVWZ1RST1 tmmreg,mem [rm: vex.128.66.map5.w0 f9 /r] FUTURE,SIB
54125412

54135413
TCONJTCMMIMFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.0f38.w0 6b /r] FUTURE
54145414
TCONJTFP16 tmmreg,tmmreg [rm: vex.128.66.0f38.w0 6b /r] FUTURE
5415-
TCVTROWD2PS zmmreg,tmmreg,reg32 [rmv: evex.512.f3.0f38.w0 4a /r] FUTURE
5416-
TCVTROWD2PS zmmreg,tmmreg,imm8 [rmi: evex.512.f3.0f38.w0 07 /r ib] FUTURE
5417-
TCVTROWPS2BF16H zmmreg,tmmreg,reg32 [rmv: evex.512.f2.0f38.w0 6d /r] FUTURE
5418-
TCVTROWPS2BF16H zmmreg,tmmreg,imm8 [rmi: evex.512.f2.0f38.w0 07 /r ib] FUTURE
5419-
TCVTROWPS2BF16L zmmreg,tmmreg,reg32 [rmv: evex.512.f3.0f38.w0 6d /r] FUTURE
5420-
TCVTROWPS2BF16L zmmreg,tmmreg,imm8 [rmi: evex.512.f3.0f38.w0 77 /r ib] FUTURE
5421-
TCVTROWPS2PHH zmmreg,tmmreg,reg32 [rmv: evex.512.np.0f38.w0 6d /r] FUTURE
5422-
TCVTROWPS2PHH zmmreg,tmmreg,imm8 [rmi: evex.512.np.0f38.w0 07 /r ib] FUTURE
5423-
TCVTROWPS2PHL zmmreg,tmmreg,reg32 [rmv: evex.512.66.0f38.w0 6d /r] FUTURE
5424-
TCVTROWPS2PHL zmmreg,tmmreg,imm8 [rmi: evex.512.f2.0f38.w0 77 /r ib] FUTURE
5425-
TDPBF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.map5.w0 fd /r] FUTURE
5426-
TDPBHF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.map5.w0 fd /r] FUTURE
5427-
TDPHBF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.map5.w0 fd /r] FUTURE
5428-
TDPHF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.map5.w0 fd /r] FUTURE
5429-
TILEMOVROW zmmreg,tmmreg,imm8 [rmi: evex.512.66.0f3a.w0 07 /r ib] FUTURE
5430-
TILEMOVROW zmmreg,tmmreg,reg32 [rmv: evex.512.66.0f3a.w0 4a /r] FUTURE
5431-
TMMULTF32PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 48 /r] FUTURE
5415+
TCVTROWD2PS zmmreg,tmmreg,reg32 [rmv: evex.512.f3.0f38.w0 4a /r] AMXAVX512
5416+
TCVTROWD2PS zmmreg,tmmreg,imm8 [rmi: evex.512.f3.0f3a.w0 07 /r ib] AMXAVX512
5417+
TCVTROWPS2BF16H zmmreg,tmmreg,reg32 [rmv: evex.512.f2.0f38.w0 6d /r] AMXAVX512
5418+
TCVTROWPS2BF16H zmmreg,tmmreg,imm8 [rmi: evex.512.f2.0f3a.w0 07 /r ib] AMXAVX512
5419+
TCVTROWPS2BF16L zmmreg,tmmreg,reg32 [rmv: evex.512.f3.0f38.w0 6d /r] AMXAVX512
5420+
TCVTROWPS2BF16L zmmreg,tmmreg,imm8 [rmi: evex.512.f3.0f3a.w0 77 /r ib] AMXAVX512
5421+
TCVTROWPS2PHH zmmreg,tmmreg,reg32 [rmv: evex.512.np.0f38.w0 6d /r] AMXAVX512
5422+
TCVTROWPS2PHH zmmreg,tmmreg,imm8 [rmi: evex.512.np.0f3a.w0 07 /r ib] AMXAVX512
5423+
TCVTROWPS2PHL zmmreg,tmmreg,reg32 [rmv: evex.512.66.0f38.w0 6d /r] AMXAVX512
5424+
TCVTROWPS2PHL zmmreg,tmmreg,imm8 [rmi: evex.512.f2.0f3a.w0 77 /r ib] AMXAVX512
5425+
TDPBF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.np.map5.w0 fd /r] AMXFP8
5426+
TDPBHF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.map5.w0 fd /r] AMXFP8
5427+
TDPHBF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.map5.w0 fd /r] AMXFP8
5428+
TDPHF8PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.map5.w0 fd /r] AMXFP8
5429+
TILEMOVROW zmmreg,tmmreg,imm8 [rmi: evex.512.66.0f3a.w0 07 /r ib] AMXAVX512
5430+
TILEMOVROW zmmreg,tmmreg,reg32 [rmv: evex.512.66.0f38.w0 4a /r] AMXAVX512
5431+
TMMULTF32PS tmmreg,tmmreg,tmmreg [rmv: vex.128.66.0f38.w0 48 /r] AMXTF32
54325432
TTCMMIMFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f2.0f38.w0 6b /r] FUTURE
54335433
TTCMMRLFP16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 6b /r] FUTURE
54345434
TTDPBF16PS tmmreg,tmmreg,tmmreg [rmv: vex.128.f3.0f38.w0 6c /r] FUTURE

0 commit comments

Comments
 (0)