Skip to content

Commit 41d35bb

Browse files
author
H. Peter Anvin (Intel)
committed
Merge multiple fixes from "InstLatx64"
5 parents e72a541 + 172c4b2 + fb43244 + f4b1a20 + 2a2acca commit 41d35bb

File tree

10 files changed

+479
-91
lines changed

10 files changed

+479
-91
lines changed

test/avx-vnni-64.asm

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
; Assembler test input for the AVX-VNNI mnemonics vpdpbusd, vpdpbusds,
; vpdpwssd and vpdpwssds, assembled in 64-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
; NOTE(review): no register numbered 8 or above appears in this file, so the
; extended-register encodings available in 64-bit mode are not exercised here.
BITS 64
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics instead of an EVEX one - confirm against the NASM CPU directive docs.
cpu latevex
3+
; vpdpbusd, xmm forms
vpdpbusd xmm1, xmm2, xmm0
4+
vpdpbusd xmm2, xmm3, [rax]
5+
vpdpbusd xmm3, xmm4, [rax+0x12]
6+
vpdpbusd xmm4, xmm5, [rax+rbx*2]
7+
8+
; vpdpbusd, ymm forms
vpdpbusd ymm1, ymm2, ymm0
9+
vpdpbusd ymm2, ymm3, [rax]
10+
vpdpbusd ymm3, ymm4, [rax+0x12]
11+
vpdpbusd ymm4, ymm5, [rax+rbx*2]
12+
13+
; vpdpbusds, xmm forms
vpdpbusds xmm1, xmm2, xmm0
14+
vpdpbusds xmm2, xmm3, [rax]
15+
vpdpbusds xmm3, xmm4, [rax+0x12]
16+
vpdpbusds xmm4, xmm5, [rax+rbx*2]
17+
18+
; vpdpbusds, ymm forms
vpdpbusds ymm1, ymm2, ymm0
19+
vpdpbusds ymm2, ymm3, [rax]
20+
vpdpbusds ymm3, ymm4, [rax+0x12]
21+
vpdpbusds ymm4, ymm5, [rax+rbx*2]
22+
23+
; vpdpwssd, xmm forms
vpdpwssd xmm1, xmm2, xmm0
24+
vpdpwssd xmm2, xmm3, [rax]
25+
vpdpwssd xmm3, xmm4, [rax+0x12]
26+
vpdpwssd xmm4, xmm5, [rax+rbx*2]
27+
28+
; vpdpwssd, ymm forms
vpdpwssd ymm1, ymm2, ymm0
29+
vpdpwssd ymm2, ymm3, [rax]
30+
vpdpwssd ymm3, ymm4, [rax+0x12]
31+
vpdpwssd ymm4, ymm5, [rax+rbx*2]
32+
33+
; vpdpwssds, xmm forms
vpdpwssds xmm1, xmm2, xmm0
34+
vpdpwssds xmm2, xmm3, [rax]
35+
vpdpwssds xmm3, xmm4, [rax+0x12]
36+
vpdpwssds xmm4, xmm5, [rax+rbx*2]
37+
38+
; vpdpwssds, ymm forms
vpdpwssds ymm1, ymm2, ymm0
39+
vpdpwssds ymm2, ymm3, [rax]
40+
vpdpwssds ymm3, ymm4, [rax+0x12]
41+
vpdpwssds ymm4, ymm5, [rax+rbx*2]

test/avx-vnni-int16-64.asm

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; Assembler test input for the AVX-VNNI-INT16 mnemonics vpdpwsud, vpdpwsuds,
; vpdpwusd, vpdpwusds, vpdpwuud and vpdpwuuds, assembled in 64-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; Most groups use register 14 (xmm14/ymm14) once as the second operand and
; once as the destination, so a register numbered above 7 appears in both
; positions.
; NOTE(review): the first group (vpdpwsud, xmm) uses xmm4 where every other
; group uses register 14 - looks like an oversight; confirm whether xmm14 was
; intended before changing it, since that alters the bytes this test emits.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 64
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics - confirm against the NASM CPU directive docs.
cpu latevex
3+
4+
; vpdpwsud, xmm forms (xmm4 here, not xmm14 - see the note at the top)
vpdpwsud xmm1, xmm2, xmm0
5+
vpdpwsud xmm2, xmm3, [rax]
6+
vpdpwsud xmm3, xmm4, [rax+0x12]
7+
vpdpwsud xmm4, xmm5, [rax+rbx*2]
8+
9+
; vpdpwsud, ymm forms
vpdpwsud ymm1, ymm2, ymm0
10+
vpdpwsud ymm2, ymm3, [rax]
11+
vpdpwsud ymm3, ymm14, [rax+0x12]
12+
vpdpwsud ymm14, ymm5, [rax+rbx*2]
13+
14+
; vpdpwsuds, xmm forms
vpdpwsuds xmm1, xmm2, xmm0
15+
vpdpwsuds xmm2, xmm3, [rax]
16+
vpdpwsuds xmm3, xmm14, [rax+0x12]
17+
vpdpwsuds xmm14, xmm5, [rax+rbx*2]
18+
19+
; vpdpwsuds, ymm forms
vpdpwsuds ymm1, ymm2, ymm0
20+
vpdpwsuds ymm2, ymm3, [rax]
21+
vpdpwsuds ymm3, ymm14, [rax+0x12]
22+
vpdpwsuds ymm14, ymm5, [rax+rbx*2]
23+
24+
; vpdpwusd, xmm forms
vpdpwusd xmm1, xmm2, xmm0
25+
vpdpwusd xmm2, xmm3, [rax]
26+
vpdpwusd xmm3, xmm14, [rax+0x12]
27+
vpdpwusd xmm14, xmm5, [rax+rbx*2]
28+
29+
; vpdpwusd, ymm forms
vpdpwusd ymm1, ymm2, ymm0
30+
vpdpwusd ymm2, ymm3, [rax]
31+
vpdpwusd ymm3, ymm14, [rax+0x12]
32+
vpdpwusd ymm14, ymm5, [rax+rbx*2]
33+
34+
; vpdpwusds, xmm forms
vpdpwusds xmm1, xmm2, xmm0
35+
vpdpwusds xmm2, xmm3, [rax]
36+
vpdpwusds xmm3, xmm14, [rax+0x12]
37+
vpdpwusds xmm14, xmm5, [rax+rbx*2]
38+
39+
; vpdpwusds, ymm forms
vpdpwusds ymm1, ymm2, ymm0
40+
vpdpwusds ymm2, ymm3, [rax]
41+
vpdpwusds ymm3, ymm14, [rax+0x12]
42+
vpdpwusds ymm14, ymm5, [rax+rbx*2]
43+
44+
; vpdpwuud, xmm forms
vpdpwuud xmm1, xmm2, xmm0
45+
vpdpwuud xmm2, xmm3, [rax]
46+
vpdpwuud xmm3, xmm14, [rax+0x12]
47+
vpdpwuud xmm14, xmm5, [rax+rbx*2]
48+
49+
; vpdpwuud, ymm forms
vpdpwuud ymm1, ymm2, ymm0
50+
vpdpwuud ymm2, ymm3, [rax]
51+
vpdpwuud ymm3, ymm14, [rax+0x12]
52+
vpdpwuud ymm14, ymm5, [rax+rbx*2]
53+
54+
; vpdpwuuds, xmm forms
vpdpwuuds xmm1, xmm2, xmm0
55+
vpdpwuuds xmm2, xmm3, [rax]
56+
vpdpwuuds xmm3, xmm14, [rax+0x12]
57+
vpdpwuuds xmm14, xmm5, [rax+rbx*2]
58+
59+
; vpdpwuuds, ymm forms
vpdpwuuds ymm1, ymm2, ymm0
60+
vpdpwuuds ymm2, ymm3, [rax]
61+
vpdpwuuds ymm3, ymm14, [rax+0x12]
62+
vpdpwuuds ymm14, ymm5, [rax+rbx*2]
63+

test/avx-vnni-int16.asm

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; Assembler test input for the AVX-VNNI-INT16 mnemonics vpdpwsud, vpdpwsuds,
; vpdpwusd, vpdpwusds, vpdpwuud and vpdpwuuds, assembled in 32-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; Only registers 0-5 and eax/ebx are used.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 32
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics - confirm against the NASM CPU directive docs.
cpu latevex
3+
4+
; vpdpwsud, xmm forms
vpdpwsud xmm1, xmm2, xmm0
5+
vpdpwsud xmm2, xmm3, [eax]
6+
vpdpwsud xmm3, xmm4, [eax+0x12]
7+
vpdpwsud xmm4, xmm5, [eax+ebx*2]
8+
9+
; vpdpwsud, ymm forms
vpdpwsud ymm1, ymm2, ymm0
10+
vpdpwsud ymm2, ymm3, [eax]
11+
vpdpwsud ymm3, ymm4, [eax+0x12]
12+
vpdpwsud ymm4, ymm5, [eax+ebx*2]
13+
14+
; vpdpwsuds, xmm forms
vpdpwsuds xmm1, xmm2, xmm0
15+
vpdpwsuds xmm2, xmm3, [eax]
16+
vpdpwsuds xmm3, xmm4, [eax+0x12]
17+
vpdpwsuds xmm4, xmm5, [eax+ebx*2]
18+
19+
; vpdpwsuds, ymm forms
vpdpwsuds ymm1, ymm2, ymm0
20+
vpdpwsuds ymm2, ymm3, [eax]
21+
vpdpwsuds ymm3, ymm4, [eax+0x12]
22+
vpdpwsuds ymm4, ymm5, [eax+ebx*2]
23+
24+
; vpdpwusd, xmm forms
vpdpwusd xmm1, xmm2, xmm0
25+
vpdpwusd xmm2, xmm3, [eax]
26+
vpdpwusd xmm3, xmm4, [eax+0x12]
27+
vpdpwusd xmm4, xmm5, [eax+ebx*2]
28+
29+
; vpdpwusd, ymm forms
vpdpwusd ymm1, ymm2, ymm0
30+
vpdpwusd ymm2, ymm3, [eax]
31+
vpdpwusd ymm3, ymm4, [eax+0x12]
32+
vpdpwusd ymm4, ymm5, [eax+ebx*2]
33+
34+
; vpdpwusds, xmm forms
vpdpwusds xmm1, xmm2, xmm0
35+
vpdpwusds xmm2, xmm3, [eax]
36+
vpdpwusds xmm3, xmm4, [eax+0x12]
37+
vpdpwusds xmm4, xmm5, [eax+ebx*2]
38+
39+
; vpdpwusds, ymm forms
vpdpwusds ymm1, ymm2, ymm0
40+
vpdpwusds ymm2, ymm3, [eax]
41+
vpdpwusds ymm3, ymm4, [eax+0x12]
42+
vpdpwusds ymm4, ymm5, [eax+ebx*2]
43+
44+
; vpdpwuud, xmm forms
vpdpwuud xmm1, xmm2, xmm0
45+
vpdpwuud xmm2, xmm3, [eax]
46+
vpdpwuud xmm3, xmm4, [eax+0x12]
47+
vpdpwuud xmm4, xmm5, [eax+ebx*2]
48+
49+
; vpdpwuud, ymm forms
vpdpwuud ymm1, ymm2, ymm0
50+
vpdpwuud ymm2, ymm3, [eax]
51+
vpdpwuud ymm3, ymm4, [eax+0x12]
52+
vpdpwuud ymm4, ymm5, [eax+ebx*2]
53+
54+
; vpdpwuuds, xmm forms
vpdpwuuds xmm1, xmm2, xmm0
55+
vpdpwuuds xmm2, xmm3, [eax]
56+
vpdpwuuds xmm3, xmm4, [eax+0x12]
57+
vpdpwuuds xmm4, xmm5, [eax+ebx*2]
58+
59+
; vpdpwuuds, ymm forms
vpdpwuuds ymm1, ymm2, ymm0
60+
vpdpwuuds ymm2, ymm3, [eax]
61+
vpdpwuuds ymm3, ymm4, [eax+0x12]
62+
vpdpwuuds ymm4, ymm5, [eax+ebx*2]
63+

test/avx-vnni-int8-64.asm

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
; Assembler test input for the AVX-VNNI-INT8 mnemonics vpdpbsud, vpdpbsuds,
; vpdpbssd, vpdpbssds, vpdpbuud and vpdpbuuds, assembled in 64-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; Every group uses register 14 (xmm14/ymm14) once as the second operand and
; once as the destination, so a register numbered above 7 appears in both
; positions.
; NOTE(review): only the two vpdpbsud [rax+0x12] lines carry an explicit
; operand-size keyword (oword / yword); every other memory operand is unsized.
; Presumably this checks that the explicit size is accepted - confirm intent.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 64
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics - confirm against the NASM CPU directive docs.
cpu latevex
3+
4+
; vpdpbsud, xmm forms (third line has an explicit oword size)
vpdpbsud xmm1, xmm2, xmm0
5+
vpdpbsud xmm2, xmm3, [rax]
6+
vpdpbsud xmm3, xmm14, oword [rax+0x12]
7+
vpdpbsud xmm14, xmm5, [rax+rbx*2]
8+
9+
; vpdpbsud, ymm forms (third line has an explicit yword size)
vpdpbsud ymm1, ymm2, ymm0
10+
vpdpbsud ymm2, ymm3, [rax]
11+
vpdpbsud ymm3, ymm14, yword [rax+0x12]
12+
vpdpbsud ymm14, ymm5, [rax+rbx*2]
13+
14+
; vpdpbsuds, xmm forms
vpdpbsuds xmm1, xmm2, xmm0
15+
vpdpbsuds xmm2, xmm3, [rax]
16+
vpdpbsuds xmm3, xmm14, [rax+0x12]
17+
vpdpbsuds xmm14, xmm5, [rax+rbx*2]
18+
19+
; vpdpbsuds, ymm forms
vpdpbsuds ymm1, ymm2, ymm0
20+
vpdpbsuds ymm2, ymm3, [rax]
21+
vpdpbsuds ymm3, ymm14, [rax+0x12]
22+
vpdpbsuds ymm14, ymm5, [rax+rbx*2]
23+
24+
; vpdpbssd, xmm forms
vpdpbssd xmm1, xmm2, xmm0
25+
vpdpbssd xmm2, xmm3, [rax]
26+
vpdpbssd xmm3, xmm14, [rax+0x12]
27+
vpdpbssd xmm14, xmm5, [rax+rbx*2]
28+
29+
; vpdpbssd, ymm forms
vpdpbssd ymm1, ymm2, ymm0
30+
vpdpbssd ymm2, ymm3, [rax]
31+
vpdpbssd ymm3, ymm14, [rax+0x12]
32+
vpdpbssd ymm14, ymm5, [rax+rbx*2]
33+
34+
; vpdpbssds, xmm forms
vpdpbssds xmm1, xmm2, xmm0
35+
vpdpbssds xmm2, xmm3, [rax]
36+
vpdpbssds xmm3, xmm14, [rax+0x12]
37+
vpdpbssds xmm14, xmm5, [rax+rbx*2]
38+
39+
; vpdpbssds, ymm forms
vpdpbssds ymm1, ymm2, ymm0
40+
vpdpbssds ymm2, ymm3, [rax]
41+
vpdpbssds ymm3, ymm14, [rax+0x12]
42+
vpdpbssds ymm14, ymm5, [rax+rbx*2]
43+
44+
; vpdpbuud, xmm forms
vpdpbuud xmm1, xmm2, xmm0
45+
vpdpbuud xmm2, xmm3, [rax]
46+
vpdpbuud xmm3, xmm14, [rax+0x12]
47+
vpdpbuud xmm14, xmm5, [rax+rbx*2]
48+
49+
; vpdpbuud, ymm forms
vpdpbuud ymm1, ymm2, ymm0
50+
vpdpbuud ymm2, ymm3, [rax]
51+
vpdpbuud ymm3, ymm14, [rax+0x12]
52+
vpdpbuud ymm14, ymm5, [rax+rbx*2]
53+
54+
; vpdpbuuds, xmm forms
vpdpbuuds xmm1, xmm2, xmm0
55+
vpdpbuuds xmm2, xmm3, [rax]
56+
vpdpbuuds xmm3, xmm14, [rax+0x12]
57+
vpdpbuuds xmm14, xmm5, [rax+rbx*2]
58+
59+
; vpdpbuuds, ymm forms
vpdpbuuds ymm1, ymm2, ymm0
60+
vpdpbuuds ymm2, ymm3, [rax]
61+
vpdpbuuds ymm3, ymm14, [rax+0x12]
62+
vpdpbuuds ymm14, ymm5, [rax+rbx*2]
63+
64+

test/avx-vnni-int8.asm

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
; Assembler test input for the AVX-VNNI-INT8 mnemonics vpdpbsud, vpdpbsuds,
; vpdpbssd, vpdpbssds, vpdpbuud and vpdpbuuds, assembled in 32-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; Only registers 0-5 and eax/ebx are used.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 32
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics - confirm against the NASM CPU directive docs.
cpu latevex
3+
4+
; vpdpbsud, xmm forms
vpdpbsud xmm1, xmm2, xmm0
5+
vpdpbsud xmm2, xmm3, [eax]
6+
vpdpbsud xmm3, xmm4, [eax+0x12]
7+
vpdpbsud xmm4, xmm5, [eax+ebx*2]
8+
9+
; vpdpbsud, ymm forms
vpdpbsud ymm1, ymm2, ymm0
10+
vpdpbsud ymm2, ymm3, [eax]
11+
vpdpbsud ymm3, ymm4, [eax+0x12]
12+
vpdpbsud ymm4, ymm5, [eax+ebx*2]
13+
14+
; vpdpbsuds, xmm forms
vpdpbsuds xmm1, xmm2, xmm0
15+
vpdpbsuds xmm2, xmm3, [eax]
16+
vpdpbsuds xmm3, xmm4, [eax+0x12]
17+
vpdpbsuds xmm4, xmm5, [eax+ebx*2]
18+
19+
; vpdpbsuds, ymm forms
vpdpbsuds ymm1, ymm2, ymm0
20+
vpdpbsuds ymm2, ymm3, [eax]
21+
vpdpbsuds ymm3, ymm4, [eax+0x12]
22+
vpdpbsuds ymm4, ymm5, [eax+ebx*2]
23+
24+
; vpdpbssd, xmm forms
vpdpbssd xmm1, xmm2, xmm0
25+
vpdpbssd xmm2, xmm3, [eax]
26+
vpdpbssd xmm3, xmm4, [eax+0x12]
27+
vpdpbssd xmm4, xmm5, [eax+ebx*2]
28+
29+
; vpdpbssd, ymm forms
vpdpbssd ymm1, ymm2, ymm0
30+
vpdpbssd ymm2, ymm3, [eax]
31+
vpdpbssd ymm3, ymm4, [eax+0x12]
32+
vpdpbssd ymm4, ymm5, [eax+ebx*2]
33+
34+
; vpdpbssds, xmm forms
vpdpbssds xmm1, xmm2, xmm0
35+
vpdpbssds xmm2, xmm3, [eax]
36+
vpdpbssds xmm3, xmm4, [eax+0x12]
37+
vpdpbssds xmm4, xmm5, [eax+ebx*2]
38+
39+
; vpdpbssds, ymm forms
vpdpbssds ymm1, ymm2, ymm0
40+
vpdpbssds ymm2, ymm3, [eax]
41+
vpdpbssds ymm3, ymm4, [eax+0x12]
42+
vpdpbssds ymm4, ymm5, [eax+ebx*2]
43+
44+
; vpdpbuud, xmm forms
vpdpbuud xmm1, xmm2, xmm0
45+
vpdpbuud xmm2, xmm3, [eax]
46+
vpdpbuud xmm3, xmm4, [eax+0x12]
47+
vpdpbuud xmm4, xmm5, [eax+ebx*2]
48+
49+
; vpdpbuud, ymm forms
vpdpbuud ymm1, ymm2, ymm0
50+
vpdpbuud ymm2, ymm3, [eax]
51+
vpdpbuud ymm3, ymm4, [eax+0x12]
52+
vpdpbuud ymm4, ymm5, [eax+ebx*2]
53+
54+
; vpdpbuuds, xmm forms
vpdpbuuds xmm1, xmm2, xmm0
55+
vpdpbuuds xmm2, xmm3, [eax]
56+
vpdpbuuds xmm3, xmm4, [eax+0x12]
57+
vpdpbuuds xmm4, xmm5, [eax+ebx*2]
58+
59+
; vpdpbuuds, ymm forms
vpdpbuuds ymm1, ymm2, ymm0
60+
vpdpbuuds ymm2, ymm3, [eax]
61+
vpdpbuuds ymm3, ymm4, [eax+0x12]
62+
vpdpbuuds ymm4, ymm5, [eax+ebx*2]
63+

test/avx-vnni.asm

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
; Assembler test input for the AVX-VNNI mnemonics vpdpbusd, vpdpbusds,
; vpdpwssd and vpdpwssds, assembled in 32-bit mode.
; Each mnemonic is listed once with xmm operands and once with ymm operands,
; and every group uses the same four source-operand shapes:
;   register, [base], [base+disp] and [base+index*scale].
; Only registers 0-5 and eax/ebx are used.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 32
2+
; NOTE(review): presumably makes NASM choose the VEX encoding for these
; mnemonics instead of an EVEX one - confirm against the NASM CPU directive docs.
cpu latevex
3+
4+
; vpdpbusd, xmm forms
vpdpbusd xmm1, xmm2, xmm0
5+
vpdpbusd xmm2, xmm3, [eax]
6+
vpdpbusd xmm3, xmm4, [eax+0x12]
7+
vpdpbusd xmm4, xmm5, [eax+ebx*2]
8+
9+
; vpdpbusd, ymm forms
vpdpbusd ymm1, ymm2, ymm0
10+
vpdpbusd ymm2, ymm3, [eax]
11+
vpdpbusd ymm3, ymm4, [eax+0x12]
12+
vpdpbusd ymm4, ymm5, [eax+ebx*2]
13+
14+
; vpdpbusds, xmm forms
vpdpbusds xmm1, xmm2, xmm0
15+
vpdpbusds xmm2, xmm3, [eax]
16+
vpdpbusds xmm3, xmm4, [eax+0x12]
17+
vpdpbusds xmm4, xmm5, [eax+ebx*2]
18+
19+
; vpdpbusds, ymm forms
vpdpbusds ymm1, ymm2, ymm0
20+
vpdpbusds ymm2, ymm3, [eax]
21+
vpdpbusds ymm3, ymm4, [eax+0x12]
22+
vpdpbusds ymm4, ymm5, [eax+ebx*2]
23+
24+
; vpdpwssd, xmm forms
vpdpwssd xmm1, xmm2, xmm0
25+
vpdpwssd xmm2, xmm3, [eax]
26+
vpdpwssd xmm3, xmm4, [eax+0x12]
27+
vpdpwssd xmm4, xmm5, [eax+ebx*2]
28+
29+
; vpdpwssd, ymm forms
vpdpwssd ymm1, ymm2, ymm0
30+
vpdpwssd ymm2, ymm3, [eax]
31+
vpdpwssd ymm3, ymm4, [eax+0x12]
32+
vpdpwssd ymm4, ymm5, [eax+ebx*2]
33+
34+
; vpdpwssds, xmm forms
vpdpwssds xmm1, xmm2, xmm0
35+
vpdpwssds xmm2, xmm3, [eax]
36+
vpdpwssds xmm3, xmm4, [eax+0x12]
37+
vpdpwssds xmm4, xmm5, [eax+ebx*2]
38+
39+
; vpdpwssds, ymm forms
vpdpwssds ymm1, ymm2, ymm0
40+
vpdpwssds ymm2, ymm3, [eax]
41+
vpdpwssds ymm3, ymm4, [eax+0x12]
42+
vpdpwssds ymm4, ymm5, [eax+ebx*2]

test/avx512vp2intersect-64.asm

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; Assembler test input for vp2intersectd and vp2intersectq, assembled in
; 64-bit mode. Unlike the AVX-VNNI tests, this file sets no cpu directive.
; Both mnemonics are listed with mask destinations k0 and k1, at xmm, ymm and
; zmm width, with three kinds of final operand:
;   a vector register, a plain memory operand, and a memory operand carrying
;   a {1toN} broadcast decorator.
; NOTE(review): the instruction is documented as writing a pair of mask
; registers; confirm what encoding is expected for the odd destination k1.
; NOTE(review): the displacements (+1, +64, and +4/+8 on the broadcast forms)
; presumably probe displacement-size selection - confirm against the expected
; output for this test.
; NOTE(review): the bare "N+" lines look like diff-gutter residue from the
; commit view rather than part of the .asm file - confirm before assembling.
BITS 64
2+
; register sources, destination k0
vp2intersectd k0, xmm1, xmm2
3+
vp2intersectd k0, ymm1, ymm2
4+
vp2intersectd k0, zmm1, zmm2
5+
6+
vp2intersectq k0, xmm1, xmm2
7+
vp2intersectq k0, ymm1, ymm2
8+
vp2intersectq k0, zmm1, zmm2
9+
10+
; register sources, destination k1
vp2intersectd k1, xmm1, xmm2
11+
vp2intersectd k1, ymm1, ymm2
12+
vp2intersectd k1, zmm1, zmm2
13+
14+
vp2intersectq k1, xmm1, xmm2
15+
vp2intersectq k1, ymm1, ymm2
16+
vp2intersectq k1, zmm1, zmm2
17+
18+
; memory sources, destination k0
vp2intersectd k0, xmm1, [rax]
19+
vp2intersectd k0, ymm1, [rcx+1]
20+
vp2intersectd k0, zmm1, [2*rdx+64]
21+
22+
vp2intersectq k0, xmm1, [rax]
23+
vp2intersectq k0, ymm1, [rcx+1]
24+
vp2intersectq k0, zmm1, [2*rdx+64]
25+
26+
; memory sources, destination k1
vp2intersectd k1, xmm1, [rax]
27+
vp2intersectd k1, ymm1, [rcx+1]
28+
vp2intersectd k1, zmm1, [2*rdx+64]
29+
30+
vp2intersectq k1, xmm1, [rax]
31+
vp2intersectq k1, ymm1, [rcx+1]
32+
vp2intersectq k1, zmm1, [2*rdx+64]
33+
34+
; broadcast memory sources, destination k0
; ({1to4}/{1to8}/{1to16} on the d forms, {1to2}/{1to4}/{1to8} on the q forms)
vp2intersectd k0, xmm1, [rax]{1to4}
35+
vp2intersectd k0, ymm1, [rcx+1]{1to8}
36+
vp2intersectd k0, zmm1, [2*rdx+4]{1to16}
37+
38+
vp2intersectq k0, xmm1, [rax]{1to2}
39+
vp2intersectq k0, ymm1, [rcx+1]{1to4}
40+
vp2intersectq k0, zmm1, [2*rdx+8]{1to8}
41+
42+
; broadcast memory sources, destination k1
vp2intersectd k1, xmm1, [rax]{1to4}
43+
vp2intersectd k1, ymm1, [rcx+1]{1to8}
44+
vp2intersectd k1, zmm1, [2*rdx+4]{1to16}
45+
46+
vp2intersectq k1, xmm1, [rax]{1to2}
47+
vp2intersectq k1, ymm1, [rcx+1]{1to4}
48+
vp2intersectq k1, zmm1, [2*rdx+8]{1to8}

0 commit comments

Comments
 (0)