|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py |
2 | | -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s |
3 | | -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s |
4 | | -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s |
5 | | -; RUN: llc -mtriple=amdgcn-amd-hsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s |
6 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s |
7 | | -; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s |
| 2 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s |
| 3 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s |
| 4 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-SDAG %s |
| 5 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-sgprs -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9ARCH,GFX9ARCH-GISEL %s |
| 6 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-SDAG %s |
| 7 | +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12,GFX12-GISEL %s |
8 | 8 |
|
9 | 9 | define amdgpu_kernel void @workgroup_ids_kernel() { |
10 | 10 | ; GFX9-LABEL: workgroup_ids_kernel: |
11 | 11 | ; GFX9: ; %bb.0: ; %.entry |
12 | | -; GFX9-NEXT: v_mov_b32_e32 v0, s0 |
13 | | -; GFX9-NEXT: v_mov_b32_e32 v1, s1 |
14 | | -; GFX9-NEXT: v_mov_b32_e32 v2, s2 |
| 12 | +; GFX9-NEXT: v_mov_b32_e32 v0, s4 |
| 13 | +; GFX9-NEXT: v_mov_b32_e32 v1, s5 |
| 14 | +; GFX9-NEXT: v_mov_b32_e32 v2, s6 |
15 | 15 | ; GFX9-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 |
16 | 16 | ; GFX9-NEXT: s_endpgm |
17 | 17 | ; |
@@ -72,137 +72,103 @@ define amdgpu_kernel void @workgroup_ids_kernel() { |
72 | 72 | define amdgpu_kernel void @caller() { |
73 | 73 | ; GFX9-SDAG-LABEL: caller: |
74 | 74 | ; GFX9-SDAG: ; %bb.0: |
75 | | -; GFX9-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
76 | | -; GFX9-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
77 | | -; GFX9-SDAG-NEXT: s_mov_b32 s38, -1 |
78 | | -; GFX9-SDAG-NEXT: s_mov_b32 s39, 0xe00000 |
79 | | -; GFX9-SDAG-NEXT: s_add_u32 s36, s36, s7 |
80 | | -; GFX9-SDAG-NEXT: s_addc_u32 s37, s37, 0 |
81 | | -; GFX9-SDAG-NEXT: s_add_u32 s8, s2, 36 |
82 | | -; GFX9-SDAG-NEXT: s_addc_u32 s9, s3, 0 |
83 | | -; GFX9-SDAG-NEXT: s_getpc_b64 s[2:3] |
84 | | -; GFX9-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 |
85 | | -; GFX9-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 |
86 | | -; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[2:3], 0x0 |
87 | | -; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] |
| 75 | +; GFX9-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s13 |
| 76 | +; GFX9-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 |
| 77 | +; GFX9-SDAG-NEXT: s_add_u32 s0, s0, s13 |
| 78 | +; GFX9-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| 79 | +; GFX9-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] |
| 80 | +; GFX9-SDAG-NEXT: s_getpc_b64 s[8:9] |
| 81 | +; GFX9-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 |
| 82 | +; GFX9-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 |
| 83 | +; GFX9-SDAG-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 |
88 | 84 | ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
89 | 85 | ; GFX9-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
90 | | -; GFX9-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] |
91 | | -; GFX9-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] |
92 | 86 | ; GFX9-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 |
93 | | -; GFX9-SDAG-NEXT: s_mov_b32 s12, s6 |
94 | | -; GFX9-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] |
95 | | -; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s6 |
| 87 | +; GFX9-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] |
| 88 | +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s12 |
96 | 89 | ; GFX9-SDAG-NEXT: s_mov_b32 s32, 0 |
97 | 90 | ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
98 | 91 | ; GFX9-SDAG-NEXT: s_swappc_b64 s[30:31], s[14:15] |
99 | 92 | ; GFX9-SDAG-NEXT: s_endpgm |
100 | 93 | ; |
101 | 94 | ; GFX9-GISEL-LABEL: caller: |
102 | 95 | ; GFX9-GISEL: ; %bb.0: |
103 | | -; GFX9-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
104 | | -; GFX9-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
105 | | -; GFX9-GISEL-NEXT: s_mov_b32 s38, -1 |
106 | | -; GFX9-GISEL-NEXT: s_mov_b32 s39, 0xe00000 |
107 | | -; GFX9-GISEL-NEXT: s_add_u32 s36, s36, s7 |
108 | | -; GFX9-GISEL-NEXT: s_addc_u32 s37, s37, 0 |
109 | | -; GFX9-GISEL-NEXT: s_add_u32 s8, s2, 36 |
110 | | -; GFX9-GISEL-NEXT: s_addc_u32 s9, s3, 0 |
111 | | -; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] |
112 | | -; GFX9-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] |
113 | | -; GFX9-GISEL-NEXT: s_getpc_b64 s[0:1] |
114 | | -; GFX9-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 |
115 | | -; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 |
116 | | -; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[0:1], 0x0 |
| 96 | +; GFX9-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s13 |
| 97 | +; GFX9-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 |
| 98 | +; GFX9-GISEL-NEXT: s_add_u32 s0, s0, s13 |
| 99 | +; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0 |
| 100 | +; GFX9-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] |
| 101 | +; GFX9-GISEL-NEXT: s_getpc_b64 s[8:9] |
| 102 | +; GFX9-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 |
| 103 | +; GFX9-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 |
| 104 | +; GFX9-GISEL-NEXT: s_load_dwordx2 s[14:15], s[8:9], 0x0 |
117 | 105 | ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
118 | 106 | ; GFX9-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
119 | | -; GFX9-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] |
120 | 107 | ; GFX9-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 |
121 | | -; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s6 |
122 | | -; GFX9-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] |
123 | | -; GFX9-GISEL-NEXT: s_mov_b32 s12, s6 |
| 108 | +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s12 |
| 109 | +; GFX9-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] |
124 | 110 | ; GFX9-GISEL-NEXT: s_mov_b32 s32, 0 |
125 | 111 | ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
126 | 112 | ; GFX9-GISEL-NEXT: s_swappc_b64 s[30:31], s[14:15] |
127 | 113 | ; GFX9-GISEL-NEXT: s_endpgm |
128 | 114 | ; |
129 | 115 | ; GFX9ARCH-SDAG-LABEL: caller: |
130 | 116 | ; GFX9ARCH-SDAG: ; %bb.0: |
131 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
132 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
133 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s38, -1 |
134 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b32 s39, 0xe00000 |
135 | | -; GFX9ARCH-SDAG-NEXT: s_add_u32 s36, s36, s6 |
136 | | -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s37, s37, 0 |
137 | | -; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s2, 36 |
138 | | -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s3, 0 |
139 | | -; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[2:3] |
140 | | -; GFX9ARCH-SDAG-NEXT: s_add_u32 s2, s2, callee@gotpcrel32@lo+4 |
141 | | -; GFX9ARCH-SDAG-NEXT: s_addc_u32 s3, s3, callee@gotpcrel32@hi+12 |
142 | | -; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[6:7], s[2:3], 0x0 |
143 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] |
| 117 | +; GFX9ARCH-SDAG-NEXT: s_add_u32 flat_scratch_lo, s10, s12 |
| 118 | +; GFX9ARCH-SDAG-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 |
| 119 | +; GFX9ARCH-SDAG-NEXT: s_add_u32 s0, s0, s12 |
| 120 | +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s1, s1, 0 |
| 121 | +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[10:11], s[8:9] |
| 122 | +; GFX9ARCH-SDAG-NEXT: s_getpc_b64 s[8:9] |
| 123 | +; GFX9ARCH-SDAG-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 |
| 124 | +; GFX9ARCH-SDAG-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 |
| 125 | +; GFX9ARCH-SDAG-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 |
144 | 126 | ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
145 | 127 | ; GFX9ARCH-SDAG-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
146 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] |
147 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[0:1], s[36:37] |
148 | 128 | ; GFX9ARCH-SDAG-NEXT: v_or3_b32 v31, v0, v1, v2 |
149 | | -; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[2:3], s[38:39] |
| 129 | +; GFX9ARCH-SDAG-NEXT: s_mov_b64 s[8:9], s[6:7] |
150 | 130 | ; GFX9ARCH-SDAG-NEXT: v_mov_b32_e32 v0, ttmp9 |
151 | 131 | ; GFX9ARCH-SDAG-NEXT: s_mov_b32 s32, 0 |
152 | 132 | ; GFX9ARCH-SDAG-NEXT: s_waitcnt lgkmcnt(0) |
153 | | -; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] |
| 133 | +; GFX9ARCH-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13] |
154 | 134 | ; GFX9ARCH-SDAG-NEXT: s_endpgm |
155 | 135 | ; |
156 | 136 | ; GFX9ARCH-GISEL-LABEL: caller: |
157 | 137 | ; GFX9ARCH-GISEL: ; %bb.0: |
158 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 |
159 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 |
160 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s38, -1 |
161 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b32 s39, 0xe00000 |
162 | | -; GFX9ARCH-GISEL-NEXT: s_add_u32 s36, s36, s6 |
163 | | -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s37, s37, 0 |
164 | | -; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s2, 36 |
165 | | -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s3, 0 |
166 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] |
167 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] |
168 | | -; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[0:1] |
169 | | -; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, callee@gotpcrel32@lo+4 |
170 | | -; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, callee@gotpcrel32@hi+12 |
171 | | -; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[6:7], s[0:1], 0x0 |
| 138 | +; GFX9ARCH-GISEL-NEXT: s_add_u32 flat_scratch_lo, s10, s12 |
| 139 | +; GFX9ARCH-GISEL-NEXT: s_addc_u32 flat_scratch_hi, s11, 0 |
| 140 | +; GFX9ARCH-GISEL-NEXT: s_add_u32 s0, s0, s12 |
| 141 | +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s1, s1, 0 |
| 142 | +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[10:11], s[8:9] |
| 143 | +; GFX9ARCH-GISEL-NEXT: s_getpc_b64 s[8:9] |
| 144 | +; GFX9ARCH-GISEL-NEXT: s_add_u32 s8, s8, callee@gotpcrel32@lo+4 |
| 145 | +; GFX9ARCH-GISEL-NEXT: s_addc_u32 s9, s9, callee@gotpcrel32@hi+12 |
| 146 | +; GFX9ARCH-GISEL-NEXT: s_load_dwordx2 s[12:13], s[8:9], 0x0 |
172 | 147 | ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v1, 10, v1 |
173 | 148 | ; GFX9ARCH-GISEL-NEXT: v_lshlrev_b32_e32 v2, 20, v2 |
174 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[0:1], s[36:37] |
175 | 149 | ; GFX9ARCH-GISEL-NEXT: v_or3_b32 v31, v0, v1, v2 |
176 | 150 | ; GFX9ARCH-GISEL-NEXT: v_mov_b32_e32 v0, ttmp9 |
177 | | -; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[2:3], s[38:39] |
| 151 | +; GFX9ARCH-GISEL-NEXT: s_mov_b64 s[8:9], s[6:7] |
178 | 152 | ; GFX9ARCH-GISEL-NEXT: s_mov_b32 s32, 0 |
179 | 153 | ; GFX9ARCH-GISEL-NEXT: s_waitcnt lgkmcnt(0) |
180 | | -; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] |
| 154 | +; GFX9ARCH-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13] |
181 | 155 | ; GFX9ARCH-GISEL-NEXT: s_endpgm |
182 | 156 | ; |
183 | | -; GFX12-SDAG-LABEL: caller: |
184 | | -; GFX12-SDAG: ; %bb.0: |
185 | | -; GFX12-SDAG-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 |
186 | | -; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[4:5] |
187 | | -; GFX12-SDAG-NEXT: s_mov_b32 s7, callee@abs32@hi |
188 | | -; GFX12-SDAG-NEXT: s_mov_b32 s6, callee@abs32@lo |
189 | | -; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1] |
190 | | -; GFX12-SDAG-NEXT: s_mov_b64 s[8:9], s[2:3] |
191 | | -; GFX12-SDAG-NEXT: s_mov_b32 s32, 0 |
192 | | -; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[6:7] |
193 | | -; GFX12-SDAG-NEXT: s_endpgm |
194 | | -; |
195 | | -; GFX12-GISEL-LABEL: caller: |
196 | | -; GFX12-GISEL: ; %bb.0: |
197 | | -; GFX12-GISEL-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 |
198 | | -; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[4:5] |
199 | | -; GFX12-GISEL-NEXT: s_mov_b32 s6, callee@abs32@lo |
200 | | -; GFX12-GISEL-NEXT: s_mov_b32 s7, callee@abs32@hi |
201 | | -; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1] |
202 | | -; GFX12-GISEL-NEXT: s_mov_b64 s[8:9], s[2:3] |
203 | | -; GFX12-GISEL-NEXT: s_mov_b32 s32, 0 |
204 | | -; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7] |
205 | | -; GFX12-GISEL-NEXT: s_endpgm |
| 157 | +; GFX12-LABEL: caller: |
| 158 | +; GFX12: ; %bb.0: |
| 159 | +; GFX12-NEXT: s_mov_b64 s[10:11], s[4:5] |
| 160 | +; GFX12-NEXT: s_getpc_b64 s[4:5] |
| 161 | +; GFX12-NEXT: s_sext_i32_i16 s5, s5 |
| 162 | +; GFX12-NEXT: s_add_co_u32 s4, s4, callee@gotpcrel32@lo+8 |
| 163 | +; GFX12-NEXT: s_add_co_ci_u32 s5, s5, callee@gotpcrel32@hi+16 |
| 164 | +; GFX12-NEXT: v_dual_mov_b32 v31, v0 :: v_dual_mov_b32 v0, ttmp9 |
| 165 | +; GFX12-NEXT: s_load_b64 s[6:7], s[4:5], 0x0 |
| 166 | +; GFX12-NEXT: s_mov_b64 s[4:5], s[0:1] |
| 167 | +; GFX12-NEXT: s_mov_b64 s[8:9], s[2:3] |
| 168 | +; GFX12-NEXT: s_mov_b32 s32, 0 |
| 169 | +; GFX12-NEXT: s_wait_kmcnt 0x0 |
| 170 | +; GFX12-NEXT: s_swappc_b64 s[30:31], s[6:7] |
| 171 | +; GFX12-NEXT: s_endpgm |
206 | 172 | %idx = call i32 @llvm.amdgcn.workgroup.id.x() |
207 | 173 | call void @callee(i32 %idx) #0 |
208 | 174 | ret void |
|
0 commit comments