|
1 | 1 | ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
2 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s |
3 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s |
4 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s |
5 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s |
6 | | -; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s |
| 2 | +; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s |
| 3 | +; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s |
| 4 | +; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s |
| 5 | +; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s |
| 6 | +; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s |
7 | 7 |
|
8 | 8 | declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) |
9 | 9 | declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32) |
@@ -3186,13 +3186,14 @@ define amdgpu_kernel void @test_mfma_i32_16x16x4i8_splatimm_src2_64(ptr addrspac |
3186 | 3186 | ; |
3187 | 3187 | ; GFX942-VGPR-LABEL: test_mfma_i32_16x16x4i8_splatimm_src2_64: |
3188 | 3188 | ; GFX942-VGPR: ; %bb.0: ; %bb |
3189 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1 |
3190 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2 |
| 3189 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1 |
| 3190 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2 |
3191 | 3191 | ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 3192 | +; GFX942-VGPR-NEXT: s_nop 0 |
| 3193 | +; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v16, v17, 64 cbsz:1 abid:2 blgp:3 |
3192 | 3194 | ; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 |
3193 | | -; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, 64 cbsz:1 abid:2 blgp:3 |
3194 | 3195 | ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) |
3195 | | -; GFX942-VGPR-NEXT: s_nop 9 |
| 3196 | +; GFX942-VGPR-NEXT: s_nop 8 |
3196 | 3197 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 |
3197 | 3198 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 |
3198 | 3199 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 |
@@ -4538,13 +4539,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) % |
4538 | 4539 | ; |
4539 | 4540 | ; GFX942-VGPR-LABEL: test_mfma_f32_16x16x1f32_imm_splat: |
4540 | 4541 | ; GFX942-VGPR: ; %bb.0: ; %bb |
4541 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 |
4542 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0 |
| 4542 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1.0 |
| 4543 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2.0 |
4543 | 4544 | ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 4545 | +; GFX942-VGPR-NEXT: s_nop 0 |
| 4546 | +; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v16, v17, 1.0 |
4544 | 4547 | ; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 |
4545 | | -; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, 1.0 |
4546 | 4548 | ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) |
4547 | | -; GFX942-VGPR-NEXT: s_nop 8 |
| 4549 | +; GFX942-VGPR-NEXT: s_nop 7 |
4548 | 4550 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 |
4549 | 4551 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 |
4550 | 4552 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 |
@@ -4689,15 +4691,16 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) % |
4689 | 4691 | ; |
4690 | 4692 | ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8f16_imm_splat: |
4691 | 4693 | ; GFX942-VGPR: ; %bb.0: ; %bb |
4692 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0x3c003c00 |
4693 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0 |
4694 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0x40004000 |
4695 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v2 |
| 4694 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0x3c003c00 |
| 4695 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v16 |
| 4696 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 0x40004000 |
| 4697 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v18 |
4696 | 4698 | ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 4699 | +; GFX942-VGPR-NEXT: s_nop 0 |
| 4700 | +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[16:17], v[18:19], 1.0 |
4697 | 4701 | ; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0 |
4698 | | -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], 1.0 |
4699 | 4702 | ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) |
4700 | | -; GFX942-VGPR-NEXT: s_nop 9 |
| 4703 | +; GFX942-VGPR-NEXT: s_nop 8 |
4701 | 4704 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48 |
4702 | 4705 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32 |
4703 | 4706 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16 |
@@ -4908,14 +4911,14 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) % |
4908 | 4911 | ; |
4909 | 4912 | ; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm_splat: |
4910 | 4913 | ; GFX942-VGPR: ; %bb.0: ; %bb |
4911 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0 |
4912 | | -; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0 |
| 4914 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 1.0 |
| 4915 | +; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 2.0 |
4913 | 4916 | ; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 |
| 4917 | +; GFX942-VGPR-NEXT: s_nop 0 |
| 4918 | +; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v32, v33, 0 |
4914 | 4919 | ; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 0 |
4915 | | -; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 0 |
4916 | 4920 | ; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0) |
4917 | 4921 | ; GFX942-VGPR-NEXT: s_nop 15 |
4918 | | -; GFX942-VGPR-NEXT: s_nop 0 |
4919 | 4922 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112 |
4920 | 4923 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96 |
4921 | 4924 | ; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80 |
|
0 commit comments