Skip to content

Commit b4b57ad

Browse files
AbhayKanhere, arsenm, and shiltian
authored
[AMDGPU][MachineVerifier] test failures in SIFoldOperands (#166600)
After PR #151421 was merged, the following test failures showed up in SIFoldOperands: LLVM :: CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll, LLVM :: CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll, LLVM :: CodeGen/AMDGPU/llvm.amdgcn.mfma.ll, LLVM :: CodeGen/AMDGPU/mfma-loop.ll, LLVM :: CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll. In the folding code, if the folded operand is a register, ensure that the earlyClobber flag is set on every def operand that the new instruction description constrains as EARLY_CLOBBER. --------- Co-authored-by: Matt Arsenault <arsenm2@gmail.com> Co-authored-by: Shilei Tian <i@tianshilei.me>
1 parent de4aa9c commit b4b57ad

File tree

6 files changed

+39
-32
lines changed

6 files changed

+39
-32
lines changed

llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,6 +681,10 @@ bool SIFoldOperandsImpl::updateOperand(FoldCandidate &Fold) const {
681681
return false;
682682
MI->setDesc(TII->get(NewMFMAOpc));
683683
MI->untieRegOperand(0);
684+
const MCInstrDesc &MCID = MI->getDesc();
685+
for (unsigned I = 0; I < MI->getNumDefs(); ++I)
686+
if (MCID.getOperandConstraint(I, MCOI::EARLY_CLOBBER) != -1)
687+
MI->getOperand(I).setIsEarlyClobber(true);
684688
}
685689

686690
// TODO: Should we try to avoid adding this to the candidate list?

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN %s
2+
; RUN: llc -verify-machineinstrs -global-isel -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck --check-prefixes=GCN %s
33

44
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
55
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx90a.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX90A-VGPR %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX942-VGPR %s
2+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A %s
3+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942 %s
4+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX90A-VGPR %s
5+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefixes=VGPR,GFX942-VGPR %s
66

77
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x4bf16.1k(<4 x i16>, <4 x i16>, <32 x float>, i32, i32, i32)
88
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x4bf16.1k(<4 x i16>, <4 x i16>, <16 x float>, i32, i32, i32)

llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.ll

Lines changed: 26 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s
5-
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s
6-
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s
2+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,NOLIT-SRCC,GFX908,GFX908_A %s
3+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 -mattr=-mfma-inline-literal-bug < %s | FileCheck -enable-var-scope --check-prefixes=GCN,LIT-SRCC,GFX908,GFX908_A %s
4+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX90A,GFX908_A,GFX90A_42 %s
5+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GFX942,GFX90A_42 %s
6+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck -enable-var-scope --check-prefix=GFX942-VGPR %s
77

88
declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32)
99
declare <16 x float> @llvm.amdgcn.mfma.f32.16x16x1f32(float, float, <16 x float>, i32, i32, i32)
@@ -3186,13 +3186,14 @@ define amdgpu_kernel void @test_mfma_i32_16x16x4i8_splatimm_src2_64(ptr addrspac
31863186
;
31873187
; GFX942-VGPR-LABEL: test_mfma_i32_16x16x4i8_splatimm_src2_64:
31883188
; GFX942-VGPR: ; %bb.0: ; %bb
3189-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1
3190-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2
3189+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1
3190+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2
31913191
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
3192+
; GFX942-VGPR-NEXT: s_nop 0
3193+
; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v16, v17, 64 cbsz:1 abid:2 blgp:3
31923194
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
3193-
; GFX942-VGPR-NEXT: v_mfma_i32_16x16x4_4b_i8 v[0:15], v0, v1, 64 cbsz:1 abid:2 blgp:3
31943195
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
3195-
; GFX942-VGPR-NEXT: s_nop 9
3196+
; GFX942-VGPR-NEXT: s_nop 8
31963197
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
31973198
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
31983199
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4538,13 +4539,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x1f32_imm_splat(ptr addrspace(1) %
45384539
;
45394540
; GFX942-VGPR-LABEL: test_mfma_f32_16x16x1f32_imm_splat:
45404541
; GFX942-VGPR: ; %bb.0: ; %bb
4541-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
4542-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0
4542+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 1.0
4543+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, 2.0
45434544
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4545+
; GFX942-VGPR-NEXT: s_nop 0
4546+
; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v16, v17, 1.0
45444547
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
4545-
; GFX942-VGPR-NEXT: v_mfma_f32_16x16x1_4b_f32 v[0:15], v0, v1, 1.0
45464548
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
4547-
; GFX942-VGPR-NEXT: s_nop 8
4549+
; GFX942-VGPR-NEXT: s_nop 7
45484550
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
45494551
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
45504552
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4689,15 +4691,16 @@ define amdgpu_kernel void @test_mfma_f32_32x32x8f16_imm_splat(ptr addrspace(1) %
46894691
;
46904692
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x8f16_imm_splat:
46914693
; GFX942-VGPR: ; %bb.0: ; %bb
4692-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 0x3c003c00
4693-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, v0
4694-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v2, 0x40004000
4695-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v3, v2
4694+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0x3c003c00
4695+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v17, v16
4696+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v18, 0x40004000
4697+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v19, v18
46964698
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4699+
; GFX942-VGPR-NEXT: s_nop 0
4700+
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[16:17], v[18:19], 1.0
46974701
; GFX942-VGPR-NEXT: v_mov_b32_e32 v16, 0
4698-
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x8_f16 v[0:15], v[0:1], v[2:3], 1.0
46994702
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
4700-
; GFX942-VGPR-NEXT: s_nop 9
4703+
; GFX942-VGPR-NEXT: s_nop 8
47014704
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
47024705
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
47034706
; GFX942-VGPR-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
@@ -4908,14 +4911,14 @@ define amdgpu_kernel void @test_mfma_f32_32x32x1f32_imm_splat(ptr addrspace(1) %
49084911
;
49094912
; GFX942-VGPR-LABEL: test_mfma_f32_32x32x1f32_imm_splat:
49104913
; GFX942-VGPR: ; %bb.0: ; %bb
4911-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v0, 1.0
4912-
; GFX942-VGPR-NEXT: v_mov_b32_e32 v1, 2.0
4914+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 1.0
4915+
; GFX942-VGPR-NEXT: v_mov_b32_e32 v33, 2.0
49134916
; GFX942-VGPR-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
4917+
; GFX942-VGPR-NEXT: s_nop 0
4918+
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v32, v33, 0
49144919
; GFX942-VGPR-NEXT: v_mov_b32_e32 v32, 0
4915-
; GFX942-VGPR-NEXT: v_mfma_f32_32x32x1_2b_f32 v[0:31], v0, v1, 0
49164920
; GFX942-VGPR-NEXT: s_waitcnt lgkmcnt(0)
49174921
; GFX942-VGPR-NEXT: s_nop 15
4918-
; GFX942-VGPR-NEXT: s_nop 0
49194922
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[28:31], s[0:1] offset:112
49204923
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[24:27], s[0:1] offset:96
49214924
; GFX942-VGPR-NEXT: global_store_dwordx4 v32, v[20:23], s[0:1] offset:80

llvm/test/CodeGen/AMDGPU/mfma-loop.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s
3-
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
4-
; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
2+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck -enable-var-scope -check-prefixes=GFX908 %s
3+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -enable-var-scope -check-prefixes=GFX90A %s
4+
; RUN: llc -verify-machineinstrs -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck -enable-var-scope -check-prefixes=GFX942 %s
55

66

77
; Check that we do not copy agprs to vgprs and back inside the loop.

llvm/test/CodeGen/AMDGPU/rewrite-vgpr-mfma-to-agpr.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2-
; RUN: llc -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck %s
2+
; RUN: llc -verify-machineinstrs -mcpu=gfx942 -amdgpu-mfma-vgpr-form < %s | FileCheck %s
33

44
target triple = "amdgcn-amd-amdhsa"
55

0 commit comments

Comments
 (0)