@@ -1459,8 +1459,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14591459; SI-NEXT: s_mov_b32 s3, 0xf000
14601460; SI-NEXT: s_mov_b32 s2, -1
14611461; SI-NEXT: s_waitcnt lgkmcnt(0)
1462- ; SI-NEXT: s_mul_i32 s6 , s6, s7
1463- ; SI-NEXT: s_and_b32 s4, s6 , 1
1462+ ; SI-NEXT: s_and_b32 s4 , s6, s7
1463+ ; SI-NEXT: s_and_b32 s4, s4 , 1
14641464; SI-NEXT: v_mov_b32_e32 v0, s4
14651465; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
14661466; SI-NEXT: s_endpgm
@@ -1473,8 +1473,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14731473; VI-NEXT: s_mov_b32 s3, 0xf000
14741474; VI-NEXT: s_mov_b32 s2, -1
14751475; VI-NEXT: s_waitcnt lgkmcnt(0)
1476- ; VI-NEXT: s_mul_i32 s6 , s6, s7
1477- ; VI-NEXT: s_and_b32 s4, s6 , 1
1476+ ; VI-NEXT: s_and_b32 s4 , s6, s7
1477+ ; VI-NEXT: s_and_b32 s4, s4 , 1
14781478; VI-NEXT: v_mov_b32_e32 v0, s4
14791479; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
14801480; VI-NEXT: s_endpgm
@@ -1487,8 +1487,8 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
14871487; GFX9-NEXT: s_mov_b32 s3, 0xf000
14881488; GFX9-NEXT: s_mov_b32 s2, -1
14891489; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1490- ; GFX9-NEXT: s_mul_i32 s6 , s6, s7
1491- ; GFX9-NEXT: s_and_b32 s4, s6 , 1
1490+ ; GFX9-NEXT: s_and_b32 s4 , s6, s7
1491+ ; GFX9-NEXT: s_and_b32 s4, s4 , 1
14921492; GFX9-NEXT: v_mov_b32_e32 v0, s4
14931493; GFX9-NEXT: buffer_store_byte v0, off, s[0:3], 0
14941494; GFX9-NEXT: s_endpgm
@@ -1500,7 +1500,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15001500; GFX10-NEXT: s_load_dword s3, s[4:5], 0x70
15011501; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
15021502; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1503- ; GFX10-NEXT: s_mul_i32 s2, s2, s3
1503+ ; GFX10-NEXT: s_and_b32 s2, s2, s3
15041504; GFX10-NEXT: s_mov_b32 s3, 0x31016000
15051505; GFX10-NEXT: s_and_b32 s2, s2, 1
15061506; GFX10-NEXT: v_mov_b32_e32 v0, s2
@@ -1515,7 +1515,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15151515; GFX11-NEXT: s_load_b32 s3, s[4:5], 0x70
15161516; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
15171517; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1518- ; GFX11-NEXT: s_mul_i32 s2, s2, s3
1518+ ; GFX11-NEXT: s_and_b32 s2, s2, s3
15191519; GFX11-NEXT: s_mov_b32 s3, 0x31016000
15201520; GFX11-NEXT: s_and_b32 s2, s2, 1
15211521; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1531,7 +1531,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15311531; GFX12-NEXT: s_load_b32 s3, s[4:5], 0x70
15321532; GFX12-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
15331533; GFX12-NEXT: s_wait_kmcnt 0x0
1534- ; GFX12-NEXT: s_mul_i32 s2, s2, s3
1534+ ; GFX12-NEXT: s_and_b32 s2, s2, s3
15351535; GFX12-NEXT: s_mov_b32 s3, 0x31016000
15361536; GFX12-NEXT: s_and_b32 s2, s2, 1
15371537; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
@@ -1555,7 +1555,7 @@ define amdgpu_kernel void @s_mul_i1(ptr addrspace(1) %out, [8 x i32], i1 %a, [8
15551555; EG-NEXT: MOV * T0.X, 0.0,
15561556; EG-NEXT: ALU clause starting at 11:
15571557; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1558- ; EG-NEXT: MULLO_INT * T0.X , T1.X, T0.X,
1558+ ; EG-NEXT: AND_INT * T1.W , T1.X, T0.X,
15591559; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
15601560; EG-NEXT: AND_INT T1.W, PS, 1,
15611561; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
@@ -1589,7 +1589,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
15891589; SI-NEXT: s_mov_b32 s4, s0
15901590; SI-NEXT: s_mov_b32 s5, s1
15911591; SI-NEXT: s_waitcnt vmcnt(0)
1592- ; SI-NEXT: v_mul_lo_u32 v0, v0, v1
1592+ ; SI-NEXT: v_and_b32_e32 v0, v0, v1
15931593; SI-NEXT: v_and_b32_e32 v0, 1, v0
15941594; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
15951595; SI-NEXT: s_endpgm
@@ -1609,7 +1609,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16091609; VI-NEXT: s_mov_b32 s4, s0
16101610; VI-NEXT: s_mov_b32 s5, s1
16111611; VI-NEXT: s_waitcnt vmcnt(0)
1612- ; VI-NEXT: v_mul_lo_u32 v0, v0, v1
1612+ ; VI-NEXT: v_and_b32_e32 v0, v0, v1
16131613; VI-NEXT: v_and_b32_e32 v0, 1, v0
16141614; VI-NEXT: buffer_store_byte v0, off, s[4:7], 0
16151615; VI-NEXT: s_endpgm
@@ -1629,7 +1629,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16291629; GFX9-NEXT: s_mov_b32 s4, s0
16301630; GFX9-NEXT: s_mov_b32 s5, s1
16311631; GFX9-NEXT: s_waitcnt vmcnt(0)
1632- ; GFX9-NEXT: v_mul_lo_u32 v0, v0, v1
1632+ ; GFX9-NEXT: v_and_b32_e32 v0, v0, v1
16331633; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
16341634; GFX9-NEXT: buffer_store_byte v0, off, s[4:7], 0
16351635; GFX9-NEXT: s_endpgm
@@ -1650,7 +1650,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16501650; GFX10-NEXT: s_mov_b32 s4, s0
16511651; GFX10-NEXT: s_mov_b32 s5, s1
16521652; GFX10-NEXT: s_waitcnt vmcnt(0)
1653- ; GFX10-NEXT: v_mul_lo_u32 v0, v0, v1
1653+ ; GFX10-NEXT: v_and_b32_e32 v0, v0, v1
16541654; GFX10-NEXT: v_and_b32_e32 v0, 1, v0
16551655; GFX10-NEXT: buffer_store_byte v0, off, s[4:7], 0
16561656; GFX10-NEXT: s_endpgm
@@ -1671,7 +1671,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16711671; GFX11-NEXT: s_mov_b32 s4, s0
16721672; GFX11-NEXT: s_mov_b32 s5, s1
16731673; GFX11-NEXT: s_waitcnt vmcnt(0)
1674- ; GFX11-NEXT: v_mul_lo_u32 v0, v0, v1
1674+ ; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
16751675; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
16761676; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
16771677; GFX11-NEXT: buffer_store_b8 v0, off, s[4:7], 0
@@ -1693,7 +1693,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
16931693; GFX12-NEXT: s_mov_b32 s4, s0
16941694; GFX12-NEXT: s_mov_b32 s5, s1
16951695; GFX12-NEXT: s_wait_loadcnt 0x0
1696- ; GFX12-NEXT: v_mul_lo_u32 v0, v0, v1
1696+ ; GFX12-NEXT: v_and_b32_e32 v0, v0, v1
16971697; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
16981698; GFX12-NEXT: v_and_b32_e32 v0, 1, v0
16991699; GFX12-NEXT: buffer_store_b8 v0, off, s[4:7], null
@@ -1714,7 +1714,7 @@ define amdgpu_kernel void @v_mul_i1(ptr addrspace(1) %out, ptr addrspace(1) %in)
17141714; EG-NEXT: MOV * T0.X, KC0[2].Z,
17151715; EG-NEXT: ALU clause starting at 11:
17161716; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
1717- ; EG-NEXT: MULLO_INT * T0.X , T0.X, T1.X,
1717+ ; EG-NEXT: AND_INT * T1.W , T0.X, T1.X,
17181718; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
17191719; EG-NEXT: AND_INT T1.W, PS, 1,
17201720; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
0 commit comments