Skip to content

Commit 7dcea28

Browse files
authored
[AMDGPU] Add identity_combines to RegBankCombiner (#131305)
1 parent 1f1f820 commit 7dcea28

21 files changed: +640 / -688 lines changed

llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2567,7 +2567,8 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
25672567
SrcReg = OriginalSrcReg;
25682568
LLT DstTy = MRI.getType(DstReg);
25692569
return mi_match(SrcReg, MRI,
2570-
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2570+
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2571+
canReplaceReg(DstReg, Reg, MRI);
25712572
}
25722573

25732574
bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
@@ -2577,7 +2578,8 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
25772578
Register SrcReg = MI.getOperand(1).getReg();
25782579
LLT DstTy = MRI.getType(DstReg);
25792580
if (mi_match(SrcReg, MRI,
2580-
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2581+
m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) &&
2582+
canReplaceReg(DstReg, Reg, MRI)) {
25812583
unsigned DstSize = DstTy.getScalarSizeInBits();
25822584
unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
25832585
return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;

llvm/lib/Target/AMDGPU/AMDGPUCombine.td

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -180,5 +180,5 @@ def AMDGPURegBankCombiner : GICombiner<
180180
[unmerge_merge, unmerge_cst, unmerge_undef,
181181
zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
182182
fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
183-
redundant_and]> {
183+
identity_combines, redundant_and]> {
184184
}

llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll

Lines changed: 0 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
640640
; GFX7-NEXT: s_cmp_lt_i32 s1, 0
641641
; GFX7-NEXT: s_cselect_b32 s1, 1, 0
642642
; GFX7-NEXT: s_xor_b32 s0, s1, s0
643-
; GFX7-NEXT: s_and_b32 s0, s0, 1
644643
; GFX7-NEXT: s_add_i32 s0, s2, s0
645644
; GFX7-NEXT: ; return to shader part epilog
646645
;
@@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
652651
; GFX8-NEXT: s_cmp_lt_i32 s1, 0
653652
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
654653
; GFX8-NEXT: s_xor_b32 s0, s1, s0
655-
; GFX8-NEXT: s_and_b32 s0, s0, 1
656654
; GFX8-NEXT: s_add_i32 s0, s2, s0
657655
; GFX8-NEXT: ; return to shader part epilog
658656
;
@@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
664662
; GFX9-NEXT: s_cmp_lt_i32 s1, 0
665663
; GFX9-NEXT: s_cselect_b32 s1, 1, 0
666664
; GFX9-NEXT: s_xor_b32 s0, s1, s0
667-
; GFX9-NEXT: s_and_b32 s0, s0, 1
668665
; GFX9-NEXT: s_add_i32 s0, s2, s0
669666
; GFX9-NEXT: ; return to shader part epilog
670667
%saddo = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
749746
; GFX7-NEXT: s_cselect_b32 s3, 1, 0
750747
; GFX7-NEXT: s_xor_b32 s0, s2, s0
751748
; GFX7-NEXT: s_xor_b32 s1, s3, s1
752-
; GFX7-NEXT: s_and_b32 s0, s0, 1
753-
; GFX7-NEXT: s_and_b32 s1, s1, 1
754749
; GFX7-NEXT: s_add_i32 s0, s4, s0
755750
; GFX7-NEXT: s_add_i32 s1, s5, s1
756751
; GFX7-NEXT: ; return to shader part epilog
@@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
769764
; GFX8-NEXT: s_cselect_b32 s3, 1, 0
770765
; GFX8-NEXT: s_xor_b32 s0, s2, s0
771766
; GFX8-NEXT: s_xor_b32 s1, s3, s1
772-
; GFX8-NEXT: s_and_b32 s0, s0, 1
773-
; GFX8-NEXT: s_and_b32 s1, s1, 1
774767
; GFX8-NEXT: s_add_i32 s0, s4, s0
775768
; GFX8-NEXT: s_add_i32 s1, s5, s1
776769
; GFX8-NEXT: ; return to shader part epilog
@@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
789782
; GFX9-NEXT: s_cselect_b32 s3, 1, 0
790783
; GFX9-NEXT: s_xor_b32 s0, s2, s0
791784
; GFX9-NEXT: s_xor_b32 s1, s3, s1
792-
; GFX9-NEXT: s_and_b32 s0, s0, 1
793-
; GFX9-NEXT: s_and_b32 s1, s1, 1
794785
; GFX9-NEXT: s_add_i32 s0, s4, s0
795786
; GFX9-NEXT: s_add_i32 s1, s5, s1
796787
; GFX9-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
106106
; GCN-NEXT: s_mov_b32 s2, 0
107107
; GCN-NEXT: .LBB4_2: ; %Flow
108108
; GCN-NEXT: s_xor_b32 s2, s2, 1
109-
; GCN-NEXT: s_and_b32 s2, s2, 1
110109
; GCN-NEXT: s_cmp_lg_u32 s2, 0
111110
; GCN-NEXT: s_cbranch_scc1 .LBB4_4
112111
; GCN-NEXT: ; %bb.3: ; %.zero

llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,6 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
3636
; GFX9-NEXT: s_mov_b32 s0, 0
3737
; GFX9-NEXT: .LBB0_2: ; %Flow
3838
; GFX9-NEXT: s_xor_b32 s0, s0, 1
39-
; GFX9-NEXT: s_and_b32 s0, s0, 1
4039
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
4140
; GFX9-NEXT: s_cbranch_scc1 .LBB0_4
4241
; GFX9-NEXT: ; %bb.3: ; %bb0
@@ -121,7 +120,6 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
121120
; GFX9-NEXT: s_waitcnt vmcnt(0)
122121
; GFX9-NEXT: .LBB1_2: ; %Flow
123122
; GFX9-NEXT: s_xor_b32 s0, s0, 1
124-
; GFX9-NEXT: s_and_b32 s0, s0, 1
125123
; GFX9-NEXT: s_cmp_lg_u32 s0, 0
126124
; GFX9-NEXT: s_cbranch_scc1 .LBB1_4
127125
; GFX9-NEXT: ; %bb.3: ; %bb0

llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -356,7 +356,6 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
356356
; OLD_RBS-NEXT: s_cmp_ge_u32 s1, 20
357357
; OLD_RBS-NEXT: s_cselect_b32 s3, 1, 0
358358
; OLD_RBS-NEXT: s_and_b32 s2, s2, s3
359-
; OLD_RBS-NEXT: s_and_b32 s2, s2, 1
360359
; OLD_RBS-NEXT: s_cmp_lg_u32 s2, 0
361360
; OLD_RBS-NEXT: s_cselect_b32 s0, s0, s1
362361
; OLD_RBS-NEXT: v_mov_b32_e32 v2, s0

llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1077,7 +1077,6 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
10771077
; GFX8-NEXT: s_xor_b32 s0, s1, s0
10781078
; GFX8-NEXT: s_ashr_i32 s1, s3, 23
10791079
; GFX8-NEXT: s_add_i32 s1, s1, 0xff800000
1080-
; GFX8-NEXT: s_and_b32 s0, s0, 1
10811080
; GFX8-NEXT: s_cmp_lg_u32 s0, 0
10821081
; GFX8-NEXT: s_cselect_b32 s0, s1, s2
10831082
; GFX8-NEXT: ; return to shader part epilog

llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll

Lines changed: 60 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -171,17 +171,17 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
171171
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
172172
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
173173
; CHECK-NEXT: v_mul_hi_u32 v0, v4, v0
174-
; CHECK-NEXT: v_mul_lo_u32 v1, v0, v2
175-
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
176-
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v4, v1
177-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
178-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
179-
; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v1, v2
180-
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc
181-
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
182-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
183-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
184174
; CHECK-NEXT: v_mov_b32_e32 v1, 0
175+
; CHECK-NEXT: v_mul_lo_u32 v3, v0, v2
176+
; CHECK-NEXT: v_add_i32_e32 v5, vcc, 1, v0
177+
; CHECK-NEXT: v_sub_i32_e32 v3, vcc, v4, v3
178+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
179+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc
180+
; CHECK-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v2
181+
; CHECK-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
182+
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
183+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v3, v2
184+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
185185
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
186186
; CHECK-NEXT: s_setpc_b64 s[30:31]
187187
%result = sdiv i64 %num, %den
@@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
335335
; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
336336
; CHECK-NEXT: .LBB1_3: ; %Flow
337337
; CHECK-NEXT: s_xor_b32 s0, s0, 1
338-
; CHECK-NEXT: s_and_b32 s0, s0, 1
339338
; CHECK-NEXT: s_cmp_lg_u32 s0, 0
340339
; CHECK-NEXT: s_cbranch_scc1 .LBB1_5
341340
; CHECK-NEXT: ; %bb.4:
@@ -809,17 +808,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
809808
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
810809
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
811810
; CGP-NEXT: v_mul_hi_u32 v0, v10, v0
812-
; CGP-NEXT: v_mul_lo_u32 v1, v0, v4
813-
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
814-
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v10, v1
815-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
816-
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
817-
; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v4
818-
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
819-
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
820-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v4
821-
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
822811
; CGP-NEXT: v_mov_b32_e32 v1, 0
812+
; CGP-NEXT: v_mul_lo_u32 v2, v0, v4
813+
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
814+
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v10, v2
815+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
816+
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
817+
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v4
818+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
819+
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
820+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v4
821+
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
823822
; CGP-NEXT: .LBB2_4:
824823
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
825824
; CGP-NEXT: v_or_b32_e32 v3, v9, v7
@@ -981,17 +980,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
981980
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
982981
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
983982
; CGP-NEXT: v_mul_hi_u32 v2, v8, v2
984-
; CGP-NEXT: v_mul_lo_u32 v3, v2, v6
985-
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
986-
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v8, v3
987-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
988-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
989-
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v6
990-
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
991-
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
992-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v6
993-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
994983
; CGP-NEXT: v_mov_b32_e32 v3, 0
984+
; CGP-NEXT: v_mul_lo_u32 v4, v2, v6
985+
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
986+
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v8, v4
987+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
988+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
989+
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v6
990+
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
991+
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
992+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v6
993+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
995994
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
996995
; CGP-NEXT: s_setpc_b64 s[30:31]
997996
%result = sdiv <2 x i64> %num, %den
@@ -1817,17 +1816,17 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
18171816
; CHECK-NEXT: v_mul_hi_u32 v1, v0, v1
18181817
; CHECK-NEXT: v_add_i32_e32 v0, vcc, v0, v1
18191818
; CHECK-NEXT: v_mul_hi_u32 v0, v3, v0
1820-
; CHECK-NEXT: v_mul_lo_u32 v1, v0, v5
1821-
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1822-
; CHECK-NEXT: v_sub_i32_e32 v1, vcc, v3, v1
1823-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1824-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
1825-
; CHECK-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v5
1826-
; CHECK-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
1827-
; CHECK-NEXT: v_add_i32_e32 v2, vcc, 1, v0
1828-
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v1, v5
1829-
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
18301819
; CHECK-NEXT: v_mov_b32_e32 v1, 0
1820+
; CHECK-NEXT: v_mul_lo_u32 v2, v0, v5
1821+
; CHECK-NEXT: v_add_i32_e32 v4, vcc, 1, v0
1822+
; CHECK-NEXT: v_sub_i32_e32 v2, vcc, v3, v2
1823+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1824+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
1825+
; CHECK-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v5
1826+
; CHECK-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
1827+
; CHECK-NEXT: v_add_i32_e32 v3, vcc, 1, v0
1828+
; CHECK-NEXT: v_cmp_ge_u32_e32 vcc, v2, v5
1829+
; CHECK-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
18311830
; CHECK-NEXT: s_or_b64 exec, exec, s[6:7]
18321831
; CHECK-NEXT: s_setpc_b64 s[30:31]
18331832
%shl.y = shl i64 4096, %y
@@ -2279,17 +2278,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
22792278
; CGP-NEXT: v_mul_hi_u32 v1, v0, v1
22802279
; CGP-NEXT: v_add_i32_e32 v0, vcc, v0, v1
22812280
; CGP-NEXT: v_mul_hi_u32 v0, v8, v0
2282-
; CGP-NEXT: v_mul_lo_u32 v1, v0, v11
2283-
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
2284-
; CGP-NEXT: v_sub_i32_e32 v1, vcc, v8, v1
2285-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
2286-
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
2287-
; CGP-NEXT: v_sub_i32_e64 v2, s[4:5], v1, v11
2288-
; CGP-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
2289-
; CGP-NEXT: v_add_i32_e32 v2, vcc, 1, v0
2290-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v1, v11
2291-
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
22922281
; CGP-NEXT: v_mov_b32_e32 v1, 0
2282+
; CGP-NEXT: v_mul_lo_u32 v2, v0, v11
2283+
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
2284+
; CGP-NEXT: v_sub_i32_e32 v2, vcc, v8, v2
2285+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
2286+
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
2287+
; CGP-NEXT: v_sub_i32_e64 v3, s[4:5], v2, v11
2288+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc
2289+
; CGP-NEXT: v_add_i32_e32 v3, vcc, 1, v0
2290+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v2, v11
2291+
; CGP-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc
22932292
; CGP-NEXT: .LBB8_4:
22942293
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
22952294
; CGP-NEXT: v_or_b32_e32 v3, v7, v10
@@ -2453,17 +2452,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
24532452
; CGP-NEXT: v_mul_hi_u32 v3, v2, v3
24542453
; CGP-NEXT: v_add_i32_e32 v2, vcc, v2, v3
24552454
; CGP-NEXT: v_mul_hi_u32 v2, v5, v2
2456-
; CGP-NEXT: v_mul_lo_u32 v3, v2, v9
2457-
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2458-
; CGP-NEXT: v_sub_i32_e32 v3, vcc, v5, v3
2459-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
2460-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
2461-
; CGP-NEXT: v_sub_i32_e64 v4, s[4:5], v3, v9
2462-
; CGP-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc
2463-
; CGP-NEXT: v_add_i32_e32 v4, vcc, 1, v2
2464-
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v3, v9
2465-
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
24662455
; CGP-NEXT: v_mov_b32_e32 v3, 0
2456+
; CGP-NEXT: v_mul_lo_u32 v4, v2, v9
2457+
; CGP-NEXT: v_add_i32_e32 v6, vcc, 1, v2
2458+
; CGP-NEXT: v_sub_i32_e32 v4, vcc, v5, v4
2459+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
2460+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc
2461+
; CGP-NEXT: v_sub_i32_e64 v5, s[4:5], v4, v9
2462+
; CGP-NEXT: v_cndmask_b32_e32 v4, v4, v5, vcc
2463+
; CGP-NEXT: v_add_i32_e32 v5, vcc, 1, v2
2464+
; CGP-NEXT: v_cmp_ge_u32_e32 vcc, v4, v9
2465+
; CGP-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc
24672466
; CGP-NEXT: s_or_b64 exec, exec, s[6:7]
24682467
; CGP-NEXT: s_setpc_b64 s[30:31]
24692468
%shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y

llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1444,7 +1444,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14441444
; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14451445
; GFX6-NEXT: v_or_b32_e32 v2, v2, v3
14461446
; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1
1447-
; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0
14481447
; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2
14491448
; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10
14501449
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2
@@ -1459,7 +1458,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14591458
; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14601459
; GFX8-NEXT: v_or_b32_e32 v2, v2, v3
14611460
; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1
1462-
; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
14631461
; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2
14641462
; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10
14651463
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2
@@ -1473,7 +1471,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14731471
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
14741472
; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1
14751473
; GFX9-NEXT: v_or_b32_e32 v2, v2, v3
1476-
; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
14771474
; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1
14781475
; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2
14791476
; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10
@@ -1486,9 +1483,8 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
14861483
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
14871484
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3]
14881485
; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1
1489-
; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1]
1490-
; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
14911486
; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10
1487+
; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3
14921488
; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1
14931489
; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2
14941490
; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1

0 commit comments

Comments
 (0)