Skip to content

[AMDGPU] Add identity_combines to RegBankCombiner #131305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
2 commits were merged on Mar 17, 2025.

Conversation

Pierre-vh
Copy link
Contributor

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Mar 14, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Pierre van Houtryve (Pierre-vh)

Changes

Patch is 128.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131305.diff

21 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll (-9)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll (-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+60-61)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll (+1-5)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+30-31)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll (-9)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+90-91)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+54-55)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+5-5)
  • (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+183-199)
  • (modified) llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll (+1-3)
  • (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+172-184)
  • (modified) llvm/test/CodeGen/AMDGPU/fptrunc.ll (-4)
  • (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+30-14)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0dfbb91f2ac54..bdb0d01e74f00 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2567,7 +2567,7 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
     SrcReg = OriginalSrcReg;
   LLT DstTy = MRI.getType(DstReg);
   return mi_match(SrcReg, MRI,
-                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI);
 }
 
 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
@@ -2577,7 +2577,7 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
   Register SrcReg = MI.getOperand(1).getReg();
   LLT DstTy = MRI.getType(DstReg);
   if (mi_match(SrcReg, MRI,
-               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI)) {
     unsigned DstSize = DstTy.getScalarSizeInBits();
     unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
     return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index da47aaf8a3b5c..36653867fbba0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -180,5 +180,5 @@ def AMDGPURegBankCombiner : GICombiner<
   [unmerge_merge, unmerge_cst, unmerge_undef,
    zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
-   redundant_and]> {
+   identity_combines, redundant_and]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index ff5880819020d..38374d1689366 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX7-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX7-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s1, s0
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_add_i32 s0, s2, s0
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
@@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_add_i32 s0, s2, s0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s1, s0
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_add_i32 s0, s2, s0
 ; GFX9-NEXT:    ; return to shader part epilog
   %saddo = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX7-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX7-NEXT:    s_xor_b32 s1, s3, s1
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
-; GFX7-NEXT:    s_and_b32 s1, s1, 1
 ; GFX7-NEXT:    s_add_i32 s0, s4, s0
 ; GFX7-NEXT:    s_add_i32 s1, s5, s1
 ; GFX7-NEXT:    ; return to shader part epilog
@@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX8-NEXT:    s_xor_b32 s1, s3, s1
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_and_b32 s1, s1, 1
 ; GFX8-NEXT:    s_add_i32 s0, s4, s0
 ; GFX8-NEXT:    s_add_i32 s1, s5, s1
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX9-NEXT:    s_xor_b32 s1, s3, s1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_add_i32 s0, s4, s0
 ; GFX9-NEXT:    s_add_i32 s1, s5, s1
 ; GFX9-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index ee89b28a0d2bb..2c44d719d0b45 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -106,7 +106,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
 ; GCN-NEXT:    s_mov_b32 s2, 0
 ; GCN-NEXT:  .LBB4_2: ; %Flow
 ; GCN-NEXT:    s_xor_b32 s2, s2, 1
-; GCN-NEXT:    s_and_b32 s2, s2, 1
 ; GCN-NEXT:    s_cmp_lg_u32 s2, 0
 ; GCN-NEXT:    s_cbranch_scc1 .LBB4_4
 ; GCN-NEXT:  ; %bb.3: ; %.zero
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index a354c072aa150..c295a662704e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -36,7 +36,6 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:  .LBB0_2: ; %Flow
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB0_4
 ; GFX9-NEXT:  ; %bb.3: ; %bb0
@@ -121,7 +120,6 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:  .LBB1_2: ; %Flow
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB1_4
 ; GFX9-NEXT:  ; %bb.3: ; %bb0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 755eb13a61e14..5240bf4f3a1d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -356,7 +356,6 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
 ; OLD_RBS-NEXT:    s_cmp_ge_u32 s1, 20
 ; OLD_RBS-NEXT:    s_cselect_b32 s3, 1, 0
 ; OLD_RBS-NEXT:    s_and_b32 s2, s2, s3
-; OLD_RBS-NEXT:    s_and_b32 s2, s2, 1
 ; OLD_RBS-NEXT:    s_cmp_lg_u32 s2, 0
 ; OLD_RBS-NEXT:    s_cselect_b32 s0, s0, s1
 ; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 4bfd29430ff1e..694a81a9668f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1077,7 +1077,6 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23
 ; GFX8-NEXT:    s_add_i32 s1, s1, 0xff800000
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s1, s2
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 2a200259a93d2..4031fe0be2823 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -171,17 +171,17 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv i64 %num, %den
@@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  .LBB1_3: ; %Flow
 ; CHECK-NEXT:    s_xor_b32 s0, s0, 1
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
 ; CHECK-NEXT:  ; %bb.4:
@@ -809,17 +808,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
-; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_mul_lo_u32 v2, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:  .LBB2_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
@@ -981,17 +980,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v6
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i64> %num, %den
@@ -1817,17 +1816,17 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v5
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_mul_lo_u32 v2, v0, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl i64 4096, %y
@@ -2279,17 +2278,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
-; CGP-NEXT:    v_mul_lo_u32 v1, v0, v11
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_mul_lo_u32 v2, v0, v11
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:  .LBB8_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    v_or_b32_e32 v3, v7, v10
@@ -2453,17 +2452,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v9
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
index bac80f0777c02..8300e2542d452 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
@@ -1444,7 +1444,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
-; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 10
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 10, v2
@@ -1459,7 +1458,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_bfe_u32 v1, v1, 0, 10
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 10, v2
@@ -1473,7 +1471,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 22, v[2:3]
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX9-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_bfe_u32 v1, v1, 0, 10
@@ -1486,9 +1483,8 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10PLUS-NEXT:    v_lshlrev_b64 v[2:3], 22, v[2:3]
 ; GFX10PLUS-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
-; GFX10PLUS-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
-; GFX10PLUS-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX10PLUS-NEXT:    v_bfe_u32 v1, v1, 0, 10
+; GFX10PLUS-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX10PLUS-NEXT:    v_bfe_i32 v2, v2, 0, 1
 ; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX10PLUS-NEXT:    v_lshl_or_b32 v1, v2, 10, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 2bb42308d935c..1a10f5fb7a5ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -167,15 +167,15 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem i64 %num, %den
@@ -327,7 +327,6 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  .LBB1_3: ; %Flow
 ; CHECK-NEXT:    s_xor_b32 s0, s7, 1
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
 ; CHECK-NEXT:  ; %bb.4:
@@ -791,15 +790,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
 ; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:  .LBB2_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
@@ -959,15 +958,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
+; CGP-NEXT:    v_mov_b32_e32 v3...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Mar 14, 2025

@llvm/pr-subscribers-llvm-globalisel

Author: Pierre van Houtryve (Pierre-vh)

Changes

Patch is 128.04 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/131305.diff

21 Files Affected:

  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+2-2)
  • (modified) llvm/lib/Target/AMDGPU/AMDGPUCombine.td (+1-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll (-9)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll (-2)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+60-61)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll (+1-5)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+30-31)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll (-1)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/subo.ll (-9)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+90-91)
  • (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+54-55)
  • (modified) llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll (+9-9)
  • (modified) llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll (+5-5)
  • (modified) llvm/test/CodeGen/AMDGPU/div_i128.ll (+183-199)
  • (modified) llvm/test/CodeGen/AMDGPU/dynamic_stackalloc.ll (+1-3)
  • (modified) llvm/test/CodeGen/AMDGPU/fptoi.i128.ll (+172-184)
  • (modified) llvm/test/CodeGen/AMDGPU/fptrunc.ll (-4)
  • (modified) llvm/test/CodeGen/AMDGPU/global-saddr-load.ll (+30-14)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0dfbb91f2ac54..bdb0d01e74f00 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2567,7 +2567,7 @@ bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI,
     SrcReg = OriginalSrcReg;
   LLT DstTy = MRI.getType(DstReg);
   return mi_match(SrcReg, MRI,
-                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
+                  m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI);
 }
 
 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
@@ -2577,7 +2577,7 @@ bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI,
   Register SrcReg = MI.getOperand(1).getReg();
   LLT DstTy = MRI.getType(DstReg);
   if (mi_match(SrcReg, MRI,
-               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
+               m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy)))) && canReplaceReg(DstReg, Reg, MRI)) {
     unsigned DstSize = DstTy.getScalarSizeInBits();
     unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
     return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index da47aaf8a3b5c..36653867fbba0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -180,5 +180,5 @@ def AMDGPURegBankCombiner : GICombiner<
   [unmerge_merge, unmerge_cst, unmerge_undef,
    zext_trunc_fold, int_minmax_to_med3, ptr_add_immed_chain,
    fp_minmax_to_clamp, fp_minmax_to_med3, fmed3_intrinsic_to_clamp,
-   redundant_and]> {
+   identity_combines, redundant_and]> {
 }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
index ff5880819020d..38374d1689366 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/addo.ll
@@ -640,7 +640,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX7-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX7-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s1, s0
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
 ; GFX7-NEXT:    s_add_i32 s0, s2, s0
 ; GFX7-NEXT:    ; return to shader part epilog
 ;
@@ -652,7 +651,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX8-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_add_i32 s0, s2, s0
 ; GFX8-NEXT:    ; return to shader part epilog
 ;
@@ -664,7 +662,6 @@ define amdgpu_ps i32 @s_saddo_i32(i32 inreg %a, i32 inreg %b) {
 ; GFX9-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX9-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s1, s0
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_add_i32 s0, s2, s0
 ; GFX9-NEXT:    ; return to shader part epilog
   %saddo = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
@@ -749,8 +746,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX7-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX7-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX7-NEXT:    s_xor_b32 s1, s3, s1
-; GFX7-NEXT:    s_and_b32 s0, s0, 1
-; GFX7-NEXT:    s_and_b32 s1, s1, 1
 ; GFX7-NEXT:    s_add_i32 s0, s4, s0
 ; GFX7-NEXT:    s_add_i32 s1, s5, s1
 ; GFX7-NEXT:    ; return to shader part epilog
@@ -769,8 +764,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX8-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX8-NEXT:    s_xor_b32 s1, s3, s1
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
-; GFX8-NEXT:    s_and_b32 s1, s1, 1
 ; GFX8-NEXT:    s_add_i32 s0, s4, s0
 ; GFX8-NEXT:    s_add_i32 s1, s5, s1
 ; GFX8-NEXT:    ; return to shader part epilog
@@ -789,8 +782,6 @@ define amdgpu_ps <2 x i32> @s_saddo_v2i32(<2 x i32> inreg %a, <2 x i32> inreg %b
 ; GFX9-NEXT:    s_cselect_b32 s3, 1, 0
 ; GFX9-NEXT:    s_xor_b32 s0, s2, s0
 ; GFX9-NEXT:    s_xor_b32 s1, s3, s1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s1, s1, 1
 ; GFX9-NEXT:    s_add_i32 s0, s4, s0
 ; GFX9-NEXT:    s_add_i32 s1, s5, s1
 ; GFX9-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
index ee89b28a0d2bb..2c44d719d0b45 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
@@ -106,7 +106,6 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
 ; GCN-NEXT:    s_mov_b32 s2, 0
 ; GCN-NEXT:  .LBB4_2: ; %Flow
 ; GCN-NEXT:    s_xor_b32 s2, s2, 1
-; GCN-NEXT:    s_and_b32 s2, s2, 1
 ; GCN-NEXT:    s_cmp_lg_u32 s2, 0
 ; GCN-NEXT:    s_cbranch_scc1 .LBB4_4
 ; GCN-NEXT:  ; %bb.3: ; %.zero
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
index a354c072aa150..c295a662704e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
@@ -36,7 +36,6 @@ define amdgpu_kernel void @localize_constants(i1 %cond) {
 ; GFX9-NEXT:    s_mov_b32 s0, 0
 ; GFX9-NEXT:  .LBB0_2: ; %Flow
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB0_4
 ; GFX9-NEXT:  ; %bb.3: ; %bb0
@@ -121,7 +120,6 @@ define amdgpu_kernel void @localize_globals(i1 %cond) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0)
 ; GFX9-NEXT:  .LBB1_2: ; %Flow
 ; GFX9-NEXT:    s_xor_b32 s0, s0, 1
-; GFX9-NEXT:    s_and_b32 s0, s0, 1
 ; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX9-NEXT:    s_cbranch_scc1 .LBB1_4
 ; GFX9-NEXT:  ; %bb.3: ; %bb0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
index 755eb13a61e14..5240bf4f3a1d7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-mui.ll
@@ -356,7 +356,6 @@ define amdgpu_ps void @and_i1_scc(i32 inreg %a, i32 inreg %b, ptr addrspace(1) %
 ; OLD_RBS-NEXT:    s_cmp_ge_u32 s1, 20
 ; OLD_RBS-NEXT:    s_cselect_b32 s3, 1, 0
 ; OLD_RBS-NEXT:    s_and_b32 s2, s2, s3
-; OLD_RBS-NEXT:    s_and_b32 s2, s2, 1
 ; OLD_RBS-NEXT:    s_cmp_lg_u32 s2, 0
 ; OLD_RBS-NEXT:    s_cselect_b32 s0, s0, s1
 ; OLD_RBS-NEXT:    v_mov_b32_e32 v2, s0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 4bfd29430ff1e..694a81a9668f3 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1077,7 +1077,6 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23
 ; GFX8-NEXT:    s_add_i32 s1, s1, 0xff800000
-; GFX8-NEXT:    s_and_b32 s0, s0, 1
 ; GFX8-NEXT:    s_cmp_lg_u32 s0, 0
 ; GFX8-NEXT:    s_cselect_b32 s0, s1, s2
 ; GFX8-NEXT:    ; return to shader part epilog
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 2a200259a93d2..4031fe0be2823 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -171,17 +171,17 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
-; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v2
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v4, v1
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v3, vcc
-; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_mul_lo_u32 v3, v0, v2
+; CHECK-NEXT:    v_add_i32_e32 v5, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v4, v3
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v5, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv i64 %num, %den
@@ -335,7 +335,6 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  .LBB1_3: ; %Flow
 ; CHECK-NEXT:    s_xor_b32 s0, s0, 1
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
 ; CHECK-NEXT:  ; %bb.4:
@@ -809,17 +808,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
-; CGP-NEXT:    v_mul_lo_u32 v1, v0, v4
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v10, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_mul_lo_u32 v2, v0, v4
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v10, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:  .LBB2_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
@@ -981,17 +980,17 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v6
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v8, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v6
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v6
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v8, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v6
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %result = sdiv <2 x i64> %num, %den
@@ -1817,17 +1816,17 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v3, v0
-; CHECK-NEXT:    v_mul_lo_u32 v1, v0, v5
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v3, v1
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CHECK-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CHECK-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v5
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_mul_lo_u32 v2, v0, v5
+; CHECK-NEXT:    v_add_i32_e32 v4, vcc, 1, v0
+; CHECK-NEXT:    v_sub_i32_e32 v2, vcc, v3, v2
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v4, vcc
+; CHECK-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CHECK-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v5
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl i64 4096, %y
@@ -2279,17 +2278,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v8, v0
-; CGP-NEXT:    v_mul_lo_u32 v1, v0, v11
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v8, v1
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; CGP-NEXT:    v_sub_i32_e64 v2, s[4:5], v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; CGP-NEXT:    v_add_i32_e32 v2, vcc, 1, v0
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v11
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_mul_lo_u32 v2, v0, v11
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v8, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CGP-NEXT:    v_sub_i32_e64 v3, s[4:5], v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v3, vcc
+; CGP-NEXT:    v_add_i32_e32 v3, vcc, 1, v0
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v2, v11
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CGP-NEXT:  .LBB8_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    v_or_b32_e32 v3, v7, v10
@@ -2453,17 +2452,17 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v5, v2
-; CGP-NEXT:    v_mul_lo_u32 v3, v2, v9
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_sub_i32_e32 v3, vcc, v5, v3
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
-; CGP-NEXT:    v_sub_i32_e64 v4, s[4:5], v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v3, v3, v4, vcc
-; CGP-NEXT:    v_add_i32_e32 v4, vcc, 1, v2
-; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v3, v9
-; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
 ; CGP-NEXT:    v_mov_b32_e32 v3, 0
+; CGP-NEXT:    v_mul_lo_u32 v4, v2, v9
+; CGP-NEXT:    v_add_i32_e32 v6, vcc, 1, v2
+; CGP-NEXT:    v_sub_i32_e32 v4, vcc, v5, v4
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v6, vcc
+; CGP-NEXT:    v_sub_i32_e64 v5, s[4:5], v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v4, v4, v5, vcc
+; CGP-NEXT:    v_add_i32_e32 v5, vcc, 1, v2
+; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v4, v9
+; CGP-NEXT:    v_cndmask_b32_e32 v2, v2, v5, vcc
 ; CGP-NEXT:    s_or_b64 exec, exec, s[6:7]
 ; CGP-NEXT:    s_setpc_b64 s[30:31]
   %shl.y = shl <2 x i64> <i64 4096, i64 4096>, %y
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
index bac80f0777c02..8300e2542d452 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sext_inreg.ll
@@ -1444,7 +1444,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 1
-; GFX6-NEXT:    v_lshr_b64 v[0:1], v[0:1], 0
 ; GFX6-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX6-NEXT:    v_bfe_u32 v1, v1, 0, 10
 ; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 10, v2
@@ -1459,7 +1458,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX8-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX8-NEXT:    v_bfe_i32 v2, v2, 0, 1
-; GFX8-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX8-NEXT:    v_bfe_u32 v1, v1, 0, 10
 ; GFX8-NEXT:    v_lshlrev_b32_e32 v4, 10, v2
@@ -1473,7 +1471,6 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX9-NEXT:    v_lshlrev_b64 v[2:3], 22, v[2:3]
 ; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
 ; GFX9-NEXT:    v_or_b32_e32 v2, v2, v3
-; GFX9-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
 ; GFX9-NEXT:    v_bfe_i32 v2, v2, 0, 1
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX9-NEXT:    v_bfe_u32 v1, v1, 0, 10
@@ -1486,9 +1483,8 @@ define i65 @v_sext_inreg_i65_22(i65 %value) {
 ; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10PLUS-NEXT:    v_lshlrev_b64 v[2:3], 22, v[2:3]
 ; GFX10PLUS-NEXT:    v_lshrrev_b32_e32 v3, 10, v1
-; GFX10PLUS-NEXT:    v_lshrrev_b64 v[0:1], 0, v[0:1]
-; GFX10PLUS-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX10PLUS-NEXT:    v_bfe_u32 v1, v1, 0, 10
+; GFX10PLUS-NEXT:    v_or_b32_e32 v2, v2, v3
 ; GFX10PLUS-NEXT:    v_bfe_i32 v2, v2, 0, 1
 ; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v3, 31, v2
 ; GFX10PLUS-NEXT:    v_lshl_or_b32 v1, v2, 10, v1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 2bb42308d935c..1a10f5fb7a5ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -167,15 +167,15 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CHECK-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CHECK-NEXT:    v_mul_hi_u32 v0, v4, v0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
 ; CHECK-NEXT:    v_mul_lo_u32 v0, v0, v2
 ; CHECK-NEXT:    v_sub_i32_e32 v0, vcc, v4, v0
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, v0, v2
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; CHECK-NEXT:    v_sub_i32_e32 v3, vcc, v0, v2
 ; CHECK-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
-; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
   %result = srem i64 %num, %den
@@ -327,7 +327,6 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    ; implicit-def: $vgpr0_vgpr1
 ; CHECK-NEXT:  .LBB1_3: ; %Flow
 ; CHECK-NEXT:    s_xor_b32 s0, s7, 1
-; CHECK-NEXT:    s_and_b32 s0, s0, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s0, 0
 ; CHECK-NEXT:    s_cbranch_scc1 .LBB1_5
 ; CHECK-NEXT:  ; %bb.4:
@@ -791,15 +790,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v1, v0, v1
 ; CGP-NEXT:    v_add_i32_e32 v0, vcc, v0, v1
 ; CGP-NEXT:    v_mul_hi_u32 v0, v10, v0
+; CGP-NEXT:    v_mov_b32_e32 v1, 0
 ; CGP-NEXT:    v_mul_lo_u32 v0, v0, v4
 ; CGP-NEXT:    v_sub_i32_e32 v0, vcc, v10, v0
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT:    v_sub_i32_e32 v1, vcc, v0, v4
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
+; CGP-NEXT:    v_sub_i32_e32 v2, vcc, v0, v4
 ; CGP-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v4
-; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
-; CGP-NEXT:    v_mov_b32_e32 v1, 0
+; CGP-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; CGP-NEXT:  .LBB2_4:
 ; CGP-NEXT:    s_or_b64 exec, exec, s[4:5]
 ; CGP-NEXT:    v_or_b32_e32 v3, v9, v7
@@ -959,15 +958,15 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_mul_hi_u32 v3, v2, v3
 ; CGP-NEXT:    v_add_i32_e32 v2, vcc, v2, v3
 ; CGP-NEXT:    v_mul_hi_u32 v2, v8, v2
+; CGP-NEXT:    v_mov_b32_e32 v3...
[truncated]

Copy link

github-actions bot commented Mar 14, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

Copy link
Contributor Author

Pierre-vh commented Mar 17, 2025

Merge activity

  • Mar 17, 4:51 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Mar 17, 4:53 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 4:56 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 4:59 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 5:02 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 5:05 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 5:08 AM EDT: Graphite rebased this pull request as part of a merge.
  • Mar 17, 5:09 AM EDT: A user started a stack merge that includes this pull request via Graphite.
  • Mar 17, 5:11 AM EDT: A user merged this pull request with Graphite.

@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/identity-combine-rbcomb branch 5 times, most recently from 4d0072e to 251835c Compare March 17, 2025 09:04
@Pierre-vh Pierre-vh force-pushed the users/pierre-vh/identity-combine-rbcomb branch from 251835c to 3a471d0 Compare March 17, 2025 09:07
@Pierre-vh Pierre-vh merged commit 7dcea28 into main Mar 17, 2025
6 of 9 checks passed
@Pierre-vh Pierre-vh deleted the users/pierre-vh/identity-combine-rbcomb branch March 17, 2025 09:11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants