Skip to content

Commit e30301a

Browse files
authored
[MachineCopyPropagation] Make use of lane mask info in basic block liveins (#140248)
1 parent be6c168 commit e30301a

File tree

2 files changed

+6
-21
lines changed

2 files changed

+6
-21
lines changed

llvm/lib/CodeGen/MachineCopyPropagation.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -553,9 +553,12 @@ void MachineCopyPropagation::readSuccessorLiveIns(
553 553     // If a copy result is livein to a successor, it is not dead.
554 554     for (const MachineBasicBlock *Succ : MBB.successors()) {
555 555       for (const auto &LI : Succ->liveins()) {
556     -       for (MCRegUnit Unit : TRI->regunits(LI.PhysReg)) {
557     -         if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
558     -           MaybeDeadCopies.remove(Copy);
    556 +       for (MCRegUnitMaskIterator U(LI.PhysReg, TRI); U.isValid(); ++U) {
    557 +         auto [Unit, Mask] = *U;
    558 +         if ((Mask & LI.LaneMask).any()) {
    559 +           if (MachineInstr *Copy = Tracker.findCopyForUnit(Unit, *TRI))
    560 +             MaybeDeadCopies.remove(Copy);
    561 +         }
559 562         }
560 563       }
561 564     }

llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Lines changed: 0 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -875,7 +875,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
875875
; GFX1064_DPP-NEXT: s_cbranch_execz .LBB2_2
876876
; GFX1064_DPP-NEXT: ; %bb.1:
877877
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, s6
878-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
879878
; GFX1064_DPP-NEXT: ds_add_rtn_u32 v0, v4, v0
880879
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
881880
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -980,7 +979,6 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) {
980979
; GFX1164_DPP-NEXT: s_cbranch_execz .LBB2_2
981980
; GFX1164_DPP-NEXT: ; %bb.1:
982981
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, s6
983-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
984982
; GFX1164_DPP-NEXT: ds_add_rtn_u32 v0, v4, v0
985983
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
986984
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -4282,7 +4280,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
42824280
; GFX1064_DPP-NEXT: s_cbranch_execz .LBB10_2
42834281
; GFX1064_DPP-NEXT: ; %bb.1:
42844282
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, s6
4285-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
42864283
; GFX1064_DPP-NEXT: ds_sub_rtn_u32 v0, v4, v0
42874284
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
42884285
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -4387,7 +4384,6 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) {
43874384
; GFX1164_DPP-NEXT: s_cbranch_execz .LBB10_2
43884385
; GFX1164_DPP-NEXT: ; %bb.1:
43894386
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, s6
4390-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
43914387
; GFX1164_DPP-NEXT: ds_sub_rtn_u32 v0, v4, v0
43924388
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
43934389
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -6691,7 +6687,6 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
66916687
; GFX1064_DPP-NEXT: ; %bb.1:
66926688
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, 0
66936689
; GFX1064_DPP-NEXT: v_mov_b32_e32 v4, s6
6694-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
66956690
; GFX1064_DPP-NEXT: ds_and_rtn_b32 v0, v0, v4
66966691
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
66976692
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -6796,7 +6791,6 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) {
67966791
; GFX1164_DPP-NEXT: ; %bb.1:
67976792
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, 0
67986793
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s6
6799-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
68006794
; GFX1164_DPP-NEXT: ds_and_rtn_b32 v0, v0, v4
68016795
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
68026796
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -8052,7 +8046,6 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
80528046
; GFX1064_DPP-NEXT: s_cbranch_execz .LBB17_2
80538047
; GFX1064_DPP-NEXT: ; %bb.1:
80548048
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, s6
8055-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
80568049
; GFX1064_DPP-NEXT: ds_or_rtn_b32 v0, v4, v0
80578050
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
80588051
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -8157,7 +8150,6 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) {
81578150
; GFX1164_DPP-NEXT: s_cbranch_execz .LBB17_2
81588151
; GFX1164_DPP-NEXT: ; %bb.1:
81598152
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, s6
8160-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
81618153
; GFX1164_DPP-NEXT: ds_or_rtn_b32 v0, v4, v0
81628154
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
81638155
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -9412,7 +9404,6 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
94129404
; GFX1064_DPP-NEXT: s_cbranch_execz .LBB19_2
94139405
; GFX1064_DPP-NEXT: ; %bb.1:
94149406
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, s6
9415-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
94169407
; GFX1064_DPP-NEXT: ds_xor_rtn_b32 v0, v4, v0
94179408
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
94189409
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -9517,7 +9508,6 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) {
95179508
; GFX1164_DPP-NEXT: s_cbranch_execz .LBB19_2
95189509
; GFX1164_DPP-NEXT: ; %bb.1:
95199510
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, s6
9520-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
95219511
; GFX1164_DPP-NEXT: ds_xor_rtn_b32 v0, v4, v0
95229512
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
95239513
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -10772,7 +10762,6 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
1077210762
; GFX1064_DPP-NEXT: ; %bb.1:
1077310763
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, 0
1077410764
; GFX1064_DPP-NEXT: v_mov_b32_e32 v4, s6
10775-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
1077610765
; GFX1064_DPP-NEXT: ds_max_rtn_i32 v0, v0, v4
1077710766
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
1077810767
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -10877,7 +10866,6 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) {
1087710866
; GFX1164_DPP-NEXT: ; %bb.1:
1087810867
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, 0
1087910868
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s6
10880-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
1088110869
; GFX1164_DPP-NEXT: ds_max_rtn_i32 v0, v0, v4
1088210870
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
1088310871
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -12600,7 +12588,6 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
1260012588
; GFX1064_DPP-NEXT: ; %bb.1:
1260112589
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, 0
1260212590
; GFX1064_DPP-NEXT: v_mov_b32_e32 v4, s6
12603-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
1260412591
; GFX1064_DPP-NEXT: ds_min_rtn_i32 v0, v0, v4
1260512592
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
1260612593
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -12705,7 +12692,6 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) {
1270512692
; GFX1164_DPP-NEXT: ; %bb.1:
1270612693
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, 0
1270712694
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s6
12708-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
1270912695
; GFX1164_DPP-NEXT: ds_min_rtn_i32 v0, v0, v4
1271012696
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
1271112697
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -14428,7 +14414,6 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
1442814414
; GFX1064_DPP-NEXT: s_cbranch_execz .LBB27_2
1442914415
; GFX1064_DPP-NEXT: ; %bb.1:
1443014416
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, s6
14431-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
1443214417
; GFX1064_DPP-NEXT: ds_max_rtn_u32 v0, v4, v0
1443314418
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
1443414419
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -14533,7 +14518,6 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) {
1453314518
; GFX1164_DPP-NEXT: s_cbranch_execz .LBB27_2
1453414519
; GFX1164_DPP-NEXT: ; %bb.1:
1453514520
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, s6
14536-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
1453714521
; GFX1164_DPP-NEXT: ds_max_rtn_u32 v0, v4, v0
1453814522
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
1453914523
; GFX1164_DPP-NEXT: buffer_gl0_inv
@@ -16243,7 +16227,6 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
1624316227
; GFX1064_DPP-NEXT: ; %bb.1:
1624416228
; GFX1064_DPP-NEXT: v_mov_b32_e32 v0, 0
1624516229
; GFX1064_DPP-NEXT: v_mov_b32_e32 v4, s6
16246-
; GFX1064_DPP-NEXT: s_mov_b32 s3, s6
1624716230
; GFX1064_DPP-NEXT: ds_min_rtn_u32 v0, v0, v4
1624816231
; GFX1064_DPP-NEXT: s_waitcnt lgkmcnt(0)
1624916232
; GFX1064_DPP-NEXT: buffer_gl0_inv
@@ -16348,7 +16331,6 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) {
1634816331
; GFX1164_DPP-NEXT: ; %bb.1:
1634916332
; GFX1164_DPP-NEXT: v_mov_b32_e32 v0, 0
1635016333
; GFX1164_DPP-NEXT: v_mov_b32_e32 v4, s6
16351-
; GFX1164_DPP-NEXT: s_mov_b32 s3, s6
1635216334
; GFX1164_DPP-NEXT: ds_min_rtn_u32 v0, v0, v4
1635316335
; GFX1164_DPP-NEXT: s_waitcnt lgkmcnt(0)
1635416336
; GFX1164_DPP-NEXT: buffer_gl0_inv

0 commit comments

Comments
 (0)