Skip to content

Commit 61b61f2

Browse files
committed
Hackily use shrinkToUses
1 parent 7d7a5e6 commit 61b61f2

File tree

2 files changed

+45
-40
lines changed

2 files changed

+45
-40
lines changed

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4059,42 +4059,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40594059
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
40604060
MachineInstr *DefMI;
40614061
const auto killDef = [&](SlotIndex OldUseIdx) -> void {
4062-
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4062+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
40634063
// The only user is the instruction which will be killed.
40644064
Register DefReg = DefMI->getOperand(0).getReg();
40654065

4066+
if (MRI.hasOneNonDBGUse(DefReg)) {
4067+
// We cannot just remove the DefMI here; the calling pass would crash.
4068+
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
4069+
DefMI->getOperand(0).setIsDead(true);
4070+
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
4071+
DefMI->removeOperand(I);
4072+
if (LV)
4073+
LV->getVarInfo(DefReg).AliveBlocks.clear();
4074+
}
4075+
40664076
if (LIS) {
40674077
LiveInterval &DefLI = LIS->getInterval(DefReg);
4068-
LiveRange::Segment *OldSeg = DefLI.getSegmentContaining(OldUseIdx);
4069-
assert(OldSeg && "segment not found for instruction in LiveInterval");
4070-
4071-
if (OldSeg->end == OldUseIdx.getRegSlot()) {
4072-
// We only want to leave the dead def.
4073-
DefLI.removeSegment(OldSeg->start.getDeadSlot(), OldUseIdx.getRegSlot(),
4074-
true);
4075-
4076-
for (auto &SR : DefLI.subranges()) {
4077-
LiveRange::Segment *OldSegSR = SR.getSegmentContaining(OldUseIdx);
4078-
if (OldSegSR->end == OldUseIdx.getRegSlot()) {
4079-
// We only want to leave the dead def.
4080-
SR.removeSegment(OldSegSR->start.getDeadSlot(),
4081-
OldUseIdx.getRegSlot(), true);
4082-
}
4083-
}
40844078

4085-
DefLI.removeEmptySubRanges();
4079+
// We cannot delete the original instruction here, so hack out the use
4080+
// in the original instruction with a dummy register so we can use
4081+
// shrinkToUses to deal with any multi-use edge cases. Other targets do
4082+
// not have the complexity of deleting a use to consider here.
4083+
Register DummyReg = MRI.cloneVirtualRegister(DefReg);
4084+
for (MachineOperand &MIOp : MI.uses()) {
4085+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4086+
MIOp.setIsUndef(true);
4087+
MIOp.setReg(DummyReg);
4088+
}
40864089
}
4087-
}
40884090

4089-
if (!MRI.hasOneNonDBGUse(DefReg))
4090-
return;
4091-
// We cannot just remove the DefMI here; the calling pass would crash.
4092-
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
4093-
DefMI->getOperand(0).setIsDead(true);
4094-
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
4095-
DefMI->removeOperand(I);
4096-
if (LV)
4097-
LV->getVarInfo(DefReg).AliveBlocks.clear();
4091+
LIS->shrinkToUses(&DefLI);
4092+
}
40984093
};
40994094

41004095
int64_t Imm;

llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10 %s
3-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=livevars,liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10 %s
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s
4+
45

56
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
67
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
78
# GFX10-NOT: V_MOV_B32
8-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
9+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
10+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
911
---
1012
name: test_fmamk_reg_imm_f16
1113
tracksRegLiveness: true
@@ -23,7 +25,8 @@ body: |
2325
# GFX10: %0:vreg_64 = IMPLICIT_DEF
2426
# GFX10: %1:vgpr_32 = COPY %0.sub1
2527
# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
26-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
28+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
29+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
2730
---
2831
name: test_fmamk_reg_imm_f16__imm_is_subreg
2932
tracksRegLiveness: true
@@ -42,7 +45,8 @@ body: |
4245
# GFX10: %1:vgpr_32 = COPY %0.sub1
4346
# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
4447
# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
45-
# GFX10: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
48+
# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $e
49+
# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $e
4650
---
4751
name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
4852
tracksRegLiveness: true
@@ -61,7 +65,8 @@ body: |
6165
# GFX10: %1:vgpr_32 = COPY %0.sub1
6266
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
6367
# GFX10: S_NOP 0, implicit %2
64-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
68+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
69+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
6570
---
6671
name: test_fmamk_reg_imm_f16__use_imm_before_mac
6772
tracksRegLiveness: true
@@ -80,7 +85,8 @@ body: |
8085
# GFX10: %0:vreg_64 = IMPLICIT_DEF
8186
# GFX10: %1:vgpr_32 = COPY %0.sub1
8287
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
83-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
88+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
89+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
8490
---
8591
name: test_fmamk_reg_imm_f16__use_imm_after_mac
8692
tracksRegLiveness: true
@@ -100,7 +106,8 @@ body: |
100106
# GFX10: %1:vgpr_32 = COPY %0.sub1
101107
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
102108
# GFX10: S_NOP 0, implicit %2
103-
# GFX10: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
109+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
110+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
104111
# GFX10: S_NOP 0, implicit %2
105112

106113
---
@@ -121,7 +128,8 @@ body: |
121128
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
122129
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
123130
# GFX10-NOT: V_MOV_B32
124-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
131+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
132+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
125133
---
126134
name: test_fmamk_imm_reg_f16
127135
tracksRegLiveness: true
@@ -143,7 +151,8 @@ body: |
143151
# GFX10-LABEL: name: test_fmaak_f16
144152
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
145153
# GFX10-NOT: V_MOV_B32
146-
# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
154+
# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
155+
# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
147156
---
148157
name: test_fmaak_f16
149158
tracksRegLiveness: true
@@ -163,7 +172,8 @@ body: |
163172
# GFX10-LABEL: name: test_fmaak_inline_literal_f16
164173
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
165174
# GFX10-NOT: V_MOV_B32
166-
# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
175+
# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
176+
# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec
167177

168178
---
169179
name: test_fmaak_inline_literal_f16

0 commit comments

Comments
 (0)