Skip to content

Commit a9daad8

Browse files
authored
AMDGPU: Update live intervals in convertToThreeAddress (#104610)
Fixes #98741
1 parent 6ab5829 commit a9daad8

File tree

2 files changed: +156 additions, -26 deletions

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4059,17 +4059,37 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40594059
!RI.isSGPRReg(MBB.getParent()->getRegInfo(), Src0->getReg()))) {
40604060
MachineInstr *DefMI;
40614061
const auto killDef = [&]() -> void {
4062-
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4062+
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
40634063
// The only user is the instruction which will be killed.
40644064
Register DefReg = DefMI->getOperand(0).getReg();
4065-
if (!MRI.hasOneNonDBGUse(DefReg))
4066-
return;
4067-
// We cannot just remove the DefMI here, calling pass will crash.
4068-
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
4069-
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
4070-
DefMI->removeOperand(I);
4071-
if (LV)
4072-
LV->getVarInfo(DefReg).AliveBlocks.clear();
4065+
4066+
if (MRI.hasOneNonDBGUse(DefReg)) {
4067+
// We cannot just remove the DefMI here, calling pass will crash.
4068+
DefMI->setDesc(get(AMDGPU::IMPLICIT_DEF));
4069+
DefMI->getOperand(0).setIsDead(true);
4070+
for (unsigned I = DefMI->getNumOperands() - 1; I != 0; --I)
4071+
DefMI->removeOperand(I);
4072+
if (LV)
4073+
LV->getVarInfo(DefReg).AliveBlocks.clear();
4074+
}
4075+
4076+
if (LIS) {
4077+
LiveInterval &DefLI = LIS->getInterval(DefReg);
4078+
4079+
// We cannot delete the original instruction here, so hack out the use
4080+
// in the original instruction with a dummy register so we can use
4081+
// shrinkToUses to deal with any multi-use edge cases. Other targets do
4082+
// not have the complexity of deleting a use to consider here.
4083+
Register DummyReg = MRI.cloneVirtualRegister(DefReg);
4084+
for (MachineOperand &MIOp : MI.uses()) {
4085+
if (MIOp.isReg() && MIOp.getReg() == DefReg) {
4086+
MIOp.setIsUndef(true);
4087+
MIOp.setReg(DummyReg);
4088+
}
4089+
}
4090+
4091+
LIS->shrinkToUses(&DefLI);
4092+
}
40734093
};
40744094

40754095
int64_t Imm;
@@ -4107,6 +4127,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
41074127
.add(*Src2)
41084128
.setMIFlags(MI.getFlags());
41094129
updateLiveVariables(LV, MI, *MIB);
4130+
41104131
if (LIS)
41114132
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
41124133
killDef();
@@ -4129,6 +4150,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
41294150
.add(*Src2)
41304151
.setMIFlags(MI.getFlags());
41314152
updateLiveVariables(LV, MI, *MIB);
4153+
41324154
if (LIS)
41334155
LIS->ReplaceMachineInstrInMaps(MI, *MIB);
41344156
if (DefMI)

llvm/test/CodeGen/AMDGPU/gfx10-twoaddr-fma.mir

Lines changed: 125 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,138 @@
1-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s -run-pass twoaddressinstruction -verify-machineinstrs -o - | FileCheck --check-prefixes=GFX10 %s
2-
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 %s --passes=two-address-instruction -verify-each -o - | FileCheck --check-prefixes=GFX10 %s
1+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
2+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -passes=two-address-instruction -verify-each -o - %s | FileCheck --check-prefixes=GFX10,GFX10-NOLIS %s
3+
# RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -run-pass=liveintervals,twoaddressinstruction -verify-machineinstrs -o - %s | FileCheck --check-prefixes=GFX10,GFX10-LIS %s
4+
35

46
# GFX10-LABEL: name: test_fmamk_reg_imm_f16
5-
# GFX10: %2:vgpr_32 = IMPLICIT_DEF
7+
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
68
# GFX10-NOT: V_MOV_B32
7-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
9+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
10+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
811
---
912
name: test_fmamk_reg_imm_f16
10-
registers:
11-
- { id: 0, class: vreg_64 }
12-
- { id: 1, class: vgpr_32 }
13-
- { id: 2, class: vgpr_32 }
14-
- { id: 3, class: vgpr_32 }
13+
tracksRegLiveness: true
1514
body: |
1615
bb.0:
1716
18-
%0 = IMPLICIT_DEF
19-
%1 = COPY %0.sub1
20-
%2 = V_MOV_B32_e32 1078523331, implicit $exec
21-
%3 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
17+
%0:vreg_64 = IMPLICIT_DEF
18+
%1:vgpr_32 = COPY %0.sub1
19+
%2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
20+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
21+
22+
...
23+
24+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg
25+
# GFX10: %0:vreg_64 = IMPLICIT_DEF
26+
# GFX10: %1:vgpr_32 = COPY %0.sub1
27+
# GFX10: dead undef %2.sub0:vreg_64 = IMPLICIT_DEF
28+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
29+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
30+
---
31+
name: test_fmamk_reg_imm_f16__imm_is_subreg
32+
tracksRegLiveness: true
33+
body: |
34+
bb.0:
35+
36+
%0:vreg_64 = IMPLICIT_DEF
37+
%1:vgpr_32 = COPY %0.sub1
38+
undef %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
39+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
40+
41+
...
42+
43+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
44+
# GFX10: %0:vreg_64 = IMPLICIT_DEF
45+
# GFX10: %1:vgpr_32 = COPY %0.sub1
46+
# GFX10: undef %2.sub1:vreg_64 = V_MOV_B32_e32 9999, implicit $exec
47+
# GFX10: %2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
48+
# GFX10-NOLIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed %0.sub0, 0, %2.sub0, 0, killed %1, 0, 0, 0, implicit $mode, implicit $exec
49+
# GFX10-LIS: %3:vgpr_32 = V_FMA_F16_gfx9_e64 0, %0.sub0, 0, %2.sub0, 0, %1, 0, 0, 0, implicit $mode, implicit $exec
50+
---
51+
name: test_fmamk_reg_imm_f16__imm_is_subreg_fully_defined
52+
tracksRegLiveness: true
53+
body: |
54+
bb.0:
55+
%0:vreg_64 = IMPLICIT_DEF
56+
%1:vgpr_32 = COPY %0.sub1
57+
undef %2.sub1 = V_MOV_B32_e32 9999, implicit $exec
58+
%2.sub0:vreg_64 = V_MOV_B32_e32 1078523331, implicit $exec
59+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2.sub0, killed %1, implicit $mode, implicit $exec
60+
61+
...
62+
63+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_mac
64+
# GFX10: %0:vreg_64 = IMPLICIT_DEF
65+
# GFX10: %1:vgpr_32 = COPY %0.sub1
66+
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
67+
# GFX10: S_NOP 0, implicit %2
68+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
69+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
70+
---
71+
name: test_fmamk_reg_imm_f16__use_imm_before_mac
72+
tracksRegLiveness: true
73+
body: |
74+
bb.0:
75+
76+
%0:vreg_64 = IMPLICIT_DEF
77+
%1:vgpr_32 = COPY %0.sub1
78+
%2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
79+
S_NOP 0, implicit %2
80+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
81+
82+
...
83+
84+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_after_mac
85+
# GFX10: %0:vreg_64 = IMPLICIT_DEF
86+
# GFX10: %1:vgpr_32 = COPY %0.sub1
87+
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
88+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
89+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
90+
---
91+
name: test_fmamk_reg_imm_f16__use_imm_after_mac
92+
tracksRegLiveness: true
93+
body: |
94+
bb.0:
95+
96+
%0:vreg_64 = IMPLICIT_DEF
97+
%1:vgpr_32 = COPY %0.sub1
98+
%2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
99+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
100+
S_NOP 0, implicit %2
101+
102+
...
103+
104+
# GFX10-LABEL: name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
105+
# GFX10: %0:vreg_64 = IMPLICIT_DEF
106+
# GFX10: %1:vgpr_32 = COPY %0.sub1
107+
# GFX10: %2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
108+
# GFX10: S_NOP 0, implicit %2
109+
# GFX10-NOLIS: %3:vgpr_32 = V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
110+
# GFX10-LIS: %3:vgpr_32 = V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
111+
# GFX10: S_NOP 0, implicit %2
112+
113+
---
114+
name: test_fmamk_reg_imm_f16__use_imm_before_after_mac
115+
tracksRegLiveness: true
116+
body: |
117+
bb.0:
118+
119+
%0:vreg_64 = IMPLICIT_DEF
120+
%1:vgpr_32 = COPY %0.sub1
121+
%2:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
122+
S_NOP 0, implicit %2
123+
%3:vgpr_32 = V_FMAC_F16_e32 killed %0.sub0, %2, killed %1, implicit $mode, implicit $exec
124+
S_NOP 0, implicit %2
22125
23126
...
24127

25128
# GFX10-LABEL: name: test_fmamk_imm_reg_f16
26-
# GFX10: %2:vgpr_32 = IMPLICIT_DEF
129+
# GFX10: dead %2:vgpr_32 = IMPLICIT_DEF
27130
# GFX10-NOT: V_MOV_B32
28-
# GFX10: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
131+
# GFX10-NOLIS: V_FMAMK_F16 killed %0.sub0, 1078523331, killed %1, implicit $mode, implicit $exec
132+
# GFX10-LIS: V_FMAMK_F16 %0.sub0, 1078523331, %1, implicit $mode, implicit $exec
29133
---
30134
name: test_fmamk_imm_reg_f16
135+
tracksRegLiveness: true
31136
registers:
32137
- { id: 0, class: vreg_64 }
33138
- { id: 1, class: vgpr_32 }
@@ -46,9 +151,11 @@ body: |
46151
# GFX10-LABEL: name: test_fmaak_f16
47152
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
48153
# GFX10-NOT: V_MOV_B32
49-
# GFX10: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
154+
# GFX10-NOLIS: V_FMAAK_F16 killed %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
155+
# GFX10-LIS: V_FMAAK_F16 %0.sub0, %0.sub1, 1078523331, implicit $mode, implicit $exec
50156
---
51157
name: test_fmaak_f16
158+
tracksRegLiveness: true
52159
registers:
53160
- { id: 0, class: vreg_64 }
54161
- { id: 1, class: vgpr_32 }
@@ -65,7 +172,8 @@ body: |
65172
# GFX10-LABEL: name: test_fmaak_inline_literal_f16
66173
# GFX10: %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
67174
# GFX10-NOT: V_MOV_B32
68-
# GFX10: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
175+
# GFX10-NOLIS: %2:vgpr_32 = V_FMAAK_F16 16384, killed %0, 49664, implicit $mode, implicit $exec
176+
# GFX10-LIS: %2:vgpr_32 = V_FMAAK_F16 16384, %0, 49664, implicit $mode, implicit $exec
69177

70178
---
71179
name: test_fmaak_inline_literal_f16

0 commit comments

Comments
 (0)