Skip to content

Commit dfa5429

Browse files
authored
[InitUndef] Enable the InitUndef pass on non-AMDGPU targets (#108353)
The InitUndef pass works around a register allocation issue, where undef operands can be allocated to the same register as early-clobber result operands. This may lead to ISA constraint violations, where certain input and output registers are not allowed to overlap. Originally this pass was implemented for RISCV, and then extended to ARM in #77770. I've since removed the target-specific parts of the pass in #106744 and #107885. This PR reduces the pass to use a single requiresDisjointEarlyClobberAndUndef() target hook and enables it by default. The hook is disabled for AMDGPU, because overlapping early-clobber and undef operands are known to be safe for that target, and we get significant codegen diffs otherwise. The motivating case is the one in arm64-ldxr-stxr.ll, where we were previously incorrectly allocating a stxp input and output to the same register.
1 parent 961bc37 commit dfa5429

12 files changed

+51
-62
lines changed

llvm/include/llvm/CodeGen/TargetRegisterInfo.h

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1203,19 +1203,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
12031203
virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const {
12041204
return false;
12051205
}
1206-
1207-
/// Returns if the architecture being targeted has the required Pseudo
1208-
/// Instructions for initializing the register. By default this returns false,
1209-
/// but where it is overriden for an architecture, the behaviour will be
1210-
/// different. This can either be a check to ensure the Register Class is
1211-
/// present, or to return true as an indication the architecture supports the
1212-
/// pass. If using the method that does not check for the Register Class, it
1213-
/// is imperative to ensure all required Pseudo Instructions are implemented,
1214-
/// otherwise compilation may fail with an `Unexpected register class` error.
1215-
virtual bool
1216-
doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const {
1217-
return false;
1218-
}
12191206
};
12201207

12211208
//===----------------------------------------------------------------------===//

llvm/include/llvm/CodeGen/TargetSubtargetInfo.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -333,13 +333,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
333333
/// Get the list of MacroFusion predicates.
334334
virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; };
335335

336-
/// supportsInitUndef is used to determine if an architecture supports
337-
/// the Init Undef Pass. By default, it is assumed that it will not support
338-
/// the pass, with architecture specific overrides providing the information
339-
/// where they are implemented.
340-
virtual bool supportsInitUndef() const { return false; }
336+
/// Whether the target has instructions where an early-clobber result
337+
/// operand cannot overlap with an undef input operand.
338+
virtual bool requiresDisjointEarlyClobberAndUndef() const {
339+
// Conservatively assume such instructions exist by default.
340+
return true;
341+
}
341342
};
342-
343343
} // end namespace llvm
344344

345345
#endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H

llvm/lib/CodeGen/InitUndef.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -120,8 +120,6 @@ bool InitUndef::handleReg(MachineInstr *MI) {
120120
continue;
121121
if (!UseMO.getReg().isVirtual())
122122
continue;
123-
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
124-
continue;
125123

126124
if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
127125
Changed |= fixupIllOperand(MI, UseMO);
@@ -140,8 +138,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
140138
continue;
141139
if (UseMO.isTied())
142140
continue;
143-
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
144-
continue;
145141

146142
Register Reg = UseMO.getReg();
147143
if (NewRegs.count(Reg))
@@ -246,13 +242,9 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
246242
bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
247243
ST = &MF.getSubtarget();
248244

249-
// supportsInitUndef is implemented to reflect if an architecture has support
250-
// for the InitUndef pass. Support comes from having the relevant Pseudo
251-
// instructions that can be used to initialize the register. The function
252-
// returns false by default so requires an implementation per architecture.
253-
// Support can be added by overriding the function in a way that best fits
254-
// the architecture.
255-
if (!ST->supportsInitUndef())
245+
// The pass is only needed if early-clobber defs and undef ops cannot be
246+
// allocated to the same register.
247+
if (!ST->requiresDisjointEarlyClobberAndUndef())
256248
return false;
257249

258250
MRI = &MF.getRegInfo();

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1587,6 +1587,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
15871587
// the nop.
15881588
return true;
15891589
}
1590+
1591+
bool requiresDisjointEarlyClobberAndUndef() const override {
1592+
// AMDGPU doesn't care if early-clobber and undef operands are allocated
1593+
// to the same register.
1594+
return false;
1595+
}
15901596
};
15911597

15921598
class GCNUserSGPRUsageInfo {

llvm/lib/Target/AMDGPU/R600Subtarget.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,12 @@ class R600Subtarget final : public R600GenSubtargetInfo,
160160
unsigned getMinWavesPerEU() const override {
161161
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
162162
}
163+
164+
bool requiresDisjointEarlyClobberAndUndef() const override {
165+
// AMDGPU doesn't care if early-clobber and undef operands are allocated
166+
// to the same register.
167+
return false;
168+
}
163169
};
164170

165171
} // end namespace llvm

llvm/lib/Target/ARM/ARMBaseRegisterInfo.h

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -240,20 +240,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
240240
unsigned SrcSubReg) const override;
241241

242242
int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }
243-
244-
bool doesRegClassHavePseudoInitUndef(
245-
const TargetRegisterClass *RC) const override {
246-
(void)RC;
247-
// For the ARM Architecture we want to always return true because all
248-
// required PseudoInitUndef types have been added. If compilation fails due
249-
// to `Unexpected register class`, this is likely to be because the specific
250-
// register being used is not support by Init Undef and needs the Pseudo
251-
// Instruction adding to ARMInstrInfo.td. If this is implemented as a
252-
// conditional check, this could create a false positive where Init Undef is
253-
// not running, skipping the instruction and moving to the next. This could
254-
// lead to illegal instructions being generated by the register allocator.
255-
return true;
256-
}
257243
};
258244

259245
} // end namespace llvm

llvm/lib/Target/ARM/ARMSubtarget.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
209209
return &InstrInfo->getRegisterInfo();
210210
}
211211

212-
/// The correct instructions have been implemented to initialize undef
213-
/// registers, therefore the ARM Architecture is supported by the Init Undef
214-
/// Pass. This will return true as the pass needs to be supported for all
215-
/// types of instructions. The pass will then perform more checks to ensure it
216-
/// should be applying the Pseudo Instructions.
217-
bool supportsInitUndef() const override { return true; }
218-
219212
const CallLowering *getCallLowering() const override;
220213
InstructionSelector *getInstructionSelector() const override;
221214
const LegalizerInfo *getLegalizerInfo() const override;

llvm/lib/Target/RISCV/RISCVRegisterInfo.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
130130
const MachineFunction &MF, const VirtRegMap *VRM,
131131
const LiveRegMatrix *Matrix) const override;
132132

133-
bool doesRegClassHavePseudoInitUndef(
134-
const TargetRegisterClass *RC) const override {
135-
return isVRRegClass(RC);
136-
}
137-
138133
static bool isVRRegClass(const TargetRegisterClass *RC) {
139134
return RISCVRI::isVRegClass(RC->TSFlags) &&
140135
RISCVRI::getNF(RC->TSFlags) == 1;

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -306,8 +306,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
306306
unsigned getTailDupAggressiveThreshold() const {
307307
return TuneInfo->TailDupAggressiveThreshold;
308308
}
309-
310-
bool supportsInitUndef() const override { return hasVInstructions(); }
311309
};
312310
} // End llvm namespace
313311

llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,11 +354,10 @@ define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) {
354354
}
355355

356356
; The stxp result cannot be allocated to the same register as the inputs.
357-
; FIXME: This is a miscompile.
358357
define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind {
359358
; CHECK-LABEL: test_stxp_undef:
360359
; CHECK: // %bb.0:
361-
; CHECK-NEXT: stxp w8, x8, x1, [x0]
360+
; CHECK-NEXT: stxp w8, x9, x1, [x0]
362361
; CHECK-NEXT: mov w0, w8
363362
; CHECK-NEXT: ret
364363
%res = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
2+
# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s
3+
4+
---
5+
name: test_stxp_undef
6+
body: |
7+
bb.0:
8+
liveins: $x0, $x1
9+
10+
; CHECK-LABEL: name: test_stxp_undef
11+
; CHECK: liveins: $x0, $x1
12+
; CHECK-NEXT: {{ $}}
13+
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
14+
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
15+
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
16+
; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:gpr64 = INIT_UNDEF
17+
; CHECK-NEXT: early-clobber %3:gpr32 = STXPX killed [[INIT_UNDEF]], [[COPY]], [[COPY1]] :: (volatile store (s128))
18+
; CHECK-NEXT: $w0 = COPY %3
19+
; CHECK-NEXT: RET_ReallyLR implicit $w0
20+
%1:gpr64 = COPY $x1
21+
%0:gpr64common = COPY $x0
22+
%3:gpr64 = IMPLICIT_DEF
23+
early-clobber %2:gpr32 = STXPX killed %3, %1, %0 :: (volatile store (s128))
24+
$w0 = COPY %2
25+
RET_ReallyLR implicit $w0
26+
27+
...

llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ define void @last_chance_recoloring_failure() {
2525
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
2626
; CHECK-NEXT: li a0, 55
2727
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
28-
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
28+
; CHECK-NEXT: vloxseg2ei32.v v16, (a1), v8
2929
; CHECK-NEXT: csrr a0, vlenb
3030
; CHECK-NEXT: slli a0, a0, 3
3131
; CHECK-NEXT: add a0, sp, a0
@@ -81,7 +81,7 @@ define void @last_chance_recoloring_failure() {
8181
; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
8282
; SUBREGLIVENESS-NEXT: li a0, 55
8383
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma
84-
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a0), v8
84+
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a1), v8
8585
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
8686
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
8787
; SUBREGLIVENESS-NEXT: add a0, sp, a0

0 commit comments

Comments
 (0)