Skip to content

[InitUndef] Enable the InitUndef pass on non-AMDGPU targets #108353

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 0 additions & 13 deletions llvm/include/llvm/CodeGen/TargetRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1203,19 +1203,6 @@ class TargetRegisterInfo : public MCRegisterInfo {
virtual bool isNonallocatableRegisterCalleeSave(MCRegister Reg) const {
return false;
}

/// Returns if the architecture being targeted has the required Pseudo
/// Instructions for initializing the register. By default this returns false,
but where it is overridden for an architecture, the behaviour will be
/// different. This can either be a check to ensure the Register Class is
/// present, or to return true as an indication the architecture supports the
/// pass. If using the method that does not check for the Register Class, it
/// is imperative to ensure all required Pseudo Instructions are implemented,
/// otherwise compilation may fail with an `Unexpected register class` error.
virtual bool
doesRegClassHavePseudoInitUndef(const TargetRegisterClass *RC) const {
return false;
}
};

//===----------------------------------------------------------------------===//
Expand Down
12 changes: 6 additions & 6 deletions llvm/include/llvm/CodeGen/TargetSubtargetInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -333,13 +333,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo {
/// Get the list of MacroFusion predicates.
virtual std::vector<MacroFusionPredTy> getMacroFusions() const { return {}; };

/// supportsInitUndef is used to determine if an architecture supports
/// the Init Undef Pass. By default, it is assumed that it will not support
/// the pass, with architecture specific overrides providing the information
/// where they are implemented.
virtual bool supportsInitUndef() const { return false; }
/// Whether the target has instructions where an early-clobber result
/// operand cannot overlap with an undef input operand.
virtual bool requiresDisjointEarlyClobberAndUndef() const {
// Conservatively assume such instructions exist by default.
return true;
}
};

} // end namespace llvm

#endif // LLVM_CODEGEN_TARGETSUBTARGETINFO_H
14 changes: 3 additions & 11 deletions llvm/lib/CodeGen/InitUndef.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,8 +120,6 @@ bool InitUndef::handleReg(MachineInstr *MI) {
continue;
if (!UseMO.getReg().isVirtual())
continue;
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
continue;

if (UseMO.isUndef() || findImplictDefMIFromReg(UseMO.getReg(), MRI))
Changed |= fixupIllOperand(MI, UseMO);
Expand All @@ -140,8 +138,6 @@ bool InitUndef::handleSubReg(MachineFunction &MF, MachineInstr &MI,
continue;
if (UseMO.isTied())
continue;
if (!TRI->doesRegClassHavePseudoInitUndef(MRI->getRegClass(UseMO.getReg())))
continue;

Register Reg = UseMO.getReg();
if (NewRegs.count(Reg))
Expand Down Expand Up @@ -246,13 +242,9 @@ bool InitUndef::processBasicBlock(MachineFunction &MF, MachineBasicBlock &MBB,
bool InitUndef::runOnMachineFunction(MachineFunction &MF) {
ST = &MF.getSubtarget();

// supportsInitUndef is implemented to reflect if an architecture has support
// for the InitUndef pass. Support comes from having the relevant Pseudo
// instructions that can be used to initialize the register. The function
// returns false by default so requires an implementation per architecture.
// Support can be added by overriding the function in a way that best fits
// the architecture.
if (!ST->supportsInitUndef())
// The pass is only needed if early-clobber defs and undef ops cannot be
// allocated to the same register.
if (!ST->requiresDisjointEarlyClobberAndUndef())
return false;

MRI = &MF.getRegInfo();
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/GCNSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -1587,6 +1587,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// the nop.
return true;
}

bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
return false;
}
};

class GCNUserSGPRUsageInfo {
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/AMDGPU/R600Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ class R600Subtarget final : public R600GenSubtargetInfo,
unsigned getMinWavesPerEU() const override {
return AMDGPU::IsaInfo::getMinWavesPerEU(this);
}

bool requiresDisjointEarlyClobberAndUndef() const override {
// AMDGPU doesn't care if early-clobber and undef operands are allocated
// to the same register.
return false;
}
};

} // end namespace llvm
Expand Down
14 changes: 0 additions & 14 deletions llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,20 +240,6 @@ class ARMBaseRegisterInfo : public ARMGenRegisterInfo {
unsigned SrcSubReg) const override;

int getSEHRegNum(unsigned i) const { return getEncodingValue(i); }

bool doesRegClassHavePseudoInitUndef(
const TargetRegisterClass *RC) const override {
(void)RC;
// For the ARM Architecture we want to always return true because all
// required PseudoInitUndef types have been added. If compilation fails due
// to `Unexpected register class`, this is likely to be because the specific
register being used is not supported by Init Undef and needs the Pseudo
// Instruction adding to ARMInstrInfo.td. If this is implemented as a
// conditional check, this could create a false positive where Init Undef is
// not running, skipping the instruction and moving to the next. This could
// lead to illegal instructions being generated by the register allocator.
return true;
}
};

} // end namespace llvm
Expand Down
7 changes: 0 additions & 7 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,6 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return &InstrInfo->getRegisterInfo();
}

/// The correct instructions have been implemented to initialize undef
/// registers, therefore the ARM Architecture is supported by the Init Undef
/// Pass. This will return true as the pass needs to be supported for all
/// types of instructions. The pass will then perform more checks to ensure it
/// should be applying the Pseudo Instructions.
bool supportsInitUndef() const override { return true; }

const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
Expand Down
5 changes: 0 additions & 5 deletions llvm/lib/Target/RISCV/RISCVRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,11 +130,6 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;

bool doesRegClassHavePseudoInitUndef(
const TargetRegisterClass *RC) const override {
return isVRRegClass(RC);
}

static bool isVRRegClass(const TargetRegisterClass *RC) {
return RISCVRI::isVRegClass(RC->TSFlags) &&
RISCVRI::getNF(RC->TSFlags) == 1;
Expand Down
2 changes: 0 additions & 2 deletions llvm/lib/Target/RISCV/RISCVSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,6 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
unsigned getTailDupAggressiveThreshold() const {
return TuneInfo->TailDupAggressiveThreshold;
}

bool supportsInitUndef() const override { return hasVInstructions(); }
};
} // End llvm namespace

Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AArch64/arm64-ldxr-stxr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -354,11 +354,10 @@ define dso_local i32 @test_store_release_i64(i32, i64 %val, ptr %addr) {
}

; The stxp result cannot be allocated to the same register as the inputs.
; FIXME: This is a miscompile.
define dso_local i32 @test_stxp_undef(ptr %p, i64 %x) nounwind {
; CHECK-LABEL: test_stxp_undef:
; CHECK: // %bb.0:
; CHECK-NEXT: stxp w8, x8, x1, [x0]
; CHECK-NEXT: stxp w8, x9, x1, [x0]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could use an additional MIR test for something so specific

Copy link
Contributor Author

@nikic nikic Sep 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pass itself already has MIR tests. I don't think adding another MIR test for this case would be useful, because we care about the end-to-end behavior (from the interaction of three different passes) here.

What would be quite nice is if the old code would emit an error, as it's hard to spot the issue otherwise. Currently it only generates an error when parsed back as inline assembly. Not sure whether there is a good way to share that validation, I'm not familiar with this part of the backend...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AArch64InstrInfo::verifyInstruction is the place for target-specific verification of an instruction... but currently it only gets called with -verify-machineinstrs. (It looks like AMDGPUAsmPrinter::emitInstruction calls verifyInstruction, but other targets currently don't do that.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only see 3 places using run-pass=init-undef, all in RISCV. You could just dump the MIR before init-undef, extract the one instruction, and make sure the pass isn't a no-op on AArch64

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, if all we want to test is "the pass does something", then a MIR test makes sense. I've added one.

; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
%res = call i32 @llvm.aarch64.stxp(i64 undef, i64 %x, ptr %p)
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/CodeGen/AArch64/init-undef.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=aarch64-- -run-pass=init-undef -o - %s | FileCheck %s

---
name: test_stxp_undef
body: |
bb.0:
liveins: $x0, $x1

; CHECK-LABEL: name: test_stxp_undef
; CHECK: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x1
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
; CHECK-NEXT: [[DEF:%[0-9]+]]:gpr64 = IMPLICIT_DEF
; CHECK-NEXT: [[INIT_UNDEF:%[0-9]+]]:gpr64 = INIT_UNDEF
; CHECK-NEXT: early-clobber %3:gpr32 = STXPX killed [[INIT_UNDEF]], [[COPY]], [[COPY1]] :: (volatile store (s128))
; CHECK-NEXT: $w0 = COPY %3
; CHECK-NEXT: RET_ReallyLR implicit $w0
%1:gpr64 = COPY $x1
%0:gpr64common = COPY $x0
%3:gpr64 = IMPLICIT_DEF
early-clobber %2:gpr32 = STXPX killed %3, %1, %0 :: (volatile store (s128))
$w0 = COPY %2
RET_ReallyLR implicit $w0

...
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; CHECK-NEXT: li a0, 55
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
; CHECK-NEXT: vloxseg2ei32.v v16, (a1), v8
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
Expand Down Expand Up @@ -81,7 +81,7 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; SUBREGLIVENESS-NEXT: li a0, 55
; SUBREGLIVENESS-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a0), v8
; SUBREGLIVENESS-NEXT: vloxseg2ei32.v v16, (a1), v8
; SUBREGLIVENESS-NEXT: csrr a0, vlenb
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
Expand Down
Loading