Skip to content

Commit 7c72d0e

Browse files
committed
[AArch64][SME] Spill p-regs as z-regs when streaming hazards are possible
This patch adds a new option, `-aarch64-enable-zpr-predicate-spills` (disabled by default), which replaces predicate spills with vector spills in streaming[-compatible] functions. For example:

```
str p8, [sp, #7, mul vl] // 2-byte Folded Spill
// ...
ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload
```

Becomes:

```
mov z0.b, p8/z, #1
str z0, [sp] // 16-byte Folded Spill
// ...
ldr z0, [sp] // 16-byte Folded Reload
ptrue p4.b
cmpne p8.b, p4/z, z0.b, #0
```

This is done to avoid streaming memory hazards between FPR/vector and predicate spills, which currently occupy the same stack area even when the `-aarch64-stack-hazard-size` flag is set.

This is implemented with two new pseudos, SPILL_PPR_TO_ZPR_SLOT_PSEUDO and FILL_PPR_FROM_ZPR_SLOT_PSEUDO. The expansion of these pseudos handles scavenging the required registers (z0 in the above example) and, in the worst case, spilling a register to an emergency stack slot. The condition flags are also preserved around the `cmpne` in case they are live at the expansion point.
1 parent 29f7392 commit 7c72d0e

10 files changed

+1444
-10
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+331-4
Large diffs are not rendered by default.

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+15-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ static cl::opt<unsigned>
8181
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
8282
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
8383
AArch64::CATCHRET),
84-
RI(STI.getTargetTriple()), Subtarget(STI) {}
84+
RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
8585

8686
/// GetInstSize - Return the number of bytes of code the specified
8787
/// instruction may be. This returns the maximum number of bytes.
@@ -2438,6 +2438,8 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
24382438
case AArch64::STZ2Gi:
24392439
case AArch64::STZGi:
24402440
case AArch64::TAGPstack:
2441+
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2442+
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
24412443
return 2;
24422444
case AArch64::LD1B_D_IMM:
24432445
case AArch64::LD1B_H_IMM:
@@ -4223,6 +4225,8 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
42234225
MinOffset = -256;
42244226
MaxOffset = 254;
42254227
break;
4228+
case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4229+
case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
42264230
case AArch64::LDR_ZXI:
42274231
case AArch64::STR_ZXI:
42284232
Scale = TypeSize::getScalable(16);
@@ -5354,6 +5358,11 @@ void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
53545358
"Unexpected register store without SVE store instructions");
53555359
Opc = AArch64::STR_ZXI;
53565360
StackID = TargetStackID::ScalableVector;
5361+
} else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5362+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5363+
"Unexpected predicate store without SVE store instructions");
5364+
Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5365+
StackID = TargetStackID::ScalableVector;
53575366
}
53585367
break;
53595368
case 24:
@@ -5528,6 +5537,11 @@ void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
55285537
"Unexpected register load without SVE load instructions");
55295538
Opc = AArch64::LDR_ZXI;
55305539
StackID = TargetStackID::ScalableVector;
5540+
} else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5541+
assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5542+
"Unexpected predicate load without SVE load instructions");
5543+
Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5544+
StackID = TargetStackID::ScalableVector;
55315545
}
55325546
break;
55335547
case 24:

llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ using namespace llvm;
3838
#define GET_REGINFO_TARGET_DESC
3939
#include "AArch64GenRegisterInfo.inc"
4040

41-
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT)
42-
: AArch64GenRegisterInfo(AArch64::LR), TT(TT) {
41+
AArch64RegisterInfo::AArch64RegisterInfo(const Triple &TT, unsigned HwMode)
42+
: AArch64GenRegisterInfo(AArch64::LR, 0, 0, 0, HwMode), TT(TT) {
4343
AArch64_MC::initLLVMToCVRegMapping(this);
4444
}
4545

llvm/lib/Target/AArch64/AArch64RegisterInfo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
2727
const Triple &TT;
2828

2929
public:
30-
AArch64RegisterInfo(const Triple &TT);
30+
AArch64RegisterInfo(const Triple &TT, unsigned HwMode);
3131

3232
// FIXME: This should be tablegen'd like getDwarfRegNum is
3333
int getSEHRegNum(unsigned i) const {

llvm/lib/Target/AArch64/AArch64RegisterInfo.td

+10-1
Original file line numberDiff line numberDiff line change
@@ -979,10 +979,19 @@ class ZPRRegOp <string Suffix, AsmOperandClass C, ElementSizeEnum Size,
979979
//******************************************************************************
980980

981981
// SVE predicate register classes.
982+
983+
// Note: This hardware mode is enabled in AArch64Subtarget::getHwModeSet()
984+
// (without the use of the table-gen'd predicates).
985+
def SMEWithStreamingMemoryHazards : HwMode<"", [Predicate<"false">]>;
986+
987+
def PPRSpillFillRI : RegInfoByHwMode<
988+
[DefaultMode, SMEWithStreamingMemoryHazards],
989+
[RegInfo<16,16,16>, RegInfo<16,128,128>]>;
990+
982991
class PPRClass<int firstreg, int lastreg, int step = 1> : RegisterClass<"AArch64",
983992
[ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
984993
(sequence "P%u", firstreg, lastreg, step)> {
985-
let Size = 16;
994+
let RegInfos = PPRSpillFillRI;
986995
}
987996

988997
def PPR : PPRClass<0, 15> {

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,11 @@ static cl::alias AArch64StreamingStackHazardSize(
8686
cl::desc("alias for -aarch64-streaming-hazard-size"),
8787
cl::aliasopt(AArch64StreamingHazardSize));
8888

89+
static cl::opt<bool> EnableZPRPredicateSpills(
90+
"aarch64-enable-zpr-predicate-spills", cl::init(false), cl::Hidden,
91+
cl::desc(
92+
"Enables spilling/reloading SVE predicates as data vectors (ZPRs)"));
93+
8994
// Subreg liveness tracking is disabled by default for now until all issues
9095
// are ironed out. This option allows the feature to be used in tests.
9196
static cl::opt<bool>
@@ -397,6 +402,23 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
397402
EnableSubregLiveness = EnableSubregLivenessTracking.getValue();
398403
}
399404

405+
unsigned AArch64Subtarget::getHwModeSet() const {
406+
unsigned Modes = 0;
407+
408+
// Use a special hardware mode in streaming functions with stack hazards.
409+
// This changes the spill size (and alignment) for the predicate register
410+
// class.
411+
//
412+
// FIXME: This overrides the table-gen'd `getHwModeSet()` which only looks at
413+
// CPU features.
414+
if (EnableZPRPredicateSpills.getValue() &&
415+
(isStreaming() || isStreamingCompatible())) {
416+
Modes |= (1 << 0);
417+
}
418+
419+
return Modes;
420+
}
421+
400422
const CallLowering *AArch64Subtarget::getCallLowering() const {
401423
return CallLoweringInfo.get();
402424
}

llvm/lib/Target/AArch64/AArch64Subtarget.h

+2
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
130130
bool IsStreaming = false, bool IsStreamingCompatible = false,
131131
bool HasMinSize = false);
132132

133+
virtual unsigned getHwModeSet() const override;
134+
133135
// Getters for SubtargetFeatures defined in tablegen
134136
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
135137
bool GETTER() const { return ATTRIBUTE; }

llvm/lib/Target/AArch64/SMEInstrFormats.td

+14
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,20 @@ def FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO :
5959
let hasPostISelHook = 1;
6060
}
6161

62+
def SPILL_PPR_TO_ZPR_SLOT_PSEUDO :
63+
Pseudo<(outs), (ins PPRorPNRAny:$Pt, GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]>
64+
{
65+
let mayStore = 1;
66+
let hasSideEffects = 0;
67+
}
68+
69+
def FILL_PPR_FROM_ZPR_SLOT_PSEUDO :
70+
Pseudo<(outs PPRorPNRAny:$Pt), (ins GPR64sp:$Rn, simm9:$imm9), []>, Sched<[]>
71+
{
72+
let mayLoad = 1;
73+
let hasSideEffects = 0;
74+
}
75+
6276
def SDTZALoadStore : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisPtrTy<1>, SDTCisInt<2>]>;
6377
def AArch64SMELdr : SDNode<"AArch64ISD::SME_ZA_LDR", SDTZALoadStore,
6478
[SDNPHasChain, SDNPSideEffect, SDNPMayLoad]>;

0 commit comments

Comments
 (0)