@@ -580,25 +580,124 @@ static MCCFIInstruction createDefCFAOffset(const TargetRegisterInfo &TRI,
580
580
Comment.str ());
581
581
}
582
582
583
+ // Allocate stack space and probe it if necessary.
583
584
void RISCVFrameLowering::allocateStack (MachineBasicBlock &MBB,
584
585
MachineBasicBlock::iterator MBBI,
585
- MachineFunction &MF, StackOffset Offset,
586
- uint64_t RealStackSize,
587
- bool EmitCFI) const {
586
+ MachineFunction &MF, uint64_t Offset,
587
+ uint64_t RealStackSize, bool EmitCFI,
588
+ bool NeedProbe,
589
+ uint64_t ProbeSize) const {
588
590
DebugLoc DL;
589
591
const RISCVRegisterInfo *RI = STI.getRegisterInfo ();
590
592
const RISCVInstrInfo *TII = STI.getInstrInfo ();
591
593
592
- RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, Offset, MachineInstr::FrameSetup,
594
+ // Simply allocate the stack if it's not big enough to require a probe.
595
+ if (!NeedProbe || Offset <= ProbeSize) {
596
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Offset),
597
+ MachineInstr::FrameSetup, getStackAlign ());
598
+
599
+ if (EmitCFI) {
600
+ // Emit ".cfi_def_cfa_offset RealStackSize"
601
+ unsigned CFIIndex = MF.addFrameInst (
602
+ MCCFIInstruction::cfiDefCfaOffset (nullptr , RealStackSize));
603
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
604
+ .addCFIIndex (CFIIndex)
605
+ .setMIFlag (MachineInstr::FrameSetup);
606
+ }
607
+
608
+ return ;
609
+ }
610
+
611
+ // Unroll the probe loop depending on the number of iterations.
612
+ if (Offset < ProbeSize * 5 ) {
613
+ uint64_t CurrentOffset = 0 ;
614
+ bool IsRV64 = STI.is64Bit ();
615
+ while (CurrentOffset + ProbeSize <= Offset) {
616
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
617
+ StackOffset::getFixed (-ProbeSize), MachineInstr::FrameSetup,
618
+ getStackAlign ());
619
+ // s[d|w] zero, 0(sp)
620
+ BuildMI (MBB, MBBI, DL, TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
621
+ .addReg (RISCV::X0)
622
+ .addReg (SPReg)
623
+ .addImm (0 )
624
+ .setMIFlags (MachineInstr::FrameSetup);
625
+
626
+ CurrentOffset += ProbeSize;
627
+ if (EmitCFI) {
628
+ // Emit ".cfi_def_cfa_offset CurrentOffset"
629
+ unsigned CFIIndex = MF.addFrameInst (
630
+ MCCFIInstruction::cfiDefCfaOffset (nullptr , CurrentOffset));
631
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
632
+ .addCFIIndex (CFIIndex)
633
+ .setMIFlag (MachineInstr::FrameSetup);
634
+ }
635
+ }
636
+
637
+ uint64_t Residual = Offset - CurrentOffset;
638
+ if (Residual) {
639
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg,
640
+ StackOffset::getFixed (-Residual), MachineInstr::FrameSetup,
641
+ getStackAlign ());
642
+ if (EmitCFI) {
643
+ // Emit ".cfi_def_cfa_offset Offset"
644
+ unsigned CFIIndex =
645
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
646
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
647
+ .addCFIIndex (CFIIndex)
648
+ .setMIFlag (MachineInstr::FrameSetup);
649
+ }
650
+ }
651
+
652
+ return ;
653
+ }
654
+
655
+ // Emit a variable-length allocation probing loop.
656
+ uint64_t RoundedSize = alignDown (Offset, ProbeSize);
657
+ uint64_t Residual = Offset - RoundedSize;
658
+
659
+ Register TargetReg = RISCV::X6;
660
+ // SUB TargetReg, SP, RoundedSize
661
+ RI->adjustReg (MBB, MBBI, DL, TargetReg, SPReg,
662
+ StackOffset::getFixed (-RoundedSize), MachineInstr::FrameSetup,
593
663
getStackAlign ());
594
664
595
665
if (EmitCFI) {
596
- // Emit ".cfi_def_cfa_offset RealStackSize"
597
- unsigned CFIIndex = MF.addFrameInst (
598
- MCCFIInstruction::cfiDefCfaOffset (nullptr , RealStackSize));
666
+ // Set the CFA register to TargetReg.
667
+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (TargetReg, true );
668
+ unsigned CFIIndex =
669
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfa (nullptr , Reg, RoundedSize));
599
670
BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
600
671
.addCFIIndex (CFIIndex)
601
- .setMIFlag (MachineInstr::FrameSetup);
672
+ .setMIFlags (MachineInstr::FrameSetup);
673
+ }
674
+
675
+ // It will be expanded to a probe loop in `inlineStackProbe`.
676
+ BuildMI (MBB, MBBI, DL, TII->get (RISCV::PROBED_STACKALLOC))
677
+ .addReg (SPReg)
678
+ .addReg (TargetReg);
679
+
680
+ if (EmitCFI) {
681
+ // Set the CFA register back to SP.
682
+ unsigned Reg = STI.getRegisterInfo ()->getDwarfRegNum (SPReg, true );
683
+ unsigned CFIIndex =
684
+ MF.addFrameInst (MCCFIInstruction::createDefCfaRegister (nullptr , Reg));
685
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
686
+ .addCFIIndex (CFIIndex)
687
+ .setMIFlags (MachineInstr::FrameSetup);
688
+ }
689
+
690
+ if (Residual)
691
+ RI->adjustReg (MBB, MBBI, DL, SPReg, SPReg, StackOffset::getFixed (-Residual),
692
+ MachineInstr::FrameSetup, getStackAlign ());
693
+
694
+ if (EmitCFI) {
695
+ // Emit ".cfi_def_cfa_offset Offset"
696
+ unsigned CFIIndex =
697
+ MF.addFrameInst (MCCFIInstruction::cfiDefCfaOffset (nullptr , Offset));
698
+ BuildMI (MBB, MBBI, DL, TII->get (TargetOpcode::CFI_INSTRUCTION))
699
+ .addCFIIndex (CFIIndex)
700
+ .setMIFlags (MachineInstr::FrameSetup);
602
701
}
603
702
}
604
703
@@ -716,11 +815,14 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
716
815
getPushOrLibCallsSavedInfo (MF, CSI));
717
816
}
718
817
719
- if (StackSize != 0 ) {
720
- // Allocate space on the stack if necessary.
721
- allocateStack (MBB, MBBI, MF, StackOffset::getFixed (-StackSize),
722
- RealStackSize, /* EmitCFI=*/ true );
723
- }
818
+ // Allocate space on the stack if necessary.
819
+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
820
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
821
+ bool NeedProbe = TLI->hasInlineStackProbe (MF);
822
+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, getStackAlign ());
823
+ if (StackSize != 0 )
824
+ allocateStack (MBB, MBBI, MF, StackSize, RealStackSize, /* EmitCFI=*/ true ,
825
+ NeedProbe, ProbeSize);
724
826
725
827
// The frame pointer is callee-saved, and code has been generated for us to
726
828
// save it to the stack. We need to skip over the storing of callee-saved
@@ -761,8 +863,9 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
761
863
assert (SecondSPAdjustAmount > 0 &&
762
864
" SecondSPAdjustAmount should be greater than zero" );
763
865
764
- allocateStack (MBB, MBBI, MF, StackOffset::getFixed (-SecondSPAdjustAmount),
765
- getStackSizeWithRVVPadding (MF), !hasFP (MF));
866
+ allocateStack (MBB, MBBI, MF, SecondSPAdjustAmount,
867
+ getStackSizeWithRVVPadding (MF), !hasFP (MF), NeedProbe,
868
+ ProbeSize);
766
869
}
767
870
768
871
if (RVVStackSize) {
@@ -1910,3 +2013,69 @@ bool RISCVFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
1910
2013
/// Returns the stack ID this target reports for scalable-vector objects,
/// which is always TargetStackID::ScalableVector.
TargetStackID::Value RISCVFrameLowering::getStackIDForScalableVectors() const {
  const TargetStackID::Value ScalableID = TargetStackID::ScalableVector;
  return ScalableID;
}
2016
+
2017
+ // Synthesize the probe loop.
2018
+ static void emitStackProbeInline (MachineFunction &MF, MachineBasicBlock &MBB,
2019
+ MachineBasicBlock::iterator MBBI,
2020
+ DebugLoc DL) {
2021
+
2022
+ auto &Subtarget = MF.getSubtarget <RISCVSubtarget>();
2023
+ const RISCVInstrInfo *TII = Subtarget.getInstrInfo ();
2024
+ bool IsRV64 = Subtarget.is64Bit ();
2025
+ Align StackAlign = Subtarget.getFrameLowering ()->getStackAlign ();
2026
+ const RISCVTargetLowering *TLI = Subtarget.getTargetLowering ();
2027
+ uint64_t ProbeSize = TLI->getStackProbeSize (MF, StackAlign);
2028
+
2029
+ MachineFunction::iterator MBBInsertPoint = std::next (MBB.getIterator ());
2030
+ MachineBasicBlock *LoopTestMBB =
2031
+ MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2032
+ MF.insert (MBBInsertPoint, LoopTestMBB);
2033
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock (MBB.getBasicBlock ());
2034
+ MF.insert (MBBInsertPoint, ExitMBB);
2035
+ MachineInstr::MIFlag Flags = MachineInstr::FrameSetup;
2036
+ Register TargetReg = RISCV::X6;
2037
+ Register ScratchReg = RISCV::X7;
2038
+
2039
+ // ScratchReg = ProbeSize
2040
+ TII->movImm (MBB, MBBI, DL, ScratchReg, ProbeSize, Flags);
2041
+
2042
+ // LoopTest:
2043
+ // SUB SP, SP, ProbeSize
2044
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::SUB), SPReg)
2045
+ .addReg (SPReg)
2046
+ .addReg (ScratchReg)
2047
+ .setMIFlags (Flags);
2048
+
2049
+ // s[d|w] zero, 0(sp)
2050
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL,
2051
+ TII->get (IsRV64 ? RISCV::SD : RISCV::SW))
2052
+ .addReg (RISCV::X0)
2053
+ .addReg (SPReg)
2054
+ .addImm (0 )
2055
+ .setMIFlags (Flags);
2056
+
2057
+ // BNE SP, TargetReg, LoopTest
2058
+ BuildMI (*LoopTestMBB, LoopTestMBB->end (), DL, TII->get (RISCV::BNE))
2059
+ .addReg (SPReg)
2060
+ .addReg (TargetReg)
2061
+ .addMBB (LoopTestMBB)
2062
+ .setMIFlags (Flags);
2063
+
2064
+ ExitMBB->splice (ExitMBB->end (), &MBB, std::next (MBBI), MBB.end ());
2065
+
2066
+ LoopTestMBB->addSuccessor (ExitMBB);
2067
+ LoopTestMBB->addSuccessor (LoopTestMBB);
2068
+ MBB.addSuccessor (LoopTestMBB);
2069
+ }
2070
+
2071
+ void RISCVFrameLowering::inlineStackProbe (MachineFunction &MF,
2072
+ MachineBasicBlock &MBB) const {
2073
+ auto Where = llvm::find_if (MBB, [](MachineInstr &MI) {
2074
+ return MI.getOpcode () == RISCV::PROBED_STACKALLOC;
2075
+ });
2076
+ if (Where != MBB.end ()) {
2077
+ DebugLoc DL = MBB.findDebugLoc (Where);
2078
+ emitStackProbeInline (MF, MBB, Where, DL);
2079
+ Where->eraseFromParent ();
2080
+ }
2081
+ }
0 commit comments