@@ -32,6 +32,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -50,6 +51,7 @@
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Type.h"
+#include "llvm/InitializePasses.h"
 #include "llvm/MC/MCInstrDesc.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Allocator.h"
@@ -1383,6 +1385,38 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
   case ARM::t2STRi8:
   case ARM::t2STRi12:
     return ARM::t2STR_POST;
+
+  case ARM::MVE_VLDRBS16:
+    return ARM::MVE_VLDRBS16_post;
+  case ARM::MVE_VLDRBS32:
+    return ARM::MVE_VLDRBS32_post;
+  case ARM::MVE_VLDRBU16:
+    return ARM::MVE_VLDRBU16_post;
+  case ARM::MVE_VLDRBU32:
+    return ARM::MVE_VLDRBU32_post;
+  case ARM::MVE_VLDRHS32:
+    return ARM::MVE_VLDRHS32_post;
+  case ARM::MVE_VLDRHU32:
+    return ARM::MVE_VLDRHU32_post;
+  case ARM::MVE_VLDRBU8:
+    return ARM::MVE_VLDRBU8_post;
+  case ARM::MVE_VLDRHU16:
+    return ARM::MVE_VLDRHU16_post;
+  case ARM::MVE_VLDRWU32:
+    return ARM::MVE_VLDRWU32_post;
+  case ARM::MVE_VSTRB16:
+    return ARM::MVE_VSTRB16_post;
+  case ARM::MVE_VSTRB32:
+    return ARM::MVE_VSTRB32_post;
+  case ARM::MVE_VSTRH32:
+    return ARM::MVE_VSTRH32_post;
+  case ARM::MVE_VSTRBU8:
+    return ARM::MVE_VSTRBU8_post;
+  case ARM::MVE_VSTRHU16:
+    return ARM::MVE_VSTRHU16_post;
+  case ARM::MVE_VSTRWU32:
+    return ARM::MVE_VSTRWU32_post;
+
   default: llvm_unreachable("Unhandled opcode!");
   }
 }
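The _post opcodes above are only usable when the folded-in increment fits the post-indexed encoding; the DistributeIncrements code added further down checks this through isLegalAddressImm. As a rough sketch of that constraint (the scale and range values below are assumptions drawn from the general shape of MVE addressing modes, not stated in this patch):

// Illustrative sketch only; the authoritative check is isLegalAddressImm in
// ARMBaseInstrInfo. Assumed encoding: a 7-bit immediate scaled by the access
// size (1 for VLDRB/VSTRB, 2 for VLDRH/VSTRH, 4 for VLDRW/VSTRW).
static bool isPlausibleMVEPostIncImm(int Imm, int Scale) {
  int Mag = Imm < 0 ? -Imm : Imm;
  return Imm % Scale == 0 && Mag < 128 * Scale;
}
// e.g. isPlausibleMVEPostIncImm(16, 4) -> true, isPlausibleMVEPostIncImm(514, 2) -> false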
@@ -2046,6 +2080,7 @@ namespace {
     const TargetRegisterInfo *TRI;
     const ARMSubtarget *STI;
     MachineRegisterInfo *MRI;
+    MachineDominatorTree *DT;
     MachineFunction *MF;
 
     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
@@ -2058,6 +2093,8 @@ namespace {
 
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired<AAResultsWrapperPass>();
+      AU.addRequired<MachineDominatorTree>();
+      AU.addPreserved<MachineDominatorTree>();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
@@ -2071,14 +2108,19 @@ namespace {
                        unsigned Base, bool isLd,
                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+    bool DistributeIncrements();
+    bool DistributeIncrements(Register Base);
   };
 
 } // end anonymous namespace
 
 char ARMPreAllocLoadStoreOpt::ID = 0;
 
-INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
-                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_BEGIN(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+                      ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
+                    ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
 
 // Limit the number of instructions to be rescheduled.
 // FIXME: tune this limit, and/or come up with some better heuristics.
@@ -2094,10 +2136,11 @@ bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   TII = STI->getInstrInfo();
   TRI = STI->getRegisterInfo();
   MRI = &Fn.getRegInfo();
+  DT = &getAnalysis<MachineDominatorTree>();
   MF = &Fn;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
 
-  bool Modified = false;
+  bool Modified = DistributeIncrements();
   for (MachineBasicBlock &MFI : Fn)
     Modified |= RescheduleLoadStoreInstrs(&MFI);
 
@@ -2475,6 +2518,198 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   return RetVal;
 }
 
+// Get the Base register operand index from the memory access MachineInst if we
+// should attempt to distribute postinc on it. Return -1 if not of a valid
+// instruction type. If it returns an index, it is assumed that instruction is a
+// r+i indexing mode, and getBaseOperandIndex() + 1 is the Offset index.
+static int getBaseOperandIndex(MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case ARM::MVE_VLDRBS16:
+  case ARM::MVE_VLDRBS32:
+  case ARM::MVE_VLDRBU16:
+  case ARM::MVE_VLDRBU32:
+  case ARM::MVE_VLDRHS32:
+  case ARM::MVE_VLDRHU32:
+  case ARM::MVE_VLDRBU8:
+  case ARM::MVE_VLDRHU16:
+  case ARM::MVE_VLDRWU32:
+  case ARM::MVE_VSTRB16:
+  case ARM::MVE_VSTRB32:
+  case ARM::MVE_VSTRH32:
+  case ARM::MVE_VSTRBU8:
+  case ARM::MVE_VSTRHU16:
+  case ARM::MVE_VSTRWU32:
+    return 1;
+  }
+  return -1;
+}
+
+static MachineInstr *createPostIncLoadStore(MachineInstr *MI, int Offset,
+                                            Register NewReg,
+                                            const TargetInstrInfo *TII,
+                                            const TargetRegisterInfo *TRI) {
+  MachineFunction *MF = MI->getMF();
+  MachineRegisterInfo &MRI = MF->getRegInfo();
+
+  unsigned NewOpcode = getPostIndexedLoadStoreOpcode(
+      MI->getOpcode(), Offset > 0 ? ARM_AM::add : ARM_AM::sub);
+
+  const MCInstrDesc &MCID = TII->get(NewOpcode);
+  // Constrain the def register class
+  const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
+  MRI.constrainRegClass(NewReg, TRC);
+  // And do the same for the base operand
+  TRC = TII->getRegClass(MCID, 2, TRI, *MF);
+  MRI.constrainRegClass(MI->getOperand(1).getReg(), TRC);
+
+  return BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), MCID)
+      .addReg(NewReg, RegState::Define)
+      .add(MI->getOperand(0))
+      .add(MI->getOperand(1))
+      .addImm(Offset)
+      .add(MI->getOperand(3))
+      .add(MI->getOperand(4))
+      .cloneMemRefs(*MI);
+}
+
+// Given a Base Register, optimise the load/store uses to attempt to create more
+// post-inc accesses. We do this by taking zero offset loads/stores with an add,
+// and convert them to a postinc load/store of the same type. Any subsequent
+// accesses will be adjusted to use and account for the post-inc value.
+// For example:
+// LDR #0            LDR_POSTINC #16
+// LDR #4            LDR #-12
+// LDR #8            LDR #-8
+// LDR #12           LDR #-4
+// ADD #16
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements(Register Base) {
+  // We are looking for:
+  // One zero offset load/store that can become postinc
+  MachineInstr *BaseAccess = nullptr;
+  // An increment that can be folded in
+  MachineInstr *Increment = nullptr;
+  // Other accesses after BaseAccess that will need to be updated to use the
+  // postinc value
+  SmallPtrSet<MachineInstr *, 8> OtherAccesses;
+  for (auto &Use : MRI->use_nodbg_instructions(Base)) {
+    if (!Increment && getAddSubImmediate(Use) != 0) {
+      Increment = &Use;
+      continue;
+    }
+
+    int BaseOp = getBaseOperandIndex(Use);
+    if (BaseOp == -1)
+      return false;
+
+    if (!Use.getOperand(BaseOp).isReg() ||
+        Use.getOperand(BaseOp).getReg() != Base)
+      return false;
+    if (Use.getOperand(BaseOp + 1).getImm() == 0)
+      BaseAccess = &Use;
+    else
+      OtherAccesses.insert(&Use);
+  }
+
+  if (!BaseAccess || !Increment ||
+      BaseAccess->getParent() != Increment->getParent())
+    return false;
+  Register PredReg;
+  if (Increment->definesRegister(ARM::CPSR) ||
+      getInstrPredicate(*Increment, PredReg) != ARMCC::AL)
+    return false;
+
+  LLVM_DEBUG(dbgs() << "\nAttempting to distribute increments on VirtualReg "
+                    << Base.virtRegIndex() << "\n");
+
+  // Make sure that Increment has no uses before BaseAccess.
+  for (MachineInstr &Use :
+       MRI->use_nodbg_instructions(Increment->getOperand(0).getReg())) {
+    if (!DT->dominates(BaseAccess, &Use) || &Use == BaseAccess) {
+      LLVM_DEBUG(dbgs() << "  BaseAccess doesn't dominate use of increment\n");
+      return false;
+    }
+  }
+
+  // Make sure that Increment can be folded into Base
+  int IncrementOffset = getAddSubImmediate(*Increment);
+  unsigned NewPostIncOpcode = getPostIndexedLoadStoreOpcode(
+      BaseAccess->getOpcode(), IncrementOffset > 0 ? ARM_AM::add : ARM_AM::sub);
+  if (!isLegalAddressImm(NewPostIncOpcode, IncrementOffset, TII)) {
+    LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on postinc\n");
+    return false;
+  }
+
+  // And make sure that the negative value of increment can be added to all
+  // other offsets after the BaseAccess. We rely on either
+  // dominates(BaseAccess, OtherAccess) or dominates(OtherAccess, BaseAccess)
+  // to keep things simple.
+  SmallPtrSet<MachineInstr *, 4> SuccessorAccesses;
+  for (auto *Use : OtherAccesses) {
+    if (DT->dominates(BaseAccess, Use)) {
+      SuccessorAccesses.insert(Use);
+      unsigned BaseOp = getBaseOperandIndex(*Use);
+      if (!isLegalAddressImm(
+              Use->getOpcode(),
+              Use->getOperand(BaseOp + 1).getImm() - IncrementOffset, TII)) {
+        LLVM_DEBUG(dbgs() << "  Illegal addressing mode immediate on use\n");
+        return false;
+      }
+    } else if (!DT->dominates(Use, BaseAccess)) {
+      LLVM_DEBUG(
+          dbgs() << "  Unknown dominance relation between Base and Use\n");
+      return false;
+    }
+  }
+
+  // Replace BaseAccess with a post inc
+  LLVM_DEBUG(dbgs() << "Changing: "; BaseAccess->dump());
+  LLVM_DEBUG(dbgs() << "  And   : "; Increment->dump());
+  Register NewBaseReg = Increment->getOperand(0).getReg();
+  MachineInstr *BaseAccessPost =
+      createPostIncLoadStore(BaseAccess, IncrementOffset, NewBaseReg, TII, TRI);
+  BaseAccess->eraseFromParent();
+  Increment->eraseFromParent();
+  LLVM_DEBUG(dbgs() << "  To    : "; BaseAccessPost->dump());
+
+  for (auto *Use : SuccessorAccesses) {
+    LLVM_DEBUG(dbgs() << "Changing: "; Use->dump());
+    unsigned BaseOp = getBaseOperandIndex(*Use);
+    Use->getOperand(BaseOp).setReg(NewBaseReg);
+    int OldOffset = Use->getOperand(BaseOp + 1).getImm();
+    Use->getOperand(BaseOp + 1).setImm(OldOffset - IncrementOffset);
+    LLVM_DEBUG(dbgs() << "  To    : "; Use->dump());
+  }
+
+  // Remove the kill flag from all uses of NewBaseReg, in case any old uses
+  // remain.
+  for (MachineOperand &Op : MRI->use_nodbg_operands(NewBaseReg))
+    Op.setIsKill(false);
+  return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::DistributeIncrements() {
+  bool Changed = false;
+  SmallSetVector<Register, 4> Visited;
+  for (auto &MBB : *MF) {
+    for (auto &MI : MBB) {
+      int BaseOp = getBaseOperandIndex(MI);
+      if (BaseOp == -1 || !MI.getOperand(BaseOp).isReg())
+        continue;
+
+      Register Base = MI.getOperand(BaseOp).getReg();
+      if (!Base.isVirtual() || Visited.count(Base))
+        continue;
+
+      Visited.insert(Base);
+    }
+  }
+
+  for (auto Base : Visited)
+    Changed |= DistributeIncrements(Base);
+
+  return Changed;
+}
+
 /// Returns an instance of the load / store optimization pass.
 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
   if (PreAlloc)
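For context, a hedged illustration of the kind of source loop this transformation targets (an assumption for exposition, not code from this commit): the inner loop below vectorises to several MVE loads and stores at fixed offsets from one pointer plus a single pointer increment, which DistributeIncrements rewrites into one post-indexed access followed by negatively adjusted offsets, matching the LDR/ADD example in the comment above.

// Hypothetical example, not part of the commit.
void add_blocks(unsigned char *dst, const unsigned char *src, int blocks) {
  for (int b = 0; b < blocks; ++b) {
    for (int i = 0; i < 64; ++i)  // 64 bytes = four 16-byte MVE vectors
      dst[i] += src[i];           // accesses at offsets #0, #16, #32, #48
    dst += 64;                    // increments the pass folds into post-inc forms
    src += 64;
  }
}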