
Commit 1c35341

AArch64: add a pass to compress jump-table entries when possible.
llvm-svn: 345188
1 parent 769d4ce commit 1c35341

18 files changed (+693 -42 lines)

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 6 additions & 0 deletions
@@ -44,6 +44,12 @@ def int_aarch64_dmb : GCCBuiltin<"__builtin_arm_dmb">, MSBuiltin<"__dmb">, Intri
 def int_aarch64_dsb : GCCBuiltin<"__builtin_arm_dsb">, MSBuiltin<"__dsb">, Intrinsic<[], [llvm_i32_ty]>;
 def int_aarch64_isb : GCCBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, Intrinsic<[], [llvm_i32_ty]>;
 
+// A space-consuming intrinsic primarily for testing block and jump table
+// placements. The first argument is the number of bytes this "instruction"
+// takes up, the second and return value are essentially chains, used to force
+// ordering during ISel.
+def int_aarch64_space : Intrinsic<[llvm_i64_ty], [llvm_i32_ty, llvm_i64_ty], []>;
+
 }
 
 //===----------------------------------------------------------------------===//
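
The intrinsic is only declared above; as a rough illustration (not part of this commit), a test generator could create calls to it with IRBuilder along the following lines. The helper name and byte count are invented for the example; Intrinsic::aarch64_space is the enumerator TableGen derives from int_aarch64_space.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"

using namespace llvm;

// Pad the current position with `Bytes` bytes of dead "space". The i64
// operand and result act as a chain, so successive calls stay ordered
// through ISel instead of being reordered or deleted.
static Value *emitSpace(IRBuilder<> &B, Module &M, Value *Chain, unsigned Bytes) {
  Function *Space = Intrinsic::getDeclaration(&M, Intrinsic::aarch64_space);
  return B.CreateCall(Space, {B.getInt32(Bytes), Chain});
}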

llvm/lib/Target/AArch64/AArch64.h

Lines changed: 2 additions & 0 deletions
@@ -32,6 +32,7 @@ class MachineFunctionPass;
 FunctionPass *createAArch64DeadRegisterDefinitions();
 FunctionPass *createAArch64RedundantCopyEliminationPass();
 FunctionPass *createAArch64CondBrTuning();
+FunctionPass *createAArch64CompressJumpTablesPass();
 FunctionPass *createAArch64ConditionalCompares();
 FunctionPass *createAArch64AdvSIMDScalar();
 FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
@@ -62,6 +63,7 @@ void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
 void initializeAArch64BranchTargetsPass(PassRegistry&);
 void initializeAArch64CollectLOHPass(PassRegistry&);
 void initializeAArch64CondBrTuningPass(PassRegistry &);
+void initializeAArch64CompressJumpTablesPass(PassRegistry&);
 void initializeAArch64ConditionalComparesPass(PassRegistry&);
 void initializeAArch64ConditionOptimizerPass(PassRegistry&);
 void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);

llvm/lib/Target/AArch64/AArch64.td

Lines changed: 10 additions & 3 deletions
@@ -180,6 +180,10 @@ def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
 
+def FeatureForce32BitJumpTables
+    : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true",
+                       "Force jump table entries to be 32-bits wide except at MinSize">;
+
 def FeatureRCPC : SubtargetFeature<"rcpc", "HasRCPC", "true",
                                    "Enable support for RCPC extension">;
 
@@ -411,7 +415,8 @@ def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
                                     FeatureUseRSqrt,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     "Samsung Exynos-M2 processors",
@@ -425,7 +430,8 @@ def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeatureSlowMisaligned128Store,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     "Samsung Exynos-M3 processors",
@@ -442,7 +448,8 @@ def ProcExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3",
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,
                                     FeaturePredictableSelectIsExpensive,
-                                    FeatureZCZeroingFP]>;
+                                    FeatureZCZeroingFP,
+                                    FeatureForce32BitJumpTables]>;
 
 def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
                                 "Qualcomm Kryo processors", [

llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp

Lines changed: 132 additions & 0 deletions
@@ -31,6 +31,8 @@
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineModuleInfoImpls.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/StackMaps.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -77,6 +79,12 @@ class AArch64AsmPrinter : public AsmPrinter {
     return MCInstLowering.lowerOperand(MO, MCOp);
   }
 
+  void EmitJumpTableInfo() override;
+  void emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                          const MachineBasicBlock *MBB, unsigned JTI);
+
+  void LowerJumpTableDestSmall(MCStreamer &OutStreamer, const MachineInstr &MI);
+
   void LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                      const MachineInstr &MI);
   void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
@@ -433,6 +441,104 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
   printOperand(MI, NOps - 2, OS);
 }
 
+void AArch64AsmPrinter::EmitJumpTableInfo() {
+  const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+  if (!MJTI) return;
+
+  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+  if (JT.empty()) return;
+
+  const TargetLoweringObjectFile &TLOF = getObjFileLowering();
+  MCSection *ReadOnlySec = TLOF.getSectionForJumpTable(MF->getFunction(), TM);
+  OutStreamer->SwitchSection(ReadOnlySec);
+
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  for (unsigned JTI = 0, e = JT.size(); JTI != e; ++JTI) {
+    const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+
+    // If this jump table was deleted, ignore it.
+    if (JTBBs.empty()) continue;
+
+    unsigned Size = AFI->getJumpTableEntrySize(JTI);
+    EmitAlignment(Log2_32(Size));
+    OutStreamer->EmitLabel(GetJTISymbol(JTI));
+
+    for (auto *JTBB : JTBBs)
+      emitJumpTableEntry(MJTI, JTBB, JTI);
+  }
+}
+
+void AArch64AsmPrinter::emitJumpTableEntry(const MachineJumpTableInfo *MJTI,
+                                           const MachineBasicBlock *MBB,
+                                           unsigned JTI) {
+  const MCExpr *Value = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  unsigned Size = AFI->getJumpTableEntrySize(JTI);
+
+  if (Size == 4) {
+    // .word LBB - LJTI
+    const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
+    const MCExpr *Base = TLI->getPICJumpTableRelocBaseExpr(MF, JTI, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+  } else {
+    // .byte (LBB - LBB) >> 2 (or .hword)
+    const MCSymbol *BaseSym = AFI->getJumpTableEntryPCRelSymbol(JTI);
+    const MCExpr *Base = MCSymbolRefExpr::create(BaseSym, OutContext);
+    Value = MCBinaryExpr::createSub(Value, Base, OutContext);
+    Value = MCBinaryExpr::createLShr(
+        Value, MCConstantExpr::create(2, OutContext), OutContext);
+  }
+
+  OutStreamer->EmitValue(Value, Size);
+}
+
+/// Small jump tables contain an unsigned byte or half, representing the offset
+/// from the lowest-addressed possible destination to the desired basic
+/// block. Since all instructions are 4-byte aligned, this is further compressed
+/// by counting in instructions rather than bytes (i.e. divided by 4). So, to
+/// materialize the correct destination we need:
+///
+///     adr xDest, .LBB0_0
+///     ldrb wScratch, [xTable, xEntry] (with "lsl #1" for ldrh).
+///     add xDest, xDest, xScratch, lsl #2
+void AArch64AsmPrinter::LowerJumpTableDestSmall(llvm::MCStreamer &OutStreamer,
+                                                const llvm::MachineInstr &MI) {
+  unsigned DestReg = MI.getOperand(0).getReg();
+  unsigned ScratchReg = MI.getOperand(1).getReg();
+  unsigned ScratchRegW =
+      STI->getRegisterInfo()->getSubReg(ScratchReg, AArch64::sub_32);
+  unsigned TableReg = MI.getOperand(2).getReg();
+  unsigned EntryReg = MI.getOperand(3).getReg();
+  int JTIdx = MI.getOperand(4).getIndex();
+  bool IsByteEntry = MI.getOpcode() == AArch64::JumpTableDest8;
+
+  // This has to be first because the compression pass based its reachability
+  // calculations on the start of the JumpTableDest instruction.
+  auto Label =
+      MF->getInfo<AArch64FunctionInfo>()->getJumpTableEntryPCRelSymbol(JTIdx);
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADR)
+                                  .addReg(DestReg)
+                                  .addExpr(MCSymbolRefExpr::create(
+                                      Label, MF->getContext())));
+
+  // Load the number of instruction-steps to offset from the label.
+  unsigned LdrOpcode = IsByteEntry ? AArch64::LDRBBroX : AArch64::LDRHHroX;
+  EmitToStreamer(OutStreamer, MCInstBuilder(LdrOpcode)
+                                  .addReg(ScratchRegW)
+                                  .addReg(TableReg)
+                                  .addReg(EntryReg)
+                                  .addImm(0)
+                                  .addImm(IsByteEntry ? 0 : 1));
+
+  // Multiply the steps by 4 and add to the already materialized base label
+  // address.
+  EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                  .addReg(DestReg)
+                                  .addReg(DestReg)
+                                  .addReg(ScratchReg)
+                                  .addImm(2));
+}
+
 void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
                                       const MachineInstr &MI) {
   unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
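
To make the compressed-entry arithmetic concrete, here is a tiny stand-alone model (invented for illustration, not LLVM code) of the address that the adr/ldrb/add sequence above computes; the halfword case is identical apart from the entry type.

#include <cstdint>

// `AdrLabel` is the address the ADR materializes (the lowest-addressed target
// of the table); each table entry counts 4-byte instructions from that label.
uint64_t jumpTableDestSmall(const uint8_t *Table, uint64_t AdrLabel,
                            uint64_t Index) {
  // Mirrors: add xDest, xDest, xScratch, lsl #2
  return AdrLabel + uint64_t(Table[Index]) * 4;
}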
@@ -662,6 +768,32 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
     return;
   }
 
+  case AArch64::JumpTableDest32: {
+    // We want:
+    //     ldrsw xScratch, [xTable, xEntry, lsl #2]
+    //     add xDest, xTable, xScratch
+    unsigned DestReg = MI->getOperand(0).getReg(),
+             ScratchReg = MI->getOperand(1).getReg(),
+             TableReg = MI->getOperand(2).getReg(),
+             EntryReg = MI->getOperand(3).getReg();
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::LDRSWroX)
+                                     .addReg(ScratchReg)
+                                     .addReg(TableReg)
+                                     .addReg(EntryReg)
+                                     .addImm(0)
+                                     .addImm(1));
+    EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::ADDXrs)
+                                     .addReg(DestReg)
+                                     .addReg(TableReg)
+                                     .addReg(ScratchReg)
+                                     .addImm(0));
+    return;
+  }
+  case AArch64::JumpTableDest16:
+  case AArch64::JumpTableDest8:
+    LowerJumpTableDestSmall(*OutStreamer, *MI);
+    return;
+
 case AArch64::FMOVH0:
 case AArch64::FMOVS0:
 case AArch64::FMOVD0:
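
The uncompressed path keeps the existing table layout: each 4-byte entry is the target's offset from the table itself (the ".word LBB - LJTI" form emitted by emitJumpTableEntry above), which is why this sequence only needs an ldrsw plus an add of the table base, with no separate adr of a destination label.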
llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp

Lines changed: 162 additions & 0 deletions
@@ -0,0 +1,162 @@
+//==-- AArch64CompressJumpTables.cpp - Compress jump tables for AArch64 --====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+// This pass looks at the basic blocks each jump-table refers to and works out
+// whether they can be emitted in a compressed form (with 8 or 16-bit
+// entries). If so, it changes the opcode and flags them in the associated
+// AArch64FunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64Subtarget.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-jump-tables"
+
+STATISTIC(NumJT8, "Number of jump-tables with 1-byte entries");
+STATISTIC(NumJT16, "Number of jump-tables with 2-byte entries");
+STATISTIC(NumJT32, "Number of jump-tables with 4-byte entries");
+
+namespace {
+class AArch64CompressJumpTables : public MachineFunctionPass {
+  const TargetInstrInfo *TII;
+  MachineFunction *MF;
+  SmallVector<int, 8> BlockInfo;
+
+  int computeBlockSize(MachineBasicBlock &MBB);
+  void scanFunction();
+
+  bool compressJumpTable(MachineInstr &MI, int Offset);
+
+public:
+  static char ID;
+  AArch64CompressJumpTables() : MachineFunctionPass(ID) {
+    initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoVRegs);
+  }
+  StringRef getPassName() const override {
+    return "AArch64 Compress Jump Tables";
+  }
+};
+char AArch64CompressJumpTables::ID = 0;
+}
+
+INITIALIZE_PASS(AArch64CompressJumpTables, DEBUG_TYPE,
+                "AArch64 compress jump tables pass", false, false)
+
+int AArch64CompressJumpTables::computeBlockSize(MachineBasicBlock &MBB) {
+  int Size = 0;
+  for (const MachineInstr &MI : MBB)
+    Size += TII->getInstSizeInBytes(MI);
+  return Size;
+}
+
+void AArch64CompressJumpTables::scanFunction() {
+  BlockInfo.clear();
+  BlockInfo.resize(MF->getNumBlockIDs());
+
+  int Offset = 0;
+  for (MachineBasicBlock &MBB : *MF) {
+    BlockInfo[MBB.getNumber()] = Offset;
+    Offset += computeBlockSize(MBB);
+  }
+}
+
+bool AArch64CompressJumpTables::compressJumpTable(MachineInstr &MI,
+                                                  int Offset) {
+  if (MI.getOpcode() != AArch64::JumpTableDest32)
+    return false;
+
+  int JTIdx = MI.getOperand(4).getIndex();
+  auto &JTInfo = *MF->getJumpTableInfo();
+  const MachineJumpTableEntry &JT = JTInfo.getJumpTables()[JTIdx];
+
+  // The jump-table might have been optimized away.
+  if (JT.MBBs.empty())
+    return false;
+
+  int MaxOffset = std::numeric_limits<int>::min(),
+      MinOffset = std::numeric_limits<int>::max();
+  MachineBasicBlock *MinBlock = nullptr;
+  for (auto Block : JT.MBBs) {
+    int BlockOffset = BlockInfo[Block->getNumber()];
+    assert(BlockOffset % 4 == 0 && "misaligned basic block");
+
+    MaxOffset = std::max(MaxOffset, BlockOffset);
+    if (BlockOffset <= MinOffset) {
+      MinOffset = BlockOffset;
+      MinBlock = Block;
+    }
+  }
+
+  // The ADR instruction needed to calculate the address of the first reachable
+  // basic block can address +/-1MB.
+  if (!isInt<21>(MinOffset - Offset)) {
+    ++NumJT32;
+    return false;
+  }
+
+  int Span = MaxOffset - MinOffset;
+  auto AFI = MF->getInfo<AArch64FunctionInfo>();
+  if (isUInt<8>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 1, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest8));
+    ++NumJT8;
+    return true;
+  } else if (isUInt<16>(Span / 4)) {
+    AFI->setJumpTableEntryInfo(JTIdx, 2, MinBlock->getSymbol());
+    MI.setDesc(TII->get(AArch64::JumpTableDest16));
+    ++NumJT16;
+    return true;
+  }
+
+  ++NumJT32;
+  return false;
+}
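
In concrete terms: entries count 4-byte instructions from the lowest-addressed target, so a table whose targets all lie within 255 instructions (1020 bytes) of that block gets 1-byte entries, one whose span fits in 65535 instructions (roughly 256KiB) gets 2-byte entries, and anything wider - or any table whose nearest target is outside the +/-1MB range of the ADR - keeps 4-byte entries and counts towards NumJT32.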
+
+bool AArch64CompressJumpTables::runOnMachineFunction(MachineFunction &MFIn) {
+  bool Changed = false;
+  MF = &MFIn;
+
+  const auto &ST = MF->getSubtarget<AArch64Subtarget>();
+  TII = ST.getInstrInfo();
+
+  if (ST.force32BitJumpTables() && !MF->getFunction().optForMinSize())
+    return false;
+
+  scanFunction();
+
+  for (MachineBasicBlock &MBB : *MF) {
+    int Offset = BlockInfo[MBB.getNumber()];
+    for (MachineInstr &MI : MBB) {
+      Changed |= compressJumpTable(MI, Offset);
+      Offset += TII->getInstSizeInBytes(MI);
+    }
+  }
+
+  return Changed;
+}
+
+FunctionPass *llvm::createAArch64CompressJumpTablesPass() {
+  return new AArch64CompressJumpTables();
+}
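
The remaining wiring lives in files not shown in this excerpt (AArch64TargetMachine.cpp among the 18 changed files). A minimal sketch of the expected plumbing, assuming the usual AArch64PassConfig hooks and leaving out any command-line guard the commit may add around it:

// Sketch only: register the pass and run it late, when block sizes and
// offsets are essentially final, so the span measurements above hold.
extern "C" void LLVMInitializeAArch64Target() {
  // ... existing initializeAArch64*Pass calls ...
  initializeAArch64CompressJumpTablesPass(*PassRegistry::getPassRegistry());
}

void AArch64PassConfig::addPreEmitPass() {
  // ... other late passes ...
  addPass(createAArch64CompressJumpTablesPass());
}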
