Skip to content

Commit 2e20622

Browse files
committed
[AArch64] Implement INIT/ADJUST_TRAMPOLINE
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes #65573 Fixes #76927 Fixes #83555 Updates #66157
1 parent 4f79ef4 commit 2e20622

File tree

6 files changed

+127
-1
lines changed

6 files changed

+127
-1
lines changed

compiler-rt/lib/builtins/README.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,11 @@ switch32
272272
switch8
273273
switchu8
274274

275+
// This function generates a custom trampoline function with the specific
276+
// realFunc and localsPtr values.
277+
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
278+
const void* realFunc, void* localsPtr);
279+
275280
// There is no C interface to the *_vfp_d8_d15_regs functions. They are
276281
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI uses
277282
// SJLJ for exceptions, each function with a catch clause or destructors needs

compiler-rt/lib/builtins/trampoline_setup.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
4141
__clear_cache(trampOnStack, &trampOnStack[10]);
4242
}
4343
#endif // __powerpc__ && !defined(__powerpc64__)
44+
45+
// The AArch64 compiler generates calls to __trampoline_setup() when creating
46+
// trampoline functions on the stack for use with nested functions.
47+
// This function creates a custom 36-byte trampoline function on the stack
48+
// which loads x18 with a pointer to the outer function's locals
49+
// and then jumps to the target nested function.
50+
// Note: x18 is a reserved platform register on Windows and macOS.
51+
52+
#if defined(__aarch64__) && defined(__ELF__)
53+
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
                                        int trampSizeAllocated,
                                        const void *realFunc, void *localsPtr) {
  // Opcode templates for materializing a 64-bit value 16 bits at a time:
  // one mov for bits 0-15 followed by three movk for bits 16-31, 32-47 and
  // 48-63. The 16-bit chunk goes at bit 5; the low bits select the register.
  static const uint32_t movOpcodes[4] = {0xd2800000u, 0xf2a00000u, 0xf2c00000u,
                                         0xf2e00000u};
  const uint64_t funcBits = (uint64_t)realFunc;
  const uint64_t localsBits = (uint64_t)localsPtr;
  int chunk;

  // The trampoline is nine 4-byte words (36 bytes). This should never
  // happen, but if the compiler reserved less stack space than that, abort.
  if (trampSizeAllocated < 36)
    compilerrt_abort();

  for (chunk = 0; chunk < 4; ++chunk) {
    const int shift = 16 * chunk;
    // Words 0-3: load realFunc into x17 (register field 0x11).
    trampOnStack[chunk] =
        movOpcodes[chunk] | (((funcBits >> shift) & 0xffffu) << 5) | 0x11;
    // Words 4-7: load localsPtr into x18 (register field 0x12).
    trampOnStack[chunk + 4] =
        movOpcodes[chunk] | (((localsBits >> shift) & 0xffffu) << 5) | 0x12;
  }
  trampOnStack[8] = 0xd61f0220; // br x17

  // Clear instruction cache over the nine words just written.
  __clear_cache(trampOnStack, &trampOnStack[9]);
}
85+
#endif // defined(__aarch64__) && defined(__ELF__)

compiler-rt/test/builtins/Unit/trampoline_setup_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
/*
99
* Tests nested functions
10-
* The ppc compiler generates a call to __trampoline_setup
10+
* The ppc and aarch64 compilers generate a call to __trampoline_setup
1111
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
1212
*/
1313

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1080,6 +1080,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10801080
// Try to create BICs for vector ANDs.
10811081
setTargetDAGCombine(ISD::AND);
10821082

1083+
// llvm.init.trampoline and llvm.adjust.trampoline
1084+
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
1085+
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
1086+
10831087
// Vector add and sub nodes may conceal a high-half opportunity.
10841088
// Also, try to fold ADD into CSINC/CSINV..
10851089
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6688,6 +6692,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
66886692
return Final;
66896693
}
66906694

6695+
/// Custom lowering for ISD::ADJUST_TRAMPOLINE.
///
/// Reports a fatal error on Darwin and Windows subtargets; otherwise the node
/// is replaced by its first operand, unchanged.
SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  const bool NestRegUnavailable =
      Subtarget->isTargetDarwin() || Subtarget->isTargetWindows();
  if (NestRegUnavailable)
    report_fatal_error(
        "ADJUST_TRAMPOLINE operation is only supported on Linux.");

  return Op.getOperand(0);
}
6704+
6705+
/// Custom lowering for ISD::INIT_TRAMPOLINE.
///
/// Reports a fatal error on Darwin and Windows subtargets. Otherwise lowers
/// the node to a call to the runtime routine
///   __trampoline_setup(Trmp, TrampSize, FPtr, Nest)
/// and returns the output chain of that call.
///
/// Operands of \p Op: 0 = chain, 1 = trampoline buffer, 2 = nested function,
/// 3 = 'nest' parameter value.
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");

  // Size in bytes reported to the runtime. The AArch64 __trampoline_setup
  // writes nine 4-byte instructions and aborts when the size it is given is
  // below 36, so any smaller value here would make every trampoline
  // initialization abort at run time.
  constexpr uint64_t TrampolineSize = 36;

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  // All four arguments are passed with the pointer-sized integer type.
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp;
  Args.push_back(Entry);
  Entry.Node = DAG.getConstant(TrampolineSize, dl, MVT::i64);
  Args.push_back(Entry);

  Entry.Node = FPtr;
  Args.push_back(Entry);
  Entry.Node = Nest;
  Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // The callee returns void; only the output chain is meaningful.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
6744+
66916745
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
66926746
SelectionDAG &DAG) const {
66936747
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6705,6 +6759,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
67056759
return LowerGlobalTLSAddress(Op, DAG);
67066760
case ISD::PtrAuthGlobalAddress:
67076761
return LowerPtrAuthGlobalAddress(Op, DAG);
6762+
case ISD::ADJUST_TRAMPOLINE:
6763+
return LowerADJUST_TRAMPOLINE(Op, DAG);
6764+
case ISD::INIT_TRAMPOLINE:
6765+
return LowerINIT_TRAMPOLINE(Op, DAG);
67086766
case ISD::SETCC:
67096767
case ISD::STRICT_FSETCC:
67106768
case ISD::STRICT_FSETCCS:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,8 @@ class AArch64TargetLowering : public TargetLowering {
11431143
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
11441144
SDValue TVal, SDValue FVal, const SDLoc &dl,
11451145
SelectionDAG &DAG) const;
1146+
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1147+
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
11461148
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
11471149
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
11481150
SDValue LowerBRIND(SDValue Op, SelectionDAG &DAG) const;
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
2+
3+
declare void @llvm.init.trampoline(ptr, ptr, ptr);
4+
declare ptr @llvm.adjust.trampoline(ptr);
5+
6+
; Target function for the trampoline. %c carries the `nest` attribute and is not used by the body, which returns %x + %y.
define i64 @f(ptr nest %c, i64 %x, i64 %y) {
7+
%sum = add i64 %x, %y
8+
ret i64 %sum
9+
}
10+
11+
; Initializes a 36-byte stack buffer as a trampoline for @f with %nval as the nest value, then adjusts it. The CHECK line expects llc to emit `bl __trampoline_setup`.
define i64 @main() {
12+
%val = alloca i64
13+
%nval = bitcast ptr %val to ptr
14+
%tramp = alloca [36 x i8], align 8
15+
; CHECK: bl __trampoline_setup
16+
call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
17+
%fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
18+
ret i64 0
19+
}

0 commit comments

Comments
 (0)