Skip to content

Commit 8512cfc

Browse files
committed
[AArch64] Implement INIT/ADJUST_TRAMPOLINE
Add support for llvm.init.trampoline and llvm.adjust.trampoline intrinsics for AArch64. Fixes #65573 Fixes #76927 Fixes #83555 Updates #66157
1 parent e6ec7c8 commit 8512cfc

File tree

6 files changed

+139
-1
lines changed

6 files changed

+139
-1
lines changed

compiler-rt/lib/builtins/README.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,11 @@ switch32
272272
switch8
273273
switchu8
274274

275+
// This function generates a custom trampoline function with the specific
276+
// realFunc and localsPtr values.
277+
void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
278+
const void* realFunc, void* localsPtr);
279+
275280
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
276281
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
277282
// SJLJ for exceptions, each function with a catch clause or destructors needs

compiler-rt/lib/builtins/trampoline_setup.c

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,3 +41,45 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
4141
__clear_cache(trampOnStack, &trampOnStack[10]);
4242
}
4343
#endif // __powerpc__ && !defined(__powerpc64__)
44+
45+
// The AArch64 compiler generates calls to __trampoline_setup() when creating
46+
// trampoline functions on the stack for use with nested functions.
47+
// This function creates a custom 36-byte trampoline function on the stack
48+
// which loads x18 with a pointer to the outer function's locals
49+
// and then jumps to the target nested function.
50+
// Note: x18 is a reserved platform register on Windows and macOS.
51+
52+
#if defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
53+
// Stores four instruction words at insns[0..3]: one per 16-bit slice of
// `value`, lowest slice first, each targeting register number `reg`
// (mov for the first slice, movk for the remaining three).
static void aarch64_emit_load_imm64(uint32_t *insns, uint64_t value,
                                    uint32_t reg) {
  // Base opcodes for the slices at bit offsets 0, 16, 32 and 48.
  static const uint32_t opcodes[4] = {0xd2800000u, 0xf2a00000u, 0xf2c00000u,
                                      0xf2e00000u};
  for (int slice = 0; slice < 4; ++slice) {
    // The 16-bit immediate sits at bit 5; the register number is in the low
    // bits of the instruction word.
    uint64_t imm16 = (value >> (16 * slice)) & 0xffffu;
    insns[slice] = (uint32_t)(opcodes[slice] | (imm16 << 5) | reg);
  }
}

// Fills trampOnStack with a nine-instruction (36-byte) trampoline that loads
// realFunc into x17 and localsPtr into x18, then branches to x17.
//
//   trampOnStack       - buffer that receives the instruction words.
//   trampSizeAllocated - size of that buffer in bytes; must be at least 36.
//   realFunc           - address the trampoline branches to.
//   localsPtr          - value the trampoline places in x18.
COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
                                        int trampSizeAllocated,
                                        const void *realFunc, void *localsPtr) {
  // The compiler is expected to reserve enough room; if it did not, abort
  // rather than write past the end of the buffer.
  if (trampSizeAllocated < 36)
    compilerrt_abort();

  // Words 0-3: x17 (register 0x11) <- realFunc.
  aarch64_emit_load_imm64(&trampOnStack[0], (uint64_t)realFunc, 0x11);
  // Words 4-7: x18 (register 0x12) <- localsPtr.
  aarch64_emit_load_imm64(&trampOnStack[4], (uint64_t)localsPtr, 0x12);
  // Word 8: jump to the target function.
  trampOnStack[8] = 0xd61f0220; // br x17

  // Clear the instruction cache over the nine words just written.
  __clear_cache(trampOnStack, &trampOnStack[9]);
}
85+
#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)

compiler-rt/test/builtins/Unit/trampoline_setup_test.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
/*
99
* Tests nested functions
10-
* The ppc compiler generates a call to __trampoline_setup
10+
* The ppc and aarch64 compilers generate a call to __trampoline_setup
1111
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
1212
*/
1313

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,6 +1074,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10741074
// Try to create BICs for vector ANDs.
10751075
setTargetDAGCombine(ISD::AND);
10761076

1077+
// llvm.init.trampoline and llvm.adjust.trampoline
1078+
setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
1079+
setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
1080+
10771081
// Vector add and sub nodes may conceal a high-half opportunity.
10781082
// Also, try to fold ADD into CSINC/CSINV..
10791083
setTargetDAGCombine({ISD::ADD, ISD::ABS, ISD::SUB, ISD::XOR, ISD::SINT_TO_FP,
@@ -6653,6 +6657,56 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
66536657
return Final;
66546658
}
66556659

6660+
/// Custom lowering for ISD::ADJUST_TRAMPOLINE.
///
/// No adjustment is applied: the node is replaced by its first operand.
/// Compiling this node for a Darwin or Windows target is a fatal error.
SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                      SelectionDAG &DAG) const {
  // x18 carries the Nest parameter, and it cannot be used for that purpose on
  // Windows and macOS.
  const bool X18Unavailable =
      Subtarget->isTargetDarwin() || Subtarget->isTargetWindows();
  if (X18Unavailable)
    report_fatal_error(
        "ADJUST_TRAMPOLINE operation is only supported on Linux.");

  // Hand the incoming operand back unchanged.
  SDValue Unadjusted = Op.getOperand(0);
  return Unadjusted;
}
6669+
6670+
/// Custom lowering for ISD::INIT_TRAMPOLINE.
///
/// Replaces the node with a C-calling-convention libcall:
///   __trampoline_setup(Trmp, TrampSize, FPtr, Nest)
///
/// Operands of \p Op: 0 = chain, 1 = trampoline buffer, 2 = nested function,
/// 3 = 'nest' parameter value.
///
/// \returns the second element of the LowerCallTo() result for that call.
/// Compiling this node for a Darwin or Windows target is a fatal error.
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");

  // Size in bytes reported to __trampoline_setup(). The AArch64 runtime
  // routine writes nine 4-byte instructions and aborts when it is told that
  // fewer than 36 bytes are available, so anything smaller (the previous
  // value was 20) makes every trampoline initialization abort at run time.
  constexpr unsigned TrampolineSize = 36;

  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  // All four arguments are passed as pointer-sized integers; Entry is reused
  // and only its Node changes between pushes.
  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp;
  Args.push_back(Entry);
  Entry.Node = DAG.getConstant(TrampolineSize, dl, MVT::i64);
  Args.push_back(Entry);

  Entry.Node = FPtr;
  Args.push_back(Entry);
  Entry.Node = Nest;
  Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  // The callee returns void, so only the second half of the pair is used.
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
6709+
66566710
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
66576711
SelectionDAG &DAG) const {
66586712
LLVM_DEBUG(dbgs() << "Custom lowering: ");
@@ -6670,6 +6724,10 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
66706724
return LowerGlobalTLSAddress(Op, DAG);
66716725
case ISD::PtrAuthGlobalAddress:
66726726
return LowerPtrAuthGlobalAddress(Op, DAG);
6727+
case ISD::ADJUST_TRAMPOLINE:
6728+
return LowerADJUST_TRAMPOLINE(Op, DAG);
6729+
case ISD::INIT_TRAMPOLINE:
6730+
return LowerINIT_TRAMPOLINE(Op, DAG);
66736731
case ISD::SETCC:
66746732
case ISD::STRICT_FSETCC:
66756733
case ISD::STRICT_FSETCCS:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1141,6 +1141,8 @@ class AArch64TargetLowering : public TargetLowering {
11411141
SDValue LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, SDValue RHS,
11421142
SDValue TVal, SDValue FVal, const SDLoc &dl,
11431143
SelectionDAG &DAG) const;
1144+
SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1145+
SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
11441146
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
11451147
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
11461148
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
2+
; UNSUPPORTED: darwin, system-windows
3+
4+
declare void @llvm.init.trampoline(i8*, i8*, i8*);
5+
declare i8* @llvm.adjust.trampoline(i8*);
6+
7+
; Target of the trampoline: returns %val plus the i64 read through the 'nest'
; pointer argument.
define i64 @func(i64* nest %ptr, i64 %val)
{
  %loaded = load i64, i64* %ptr
  %total = add i64 %loaded, %val
  ret i64 %total
}
13+
14+
; CHECK-LABEL: main
15+
; Initializes a trampoline for @func in a 36-byte stack buffer, using the
; address of a stack slot holding 13 as the 'nest' value, then calls through
; the adjusted trampoline pointer with the argument 42.
define i64 @main(i64, i8**)
{
  %slot = alloca i64
  store i64 13, i64* %slot
  %nest_arg = bitcast i64* %slot to i8*
  %buf = alloca [36 x i8], align 4
  %tramp = getelementptr [36 x i8], [36 x i8]* %buf, i64 0, i64 0
; CHECK: bl __trampoline_setup
  call void @llvm.init.trampoline(
      i8* %tramp,
      i8* bitcast (i64 (i64*, i64)* @func to i8*),
      i8* %nest_arg)
  %adjusted = call i8* @llvm.adjust.trampoline(i8* %tramp)
  %callee = bitcast i8* %adjusted to i64(i64)*
  %result = call i64 %callee (i64 42)
  ret i64 %result
}

0 commit comments

Comments
 (0)