Skip to content

Commit 9e898b9

Browse files
committed
"[bolt][aarch64] Fixed indirect call instrumentation snippet"
The indirect call instrumentation snippet uses the x16 register in the exit handler to branch to the destination target: __bolt_instr_ind_call_handler_func: msr nzcv, x1 ldp x0, x1, [sp], #16 ldr x16, [sp], #16 ldp x0, x1, [sp], #16 br x16 <----- Depending on the compiler, the x16 register may be used to hold something across function calls. This patch changes the instrumentation snippet to call the instrumentation runtime library through an indirect call instruction, and adds a wrapper that stores/loads the target value and the register used by the original indirect call instruction. Example: mov x16, foo indirectCall: adrp x8, Label add x8, x8, #:lo12:Label blr x8 Before: Instrumented indirect call: stp x0, x1, [sp, #-16]! mov x0, x8 movk x1, #0x0, lsl #48 movk x1, #0x0, lsl #32 movk x1, #0x0, lsl #16 movk x1, #0x0 stp x0, x1, [sp, #-16]! adrp x0, __bolt_instr_ind_call_handler_func add x0, x0, #:lo12:__bolt_instr_ind_call_handler_func blr x0 __bolt_instr_ind_call_handler: (exit snippet) msr nzcv, x1 ldp x0, x1, [sp], #16 ldr x16, [sp], #16 ldp x0, x1, [sp], #16 br x16 <- overwrites the original value in X16 __bolt_instr_ind_call_handler_func: (entry snippet) stp x0, x1, [sp, #-16]! mrs x1, nzcv adrp x0, __bolt_instr_ind_call_handler add x0, x0, #:lo12:__bolt_instr_ind_call_handler ldr x0, [x0] cmp x0, #0x0 b.eq __bolt_instr_ind_call_handler str x30, [sp, #-16]! blr x0 <--- runtime lib store/load all regs ldr x30, [sp], #16 b __bolt_instr_ind_call_handler _________________________________________________________________________ After: mov x16, foo indirectCall: adrp x8, Label add x8, x8, #:lo12:Label blr x8 Instrumented indirect call: stp x0, x1, [sp, #-16]! mov x0, x8 movk x1, #0x0, lsl #48 movk x1, #0x0, lsl #32 movk x1, #0x0, lsl #16 movk x1, #0x0 stp x0, x1, [sp, #-16]! adrp x8, __bolt_instr_ind_call_handler_func add x8, x8, #:lo12:__bolt_instr_ind_call_handler_func str x30, [sp, #-16]! 
blr x8 <--- call trampoline instr lib ldr x30, [sp], #16 ldp x0, x1, [sp], #16 mov x8, x0 <---- restore original target ldp x0, x1, [sp], #16 blr x8 <--- original indirect call instruction // don't touch regs besides x0, x1 __bolt_instr_ind_call_handler: (exit snippet) ldr x1, [sp], #16 msr nzcv, x1 ldp x0, x1, [sp], #16 ret <---- return to original function with indirect call __bolt_instr_ind_call_handler_func: (entry snippet) stp x0, x1, [sp, #-16]! mrs x1, nzcv str x1, [sp, #-16]! adrp x0, __bolt_instr_ind_call_handler add x0, x0, #:lo12:__bolt_instr_ind_call_handler ldr x0, [x0] cmp x0, #0x0 b.eq __bolt_instr_ind_call_handler str x30, [sp, #-16]! blr x0 <--- runtime lib store/load all regs ldr x30, [sp], #16 b __bolt_instr_ind_call_handler
1 parent 57f3151 commit 9e898b9

File tree

4 files changed

+111
-42
lines changed

4 files changed

+111
-42
lines changed

bolt/include/bolt/Core/MCPlusBuilder.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,6 +511,11 @@ class MCPlusBuilder {
511511
llvm_unreachable("not implemented");
512512
}
513513

514+
virtual void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
515+
MCContext *Ctx) {
516+
llvm_unreachable("not implemented");
517+
}
518+
514519
virtual MCPhysReg getX86R11() const { llvm_unreachable("not implemented"); }
515520

516521
virtual unsigned getShortBranchOpcode(unsigned Opcode) const {

bolt/lib/Passes/Instrumentation.cpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
293293
BinaryBasicBlock::iterator &Iter,
294294
BinaryFunction &FromFunction,
295295
uint32_t From) {
296-
auto L = FromFunction.getBinaryContext().scopeLock();
297-
const size_t IndCallSiteID = Summary->IndCallDescriptions.size();
298-
createIndCallDescription(FromFunction, From);
296+
size_t IndCallSiteID;
297+
{
298+
auto L = FromFunction.getBinaryContext().scopeLock();
299+
IndCallSiteID = Summary->IndCallDescriptions.size();
300+
createIndCallDescription(FromFunction, From);
301+
}
299302

300303
BinaryContext &BC = FromFunction.getBinaryContext();
301304
bool IsTailCall = BC.MIB->isTailCall(*Iter);
@@ -305,9 +308,12 @@ void Instrumentation::instrumentIndirectTarget(BinaryBasicBlock &BB,
305308
: IndCallHandlerExitBBFunction->getSymbol(),
306309
IndCallSiteID, &*BC.Ctx);
307310

308-
Iter = BB.eraseInstruction(Iter);
309-
Iter = insertInstructions(CounterInstrs, BB, Iter);
310-
--Iter;
311+
if (!BC.isAArch64()) {
312+
Iter = BB.eraseInstruction(Iter);
313+
Iter = insertInstructions(CounterInstrs, BB, Iter);
314+
--Iter;
315+
} else
316+
Iter = insertInstructions(CounterInstrs, BB, Iter);
311317
}
312318

313319
bool Instrumentation::instrumentOneTarget(

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 92 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1966,6 +1966,15 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
19661966
convertJmpToTailCall(Inst);
19671967
}
19681968

1969+
void createDirectBranch(MCInst &Inst, const MCSymbol *Target,
1970+
MCContext *Ctx) override {
1971+
Inst.setOpcode(AArch64::B);
1972+
Inst.clear();
1973+
Inst.addOperand(MCOperand::createExpr(getTargetExprFor(
1974+
Inst, MCSymbolRefExpr::create(Target, MCSymbolRefExpr::VK_None, *Ctx),
1975+
*Ctx, 0)));
1976+
}
1977+
19691978
bool analyzeBranch(InstructionIterator Begin, InstructionIterator End,
19701979
const MCSymbol *&TBB, const MCSymbol *&FBB,
19711980
MCInst *&CondBranch,
@@ -2328,21 +2337,26 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
23282337
}
23292338

23302339
InstructionListType createInstrumentedIndCallHandlerExitBB() const override {
2331-
InstructionListType Insts(5);
23322340
// Code sequence for instrumented indirect call handler:
2341+
// ldr x1, [sp], #16
23332342
// msr nzcv, x1
23342343
// ldp x0, x1, [sp], #16
2335-
// ldr x16, [sp], #16
2336-
// ldp x0, x1, [sp], #16
2337-
// br x16
2338-
setSystemFlag(Insts[0], AArch64::X1);
2339-
createPopRegisters(Insts[1], AArch64::X0, AArch64::X1);
2340-
// Here we load address of the next function which should be called in the
2341-
// original binary to X16 register. Writing to X16 is permitted without
2342-
// needing to restore.
2343-
loadReg(Insts[2], AArch64::X16, AArch64::SP);
2344-
createPopRegisters(Insts[3], AArch64::X0, AArch64::X1);
2345-
createIndirectBranch(Insts[4], AArch64::X16, 0);
2344+
// ret
2345+
2346+
InstructionListType Insts;
2347+
2348+
Insts.emplace_back();
2349+
loadReg(Insts.back(), AArch64::X1, AArch64::SP);
2350+
2351+
Insts.emplace_back();
2352+
setSystemFlag(Insts.back(), AArch64::X1);
2353+
2354+
Insts.emplace_back();
2355+
createPopRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2356+
2357+
Insts.emplace_back();
2358+
createReturn(Insts.back());
2359+
23462360
return Insts;
23472361
}
23482362

@@ -2418,39 +2432,69 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24182432
MCSymbol *HandlerFuncAddr,
24192433
int CallSiteID,
24202434
MCContext *Ctx) override {
2421-
InstructionListType Insts;
24222435
// Code sequence used to enter indirect call instrumentation helper:
2423-
// stp x0, x1, [sp, #-16]! createPushRegisters
2436+
// stp x0, x1, [sp, #-16]! createPushRegisters (1)
24242437
// mov target x0 convertIndirectCallToLoad -> orr x0 target xzr
24252438
// mov x1 CallSiteID createLoadImmediate ->
24262439
// movk x1, #0x0, lsl #48
24272440
// movk x1, #0x0, lsl #32
24282441
// movk x1, #0x0, lsl #16
24292442
// movk x1, #0x0
2430-
// stp x0, x1, [sp, #-16]!
2431-
// bl *HandlerFuncAddr createIndirectCall ->
2443+
// stp x0, x1, [sp, #-16]! (2)
24322444
// adr x0 *HandlerFuncAddr -> adrp + add
2433-
// blr x0
2445+
// str x30, [sp, #-16]! (3)
2446+
// blr x0 (__bolt_instr_ind_call_handler_func)
2447+
// ldr x30, [sp], #16 (3)
2448+
// ldp x0, x1, [sp], #16 (2)
2449+
// mov target, x0 ; move target address back to the call's own register
2450+
// ldp x0, x1, [sp], #16 (1)
2451+
2452+
InstructionListType Insts;
24342453
Insts.emplace_back();
2435-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2454+
createPushRegisters(Insts.back(), getIntArgRegister(0),
2455+
getIntArgRegister(1));
24362456
Insts.emplace_back(CallInst);
2437-
convertIndirectCallToLoad(Insts.back(), AArch64::X0);
2457+
convertIndirectCallToLoad(Insts.back(), getIntArgRegister(0));
24382458
InstructionListType LoadImm =
24392459
createLoadImmediate(getIntArgRegister(1), CallSiteID);
24402460
Insts.insert(Insts.end(), LoadImm.begin(), LoadImm.end());
24412461
Insts.emplace_back();
2442-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2462+
createPushRegisters(Insts.back(), getIntArgRegister(0),
2463+
getIntArgRegister(1));
24432464
Insts.resize(Insts.size() + 2);
2444-
InstructionListType Addr =
2445-
materializeAddress(HandlerFuncAddr, Ctx, AArch64::X0);
2465+
InstructionListType Addr = materializeAddress(
2466+
HandlerFuncAddr, Ctx, CallInst.getOperand(0).getReg());
24462467
assert(Addr.size() == 2 && "Invalid Addr size");
24472468
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
2469+
2470+
Insts.emplace_back();
2471+
storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2472+
2473+
Insts.emplace_back();
2474+
createIndirectCallInst(Insts.back(), false,
2475+
CallInst.getOperand(0).getReg());
2476+
24482477
Insts.emplace_back();
2449-
createIndirectCallInst(Insts.back(), isTailCall(CallInst), AArch64::X0);
2478+
loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
24502479

2451-
// Carry over metadata including tail call marker if present.
2452-
stripAnnotations(Insts.back());
2453-
moveAnnotations(std::move(CallInst), Insts.back());
2480+
Insts.emplace_back();
2481+
createPopRegisters(Insts.back(), getIntArgRegister(0),
2482+
getIntArgRegister(1));
2483+
2484+
// move x0 to indirect call register
2485+
Insts.emplace_back();
2486+
Insts.back().setOpcode(AArch64::ORRXrs);
2487+
Insts.back().insert(Insts.back().begin(),
2488+
MCOperand::createReg(CallInst.getOperand(0).getReg()));
2489+
Insts.back().insert(Insts.back().begin() + 1,
2490+
MCOperand::createReg(AArch64::XZR));
2491+
Insts.back().insert(Insts.back().begin() + 2,
2492+
MCOperand::createReg(getIntArgRegister(0)));
2493+
Insts.back().insert(Insts.back().begin() + 3, MCOperand::createImm(0));
2494+
2495+
Insts.emplace_back();
2496+
createPopRegisters(Insts.back(), getIntArgRegister(0),
2497+
getIntArgRegister(1));
24542498

24552499
return Insts;
24562500
}
@@ -2472,30 +2516,44 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
24722516
// ldr x30, [sp], #16
24732517
// b IndCallHandler
24742518
InstructionListType Insts;
2519+
24752520
Insts.emplace_back();
2476-
createPushRegisters(Insts.back(), AArch64::X0, AArch64::X1);
2521+
createPushRegisters(Insts.back(), getIntArgRegister(0),
2522+
getIntArgRegister(1));
2523+
24772524
Insts.emplace_back();
24782525
getSystemFlag(Insts.back(), getIntArgRegister(1));
2526+
2527+
Insts.emplace_back();
2528+
storeReg(Insts.back(), getIntArgRegister(1), getSpRegister(/*Size*/ 8));
2529+
24792530
Insts.emplace_back();
24802531
Insts.emplace_back();
24812532
InstructionListType Addr =
2482-
materializeAddress(InstrTrampoline, Ctx, AArch64::X0);
2533+
materializeAddress(InstrTrampoline, Ctx, getIntArgRegister(0));
24832534
std::copy(Addr.begin(), Addr.end(), Insts.end() - Addr.size());
24842535
assert(Addr.size() == 2 && "Invalid Addr size");
2536+
24852537
Insts.emplace_back();
2486-
loadReg(Insts.back(), AArch64::X0, AArch64::X0);
2538+
loadReg(Insts.back(), getIntArgRegister(0), getIntArgRegister(0));
2539+
24872540
InstructionListType cmpJmp =
2488-
createCmpJE(AArch64::X0, 0, IndCallHandler, Ctx);
2541+
createCmpJE(getIntArgRegister(0), 0, IndCallHandler, Ctx);
24892542
Insts.insert(Insts.end(), cmpJmp.begin(), cmpJmp.end());
2543+
24902544
Insts.emplace_back();
2491-
storeReg(Insts.back(), AArch64::LR, AArch64::SP);
2545+
storeReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2546+
24922547
Insts.emplace_back();
24932548
Insts.back().setOpcode(AArch64::BLR);
2494-
Insts.back().addOperand(MCOperand::createReg(AArch64::X0));
2549+
Insts.back().addOperand(MCOperand::createReg(getIntArgRegister(0)));
2550+
24952551
Insts.emplace_back();
2496-
loadReg(Insts.back(), AArch64::LR, AArch64::SP);
2552+
loadReg(Insts.back(), AArch64::LR, getSpRegister(/*Size*/ 8));
2553+
24972554
Insts.emplace_back();
2498-
createDirectCall(Insts.back(), IndCallHandler, Ctx, /*IsTailCall*/ true);
2555+
createDirectBranch(Insts.back(), IndCallHandler, Ctx);
2556+
24992557
return Insts;
25002558
}
25012559

bolt/runtime/instr.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1668,7 +1668,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_call()
16681668
#if defined(__aarch64__)
16691669
// clang-format off
16701670
__asm__ __volatile__(SAVE_ALL
1671-
"ldp x0, x1, [sp, #288]\n"
1671+
"ldp x0, x1, [sp, #320]\n"
16721672
"bl instrumentIndirectCall\n"
16731673
RESTORE_ALL
16741674
"ret\n"
@@ -1705,7 +1705,7 @@ extern "C" __attribute((naked)) void __bolt_instr_indirect_tailcall()
17051705
#if defined(__aarch64__)
17061706
// clang-format off
17071707
__asm__ __volatile__(SAVE_ALL
1708-
"ldp x0, x1, [sp, #288]\n"
1708+
"ldp x0, x1, [sp, #320]\n"
17091709
"bl instrumentIndirectCall\n"
17101710
RESTORE_ALL
17111711
"ret\n"

0 commit comments

Comments
 (0)