Skip to content

Commit 1f67f34

Browse files
committed
[MTE] add stack frame history buffer
This will allow us to find offending objects in a symbolization step, as we can already do with HWASan. Needs matching changes in AOSP: https://android-review.git.corp.google.com/q/topic:%22stackhistorybuffer%22 Pull Request: #86356
1 parent c546578 commit 1f67f34

File tree

3 files changed

+134
-2
lines changed

3 files changed

+134
-2
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2500,7 +2500,8 @@ AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
25002500
return resolveFrameIndexReference(
25012501
MF, FI, FrameReg,
25022502
/*PreferFP=*/
2503-
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
2503+
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
2504+
MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
25042505
/*ForSimm=*/false);
25052506
}
25062507

llvm/lib/Target/AArch64/AArch64StackTagging.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "AArch64InstrInfo.h"
1212
#include "AArch64Subtarget.h"
1313
#include "AArch64TargetMachine.h"
14+
#include "llvm/ADT/APInt.h"
1415
#include "llvm/ADT/MapVector.h"
1516
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/ADT/Statistic.h"
@@ -21,6 +22,7 @@
2122
#include "llvm/Analysis/ScalarEvolution.h"
2223
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
2324
#include "llvm/Analysis/StackSafetyAnalysis.h"
25+
#include "llvm/BinaryFormat/Dwarf.h"
2426
#include "llvm/CodeGen/LiveRegUnits.h"
2527
#include "llvm/CodeGen/MachineBasicBlock.h"
2628
#include "llvm/CodeGen/MachineFunction.h"
@@ -82,6 +84,26 @@ static cl::opt<size_t> ClMaxLifetimes(
8284
cl::desc("How many lifetime ends to handle for a single alloca."),
8385
cl::Optional);
8486

87+
// Mode for selecting how to insert frame record info into the stack ring
88+
// buffer.
89+
enum RecordStackHistoryMode {
90+
// Do not record frame record info.
91+
none,
92+
93+
// Insert instructions into the prologue for storing into the stack ring
94+
// buffer directly.
95+
instr,
96+
};
97+
98+
static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
99+
"stack-tagging-record-stack-history",
100+
cl::desc("Record stack frames with tagged allocations in a thread-local "
101+
"ring buffer"),
102+
cl::values(clEnumVal(none, "Do not record stack ring history"),
103+
clEnumVal(instr, "Insert instructions into the prologue for "
104+
"storing into the stack ring buffer")),
105+
cl::Hidden, cl::init(none));
106+
85107
static const Align kTagGranuleSize = Align(16);
86108

87109
namespace {
@@ -309,6 +331,7 @@ class AArch64StackTagging : public FunctionPass {
309331
uint64_t Size, InitializerBuilder &IB);
310332

311333
Instruction *insertBaseTaggedPointer(
334+
const Module &M,
312335
const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
313336
const DominatorTree *DT);
314337
bool runOnFunction(Function &F) override;
@@ -437,6 +460,7 @@ void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
437460
}
438461

439462
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
463+
const Module &M,
440464
const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
441465
const DominatorTree *DT) {
442466
BasicBlock *PrologueBB = nullptr;
@@ -458,6 +482,41 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
458482
Instruction *Base =
459483
IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
460484
Base->setName("basetag");
485+
auto TargetTriple = Triple(M.getTargetTriple());
486+
// This is not a stable ABI for now, so only allow in dev builds with API
487+
// level 10000.
488+
// The ThreadLong format is the same as with HWASan, but the entries for
489+
// stack MTE take two slots (16 bytes).
490+
if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
491+
TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(10000) &&
492+
!AllocasToInstrument.empty()) {
493+
constexpr int StackMteSlot = -3;
494+
constexpr uint64_t TagMask = 0xFULL << 56;
495+
496+
auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
497+
Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
498+
auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
499+
Value *TaggedFP = IRB.CreateOr(
500+
memtag::getFP(IRB),
501+
IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask));
502+
Value *PC = memtag::getPC(TargetTriple, IRB);
503+
Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
504+
IRB.CreateStore(PC, RecordPtr);
505+
IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));
506+
// Update the ring buffer. Top byte of ThreadLong defines the size of the
507+
// buffer in pages, it must be a power of two, and the start of the buffer
508+
// must be aligned by twice that much. Therefore wrap around of the ring
509+
// buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
510+
// The use of AShr instead of LShr is due to
511+
// https://bugs.llvm.org/show_bug.cgi?id=39030
512+
// Runtime library makes sure not to use the highest bit.
513+
Value *WrapMask = IRB.CreateXor(
514+
IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
515+
ConstantInt::get(IntptrTy, (uint64_t)-1));
516+
Value *ThreadLongNew = IRB.CreateAnd(
517+
IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 16)), WrapMask);
518+
IRB.CreateStore(ThreadLongNew, SlotPtr);
519+
}
461520
return Base;
462521
}
463522

@@ -513,7 +572,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
513572
SetTagFunc =
514573
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
515574

516-
Instruction *Base = insertBaseTaggedPointer(SInfo.AllocasToInstrument, DT);
575+
Instruction *Base =
576+
insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);
517577

518578
int NextTag = 0;
519579
for (auto &I : SInfo.AllocasToInstrument) {
@@ -575,6 +635,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
575635
for (auto *II : Info.LifetimeEnd)
576636
II->eraseFromParent();
577637
}
638+
639+
memtag::annotateDebugRecords(Info, static_cast<unsigned long>(Tag));
578640
}
579641

580642
// If we have instrumented at least one alloca, all unrecognized lifetime
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: opt < %s -aarch64-stack-tagging -stack-tagging-use-stack-safety=0 -S -o - | FileCheck %s --check-prefixes=CHECK
2+
; RUN: opt < %s -aarch64-stack-tagging -stack-tagging-use-stack-safety=0 -S -stack-tagging-record-stack-history=instr -o - | FileCheck %s --check-prefixes=INSTR
3+
; RUN: llc -mattr=+mte -stack-tagging-use-stack-safety=0 -stack-tagging-record-stack-history=instr %s -o - | FileCheck %s --check-prefixes=ASMINSTR
4+
5+
6+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
7+
target triple = "aarch64--linux-android10000"
8+
9+
declare void @use8(ptr)
10+
declare void @use32(ptr)
11+
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
12+
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
13+
14+
define dso_local void @noUse32(ptr) sanitize_memtag {
15+
entry:
16+
ret void
17+
}
18+
19+
define void @OneVar() sanitize_memtag {
20+
entry:
21+
%x = alloca i32, align 4
22+
call void @use32(ptr %x)
23+
ret void
24+
}
25+
26+
; CHECK-LABEL: define void @OneVar(
27+
; CHECK: [[BASE:%.*]] = call ptr @llvm.aarch64.irg.sp(i64 0)
28+
; CHECK: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
29+
; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], ptr [[BASE]], i64 0)
30+
; CHECK: ret void
31+
32+
; INSTR-LABEL: define void @OneVar(
33+
; INSTR: [[BASE:%.*]] = call ptr @llvm.aarch64.irg.sp(i64 0)
34+
; INSTR: [[TLS:%.*]] = call ptr @llvm.thread.pointer()
35+
; INSTR: [[TLS_SLOT:%.*]] = getelementptr i8, ptr [[TLS]], i32 -24
36+
; INSTR: [[TLS_VALUE:%.*]] = load i64, ptr %1, align 8
37+
; INSTR: [[FP:%.*]] = call ptr @llvm.frameaddress.p0(i32 0)
38+
; INSTR: [[FP_INT:%.*]] = ptrtoint ptr %3 to i64
39+
; INSTR: [[BASE_INT:%.*]] = ptrtoint ptr %basetag to i64
40+
; INSTR: [[BASE_TAG:%.*]] = and i64 [[BASE_INT]], 1080863910568919040
41+
; INSTR: [[TAGGED_FP:%.*]] = or i64 [[FP_INT]], [[BASE_TAG]]
42+
; INSTR: [[PC:%.*]] = call i64 @llvm.read_register.i64(metadata !0)
43+
; INSTR: [[TLS_VALUE_PTR:%.*]] = inttoptr i64 [[TLS_VALUE]] to ptr
44+
; INSTR: store i64 [[PC]], ptr [[TLS_VALUE_PTR]], align 8
45+
; INSTR: [[SECOND_SLOT:%.*]] = getelementptr i64, ptr [[TLS_VALUE_PTR]], i64 1
46+
; INSTR: store i64 [[TAGGED_FP]], ptr [[SECOND_SLOT]], align 8
47+
; INSTR: [[SIZE_IN_PAGES:%.*]] = ashr i64 [[TLS_VALUE]], 56
48+
; INSTR: [[WRAP_MASK_INTERMEDIARY:%.*]] = shl nuw nsw i64 [[SIZE_IN_PAGES]], 12
49+
; INSTR: [[WRAP_MASK:%.*]] = xor i64 [[WRAP_MASK_INTERMEDIARY]], -1
50+
; INSTR: [[NEXT_TLS_VALUE_BEFORE_WRAP:%.*]] = add i64 [[TLS_VALUE]], 16
51+
; INSTR: [[NEXT_TLS_VALUE:%.*]] = and i64 [[NEXT_TLS_VALUE_BEFORE_WRAP]], [[WRAP_MASK]]
52+
; INSTR: store i64 [[NEXT_TLS_VALUE]], ptr [[TLS_SLOT]], align 8
53+
; INSTR: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
54+
; INSTR: [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], ptr [[BASE]], i64 0)
55+
; INSTR: [[PC:!.*]] = !{!"pc"}
56+
57+
; ASMINSTR-LABEL: OneVar:
58+
; ASMINSTR: mrs [[TLS:x.*]], TPIDR_EL0
59+
; ASMINSTR: irg [[BASE:x.*]], sp
60+
; ASMINSTR: adr [[PC:x.*]], #0
61+
; ASMINSTR: ldur [[TLS_SLOT:x.*]], [[[TLS]], #-24]
62+
; ASMINSTR: and [[SP_TAG:x.*]], [[BASE]], #0xf00000000000000
63+
; ASMINSTR: orr [[TAGGED_FP:x.*]], x29, [[SP_TAG]]
64+
; ASMINSTR: asr [[TLS_SIZE:x.*]], [[TLS_SLOT]], #56
65+
; ASMINSTR: add [[NEXT_TLS_VALUE_BEFORE_WRAP:x.*]], [[TLS_SLOT]], #16
66+
; ASMINSTR: stp [[PC]], [[TAGGED_FP]], [[[TLS_SLOT]]]
67+
; ASMINSTR: bic [[NEXT_TLS_VALUE:x.*]], [[NEXT_TLS_VALUE_BEFORE_WRAP]], [[TLS_SIZE]], lsl #12
68+
; ASMINSTR: stur [[NEXT_TLS_VALUE]], [[[TLS]], #-24]
69+
; ASMINSTR: stg [[BASE]], [[[BASE]]]

0 commit comments

Comments
 (0)