Skip to content

Commit 4e67f45

Browse files
committed
Reapply "[MTE] add stack frame history buffer"
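In the reverted change, the order of the emitted IR depended on the host compiler, because calls that insert instructions were nested inside function-call arguments, and C++ leaves the evaluation order of function arguments unspecified. Fix that by creating those instructions in separate statements, and also fix another problem with the test. This reverts commit 3313f28.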
1 parent f68fdb8 commit 4e67f45

File tree

3 files changed

+134
-2
lines changed

3 files changed

+134
-2
lines changed

llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2500,7 +2500,8 @@ AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
25002500
return resolveFrameIndexReference(
25012501
MF, FI, FrameReg,
25022502
/*PreferFP=*/
2503-
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
2503+
MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress) ||
2504+
MF.getFunction().hasFnAttribute(Attribute::SanitizeMemTag),
25042505
/*ForSimm=*/false);
25052506
}
25062507

llvm/lib/Target/AArch64/AArch64StackTagging.cpp

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "AArch64InstrInfo.h"
1212
#include "AArch64Subtarget.h"
1313
#include "AArch64TargetMachine.h"
14+
#include "llvm/ADT/APInt.h"
1415
#include "llvm/ADT/MapVector.h"
1516
#include "llvm/ADT/SmallVector.h"
1617
#include "llvm/ADT/Statistic.h"
@@ -21,6 +22,7 @@
2122
#include "llvm/Analysis/ScalarEvolution.h"
2223
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
2324
#include "llvm/Analysis/StackSafetyAnalysis.h"
25+
#include "llvm/BinaryFormat/Dwarf.h"
2426
#include "llvm/CodeGen/LiveRegUnits.h"
2527
#include "llvm/CodeGen/MachineBasicBlock.h"
2628
#include "llvm/CodeGen/MachineFunction.h"
@@ -82,6 +84,26 @@ static cl::opt<size_t> ClMaxLifetimes(
8284
cl::desc("How many lifetime ends to handle for a single alloca."),
8385
cl::Optional);
8486

87+
// Mode for selecting how to insert frame record info into the stack ring
88+
// buffer. Each enumerator's identifier is also its spelling on the command line, because the option registers the values with clEnumVal.
89+
enum RecordStackHistoryMode {
90+
// Do not record frame record info.
91+
none,
92+
93+
// Insert instructions into the prologue for storing into the stack ring
94+
// buffer directly.
95+
instr,
96+
};
97+
98+
static cl::opt<RecordStackHistoryMode> ClRecordStackHistory(
99+
"stack-tagging-record-stack-history",
100+
cl::desc("Record stack frames with tagged allocations in a thread-local "
101+
"ring buffer"),
102+
cl::values(clEnumVal(none, "Do not record stack ring history"),
103+
clEnumVal(instr, "Insert instructions into the prologue for "
104+
"storing into the stack ring buffer")),
105+
cl::Hidden, cl::init(none)); // Hidden option; defaults to `none`, so no stack history is recorded unless requested.
106+
85107
static const Align kTagGranuleSize = Align(16);
86108

87109
namespace {
@@ -309,6 +331,7 @@ class AArch64StackTagging : public FunctionPass {
309331
uint64_t Size, InitializerBuilder &IB);
310332

311333
Instruction *insertBaseTaggedPointer(
334+
const Module &M,
312335
const MapVector<AllocaInst *, memtag::AllocaInfo> &Allocas,
313336
const DominatorTree *DT);
314337
bool runOnFunction(Function &F) override;
@@ -437,6 +460,7 @@ void AArch64StackTagging::untagAlloca(AllocaInst *AI, Instruction *InsertBefore,
437460
}
438461

439462
Instruction *AArch64StackTagging::insertBaseTaggedPointer(
463+
const Module &M,
440464
const MapVector<AllocaInst *, memtag::AllocaInfo> &AllocasToInstrument,
441465
const DominatorTree *DT) {
442466
BasicBlock *PrologueBB = nullptr;
@@ -458,6 +482,41 @@ Instruction *AArch64StackTagging::insertBaseTaggedPointer(
458482
Instruction *Base =
459483
IRB.CreateCall(IRG_SP, {Constant::getNullValue(IRB.getInt64Ty())});
460484
Base->setName("basetag");
485+
auto TargetTriple = Triple(M.getTargetTriple());
486+
// This is not a stable ABI for now, so only allow in dev builds with API
487+
// level 10000.
488+
// The ThreadLong format is the same as with HWASan, but the entries for
489+
// stack MTE take two slots (16 bytes).
490+
if (ClRecordStackHistory == instr && TargetTriple.isAndroid() &&
491+
TargetTriple.isAArch64() && !TargetTriple.isAndroidVersionLT(10000) &&
492+
!AllocasToInstrument.empty()) {
493+
constexpr int StackMteSlot = -3;
494+
constexpr uint64_t TagMask = 0xFULL << 56;
495+
496+
auto *IntptrTy = IRB.getIntPtrTy(M.getDataLayout());
497+
Value *SlotPtr = memtag::getAndroidSlotPtr(IRB, StackMteSlot);
498+
auto *ThreadLong = IRB.CreateLoad(IntptrTy, SlotPtr);
499+
Value *FP = memtag::getFP(IRB);
500+
Value *Tag = IRB.CreateAnd(IRB.CreatePtrToInt(Base, IntptrTy), TagMask);
501+
Value *TaggedFP = IRB.CreateOr(FP, Tag);
502+
Value *PC = memtag::getPC(TargetTriple, IRB);
503+
Value *RecordPtr = IRB.CreateIntToPtr(ThreadLong, IRB.getPtrTy(0));
504+
IRB.CreateStore(PC, RecordPtr);
505+
IRB.CreateStore(TaggedFP, IRB.CreateConstGEP1_64(IntptrTy, RecordPtr, 1));
506+
// Update the ring buffer. Top byte of ThreadLong defines the size of the
507+
// buffer in pages, it must be a power of two, and the start of the buffer
508+
// must be aligned by twice that much. Therefore wrap around of the ring
509+
// buffer is simply Addr &= ~((ThreadLong >> 56) << 12).
510+
// The use of AShr instead of LShr is due to
511+
// https://bugs.llvm.org/show_bug.cgi?id=39030
512+
// Runtime library makes sure not to use the highest bit.
513+
Value *WrapMask = IRB.CreateXor(
514+
IRB.CreateShl(IRB.CreateAShr(ThreadLong, 56), 12, "", true, true),
515+
ConstantInt::get(IntptrTy, (uint64_t)-1));
516+
Value *ThreadLongNew = IRB.CreateAnd(
517+
IRB.CreateAdd(ThreadLong, ConstantInt::get(IntptrTy, 16)), WrapMask);
518+
IRB.CreateStore(ThreadLongNew, SlotPtr);
519+
}
461520
return Base;
462521
}
463522

@@ -513,7 +572,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
513572
SetTagFunc =
514573
Intrinsic::getDeclaration(F->getParent(), Intrinsic::aarch64_settag);
515574

516-
Instruction *Base = insertBaseTaggedPointer(SInfo.AllocasToInstrument, DT);
575+
Instruction *Base =
576+
insertBaseTaggedPointer(*Fn.getParent(), SInfo.AllocasToInstrument, DT);
517577

518578
int NextTag = 0;
519579
for (auto &I : SInfo.AllocasToInstrument) {
@@ -575,6 +635,8 @@ bool AArch64StackTagging::runOnFunction(Function &Fn) {
575635
for (auto *II : Info.LifetimeEnd)
576636
II->eraseFromParent();
577637
}
638+
639+
memtag::annotateDebugRecords(Info, static_cast<unsigned long>(Tag));
578640
}
579641

580642
// If we have instrumented at least one alloca, all unrecognized lifetime
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: opt < %s -aarch64-stack-tagging -stack-tagging-use-stack-safety=0 -S -o - | FileCheck %s --check-prefixes=CHECK
2+
; RUN: opt < %s -aarch64-stack-tagging -stack-tagging-use-stack-safety=0 -S -stack-tagging-record-stack-history=instr -o - | FileCheck %s --check-prefixes=INSTR
3+
; RUN llc -mattr=+mte -stack-tagging-use-stack-safety=0 -stack-tagging-record-stack-history=instr %s -o - | FileCheck %s --check-prefixes=ASMINSTR
4+
5+
6+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
7+
target triple = "aarch64--linux-android10000"
8+
9+
declare void @use8(ptr)
10+
declare void @use32(ptr)
11+
declare void @llvm.lifetime.start.p0(i64, ptr nocapture)
12+
declare void @llvm.lifetime.end.p0(i64, ptr nocapture)
13+
14+
define dso_local void @noUse32(ptr) sanitize_memtag {
15+
entry:
16+
ret void
17+
}
18+
19+
define void @OneVar() sanitize_memtag {
20+
entry:
21+
%x = alloca i32, align 4
22+
call void @use32(ptr %x)
23+
ret void
24+
}
25+
26+
; CHECK-LABEL: define void @OneVar(
27+
; CHECK: [[BASE:%.*]] = call ptr @llvm.aarch64.irg.sp(i64 0)
28+
; CHECK: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
29+
; CHECK: [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], ptr [[BASE]], i64 0)
30+
; CHECK: ret void
31+
32+
; INSTR-LABEL: define void @OneVar(
33+
; INSTR: [[BASE:%.*]] = call ptr @llvm.aarch64.irg.sp(i64 0)
34+
; INSTR: [[TLS:%.*]] = call ptr @llvm.thread.pointer()
35+
; INSTR: [[TLS_SLOT:%.*]] = getelementptr i8, ptr [[TLS]], i32 -24
36+
; INSTR: [[TLS_VALUE:%.*]] = load i64, ptr [[TLS_SLOT]], align 8
37+
; INSTR: [[FP:%.*]] = call ptr @llvm.frameaddress.p0(i32 0)
38+
; INSTR: [[FP_INT:%.*]] = ptrtoint ptr [[FP]] to i64
39+
; INSTR: [[BASE_INT:%.*]] = ptrtoint ptr [[BASE]] to i64
40+
; INSTR: [[BASE_TAG:%.*]] = and i64 [[BASE_INT]], 1080863910568919040
41+
; INSTR: [[TAGGED_FP:%.*]] = or i64 [[FP_INT]], [[BASE_TAG]]
42+
; INSTR: [[PC:%.*]] = call i64 @llvm.read_register.i64(metadata !0)
43+
; INSTR: [[TLS_VALUE_PTR:%.*]] = inttoptr i64 [[TLS_VALUE]] to ptr
44+
; INSTR: store i64 [[PC]], ptr [[TLS_VALUE_PTR]], align 8
45+
; INSTR: [[SECOND_SLOT:%.*]] = getelementptr i64, ptr [[TLS_VALUE_PTR]], i64 1
46+
; INSTR: store i64 [[TAGGED_FP]], ptr [[SECOND_SLOT]], align 8
47+
; INSTR: [[SIZE_IN_PAGES:%.*]] = ashr i64 [[TLS_VALUE]], 56
48+
; INSTR: [[WRAP_MASK_INTERMEDIARY:%.*]] = shl nuw nsw i64 [[SIZE_IN_PAGES]], 12
49+
; INSTR: [[WRAP_MASK:%.*]] = xor i64 [[WRAP_MASK_INTERMEDIARY]], -1
50+
; INSTR: [[NEXT_TLS_VALUE_BEFORE_WRAP:%.*]] = add i64 [[TLS_VALUE]], 16
51+
; INSTR: [[NEXT_TLS_VALUE:%.*]] = and i64 [[NEXT_TLS_VALUE_BEFORE_WRAP]], [[WRAP_MASK]]
52+
; INSTR: store i64 [[NEXT_TLS_VALUE]], ptr [[TLS_SLOT]], align 8
53+
; INSTR: [[X:%.*]] = alloca { i32, [12 x i8] }, align 16
54+
; INSTR: [[TX:%.*]] = call ptr @llvm.aarch64.tagp.{{.*}}(ptr [[X]], ptr [[BASE]], i64 0)
55+
; INSTR: [[PC:!.*]] = !{!"pc"}
56+
57+
; ASMINSTR-LABEL: OneVar:
58+
; ASMINSTR: mrs [[TLS:x.*]], TPIDR_EL0
59+
; ASMINSTR: irg [[BASE:x.*]], sp
60+
; ASMINSTR: adr [[PC:x.*]], #0
61+
; ASMINSTR: ldur [[TLS_SLOT:x.*]], [[[TLS]], #-24]
62+
; ASMINSTR: and [[SP_TAG:x.*]], [[BASE]], #0xf00000000000000
63+
; ASMINSTR: orr [[TAGGED_FP:x.*]], x29, [[SP_TAG]]
64+
; ASMINSTR: asr [[TLS_SIZE:x.*]], [[TLS_SLOT]], #56
65+
; ASMINSTR: add [[NEXT_TLS_VALUE_BEFORE_WRAP:x.*]], [[TLS_SLOT]], #16
66+
; ASMINSTR: stp [[PC]], [[TAGGED_FP]], [[[TLS_SLOT]]]
67+
; ASMINSTR: bic [[NEXT_TLS_VALUE:x.*]], [[NEXT_TLS_VALUE_BEFORE_WRAP]], [[TLS_SIZE]], lsl #12
68+
; ASMINSTR: stur [[NEXT_TLS_VALUE]], [[[TLS]], #-24]
69+
; ASMINSTR: stg [[BASE]], [[[BASE]]]

0 commit comments

Comments
 (0)