Skip to content

Commit fb9ce10

Browse files
committed
hwasan: Add a tag_offset DWARF attribute to instrumented stack variables.
The goal is to improve hwasan's error reporting for stack use-after-return by recording enough information to allow the specific variable that was accessed to be identified based on the pointer's tag. Currently we record the PC and lower bits of SP for each stack frame we create (which will eventually be enough to derive the base tag used by the stack frame) but that's not enough to determine the specific tag for each variable, which is the stack frame's base tag XOR a value (the "tag offset") that is unique for each variable in a function.

In IR, the tag offset is most naturally represented as part of a location expression on the llvm.dbg.declare instruction. However, the presence of the tag offset in the variable's actual location expression is likely to confuse debuggers which won't know about tag offsets, and moreover the tag offset is not required for a debugger to determine the location of the variable on the stack, so at the DWARF level it is represented as an attribute so that it will be ignored by debuggers that don't know about it.

Differential Revision: https://reviews.llvm.org/D63119

llvm-svn: 363635
1 parent c3b6d77 commit fb9ce10

File tree

12 files changed

+144
-8
lines changed

12 files changed

+144
-8
lines changed

llvm/docs/LangRef.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4704,6 +4704,9 @@ The current supported opcode vocabulary is limited:
47044704
(``16`` and ``DW_ATE_signed`` here, respectively) to which the top of the
47054705
expression stack is to be converted. Maps into a ``DW_OP_convert`` operation
47064706
that references a base type constructed from the supplied values.
4707+
- ``DW_OP_LLVM_tag_offset, tag_offset`` specifies that a memory tag should be
4708+
optionally applied to the pointer. The memory tag is derived from the
4709+
given tag offset in an implementation-defined manner.
47074710
- ``DW_OP_swap`` swaps top two stack entries.
47084711
- ``DW_OP_xderef`` provides extended dereference mechanism. The entry at the top
47094712
of the stack is treated as an address. The second stack entry is treated as an

llvm/include/llvm/BinaryFormat/Dwarf.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,7 @@ HANDLE_DW_AT(0x3b31, BORLAND_closure, 0, BORLAND)
386386
HANDLE_DW_AT(0x3e00, LLVM_include_path, 0, LLVM)
387387
HANDLE_DW_AT(0x3e01, LLVM_config_macros, 0, LLVM)
388388
HANDLE_DW_AT(0x3e02, LLVM_isysroot, 0, LLVM)
389+
HANDLE_DW_AT(0x3e03, LLVM_tag_offset, 0, LLVM)
389390
// Apple extensions.
390391
HANDLE_DW_AT(0x3fe1, APPLE_optimized, 0, APPLE)
391392
HANDLE_DW_AT(0x3fe2, APPLE_flags, 0, APPLE)

llvm/include/llvm/BinaryFormat/Dwarf.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,9 @@ enum LocationAtom {
129129
#include "llvm/BinaryFormat/Dwarf.def"
130130
DW_OP_lo_user = 0xe0,
131131
DW_OP_hi_user = 0xff,
132-
DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
133-
DW_OP_LLVM_convert = 0x1001 ///< Only used in LLVM metadata.
132+
DW_OP_LLVM_fragment = 0x1000, ///< Only used in LLVM metadata.
133+
DW_OP_LLVM_convert = 0x1001, ///< Only used in LLVM metadata.
134+
DW_OP_LLVM_tag_offset = 0x1002, ///< Only used in LLVM metadata.
134135
};
135136

136137
enum TypeKind : uint8_t {

llvm/lib/BinaryFormat/Dwarf.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
147147
return "DW_OP_LLVM_convert";
148148
case DW_OP_LLVM_fragment:
149149
return "DW_OP_LLVM_fragment";
150+
case DW_OP_LLVM_tag_offset:
151+
return "DW_OP_LLVM_tag_offset";
150152
}
151153
}
152154

@@ -157,6 +159,7 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
157159
#include "llvm/BinaryFormat/Dwarf.def"
158160
.Case("DW_OP_LLVM_convert", DW_OP_LLVM_convert)
159161
.Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
162+
.Case("DW_OP_LLVM_tag_offset", DW_OP_LLVM_tag_offset)
160163
.Default(0);
161164
}
162165

llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -683,6 +683,9 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
683683
NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
684684
}
685685
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
686+
if (DwarfExpr.TagOffset)
687+
addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
688+
*DwarfExpr.TagOffset);
686689

687690
return VariableDie;
688691
}

llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,9 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
438438
emitOp(dwarf::DW_OP_deref_size);
439439
emitData1(Op->getArg(0));
440440
break;
441+
case dwarf::DW_OP_LLVM_tag_offset:
442+
TagOffset = Op->getArg(0);
443+
break;
441444
default:
442445
llvm_unreachable("unhandled opcode found in expression");
443446
}

llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,8 @@ class DwarfExpression {
140140
return LocationKind == Implicit;
141141
}
142142

143+
Optional<uint8_t> TagOffset;
144+
143145
protected:
144146
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
145147
/// to represent a subregister.

llvm/lib/IR/DebugInfoMetadata.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,7 @@ unsigned DIExpression::ExprOperand::getSize() const {
835835
case dwarf::DW_OP_constu:
836836
case dwarf::DW_OP_deref_size:
837837
case dwarf::DW_OP_plus_uconst:
838+
case dwarf::DW_OP_LLVM_tag_offset:
838839
return 2;
839840
default:
840841
return 1;
@@ -876,6 +877,7 @@ bool DIExpression::isValid() const {
876877
break;
877878
}
878879
case dwarf::DW_OP_LLVM_convert:
880+
case dwarf::DW_OP_LLVM_tag_offset:
879881
case dwarf::DW_OP_constu:
880882
case dwarf::DW_OP_plus_uconst:
881883
case dwarf::DW_OP_plus:
@@ -905,7 +907,9 @@ bool DIExpression::isImplicit() const {
905907
unsigned N = getNumElements();
906908
if (isValid() && N > 0) {
907909
switch (getElement(N-1)) {
908-
case dwarf::DW_OP_stack_value: return true;
910+
case dwarf::DW_OP_stack_value:
911+
case dwarf::DW_OP_LLVM_tag_offset:
912+
return true;
909913
case dwarf::DW_OP_LLVM_fragment:
910914
return N > 1 && getElement(N-2) == dwarf::DW_OP_stack_value;
911915
default: break;

llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "llvm/IR/Constant.h"
2222
#include "llvm/IR/Constants.h"
2323
#include "llvm/IR/DataLayout.h"
24+
#include "llvm/IR/DebugInfoMetadata.h"
2425
#include "llvm/IR/DerivedTypes.h"
2526
#include "llvm/IR/Function.h"
2627
#include "llvm/IR/IRBuilder.h"
@@ -205,8 +206,10 @@ class HWAddressSanitizer {
205206
bool tagAlloca(IRBuilder<> &IRB, AllocaInst *AI, Value *Tag);
206207
Value *tagPointer(IRBuilder<> &IRB, Type *Ty, Value *PtrLong, Value *Tag);
207208
Value *untagPointer(IRBuilder<> &IRB, Value *PtrLong);
208-
bool instrumentStack(SmallVectorImpl<AllocaInst *> &Allocas,
209-
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
209+
bool instrumentStack(
210+
SmallVectorImpl<AllocaInst *> &Allocas,
211+
DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
212+
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag);
210213
bool instrumentLandingPads(SmallVectorImpl<Instruction *> &RetVec);
211214
Value *getNextTagWithCall(IRBuilder<> &IRB);
212215
Value *getStackBaseTag(IRBuilder<> &IRB);
@@ -984,6 +987,7 @@ bool HWAddressSanitizer::instrumentLandingPads(
984987

985988
bool HWAddressSanitizer::instrumentStack(
986989
SmallVectorImpl<AllocaInst *> &Allocas,
990+
DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> &AllocaDeclareMap,
987991
SmallVectorImpl<Instruction *> &RetVec, Value *StackTag) {
988992
// Ideally, we want to calculate tagged stack base pointer, and rewrite all
989993
// alloca addresses using that. Unfortunately, offsets are not known yet
@@ -1008,6 +1012,13 @@ bool HWAddressSanitizer::instrumentStack(
10081012
U.set(Replacement);
10091013
}
10101014

1015+
for (auto *DDI : AllocaDeclareMap.lookup(AI)) {
1016+
DIExpression *OldExpr = DDI->getExpression();
1017+
DIExpression *NewExpr = DIExpression::append(
1018+
OldExpr, {dwarf::DW_OP_LLVM_tag_offset, RetagMask(N)});
1019+
DDI->setArgOperand(2, MetadataAsValue::get(*C, NewExpr));
1020+
}
1021+
10111022
tagAlloca(IRB, AI, Tag);
10121023

10131024
for (auto RI : RetVec) {
@@ -1051,6 +1062,7 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
10511062
SmallVector<AllocaInst*, 8> AllocasToInstrument;
10521063
SmallVector<Instruction*, 8> RetVec;
10531064
SmallVector<Instruction*, 8> LandingPadVec;
1065+
DenseMap<AllocaInst *, std::vector<DbgDeclareInst *>> AllocaDeclareMap;
10541066
for (auto &BB : F) {
10551067
for (auto &Inst : BB) {
10561068
if (ClInstrumentStack)
@@ -1069,6 +1081,10 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
10691081
isa<CleanupReturnInst>(Inst))
10701082
RetVec.push_back(&Inst);
10711083

1084+
if (auto *DDI = dyn_cast<DbgDeclareInst>(&Inst))
1085+
if (auto *Alloca = dyn_cast_or_null<AllocaInst>(DDI->getAddress()))
1086+
AllocaDeclareMap[Alloca].push_back(DDI);
1087+
10721088
if (ClInstrumentLandingPads && isa<LandingPadInst>(Inst))
10731089
LandingPadVec.push_back(&Inst);
10741090

@@ -1107,7 +1123,8 @@ bool HWAddressSanitizer::sanitizeFunction(Function &F) {
11071123
if (!AllocasToInstrument.empty()) {
11081124
Value *StackTag =
11091125
ClGenerateTagsWithCalls ? nullptr : getStackBaseTag(EntryIRB);
1110-
Changed |= instrumentStack(AllocasToInstrument, RetVec, StackTag);
1126+
Changed |= instrumentStack(AllocasToInstrument, AllocaDeclareMap, RetVec,
1127+
StackTag);
11111128
}
11121129

11131130
// If we split the entry block, move any allocas that were originally in the

llvm/test/Assembler/diexpression.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
; CHECK-SAME: !DIExpression(DW_OP_deref, DW_OP_plus_uconst, 3, DW_OP_LLVM_fragment, 3, 7),
1010
; CHECK-SAME: !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef),
1111
; CHECK-SAME: !DIExpression(DW_OP_plus_uconst, 3)
12-
; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed)}
12+
; CHECK-SAME: !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed)
13+
; CHECK-SAME: !DIExpression(DW_OP_LLVM_tag_offset, 1)}
1314

14-
!named = !{!0, !1, !2, !3, !4, !5, !6, !7}
15+
!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8}
1516

1617
!0 = !DIExpression()
1718
!1 = !DIExpression(DW_OP_deref)
@@ -21,3 +22,4 @@
2122
!5 = !DIExpression(DW_OP_constu, 2, DW_OP_swap, DW_OP_xderef)
2223
!6 = !DIExpression(DW_OP_plus_uconst, 3)
2324
!7 = !DIExpression(DW_OP_LLVM_convert, 16, DW_ATE_unsigned, DW_OP_LLVM_convert, 32, DW_ATE_signed)
25+
!8 = !DIExpression(DW_OP_LLVM_tag_offset, 1)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
; RUN: llc -o - %s | FileCheck %s
2+
3+
target triple="aarch64--"
4+
5+
; CHECK: .Linfo_string4:
6+
; CHECK-NEXT: .asciz "a"
7+
; CHECK: .Linfo_string6:
8+
; CHECK-NEXT: .asciz "b"
9+
10+
; CHECK: .byte 1 // DW_AT_LLVM_tag_offset
11+
; CHECK: .word .Linfo_string4 // DW_AT_name
12+
13+
; CHECK: .byte 2 // DW_AT_LLVM_tag_offset
14+
; CHECK: .word .Linfo_string6 // DW_AT_name
15+
16+
define void @f() !dbg !6 {
17+
entry:
18+
%a = alloca i8*
19+
%b = alloca i8*
20+
call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression(DW_OP_LLVM_tag_offset, 1)), !dbg !14
21+
call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression(DW_OP_LLVM_tag_offset, 2)), !dbg !14
22+
ret void, !dbg !15
23+
}
24+
25+
declare void @llvm.dbg.declare(metadata, metadata, metadata)
26+
27+
!llvm.dbg.cu = !{!0}
28+
!llvm.module.flags = !{!3, !4}
29+
!llvm.ident = !{!5}
30+
31+
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
32+
!1 = !DIFile(filename: "x.c", directory: "/")
33+
!2 = !{}
34+
!3 = !{i32 2, !"Dwarf Version", i32 4}
35+
!4 = !{i32 2, !"Debug Info Version", i32 3}
36+
!5 = !{!"clang"}
37+
!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags:
38+
DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
39+
!7 = !DISubroutineType(types: !8)
40+
!8 = !{null, !9}
41+
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
42+
!10 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !11)
43+
!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
44+
!12 = !DILocalVariable(name: "a", scope: !6, file: !1, line: 1, type: !9)
45+
!13 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 1, type: !9)
46+
!14 = !DILocation(line: 1, column: 29, scope: !6)
47+
!15 = !DILocation(line: 1, column: 37, scope: !6)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
; RUN: opt -hwasan -S -o - %s | FileCheck %s
2+
3+
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
4+
target triple = "aarch64--linux-android"
5+
6+
declare void @g(i8**, i8**, i8**, i8**, i8**, i8**)
7+
8+
define void @f() sanitize_hwaddress !dbg !6 {
9+
entry:
10+
%nodebug0 = alloca i8*
11+
%nodebug1 = alloca i8*
12+
%nodebug2 = alloca i8*
13+
%nodebug3 = alloca i8*
14+
%a = alloca i8*
15+
%b = alloca i8*
16+
; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4)
17+
call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14
18+
; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 4)
19+
call void @llvm.dbg.declare(metadata i8** %a, metadata !12, metadata !DIExpression()), !dbg !14
20+
; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6)
21+
call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14
22+
; CHECK: @llvm.dbg.declare{{.*}} !DIExpression(DW_OP_LLVM_tag_offset, 6)
23+
call void @llvm.dbg.declare(metadata i8** %b, metadata !13, metadata !DIExpression()), !dbg !14
24+
call void @g(i8** %nodebug0, i8** %nodebug1, i8** %nodebug2, i8** %nodebug3, i8** %a, i8** %b)
25+
ret void, !dbg !15
26+
}
27+
28+
declare void @llvm.dbg.declare(metadata, metadata, metadata)
29+
30+
!llvm.dbg.cu = !{!0}
31+
!llvm.module.flags = !{!3, !4}
32+
!llvm.ident = !{!5}
33+
34+
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
35+
!1 = !DIFile(filename: "x.c", directory: "/")
36+
!2 = !{}
37+
!3 = !{i32 2, !"Dwarf Version", i32 4}
38+
!4 = !{i32 2, !"Debug Info Version", i32 3}
39+
!5 = !{!"clang"}
40+
!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags:
41+
DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
42+
!7 = !DISubroutineType(types: !8)
43+
!8 = !{null, !9}
44+
!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
45+
!10 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !11)
46+
!11 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char)
47+
!12 = !DILocalVariable(name: "a", scope: !6, file: !1, line: 1, type: !9)
48+
!13 = !DILocalVariable(name: "b", scope: !6, file: !1, line: 1, type: !9)
49+
!14 = !DILocation(line: 1, column: 29, scope: !6)
50+
!15 = !DILocation(line: 1, column: 37, scope: !6)

0 commit comments

Comments
 (0)