Skip to content

Commit bd546e5

Browse files
committed
DWARFDebugLoc: Make parsing and error reporting more robust
Summary: While examining this class for possible use in lldb, I noticed two things: - it spits out parsing errors directly to stderr - the loclists parser can incorrectly return valid location lists when parsing malformed (truncated) data I improve the stderr situation by making the parseOneLocationList functions return Expected<T>s. The errors are still dumped to stderr by their callers, so this is only a partial fix, but it is enough for my use case, as I intend to parse the locations lists one by one. I fix the behavior in the truncated scenario by using the newly introduced DataExtractor Cursor API. I also add tests for handling the error cases, as they currently have no coverage. Reviewers: dblaikie, JDevlieghere, probinson Subscribers: lldb-commits, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D63591 llvm-svn: 370363
1 parent e1f73e9 commit bd546e5

File tree

5 files changed

+185
-66
lines changed

5 files changed

+185
-66
lines changed

llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLoc.h

+7-6
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ class DWARFDebugLoc {
2929
/// The ending address of the instruction range.
3030
uint64_t End;
3131
/// The location of the variable within the specified range.
32-
SmallString<4> Loc;
32+
SmallVector<uint8_t, 4> Loc;
3333
};
3434

3535
/// A list of locations that contain one variable.
@@ -68,8 +68,8 @@ class DWARFDebugLoc {
6868
/// Return the location list at the given offset or nullptr.
6969
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
7070

71-
Optional<LocationList> parseOneLocationList(DWARFDataExtractor Data,
72-
uint64_t *Offset);
71+
static Expected<LocationList>
72+
parseOneLocationList(const DWARFDataExtractor &Data, uint64_t *Offset);
7373
};
7474

7575
class DWARFDebugLoclists {
@@ -78,7 +78,7 @@ class DWARFDebugLoclists {
7878
uint8_t Kind;
7979
uint64_t Value0;
8080
uint64_t Value1;
81-
SmallVector<char, 4> Loc;
81+
SmallVector<uint8_t, 4> Loc;
8282
};
8383

8484
struct LocationList {
@@ -106,8 +106,9 @@ class DWARFDebugLoclists {
106106
/// Return the location list at the given offset or nullptr.
107107
LocationList const *getLocationListAtOffset(uint64_t Offset) const;
108108

109-
static Optional<LocationList>
110-
parseOneLocationList(DataExtractor Data, uint64_t *Offset, unsigned Version);
109+
static Expected<LocationList> parseOneLocationList(const DataExtractor &Data,
110+
uint64_t *Offset,
111+
unsigned Version);
111112
};
112113

113114
} // end namespace llvm

llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp

+43-54
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,10 @@ using namespace llvm;
2828
// expression that LLVM doesn't produce. Guessing the wrong version means we
2929
// won't be able to pretty print expressions in DWARF2 binaries produced by
3030
// non-LLVM tools.
31-
static void dumpExpression(raw_ostream &OS, ArrayRef<char> Data,
31+
static void dumpExpression(raw_ostream &OS, ArrayRef<uint8_t> Data,
3232
bool IsLittleEndian, unsigned AddressSize,
3333
const MCRegisterInfo *MRI, DWARFUnit *U) {
34-
DWARFDataExtractor Extractor(StringRef(Data.data(), Data.size()),
35-
IsLittleEndian, AddressSize);
34+
DWARFDataExtractor Extractor(toStringRef(Data), IsLittleEndian, AddressSize);
3635
DWARFExpression(Extractor, dwarf::DWARF_VERSION, AddressSize).print(OS, MRI, U);
3736
}
3837

@@ -83,47 +82,37 @@ void DWARFDebugLoc::dump(raw_ostream &OS, const MCRegisterInfo *MRI,
8382
}
8483
}
8584

86-
Optional<DWARFDebugLoc::LocationList>
87-
DWARFDebugLoc::parseOneLocationList(DWARFDataExtractor Data, uint64_t *Offset) {
85+
Expected<DWARFDebugLoc::LocationList>
86+
DWARFDebugLoc::parseOneLocationList(const DWARFDataExtractor &Data,
87+
uint64_t *Offset) {
8888
LocationList LL;
8989
LL.Offset = *Offset;
90+
DataExtractor::Cursor C(*Offset);
9091

9192
// 2.6.2 Location Lists
9293
// A location list entry consists of:
9394
while (true) {
9495
Entry E;
95-
if (!Data.isValidOffsetForDataOfSize(*Offset, 2 * Data.getAddressSize())) {
96-
WithColor::error() << "location list overflows the debug_loc section.\n";
97-
return None;
98-
}
9996

10097
// 1. A beginning address offset. ...
101-
E.Begin = Data.getRelocatedAddress(Offset);
98+
E.Begin = Data.getRelocatedAddress(C);
10299

103100
// 2. An ending address offset. ...
104-
E.End = Data.getRelocatedAddress(Offset);
101+
E.End = Data.getRelocatedAddress(C);
105102

103+
if (Error Err = C.takeError())
104+
return std::move(Err);
106105
// The end of any given location list is marked by an end of list entry,
107106
// which consists of a 0 for the beginning address offset and a 0 for the
108107
// ending address offset.
109-
if (E.Begin == 0 && E.End == 0)
108+
if (E.Begin == 0 && E.End == 0) {
109+
*Offset = C.tell();
110110
return LL;
111-
112-
if (!Data.isValidOffsetForDataOfSize(*Offset, 2)) {
113-
WithColor::error() << "location list overflows the debug_loc section.\n";
114-
return None;
115111
}
116112

117-
unsigned Bytes = Data.getU16(Offset);
118-
if (!Data.isValidOffsetForDataOfSize(*Offset, Bytes)) {
119-
WithColor::error() << "location list overflows the debug_loc section.\n";
120-
return None;
121-
}
113+
unsigned Bytes = Data.getU16(C);
122114
// A single location description describing the location of the object...
123-
StringRef str = Data.getData().substr(*Offset, Bytes);
124-
*Offset += Bytes;
125-
E.Loc.reserve(str.size());
126-
llvm::copy(str, std::back_inserter(E.Loc));
115+
Data.getU8(C, E.Loc, Bytes);
127116
LL.Entries.push_back(std::move(E));
128117
}
129118
}
@@ -133,67 +122,65 @@ void DWARFDebugLoc::parse(const DWARFDataExtractor &data) {
133122
AddressSize = data.getAddressSize();
134123

135124
uint64_t Offset = 0;
136-
while (data.isValidOffset(Offset + data.getAddressSize() - 1)) {
125+
while (Offset < data.getData().size()) {
137126
if (auto LL = parseOneLocationList(data, &Offset))
138127
Locations.push_back(std::move(*LL));
139-
else
128+
else {
129+
logAllUnhandledErrors(LL.takeError(), WithColor::error());
140130
break;
131+
}
141132
}
142-
if (data.isValidOffset(Offset))
143-
WithColor::error() << "failed to consume entire .debug_loc section\n";
144133
}
145134

146-
Optional<DWARFDebugLoclists::LocationList>
147-
DWARFDebugLoclists::parseOneLocationList(DataExtractor Data, uint64_t *Offset,
148-
unsigned Version) {
135+
Expected<DWARFDebugLoclists::LocationList>
136+
DWARFDebugLoclists::parseOneLocationList(const DataExtractor &Data,
137+
uint64_t *Offset, unsigned Version) {
149138
LocationList LL;
150139
LL.Offset = *Offset;
140+
DataExtractor::Cursor C(*Offset);
151141

152142
// dwarf::DW_LLE_end_of_list_entry is 0 and indicates the end of the list.
153-
while (auto Kind =
154-
static_cast<dwarf::LocationListEntry>(Data.getU8(Offset))) {
155-
143+
while (auto Kind = static_cast<dwarf::LocationListEntry>(Data.getU8(C))) {
156144
Entry E;
157145
E.Kind = Kind;
158146
switch (Kind) {
159147
case dwarf::DW_LLE_startx_length:
160-
E.Value0 = Data.getULEB128(Offset);
148+
E.Value0 = Data.getULEB128(C);
161149
// Pre-DWARF 5 has different interpretation of the length field. We have
162150
// to support both pre- and standartized styles for the compatibility.
163151
if (Version < 5)
164-
E.Value1 = Data.getU32(Offset);
152+
E.Value1 = Data.getU32(C);
165153
else
166-
E.Value1 = Data.getULEB128(Offset);
154+
E.Value1 = Data.getULEB128(C);
167155
break;
168156
case dwarf::DW_LLE_start_length:
169-
E.Value0 = Data.getAddress(Offset);
170-
E.Value1 = Data.getULEB128(Offset);
157+
E.Value0 = Data.getAddress(C);
158+
E.Value1 = Data.getULEB128(C);
171159
break;
172160
case dwarf::DW_LLE_offset_pair:
173-
E.Value0 = Data.getULEB128(Offset);
174-
E.Value1 = Data.getULEB128(Offset);
161+
E.Value0 = Data.getULEB128(C);
162+
E.Value1 = Data.getULEB128(C);
175163
break;
176164
case dwarf::DW_LLE_base_address:
177-
E.Value0 = Data.getAddress(Offset);
165+
E.Value0 = Data.getAddress(C);
178166
break;
179167
default:
180-
WithColor::error() << "dumping support for LLE of kind " << (int)Kind
181-
<< " not implemented\n";
182-
return None;
168+
cantFail(C.takeError());
169+
return createStringError(errc::illegal_byte_sequence,
170+
"LLE of kind %x not supported", (int)Kind);
183171
}
184172

185173
if (Kind != dwarf::DW_LLE_base_address) {
186-
unsigned Bytes =
187-
Version >= 5 ? Data.getULEB128(Offset) : Data.getU16(Offset);
174+
unsigned Bytes = Version >= 5 ? Data.getULEB128(C) : Data.getU16(C);
188175
// A single location description describing the location of the object...
189-
StringRef str = Data.getData().substr(*Offset, Bytes);
190-
*Offset += Bytes;
191-
E.Loc.resize(str.size());
192-
llvm::copy(str, E.Loc.begin());
176+
Data.getU8(C, E.Loc, Bytes);
193177
}
194178

195179
LL.Entries.push_back(std::move(E));
196180
}
181+
if (Error Err = C.takeError())
182+
return std::move(Err);
183+
*Offset = C.tell();
197184
return LL;
198185
}
199186

@@ -202,11 +189,13 @@ void DWARFDebugLoclists::parse(DataExtractor data, unsigned Version) {
202189
AddressSize = data.getAddressSize();
203190

204191
uint64_t Offset = 0;
205-
while (data.isValidOffset(Offset)) {
192+
while (Offset < data.getData().size()) {
206193
if (auto LL = parseOneLocationList(data, &Offset, Version))
207194
Locations.push_back(std::move(*LL));
208-
else
195+
else {
196+
logAllUnhandledErrors(LL.takeError(), WithColor::error());
209197
return;
198+
}
210199
}
211200
}
212201

llvm/lib/DebugInfo/DWARF/DWARFVerifier.cpp

+6-6
Original file line numberDiff line numberDiff line change
@@ -466,9 +466,9 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
466466
ReportError("DIE has invalid DW_AT_stmt_list encoding:");
467467
break;
468468
case DW_AT_location: {
469-
auto VerifyLocationExpr = [&](StringRef D) {
469+
auto VerifyLocationExpr = [&](ArrayRef<uint8_t> D) {
470470
DWARFUnit *U = Die.getDwarfUnit();
471-
DataExtractor Data(D, DCtx.isLittleEndian(), 0);
471+
DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), 0);
472472
DWARFExpression Expression(Data, U->getVersion(),
473473
U->getAddressByteSize());
474474
bool Error = llvm::any_of(Expression, [](DWARFExpression::Operation &Op) {
@@ -479,7 +479,7 @@ unsigned DWARFVerifier::verifyDebugInfoAttribute(const DWARFDie &Die,
479479
};
480480
if (Optional<ArrayRef<uint8_t>> Expr = AttrValue.Value.getAsBlock()) {
481481
// Verify inlined location.
482-
VerifyLocationExpr(llvm::toStringRef(*Expr));
482+
VerifyLocationExpr(*Expr);
483483
} else if (auto LocOffset = AttrValue.Value.getAsSectionOffset()) {
484484
// Verify location list.
485485
if (auto DebugLoc = DCtx.getDebugLoc())
@@ -1277,9 +1277,9 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
12771277
if (!Location)
12781278
return false;
12791279

1280-
auto ContainsInterestingOperators = [&](StringRef D) {
1280+
auto ContainsInterestingOperators = [&](ArrayRef<uint8_t> D) {
12811281
DWARFUnit *U = Die.getDwarfUnit();
1282-
DataExtractor Data(D, DCtx.isLittleEndian(), U->getAddressByteSize());
1282+
DataExtractor Data(toStringRef(D), DCtx.isLittleEndian(), U->getAddressByteSize());
12831283
DWARFExpression Expression(Data, U->getVersion(), U->getAddressByteSize());
12841284
return any_of(Expression, [](DWARFExpression::Operation &Op) {
12851285
return !Op.isError() && (Op.getCode() == DW_OP_addr ||
@@ -1290,7 +1290,7 @@ static bool isVariableIndexable(const DWARFDie &Die, DWARFContext &DCtx) {
12901290

12911291
if (Optional<ArrayRef<uint8_t>> Expr = Location->getAsBlock()) {
12921292
// Inlined location.
1293-
if (ContainsInterestingOperators(toStringRef(*Expr)))
1293+
if (ContainsInterestingOperators(*Expr))
12941294
return true;
12951295
} else if (Optional<uint64_t> Offset = Location->getAsSectionOffset()) {
12961296
// Location list.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE1=0 -o %t1.o
2+
# RUN: llvm-dwarfdump -debug-loc %t1.o 2>&1 | FileCheck %s
3+
4+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE2=0 -o %t2.o
5+
# RUN: llvm-dwarfdump -debug-loc %t2.o 2>&1 | FileCheck %s
6+
7+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE3=0 -o %t3.o
8+
# RUN: llvm-dwarfdump -debug-loc %t3.o 2>&1 | FileCheck %s
9+
10+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE4=0 -o %t4.o
11+
# RUN: llvm-dwarfdump -debug-loc %t4.o 2>&1 | FileCheck %s
12+
13+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE5=0 -o %t5.o
14+
# RUN: llvm-dwarfdump -debug-loc %t5.o 2>&1 | FileCheck %s
15+
16+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE6=0 -o %t6.o
17+
# RUN: llvm-dwarfdump -debug-loc %t6.o 2>&1 | FileCheck %s
18+
19+
# CHECK: error: unexpected end of data
20+
21+
.section .debug_loc,"",@progbits
22+
.ifdef CASE1
23+
.byte 1 # bogus
24+
.endif
25+
.ifdef CASE2
26+
.long 0 # starting offset
27+
.endif
28+
.ifdef CASE3
29+
.long 0 # starting offset
30+
.long 1 # ending offset
31+
.endif
32+
.ifdef CASE4
33+
.long 0 # starting offset
34+
.long 1 # ending offset
35+
.word 0 # Loc expr size
36+
.endif
37+
.ifdef CASE5
38+
.long 0 # starting offset
39+
.long 1 # ending offset
40+
.word 0 # Loc expr size
41+
.long 0 # starting offset
42+
.endif
43+
.ifdef CASE6
44+
.long 0 # starting offset
45+
.long 1 # ending offset
46+
.word 0xffff # Loc expr size
47+
.endif
48+
49+
# A minimal compile unit is needed to deduce the address size of the location
50+
# lists
51+
.section .debug_info,"",@progbits
52+
.long .Lcu_end0-.Lcu_begin0 # Length of Unit
53+
.Lcu_begin0:
54+
.short 4 # DWARF version number
55+
.long 0 # Offset Into Abbrev. Section
56+
.byte 8 # Address Size (in bytes)
57+
.byte 0 # End Of Children Mark
58+
.Lcu_end0:
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE1=0 -o %t1.o
2+
# RUN: llvm-dwarfdump -debug-loclists %t1.o 2>&1 | FileCheck %s --check-prefix=ULEB
3+
4+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE2=0 -o %t2.o
5+
# RUN: llvm-dwarfdump -debug-loclists %t2.o 2>&1 | FileCheck %s --check-prefix=ULEB
6+
7+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE3=0 -o %t3.o
8+
# RUN: llvm-dwarfdump -debug-loclists %t3.o 2>&1 | FileCheck %s --check-prefix=ULEB
9+
10+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE4=0 -o %t4.o
11+
# RUN: llvm-dwarfdump -debug-loclists %t4.o 2>&1 | FileCheck %s
12+
13+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE5=0 -o %t5.o
14+
# RUN: llvm-dwarfdump -debug-loclists %t5.o 2>&1 | FileCheck %s
15+
16+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE6=0 -o %t6.o
17+
# RUN: llvm-dwarfdump -debug-loclists %t6.o 2>&1 | FileCheck %s
18+
19+
# RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux --defsym CASE7=0 -o %t7.o
20+
# RUN: llvm-dwarfdump -debug-loclists %t7.o 2>&1 | FileCheck %s --check-prefix=UNIMPL
21+
22+
# CHECK: error: unexpected end of data
23+
# ULEB: error: malformed uleb128, extends past end
24+
# UNIMPL: error: LLE of kind 47 not supported
25+
26+
.section .debug_loclists,"",@progbits
27+
.long .Ldebug_loclist_table_end0-.Ldebug_loclist_table_start0
28+
.Ldebug_loclist_table_start0:
29+
.short 5 # Version.
30+
.byte 8 # Address size.
31+
.byte 0 # Segment selector size.
32+
.long 0 # Offset entry count.
33+
.Lloclists_table_base0:
34+
.Ldebug_loc0:
35+
.ifdef CASE1
36+
.byte 4 # DW_LLE_offset_pair
37+
.endif
38+
.ifdef CASE2
39+
.byte 4 # DW_LLE_offset_pair
40+
.uleb128 0x0 # starting offset
41+
.endif
42+
.ifdef CASE3
43+
.byte 4 # DW_LLE_offset_pair
44+
.uleb128 0x0 # starting offset
45+
.uleb128 0x10 # ending offset
46+
.endif
47+
.ifdef CASE4
48+
.byte 4 # DW_LLE_offset_pair
49+
.uleb128 0x0 # starting offset
50+
.uleb128 0x10 # ending offset
51+
.byte 1 # Loc expr size
52+
.endif
53+
.ifdef CASE5
54+
.byte 4 # DW_LLE_offset_pair
55+
.uleb128 0x0 # starting offset
56+
.uleb128 0x10 # ending offset
57+
.byte 1 # Loc expr size
58+
.byte 117 # DW_OP_breg5
59+
.endif
60+
.ifdef CASE6
61+
.byte 4 # DW_LLE_offset_pair
62+
.uleb128 0x0 # starting offset
63+
.uleb128 0x10 # ending offset
64+
.uleb128 0xdeadbeef # Loc expr size
65+
.endif
66+
.ifdef CASE7
67+
.byte 0x47
68+
.endif
69+
70+
.Ldebug_loclist_table_end0:
71+

0 commit comments

Comments
 (0)