Skip to content

Commit 8b845ac

Browse files
committed
Recommit "[lldb] Don't dissasemble large functions by default"
This recommits f665e80 which was reverted in 1cbd1b8 for breaking TestFoundationDisassembly.py. The fix is to use --force in the test to avoid bailing out on large functions. I have also doubled the large function limit to 8000 bytes (~~ 2000 insns), as the foundation library contains a lot of large-ish functions. The intent of this feature is to prevent accidental disassembling of enormous (multi-megabyte) "functions", not to get in people's way. The original commit message follows: If we have a binary without symbol information (and without LC_FUNCTION_STARTS, if on a mac), then we have to resort to using heuristics to determine the function boundaries. However, these don't always work, and so we can easily end up thinking we have functions which are several megabytes in size. Attempting to (accidentally) disassemble these can take a very long time and spam the terminal with thousands of lines of disassembly. This patch works around that problem by adding a sanity check to the disassemble command. If we are about to disassemble a function which is larger than a certain threshold, we will refuse to disassemble such a function unless the user explicitly specifies the number of instructions to disassemble, uses start/stop addresses for disassembly, or passes the (new) --force argument. The threshold is currently fairly aggressive (4000 bytes ~~ 1000 instructions). If needed, we can increase it, or even make it configurable. Differential Revision: https://reviews.llvm.org/D79789
1 parent 969c63a commit 8b845ac

File tree

7 files changed

+143
-23
lines changed

7 files changed

+143
-23
lines changed

lldb/source/Commands/CommandObjectDisassemble.cpp

Lines changed: 47 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
#include "lldb/Target/StackFrame.h"
2222
#include "lldb/Target/Target.h"
2323

24-
#define DEFAULT_DISASM_BYTE_SIZE 32
25-
#define DEFAULT_DISASM_NUM_INS 4
24+
static constexpr unsigned default_disasm_byte_size = 32;
25+
static constexpr unsigned default_disasm_num_ins = 4;
26+
static constexpr unsigned large_function_threshold = 8000;
2627

2728
using namespace lldb;
2829
using namespace lldb_private;
@@ -143,6 +144,10 @@ Status CommandObjectDisassemble::CommandOptions::SetOptionValue(
143144
}
144145
} break;
145146

147+
case '\x01':
148+
force = true;
149+
break;
150+
146151
default:
147152
llvm_unreachable("Unimplemented option");
148153
}
@@ -186,6 +191,7 @@ void CommandObjectDisassemble::CommandOptions::OptionParsingStarting(
186191

187192
arch.Clear();
188193
some_location_specified = false;
194+
force = false;
189195
}
190196

191197
Status CommandObjectDisassemble::CommandOptions::OptionParsingFinished(
@@ -214,6 +220,21 @@ CommandObjectDisassemble::CommandObjectDisassemble(
214220

215221
CommandObjectDisassemble::~CommandObjectDisassemble() = default;
216222

223+
// Guard against disassembling an unexpectedly large address range.
//
// Returns llvm::Error::success() when any of the following holds:
//   - m_options.num_instructions is greater than zero,
//   - m_options.force is set,
//   - the byte size of `range` is strictly less than large_function_threshold
//     (a range exactly at the threshold is rejected).
// Otherwise returns a string error whose text names `what`, includes the
// dumped address range, and lists the ways to override the check.
//
// `what` is spliced verbatim into the message after "Not disassembling ".
llvm::Error CommandObjectDisassemble::CheckRangeSize(const AddressRange &range,
224+
llvm::StringRef what) {
225+
if (m_options.num_instructions > 0 || m_options.force ||
226+
range.GetByteSize() < large_function_threshold)
227+
return llvm::Error::success();
228+
// The range is too large and nothing overrides the check: build the
// refusal message in a stream so the range can be dumped into it.
StreamString msg;
229+
msg << "Not disassembling " << what << " because it is very large ";
230+
range.Dump(&msg, &GetSelectedTarget(), Address::DumpStyleLoadAddress,
231+
Address::DumpStyleFileAddress);
232+
msg << ". To disassemble specify an instruction count limit, start/stop "
233+
"addresses or use the --force option.";
234+
return llvm::createStringError(llvm::inconvertibleErrorCode(),
235+
msg.GetString());
236+
}
237+
217238
llvm::Expected<std::vector<AddressRange>>
218239
CommandObjectDisassemble::GetContainingAddressRanges() {
219240
std::vector<AddressRange> ranges;
@@ -254,6 +275,9 @@ CommandObjectDisassemble::GetContainingAddressRanges() {
254275
"Could not find function bounds for address 0x%" PRIx64,
255276
m_options.symbol_containing_addr);
256277
}
278+
279+
if (llvm::Error err = CheckRangeSize(ranges[0], "the function"))
280+
return std::move(err);
257281
return ranges;
258282
}
259283

@@ -273,8 +297,10 @@ CommandObjectDisassemble::GetCurrentFunctionRanges() {
273297
else if (sc.symbol && sc.symbol->ValueIsAddress()) {
274298
range = {sc.symbol->GetAddress(), sc.symbol->GetByteSize()};
275299
} else
276-
range = {frame->GetFrameCodeAddress(), DEFAULT_DISASM_BYTE_SIZE};
300+
range = {frame->GetFrameCodeAddress(), default_disasm_byte_size};
277301

302+
if (llvm::Error err = CheckRangeSize(range, "the current function"))
303+
return std::move(err);
278304
return std::vector<AddressRange>{range};
279305
}
280306

@@ -298,7 +324,7 @@ CommandObjectDisassemble::GetCurrentLineRanges() {
298324
}
299325

300326
llvm::Expected<std::vector<AddressRange>>
301-
CommandObjectDisassemble::GetNameRanges() {
327+
CommandObjectDisassemble::GetNameRanges(CommandReturnObject &result) {
302328
ConstString name(m_options.func_name.c_str());
303329
const bool include_symbols = true;
304330
const bool include_inlines = true;
@@ -309,6 +335,7 @@ CommandObjectDisassemble::GetNameRanges() {
309335
name, eFunctionNameTypeAuto, include_symbols, include_inlines, sc_list);
310336

311337
std::vector<AddressRange> ranges;
338+
llvm::Error range_errs = llvm::Error::success();
312339
AddressRange range;
313340
const uint32_t scope =
314341
eSymbolContextBlock | eSymbolContextFunction | eSymbolContextSymbol;
@@ -317,14 +344,21 @@ CommandObjectDisassemble::GetNameRanges() {
317344
for (uint32_t range_idx = 0;
318345
sc.GetAddressRange(scope, range_idx, use_inline_block_range, range);
319346
++range_idx) {
320-
ranges.push_back(range);
347+
if (llvm::Error err = CheckRangeSize(range, "a range"))
348+
range_errs = joinErrors(std::move(range_errs), std::move(err));
349+
else
350+
ranges.push_back(range);
321351
}
322352
}
323353
if (ranges.empty()) {
354+
if (range_errs)
355+
return std::move(range_errs);
324356
return llvm::createStringError(llvm::inconvertibleErrorCode(),
325357
"Unable to find symbol with name '%s'.\n",
326358
name.GetCString());
327359
}
360+
if (range_errs)
361+
result.AppendWarning(toString(std::move(range_errs)));
328362
return ranges;
329363
}
330364

@@ -340,7 +374,7 @@ CommandObjectDisassemble::GetPCRanges() {
340374
if (m_options.num_instructions == 0) {
341375
// Disassembling at the PC always disassembles some number of
342376
// instructions (not the whole function).
343-
m_options.num_instructions = DEFAULT_DISASM_NUM_INS;
377+
m_options.num_instructions = default_disasm_num_ins;
344378
}
345379
return std::vector<AddressRange>{{frame->GetFrameCodeAddress(), 0}};
346380
}
@@ -359,15 +393,16 @@ CommandObjectDisassemble::GetStartEndAddressRanges() {
359393
}
360394

361395
llvm::Expected<std::vector<AddressRange>>
362-
CommandObjectDisassemble::GetRangesForSelectedMode() {
396+
CommandObjectDisassemble::GetRangesForSelectedMode(
397+
CommandReturnObject &result) {
363398
if (m_options.symbol_containing_addr != LLDB_INVALID_ADDRESS)
364399
return CommandObjectDisassemble::GetContainingAddressRanges();
365400
if (m_options.current_function)
366401
return CommandObjectDisassemble::GetCurrentFunctionRanges();
367402
if (m_options.frame_line)
368403
return CommandObjectDisassemble::GetCurrentLineRanges();
369404
if (!m_options.func_name.empty())
370-
return CommandObjectDisassemble::GetNameRanges();
405+
return CommandObjectDisassemble::GetNameRanges(result);
371406
if (m_options.start_addr != LLDB_INVALID_ADDRESS)
372407
return CommandObjectDisassemble::GetStartEndAddressRanges();
373408
return CommandObjectDisassemble::GetPCRanges();
@@ -440,7 +475,8 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
440475
if (m_options.raw)
441476
options |= Disassembler::eOptionRawOuput;
442477

443-
llvm::Expected<std::vector<AddressRange>> ranges = GetRangesForSelectedMode();
478+
llvm::Expected<std::vector<AddressRange>> ranges =
479+
GetRangesForSelectedMode(result);
444480
if (!ranges) {
445481
result.AppendError(toString(ranges.takeError()));
446482
result.SetStatus(eReturnStatusFailed);
@@ -453,7 +489,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
453489
if (m_options.num_instructions == 0) {
454490
limit = {Disassembler::Limit::Bytes, cur_range.GetByteSize()};
455491
if (limit.value == 0)
456-
limit.value = DEFAULT_DISASM_BYTE_SIZE;
492+
limit.value = default_disasm_byte_size;
457493
} else {
458494
limit = {Disassembler::Limit::Instructions, m_options.num_instructions};
459495
}
@@ -476,7 +512,7 @@ bool CommandObjectDisassemble::DoExecute(Args &command,
476512
result.SetStatus(eReturnStatusFailed);
477513
}
478514
if (print_sc_header)
479-
result.AppendMessage("\n");
515+
result.GetOutputStream() << "\n";
480516
}
481517

482518
return result.Succeeded();

lldb/source/Commands/CommandObjectDisassemble.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ class CommandObjectDisassemble : public CommandObjectParsed {
6262
// "at_pc". This should be set
6363
// in SetOptionValue if anything the selects a location is set.
6464
lldb::addr_t symbol_containing_addr;
65+
bool force = false;
6566
};
6667

6768
CommandObjectDisassemble(CommandInterpreter &interpreter);
@@ -73,15 +74,19 @@ class CommandObjectDisassemble : public CommandObjectParsed {
7374
protected:
7475
bool DoExecute(Args &command, CommandReturnObject &result) override;
7576

76-
llvm::Expected<std::vector<AddressRange>> GetRangesForSelectedMode();
77+
llvm::Expected<std::vector<AddressRange>>
78+
GetRangesForSelectedMode(CommandReturnObject &result);
7779

7880
llvm::Expected<std::vector<AddressRange>> GetContainingAddressRanges();
7981
llvm::Expected<std::vector<AddressRange>> GetCurrentFunctionRanges();
8082
llvm::Expected<std::vector<AddressRange>> GetCurrentLineRanges();
81-
llvm::Expected<std::vector<AddressRange>> GetNameRanges();
83+
llvm::Expected<std::vector<AddressRange>>
84+
GetNameRanges(CommandReturnObject &result);
8285
llvm::Expected<std::vector<AddressRange>> GetPCRanges();
8386
llvm::Expected<std::vector<AddressRange>> GetStartEndAddressRanges();
8487

88+
llvm::Error CheckRangeSize(const AddressRange &range, llvm::StringRef what);
89+
8590
CommandOptions m_options;
8691
};
8792

lldb/source/Commands/Options.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -311,7 +311,7 @@ let Command = "disassemble" in {
311311
Desc<"Address at which to start disassembling.">;
312312
def disassemble_options_end_address : Option<"end-address", "e">, Group<1>,
313313
Arg<"AddressOrExpression">, Desc<"Address at which to end disassembling.">;
314-
def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5]>,
314+
def disassemble_options_count : Option<"count", "c">, Groups<[2,3,4,5,7]>,
315315
Arg<"NumLines">, Desc<"Number of instructions to display.">;
316316
def disassemble_options_name : Option<"name", "n">, Group<3>,
317317
Arg<"FunctionName">, Completion<"Symbol">,
@@ -326,6 +326,8 @@ let Command = "disassemble" in {
326326
def disassemble_options_address : Option<"address", "a">, Group<7>,
327327
Arg<"AddressOrExpression">,
328328
Desc<"Disassemble function containing this address.">;
329+
// --force: lets `disassemble` proceed on functions it would otherwise refuse
// to disassemble for being too large.
def disassemble_options_force : Option<"force", "\\x01">, Groups<[2,3,4,5,7]>,
330+
Desc<"Force disassembly of large functions.">;
329331
}
330332

331333
let Command = "expression" in {

lldb/test/API/lang/objc/foundation/TestFoundationDisassembly.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ def test_foundation_disasm(self):
6161
if match:
6262
func = match.group(1)
6363
self.runCmd('image lookup -s "%s"' % func)
64-
self.runCmd('disassemble -n "%s"' % func)
64+
self.runCmd('disassemble --force -n "%s"' % func)
6565

6666
@skipIfAsan
6767
def test_simple_disasm(self):

lldb/test/Shell/Commands/Inputs/command-disassemble.lldbinit

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@ disassemble --start-address 0x0
66
disassemble --start-address 0x4 --end-address 0x8
77
disassemble --start-address 0x8 --end-address 0x4
88
disassemble --address 0x0
9-
disassemble --address 0xdead
9+
disassemble --address 0xdeadb
10+
disassemble --address 0x100
11+
disassemble --address 0x100 --count 3
12+
disassemble --address 0x100 --force
1013
disassemble --start-address 0x0 --count 7
1114
disassemble --start-address 0x0 --end-address 0x20 --count 7
12-
disassemble --address 0x0 --count 7
15+
disassemble --name case1
16+
disassemble --name case2
17+
disassemble --name case3
18+
disassemble --name case3 --count 3

lldb/test/Shell/Commands/command-disassemble-process.yaml

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
# REQUIRES: x86
22

3-
# RUN: yaml2obj --docnum=1 %s > %T/command-disassemble-process.exe
3+
# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=8 %s > %T/command-disassemble-process.exe
4+
# RUN: yaml2obj --docnum=1 -DMAIN_SIZE=4000 %s > %T/command-disassemble-process.big.exe
45
# RUN: yaml2obj --docnum=2 %s > %t
56

67
# RUN: %lldb -c %t %T/command-disassemble-process.exe \
78
# RUN: -o "settings set interpreter.stop-command-source-on-error false" \
89
# RUN: -s %S/Inputs/command-disassemble-process.lldbinit -o exit 2>&1 \
910
# RUN: | FileCheck %s
1011

12+
# RUN: %lldb -c %t %T/command-disassemble-process.big.exe \
13+
# RUN: -o disassemble -o exit 2>&1 | FileCheck %s --check-prefix=BIG
14+
1115
# CHECK: (lldb) disassemble
1216
# CHECK-NEXT: command-disassemble-process.exe`main:
1317
# CHECK-NEXT: 0x4002 <+0>: addb %al, (%rcx)
@@ -59,6 +63,8 @@
5963
# CHECK-NEXT: 0x400e: addb %cl, (%rcx)
6064
# CHECK-NEXT: 0x4010: addb %cl, (%rdx)
6165

66+
# BIG: error: Not disassembling the current function because it is very large [0x0000000000004002-0x0000000000004fa2). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
67+
6268
--- !ELF
6369
FileHeader:
6470
Class: ELFCLASS64
@@ -72,6 +78,7 @@ Sections:
7278
Address: 0x0000000000004000
7379
AddressAlign: 0x0000000000001000
7480
Content: 00000001000200030006000700080009000A000B000E000F00100011001200130016001700180019001A001B001E001F00200021002200230026002700280029002A002B002E002F
81+
Size: 0x10000
7582
- Name: .note.gnu.build-id
7683
Type: SHT_NOTE
7784
Flags: [ SHF_ALLOC ]
@@ -83,7 +90,7 @@ Symbols:
8390
Type: STT_FUNC
8491
Section: .text
8592
Value: 0x0000000000004002
86-
Size: 0x0000000000000008
93+
Size: [[MAIN_SIZE]]
8794
ProgramHeaders:
8895
- Type: PT_LOAD
8996
Flags: [ PF_X, PF_R ]

lldb/test/Shell/Commands/command-disassemble.s

Lines changed: 68 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,19 @@
5151
# CHECK-NEXT: command-disassemble.s.tmp[0x8] <+8>: int $0x14
5252
# CHECK-NEXT: command-disassemble.s.tmp[0xa] <+10>: int $0x15
5353
# CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16
54-
# CHECK-NEXT: (lldb) disassemble --address 0xdead
55-
# CHECK-NEXT: error: Could not find function bounds for address 0xdead
54+
# CHECK-NEXT: (lldb) disassemble --address 0xdeadb
55+
# CHECK-NEXT: error: Could not find function bounds for address 0xdeadb
56+
# CHECK-NEXT: (lldb) disassemble --address 0x100
57+
# CHECK-NEXT: error: Not disassembling the function because it is very large [0x0000000000000040-0x0000000000002040). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
58+
# CHECK-NEXT: (lldb) disassemble --address 0x100 --count 3
59+
# CHECK-NEXT: command-disassemble.s.tmp`very_long:
60+
# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a
61+
# CHECK-NEXT: command-disassemble.s.tmp[0x42] <+2>: int $0x2a
62+
# CHECK-NEXT: command-disassemble.s.tmp[0x44] <+4>: int $0x2a
63+
# CHECK-NEXT: (lldb) disassemble --address 0x100 --force
64+
# CHECK-NEXT: command-disassemble.s.tmp`very_long:
65+
# CHECK-NEXT: command-disassemble.s.tmp[0x40] <+0>: int $0x2a
66+
# CHECK: command-disassemble.s.tmp[0x203e] <+8190>: int $0x2a
5667
# CHECK-NEXT: (lldb) disassemble --start-address 0x0 --count 7
5768
# CHECK-NEXT: command-disassemble.s.tmp`foo:
5869
# CHECK-NEXT: command-disassemble.s.tmp[0x0] <+0>: int $0x10
@@ -64,8 +75,32 @@
6475
# CHECK-NEXT: command-disassemble.s.tmp[0xc] <+12>: int $0x16
6576
# CHECK-NEXT: (lldb) disassemble --start-address 0x0 --end-address 0x20 --count 7
6677
# CHECK-NEXT: error: invalid combination of options for the given command
67-
# CHECK-NEXT: (lldb) disassemble --address 0x0 --count 7
68-
# CHECK-NEXT: error: invalid combination of options for the given command
78+
# CHECK-NEXT: (lldb) disassemble --name case1
79+
# CHECK-NEXT: command-disassemble.s.tmp`n1::case1:
80+
# CHECK-NEXT: command-disassemble.s.tmp[0x2040] <+0>: int $0x30
81+
# CHECK-EMPTY:
82+
# CHECK-NEXT: command-disassemble.s.tmp`n2::case1:
83+
# CHECK-NEXT: command-disassemble.s.tmp[0x2042] <+0>: int $0x31
84+
# CHECK-EMPTY:
85+
# CHECK-NEXT: (lldb) disassemble --name case2
86+
# CHECK-NEXT: command-disassemble.s.tmp`n1::case2:
87+
# CHECK-NEXT: command-disassemble.s.tmp[0x2044] <+0>: int $0x32
88+
# CHECK-NEXT: warning: Not disassembling a range because it is very large [0x0000000000002046-0x0000000000004046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
89+
# CHECK-NEXT: (lldb) disassemble --name case3
90+
# CHECK-NEXT: error: Not disassembling a range because it is very large [0x0000000000004046-0x0000000000006046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
91+
# CHECK-NEXT: Not disassembling a range because it is very large [0x0000000000006046-0x0000000000008046). To disassemble specify an instruction count limit, start/stop addresses or use the --force option.
92+
# CHECK-NEXT: (lldb) disassemble --name case3 --count 3
93+
# CHECK-NEXT: command-disassemble.s.tmp`n1::case3:
94+
# CHECK-NEXT: command-disassemble.s.tmp[0x4046] <+0>: int $0x2a
95+
# CHECK-NEXT: command-disassemble.s.tmp[0x4048] <+2>: int $0x2a
96+
# CHECK-NEXT: command-disassemble.s.tmp[0x404a] <+4>: int $0x2a
97+
# CHECK-EMPTY:
98+
# CHECK-NEXT: command-disassemble.s.tmp`n2::case3:
99+
# CHECK-NEXT: command-disassemble.s.tmp[0x6046] <+0>: int $0x2a
100+
# CHECK-NEXT: command-disassemble.s.tmp[0x6048] <+2>: int $0x2a
101+
# CHECK-NEXT: command-disassemble.s.tmp[0x604a] <+4>: int $0x2a
102+
# CHECK-EMPTY:
103+
69104

70105
.text
71106
foo:
@@ -102,3 +137,32 @@ bar:
102137
int $0x2d
103138
int $0x2e
104139
int $0x2f
140+
141+
very_long:
142+
.rept 0x1000
143+
int $42
144+
.endr
145+
146+
_ZN2n15case1Ev:
147+
int $0x30
148+
149+
_ZN2n25case1Ev:
150+
int $0x31
151+
152+
_ZN2n15case2Ev:
153+
int $0x32
154+
155+
_ZN2n25case2Ev:
156+
.rept 0x1000
157+
int $42
158+
.endr
159+
160+
_ZN2n15case3Ev:
161+
.rept 0x1000
162+
int $42
163+
.endr
164+
165+
_ZN2n25case3Ev:
166+
.rept 0x1000
167+
int $42
168+
.endr

0 commit comments

Comments
 (0)