Skip to content

Commit 953bdce

Browse files
committed
[MC] Separate masm integer literal lexer support from inline asm
Summary:
This renames the IsParsingMSInlineAsm member variable of AsmLexer to LexMasmIntegers and moves it up to MCAsmLexer. This is the only behavior controlled by that variable. I added a public setter, so that it can be set from outside or from the llvm-mc command line. We may need to arrange things so that users can get this behavior from clang, but that's future work.

I also put additional hex literal lexing functionality under this flag to fix PR32973. It appears that this hex literal parsing wasn't intended to be enabled in non-masm-style blocks.

Now, masm integers (0b1101 and 0ABCh) work in __asm blocks from clang, but 0b label references work when using .intel_syntax in standalone .s files.

However, 0b label references will *not* work from __asm blocks in clang. They will work from GCC inline asm blocks, which it sounds like is important for Crypto++ as mentioned in PR36144.

Essentially, we only lex masm literals for inline asm blobs that use intel syntax. If the .intel_syntax directive is used inside a gnu-style inline asm statement, masm literals will not be lexed, which is compatible with gas and llvm-mc standalone .s assembly.

This fixes PR36144 and PR32973.

Reviewers: Gerolf, avt77

Subscribers: eraman, hiraditya, llvm-commits

Differential Revision: https://reviews.llvm.org/D53535

llvm-svn: 345189
1 parent 1c35341 commit 953bdce

File tree

11 files changed

+47
-26
lines changed

11 files changed

+47
-26
lines changed

llvm/include/llvm/MC/MCParser/AsmLexer.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class AsmLexer : public MCAsmLexer {
3030
StringRef CurBuf;
3131
bool IsAtStartOfLine = true;
3232
bool IsAtStartOfStatement = true;
33-
bool IsParsingMSInlineAsm = false;
3433
bool IsPeeking = false;
3534

3635
protected:
@@ -44,7 +43,6 @@ class AsmLexer : public MCAsmLexer {
4443
~AsmLexer() override;
4544

4645
void setBuffer(StringRef Buf, const char *ptr = nullptr);
47-
void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
4846

4947
StringRef LexUntilEndOfStatement() override;
5048

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ class MCAsmLexer {
5050
bool SkipSpace = true;
5151
bool AllowAtInIdentifier;
5252
bool IsAtStartOfStatement = true;
53+
bool LexMasmIntegers = false;
5354
AsmCommentConsumer *CommentConsumer = nullptr;
5455

5556
MCAsmLexer();
@@ -146,6 +147,10 @@ class MCAsmLexer {
146147
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
147148
this->CommentConsumer = CommentConsumer;
148149
}
150+
151+
/// Set whether to lex masm-style binary and hex literals. They look like
152+
/// 0b1101 and 0ABCh respectively.
153+
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
149154
};
150155

151156
} // end namespace llvm

llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,9 +156,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
156156
Parser->setAssemblerDialect(Dialect);
157157
Parser->setTargetParser(*TAP.get());
158158
Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
159+
// Enable lexing Masm binary and hex integer literals in intel inline
160+
// assembly.
159161
if (Dialect == InlineAsm::AD_Intel)
160-
// We need this flag to be able to parse numbers like "0bH"
161-
Parser->setParsingInlineAsm(true);
162+
Parser->getLexer().setLexMasmIntegers(true);
162163
if (MF) {
163164
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
164165
TAP->SetFrameRegister(TRI->getFrameRegister(*MF));

llvm/lib/MC/MCParser/AsmLexer.cpp

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -243,22 +243,26 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
243243

244244
// Look ahead to search for first non-hex digit, if it's [hH], then we treat the
245245
// integer as a hexadecimal, possibly with leading zeroes.
246-
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
247-
const char *FirstHex = nullptr;
246+
static unsigned doHexLookAhead(const char *&CurPtr, unsigned DefaultRadix,
247+
bool LexHex) {
248+
const char *FirstNonDec = nullptr;
248249
const char *LookAhead = CurPtr;
249250
while (true) {
250251
if (isDigit(*LookAhead)) {
251252
++LookAhead;
252-
} else if (isHexDigit(*LookAhead)) {
253-
if (!FirstHex)
254-
FirstHex = LookAhead;
255-
++LookAhead;
256253
} else {
257-
break;
254+
if (!FirstNonDec)
255+
FirstNonDec = LookAhead;
256+
257+
// Keep going if we are looking for a 'h' suffix.
258+
if (LexHex && isHexDigit(*LookAhead))
259+
++LookAhead;
260+
else
261+
break;
258262
}
259263
}
260-
bool isHex = *LookAhead == 'h' || *LookAhead == 'H';
261-
CurPtr = isHex || !FirstHex ? LookAhead : FirstHex;
264+
bool isHex = LexHex && (*LookAhead == 'h' || *LookAhead == 'H');
265+
CurPtr = isHex || !FirstNonDec ? LookAhead : FirstNonDec;
262266
if (isHex)
263267
return 16;
264268
return DefaultRadix;
@@ -281,7 +285,7 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
281285
AsmToken AsmLexer::LexDigit() {
282286
// MASM-flavor binary integer: [01]+[bB]
283287
// MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
284-
if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
288+
if (LexMasmIntegers && isdigit(CurPtr[-1])) {
285289
const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
286290
CurPtr - 1 : nullptr;
287291
const char *OldCurPtr = CurPtr;
@@ -320,7 +324,7 @@ AsmToken AsmLexer::LexDigit() {
320324

321325
// Decimal integer: [1-9][0-9]*
322326
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
323-
unsigned Radix = doLookAhead(CurPtr, 10);
327+
unsigned Radix = doHexLookAhead(CurPtr, 10, LexMasmIntegers);
324328
bool isHex = Radix == 16;
325329
// Check for floating point literals.
326330
if (!isHex && (*CurPtr == '.' || *CurPtr == 'e')) {
@@ -335,8 +339,8 @@ AsmToken AsmLexer::LexDigit() {
335339
return ReturnError(TokStart, !isHex ? "invalid decimal number" :
336340
"invalid hexdecimal number");
337341

338-
// Consume the [bB][hH].
339-
if (Radix == 2 || Radix == 16)
342+
// Consume the [hH].
343+
if (LexMasmIntegers && Radix == 16)
340344
++CurPtr;
341345

342346
// The darwin/x86 (and x86-64) assembler accepts and ignores type
@@ -346,7 +350,7 @@ AsmToken AsmLexer::LexDigit() {
346350
return intToken(Result, Value);
347351
}
348352

349-
if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
353+
if (!LexMasmIntegers && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
350354
++CurPtr;
351355
// See if we actually have "0b" as part of something like "jmp 0b\n"
352356
if (!isDigit(CurPtr[0])) {
@@ -395,7 +399,7 @@ AsmToken AsmLexer::LexDigit() {
395399
return ReturnError(TokStart, "invalid hexadecimal number");
396400

397401
// Consume the optional [hH].
398-
if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
402+
if (LexMasmIntegers && (*CurPtr == 'h' || *CurPtr == 'H'))
399403
++CurPtr;
400404

401405
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -407,7 +411,7 @@ AsmToken AsmLexer::LexDigit() {
407411

408412
// Either octal or hexadecimal.
409413
APInt Value(128, 0, true);
410-
unsigned Radix = doLookAhead(CurPtr, 8);
414+
unsigned Radix = doHexLookAhead(CurPtr, 8, LexMasmIntegers);
411415
bool isHex = Radix == 16;
412416
StringRef Result(TokStart, CurPtr - TokStart);
413417
if (Result.getAsInteger(Radix, Value))

llvm/lib/MC/MCParser/AsmParser.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,9 @@ class AsmParser : public MCAsmParser {
229229

230230
void setParsingInlineAsm(bool V) override {
231231
ParsingInlineAsm = V;
232-
Lexer.setParsingMSInlineAsm(V);
232+
// When parsing MS inline asm, we must lex 0b1101 and 0ABCH as binary and
233+
// hex integer literals.
234+
Lexer.setLexMasmIntegers(V);
233235
}
234236
bool isParsingInlineAsm() override { return ParsingInlineAsm; }
235237

llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3283,7 +3283,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
32833283
if (IDVal.startswith(".code"))
32843284
return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
32853285
else if (IDVal.startswith(".att_syntax")) {
3286-
getParser().setParsingInlineAsm(false);
32873286
if (getLexer().isNot(AsmToken::EndOfStatement)) {
32883287
if (Parser.getTok().getString() == "prefix")
32893288
Parser.Lex();
@@ -3296,7 +3295,6 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
32963295
return false;
32973296
} else if (IDVal.startswith(".intel_syntax")) {
32983297
getParser().setAssemblerDialect(1);
3299-
getParser().setParsingInlineAsm(true);
33003298
if (getLexer().isNot(AsmToken::EndOfStatement)) {
33013299
if (Parser.getTok().getString() == "noprefix")
33023300
Parser.Lex();

llvm/test/MC/AArch64/macro-hex-int.s

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// RUN: llvm-mc -triple aarch64-elf -filetype=obj %s -o - | llvm-objdump -d -r - | FileCheck %s
2+
3+
.macro do_add sz
4+
add v0.\sz, v0.\sz, v0.\sz
5+
.endm
6+
7+
do_add 8h
8+
// CHECK: add v0.8h, v0.8h, v0.8h

llvm/test/MC/X86/intel-syntax-hex.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
1+
// RUN: llvm-mc -masm-integers -triple x86_64-unknown-unknown -x86-asm-syntax=intel %s | FileCheck %s
22
// rdar://12470373
33

44
// Checks to make sure we parse the hexadecimal suffix properly.

llvm/test/MC/X86/pr27884.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// RUN: llvm-mc -triple x86_64-unknown-unknown %s
1+
// RUN: llvm-mc -triple x86_64-unknown-unknown %s -masm-integers=1
22

33
.intel_syntax
44
add rbx, 0B0h

llvm/test/tools/llvm-mca/X86/intel-syntax.s

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
.intel_syntax noprefix
77
mov eax, 1
8-
mov ebx, 0ffh
8+
mov ebx, 0xff
99
imul esi, edi
1010
lea eax, [rsi + rdi]
1111

llvm/tools/llvm-mc/llvm-mc.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,10 @@ MainFileName("main-file-name",
164164
static cl::opt<bool> SaveTempLabels("save-temp-labels",
165165
cl::desc("Don't discard temporary labels"));
166166

167+
static cl::opt<bool> LexMasmIntegers(
168+
"masm-integers",
169+
cl::desc("Enable binary and hex masm integers (0b110 and 0ABCh)"));
170+
167171
static cl::opt<bool> NoExecStack("no-exec-stack",
168172
cl::desc("File doesn't need an exec stack"));
169173

@@ -293,6 +297,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget,
293297
return SymbolResult;
294298
Parser->setShowParsedOperands(ShowInstOperands);
295299
Parser->setTargetParser(*TAP);
300+
Parser->getLexer().setLexMasmIntegers(LexMasmIntegers);
296301

297302
int Res = Parser->Run(NoInitialTextSection);
298303

0 commit comments

Comments
 (0)