Skip to content

Commit 5202200

Browse files
committed
[MC] Merge MCAsmLexer.{h,cpp} into AsmLexer.{h,cpp}
2b11c7d introduced `llvm/include/llvm/MC/MCAsmLexer.h` and made `AsmLexer` inherit from `MCAsmLexer`, likely to allow target-specific parsers to depend solely on `MCAsmLexer`. However, this separation now seems unnecessary and confusing. `MCAsmLexer` defines virtual functions with `AsmLexer` as its only implementation, and `AsmLexer` itself has few extra public methods. To simplify the codebase, this change merges MCAsmLexer.{h,cpp} into AsmLexer.{h,cpp}. MCAsmLexer.h is temporarily kept as a forwarder. Note: I doubt that a downstream lexer handling an assembly syntax significantly different from the standard GNU Assembler syntax would want to inherit from `MCAsmLexer`. Instead, it's more likely they'd extend `AsmLexer` by adding new states and modifying its internal logic, as seen with variables for MASM, M68k, and HLASM.
1 parent 4998273 commit 5202200

File tree

6 files changed

+255
-293
lines changed

6 files changed

+255
-293
lines changed

llvm/include/llvm/MC/MCParser/AsmLexer.h

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,182 @@
1313
#ifndef LLVM_MC_MCPARSER_ASMLEXER_H
1414
#define LLVM_MC_MCPARSER_ASMLEXER_H
1515

16+
#include "llvm/ADT/ArrayRef.h"
17+
#include "llvm/ADT/SmallVector.h"
1618
#include "llvm/ADT/StringRef.h"
17-
#include "llvm/MC/MCParser/MCAsmLexer.h"
19+
#include "llvm/MC/MCAsmMacro.h"
20+
#include <cassert>
21+
#include <cstddef>
1822
#include <string>
23+
#include <utility>
1924

2025
namespace llvm {
2126

2227
class MCAsmInfo;
2328

29+
/// A callback class which is notified of each comment in an assembly file as
30+
/// it is lexed.
31+
class AsmCommentConsumer {
32+
public:
33+
virtual ~AsmCommentConsumer() = default;
34+
35+
/// Callback function for when a comment is lexed. Loc is the start of the
36+
/// comment text (excluding the comment-start marker). CommentText is the text
37+
/// of the comment, excluding the comment start and end markers, and the
38+
/// newline for single-line comments.
39+
virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
40+
};
41+
42+
/// Generic assembler lexer interface, for use by target specific assembly
43+
/// lexers.
44+
class MCAsmLexer {
45+
/// The current token, stored in the base class for faster access.
46+
SmallVector<AsmToken, 1> CurTok;
47+
48+
/// The location and description of the current error
49+
SMLoc ErrLoc;
50+
std::string Err;
51+
52+
protected: // Can only create subclasses.
53+
const char *TokStart = nullptr;
54+
bool SkipSpace = true;
55+
bool AllowAtInIdentifier = false;
56+
bool AllowHashInIdentifier = false;
57+
bool IsAtStartOfStatement = true;
58+
bool LexMasmHexFloats = false;
59+
bool LexMasmIntegers = false;
60+
bool LexMasmStrings = false;
61+
bool LexMotorolaIntegers = false;
62+
bool UseMasmDefaultRadix = false;
63+
unsigned DefaultRadix = 10;
64+
bool LexHLASMIntegers = false;
65+
bool LexHLASMStrings = false;
66+
AsmCommentConsumer *CommentConsumer = nullptr;
67+
68+
MCAsmLexer();
69+
70+
virtual AsmToken LexToken() = 0;
71+
72+
void SetError(SMLoc errLoc, const std::string &err) {
73+
ErrLoc = errLoc;
74+
Err = err;
75+
}
76+
77+
public:
78+
MCAsmLexer(const MCAsmLexer &) = delete;
79+
MCAsmLexer &operator=(const MCAsmLexer &) = delete;
80+
virtual ~MCAsmLexer();
81+
82+
/// Consume the next token from the input stream and return it.
83+
///
84+
/// The lexer will continuously return the end-of-file token once the end of
85+
/// the main input file has been reached.
86+
const AsmToken &Lex() {
87+
assert(!CurTok.empty());
88+
// Mark if we are parsing out an EndOfStatement.
89+
IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
90+
CurTok.erase(CurTok.begin());
91+
// LexToken may generate multiple tokens via UnLex but will always return
92+
// the first one. Place returned value at head of CurTok vector.
93+
if (CurTok.empty()) {
94+
AsmToken T = LexToken();
95+
CurTok.insert(CurTok.begin(), T);
96+
}
97+
return CurTok.front();
98+
}
99+
100+
void UnLex(AsmToken const &Token) {
101+
IsAtStartOfStatement = false;
102+
CurTok.insert(CurTok.begin(), Token);
103+
}
104+
105+
bool isAtStartOfStatement() { return IsAtStartOfStatement; }
106+
107+
virtual StringRef LexUntilEndOfStatement() = 0;
108+
109+
/// Get the current source location.
110+
SMLoc getLoc() const;
111+
112+
/// Get the current (last) lexed token.
113+
const AsmToken &getTok() const { return CurTok[0]; }
114+
115+
/// Look ahead at the next token to be lexed.
116+
const AsmToken peekTok(bool ShouldSkipSpace = true) {
117+
AsmToken Tok;
118+
119+
MutableArrayRef<AsmToken> Buf(Tok);
120+
size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
121+
122+
assert(ReadCount == 1);
123+
(void)ReadCount;
124+
125+
return Tok;
126+
}
127+
128+
/// Look ahead an arbitrary number of tokens.
129+
virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
130+
bool ShouldSkipSpace = true) = 0;
131+
132+
/// Get the current error location
133+
SMLoc getErrLoc() { return ErrLoc; }
134+
135+
/// Get the current error string
136+
const std::string &getErr() { return Err; }
137+
138+
/// Get the kind of current token.
139+
AsmToken::TokenKind getKind() const { return getTok().getKind(); }
140+
141+
/// Check if the current token has kind \p K.
142+
bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
143+
144+
/// Check if the current token does not have kind \p K.
145+
bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
146+
147+
/// Set whether spaces should be ignored by the lexer
148+
void setSkipSpace(bool val) { SkipSpace = val; }
149+
150+
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
151+
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
152+
153+
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
154+
155+
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
156+
this->CommentConsumer = CommentConsumer;
157+
}
158+
159+
/// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
160+
/// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
161+
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
162+
163+
/// Set whether to use masm-style default-radix integer literals. If disabled,
164+
/// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
165+
void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
166+
167+
unsigned getMasmDefaultRadix() const { return DefaultRadix; }
168+
void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
169+
170+
/// Set whether to lex masm-style hex float literals, such as 3f800000r.
171+
void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
172+
173+
/// Set whether to lex masm-style string literals, such as 'Can''t find file'
174+
/// and "This ""value"" not found".
175+
void setLexMasmStrings(bool V) { LexMasmStrings = V; }
176+
177+
/// Set whether to lex Motorola-style integer literals, such as $deadbeef or
178+
/// %01010110.
179+
void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
180+
181+
/// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
182+
void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
183+
184+
/// Set whether to "lex" HLASM-flavour character and string literals. For now,
185+
/// setting this option to true will disable lexing for character and string
186+
/// literals.
187+
void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
188+
};
189+
24190
/// AsmLexer - Lexer class for assembly files.
25-
class AsmLexer : public MCAsmLexer {
191+
class AsmLexer final : public MCAsmLexer {
26192
const MCAsmInfo &MAI;
27193

28194
const char *CurPtr = nullptr;

llvm/include/llvm/MC/MCParser/MCAsmLexer.h

Lines changed: 1 addition & 184 deletions
Original file line numberDiff line numberDiff line change
@@ -6,187 +6,4 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#ifndef LLVM_MC_MCPARSER_MCASMLEXER_H
10-
#define LLVM_MC_MCPARSER_MCASMLEXER_H
11-
12-
#include "llvm/ADT/ArrayRef.h"
13-
#include "llvm/ADT/SmallVector.h"
14-
#include "llvm/MC/MCAsmMacro.h"
15-
#include <cassert>
16-
#include <cstddef>
17-
#include <string>
18-
#include <utility>
19-
20-
namespace llvm {
21-
22-
/// A callback class which is notified of each comment in an assembly file as
23-
/// it is lexed.
24-
class AsmCommentConsumer {
25-
public:
26-
virtual ~AsmCommentConsumer() = default;
27-
28-
/// Callback function for when a comment is lexed. Loc is the start of the
29-
/// comment text (excluding the comment-start marker). CommentText is the text
30-
/// of the comment, excluding the comment start and end markers, and the
31-
/// newline for single-line comments.
32-
virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
33-
};
34-
35-
36-
/// Generic assembler lexer interface, for use by target specific assembly
37-
/// lexers.
38-
class MCAsmLexer {
39-
/// The current token, stored in the base class for faster access.
40-
SmallVector<AsmToken, 1> CurTok;
41-
42-
/// The location and description of the current error
43-
SMLoc ErrLoc;
44-
std::string Err;
45-
46-
protected: // Can only create subclasses.
47-
const char *TokStart = nullptr;
48-
bool SkipSpace = true;
49-
bool AllowAtInIdentifier = false;
50-
bool AllowHashInIdentifier = false;
51-
bool IsAtStartOfStatement = true;
52-
bool LexMasmHexFloats = false;
53-
bool LexMasmIntegers = false;
54-
bool LexMasmStrings = false;
55-
bool LexMotorolaIntegers = false;
56-
bool UseMasmDefaultRadix = false;
57-
unsigned DefaultRadix = 10;
58-
bool LexHLASMIntegers = false;
59-
bool LexHLASMStrings = false;
60-
AsmCommentConsumer *CommentConsumer = nullptr;
61-
62-
MCAsmLexer();
63-
64-
virtual AsmToken LexToken() = 0;
65-
66-
void SetError(SMLoc errLoc, const std::string &err) {
67-
ErrLoc = errLoc;
68-
Err = err;
69-
}
70-
71-
public:
72-
MCAsmLexer(const MCAsmLexer &) = delete;
73-
MCAsmLexer &operator=(const MCAsmLexer &) = delete;
74-
virtual ~MCAsmLexer();
75-
76-
/// Consume the next token from the input stream and return it.
77-
///
78-
/// The lexer will continuously return the end-of-file token once the end of
79-
/// the main input file has been reached.
80-
const AsmToken &Lex() {
81-
assert(!CurTok.empty());
82-
// Mark if we parsing out a EndOfStatement.
83-
IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
84-
CurTok.erase(CurTok.begin());
85-
// LexToken may generate multiple tokens via UnLex but will always return
86-
// the first one. Place returned value at head of CurTok vector.
87-
if (CurTok.empty()) {
88-
AsmToken T = LexToken();
89-
CurTok.insert(CurTok.begin(), T);
90-
}
91-
return CurTok.front();
92-
}
93-
94-
void UnLex(AsmToken const &Token) {
95-
IsAtStartOfStatement = false;
96-
CurTok.insert(CurTok.begin(), Token);
97-
}
98-
99-
bool isAtStartOfStatement() { return IsAtStartOfStatement; }
100-
101-
virtual StringRef LexUntilEndOfStatement() = 0;
102-
103-
/// Get the current source location.
104-
SMLoc getLoc() const;
105-
106-
/// Get the current (last) lexed token.
107-
const AsmToken &getTok() const {
108-
return CurTok[0];
109-
}
110-
111-
/// Look ahead at the next token to be lexed.
112-
const AsmToken peekTok(bool ShouldSkipSpace = true) {
113-
AsmToken Tok;
114-
115-
MutableArrayRef<AsmToken> Buf(Tok);
116-
size_t ReadCount = peekTokens(Buf, ShouldSkipSpace);
117-
118-
assert(ReadCount == 1);
119-
(void)ReadCount;
120-
121-
return Tok;
122-
}
123-
124-
/// Look ahead an arbitrary number of tokens.
125-
virtual size_t peekTokens(MutableArrayRef<AsmToken> Buf,
126-
bool ShouldSkipSpace = true) = 0;
127-
128-
/// Get the current error location
129-
SMLoc getErrLoc() {
130-
return ErrLoc;
131-
}
132-
133-
/// Get the current error string
134-
const std::string &getErr() {
135-
return Err;
136-
}
137-
138-
/// Get the kind of current token.
139-
AsmToken::TokenKind getKind() const { return getTok().getKind(); }
140-
141-
/// Check if the current token has kind \p K.
142-
bool is(AsmToken::TokenKind K) const { return getTok().is(K); }
143-
144-
/// Check if the current token has kind \p K.
145-
bool isNot(AsmToken::TokenKind K) const { return getTok().isNot(K); }
146-
147-
/// Set whether spaces should be ignored by the lexer
148-
void setSkipSpace(bool val) { SkipSpace = val; }
149-
150-
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
151-
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
152-
153-
void setAllowHashInIdentifier(bool V) { AllowHashInIdentifier = V; }
154-
155-
void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
156-
this->CommentConsumer = CommentConsumer;
157-
}
158-
159-
/// Set whether to lex masm-style binary (e.g., 0b1101) and radix-specified
160-
/// literals (e.g., 0ABCh [hex], 576t [decimal], 77o [octal], 1101y [binary]).
161-
void setLexMasmIntegers(bool V) { LexMasmIntegers = V; }
162-
163-
/// Set whether to use masm-style default-radix integer literals. If disabled,
164-
/// assume decimal unless prefixed (e.g., 0x2c [hex], 077 [octal]).
165-
void useMasmDefaultRadix(bool V) { UseMasmDefaultRadix = V; }
166-
167-
unsigned getMasmDefaultRadix() const { return DefaultRadix; }
168-
void setMasmDefaultRadix(unsigned Radix) { DefaultRadix = Radix; }
169-
170-
/// Set whether to lex masm-style hex float literals, such as 3f800000r.
171-
void setLexMasmHexFloats(bool V) { LexMasmHexFloats = V; }
172-
173-
/// Set whether to lex masm-style string literals, such as 'Can''t find file'
174-
/// and "This ""value"" not found".
175-
void setLexMasmStrings(bool V) { LexMasmStrings = V; }
176-
177-
/// Set whether to lex Motorola-style integer literals, such as $deadbeef or
178-
/// %01010110.
179-
void setLexMotorolaIntegers(bool V) { LexMotorolaIntegers = V; }
180-
181-
/// Set whether to lex HLASM-flavour integers. For now this is only [0-9]*
182-
void setLexHLASMIntegers(bool V) { LexHLASMIntegers = V; }
183-
184-
/// Set whether to "lex" HLASM-flavour character and string literals. For now,
185-
/// setting this option to true, will disable lexing for character and string
186-
/// literals.
187-
void setLexHLASMStrings(bool V) { LexHLASMStrings = V; }
188-
};
189-
190-
} // end namespace llvm
191-
192-
#endif // LLVM_MC_MCPARSER_MCASMLEXER_H
9+
#include "llvm/MC/MCParser/AsmLexer.h"

0 commit comments

Comments
 (0)