Skip to content

Commit 9f2bf66

Browse files
committed
[clangd] Implement getBeginning for overloaded operators.
Summary: This will fix some bugs where navigation doesn't work on cases like `std::cout <^< "hello"`. Reviewers: ilya-biryukov Subscribers: MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D67695 llvm-svn: 373323
1 parent 339b1b5 commit 9f2bf66

File tree

4 files changed

+113
-20
lines changed

4 files changed

+113
-20
lines changed

clang-tools-extra/clangd/SourceCode.cpp

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,45 @@ llvm::Optional<Range> getTokenRange(const SourceManager &SM,
237237
return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End));
238238
}
239239

240+
namespace {
241+
242+
// Coarse classification of the raw token found at a source location. It is
// produced by getTokenFlavor and used by getBeginningOfIdentifier to choose
// between the token before and the token at the cursor.
enum TokenFlavor { Identifier, Operator, Whitespace, Other };
243+
244+
// Returns true if the kind of Tok matches one of the `case tok::...` labels
// generated from the OVERLOADED_OPERATOR entries of OperatorKinds.def, and
// false for every other token kind.
bool isOverloadedOperator(const Token &Tok) {
245+
switch (Tok.getKind()) {
246+
#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly) \
247+
case tok::Token:
248+
#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly)
249+
// OVERLOADED_OPERATOR_MULTI expands to nothing, so those entries add no case
// label (presumably operators spelled with several tokens -- TODO confirm
// against OperatorKinds.def).
#include "clang/Basic/OperatorKinds.def"
250+
// Every generated case label falls through to this single return.
return true;
251+
252+
default:
253+
break;
254+
}
255+
return false;
256+
}
257+
258+
// Lexes the raw token at Loc and classifies it as Identifier, Operator,
// Whitespace or Other. Returns Other when getRawToken reports failure or
// when the token is neither a raw identifier nor an overloaded operator.
TokenFlavor getTokenFlavor(SourceLocation Loc, const SourceManager &SM,
259+
const LangOptions &LangOpts) {
260+
Token Tok;
261+
// Sentinel kind: lets us detect below that getRawToken left Tok untouched.
Tok.setKind(tok::NUM_TOKENS);
262+
if (Lexer::getRawToken(Loc, Tok, SM, LangOpts,
263+
/*IgnoreWhiteSpace*/ false))
264+
return Other;
265+
266+
// getRawToken is expected to return false without setting Tok when the
267+
// location is whitespace, so an unchanged sentinel kind means whitespace.
// NOTE(review): confirm getRawToken's return value for whitespace when
// IgnoreWhiteSpace is false; if it reports failure instead, the early return
// above yields Other and this branch is never taken.
268+
if (Tok.is(tok::TokenKind::NUM_TOKENS))
269+
return Whitespace;
270+
if (Tok.is(tok::TokenKind::raw_identifier))
271+
return Identifier;
272+
if (isOverloadedOperator(Tok))
273+
return Operator;
274+
return Other;
275+
}
276+
277+
} // namespace
278+
240279
SourceLocation getBeginningOfIdentifier(const Position &Pos,
241280
const SourceManager &SM,
242281
const LangOptions &LangOpts) {
@@ -247,27 +286,57 @@ SourceLocation getBeginningOfIdentifier(const Position &Pos,
247286
return SourceLocation();
248287
}
249288

250-
// GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
251-
// if the cursor is at the end of the identifier.
252-
// Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
253-
// 1) at the beginning of an identifier, we'll be looking at something
254-
// that isn't an identifier.
255-
// 2) at the middle or end of an identifier, we get the identifier.
256-
// 3) anywhere outside an identifier, we'll get some non-identifier thing.
257-
// We can't actually distinguish cases 1 and 3, but returning the original
258-
// location is correct for both!
289+
// GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
290+
// thing if the cursor is at the end of the token (identifier or operator).
291+
// The cases are:
292+
// 1) at the beginning of the token
293+
// 2) at the middle of the token
294+
// 3) at the end of the token
295+
// 4) anywhere outside the identifier or operator
296+
// To distinguish all cases, we lex both at the
297+
// GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc), for
298+
// cases 1 and 4, we just return the original location.
259299
SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset);
260-
if (*Offset == 0) // Case 1 or 3.
300+
if (*Offset == 0) // Case 1 or 4.
261301
return InputLoc;
262302
SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1);
303+
SourceLocation BeforeTokBeginning =
304+
Lexer::GetBeginningOfToken(Before, SM, LangOpts);
305+
TokenFlavor BeforeKind = getTokenFlavor(BeforeTokBeginning, SM, LangOpts);
306+
307+
SourceLocation CurrentTokBeginning =
308+
Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts);
309+
TokenFlavor CurrentKind = getTokenFlavor(CurrentTokBeginning, SM, LangOpts);
310+
311+
// At the middle of the token.
312+
if (BeforeTokBeginning == CurrentTokBeginning) {
313+
// For an interesting token (identifier or operator), we return the beginning of the token.
314+
if (CurrentKind == Identifier || CurrentKind == Operator)
315+
return CurrentTokBeginning;
316+
// otherwise, we return the original loc.
317+
return InputLoc;
318+
}
263319

264-
Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts);
265-
Token Tok;
266-
if (Before.isValid() &&
267-
!Lexer::getRawToken(Before, Tok, SM, LangOpts, false) &&
268-
Tok.is(tok::raw_identifier))
269-
return Before; // Case 2.
270-
return InputLoc; // Case 1 or 3.
320+
// Whitespace is not interesting.
321+
if (BeforeKind == Whitespace)
322+
return CurrentTokBeginning;
323+
if (CurrentKind == Whitespace)
324+
return BeforeTokBeginning;
325+
326+
// The cursor is at the token boundary, e.g. "Before^Current", we prefer
327+
// identifiers to other tokens.
328+
if (CurrentKind == Identifier)
329+
return CurrentTokBeginning;
330+
if (BeforeKind == Identifier)
331+
return BeforeTokBeginning;
332+
// Then prefer overloaded operators to other tokens.
333+
if (CurrentKind == Operator)
334+
return CurrentTokBeginning;
335+
if (BeforeKind == Operator)
336+
return BeforeTokBeginning;
337+
338+
// Non-interesting case, we just return the original location.
339+
return InputLoc;
271340
}
272341

273342
bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {

clang-tools-extra/clangd/SourceCode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
7979
Position P);
8080

8181
/// Get the beginning SourceLocation at a specified \p Pos in the main file.
82-
/// May be invalid if Pos is, or if there's no identifier.
82+
/// May be invalid if Pos is, or if there's no identifier or operator.
8383
/// The returned position is in the main file, callers may prefer to
8484
/// obtain the macro expansion location.
8585
SourceLocation getBeginningOfIdentifier(const Position &Pos,

clang-tools-extra/clangd/unittests/SourceCodeTests.cpp

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -319,14 +319,29 @@ struct Bar { int func(); };
319319
Bar* bar;
320320
)cpp";
321321
// First ^ is the expected beginning, last is the search position.
322-
for (std::string Text : std::vector<std::string>{
322+
for (const std::string &Text : std::vector<std::string>{
323323
"int ^f^oo();", // inside identifier
324324
"int ^foo();", // beginning of identifier
325325
"int ^foo^();", // end of identifier
326326
"int foo(^);", // non-identifier
327327
"^int foo();", // beginning of file (can't back up)
328328
"int ^f0^0();", // after a digit (lexing at N-1 is wrong)
329-
"int ^λλ^λ();", // UTF-8 handled properly when backing up
329+
"/^/ comments", // non-interesting token
330+
"void f(int abc) { abc ^ ++; }", // whitespace
331+
"void f(int abc) { ^abc^++; }", // range of identifier
332+
"void f(int abc) { ++^abc^; }", // range of identifier
333+
"void f(int abc) { ++^abc; }", // range of identifier
334+
"void f(int abc) { ^+^+abc; }", // range of operator
335+
"void f(int abc) { ^abc^ ++; }", // range of identifier
336+
"void f(int abc) { abc ^++^; }", // range of operator
337+
"void f(int abc) { ^++^ abc; }", // range of operator
338+
"void f(int abc) { ++ ^abc^; }", // range of identifier
339+
"void f(int abc) { ^++^/**/abc; }", // range of operator
340+
"void f(int abc) { ++/**/^abc; }", // range of identifier
341+
"void f(int abc) { ^abc^/**/++; }", // range of identifier
342+
"void f(int abc) { abc/**/^++; }", // range of operator
343+
"void f() {^ }", // outside of identifier and operator
344+
"int ^λλ^λ();", // UTF-8 handled properly when backing up
330345

331346
// identifier in macro arg
332347
"MACRO(bar->^func())", // beginning of identifier

clang-tools-extra/clangd/unittests/XRefsTests.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -441,6 +441,15 @@ TEST(LocateSymbol, All) {
441441
auto x = m^akeX();
442442
}
443443
)cpp",
444+
445+
R"cpp(
446+
struct X {
447+
X& [[operator]]++() {}
448+
};
449+
void foo(X& x) {
450+
+^+x;
451+
}
452+
)cpp",
444453
};
445454
for (const char *Test : Tests) {
446455
Annotations T(Test);

0 commit comments

Comments
 (0)