@@ -237,6 +237,45 @@ llvm::Optional<Range> getTokenRange(const SourceManager &SM,
237
237
return halfOpenToRange (SM, CharSourceRange::getCharRange (TokLoc, End));
238
238
}
239
239
240
+ namespace {
241
+
242
+ enum TokenFlavor { Identifier, Operator, Whitespace, Other };
243
+
244
+ bool isOverloadedOperator (const Token &Tok) {
245
+ switch (Tok.getKind ()) {
246
+ #define OVERLOADED_OPERATOR (Name, Spelling, Token, Unary, Binary, MemOnly ) \
247
+ case tok::Token:
248
+ #define OVERLOADED_OPERATOR_MULTI (Name, Spelling, Unary, Binary, MemOnly )
249
+ #include " clang/Basic/OperatorKinds.def"
250
+ return true ;
251
+
252
+ default :
253
+ break ;
254
+ }
255
+ return false ;
256
+ }
257
+
258
+ TokenFlavor getTokenFlavor (SourceLocation Loc, const SourceManager &SM,
259
+ const LangOptions &LangOpts) {
260
+ Token Tok;
261
+ Tok.setKind (tok::NUM_TOKENS);
262
+ if (Lexer::getRawToken (Loc, Tok, SM, LangOpts,
263
+ /* IgnoreWhiteSpace*/ false ))
264
+ return Other;
265
+
266
+ // getRawToken will return false without setting Tok when the token is
267
+ // whitespace, so if the flag is not set, we are sure this is a whitespace.
268
+ if (Tok.is (tok::TokenKind::NUM_TOKENS))
269
+ return Whitespace;
270
+ if (Tok.is (tok::TokenKind::raw_identifier))
271
+ return Identifier;
272
+ if (isOverloadedOperator (Tok))
273
+ return Operator;
274
+ return Other;
275
+ }
276
+
277
+ } // namespace
278
+
240
279
SourceLocation getBeginningOfIdentifier (const Position &Pos,
241
280
const SourceManager &SM,
242
281
const LangOptions &LangOpts) {
@@ -247,27 +286,57 @@ SourceLocation getBeginningOfIdentifier(const Position &Pos,
247
286
return SourceLocation ();
248
287
}
249
288
250
- // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing
251
- // if the cursor is at the end of the identifier.
252
- // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are:
253
- // 1) at the beginning of an identifier, we'll be looking at something
254
- // that isn't an identifier.
255
- // 2) at the middle or end of an identifier, we get the identifier.
256
- // 3) anywhere outside an identifier, we'll get some non-identifier thing.
257
- // We can't actually distinguish cases 1 and 3, but returning the original
258
- // location is correct for both!
289
+ // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong
290
+ // thing if the cursor is at the end of the token (identifier or operator).
291
+ // The cases are:
292
+ // 1) at the beginning of the token
293
+ // 2) at the middle of the token
294
+ // 3) at the end of the token
295
+ // 4) anywhere outside the identifier or operator
296
+ // To distinguish all cases, we lex at both
297
+ // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc). For
298
+ // cases 1 and 4, we just return the original location.
259
299
SourceLocation InputLoc = SM.getComposedLoc (FID, *Offset);
260
- if (*Offset == 0 ) // Case 1 or 3 .
300
+ if (*Offset == 0 ) // Case 1 or 4 .
261
301
return InputLoc;
262
302
SourceLocation Before = SM.getComposedLoc (FID, *Offset - 1 );
303
+ SourceLocation BeforeTokBeginning =
304
+ Lexer::GetBeginningOfToken (Before, SM, LangOpts);
305
+ TokenFlavor BeforeKind = getTokenFlavor (BeforeTokBeginning, SM, LangOpts);
306
+
307
+ SourceLocation CurrentTokBeginning =
308
+ Lexer::GetBeginningOfToken (InputLoc, SM, LangOpts);
309
+ TokenFlavor CurrentKind = getTokenFlavor (CurrentTokBeginning, SM, LangOpts);
310
+
311
+ // At the middle of the token.
312
+ if (BeforeTokBeginning == CurrentTokBeginning) {
313
+ // For interesting token, we return the beginning of the token.
314
+ if (CurrentKind == Identifier || CurrentKind == Operator)
315
+ return CurrentTokBeginning;
316
+ // otherwise, we return the original loc.
317
+ return InputLoc;
318
+ }
263
319
264
- Before = Lexer::GetBeginningOfToken (Before, SM, LangOpts);
265
- Token Tok;
266
- if (Before.isValid () &&
267
- !Lexer::getRawToken (Before, Tok, SM, LangOpts, false ) &&
268
- Tok.is (tok::raw_identifier))
269
- return Before; // Case 2.
270
- return InputLoc; // Case 1 or 3.
320
+ // Whitespace is not interesting.
321
+ if (BeforeKind == Whitespace)
322
+ return CurrentTokBeginning;
323
+ if (CurrentKind == Whitespace)
324
+ return BeforeTokBeginning;
325
+
326
+ // The cursor is at the token boundary, e.g. "Before^Current", we prefer
327
+ // identifiers to other tokens.
328
+ if (CurrentKind == Identifier)
329
+ return CurrentTokBeginning;
330
+ if (BeforeKind == Identifier)
331
+ return BeforeTokBeginning;
332
+ // Then prefer overloaded operators to other tokens.
333
+ if (CurrentKind == Operator)
334
+ return CurrentTokBeginning;
335
+ if (BeforeKind == Operator)
336
+ return BeforeTokBeginning;
337
+
338
+ // Non-interesting case, we just return the original location.
339
+ return InputLoc;
271
340
}
272
341
273
342
bool isValidFileRange (const SourceManager &Mgr, SourceRange R) {
0 commit comments