Skip to content

Commit 243bfed

Browse files
authored
[analyzer][HTMLRewriter] Cache partial rewrite results. (#80220)
This is a follow-up for 721dd3b [analyzer] NFC: Don't regenerate duplicate HTML reports. Because HTMLRewriter re-runs the Lexer for syntax highlighting and macro expansion purposes, it may get fairly expensive when the rewriter is invoked multiple times on the same file. In the static analyzer (which uses HTMLRewriter for HTML output mode) we only get away with this because there are usually very few reports emitted per file. But if loud checkers are enabled, such as `webkit.*`, this may explode in complexity and even cause the compiler to run over the 32-bit SourceLocation addressing limit. This patch caches intermediate results so that re-lexing only needed to happen once. As the clever __COUNTER__ test demonstrates, "once" is still too many. Ideally we shouldn't re-lex anything at all, which remains a TODO.
1 parent fe408eb commit 243bfed

File tree

4 files changed

+162
-27
lines changed

4 files changed

+162
-27
lines changed

clang/include/clang/Rewrite/Core/HTMLRewrite.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,15 @@ class RewriteBuffer;
2424
class Preprocessor;
2525

2626
namespace html {
27+
struct RelexRewriteCache;
28+
using RelexRewriteCacheRef = std::shared_ptr<RelexRewriteCache>;
29+
30+
/// If you need to rewrite the same file multiple times, you can instantiate
31+
/// a RelexRewriteCache and refer functions such as SyntaxHighlight()
32+
/// and HighlightMacros() to it so that to avoid re-lexing the file each time.
33+
/// The cache may outlive the rewriter as long as cached FileIDs and source
34+
/// locations continue to make sense for the translation unit as a whole.
35+
RelexRewriteCacheRef instantiateRelexRewriteCache();
2736

2837
/// HighlightRange - Highlight a range in the source code with the specified
2938
/// start/end tags. B/E must be in the same file. This ensures that
@@ -67,13 +76,15 @@ namespace html {
6776

6877
/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
6978
/// information about keywords, comments, etc.
70-
void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP);
79+
void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP,
80+
RelexRewriteCacheRef Cache = nullptr);
7181

7282
/// HighlightMacros - This uses the macro table state from the end of the
7383
/// file, to reexpand macros and insert (into the HTML) information about the
7484
/// macro expansions. This won't be perfectly perfect, but it will be
7585
/// reasonably close.
76-
void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP);
86+
void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP,
87+
RelexRewriteCacheRef Cache = nullptr);
7788

7889
} // end html namespace
7990
} // end clang namespace

clang/lib/Rewrite/HTMLRewrite.cpp

Lines changed: 116 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
#include "llvm/Support/MemoryBuffer.h"
2222
#include "llvm/Support/raw_ostream.h"
2323
#include <memory>
24-
using namespace clang;
2524

25+
using namespace clang;
26+
using namespace llvm;
27+
using namespace html;
2628

2729
/// HighlightRange - Highlight a range in the source code with the specified
2830
/// start/end tags. B/E must be in the same file. This ensures that
@@ -104,6 +106,32 @@ void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
104106
}
105107
}
106108

109+
namespace clang::html {
110+
struct RelexRewriteCache {
111+
// These structs mimic input arguments of HighlightRange().
112+
struct Highlight {
113+
SourceLocation B, E;
114+
std::string StartTag, EndTag;
115+
bool IsTokenRange;
116+
};
117+
struct RawHighlight {
118+
unsigned B, E;
119+
std::string StartTag, EndTag;
120+
};
121+
122+
// SmallVector isn't appropriate because these vectors are almost never small.
123+
using HighlightList = std::vector<Highlight>;
124+
using RawHighlightList = std::vector<RawHighlight>;
125+
126+
DenseMap<FileID, RawHighlightList> SyntaxHighlights;
127+
DenseMap<FileID, HighlightList> MacroHighlights;
128+
};
129+
} // namespace clang::html
130+
131+
html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() {
132+
return std::make_shared<RelexRewriteCache>();
133+
}
134+
107135
void html::EscapeText(Rewriter &R, FileID FID,
108136
bool EscapeSpaces, bool ReplaceTabs) {
109137

@@ -442,13 +470,18 @@ input.spoilerhider:checked + label + .spoiler{
442470
/// information about keywords, macro expansions etc. This uses the macro
443471
/// table state from the end of the file, so it won't be perfectly perfect,
444472
/// but it will be reasonably close.
445-
void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
446-
RewriteBuffer &RB = R.getEditBuffer(FID);
473+
static void SyntaxHighlightImpl(
474+
Rewriter &R, FileID FID, const Preprocessor &PP,
475+
llvm::function_ref<void(RewriteBuffer &, unsigned, unsigned, const char *,
476+
const char *, const char *)>
477+
HighlightRangeCallback) {
447478

479+
RewriteBuffer &RB = R.getEditBuffer(FID);
448480
const SourceManager &SM = PP.getSourceManager();
449481
llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
482+
const char *BufferStart = FromFile.getBuffer().data();
483+
450484
Lexer L(FID, FromFile, SM, PP.getLangOpts());
451-
const char *BufferStart = L.getBuffer().data();
452485

453486
// Inform the preprocessor that we want to retain comments as tokens, so we
454487
// can highlight them.
@@ -475,13 +508,13 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
475508

476509
// If this is a pp-identifier, for a keyword, highlight it as such.
477510
if (Tok.isNot(tok::identifier))
478-
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
479-
"<span class='keyword'>", "</span>");
511+
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
512+
"<span class='keyword'>", "</span>");
480513
break;
481514
}
482515
case tok::comment:
483-
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
484-
"<span class='comment'>", "</span>");
516+
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
517+
"<span class='comment'>", "</span>");
485518
break;
486519
case tok::utf8_string_literal:
487520
// Chop off the u part of u8 prefix
@@ -498,8 +531,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
498531
[[fallthrough]];
499532
case tok::string_literal:
500533
// FIXME: Exclude the optional ud-suffix from the highlighted range.
501-
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
502-
"<span class='string_literal'>", "</span>");
534+
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
535+
"<span class='string_literal'>", "</span>");
503536
break;
504537
case tok::hash: {
505538
// If this is a preprocessor directive, all tokens to end of line are too.
@@ -516,8 +549,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
516549
}
517550

518551
// Find end of line. This is a hack.
519-
HighlightRange(RB, TokOffs, TokEnd, BufferStart,
520-
"<span class='directive'>", "</span>");
552+
HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart,
553+
"<span class='directive'>", "</span>");
521554

522555
// Don't skip the next token.
523556
continue;
@@ -527,12 +560,43 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
527560
L.LexFromRawLexer(Tok);
528561
}
529562
}
563+
void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP,
564+
RelexRewriteCacheRef Cache) {
565+
RewriteBuffer &RB = R.getEditBuffer(FID);
566+
const SourceManager &SM = PP.getSourceManager();
567+
llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
568+
const char *BufferStart = FromFile.getBuffer().data();
569+
570+
if (Cache) {
571+
auto CacheIt = Cache->SyntaxHighlights.find(FID);
572+
if (CacheIt != Cache->SyntaxHighlights.end()) {
573+
for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) {
574+
HighlightRange(RB, H.B, H.E, BufferStart, H.StartTag.data(),
575+
H.EndTag.data());
576+
}
577+
return;
578+
}
579+
}
580+
581+
// "Every time you would call HighlightRange, cache the inputs as well."
582+
auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E,
583+
const char *BufferStart,
584+
const char *StartTag, const char *EndTag) {
585+
HighlightRange(RB, B, E, BufferStart, StartTag, EndTag);
586+
587+
if (Cache)
588+
Cache->SyntaxHighlights[FID].push_back({B, E, StartTag, EndTag});
589+
};
590+
591+
SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback);
592+
}
593+
594+
static void HighlightMacrosImpl(
595+
Rewriter &R, FileID FID, const Preprocessor &PP,
596+
llvm::function_ref<void(Rewriter &, SourceLocation, SourceLocation,
597+
const char *, const char *, bool)>
598+
HighlightRangeCallback) {
530599

531-
/// HighlightMacros - This uses the macro table state from the end of the
532-
/// file, to re-expand macros and insert (into the HTML) information about the
533-
/// macro expansions. This won't be perfectly perfect, but it will be
534-
/// reasonably close.
535-
void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
536600
// Re-lex the raw token stream into a token buffer.
537601
const SourceManager &SM = PP.getSourceManager();
538602
std::vector<Token> TokenStream;
@@ -659,11 +723,44 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
659723
// get highlighted.
660724
Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";
661725

662-
HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>",
663-
Expansion.c_str(), LLoc.isTokenRange());
726+
HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(),
727+
"<span class='macro'>", Expansion.c_str(),
728+
LLoc.isTokenRange());
664729
}
665730

666731
// Restore the preprocessor's old state.
667732
TmpPP.setDiagnostics(*OldDiags);
668733
TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);
669734
}
735+
736+
/// HighlightMacros - This uses the macro table state from the end of the
737+
/// file, to re-expand macros and insert (into the HTML) information about the
738+
/// macro expansions. This won't be perfectly perfect, but it will be
739+
/// reasonably close.
740+
void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP,
741+
RelexRewriteCacheRef Cache) {
742+
if (Cache) {
743+
auto CacheIt = Cache->MacroHighlights.find(FID);
744+
if (CacheIt != Cache->MacroHighlights.end()) {
745+
for (const RelexRewriteCache::Highlight &H : CacheIt->second) {
746+
HighlightRange(R, H.B, H.E, H.StartTag.data(), H.EndTag.data(),
747+
H.IsTokenRange);
748+
}
749+
return;
750+
}
751+
}
752+
753+
// "Every time you would call HighlightRange, cache the inputs as well."
754+
auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B,
755+
SourceLocation E, const char *StartTag,
756+
const char *EndTag, bool isTokenRange) {
757+
HighlightRange(R, B, E, StartTag, EndTag, isTokenRange);
758+
759+
if (Cache) {
760+
Cache->MacroHighlights[FID].push_back(
761+
{B, E, StartTag, EndTag, isTokenRange});
762+
}
763+
};
764+
765+
HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback);
766+
}

clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ class HTMLDiagnostics : public PathDiagnosticConsumer {
6969
const Preprocessor &PP;
7070
const bool SupportsCrossFileDiagnostics;
7171
llvm::StringSet<> EmittedHashes;
72+
html::RelexRewriteCacheRef RewriterCache =
73+
html::instantiateRelexRewriteCache();
7274

7375
public:
7476
HTMLDiagnostics(PathDiagnosticConsumerOptions DiagOpts,
@@ -309,10 +311,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
309311
return;
310312
}
311313

312-
// FIXME: This causes each file to be re-parsed and syntax-highlighted
313-
// and macro-expanded separately for each report. We could cache such rewrites
314-
// across all reports and only re-do the part that's actually different:
315-
// the warning/note bubbles.
316314
std::string report = GenerateHTML(D, R, SMgr, path, declName.c_str());
317315
if (report.empty()) {
318316
llvm::errs() << "warning: no diagnostics generated for main file.\n";
@@ -882,8 +880,8 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, const PathPieces &path,
882880
// If we have a preprocessor, relex the file and syntax highlight.
883881
// We might not have a preprocessor if we come from a deserialized AST file,
884882
// for example.
885-
html::SyntaxHighlight(R, FID, PP);
886-
html::HighlightMacros(R, FID, PP);
883+
html::SyntaxHighlight(R, FID, PP, RewriterCache);
884+
html::HighlightMacros(R, FID, PP, RewriterCache);
887885
}
888886

889887
void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// RUN: rm -fR %t
2+
// RUN: mkdir %t
3+
// RUN: %clang_analyze_cc1 -analyzer-checker=core \
4+
// RUN: -analyzer-output=html -o %t -verify %s
5+
// RUN: grep -v CHECK %t/report-*.html | FileCheck %s
6+
7+
8+
void foo() {
9+
int *x = 0;
10+
*x = __COUNTER__; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
11+
}
12+
13+
void bar() {
14+
int *y;
15+
*y = __COUNTER__; // expected-warning{{Dereference of undefined pointer value (loaded from variable 'y')}}
16+
}
17+
18+
// The checks below confirm that both reports have the same values for __COUNTER__.
19+
//
20+
// FIXME: The correct values are (0, 1, 0, 1). Because we re-lex the file in order
21+
// to detect macro expansions for HTML report purposes, they turn into (2, 3, 2, 3)
22+
// by the time we emit HTML. But at least it's better than (2, 3, 4, 5)
23+
// which would have been the case if we re-lexed the file *each* time we
24+
// emitted an HTML report.
25+
26+
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>2</span></span>
27+
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>3</span></span>
28+
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>2</span></span>
29+
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>3</span></span>

0 commit comments

Comments
 (0)