Skip to content

[analyzer][HTMLRewriter] Cache partial rewrite results. #80220

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions clang/include/clang/Rewrite/Core/HTMLRewrite.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ class RewriteBuffer;
class Preprocessor;

namespace html {
/// Storage for cached rewrite results. Only forward-declared here; the
/// definition lives in the implementation file.
struct RelexRewriteCache;
/// Shared-ownership handle through which a RelexRewriteCache is passed around.
using RelexRewriteCacheRef = std::shared_ptr<RelexRewriteCache>;

/// Create a new, empty RelexRewriteCache.
///
/// If you need to rewrite the same file multiple times, instantiate a
/// RelexRewriteCache and pass it to functions such as SyntaxHighlight() and
/// HighlightMacros() to avoid re-lexing the file each time.
/// The cache may outlive the rewriter as long as cached FileIDs and source
/// locations continue to make sense for the translation unit as a whole.
RelexRewriteCacheRef instantiateRelexRewriteCache();

/// HighlightRange - Highlight a range in the source code with the specified
/// start/end tags. B/E must be in the same file. This ensures that
Expand Down Expand Up @@ -67,13 +76,15 @@ namespace html {

/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
/// information about keywords, comments, etc.
void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP);
void SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP,
RelexRewriteCacheRef Cache = nullptr);

/// HighlightMacros - This uses the macro table state from the end of the
/// file, to reexpand macros and insert (into the HTML) information about the
/// macro expansions. This won't be perfectly perfect, but it will be
/// reasonably close.
void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP);
void HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP,
RelexRewriteCacheRef Cache = nullptr);

} // end html namespace
} // end clang namespace
Expand Down
135 changes: 116 additions & 19 deletions clang/lib/Rewrite/HTMLRewrite.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
using namespace clang;

using namespace clang;
using namespace llvm;
using namespace html;

/// HighlightRange - Highlight a range in the source code with the specified
/// start/end tags. B/E must be in the same file. This ensures that
Expand Down Expand Up @@ -104,6 +106,32 @@ void html::HighlightRange(RewriteBuffer &RB, unsigned B, unsigned E,
}
}

namespace clang::html {
/// Per-file record of the arguments that were handed to HighlightRange(), kept
/// so that the same highlights can be replayed later without lexing again.
struct RelexRewriteCache {
  /// Arguments of the SourceLocation-based HighlightRange() call. The tags are
  /// stored as owned strings rather than as the raw pointers that were passed.
  struct Highlight {
    SourceLocation B;
    SourceLocation E;
    std::string StartTag;
    std::string EndTag;
    bool IsTokenRange;
  };

  /// Arguments of the buffer-offset-based HighlightRange() call, again with
  /// owned copies of the tags.
  struct RawHighlight {
    unsigned B;
    unsigned E;
    std::string StartTag;
    std::string EndTag;
  };

  // std::vector on purpose: these lists are almost never small, so SmallVector
  // would not be appropriate.
  using HighlightList = std::vector<Highlight>;
  using RawHighlightList = std::vector<RawHighlight>;

  /// Highlights recorded by SyntaxHighlight(), keyed by file.
  DenseMap<FileID, RawHighlightList> SyntaxHighlights;

  /// Highlights recorded by HighlightMacros(), keyed by file.
  DenseMap<FileID, HighlightList> MacroHighlights;
};
} // namespace clang::html

/// Create an empty cache and return a shared handle to it.
html::RelexRewriteCacheRef html::instantiateRelexRewriteCache() {
  auto FreshCache = std::make_shared<RelexRewriteCache>();
  return FreshCache;
}

void html::EscapeText(Rewriter &R, FileID FID,
bool EscapeSpaces, bool ReplaceTabs) {

Expand Down Expand Up @@ -442,13 +470,18 @@ input.spoilerhider:checked + label + .spoiler{
/// information about keywords, macro expansions etc. This uses the macro
/// table state from the end of the file, so it won't be perfectly perfect,
/// but it will be reasonably close.
void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
RewriteBuffer &RB = R.getEditBuffer(FID);
static void SyntaxHighlightImpl(
Rewriter &R, FileID FID, const Preprocessor &PP,
llvm::function_ref<void(RewriteBuffer &, unsigned, unsigned, const char *,
const char *, const char *)>
HighlightRangeCallback) {

RewriteBuffer &RB = R.getEditBuffer(FID);
const SourceManager &SM = PP.getSourceManager();
llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
const char *BufferStart = FromFile.getBuffer().data();

Lexer L(FID, FromFile, SM, PP.getLangOpts());
const char *BufferStart = L.getBuffer().data();

// Inform the preprocessor that we want to retain comments as tokens, so we
// can highlight them.
Expand All @@ -475,13 +508,13 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {

// If this is a pp-identifier, for a keyword, highlight it as such.
if (Tok.isNot(tok::identifier))
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
"<span class='keyword'>", "</span>");
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
"<span class='keyword'>", "</span>");
break;
}
case tok::comment:
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
"<span class='comment'>", "</span>");
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
"<span class='comment'>", "</span>");
break;
case tok::utf8_string_literal:
// Chop off the u part of u8 prefix
Expand All @@ -498,8 +531,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
[[fallthrough]];
case tok::string_literal:
// FIXME: Exclude the optional ud-suffix from the highlighted range.
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
"<span class='string_literal'>", "</span>");
HighlightRangeCallback(RB, TokOffs, TokOffs + TokLen, BufferStart,
"<span class='string_literal'>", "</span>");
break;
case tok::hash: {
// If this is a preprocessor directive, all tokens to end of line are too.
Expand All @@ -516,8 +549,8 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
}

// Find end of line. This is a hack.
HighlightRange(RB, TokOffs, TokEnd, BufferStart,
"<span class='directive'>", "</span>");
HighlightRangeCallback(RB, TokOffs, TokEnd, BufferStart,
"<span class='directive'>", "</span>");

// Don't skip the next token.
continue;
Expand All @@ -527,12 +560,43 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
L.LexFromRawLexer(Tok);
}
}
/// SyntaxHighlight - Relex the specified FileID and annotate the HTML with
/// information about keywords, comments, etc.
///
/// \param R     Rewriter whose edit buffer for \p FID receives the tags.
/// \param FID   File to highlight.
/// \param PP    Preprocessor supplying the source manager and language options.
/// \param Cache Optional cache. When it already holds an entry for \p FID the
///              recorded highlights are replayed and the file is not lexed
///              again. Otherwise the file is lexed and, if \p Cache is
///              non-null, the resulting highlights are stored under \p FID.
void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP,
                           RelexRewriteCacheRef Cache) {
  RewriteBuffer &RB = R.getEditBuffer(FID);
  const SourceManager &SM = PP.getSourceManager();
  llvm::MemoryBufferRef FromFile = SM.getBufferOrFake(FID);
  const char *BufferStart = FromFile.getBuffer().data();

  // Fast path: replay what an earlier call recorded for this file.
  if (Cache) {
    auto CacheIt = Cache->SyntaxHighlights.find(FID);
    if (CacheIt != Cache->SyntaxHighlights.end()) {
      for (const RelexRewriteCache::RawHighlight &H : CacheIt->second) {
        HighlightRange(RB, H.B, H.E, BufferStart, H.StartTag.c_str(),
                       H.EndTag.c_str());
      }
      return;
    }
  }

  // Highlights are gathered locally and published in one step afterwards.
  // This avoids a map lookup per highlight and, more importantly, guarantees
  // that a file producing *no* highlights still gets a (empty) cache entry;
  // otherwise such a file would be lexed again on every call.
  RelexRewriteCache::RawHighlightList Recorded;

  // "Every time you would call HighlightRange, cache the inputs as well."
  auto HighlightRangeCallback = [&](RewriteBuffer &RB, unsigned B, unsigned E,
                                    const char *BufferStart,
                                    const char *StartTag, const char *EndTag) {
    HighlightRange(RB, B, E, BufferStart, StartTag, EndTag);

    if (Cache)
      Recorded.push_back({B, E, StartTag, EndTag});
  };

  SyntaxHighlightImpl(R, FID, PP, HighlightRangeCallback);

  if (Cache)
    Cache->SyntaxHighlights[FID].swap(Recorded);
}

static void HighlightMacrosImpl(
Rewriter &R, FileID FID, const Preprocessor &PP,
llvm::function_ref<void(Rewriter &, SourceLocation, SourceLocation,
const char *, const char *, bool)>
HighlightRangeCallback) {

/// HighlightMacros - This uses the macro table state from the end of the
/// file, to re-expand macros and insert (into the HTML) information about the
/// macro expansions. This won't be perfectly perfect, but it will be
/// reasonably close.
void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
// Re-lex the raw token stream into a token buffer.
const SourceManager &SM = PP.getSourceManager();
std::vector<Token> TokenStream;
Expand Down Expand Up @@ -659,11 +723,44 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
// get highlighted.
Expansion = "<span class='macro_popup'>" + Expansion + "</span></span>";

HighlightRange(R, LLoc.getBegin(), LLoc.getEnd(), "<span class='macro'>",
Expansion.c_str(), LLoc.isTokenRange());
HighlightRangeCallback(R, LLoc.getBegin(), LLoc.getEnd(),
"<span class='macro'>", Expansion.c_str(),
LLoc.isTokenRange());
}

// Restore the preprocessor's old state.
TmpPP.setDiagnostics(*OldDiags);
TmpPP.setPragmasEnabled(PragmasPreviouslyEnabled);
}

/// HighlightMacros - This uses the macro table state from the end of the
/// file, to re-expand macros and insert (into the HTML) information about the
/// macro expansions. This won't be perfectly perfect, but it will be
/// reasonably close.
///
/// \param R     Rewriter that receives the macro annotations.
/// \param FID   File whose macro expansions are annotated.
/// \param PP    Preprocessor handed on to the re-expansion pass.
/// \param Cache Optional cache. When it already holds an entry for \p FID the
///              recorded highlights are replayed and the re-expansion pass is
///              skipped. Otherwise the pass runs and, if \p Cache is non-null,
///              the resulting highlights are stored under \p FID.
void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor &PP,
                           RelexRewriteCacheRef Cache) {
  // Fast path: replay what an earlier call recorded for this file.
  if (Cache) {
    auto CacheIt = Cache->MacroHighlights.find(FID);
    if (CacheIt != Cache->MacroHighlights.end()) {
      for (const RelexRewriteCache::Highlight &H : CacheIt->second) {
        HighlightRange(R, H.B, H.E, H.StartTag.c_str(), H.EndTag.c_str(),
                       H.IsTokenRange);
      }
      return;
    }
  }

  // Highlights are gathered locally and published in one step afterwards, so
  // that a file with no macro highlights still gets a (empty) cache entry and
  // the re-expansion pass is not repeated for it on every call.
  RelexRewriteCache::HighlightList Recorded;

  // "Every time you would call HighlightRange, cache the inputs as well."
  // The tags are copied into std::string members because the pointers handed
  // to the callback are not guaranteed to outlive the call.
  auto HighlightRangeCallback = [&](Rewriter &R, SourceLocation B,
                                    SourceLocation E, const char *StartTag,
                                    const char *EndTag, bool isTokenRange) {
    HighlightRange(R, B, E, StartTag, EndTag, isTokenRange);

    if (Cache)
      Recorded.push_back({B, E, StartTag, EndTag, isTokenRange});
  };

  HighlightMacrosImpl(R, FID, PP, HighlightRangeCallback);

  if (Cache)
    Cache->MacroHighlights[FID].swap(Recorded);
}
10 changes: 4 additions & 6 deletions clang/lib/StaticAnalyzer/Core/HTMLDiagnostics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ class HTMLDiagnostics : public PathDiagnosticConsumer {
const Preprocessor &PP;
const bool SupportsCrossFileDiagnostics;
llvm::StringSet<> EmittedHashes;
html::RelexRewriteCacheRef RewriterCache =
html::instantiateRelexRewriteCache();

public:
HTMLDiagnostics(PathDiagnosticConsumerOptions DiagOpts,
Expand Down Expand Up @@ -309,10 +311,6 @@ void HTMLDiagnostics::ReportDiag(const PathDiagnostic& D,
return;
}

// FIXME: This causes each file to be re-parsed and syntax-highlighted
// and macro-expanded separately for each report. We could cache such rewrites
// across all reports and only re-do the part that's actually different:
// the warning/note bubbles.
std::string report = GenerateHTML(D, R, SMgr, path, declName.c_str());
if (report.empty()) {
llvm::errs() << "warning: no diagnostics generated for main file.\n";
Expand Down Expand Up @@ -882,8 +880,8 @@ void HTMLDiagnostics::RewriteFile(Rewriter &R, const PathPieces &path,
// If we have a preprocessor, relex the file and syntax highlight.
// We might not have a preprocessor if we come from a deserialized AST file,
// for example.
html::SyntaxHighlight(R, FID, PP);
html::HighlightMacros(R, FID, PP);
html::SyntaxHighlight(R, FID, PP, RewriterCache);
html::HighlightMacros(R, FID, PP, RewriterCache);
}

void HTMLDiagnostics::HandlePiece(Rewriter &R, FileID BugFileID,
Expand Down
29 changes: 29 additions & 0 deletions clang/test/Analysis/html_diagnostics/counter.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// RUN: rm -fR %t
// RUN: mkdir %t
// RUN: %clang_analyze_cc1 -analyzer-checker=core \
// RUN: -analyzer-output=html -o %t -verify %s
// RUN: grep -v CHECK %t/report-*.html | FileCheck %s


// First report: stores a macro-produced value through a pointer that was
// explicitly set to null, which the analyzer is expected to flag.
void foo() {
int *x = 0;
*x = __COUNTER__; // expected-warning{{Dereference of null pointer (loaded from variable 'x')}}
}

// Second report: stores a macro-produced value through a pointer that was
// never initialized, which the analyzer is expected to flag.
void bar() {
int *y;
*y = __COUNTER__; // expected-warning{{Dereference of undefined pointer value (loaded from variable 'y')}}
}

// The checks below confirm that both reports have the same values for __COUNTER__.
//
// FIXME: The correct values are (0, 1, 0, 1). Because we re-lex the file in order
// to detect macro expansions for HTML report purposes, they turn into (2, 3, 2, 3)
// by the time we emit HTML. But at least it's better than (2, 3, 4, 5)
// which would have been the case if we re-lexed the file *each* time we
// emitted an HTML report.

// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>2</span></span>
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>3</span></span>
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>2</span></span>
// CHECK: <span class='macro'>__COUNTER__<span class='macro_popup'>3</span></span>