Skip to content

[demangle] Represent a char array initializer as a string literal. #109021

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 115 additions & 1 deletion libcxxabi/src/demangle/ItaniumDemangle.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ template <class T, size_t N> class PODSmallVector {
}
};

class NodeArray;

// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
Expand Down Expand Up @@ -293,6 +295,13 @@ class Node {
// implementation.
virtual void printRight(OutputBuffer &) const {}

// Hook that lets a type node take over the printing of an initializer list
// of that type. Returns true if a custom representation was printed; returns
// false if nothing has been printed and the default representation should be
// used instead. This base implementation never customizes the output and
// ignores both arguments.
virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const {
return false;
}

virtual std::string_view getBaseName() const { return {}; }

// Silence compiler warnings, this dtor will never be called.
Expand Down Expand Up @@ -339,6 +348,10 @@ class NodeArray {
FirstElement = false;
}
}

// Print an array of integer literals as a string literal. Returns whether we
// could do so.
bool printAsString(OutputBuffer &OB) const;
};

struct NodeArrayNode : Node {
Expand Down Expand Up @@ -796,6 +809,15 @@ class ArrayType final : public Node {
OB += "]";
Base->printRight(OB);
}

// Custom initializer-list printing for arrays: when the element type is the
// plain name "char", try to render the elements as a string literal. For any
// other element type nothing is printed and false is returned.
bool printInitListAsType(OutputBuffer &OB,
                         const NodeArray &Elements) const override {
  // Only a NameType can be safely downcast and asked for its name.
  if (Base->getKind() != KNameType)
    return false;
  if (static_cast<const NameType *>(Base)->getName() != "char")
    return false;
  // The result reports whether the elements could be printed as a string.
  return Elements.printAsString(OB);
}
};

class FunctionType final : public Node {
Expand Down Expand Up @@ -2225,8 +2247,11 @@ class InitListExpr : public Node {
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }

void printLeft(OutputBuffer &OB) const override {
if (Ty)
if (Ty) {
if (Ty->printInitListAsType(OB, Inits))
return;
Ty->print(OB);
}
OB += '{';
Inits.printWithComma(OB);
OB += '}';
Expand Down Expand Up @@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node {
if (Type.size() <= 3)
OB += Type;
}

// Read-only access to the stored Value text of this literal, returned as a
// non-owning string view.
std::string_view value() const { return Value; }
};

class RequiresExpr : public Node {
Expand Down Expand Up @@ -2604,6 +2631,93 @@ template<typename NodeT> struct NodeKind;
};
#include "ItaniumNodes.def"

// Attempts to print this array as a double-quoted string literal, treating
// each element as one character code. This succeeds only when every element
// is an IntegerLiteral whose value text consists solely of decimal digits and
// denotes a number no greater than 255. On success the literal has been
// appended to OB and true is returned; otherwise OB is rewound to the position
// it had on entry and false is returned.
inline bool NodeArray::printAsString(OutputBuffer &OB) const {
  // Remember the entry position so a partially written literal can be
  // discarded if an unsupported element turns up.
  const auto EntryPos = OB.getCurrentPosition();
  auto Rollback = [&OB, EntryPos] {
    OB.setCurrentPosition(EntryPos);
    return false;
  };

  OB += '"';
  bool PrevWasNumericEscape = false;
  for (const Node *Elem : *this) {
    if (Elem->getKind() != Node::KIntegerLiteral)
      return Rollback();

    // Parse the literal's text as an unsigned decimal number. Checking
    // `Code > 25` before each multiply keeps the running value at or below
    // 259, so it cannot overflow; anything above 255 is rejected afterwards.
    int Code = 0;
    for (char Digit : static_cast<const IntegerLiteral *>(Elem)->value()) {
      const bool IsDecimal = Digit >= '0' && Digit <= '9';
      if (!IsDecimal || Code > 25)
        return Rollback();
      Code = Code * 10 + (Digit - '0');
    }
    if (Code > 255)
      return Rollback();

    // A hex digit directly after a numeric escape would be read as part of
    // that escape, so split the literal with `""` first.
    const bool IsHexDigit = (Code >= '0' && Code <= '9') ||
                            (Code >= 'a' && Code <= 'f') ||
                            (Code >= 'A' && Code <= 'F');
    if (PrevWasNumericEscape && IsHexDigit)
      OB += "\"\"";
    PrevWasNumericEscape = false;

    // Characters that have a dedicated escape sequence.
    std::string_view NamedEscape;
    switch (Code) {
    case '\a':
      NamedEscape = "\\a";
      break;
    case '\b':
      NamedEscape = "\\b";
      break;
    case '\f':
      NamedEscape = "\\f";
      break;
    case '\n':
      NamedEscape = "\\n";
      break;
    case '\r':
      NamedEscape = "\\r";
      break;
    case '\t':
      NamedEscape = "\\t";
      break;
    case '\v':
      NamedEscape = "\\v";
      break;
    case '"':
      NamedEscape = "\\\"";
      break;
    case '\\':
      NamedEscape = "\\\\";
      break;
    default:
      break;
    }
    if (!NamedEscape.empty()) {
      OB += NamedEscape;
      continue;
    }

    // The character is assumed to be ASCII; the remaining non-printable ASCII
    // codes get a numeric escape: a single octal digit for values up to 7,
    // otherwise `\x` followed by one or two uppercase hex digits.
    if (Code < 32 || Code == 127) {
      constexpr char HexDigits[] = "0123456789ABCDEF";
      OB += '\\';
      if (Code > 7)
        OB += 'x';
      if (Code >= 16)
        OB += HexDigits[Code >> 4];
      OB += HexDigits[Code & 0xF];
      PrevWasNumericEscape = true;
      continue;
    }

    // Everything else is assumed to be directly printable.
    OB += static_cast<char>(Code);
  }
  OB += '"';
  return true;
}

template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
Expand Down
27 changes: 26 additions & 1 deletion libcxxabi/test/test_demangle.pass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30037,7 +30037,32 @@ const char* cases[][2] =
// FIXME: This is not valid pointer-to-member syntax.
{"_Z1fIXtl1DmcM7DerivedKiadL_ZN11MoreDerived1zEEn8EEEEvv", "void f<D{(int const Derived::*)(&MoreDerived::z)}>()"},
{"_Z1fIXtl1Edi1nLi42EEEEvv", "void f<E{.n = 42}>()"},
{"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{char [32]{(char)104, (char)101, (char)108, (char)108, (char)111, (char)32, (char)119, (char)111, (char)114, (char)108, (char)100}}"},
// Arrays of char are formatted as string literals. Escape sequences are
// used for non-printable ASCII characters.
// FIXME: We should do the same for arrays of charN_t and wchar_t.
{"_ZTAXtl1StlA32_cLc104ELc101ELc108ELc108ELc111ELc32ELc119ELc111ELc114ELc108ELc100EEEE", "template parameter object for S{\"hello world\"}"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"Hello\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc108ELc111EEEEEvv", "void f<Hello{\"Helo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc0ELc108ELc111EEEEEvv", "void f<Hello{\"He\\0lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc1ELc108ELc111EEEEEvv", "void f<Hello{\"He\\1lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc6ELc108ELc111EEEEEvv", "void f<Hello{\"He\\6lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc7ELc108ELc111EEEEEvv", "void f<Hello{\"He\\alo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc8ELc108ELc111EEEEEvv", "void f<Hello{\"He\\blo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc9ELc108ELc111EEEEEvv", "void f<Hello{\"He\\tlo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc10ELc108ELc111EEEEEvv", "void f<Hello{\"He\\nlo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc11ELc108ELc111EEEEEvv", "void f<Hello{\"He\\vlo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc12ELc108ELc111EEEEEvv", "void f<Hello{\"He\\flo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc13ELc108ELc111EEEEEvv", "void f<Hello{\"He\\rlo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc14ELc108ELc111EEEEEvv", "void f<Hello{\"He\\xElo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc15ELc108ELc111EEEEEvv", "void f<Hello{\"He\\xFlo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc16ELc108ELc111EEEEEvv", "void f<Hello{\"He\\x10lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc34ELc108ELc111EEEEEvv", "void f<Hello{\"He\\\"lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc72ELc101ELc92ELc108ELc111EEEEEvv", "void f<Hello{\"He\\\\lo\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc15ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"\\xF\"\"ello\"}>()"},
{"_Z1fIXtl5HellotlA6_cLc240ELc159ELc152ELc138ELc33EEEEEvv", "void f<Hello{\"😊!\"}>()"},
// Even non-null-terminated strings get this treatment, even though this
// isn't valid C++ syntax to initialize an array of char.
{"_Z1fIXtl5HellotlA5_cLc72ELc101ELc108ELc108ELc111EEEEEvv", "void f<Hello{\"Hello\"}>()"},

// FIXME: This is wrong; the S2_ backref should expand to OT_ and then to
// "double&&". But we can't cope with a substitution that represents a
Expand Down
116 changes: 115 additions & 1 deletion llvm/include/llvm/Demangle/ItaniumDemangle.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ template <class T, size_t N> class PODSmallVector {
}
};

class NodeArray;

// Base class of all AST nodes. The AST is built by the parser, then is
// traversed by the printLeft/Right functions to produce a demangled string.
class Node {
Expand Down Expand Up @@ -293,6 +295,13 @@ class Node {
// implementation.
virtual void printRight(OutputBuffer &) const {}

// Hook that lets a type node take over the printing of an initializer list
// of that type. Returns true if a custom representation was printed; returns
// false if nothing has been printed and the default representation should be
// used instead. This base implementation never customizes the output and
// ignores both arguments.
virtual bool printInitListAsType(OutputBuffer &, const NodeArray &) const {
return false;
}

virtual std::string_view getBaseName() const { return {}; }

// Silence compiler warnings, this dtor will never be called.
Expand Down Expand Up @@ -339,6 +348,10 @@ class NodeArray {
FirstElement = false;
}
}

// Print an array of integer literals as a string literal. Returns whether we
// could do so.
bool printAsString(OutputBuffer &OB) const;
};

struct NodeArrayNode : Node {
Expand Down Expand Up @@ -796,6 +809,15 @@ class ArrayType final : public Node {
OB += "]";
Base->printRight(OB);
}

// Custom initializer-list printing for arrays: when the element type is the
// plain name "char", try to render the elements as a string literal. For any
// other element type nothing is printed and false is returned.
bool printInitListAsType(OutputBuffer &OB,
                         const NodeArray &Elements) const override {
  // Only a NameType can be safely downcast and asked for its name.
  if (Base->getKind() != KNameType)
    return false;
  if (static_cast<const NameType *>(Base)->getName() != "char")
    return false;
  // The result reports whether the elements could be printed as a string.
  return Elements.printAsString(OB);
}
};

class FunctionType final : public Node {
Expand Down Expand Up @@ -2225,8 +2247,11 @@ class InitListExpr : public Node {
template<typename Fn> void match(Fn F) const { F(Ty, Inits); }

void printLeft(OutputBuffer &OB) const override {
if (Ty)
if (Ty) {
if (Ty->printInitListAsType(OB, Inits))
return;
Ty->print(OB);
}
OB += '{';
Inits.printWithComma(OB);
OB += '}';
Expand Down Expand Up @@ -2433,6 +2458,8 @@ class IntegerLiteral : public Node {
if (Type.size() <= 3)
OB += Type;
}

// Read-only access to the stored Value text of this literal, returned as a
// non-owning string view.
std::string_view value() const { return Value; }
};

class RequiresExpr : public Node {
Expand Down Expand Up @@ -2604,6 +2631,93 @@ template<typename NodeT> struct NodeKind;
};
#include "ItaniumNodes.def"

// Attempts to print this array as a double-quoted string literal, treating
// each element as one character code. This succeeds only when every element
// is an IntegerLiteral whose value text consists solely of decimal digits and
// denotes a number no greater than 255. On success the literal has been
// appended to OB and true is returned; otherwise OB is rewound to the position
// it had on entry and false is returned.
inline bool NodeArray::printAsString(OutputBuffer &OB) const {
  // Remember the entry position so a partially written literal can be
  // discarded if an unsupported element turns up.
  const auto EntryPos = OB.getCurrentPosition();
  auto Rollback = [&OB, EntryPos] {
    OB.setCurrentPosition(EntryPos);
    return false;
  };

  OB += '"';
  bool PrevWasNumericEscape = false;
  for (const Node *Elem : *this) {
    if (Elem->getKind() != Node::KIntegerLiteral)
      return Rollback();

    // Parse the literal's text as an unsigned decimal number. Checking
    // `Code > 25` before each multiply keeps the running value at or below
    // 259, so it cannot overflow; anything above 255 is rejected afterwards.
    int Code = 0;
    for (char Digit : static_cast<const IntegerLiteral *>(Elem)->value()) {
      const bool IsDecimal = Digit >= '0' && Digit <= '9';
      if (!IsDecimal || Code > 25)
        return Rollback();
      Code = Code * 10 + (Digit - '0');
    }
    if (Code > 255)
      return Rollback();

    // A hex digit directly after a numeric escape would be read as part of
    // that escape, so split the literal with `""` first.
    const bool IsHexDigit = (Code >= '0' && Code <= '9') ||
                            (Code >= 'a' && Code <= 'f') ||
                            (Code >= 'A' && Code <= 'F');
    if (PrevWasNumericEscape && IsHexDigit)
      OB += "\"\"";
    PrevWasNumericEscape = false;

    // Characters that have a dedicated escape sequence.
    std::string_view NamedEscape;
    switch (Code) {
    case '\a':
      NamedEscape = "\\a";
      break;
    case '\b':
      NamedEscape = "\\b";
      break;
    case '\f':
      NamedEscape = "\\f";
      break;
    case '\n':
      NamedEscape = "\\n";
      break;
    case '\r':
      NamedEscape = "\\r";
      break;
    case '\t':
      NamedEscape = "\\t";
      break;
    case '\v':
      NamedEscape = "\\v";
      break;
    case '"':
      NamedEscape = "\\\"";
      break;
    case '\\':
      NamedEscape = "\\\\";
      break;
    default:
      break;
    }
    if (!NamedEscape.empty()) {
      OB += NamedEscape;
      continue;
    }

    // The character is assumed to be ASCII; the remaining non-printable ASCII
    // codes get a numeric escape: a single octal digit for values up to 7,
    // otherwise `\x` followed by one or two uppercase hex digits.
    if (Code < 32 || Code == 127) {
      constexpr char HexDigits[] = "0123456789ABCDEF";
      OB += '\\';
      if (Code > 7)
        OB += 'x';
      if (Code >= 16)
        OB += HexDigits[Code >> 4];
      OB += HexDigits[Code & 0xF];
      PrevWasNumericEscape = true;
      continue;
    }

    // Everything else is assumed to be directly printable.
    OB += static_cast<char>(Code);
  }
  OB += '"';
  return true;
}

template <typename Derived, typename Alloc> struct AbstractManglingParser {
const char *First;
const char *Last;
Expand Down
Loading