Skip to content

[StrTable] Switch intrinsics to StringTable and work around MSVC #123548

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jan 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions clang/utils/TableGen/ClangDiagnosticsEmitter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1785,8 +1785,7 @@ static void emitDiagArrays(DiagsInGroupTy &DiagsInGroup,
/// This creates an `llvm::StringTable` of all the diagnostic group names.
static void emitDiagGroupNames(const StringToOffsetTable &GroupNames,
raw_ostream &OS) {
GroupNames.EmitStringLiteralDef(
OS, "static constexpr llvm::StringTable DiagGroupNames");
GroupNames.EmitStringTableDef(OS, "DiagGroupNames");
OS << "\n";
}

Expand Down Expand Up @@ -1939,9 +1938,6 @@ void clang::EmitClangDiagGroups(const RecordKeeper &Records, raw_ostream &OS) {
inferPedantic.compute(&DiagsInPedantic, &GroupsInPedantic);

StringToOffsetTable GroupNames;
// Add an empty string to the table first so we can use `llvm::StringTable`.
// TODO: Factor this into `StringToOffsetTable`.
GroupNames.GetOrAddStringOffset("");
for (const auto &[Name, Group] : DiagsInGroup) {
GroupNames.GetOrAddStringOffset(Name);
}
Expand Down
9 changes: 3 additions & 6 deletions llvm/include/llvm/ADT/StringTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,11 @@ class StringTable {
// support `constexpr`.
assert(!Table.empty() && "Requires at least a valid empty string.");
assert(Table.data()[0] == '\0' && "Offset zero must be the empty string.");
// Ensure that `strlen` from any offset cannot overflow the end of the table
// by insisting on a null byte at the end. We also insist on the last string
// within the table being *separately* null terminated. This structure is
// used to enable predictable iteration over all the strings when needed.
// Regardless of how many strings are in the table, the last one should also
// be null terminated. This also ensures that computing `strlen` on the
// strings can't accidentally run past the end of the table.
assert(Table.data()[Table.size() - 1] == '\0' &&
"Last byte must be a null byte.");
assert(Table.data()[Table.size() - 2] == '\0' &&
"Next-to-last byte must be a null byte.");
}

// Get a string from the table starting with the provided offset. The returned
Expand Down
93 changes: 61 additions & 32 deletions llvm/include/llvm/TableGen/StringToOffsetTable.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,12 @@ class StringToOffsetTable {
std::string AggregateString;

public:
StringToOffsetTable() {
// Ensure we always put the empty string at offset zero. That lets empty
// initialization also be zero initialization for offsets into the table.
GetOrAddStringOffset("");
}

bool empty() const { return StringOffset.empty(); }
size_t size() const { return AggregateString.size(); }

Expand All @@ -51,28 +57,71 @@ class StringToOffsetTable {
return II->second;
}

// Emit the string using string literal concatenation, for better readability
// and searchability.
void EmitStringLiteralDef(raw_ostream &OS, const Twine &Decl,
const Twine &Indent = " ") const {
// Emit a string table definition with the provided name and indent.
//
// When possible, this uses string-literal concatenation to emit the string
// contents in a readable and searchable way. However, for (very) large string
// tables MSVC cannot reliably use string literals and so there we use a large
// character array. We still use a line oriented emission and add comments to
// provide searchability even in this case.
//
// The string table, and its input string contents, are always emitted as both
// `static` and `constexpr`. Both `Name` and (`Name` + "Storage") must be
// valid identifiers to declare.
void EmitStringTableDef(raw_ostream &OS, const Twine &Name,
const Twine &Indent = "") const {
OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Woverlength-strings"
#endif
{0}{1} = )",
Indent, Decl);
{0}static constexpr char {1}Storage[] = )",
Indent, Name);

// MSVC silently miscompiles string literals longer than 64k in some
// circumstances. When the string table is longer, emit it as an array of
// character literals.
bool UseChars = AggregateString.size() > (64 * 1024);
OS << (UseChars ? "{\n" : "\n");

llvm::ListSeparator LineSep(UseChars ? ",\n" : "\n");
llvm::SmallVector<StringRef> Strings(split(AggregateString, '\0'));
// We should always have an empty string at the start, and because these are
// null terminators rather than separators, we'll have one at the end as
// well. Skip the end one.
assert(Strings.front().empty() && "Expected empty initial string!");
assert(Strings.back().empty() &&
"Expected empty string at the end due to terminators!");
Strings.pop_back();
for (StringRef Str : Strings) {
OS << LineSep << Indent << " ";
// If we can, just emit this as a string literal to be concatenated.
if (!UseChars) {
OS << "\"";
OS.write_escaped(Str);
OS << "\\0\"";
continue;
}

for (StringRef Str : split(AggregateString, '\0')) {
OS << "\n" << Indent << " \"";
OS.write_escaped(Str);
OS << "\\0\"";
llvm::ListSeparator CharSep(", ");
for (char C : Str) {
OS << CharSep << "'";
OS.write_escaped(StringRef(&C, 1));
OS << "'";
}
OS << CharSep << "'\\0'";
}
OS << R"(;
OS << LineSep << Indent << (UseChars ? "};" : " ;");

OS << formatv(R"(
#ifdef __GNUC__
#pragma GCC diagnostic pop
#endif
)";

{0}static constexpr llvm::StringTable {1} =
{0} {1}Storage;
)",
Indent, Name);
}

// Emit the string as one single string.
Expand Down Expand Up @@ -110,26 +159,6 @@ class StringToOffsetTable {
}
O << "\"";
}

/// Emit the string using character literals. MSVC has a limitation that
/// string literals cannot be longer than 64K.
void EmitCharArray(raw_ostream &O) {
assert(AggregateString.find(')') == std::string::npos &&
"can't emit raw string with closing parens");
int Count = 0;
O << ' ';
for (char C : AggregateString) {
O << " \'";
O.write_escaped(StringRef(&C, 1));
O << "\',";
Count++;
if (Count > 14) {
O << "\n ";
Count = 0;
}
}
O << '\n';
}
};

} // end namespace llvm
Expand Down
19 changes: 10 additions & 9 deletions llvm/lib/IR/Intrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

#include "llvm/IR/Intrinsics.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringTable.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
Expand Down Expand Up @@ -40,7 +41,7 @@ using namespace llvm;

StringRef Intrinsic::getBaseName(ID id) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
return IntrinsicNameTable + IntrinsicNameOffsetTable[id];
return IntrinsicNameTable[IntrinsicNameOffsetTable[id]];
}

StringRef Intrinsic::getName(ID id) {
Expand Down Expand Up @@ -649,20 +650,20 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,
// `equal_range` requires the comparison to work with either side being an
// offset or the value. Detect which kind each side is to set up the
// compared strings.
const char *LHSStr;
StringRef LHSStr;
if constexpr (std::is_integral_v<decltype(LHS)>) {
LHSStr = &IntrinsicNameTable[LHS];
LHSStr = IntrinsicNameTable[LHS];
} else {
LHSStr = LHS;
}
const char *RHSStr;
StringRef RHSStr;
if constexpr (std::is_integral_v<decltype(RHS)>) {
RHSStr = &IntrinsicNameTable[RHS];
RHSStr = IntrinsicNameTable[RHS];
} else {
RHSStr = RHS;
}
return strncmp(LHSStr + CmpStart, RHSStr + CmpStart, CmpEnd - CmpStart) <
0;
return strncmp(LHSStr.data() + CmpStart, RHSStr.data() + CmpStart,
CmpEnd - CmpStart) < 0;
};
LastLow = Low;
std::tie(Low, High) = std::equal_range(Low, High, Name.data(), Cmp);
Expand All @@ -672,7 +673,7 @@ static int lookupLLVMIntrinsicByName(ArrayRef<unsigned> NameOffsetTable,

if (LastLow == NameOffsetTable.end())
return -1;
StringRef NameFound = &IntrinsicNameTable[*LastLow];
StringRef NameFound = IntrinsicNameTable[*LastLow];
if (Name == NameFound ||
(Name.starts_with(NameFound) && Name[NameFound.size()] == '.'))
return LastLow - NameOffsetTable.begin();
Expand Down Expand Up @@ -716,7 +717,7 @@ Intrinsic::ID Intrinsic::lookupIntrinsicID(StringRef Name) {

// If the intrinsic is not overloaded, require an exact match. If it is
// overloaded, require either exact or prefix match.
const auto MatchSize = strlen(&IntrinsicNameTable[NameOffsetTable[Idx]]);
const auto MatchSize = IntrinsicNameTable[NameOffsetTable[Idx]].size();
assert(Name.size() >= MatchSize && "Expected either exact or prefix match");
bool IsExactMatch = Name.size() == MatchSize;
return IsExactMatch || Intrinsic::isOverloaded(ID) ? ID
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/TableGen/MixedCasedMnemonic.td
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def :MnemonicAlias<"InstB", "BInst">;

// Check that the matcher lower()s the mnemonics it matches.
// MATCHER: static const char MnemonicTable[] =
// MATCHER-NEXT: "\005ainst\005binst";
// MATCHER-NEXT: "\000\005ainst\005binst";

// Check that aInst appears before BInst in the match table.
// This shows that the mnemonics are sorted in a case-insensitive way,
Expand Down
16 changes: 9 additions & 7 deletions llvm/test/TableGen/SDNodeInfoEmitter/ambiguous-constraints.td
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@ def my_node_b : SDNode<"MyTargetISD::NODE", SDTypeProfile<1, 0, [SDTCisVT<0, f32
// CHECK-NEXT: NODE = ISD::BUILTIN_OP_END,
// CHECK-NEXT: };

// CHECK: static const char MyTargetSDNodeNames[] =
// CHECK: static constexpr char MyTargetSDNodeNamesStorage[] =
// CHECK-NEXT: "\0"
// CHECK-NEXT: "MyTargetISD::NODE\0"
// CHECK-NEXT: "\0";
// CHECK-NEXT: ;

// CHECK: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeDesc MyTargetSDNodeDescs[] = {
// CHECK-NEXT: {1, 0, 0, 0, 0, 0, 0, 0}, // NODE
// CHECK-NEXT: {1, 0, 0, 0, 0, 1, 0, 0}, // NODE
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
Expand Down Expand Up @@ -54,18 +55,19 @@ def my_node_2b : SDNode<"MyTargetISD::NODE_2", SDTypeProfile<1, 0, [SDTCisVT<0,
// CHECK-EMPTY:
// CHECK-NEXT: } // namespace llvm::MyTargetISD

// CHECK: static const char MyTargetSDNodeNames[] =
// CHECK: static constexpr char MyTargetSDNodeNamesStorage[] =
// CHECK-NEXT: "\0"
// CHECK-NEXT: "MyTargetISD::NODE_1\0"
// CHECK-NEXT: "MyTargetISD::NODE_2\0"
// CHECK-NEXT: "\0";
// CHECK-NEXT: ;

// CHECK: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* 0 */ {SDTCisVT, 0, 0, MVT::i32},
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeDesc MyTargetSDNodeDescs[] = {
// CHECK-NEXT: {1, 0, 0, 0, 0, 0, 0, 1}, // NODE_1
// CHECK-NEXT: {1, 0, 0, 0, 0, 20, 0, 0}, // NODE_2
// CHECK-NEXT: {1, 0, 0, 0, 0, 1, 0, 1}, // NODE_1
// CHECK-NEXT: {1, 0, 0, 0, 0, 21, 0, 0}, // NODE_2
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
Expand Down
26 changes: 16 additions & 10 deletions llvm/test/TableGen/SDNodeInfoEmitter/basic.td
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,16 @@ def MyTarget : Target;
// CHECK-NEXT: #pragma GCC diagnostic push
// CHECK-NEXT: #pragma GCC diagnostic ignored "-Woverlength-strings"
// CHECK-NEXT: #endif
// CHECK-NEXT: static const char MyTargetSDNodeNames[] =
// CHECK-NEXT: "\0";
// CHECK-NEXT: static constexpr char MyTargetSDNodeNamesStorage[] =
// CHECK-NEXT: "\0"
// CHECK-NEXT: ;
// CHECK-NEXT: #ifdef __GNUC__
// CHECK-NEXT: #pragma GCC diagnostic pop
// CHECK-NEXT: #endif
// CHECK-EMPTY:
// CHECK-NEXT: static constexpr llvm::StringTable MyTargetSDNodeNames =
// CHECK-NEXT: MyTargetSDNodeNamesStorage;
// CHECK-EMPTY:
// CHECK-NEXT: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
// CHECK-NEXT: };
Expand Down Expand Up @@ -70,16 +74,17 @@ def my_noop : SDNode<"MyTargetISD::NOOP", SDTypeProfile<0, 0, []>>;
// CHECK-EMPTY:
// CHECK-NEXT: } // namespace llvm::MyTargetISD

// CHECK: static const char MyTargetSDNodeNames[] =
// CHECK: static constexpr char MyTargetSDNodeNamesStorage[] =
// CHECK-NEXT: "\0"
// CHECK-NEXT: "MyTargetISD::NOOP\0"
// CHECK-NEXT: "\0";
// CHECK-NEXT: ;

// CHECK: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeDesc MyTargetSDNodeDescs[] = {
// CHECK-NEXT: {0, 0, 0, 0, 0, 0, 0, 0}, // NOOP
// CHECK-NEXT: {0, 0, 0, 0, 0, 1, 0, 0}, // NOOP
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
Expand Down Expand Up @@ -148,11 +153,12 @@ def my_node_3 : SDNode<
// CHECK-EMPTY:
// CHECK-NEXT: } // namespace llvm::MyTargetISD

// CHECK: static const char MyTargetSDNodeNames[] =
// CHECK: static constexpr char MyTargetSDNodeNamesStorage[] =
// CHECK-NEXT: "\0"
// CHECK-NEXT: "MyTargetISD::NODE_1\0"
// CHECK-NEXT: "MyTargetISD::NODE_2\0"
// CHECK-NEXT: "MyTargetISD::NODE_3\0"
// CHECK-NEXT: "\0";
// CHECK-NEXT: ;

// CHECK: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// CHECK-NEXT: /* 0 */ {SDTCisVT, 1, 0, MVT::i2},
Expand All @@ -173,9 +179,9 @@ def my_node_3 : SDNode<
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeDesc MyTargetSDNodeDescs[] = {
// CHECK-NEXT: {1, 1, 0|1<<SDNPHasChain, 0, 0, 0, 0, 2}, // NODE_1
// CHECK-NEXT: {3, 1, 0|1<<SDNPVariadic|1<<SDNPMemOperand, 0, 42, 20, 11, 4}, // NODE_2
// CHECK-NEXT: {2, -1, 0|1<<SDNPHasChain|1<<SDNPOutGlue|1<<SDNPInGlue|1<<SDNPOptInGlue, 0|1<<SDNFIsStrictFP, 24, 40, 2, 13}, // NODE_3
// CHECK-NEXT: {1, 1, 0|1<<SDNPHasChain, 0, 0, 1, 0, 2}, // NODE_1
// CHECK-NEXT: {3, 1, 0|1<<SDNPVariadic|1<<SDNPMemOperand, 0, 42, 21, 11, 4}, // NODE_2
// CHECK-NEXT: {2, -1, 0|1<<SDNPHasChain|1<<SDNPOutGlue|1<<SDNPInGlue|1<<SDNPOptInGlue, 0|1<<SDNFIsStrictFP, 24, 41, 2, 13}, // NODE_3
// CHECK-NEXT: };
// CHECK-EMPTY:
// CHECK-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
Expand Down
14 changes: 8 additions & 6 deletions llvm/test/TableGen/SDNodeInfoEmitter/namespace.td
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ def node_2 : SDNode<"MyCustomISD::NODE", SDTypeProfile<0, 1, [SDTCisVT<0, i2>]>>
// EMPTY-EMPTY:
// EMPTY-NEXT: } // namespace llvm::EmptyISD

// EMPTY: static const char MyTargetSDNodeNames[] =
// EMPTY-NEXT: "\0";
// EMPTY: static constexpr char MyTargetSDNodeNamesStorage[] =
// EMPTY-NEXT: "\0"
// EMPTY-NEXT: ;

// EMPTY: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// EMPTY-NEXT: /* dummy */ {SDTCisVT, 0, 0, MVT::INVALID_SIMPLE_VALUE_TYPE}
Expand All @@ -43,18 +44,19 @@ def node_2 : SDNode<"MyCustomISD::NODE", SDTypeProfile<0, 1, [SDTCisVT<0, i2>]>>
// COMMON-EMPTY:
// COMMON-NEXT: } // namespace llvm::[[NS]]

// COMMON: static const char MyTargetSDNodeNames[] =
// COMMON: static constexpr char MyTargetSDNodeNamesStorage[] =
// COMMON-NEXT: "\0"
// COMMON-NEXT: "[[NS]]::NODE\0"
// COMMON-NEXT: "\0";
// COMMON-NEXT: ;

// COMMON: static const SDTypeConstraint MyTargetSDTypeConstraints[] = {
// TARGET-NEXT: /* 0 */ {SDTCisVT, 0, 0, MVT::i1},
// CUSTOM-NEXT: /* 0 */ {SDTCisVT, 0, 0, MVT::i2},
// COMMON-NEXT: };
// COMMON-EMPTY:
// COMMON-NEXT: static const SDNodeDesc MyTargetSDNodeDescs[] = {
// TARGET-NEXT: {1, 0, 0, 0, 0, 0, 0, 1}, // NODE
// CUSTOM-NEXT: {0, 1, 0, 0, 0, 0, 0, 1}, // NODE
// TARGET-NEXT: {1, 0, 0, 0, 0, 1, 0, 1}, // NODE
// CUSTOM-NEXT: {0, 1, 0, 0, 0, 1, 0, 1}, // NODE
// COMMON-NEXT: };
// COMMON-EMPTY:
// COMMON-NEXT: static const SDNodeInfo MyTargetGenSDNodeInfo(
Expand Down
Loading
Loading