Skip to content

Commit 0cfedb1

Browse files
chandlerc, Icohedron
authored and committed
[StrTable] Switch Clang builtins to use string tables
This both reapplies llvm#118734, the initial attempt at this, and updates it significantly. First, it uses the newly added `StringTable` abstraction for string tables, and simplifies the construction to build the string table and info arrays separately. This should reduce any `constexpr` compile time memory or CPU cost of the original PR while significantly improving the APIs throughout. It also restructures the builtins to support sharding across several independent tables. This accomplishes three improvements over the original PR: 1) It improves the APIs used significantly. 2) When builtins are defined from different sources (like SVE vs MVE in AArch64), this allows each of them to build their own string table independently rather than having to merge the string tables and info structures. 3) It allows each shard to factor out a common prefix, often cutting the size of the strings needed for the builtins by a factor of two. The second point is important both because it allows different mechanisms of construction (for example a `.def` file and a tablegen'ed `.inc` file, or different tablegen'ed `.inc` files) and because it simply reduces the sizes of these tables, which is valuable given how large they are in some cases. The third builds on that size reduction. Initially, we use this new sharding rather than merging tables in AArch64, LoongArch, RISCV, and X86. Mostly this helps ensure the system works, as without further changes these still push scaling limits. Subsequent commits will more deeply leverage the new structure, including using the prefix capabilities, which cannot be easily factored out here and require deep changes to the targets.
1 parent da7805e commit 0cfedb1

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

57 files changed

+814
-365
lines changed

clang/include/clang/Basic/Builtins.h

Lines changed: 189 additions & 38 deletions
Large diffs are not rendered by default.

clang/include/clang/Basic/BuiltinsLoongArch.def

Lines changed: 0 additions & 28 deletions
This file was deleted.

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,5 +1138,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
11381138
// FIXME: Obviously incomplete.
11391139

11401140
#undef BUILTIN
1141+
#undef TARGET_BUILTIN
11411142
#undef CUSTOM_BUILTIN
11421143
#undef UNALIASED_CUSTOM_BUILTIN

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,16 @@ namespace clang {
178178
namespace LoongArch {
179179
enum {
180180
LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
181-
#define BUILTIN(ID, TYPE, ATTRS) BI##ID,
182-
#include "clang/Basic/BuiltinsLoongArch.def"
181+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
182+
#include "clang/Basic/BuiltinsLoongArchBase.def"
183+
FirstLSXBuiltin,
184+
LastBaseBuiltin = FirstLSXBuiltin - 1,
185+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
186+
#include "clang/Basic/BuiltinsLoongArchLSX.def"
187+
FirstLASXBuiltin,
188+
LastLSXBuiltin = FirstLASXBuiltin - 1,
189+
#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BI##ID,
190+
#include "clang/Basic/BuiltinsLoongArchLASX.def"
183191
LastTSBuiltin
184192
};
185193
} // namespace LoongArch

clang/include/clang/Basic/TargetInfo.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#include "clang/Basic/AddressSpaces.h"
1818
#include "clang/Basic/BitmaskEnum.h"
19+
#include "clang/Basic/Builtins.h"
1920
#include "clang/Basic/CFProtectionOptions.h"
2021
#include "clang/Basic/CodeGenOptions.h"
2122
#include "clang/Basic/LLVM.h"
@@ -32,6 +33,7 @@
3233
#include "llvm/ADT/StringMap.h"
3334
#include "llvm/ADT/StringRef.h"
3435
#include "llvm/ADT/StringSet.h"
36+
#include "llvm/ADT/StringTable.h"
3537
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
3638
#include "llvm/IR/DerivedTypes.h"
3739
#include "llvm/Support/DataTypes.h"
@@ -1016,10 +1018,10 @@ class TargetInfo : public TransferrableTargetInfo,
10161018
virtual void getTargetDefines(const LangOptions &Opts,
10171019
MacroBuilder &Builder) const = 0;
10181020

1019-
/// Return information about target-specific builtins for
1020-
/// the current primary target, and info about which builtins are non-portable
1021-
/// across the current set of primary and secondary targets.
1022-
virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
1021+
/// Return information about target-specific builtins for the current primary
1022+
/// target, and info about which builtins are non-portable across the current
1023+
/// set of primary and secondary targets.
1024+
virtual llvm::SmallVector<Builtin::InfosShard> getTargetBuiltins() const = 0;
10231025

10241026
/// Returns target-specific min and max values VScale_Range.
10251027
virtual std::optional<std::pair<unsigned, unsigned>>

clang/include/module.modulemap

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,6 @@ module Clang_Basic {
4545
textual header "clang/Basic/BuiltinsAMDGPU.def"
4646
textual header "clang/Basic/BuiltinsARM.def"
4747
textual header "clang/Basic/BuiltinsHexagonMapCustomDep.def"
48-
textual header "clang/Basic/BuiltinsLoongArch.def"
4948
textual header "clang/Basic/BuiltinsLoongArchBase.def"
5049
textual header "clang/Basic/BuiltinsLoongArchLASX.def"
5150
textual header "clang/Basic/BuiltinsLoongArchLSX.def"

clang/lib/Basic/Builtins.cpp

Lines changed: 131 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -29,54 +29,124 @@ const char *HeaderDesc::getName() const {
2929
llvm_unreachable("Unknown HeaderDesc::HeaderID enum");
3030
}
3131

32-
static constexpr Builtin::Info BuiltinInfo[] = {
33-
{"not a builtin function", nullptr, nullptr, nullptr, HeaderDesc::NO_HEADER,
34-
ALL_LANGUAGES},
35-
#define BUILTIN(ID, TYPE, ATTRS) \
36-
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
37-
#define LANGBUILTIN(ID, TYPE, ATTRS, LANGS) \
38-
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, LANGS},
39-
#define LIBBUILTIN(ID, TYPE, ATTRS, HEADER, LANGS) \
40-
{#ID, TYPE, ATTRS, nullptr, HeaderDesc::HEADER, LANGS},
32+
static constexpr llvm::StringTable BuiltinStrings =
33+
CLANG_BUILTIN_STR_TABLE_START
34+
// We inject a non-builtin string into the table.
35+
CLANG_BUILTIN_STR_TABLE("not a builtin function", "", "")
36+
#define BUILTIN CLANG_BUILTIN_STR_TABLE
4137
#include "clang/Basic/Builtins.inc"
42-
};
38+
;
39+
static_assert(BuiltinStrings.size() < 100'000);
40+
41+
static constexpr auto BuiltinInfos =
42+
Builtin::MakeInfos<Builtin::FirstTSBuiltin>(
43+
{CLANG_BUILTIN_ENTRY("not a builtin function", "", "")
44+
#define BUILTIN CLANG_BUILTIN_ENTRY
45+
#define LANGBUILTIN CLANG_LANGBUILTIN_ENTRY
46+
#define LIBBUILTIN CLANG_LIBBUILTIN_ENTRY
47+
#include "clang/Basic/Builtins.inc"
48+
});
4349

44-
const Builtin::Info &Builtin::Context::getRecord(unsigned ID) const {
45-
if (ID < Builtin::FirstTSBuiltin)
46-
return BuiltinInfo[ID];
47-
assert(((ID - Builtin::FirstTSBuiltin) <
48-
(TSRecords.size() + AuxTSRecords.size())) &&
50+
std::pair<const Builtin::InfosShard &, const Builtin::Info &>
51+
Builtin::Context::getShardAndInfo(unsigned ID) const {
52+
assert((ID < (Builtin::FirstTSBuiltin + NumTargetBuiltins +
53+
NumAuxTargetBuiltins)) &&
4954
"Invalid builtin ID!");
50-
if (isAuxBuiltinID(ID))
51-
return AuxTSRecords[getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin];
52-
return TSRecords[ID - Builtin::FirstTSBuiltin];
55+
56+
ArrayRef<InfosShard> Shards = BuiltinShards;
57+
if (isAuxBuiltinID(ID)) {
58+
Shards = AuxTargetShards;
59+
ID = getAuxBuiltinID(ID) - Builtin::FirstTSBuiltin;
60+
} else if (ID >= Builtin::FirstTSBuiltin) {
61+
Shards = TargetShards;
62+
ID -= Builtin::FirstTSBuiltin;
63+
}
64+
65+
// Loop over the shards to find the one matching this ID. We don't expect to
66+
// have many shards and so it's better to search linearly than with a binary
67+
// search.
68+
for (const auto &Shard : Shards) {
69+
if (ID < Shard.Infos.size()) {
70+
return {Shard, Shard.Infos[ID]};
71+
}
72+
73+
ID -= Shard.Infos.size();
74+
}
75+
llvm_unreachable("Invalid target builtin shard structure!");
76+
}
77+
78+
std::string Builtin::Info::getName(const Builtin::InfosShard &Shard) const {
79+
return (Twine(Shard.NamePrefix) + (*Shard.Strings)[Offsets.Name]).str();
5380
}
5481

82+
/// Return the identifier name for the specified builtin,
83+
/// e.g. "__builtin_abs".
84+
std::string Builtin::Context::getName(unsigned ID) const {
85+
const auto &[Shard, I] = getShardAndInfo(ID);
86+
return I.getName(Shard);
87+
}
88+
89+
std::string Builtin::Context::getQuotedName(unsigned ID) const {
90+
const auto &[Shard, I] = getShardAndInfo(ID);
91+
return (Twine("'") + Shard.NamePrefix + (*Shard.Strings)[I.Offsets.Name] +
92+
"'")
93+
.str();
94+
}
95+
96+
const char *Builtin::Context::getTypeString(unsigned ID) const {
97+
const auto &[Shard, I] = getShardAndInfo(ID);
98+
return (*Shard.Strings)[I.Offsets.Type].data();
99+
}
100+
101+
const char *Builtin::Context::getAttributesString(unsigned ID) const {
102+
const auto &[Shard, I] = getShardAndInfo(ID);
103+
return (*Shard.Strings)[I.Offsets.Attributes].data();
104+
}
105+
106+
const char *Builtin::Context::getRequiredFeatures(unsigned ID) const {
107+
const auto &[Shard, I] = getShardAndInfo(ID);
108+
return (*Shard.Strings)[I.Offsets.Features].data();
109+
}
110+
111+
Builtin::Context::Context() : BuiltinShards{{&BuiltinStrings, BuiltinInfos}} {}
112+
55113
void Builtin::Context::InitializeTarget(const TargetInfo &Target,
56114
const TargetInfo *AuxTarget) {
57-
assert(TSRecords.empty() && "Already initialized target?");
58-
TSRecords = Target.getTargetBuiltins();
59-
if (AuxTarget)
60-
AuxTSRecords = AuxTarget->getTargetBuiltins();
115+
assert(TargetShards.empty() && "Already initialized target?");
116+
assert(NumTargetBuiltins == 0 && "Already initialized target?");
117+
TargetShards = Target.getTargetBuiltins();
118+
for (const auto &Shard : TargetShards)
119+
NumTargetBuiltins += Shard.Infos.size();
120+
if (AuxTarget) {
121+
AuxTargetShards = AuxTarget->getTargetBuiltins();
122+
for (const auto &Shard : AuxTargetShards)
123+
NumAuxTargetBuiltins += Shard.Infos.size();
124+
}
61125
}
62126

63127
bool Builtin::Context::isBuiltinFunc(llvm::StringRef FuncName) {
64128
bool InStdNamespace = FuncName.consume_front("std-");
65-
for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin;
66-
++i) {
67-
if (FuncName == BuiltinInfo[i].Name &&
68-
(bool)strchr(BuiltinInfo[i].Attributes, 'z') == InStdNamespace)
69-
return strchr(BuiltinInfo[i].Attributes, 'f') != nullptr;
70-
}
129+
for (const auto &Shard : {InfosShard{&BuiltinStrings, BuiltinInfos}})
130+
if (llvm::StringRef FuncNameSuffix = FuncName;
131+
FuncNameSuffix.consume_front(Shard.NamePrefix))
132+
for (const auto &I : Shard.Infos)
133+
if (FuncNameSuffix == (*Shard.Strings)[I.Offsets.Name] &&
134+
(bool)strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'z') ==
135+
InStdNamespace)
136+
return strchr((*Shard.Strings)[I.Offsets.Attributes].data(), 'f') !=
137+
nullptr;
71138

72139
return false;
73140
}
74141

75142
/// Is this builtin supported according to the given language options?
76-
static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
143+
static bool builtinIsSupported(const llvm::StringTable &Strings,
144+
const Builtin::Info &BuiltinInfo,
77145
const LangOptions &LangOpts) {
146+
auto AttributesStr = Strings[BuiltinInfo.Offsets.Attributes];
147+
78148
/* Builtins Unsupported */
79-
if (LangOpts.NoBuiltin && strchr(BuiltinInfo.Attributes, 'f') != nullptr)
149+
if (LangOpts.NoBuiltin && strchr(AttributesStr.data(), 'f') != nullptr)
80150
return false;
81151
/* CorBuiltins Unsupported */
82152
if (!LangOpts.Coroutines && (BuiltinInfo.Langs & COR_LANG))
@@ -123,7 +193,7 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
123193
if (!LangOpts.CPlusPlus && BuiltinInfo.Langs == CXX_LANG)
124194
return false;
125195
/* consteval Unsupported */
126-
if (!LangOpts.CPlusPlus20 && strchr(BuiltinInfo.Attributes, 'G') != nullptr)
196+
if (!LangOpts.CPlusPlus20 && strchr(AttributesStr.data(), 'G') != nullptr)
127197
return false;
128198
return true;
129199
}
@@ -132,22 +202,34 @@ static bool builtinIsSupported(const Builtin::Info &BuiltinInfo,
132202
/// appropriate builtin ID # and mark any non-portable builtin identifiers as
133203
/// such.
134204
void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
135-
const LangOptions& LangOpts) {
136-
// Step #1: mark all target-independent builtins with their ID's.
137-
for (unsigned i = Builtin::NotBuiltin + 1; i != Builtin::FirstTSBuiltin; ++i)
138-
if (builtinIsSupported(BuiltinInfo[i], LangOpts)) {
139-
Table.get(BuiltinInfo[i].Name).setBuiltinID(i);
140-
}
141-
142-
// Step #2: Register target-specific builtins.
143-
for (unsigned i = 0, e = TSRecords.size(); i != e; ++i)
144-
if (builtinIsSupported(TSRecords[i], LangOpts))
145-
Table.get(TSRecords[i].Name).setBuiltinID(i + Builtin::FirstTSBuiltin);
205+
const LangOptions &LangOpts) {
206+
{
207+
unsigned ID = 0;
208+
// Step #1: mark all target-independent builtins with their ID's.
209+
for (const auto &Shard : BuiltinShards)
210+
for (const auto &I : Shard.Infos) {
211+
// If this is a real builtin (ID != 0) and is supported, add it.
212+
if (ID != 0 && builtinIsSupported(*Shard.Strings, I, LangOpts))
213+
Table.get(I.getName(Shard)).setBuiltinID(ID);
214+
++ID;
215+
}
216+
assert(ID == FirstTSBuiltin && "Should have added all non-target IDs!");
217+
218+
// Step #2: Register target-specific builtins.
219+
for (const auto &Shard : TargetShards)
220+
for (const auto &I : Shard.Infos) {
221+
if (builtinIsSupported(*Shard.Strings, I, LangOpts))
222+
Table.get(I.getName(Shard)).setBuiltinID(ID);
223+
++ID;
224+
}
146225

147-
// Step #3: Register target-specific builtins for AuxTarget.
148-
for (unsigned i = 0, e = AuxTSRecords.size(); i != e; ++i)
149-
Table.get(AuxTSRecords[i].Name)
150-
.setBuiltinID(i + Builtin::FirstTSBuiltin + TSRecords.size());
226+
// Step #3: Register target-specific builtins for AuxTarget.
227+
for (const auto &Shard : AuxTargetShards)
228+
for (const auto &I : Shard.Infos) {
229+
Table.get(I.getName(Shard)).setBuiltinID(ID);
230+
++ID;
231+
}
232+
}
151233

152234
// Step #4: Unregister any builtins specified by -fno-builtin-foo.
153235
for (llvm::StringRef Name : LangOpts.NoBuiltinFuncs) {
@@ -163,12 +245,8 @@ void Builtin::Context::initializeBuiltins(IdentifierTable &Table,
163245
}
164246
}
165247

166-
std::string Builtin::Context::getQuotedName(unsigned ID) const {
167-
return (llvm::Twine("'") + getName(ID) + "'").str();
168-
}
169-
170248
unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
171-
const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V');
249+
const char *WidthPos = ::strchr(getAttributesString(ID), 'V');
172250
if (!WidthPos)
173251
return 0;
174252

@@ -191,7 +269,7 @@ bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx,
191269
assert(::toupper(Fmt[0]) == Fmt[1] &&
192270
"Format string is not in the form \"xX\"");
193271

194-
const char *Like = ::strpbrk(getRecord(ID).Attributes, Fmt);
272+
const char *Like = ::strpbrk(getAttributesString(ID), Fmt);
195273
if (!Like)
196274
return false;
197275

@@ -218,7 +296,7 @@ bool Builtin::Context::isScanfLike(unsigned ID, unsigned &FormatIdx,
218296

219297
bool Builtin::Context::performsCallback(unsigned ID,
220298
SmallVectorImpl<int> &Encoding) const {
221-
const char *CalleePos = ::strchr(getRecord(ID).Attributes, 'C');
299+
const char *CalleePos = ::strchr(getAttributesString(ID), 'C');
222300
if (!CalleePos)
223301
return false;
224302

0 commit comments

Comments
 (0)