Skip to content

Commit 78b4e48

Browse files
committed
Reapply "Switch builtin strings to use string tables" (llvm#118734)
This reverts commit ca79ff0. It also updates the original PR to use the newly added `StringTable` abstraction for string tables, and simplifies the construction to build the string table and info arrays separately. This should reduce any `constexpr` compile time memory or CPU cost of the original PR while significantly improving the APIs throughout.
1 parent 3316476 commit 78b4e48

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

49 files changed

+656
-307
lines changed

clang/include/clang/Basic/Builtins.h

Lines changed: 167 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "llvm/ADT/ArrayRef.h"
1919
#include "llvm/ADT/StringMap.h"
2020
#include "llvm/ADT/StringRef.h"
21+
#include "llvm/ADT/StringTable.h"
2122
#include <cstring>
2223

2324
// VC++ defines 'alloca' as an object-like macro, which interferes with our
@@ -55,6 +56,7 @@ struct HeaderDesc {
5556
#undef HEADER
5657
} ID;
5758

59+
constexpr HeaderDesc() : ID() {}
5860
constexpr HeaderDesc(HeaderID ID) : ID(ID) {}
5961

6062
const char *getName() const;
@@ -68,23 +70,146 @@ enum ID {
6870
FirstTSBuiltin
6971
};
7072

73+
// The info used to represent each builtin.
7174
struct Info {
72-
llvm::StringLiteral Name;
73-
const char *Type, *Attributes;
74-
const char *Features;
75-
HeaderDesc Header;
76-
LanguageID Langs;
75+
// Rather than store pointers to the string literals describing these four
76+
// aspects of builtins, we store offsets into a common string table.
77+
struct StrOffsets {
78+
llvm::StringTable::Offset Name;
79+
llvm::StringTable::Offset Type;
80+
llvm::StringTable::Offset Attributes;
81+
82+
// Defaults to the empty string offset.
83+
llvm::StringTable::Offset Features = {};
84+
} Offsets;
85+
86+
HeaderDesc Header = HeaderDesc::NO_HEADER;
87+
LanguageID Langs = ALL_LANGUAGES;
7788
};
7889

90+
// A constexpr function to construct an infos array from X-macros.
91+
//
92+
// The input array uses the same data structure, but the offsets are actually
93+
// _lengths_ when input. This is all we can compute from the X-macro approach to
94+
// builtins. This function will convert these lengths into actual offsets to a
95+
// string table built up through sequentially appending strings with the given
96+
// lengths.
97+
template <size_t N>
98+
static constexpr std::array<Info, N> MakeInfos(std::array<Info, N> Infos) {
99+
// Translate lengths to offsets. We start past the initial empty string at
100+
// offset zero.
101+
unsigned Offset = 1;
102+
for (Info &I : Infos) {
103+
Info::StrOffsets NewOffsets = {};
104+
NewOffsets.Name = Offset;
105+
Offset += I.Offsets.Name.value();
106+
NewOffsets.Type = Offset;
107+
Offset += I.Offsets.Type.value();
108+
NewOffsets.Attributes = Offset;
109+
Offset += I.Offsets.Attributes.value();
110+
NewOffsets.Features = Offset;
111+
Offset += I.Offsets.Features.value();
112+
I.Offsets = NewOffsets;
113+
}
114+
return Infos;
115+
}
116+
117+
// A detail macro used below to emit a string literal that, after string literal
118+
// concatenation, ends up triggering the `-Woverlength-strings` warning. While
119+
// the warning is useful in general to catch accidentally excessive strings,
120+
// here we are creating them intentionally.
121+
//
122+
// This relies on a subtle aspect of `_Pragma`: that the *diagnostic* ones don't
123+
// turn into actual tokens that would disrupt string literal concatenation.
124+
#ifdef __clang__
125+
#define CLANG_BUILTIN_DETAIL_STR_TABLE(S) \
126+
_Pragma("clang diagnostic push") \
127+
_Pragma("clang diagnostic ignored \"-Woverlength-strings\"") \
128+
S _Pragma("clang diagnostic pop")
129+
#else
130+
#define CLANG_BUILTIN_DETAIL_STR_TABLE(S) S
131+
#endif
132+
133+
// We require string tables to start with an empty string so that a `0` offset
134+
// can always be used to refer to an empty string. To satisfy that when building
135+
// string tables with X-macros, we use this start macro prior to expanding the
136+
// X-macros.
137+
#define CLANG_BUILTIN_STR_TABLE_START CLANG_BUILTIN_DETAIL_STR_TABLE("\0")
138+
139+
// A macro that can be used with `Builtins.def` and similar files as an X-macro
140+
// to add the string arguments to a builtin string table. This is typically the
141+
// target for the `BUILTIN`, `LANGBUILTIN`, or `LIBBUILTIN` macros in those
142+
// files.
143+
#define CLANG_BUILTIN_STR_TABLE(ID, TYPE, ATTRS) \
144+
CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" /*FEATURE*/ "\0")
145+
146+
// A macro that can be used with target builtin `.def` and `.inc` files as an
147+
// X-macro to add the string arguments to a builtin string table. This is
148+
// typically the target for the `TARGET_BUILTIN` macro.
149+
#define CLANG_TARGET_BUILTIN_STR_TABLE(ID, TYPE, ATTRS, FEATURE) \
150+
CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" FEATURE "\0")
151+
152+
// A macro that can be used with target builtin `.def` and `.inc` files as an
153+
// X-macro to add the string arguments to a builtin string table. This is
154+
// typically the target for the `TARGET_HEADER_BUILTIN` macro. We can't delegate
155+
// to `TARGET_BUILTIN` because the `FEATURE` string changes position.
156+
#define CLANG_TARGET_HEADER_BUILTIN_STR_TABLE(ID, TYPE, ATTRS, HEADER, LANGS, \
157+
FEATURE) \
158+
CLANG_BUILTIN_DETAIL_STR_TABLE(#ID "\0" TYPE "\0" ATTRS "\0" FEATURE "\0")
159+
160+
// A detail macro used internally to compute the desired string table
161+
// `StrOffsets` struct for arguments to `MakeInfos`.
162+
#define CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS) \
163+
Builtin::Info::StrOffsets { \
164+
sizeof(#ID), sizeof(TYPE), sizeof(ATTRS), sizeof("") \
165+
}
166+
167+
// A detail macro used internally to compute the desired string table
168+
// `StrOffsets` struct for arguments to `MakeInfos`.
169+
#define CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE) \
170+
Builtin::Info::StrOffsets { \
171+
sizeof(#ID), sizeof(TYPE), sizeof(ATTRS), sizeof(FEATURE) \
172+
}
173+
174+
// A set of macros that can be used with builtin `.def` files as an X-macro to
175+
// create an `Info` struct for a particular builtin. It both computes the
176+
// `StrOffsets` value for the string table (the lengths here, translated to
177+
// offsets by the `MakeInfos` function), and the other metadata for each
178+
// builtin.
179+
//
180+
// There is a corresponding macro for each of `BUILTIN`, `LANGBUILTIN`,
181+
// `LIBBUILTIN`, `TARGET_BUILTIN`, and `TARGET_HEADER_BUILTIN`.
182+
#define CLANG_BUILTIN_ENTRY(ID, TYPE, ATTRS) \
183+
Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS), \
184+
HeaderDesc::NO_HEADER, ALL_LANGUAGES},
185+
#define CLANG_LANGBUILTIN_ENTRY(ID, TYPE, ATTRS, LANG) \
186+
Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS), \
187+
HeaderDesc::NO_HEADER, LANG},
188+
#define CLANG_LIBBUILTIN_ENTRY(ID, TYPE, ATTRS, HEADER, LANG) \
189+
Builtin::Info{CLANG_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS), \
190+
HeaderDesc::HEADER, LANG},
191+
#define CLANG_TARGET_BUILTIN_ENTRY(ID, TYPE, ATTRS, FEATURE) \
192+
Builtin::Info{ \
193+
CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE), \
194+
HeaderDesc::NO_HEADER, ALL_LANGUAGES},
195+
#define CLANG_TARGET_HEADER_BUILTIN_ENTRY(ID, TYPE, ATTRS, HEADER, LANG, \
196+
FEATURE) \
197+
Builtin::Info{ \
198+
CLANG_TARGET_BUILTIN_DETAIL_STR_OFFSETS(ID, TYPE, ATTRS, FEATURE), \
199+
HeaderDesc::HEADER, LANG},
200+
79201
/// Holds information about both target-independent and
80202
/// target-specific builtins, allowing easy queries by clients.
81203
///
82204
/// Builtins from an optional auxiliary target are stored in
83205
/// AuxTSInfos. Their IDs are shifted up by TSInfos.size() and need to
84206
/// be translated back with getAuxBuiltinID() before use.
85207
class Context {
86-
llvm::ArrayRef<Info> TSRecords;
87-
llvm::ArrayRef<Info> AuxTSRecords;
208+
const llvm::StringTable *TSStrTable = nullptr;
209+
const llvm::StringTable *AuxTSStrTable = nullptr;
210+
211+
llvm::ArrayRef<Info> TSInfos;
212+
llvm::ArrayRef<Info> AuxTSInfos;
88213

89214
public:
90215
Context() = default;
@@ -100,13 +225,16 @@ class Context {
100225

101226
/// Return the identifier name for the specified builtin,
102227
/// e.g. "__builtin_abs".
103-
llvm::StringRef getName(unsigned ID) const { return getRecord(ID).Name; }
228+
llvm::StringRef getName(unsigned ID) const;
104229

105230
/// Return a quoted name for the specified builtin for use in diagnostics.
106231
std::string getQuotedName(unsigned ID) const;
107232

108233
/// Get the type descriptor string for the specified builtin.
109-
const char *getTypeString(unsigned ID) const { return getRecord(ID).Type; }
234+
const char *getTypeString(unsigned ID) const;
235+
236+
/// Get the attributes descriptor string for the specified builtin.
237+
const char *getAttributesString(unsigned ID) const;
110238

111239
/// Return true if this function is a target-specific builtin.
112240
bool isTSBuiltin(unsigned ID) const {
@@ -115,40 +243,40 @@ class Context {
115243

116244
/// Return true if this function has no side effects.
117245
bool isPure(unsigned ID) const {
118-
return strchr(getRecord(ID).Attributes, 'U') != nullptr;
246+
return strchr(getAttributesString(ID), 'U') != nullptr;
119247
}
120248

121249
/// Return true if this function has no side effects and doesn't
122250
/// read memory.
123251
bool isConst(unsigned ID) const {
124-
return strchr(getRecord(ID).Attributes, 'c') != nullptr;
252+
return strchr(getAttributesString(ID), 'c') != nullptr;
125253
}
126254

127255
/// Return true if we know this builtin never throws an exception.
128256
bool isNoThrow(unsigned ID) const {
129-
return strchr(getRecord(ID).Attributes, 'n') != nullptr;
257+
return strchr(getAttributesString(ID), 'n') != nullptr;
130258
}
131259

132260
/// Return true if we know this builtin never returns.
133261
bool isNoReturn(unsigned ID) const {
134-
return strchr(getRecord(ID).Attributes, 'r') != nullptr;
262+
return strchr(getAttributesString(ID), 'r') != nullptr;
135263
}
136264

137265
/// Return true if we know this builtin can return twice.
138266
bool isReturnsTwice(unsigned ID) const {
139-
return strchr(getRecord(ID).Attributes, 'j') != nullptr;
267+
return strchr(getAttributesString(ID), 'j') != nullptr;
140268
}
141269

142270
/// Returns true if this builtin does not perform the side-effects
143271
/// of its arguments.
144272
bool isUnevaluated(unsigned ID) const {
145-
return strchr(getRecord(ID).Attributes, 'u') != nullptr;
273+
return strchr(getAttributesString(ID), 'u') != nullptr;
146274
}
147275

148276
/// Return true if this is a builtin for a libc/libm function,
149277
/// with a "__builtin_" prefix (e.g. __builtin_abs).
150278
bool isLibFunction(unsigned ID) const {
151-
return strchr(getRecord(ID).Attributes, 'F') != nullptr;
279+
return strchr(getAttributesString(ID), 'F') != nullptr;
152280
}
153281

154282
/// Determines whether this builtin is a predefined libc/libm
@@ -159,29 +287,29 @@ class Context {
159287
/// they do not, but they are recognized as builtins once we see
160288
/// a declaration.
161289
bool isPredefinedLibFunction(unsigned ID) const {
162-
return strchr(getRecord(ID).Attributes, 'f') != nullptr;
290+
return strchr(getAttributesString(ID), 'f') != nullptr;
163291
}
164292

165293
/// Returns true if this builtin requires appropriate header in other
166294
/// compilers. In Clang it will work even without including it, but we can emit
167295
/// a warning about missing header.
168296
bool isHeaderDependentFunction(unsigned ID) const {
169-
return strchr(getRecord(ID).Attributes, 'h') != nullptr;
297+
return strchr(getAttributesString(ID), 'h') != nullptr;
170298
}
171299

172300
/// Determines whether this builtin is a predefined compiler-rt/libgcc
173301
/// function, such as "__clear_cache", where we know the signature a
174302
/// priori.
175303
bool isPredefinedRuntimeFunction(unsigned ID) const {
176-
return strchr(getRecord(ID).Attributes, 'i') != nullptr;
304+
return strchr(getAttributesString(ID), 'i') != nullptr;
177305
}
178306

179307
/// Determines whether this builtin is a C++ standard library function
180308
/// that lives in (possibly-versioned) namespace std, possibly a template
181309
/// specialization, where the signature is determined by the standard library
182310
/// declaration.
183311
bool isInStdNamespace(unsigned ID) const {
184-
return strchr(getRecord(ID).Attributes, 'z') != nullptr;
312+
return strchr(getAttributesString(ID), 'z') != nullptr;
185313
}
186314

187315
/// Determines whether this builtin can have its address taken with no
@@ -195,33 +323,33 @@ class Context {
195323

196324
/// Determines whether this builtin has custom typechecking.
197325
bool hasCustomTypechecking(unsigned ID) const {
198-
return strchr(getRecord(ID).Attributes, 't') != nullptr;
326+
return strchr(getAttributesString(ID), 't') != nullptr;
199327
}
200328

201329
/// Determines whether a declaration of this builtin should be recognized
202330
/// even if the type doesn't match the specified signature.
203331
bool allowTypeMismatch(unsigned ID) const {
204-
return strchr(getRecord(ID).Attributes, 'T') != nullptr ||
332+
return strchr(getAttributesString(ID), 'T') != nullptr ||
205333
hasCustomTypechecking(ID);
206334
}
207335

208336
/// Determines whether this builtin has a result or any arguments which
209337
/// are pointer types.
210338
bool hasPtrArgsOrResult(unsigned ID) const {
211-
return strchr(getRecord(ID).Type, '*') != nullptr;
339+
return strchr(getTypeString(ID), '*') != nullptr;
212340
}
213341

214342
/// Return true if this builtin has a result or any arguments which are
215343
/// reference types.
216344
bool hasReferenceArgsOrResult(unsigned ID) const {
217-
return strchr(getRecord(ID).Type, '&') != nullptr ||
218-
strchr(getRecord(ID).Type, 'A') != nullptr;
345+
return strchr(getTypeString(ID), '&') != nullptr ||
346+
strchr(getTypeString(ID), 'A') != nullptr;
219347
}
220348

221349
/// If this is a library function that comes from a specific
222350
/// header, retrieve that header name.
223351
const char *getHeaderName(unsigned ID) const {
224-
return getRecord(ID).Header.getName();
352+
return getInfo(ID).Header.getName();
225353
}
226354

227355
/// Determine whether this builtin is like printf in its
@@ -246,27 +374,25 @@ class Context {
246374
/// Such functions can be const when the MathErrno lang option and FP
247375
/// exceptions are disabled.
248376
bool isConstWithoutErrnoAndExceptions(unsigned ID) const {
249-
return strchr(getRecord(ID).Attributes, 'e') != nullptr;
377+
return strchr(getAttributesString(ID), 'e') != nullptr;
250378
}
251379

252380
bool isConstWithoutExceptions(unsigned ID) const {
253-
return strchr(getRecord(ID).Attributes, 'g') != nullptr;
381+
return strchr(getAttributesString(ID), 'g') != nullptr;
254382
}
255383

256-
const char *getRequiredFeatures(unsigned ID) const {
257-
return getRecord(ID).Features;
258-
}
384+
const char *getRequiredFeatures(unsigned ID) const;
259385

260386
unsigned getRequiredVectorWidth(unsigned ID) const;
261387

262388
/// Return true if builtin ID belongs to AuxTarget.
263389
bool isAuxBuiltinID(unsigned ID) const {
264-
return ID >= (Builtin::FirstTSBuiltin + TSRecords.size());
390+
return ID >= (Builtin::FirstTSBuiltin + TSInfos.size());
265391
}
266392

267393
/// Return real builtin ID (i.e. ID it would have during compilation
268394
/// for AuxTarget).
269-
unsigned getAuxBuiltinID(unsigned ID) const { return ID - TSRecords.size(); }
395+
unsigned getAuxBuiltinID(unsigned ID) const { return ID - TSInfos.size(); }
270396

271397
/// Returns true if this is a libc/libm function without the '__builtin_'
272398
/// prefix.
@@ -278,16 +404,21 @@ class Context {
278404

279405
/// Return true if this function can be constant evaluated by Clang frontend.
280406
bool isConstantEvaluated(unsigned ID) const {
281-
return strchr(getRecord(ID).Attributes, 'E') != nullptr;
407+
return strchr(getAttributesString(ID), 'E') != nullptr;
282408
}
283409

284410
/// Returns true if this is an immediate (consteval) function
285411
bool isImmediate(unsigned ID) const {
286-
return strchr(getRecord(ID).Attributes, 'G') != nullptr;
412+
return strchr(getAttributesString(ID), 'G') != nullptr;
287413
}
288414

289415
private:
290-
const Info &getRecord(unsigned ID) const;
416+
std::pair<const llvm::StringTable &, const Info &>
417+
getStrTableAndInfo(unsigned ID) const;
418+
419+
const Info &getInfo(unsigned ID) const {
420+
return getStrTableAndInfo(ID).second;
421+
}
291422

292423
/// Helper function for isPrintfLike and isScanfLike.
293424
bool isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg,

clang/include/clang/Basic/BuiltinsPPC.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,5 +1138,6 @@ UNALIASED_CUSTOM_BUILTIN(mma_pmxvbf16ger2nn, "vW512*VVi15i15i3", true,
11381138
// FIXME: Obviously incomplete.
11391139

11401140
#undef BUILTIN
1141+
#undef TARGET_BUILTIN
11411142
#undef CUSTOM_BUILTIN
11421143
#undef UNALIASED_CUSTOM_BUILTIN

0 commit comments

Comments
 (0)