Skip to content

Commit 389f339

Browse files
authored
[TableGen] Rework EmitIntrinsicToBuiltinMap (#104681)
Rework `IntrinsicEmitter::EmitIntrinsicToBuiltinMap` for improved performance as well as refactor the code. Performance: - Current generated code does a linear search on the TargetPrefix, followed by a binary search on the builtin names for that target's builtins. - Improve the performance of this code in 2 ways: (a) Use binary search on the target prefix to look up the builtin table for the target. (b) Improve the (common) case where all builtins for a target share a common prefix. Check this common prefix first, and then do the binary search in the builtin table using the builtin name with the common prefix removed. This should help both data size (by creating a smaller static string table) and runtime (by reducing the cost of binary search on smaller strings). Refactor: - Use range based for loops for iterating over maps. - Use formatv() and C++ raw string literals to simplify the emission code. - Change the generated `getIntrinsicForClangBuiltin` and `getIntrinsicForMSBuiltin` to take a `StringRef` instead of `const char *` for the prefix.
1 parent a3c66c8 commit 389f339

File tree

6 files changed

+281
-68
lines changed

6 files changed

+281
-68
lines changed

llvm/benchmarks/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
set(LLVM_LINK_COMPONENTS
2+
Core
23
Support)
34

45
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
56
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
7+
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#include "benchmark/benchmark.h"
2+
#include "llvm/IR/Intrinsics.h"
3+
4+
using namespace llvm;
5+
using namespace Intrinsic;
6+
7+
// Benchmark intrinsic lookup from a variety of targets.
8+
static void BM_GetIntrinsicForClangBuiltin(benchmark::State &state) {
9+
static const char *Builtins[] = {
10+
"__builtin_adjust_trampoline",
11+
"__builtin_trap",
12+
"__builtin_arm_ttest",
13+
"__builtin_amdgcn_cubetc",
14+
"__builtin_amdgcn_udot2",
15+
"__builtin_arm_stc",
16+
"__builtin_bpf_compare",
17+
"__builtin_HEXAGON_A2_max",
18+
"__builtin_lasx_xvabsd_b",
19+
"__builtin_mips_dlsa",
20+
"__nvvm_floor_f",
21+
"__builtin_altivec_vslb",
22+
"__builtin_r600_read_tgid_x",
23+
"__builtin_riscv_aes64im",
24+
"__builtin_s390_vcksm",
25+
"__builtin_ve_vl_pvfmksge_Mvl",
26+
"__builtin_ia32_axor64",
27+
"__builtin_bitrev",
28+
};
29+
static const char *Targets[] = {"", "aarch64", "amdgcn", "mips",
30+
"nvvm", "r600", "riscv"};
31+
32+
for (auto _ : state) {
33+
for (auto Builtin : Builtins)
34+
for (auto Target : Targets)
35+
getIntrinsicForClangBuiltin(Target, Builtin);
36+
}
37+
}
38+
39+
static void
40+
BM_GetIntrinsicForClangBuiltinHexagonFirst(benchmark::State &state) {
41+
// Exercise the worst case by looking for the first builtin for a target
42+
// that has a lot of builtins.
43+
for (auto _ : state)
44+
getIntrinsicForClangBuiltin("hexagon", "__builtin_HEXAGON_A2_abs");
45+
}
46+
47+
BENCHMARK(BM_GetIntrinsicForClangBuiltin);
48+
BENCHMARK(BM_GetIntrinsicForClangBuiltinHexagonFirst);
49+
50+
BENCHMARK_MAIN();

llvm/include/llvm/IR/Intrinsics.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,10 +100,10 @@ namespace Intrinsic {
100100
StringRef Name);
101101

102102
/// Map a Clang builtin name to an intrinsic ID.
103-
ID getIntrinsicForClangBuiltin(const char *Prefix, StringRef BuiltinName);
103+
ID getIntrinsicForClangBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
104104

105105
/// Map a MS builtin name to an intrinsic ID.
106-
ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
106+
ID getIntrinsicForMSBuiltin(StringRef TargetPrefix, StringRef BuiltinName);
107107

108108
/// Returns true if the intrinsic ID is for one of the "Constrained
109109
/// Floating-Point Intrinsics".

llvm/include/llvm/TableGen/StringToOffsetTable.h

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "llvm/ADT/StringMap.h"
1515
#include "llvm/Support/raw_ostream.h"
1616
#include <cctype>
17+
#include <optional>
1718

1819
namespace llvm {
1920

@@ -26,7 +27,8 @@ class StringToOffsetTable {
2627
std::string AggregateString;
2728

2829
public:
29-
bool Empty() const { return StringOffset.empty(); }
30+
bool empty() const { return StringOffset.empty(); }
31+
size_t size() const { return AggregateString.size(); }
3032

3133
unsigned GetOrAddStringOffset(StringRef Str, bool appendZero = true) {
3234
auto IterBool =
@@ -41,6 +43,15 @@ class StringToOffsetTable {
4143
return IterBool.first->second;
4244
}
4345

46+
// Returns the offset of `Str` in the table if it is present, else returns
47+
// std::nullopt.
48+
std::optional<unsigned> GetStringOffset(StringRef Str) const {
49+
auto II = StringOffset.find(Str);
50+
if (II == StringOffset.end())
51+
return std::nullopt;
52+
return II->second;
53+
}
54+
4455
void EmitString(raw_ostream &O) {
4556
// Escape the string.
4657
SmallString<256> Str;

llvm/unittests/IR/IntrinsicsTest.cpp

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,19 @@
1111
#include "llvm/IR/Constant.h"
1212
#include "llvm/IR/IRBuilder.h"
1313
#include "llvm/IR/IntrinsicInst.h"
14+
#include "llvm/IR/IntrinsicsAArch64.h"
15+
#include "llvm/IR/IntrinsicsAMDGPU.h"
16+
#include "llvm/IR/IntrinsicsARM.h"
17+
#include "llvm/IR/IntrinsicsBPF.h"
18+
#include "llvm/IR/IntrinsicsDirectX.h"
19+
#include "llvm/IR/IntrinsicsHexagon.h"
20+
#include "llvm/IR/IntrinsicsLoongArch.h"
21+
#include "llvm/IR/IntrinsicsMips.h"
22+
#include "llvm/IR/IntrinsicsNVPTX.h"
23+
#include "llvm/IR/IntrinsicsPowerPC.h"
24+
#include "llvm/IR/IntrinsicsRISCV.h"
25+
#include "llvm/IR/IntrinsicsS390.h"
26+
#include "llvm/IR/IntrinsicsX86.h"
1427
#include "llvm/IR/Module.h"
1528
#include "gtest/gtest.h"
1629

@@ -68,6 +81,51 @@ TEST(IntrinsicNameLookup, Basic) {
6881
EXPECT_EQ(4, I);
6982
}
7083

84+
// Tests to verify getIntrinsicForClangBuiltin.
85+
TEST(IntrinsicNameLookup, ClangBuiltinLookup) {
86+
using namespace Intrinsic;
87+
static constexpr std::tuple<StringRef, StringRef, ID> ClangTests[] = {
88+
{"__builtin_adjust_trampoline", "", adjust_trampoline},
89+
{"__builtin_trap", "", trap},
90+
{"__builtin_arm_chkfeat", "aarch64", aarch64_chkfeat},
91+
{"__builtin_amdgcn_alignbyte", "amdgcn", amdgcn_alignbyte},
92+
{"__builtin_amdgcn_workgroup_id_z", "amdgcn", amdgcn_workgroup_id_z},
93+
{"__builtin_arm_cdp", "arm", arm_cdp},
94+
{"__builtin_bpf_preserve_type_info", "bpf", bpf_preserve_type_info},
95+
{"__builtin_hlsl_create_handle", "dx", dx_create_handle},
96+
{"__builtin_HEXAGON_A2_tfr", "hexagon", hexagon_A2_tfr},
97+
{"__builtin_lasx_xbz_w", "loongarch", loongarch_lasx_xbz_w},
98+
{"__builtin_mips_bitrev", "mips", mips_bitrev},
99+
{"__nvvm_add_rn_d", "nvvm", nvvm_add_rn_d},
100+
{"__builtin_altivec_dss", "ppc", ppc_altivec_dss},
101+
{"__builtin_riscv_sha512sum1r", "riscv", riscv_sha512sum1r},
102+
{"__builtin_tend", "s390", s390_tend},
103+
{"__builtin_ia32_pause", "x86", x86_sse2_pause},
104+
105+
{"__does_not_exist", "", not_intrinsic},
106+
{"__does_not_exist", "arm", not_intrinsic},
107+
{"__builtin_arm_cdp", "", not_intrinsic},
108+
{"__builtin_arm_cdp", "x86", not_intrinsic},
109+
};
110+
111+
for (const auto &[Builtin, Target, ID] : ClangTests)
112+
EXPECT_EQ(ID, getIntrinsicForClangBuiltin(Target, Builtin));
113+
}
114+
115+
// Tests to verify getIntrinsicForMSBuiltin.
116+
TEST(IntrinsicNameLookup, MSBuiltinLookup) {
117+
using namespace Intrinsic;
118+
static constexpr std::tuple<StringRef, StringRef, ID> MSTests[] = {
119+
{"__dmb", "aarch64", aarch64_dmb},
120+
{"__dmb", "arm", arm_dmb},
121+
{"__dmb", "", not_intrinsic},
122+
{"__does_not_exist", "", not_intrinsic},
123+
{"__does_not_exist", "arm", not_intrinsic},
124+
};
125+
for (const auto &[Builtin, Target, ID] : MSTests)
126+
EXPECT_EQ(ID, getIntrinsicForMSBuiltin(Target, Builtin));
127+
}
128+
71129
TEST_F(IntrinsicsTest, InstrProfInheritance) {
72130
auto isInstrProfInstBase = [](const Instruction &I) {
73131
return isa<InstrProfInstBase>(I);
@@ -106,4 +164,5 @@ TEST_F(IntrinsicsTest, InstrProfInheritance) {
106164
EXPECT_TRUE(Checker(*Intr));
107165
}
108166
}
167+
109168
} // end namespace

0 commit comments

Comments
 (0)