Skip to content

Commit 0f2ccf1

Browse files
Reduce memory usage in AST parent map generation by lazily checking if nodes have been seen
1 parent c017cdf commit 0f2ccf1

File tree

1 file changed

+88
-7
lines changed

1 file changed

+88
-7
lines changed

clang/lib/AST/ParentMapContext.cpp

Lines changed: 88 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,34 @@ class ParentMapContext::ParentMap {
6060

6161
template <typename, typename...> friend struct ::MatchParents;
6262

63+
template <class T> struct IndirectDenseMapInfo {
64+
using Ptr = T *;
65+
using Base = llvm::DenseMapInfo<std::remove_cv_t<T>>;
66+
static T &Verify(T &Val) {
67+
assert(Val.getMemoizationData());
68+
return Val;
69+
}
70+
static inline Ptr getEmptyKey() {
71+
return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getEmptyKey());
72+
}
73+
static inline Ptr getTombstoneKey() {
74+
return static_cast<Ptr>(llvm::DenseMapInfo<void *>::getTombstoneKey());
75+
}
76+
static unsigned getHashValue(Ptr Val) {
77+
return Val == getEmptyKey() || Val == getTombstoneKey()
78+
? 0
79+
: Base::getHashValue(Verify(*Val));
80+
}
81+
static bool isEqual(Ptr LHS, Ptr RHS) {
82+
if (LHS == getEmptyKey() || LHS == getTombstoneKey() ||
83+
RHS == getEmptyKey() || RHS == getTombstoneKey() || LHS == RHS) {
84+
return LHS == RHS;
85+
}
86+
return Base::isEqual(Verify(*LHS), Verify(*RHS));
87+
}
88+
};
89+
using MapInfo = IndirectDenseMapInfo<const DynTypedNode>;
90+
6391
/// Contains parents of a node.
6492
class ParentVector {
6593
public:
@@ -69,17 +97,70 @@ class ParentMapContext::ParentMap {
6997
for (; N > 0; --N)
7098
push_back(Value);
7199
}
72-
bool contains(const DynTypedNode &Value) {
73-
return Seen.contains(Value);
74-
}
100+
/// Returns true if a node equal to \p Value is already stored. Uncached items
/// are fed into the dedup cache, without a count limit, until a match is
/// found or every item has been processed.
bool contains(const DynTypedNode &Value) {
  return CacheUntil(std::numeric_limits<size_t>::max(), &Value);
}
75101
void push_back(const DynTypedNode &Value) {
76-
if (!Value.getMemoizationData() || Seen.insert(Value).second)
102+
if (!Value.getMemoizationData() || !FragileDedupCache.contains(&Value)) {
103+
const size_t OldCapacity = Items.capacity();
77104
Items.push_back(Value);
105+
// Danger: Clear the cache if its pointers might be invalidated.
106+
if (OldCapacity != Items.capacity()) {
107+
ItemsProcessed = 0;
108+
FragileDedupCache.clear();
109+
}
110+
}
111+
112+
// Cache some nodes every time we attempt an insertion.
113+
// This is important to guarantee that our (cheap) cache catches up with
114+
// the (expensive) main vector upon repeated insertions, thus bounding
115+
// element duplication.
116+
// It also amortizes insertions to constant-time, preventing us from
117+
// spending unnecessary time rebuilding the cache when it is invalidated.
118+
CacheUntil(2);
119+
}
120+
/// Returns a view over all stored nodes. Every remaining item is processed
/// into the dedup cache first, so this accessor is not const.
llvm::ArrayRef<DynTypedNode> view() {
  CacheUntil(std::numeric_limits<size_t>::max(), nullptr);
  assert(ItemsProcessed == Items.size());
  return llvm::ArrayRef<DynTypedNode>(Items);
}
79-
llvm::ArrayRef<DynTypedNode> view() const { return Items; }
125+
80126
private:
127+
// Places at most `MaxCount` items into the cache, stopping early if the
128+
// node is seen.
129+
bool CacheUntil(size_t MaxCount = std::numeric_limits<size_t>::max(),
130+
const DynTypedNode *OptionalNeedle = nullptr) {
131+
assert(!OptionalNeedle || OptionalNeedle->getMemoizationData());
132+
133+
bool Found = OptionalNeedle && FragileDedupCache.contains(OptionalNeedle);
134+
while (!Found && ItemsProcessed < Items.size() && MaxCount > 0) {
135+
const auto *Item = &Items[ItemsProcessed];
136+
137+
if (Item->getMemoizationData()) {
138+
FragileDedupCache.insert(Item);
139+
}
140+
++ItemsProcessed;
141+
--MaxCount;
142+
143+
assert(!Found);
144+
Found = OptionalNeedle && Item->getMemoizationData() &&
145+
MapInfo::isEqual(OptionalNeedle, Item);
146+
}
147+
148+
return Found;
149+
}
150+
bool CacheUntil(const DynTypedNode &Needle) {
151+
return CacheUntil(std::numeric_limits<size_t>::max(), &Needle);
152+
}
153+
154+
// A partitioned vector of nodes, where the first `ItemsProcessed` elements
155+
// are already processed into the cache, and the remaining have not.
156+
// BE CAREFUL. Pointers into this container are stored in the
157+
// `FragileDedupCache` set below.
81158
llvm::SmallVector<DynTypedNode, 2> Items;
82-
llvm::SmallDenseSet<DynTypedNode, 2> Seen;
159+
// This cache is fragile because it contains pointers that are invalidated
160+
// when the vector capacity changes.
161+
llvm::SmallDenseSet<const DynTypedNode *, 2, MapInfo> FragileDedupCache;
162+
// Lazily tracks which items have been processed for the cache.
163+
size_t ItemsProcessed = 0;
83164
};
84165

85166
/// Maps from a node to its parents. This is used for nodes that have
@@ -117,7 +198,7 @@ class ParentMapContext::ParentMap {
117198
if (I == Map.end()) {
118199
return llvm::ArrayRef<DynTypedNode>();
119200
}
120-
if (const auto *V = dyn_cast<ParentVector *>(I->second)) {
201+
if (auto *V = dyn_cast<ParentVector *>(I->second)) {
121202
return V->view();
122203
}
123204
return getSingleDynTypedNodeFromParentMap(I->second);

0 commit comments

Comments
 (0)