Skip to content

Commit 0003846

Browse files
committed
[DSE] Delay deleting non-memory-defs until end of DSE. (llvm#83411)
DSE uses BatchAA, which caches queries using pairs of MemoryLocations. At the moment, DSE may remove instructions that are used as pointers in cached MemoryLocations. If a new instruction used by a new MemoryLoation and this instruction gets allocated at the same address as a previosuly cached and then removed instruction, we may access an incorrect entry in the cache. To avoid this delay removing all instructions except MemoryDefs until the end of DSE. This should avoid removing any values used in BatchAA's cache. Test case by @vporpo from llvm#83181. (Test not precommitted because the results are non-determinstic - memset only sometimes gets removed) PR: llvm#83411
1 parent 0c7823c commit 0003846

File tree

2 files changed

+215
-5
lines changed

2 files changed

+215
-5
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,9 @@ struct DSEState {
867867
// no longer be captured.
868868
bool ShouldIterateEndOfFunctionDSE;
869869

870+
/// Dead instructions to be removed at the end of DSE.
871+
SmallVector<Instruction *> ToRemove;
872+
870873
// Class contains self-reference, make sure it's not copied/moved.
871874
DSEState(const DSEState &) = delete;
872875
DSEState &operator=(const DSEState &) = delete;
@@ -1697,7 +1700,8 @@ struct DSEState {
16971700
return {MaybeDeadAccess};
16981701
}
16991702

1700-
// Delete dead memory defs
1703+
/// Delete dead memory defs and recursively add their operands to ToRemove if
1704+
/// they became dead.
17011705
void deleteDeadInstruction(Instruction *SI) {
17021706
MemorySSAUpdater Updater(&MSSA);
17031707
SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1713,8 +1717,11 @@ struct DSEState {
17131717
salvageKnowledge(DeadInst);
17141718

17151719
// Remove the Instruction from MSSA.
1716-
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1717-
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1720+
MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1721+
bool IsMemDef = MA && isa<MemoryDef>(MA);
1722+
if (MA) {
1723+
if (IsMemDef) {
1724+
auto *MD = cast<MemoryDef>(MA);
17181725
SkipStores.insert(MD);
17191726
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
17201727
if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1735,13 +1742,21 @@ struct DSEState {
17351742
// Remove its operands
17361743
for (Use &O : DeadInst->operands())
17371744
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1738-
O = nullptr;
1745+
O.set(PoisonValue::get(O->getType()));
17391746
if (isInstructionTriviallyDead(OpI, &TLI))
17401747
NowDeadInsts.push_back(OpI);
17411748
}
17421749

17431750
EI.removeInstruction(DeadInst);
1744-
DeadInst->eraseFromParent();
1751+
// Remove memory defs directly if they don't produce results, but only
1752+
// queue other dead instructions for later removal. They may have been
1753+
// used as memory locations that have been cached by BatchAA. Removing
1754+
// them here may lead to newly created instructions to be allocated at the
1755+
// same address, yielding stale cache entries.
1756+
if (IsMemDef && DeadInst->getType()->isVoidTy())
1757+
DeadInst->eraseFromParent();
1758+
else
1759+
ToRemove.push_back(DeadInst);
17451760
}
17461761
}
17471762

@@ -2237,6 +2252,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
22372252

22382253
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
22392254
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2255+
2256+
while (!State.ToRemove.empty()) {
2257+
Instruction *DeadInst = State.ToRemove.pop_back_val();
2258+
DeadInst->eraseFromParent();
2259+
}
2260+
22402261
return MadeChange;
22412262
}
22422263
} // end anonymous namespace
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -passes=dse < %s | FileCheck %s
3+
;
4+
; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill
5+
; `call void @llvm.memset.p0.i64(...)` because it has a clobber read:
6+
; `%ret = load ptr, ptr %struct.byte.8`
7+
8+
9+
%struct.type = type { ptr, ptr }
10+
11+
define ptr @foo(ptr noundef %ptr) {
12+
; CHECK-LABEL: define ptr @foo(
13+
; CHECK-SAME: ptr noundef [[PTR:%.*]]) {
14+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
15+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]]
16+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
17+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
18+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
19+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
20+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
21+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
22+
; CHECK-NEXT: ret ptr [[RET]]
23+
;
24+
%struct.alloca = alloca %struct.type, align 8
25+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
26+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
27+
; Set %struct.alloca[8, 16) to 42.
28+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
29+
; Set %struct.alloca[8, 12) to 43.
30+
store i32 43, ptr %struct.byte.8, align 4
31+
; Set %struct.alloca[4, 8) to 44.
32+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
33+
store i32 44, ptr %struct.byte.4, align 4
34+
; Return %struct.alloca[8, 16).
35+
%ret = load ptr, ptr %struct.byte.8
36+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
37+
ret ptr %ret
38+
}
39+
40+
; Set of tests based on @foo, but where the memset's operands cannot be erased
41+
; due to other uses. Instead, they contain a number of removable MemoryDefs;
42+
; with non-void types result types.
43+
44+
define ptr @foo_with_removable_malloc() {
45+
; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
46+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
47+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
48+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
49+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
50+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
51+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
52+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
53+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
54+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
55+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
56+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
57+
; CHECK-NEXT: ret ptr [[RET]]
58+
;
59+
%struct.alloca = alloca %struct.type, align 8
60+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
61+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
62+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
63+
64+
; Set of removable memory deffs
65+
%m2 = tail call ptr @malloc(i64 4)
66+
%m1 = tail call ptr @malloc(i64 4)
67+
store i32 0, ptr %struct.byte.8
68+
store i32 0, ptr %struct.byte.8
69+
store i32 123, ptr %m1
70+
store i32 123, ptr %m2
71+
72+
; Set %struct.alloca[8, 16) to 42.
73+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
74+
; Set %struct.alloca[8, 12) to 43.
75+
store i32 43, ptr %struct.byte.8, align 4
76+
; Set %struct.alloca[4, 8) to 44.
77+
store i32 44, ptr %struct.byte.4, align 4
78+
; Return %struct.alloca[8, 16).
79+
%ret = load ptr, ptr %struct.byte.8
80+
call void @readnone(ptr %struct.byte.4);
81+
call void @readnone(ptr %struct.byte.8);
82+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
83+
ret ptr %ret
84+
}
85+
86+
define ptr @foo_with_removable_malloc_free() {
87+
; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
88+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
89+
; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4)
90+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
91+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
92+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
93+
; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4)
94+
; CHECK-NEXT: call void @free(ptr [[M1]])
95+
; CHECK-NEXT: call void @free(ptr [[M2]])
96+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
97+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
98+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
99+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
100+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
101+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
102+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
103+
; CHECK-NEXT: ret ptr [[RET]]
104+
;
105+
%struct.alloca = alloca %struct.type, align 8
106+
%m1 = tail call ptr @malloc(i64 4)
107+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
108+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
109+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
110+
111+
store i32 0, ptr %struct.byte.4
112+
store i32 0, ptr %struct.byte.8
113+
%m2 = tail call ptr @malloc(i64 4)
114+
store i32 123, ptr %m1
115+
call void @free(ptr %m1);
116+
store i32 123, ptr %m2
117+
call void @free(ptr %m2);
118+
119+
; Set %struct.alloca[8, 16) to 42.
120+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
121+
; Set %struct.alloca[8, 12) to 43.
122+
store i32 43, ptr %struct.byte.8, align 4
123+
; Set %struct.alloca[4, 8) to 44.
124+
store i32 44, ptr %struct.byte.4, align 4
125+
; Return %struct.alloca[8, 16).
126+
%ret = load ptr, ptr %struct.byte.8
127+
call void @readnone(ptr %struct.byte.4);
128+
call void @readnone(ptr %struct.byte.8);
129+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
130+
ret ptr %ret
131+
}
132+
133+
define ptr @foo_with_malloc_to_calloc() {
134+
; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
135+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
136+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
137+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
138+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
139+
; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4)
140+
; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
141+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
142+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
143+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
144+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
145+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
146+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
147+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
148+
; CHECK-NEXT: call void @use(ptr [[CALLOC1]])
149+
; CHECK-NEXT: call void @use(ptr [[CALLOC]])
150+
; CHECK-NEXT: ret ptr [[RET]]
151+
;
152+
%struct.alloca = alloca %struct.type, align 8
153+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
154+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
155+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
156+
157+
; Set of removable memory deffs
158+
%m1 = tail call ptr @malloc(i64 4)
159+
%m2 = tail call ptr @malloc(i64 4)
160+
store i32 0, ptr %struct.byte.4
161+
store i32 0, ptr %struct.byte.8
162+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false)
163+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false)
164+
165+
; Set %struct.alloca[8, 16) to 42.
166+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
167+
; Set %struct.alloca[8, 12) to 43.
168+
store i32 43, ptr %struct.byte.8, align 4
169+
; Set %struct.alloca[4, 8) to 44.
170+
store i32 44, ptr %struct.byte.4, align 4
171+
; Return %struct.alloca[8, 16).
172+
%ret = load ptr, ptr %struct.byte.8
173+
call void @readnone(ptr %struct.byte.4);
174+
call void @readnone(ptr %struct.byte.8);
175+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
176+
call void @use(ptr %m1)
177+
call void @use(ptr %m2)
178+
ret ptr %ret
179+
}
180+
181+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
182+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
183+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
184+
185+
declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
186+
declare void @readnone(ptr) readnone nounwind
187+
declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
188+
189+
declare void @use(ptr)

0 commit comments

Comments
 (0)