Skip to content

Commit 46f35c9

Browse files
authored
Merge pull request #8323 from fhahn/dse-fix
[DSE] Delay deleting non-memory-defs until end of DSE. (llvm#83411)
2 parents 3eedd76 + 0003846 commit 46f35c9

File tree

2 files changed

+215
-5
lines changed

2 files changed

+215
-5
lines changed

llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -867,6 +867,9 @@ struct DSEState {
867867
// no longer be captured.
868868
bool ShouldIterateEndOfFunctionDSE;
869869

870+
/// Dead instructions to be removed at the end of DSE.
871+
SmallVector<Instruction *> ToRemove;
872+
870873
// Class contains self-reference, make sure it's not copied/moved.
871874
DSEState(const DSEState &) = delete;
872875
DSEState &operator=(const DSEState &) = delete;
@@ -1697,7 +1700,8 @@ struct DSEState {
16971700
return {MaybeDeadAccess};
16981701
}
16991702

1700-
// Delete dead memory defs
1703+
/// Delete dead memory defs and recursively add their operands to ToRemove if
1704+
/// they became dead.
17011705
void deleteDeadInstruction(Instruction *SI) {
17021706
MemorySSAUpdater Updater(&MSSA);
17031707
SmallVector<Instruction *, 32> NowDeadInsts;
@@ -1713,8 +1717,11 @@ struct DSEState {
17131717
salvageKnowledge(DeadInst);
17141718

17151719
// Remove the Instruction from MSSA.
1716-
if (MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst)) {
1717-
if (MemoryDef *MD = dyn_cast<MemoryDef>(MA)) {
1720+
MemoryAccess *MA = MSSA.getMemoryAccess(DeadInst);
1721+
bool IsMemDef = MA && isa<MemoryDef>(MA);
1722+
if (MA) {
1723+
if (IsMemDef) {
1724+
auto *MD = cast<MemoryDef>(MA);
17181725
SkipStores.insert(MD);
17191726
if (auto *SI = dyn_cast<StoreInst>(MD->getMemoryInst())) {
17201727
if (SI->getValueOperand()->getType()->isPointerTy()) {
@@ -1735,13 +1742,21 @@ struct DSEState {
17351742
// Remove its operands
17361743
for (Use &O : DeadInst->operands())
17371744
if (Instruction *OpI = dyn_cast<Instruction>(O)) {
1738-
O = nullptr;
1745+
O.set(PoisonValue::get(O->getType()));
17391746
if (isInstructionTriviallyDead(OpI, &TLI))
17401747
NowDeadInsts.push_back(OpI);
17411748
}
17421749

17431750
EI.removeInstruction(DeadInst);
1744-
DeadInst->eraseFromParent();
1751+
// Remove memory defs directly if they don't produce results, but only
1752+
// queue other dead instructions for later removal. They may have been
1753+
// used as memory locations that have been cached by BatchAA. Removing
1754+
// them here may lead to newly created instructions to be allocated at the
1755+
// same address, yielding stale cache entries.
1756+
if (IsMemDef && DeadInst->getType()->isVoidTy())
1757+
DeadInst->eraseFromParent();
1758+
else
1759+
ToRemove.push_back(DeadInst);
17451760
}
17461761
}
17471762

@@ -2237,6 +2252,12 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis &AA, MemorySSA &MSSA,
22372252

22382253
MadeChange |= State.eliminateRedundantStoresOfExistingValues();
22392254
MadeChange |= State.eliminateDeadWritesAtEndOfFunction();
2255+
2256+
while (!State.ToRemove.empty()) {
2257+
Instruction *DeadInst = State.ToRemove.pop_back_val();
2258+
DeadInst->eraseFromParent();
2259+
}
2260+
22402261
return MadeChange;
22412262
}
22422263
} // end anonymous namespace
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -S -passes=dse < %s | FileCheck %s
3+
;
4+
; DSE kills `store i32 44, ptr %struct.byte.4, align 4` but should not kill
5+
; `call void @llvm.memset.p0.i64(...)` because it has a clobber read:
6+
; `%ret = load ptr, ptr %struct.byte.8`
7+
8+
9+
%struct.type = type { ptr, ptr }
10+
11+
define ptr @foo(ptr noundef %ptr) {
12+
; CHECK-LABEL: define ptr @foo(
13+
; CHECK-SAME: ptr noundef [[PTR:%.*]]) {
14+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
15+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6:[0-9]+]]
16+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
17+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
18+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
19+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
20+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
21+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
22+
; CHECK-NEXT: ret ptr [[RET]]
23+
;
24+
%struct.alloca = alloca %struct.type, align 8
25+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
26+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
27+
; Set %struct.alloca[8, 16) to 42.
28+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
29+
; Set %struct.alloca[8, 12) to 43.
30+
store i32 43, ptr %struct.byte.8, align 4
31+
; Set %struct.alloca[4, 8) to 44.
32+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
33+
store i32 44, ptr %struct.byte.4, align 4
34+
; Return %struct.alloca[8, 16).
35+
%ret = load ptr, ptr %struct.byte.8
36+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
37+
ret ptr %ret
38+
}
39+
40+
; Set of tests based on @foo, but where the memset's operands cannot be erased
41+
; due to other uses. Instead, they contain a number of removable MemoryDefs;
42+
; with non-void types result types.
43+
44+
define ptr @foo_with_removable_malloc() {
45+
; CHECK-LABEL: define ptr @foo_with_removable_malloc() {
46+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
47+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
48+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
49+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
50+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
51+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
52+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
53+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
54+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
55+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
56+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
57+
; CHECK-NEXT: ret ptr [[RET]]
58+
;
59+
%struct.alloca = alloca %struct.type, align 8
60+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
61+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
62+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
63+
64+
; Set of removable memory deffs
65+
%m2 = tail call ptr @malloc(i64 4)
66+
%m1 = tail call ptr @malloc(i64 4)
67+
store i32 0, ptr %struct.byte.8
68+
store i32 0, ptr %struct.byte.8
69+
store i32 123, ptr %m1
70+
store i32 123, ptr %m2
71+
72+
; Set %struct.alloca[8, 16) to 42.
73+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
74+
; Set %struct.alloca[8, 12) to 43.
75+
store i32 43, ptr %struct.byte.8, align 4
76+
; Set %struct.alloca[4, 8) to 44.
77+
store i32 44, ptr %struct.byte.4, align 4
78+
; Return %struct.alloca[8, 16).
79+
%ret = load ptr, ptr %struct.byte.8
80+
call void @readnone(ptr %struct.byte.4);
81+
call void @readnone(ptr %struct.byte.8);
82+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
83+
ret ptr %ret
84+
}
85+
86+
define ptr @foo_with_removable_malloc_free() {
87+
; CHECK-LABEL: define ptr @foo_with_removable_malloc_free() {
88+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
89+
; CHECK-NEXT: [[M1:%.*]] = tail call ptr @malloc(i64 4)
90+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
91+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
92+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
93+
; CHECK-NEXT: [[M2:%.*]] = tail call ptr @malloc(i64 4)
94+
; CHECK-NEXT: call void @free(ptr [[M1]])
95+
; CHECK-NEXT: call void @free(ptr [[M2]])
96+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
97+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
98+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
99+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
100+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
101+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
102+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
103+
; CHECK-NEXT: ret ptr [[RET]]
104+
;
105+
%struct.alloca = alloca %struct.type, align 8
106+
%m1 = tail call ptr @malloc(i64 4)
107+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
108+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
109+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
110+
111+
store i32 0, ptr %struct.byte.4
112+
store i32 0, ptr %struct.byte.8
113+
%m2 = tail call ptr @malloc(i64 4)
114+
store i32 123, ptr %m1
115+
call void @free(ptr %m1);
116+
store i32 123, ptr %m2
117+
call void @free(ptr %m2);
118+
119+
; Set %struct.alloca[8, 16) to 42.
120+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
121+
; Set %struct.alloca[8, 12) to 43.
122+
store i32 43, ptr %struct.byte.8, align 4
123+
; Set %struct.alloca[4, 8) to 44.
124+
store i32 44, ptr %struct.byte.4, align 4
125+
; Return %struct.alloca[8, 16).
126+
%ret = load ptr, ptr %struct.byte.8
127+
call void @readnone(ptr %struct.byte.4);
128+
call void @readnone(ptr %struct.byte.8);
129+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
130+
ret ptr %ret
131+
}
132+
133+
define ptr @foo_with_malloc_to_calloc() {
134+
; CHECK-LABEL: define ptr @foo_with_malloc_to_calloc() {
135+
; CHECK-NEXT: [[STRUCT_ALLOCA:%.*]] = alloca [[STRUCT_TYPE:%.*]], align 8
136+
; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
137+
; CHECK-NEXT: [[STRUCT_BYTE_8:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 8
138+
; CHECK-NEXT: [[STRUCT_BYTE_4:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_ALLOCA]], i64 4
139+
; CHECK-NEXT: [[CALLOC1:%.*]] = call ptr @calloc(i64 1, i64 4)
140+
; CHECK-NEXT: [[CALLOC:%.*]] = call ptr @calloc(i64 1, i64 4)
141+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[STRUCT_BYTE_8]], i64 4
142+
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 [[TMP1]], i8 42, i64 4, i1 false)
143+
; CHECK-NEXT: store i32 43, ptr [[STRUCT_BYTE_8]], align 4
144+
; CHECK-NEXT: [[RET:%.*]] = load ptr, ptr [[STRUCT_BYTE_8]], align 8
145+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_4]])
146+
; CHECK-NEXT: call void @readnone(ptr [[STRUCT_BYTE_8]])
147+
; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 56, ptr nonnull [[STRUCT_ALLOCA]]) #[[ATTR6]]
148+
; CHECK-NEXT: call void @use(ptr [[CALLOC1]])
149+
; CHECK-NEXT: call void @use(ptr [[CALLOC]])
150+
; CHECK-NEXT: ret ptr [[RET]]
151+
;
152+
%struct.alloca = alloca %struct.type, align 8
153+
call void @llvm.lifetime.start.p0(i64 56, ptr nonnull %struct.alloca) nounwind
154+
%struct.byte.8 = getelementptr inbounds i8, ptr %struct.alloca, i64 8
155+
%struct.byte.4 = getelementptr inbounds i8, ptr %struct.alloca, i64 4
156+
157+
; Set of removable memory deffs
158+
%m1 = tail call ptr @malloc(i64 4)
159+
%m2 = tail call ptr @malloc(i64 4)
160+
store i32 0, ptr %struct.byte.4
161+
store i32 0, ptr %struct.byte.8
162+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m2, i8 0, i64 4, i1 false)
163+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %m1, i8 0, i64 4, i1 false)
164+
165+
; Set %struct.alloca[8, 16) to 42.
166+
call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 %struct.byte.8, i8 42, i64 8, i1 false)
167+
; Set %struct.alloca[8, 12) to 43.
168+
store i32 43, ptr %struct.byte.8, align 4
169+
; Set %struct.alloca[4, 8) to 44.
170+
store i32 44, ptr %struct.byte.4, align 4
171+
; Return %struct.alloca[8, 16).
172+
%ret = load ptr, ptr %struct.byte.8
173+
call void @readnone(ptr %struct.byte.4);
174+
call void @readnone(ptr %struct.byte.8);
175+
call void @llvm.lifetime.end.p0(i64 56, ptr nonnull %struct.alloca) nounwind
176+
call void @use(ptr %m1)
177+
call void @use(ptr %m2)
178+
ret ptr %ret
179+
}
180+
181+
declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg)
182+
declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
183+
declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
184+
185+
declare noalias ptr @malloc(i64) willreturn allockind("alloc,uninitialized") "alloc-family"="malloc"
186+
declare void @readnone(ptr) readnone nounwind
187+
declare void @free(ptr nocapture) allockind("free") "alloc-family"="malloc"
188+
189+
declare void @use(ptr)

0 commit comments

Comments
 (0)