Skip to content

Commit 5a1d850

Browse files
authored
[InstCombine] Canonicalize gep T, (gep i8, base, C1), (Index + C2) into gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index (#76177)
This patch tries to canonicalize `gep T, (gep i8, base, C1), (Index + C2)` into `gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index`. Alive2: https://alive2.llvm.org/ce/z/dxShKF Fixes regressions found in #68882.
1 parent f5953f4 commit 5a1d850

File tree

2 files changed

+332
-0
lines changed

2 files changed

+332
-0
lines changed

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,6 +2339,43 @@ static Instruction *foldSelectGEP(GetElementPtrInst &GEP,
23392339
return SelectInst::Create(Cond, NewTrueC, NewFalseC, "", nullptr, Sel);
23402340
}
23412341

2342+
// Canonicalization:
2343+
// gep T, (gep i8, base, C1), (Index + C2) into
2344+
// gep T, (gep i8, base, C1 + C2 * sizeof(T)), Index
//
// \p GEP is the outer (typed) GEP and \p Src is the GEP feeding its pointer
// operand. Returns a newly created replacement for \p GEP, or nullptr when the
// pattern does not match or the rewrite is not performed.
2345+
static Instruction *canonicalizeGEPOfConstGEPI8(GetElementPtrInst &GEP,
2346+
GEPOperator *Src,
2347+
InstCombinerImpl &IC) {
2348+
// Only a single-index outer GEP is handled.
if (GEP.getNumIndices() != 1)
2349+
return nullptr;
2350+
auto &DL = IC.getDataLayout();
2351+
Value *Base;
2352+
const APInt *C1;
2353+
// The inner GEP must be `base + C1` with a constant C1 (the `gep i8` form in
// the header comment).
if (!match(Src, m_PtrAdd(m_Value(Base), m_APInt(C1))))
2354+
return nullptr;
2355+
Value *VarIndex;
2356+
const APInt *C2;
2357+
Type *PtrTy = Src->getType()->getScalarType();
2358+
unsigned IndexSizeInBits = DL.getIndexTypeSizeInBits(PtrTy);
2359+
// The outer index must be an add-like `VarIndex + C2` with a constant C2.
if (!match(GEP.getOperand(1), m_AddLike(m_Value(VarIndex), m_APInt(C2))))
2360+
return nullptr;
2361+
// Both constants must already have the pointer's index width, since they are
// combined below with a TypeSize value of exactly that width.
if (C1->getBitWidth() != IndexSizeInBits ||
2362+
C2->getBitWidth() != IndexSizeInBits)
2363+
return nullptr;
2364+
Type *BaseType = GEP.getSourceElementType();
2365+
// Scalable vector element types are rejected; presumably because their
// allocation size cannot be expressed as a fixed constant -- TODO confirm.
if (isa<ScalableVectorType>(BaseType))
2366+
return nullptr;
2367+
APInt TypeSize(IndexSizeInBits, DL.getTypeAllocSize(BaseType));
2368+
// New byte offset: C1 + C2 * sizeof(T), computed at index width.
APInt NewOffset = TypeSize * *C2 + *C1;
2369+
// Rewrite when the combined offset is zero, or when both the inner GEP and
// the index expression have a single use.
// NOTE(review): the one-use requirement presumably keeps the rewrite from
// adding instructions when the old ones stay live -- confirm.
if (NewOffset.isZero() ||
2370+
(Src->hasOneUse() && GEP.getOperand(1)->hasOneUse())) {
2371+
Value *GEPConst =
2372+
IC.Builder.CreatePtrAdd(Base, IC.Builder.getInt(NewOffset));
2373+
// NOTE(review): no inbounds/no-wrap flags are passed to either new GEP here.
return GetElementPtrInst::Create(BaseType, GEPConst, VarIndex);
2374+
}
2375+
2376+
return nullptr;
2377+
}
2378+
23422379
Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
23432380
GEPOperator *Src) {
23442381
// Combine Indices - If the source pointer to this getelementptr instruction
@@ -2347,6 +2384,9 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
23472384
if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
23482385
return nullptr;
23492386

2387+
if (auto *I = canonicalizeGEPOfConstGEPI8(GEP, Src, *this))
2388+
return I;
2389+
23502390
// For constant GEPs, use a more general offset-based folding approach.
23512391
Type *PtrTy = Src->getType()->getScalarType();
23522392
if (GEP.hasAllConstantIndices() &&
Lines changed: 292 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,292 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt < %s -S -passes=instcombine | FileCheck %s
3+
4+
declare void @use64(i64)
5+
declare void @useptr(ptr)
6+
7+
; Positive test: inner byte offset -4 plus index constant 1 scaled by a 4-byte
; i32 cancels to zero, so the expected output is a single i32 GEP on %base.
define ptr @test_zero(ptr %base, i64 %a) {
8+
; CHECK-LABEL: define ptr @test_zero(
9+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
10+
; CHECK-NEXT: entry:
11+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
12+
; CHECK-NEXT: ret ptr [[P2]]
13+
;
14+
entry:
15+
%p1 = getelementptr i8, ptr %base, i64 -4
16+
%index = add i64 %a, 1
17+
%p2 = getelementptr i32, ptr %p1, i64 %index
18+
ret ptr %p2
19+
}
20+
21+
; Positive test with single-use operands: -4 + 2 * 4 gives a combined byte
; offset of 4, which the expected output places in the i8 GEP on %base.
define ptr @test_nonzero(ptr %base, i64 %a) {
22+
; CHECK-LABEL: define ptr @test_nonzero(
23+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
24+
; CHECK-NEXT: entry:
25+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[BASE]], i64 4
26+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[TMP0]], i64 [[A]]
27+
; CHECK-NEXT: ret ptr [[P2]]
28+
;
29+
entry:
30+
%p1 = getelementptr i8, ptr %base, i64 -4
31+
%index = add i64 %a, 2
32+
%p2 = getelementptr i32, ptr %p1, i64 %index
33+
ret ptr %p2
34+
}
35+
36+
; Positive test: an `or disjoint` index is accepted like an add, so the result
; matches test_zero (offsets cancel, single i32 GEP on %base).
define ptr @test_or_disjoint(ptr %base, i64 %a) {
37+
; CHECK-LABEL: define ptr @test_or_disjoint(
38+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
39+
; CHECK-NEXT: entry:
40+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
41+
; CHECK-NEXT: ret ptr [[P2]]
42+
;
43+
entry:
44+
%p1 = getelementptr i8, ptr %base, i64 -4
45+
%index = or disjoint i64 %a, 1
46+
%p2 = getelementptr i32, ptr %p1, i64 %index
47+
ret ptr %p2
48+
}
49+
50+
; Positive test: %index has a second use (the call), but the combined offset is
; zero, so the GEP is still rewritten onto %base while the add stays for the call.
define ptr @test_zero_multiuse_index(ptr %base, i64 %a) {
51+
; CHECK-LABEL: define ptr @test_zero_multiuse_index(
52+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
53+
; CHECK-NEXT: entry:
54+
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1
55+
; CHECK-NEXT: call void @use64(i64 [[INDEX]])
56+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
57+
; CHECK-NEXT: ret ptr [[P2]]
58+
;
59+
entry:
60+
%p1 = getelementptr i8, ptr %base, i64 -4
61+
%index = add i64 %a, 1
62+
call void @use64(i64 %index)
63+
%p2 = getelementptr i32, ptr %p1, i64 %index
64+
ret ptr %p2
65+
}
66+
67+
; Positive test: %p1 has a second use (the call), but the combined offset is
; zero, so the outer GEP is still rewritten onto %base while %p1 stays for the call.
define ptr @test_zero_multiuse_ptr(ptr %base, i64 %a) {
68+
; CHECK-LABEL: define ptr @test_zero_multiuse_ptr(
69+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
70+
; CHECK-NEXT: entry:
71+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
72+
; CHECK-NEXT: call void @useptr(ptr [[P1]])
73+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[A]]
74+
; CHECK-NEXT: ret ptr [[P2]]
75+
;
76+
entry:
77+
%p1 = getelementptr i8, ptr %base, i64 -4
78+
call void @useptr(ptr %p1)
79+
%index = add i64 %a, 1
80+
%p2 = getelementptr i32, ptr %p1, i64 %index
81+
ret ptr %p2
82+
}
83+
84+
; The index is a 32-bit `add nsw`. The expected output sign-extends %a and
; keeps the -4 and +4 byte offsets in separate i8 GEPs instead of cancelling them.
define ptr @test_zero_sext_add_nsw(ptr %base, i32 %a) {
85+
; CHECK-LABEL: define ptr @test_zero_sext_add_nsw(
86+
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
87+
; CHECK-NEXT: entry:
88+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
89+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[A]] to i64
90+
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
91+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 4
92+
; CHECK-NEXT: ret ptr [[P2]]
93+
;
94+
entry:
95+
%p1 = getelementptr i8, ptr %base, i64 -4
96+
%index = add nsw i32 %a, 1
97+
%p2 = getelementptr i32, ptr %p1, i32 %index
98+
ret ptr %p2
99+
}
100+
101+
; The index is a 128-bit add. The expected output truncates %a to i64 and the
; offsets cancel, leaving a single i32 GEP on %base.
define ptr @test_zero_trunc_add(ptr %base, i128 %a) {
102+
; CHECK-LABEL: define ptr @test_zero_trunc_add(
103+
; CHECK-SAME: ptr [[BASE:%.*]], i128 [[A:%.*]]) {
104+
; CHECK-NEXT: entry:
105+
; CHECK-NEXT: [[TMP0:%.*]] = trunc i128 [[A]] to i64
106+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[BASE]], i64 [[TMP0]]
107+
; CHECK-NEXT: ret ptr [[P2]]
108+
;
109+
entry:
110+
%p1 = getelementptr i8, ptr %base, i64 -4
111+
%index = add i128 %a, 1
112+
%p2 = getelementptr i32, ptr %p1, i128 %index
113+
ret ptr %p2
114+
}
115+
116+
; The inner GEP is typed i16 (-4 elements, i.e. -8 bytes with a 2-byte i16).
; The expected output is an i8 GEP of -4 on %base followed by the i32 GEP on %a.
define ptr @test_non_i8(ptr %base, i64 %a) {
117+
; CHECK-LABEL: define ptr @test_non_i8(
118+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
119+
; CHECK-NEXT: entry:
120+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
121+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
122+
; CHECK-NEXT: ret ptr [[TMP0]]
123+
;
124+
entry:
125+
%p1 = getelementptr i16, ptr %base, i64 -4
126+
%index = add i64 %a, 1
127+
%p2 = getelementptr i32, ptr %p1, i64 %index
128+
ret ptr %p2
129+
}
130+
131+
; Negative test: the inner byte offset %b is not a constant. The expected
; output keeps the inner GEP and applies the +1 element as a trailing 4-byte i8 GEP.
define ptr @test_non_const(ptr %base, i64 %a, i64 %b) {
132+
; CHECK-LABEL: define ptr @test_non_const(
133+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
134+
; CHECK-NEXT: entry:
135+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]]
136+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
137+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 4
138+
; CHECK-NEXT: ret ptr [[P2]]
139+
;
140+
entry:
141+
%p1 = getelementptr i8, ptr %base, i64 %b
142+
%index = add i64 %a, 1
143+
%p2 = getelementptr i32, ptr %p1, i64 %index
144+
ret ptr %p2
145+
}
146+
147+
; Negative test: the outer GEP has two indices (and the inner offset %b is
; variable). The expected output leaves all three instructions unchanged.
define ptr @test_too_many_indices(ptr %base, i64 %a, i64 %b) {
148+
; CHECK-LABEL: define ptr @test_too_many_indices(
149+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
150+
; CHECK-NEXT: entry:
151+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 [[B]]
152+
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 1
153+
; CHECK-NEXT: [[P2:%.*]] = getelementptr [8 x i32], ptr [[P1]], i64 1, i64 [[INDEX]]
154+
; CHECK-NEXT: ret ptr [[P2]]
155+
;
156+
entry:
157+
%p1 = getelementptr i8, ptr %base, i64 %b
158+
%index = add i64 %a, 1
159+
%p2 = getelementptr [8 x i32], ptr %p1, i64 1, i64 %index
160+
ret ptr %p2
161+
}
162+
163+
; Negative test: the index is an xor, not an add-like operation, so the
; expected output leaves the instructions unchanged.
define ptr @test_wrong_op(ptr %base, i64 %a) {
164+
; CHECK-LABEL: define ptr @test_wrong_op(
165+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
166+
; CHECK-NEXT: entry:
167+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
168+
; CHECK-NEXT: [[INDEX:%.*]] = xor i64 [[A]], 1
169+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]]
170+
; CHECK-NEXT: ret ptr [[P2]]
171+
;
172+
entry:
173+
%p1 = getelementptr i8, ptr %base, i64 -4
174+
%index = xor i64 %a, 1
175+
%p2 = getelementptr i32, ptr %p1, i64 %index
176+
ret ptr %p2
177+
}
178+
179+
; Negative test: a 32-bit add without nsw. The expected output keeps the i32
; add and sign-extends its result, so no constant is pulled out of the index.
define ptr @test_sext_add_without_nsw(ptr %base, i32 %a) {
180+
; CHECK-LABEL: define ptr @test_sext_add_without_nsw(
181+
; CHECK-SAME: ptr [[BASE:%.*]], i32 [[A:%.*]]) {
182+
; CHECK-NEXT: entry:
183+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
184+
; CHECK-NEXT: [[INDEX:%.*]] = add i32 [[A]], 1
185+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
186+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[TMP0]]
187+
; CHECK-NEXT: ret ptr [[P2]]
188+
;
189+
entry:
190+
%p1 = getelementptr i8, ptr %base, i64 -4
191+
%index = add i32 %a, 1
192+
%p2 = getelementptr i32, ptr %p1, i32 %index
193+
ret ptr %p2
194+
}
195+
196+
; Negative test: a plain `or` (no disjoint flag) is not treated as an add, so
; the expected output leaves the instructions unchanged.
define ptr @test_or_without_disjoint(ptr %base, i64 %a) {
197+
; CHECK-LABEL: define ptr @test_or_without_disjoint(
198+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
199+
; CHECK-NEXT: entry:
200+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
201+
; CHECK-NEXT: [[INDEX:%.*]] = or i64 [[A]], 1
202+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]]
203+
; CHECK-NEXT: ret ptr [[P2]]
204+
;
205+
entry:
206+
%p1 = getelementptr i8, ptr %base, i64 -4
207+
%index = or i64 %a, 1
208+
%p2 = getelementptr i32, ptr %p1, i64 %index
209+
ret ptr %p2
210+
}
211+
212+
; The index constant times the 4-byte element size wraps in 64 bits
; (4 * 9223372036854775806 wraps to -8), giving the expected byte offset -4 + -8 = -12.
define ptr @test_smul_overflow(ptr %base, i64 %a) {
213+
; CHECK-LABEL: define ptr @test_smul_overflow(
214+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
215+
; CHECK-NEXT: entry:
216+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -12
217+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
218+
; CHECK-NEXT: ret ptr [[TMP0]]
219+
;
220+
entry:
221+
%p1 = getelementptr i8, ptr %base, i64 -4
222+
%index = add i64 %a, 9223372036854775806
223+
%p2 = getelementptr i32, ptr %p1, i64 %index
224+
ret ptr %p2
225+
}
226+
227+
; Adding the scaled index constant (4) to the inner offset 9223372036854775804
; wraps in 64 bits, giving the expected byte offset -9223372036854775808.
define ptr @test_sadd_overflow(ptr %base, i64 %a) {
228+
; CHECK-LABEL: define ptr @test_sadd_overflow(
229+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
230+
; CHECK-NEXT: entry:
231+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -9223372036854775808
232+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
233+
; CHECK-NEXT: ret ptr [[TMP0]]
234+
;
235+
entry:
236+
%p1 = getelementptr i8, ptr %base, i64 9223372036854775804
237+
%index = add i64 %a, 1
238+
%p2 = getelementptr i32, ptr %p1, i64 %index
239+
ret ptr %p2
240+
}
241+
242+
; Negative test: the combined offset would be non-zero (4) and %index has a
; second use (the call), so the expected output leaves the instructions unchanged.
define ptr @test_nonzero_multiuse_index(ptr %base, i64 %a) {
243+
; CHECK-LABEL: define ptr @test_nonzero_multiuse_index(
244+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
245+
; CHECK-NEXT: entry:
246+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
247+
; CHECK-NEXT: [[INDEX:%.*]] = add i64 [[A]], 2
248+
; CHECK-NEXT: call void @use64(i64 [[INDEX]])
249+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i32, ptr [[P1]], i64 [[INDEX]]
250+
; CHECK-NEXT: ret ptr [[P2]]
251+
;
252+
entry:
253+
%p1 = getelementptr i8, ptr %base, i64 -4
254+
%index = add i64 %a, 2
255+
call void @use64(i64 %index)
256+
%p2 = getelementptr i32, ptr %p1, i64 %index
257+
ret ptr %p2
258+
}
259+
260+
; Negative test: the combined offset would be non-zero (4) and %p1 has a second
; use (the call). The expected output indexes from %p1 and adds a trailing +8 byte GEP.
define ptr @test_nonzero_multiuse_ptr(ptr %base, i64 %a) {
261+
; CHECK-LABEL: define ptr @test_nonzero_multiuse_ptr(
262+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
263+
; CHECK-NEXT: entry:
264+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
265+
; CHECK-NEXT: call void @useptr(ptr [[P1]])
266+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P1]], i64 [[A]]
267+
; CHECK-NEXT: [[P2:%.*]] = getelementptr i8, ptr [[TMP0]], i64 8
268+
; CHECK-NEXT: ret ptr [[P2]]
269+
;
270+
entry:
271+
%p1 = getelementptr i8, ptr %base, i64 -4
272+
call void @useptr(ptr %p1)
273+
%index = add i64 %a, 2
274+
%p2 = getelementptr i32, ptr %p1, i64 %index
275+
ret ptr %p2
276+
}
277+
278+
; Negative test: the element type is a scalable vector, so the index constant is
; not folded into the i8 byte offset; the expected output keeps the +1 as a
; separate scalable-vector GEP.
define ptr @test_scalable(ptr %base, i64 %a) {
279+
; CHECK-LABEL: define ptr @test_scalable(
280+
; CHECK-SAME: ptr [[BASE:%.*]], i64 [[A:%.*]]) {
281+
; CHECK-NEXT: entry:
282+
; CHECK-NEXT: [[P1:%.*]] = getelementptr i8, ptr [[BASE]], i64 -4
283+
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[P1]], i64 [[A]]
284+
; CHECK-NEXT: [[P2:%.*]] = getelementptr <vscale x 4 x i32>, ptr [[TMP0]], i64 1
285+
; CHECK-NEXT: ret ptr [[P2]]
286+
;
287+
entry:
288+
%p1 = getelementptr i8, ptr %base, i64 -4
289+
%index = add i64 %a, 1
290+
%p2 = getelementptr <vscale x 4 x i32>, ptr %p1, i64 %index
291+
ret ptr %p2
292+
}

0 commit comments

Comments
 (0)