Commit 149efb8
[InstCombine] Canonicalize more geps with constant gep bases and constant offsets.
This is another small but hopefully not performance-negative step towards canonicalizing to i8 geps. We look for geps whose base pointer is a constant-offset gep, of the form `gep (gep @glob, C1), x, C2`, and expand the gep instruction so that the constants can hopefully be combined together (or the offset can be computed in common).
1 parent: b3e0bd3, commit: 149efb8
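For illustration, a minimal IR sketch of the intended rewrite; the global @g, the base offset 8, and the exact flags are made up for this example, not taken from the commit:

  @g = global [10 x [10 x i32]] zeroinitializer

  ; Before: the base pointer is itself a constant-offset gep of @g, and the outer
  ; gep mixes the variable index %x with the constant indices 1 and 2.
  %gep = getelementptr inbounds [10 x [10 x i32]], ptr getelementptr inbounds (i8, ptr @g, i64 8), i64 %x, i64 1, i64 2

  ; After: the offset is emitted explicitly, so the constant part
  ; (8 + 1*40 + 2*4 = 56 bytes) folds into the base and only the variable part
  ; survives as an i8 gep index.
  %gep.idx = mul nsw i64 %x, 400
  %gep = getelementptr i8, ptr getelementptr inbounds (i8, ptr @g, i64 56), i64 %gep.idx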

3 files changed: +50, -23 lines
llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 34 additions & 12 deletions
@@ -2731,6 +2731,36 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }

+/// Return true if we should canonicalize the gep to an i8 ptradd.
+static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
+  Value *PtrOp = GEP.getOperand(0);
+  Type *GEPEltType = GEP.getSourceElementType();
+  if (GEPEltType->isIntegerTy(8))
+    return false;
+
+  // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+  // intrinsic. This has better support in BasicAA.
+  if (GEPEltType->isScalableTy())
+    return true;
+
+  // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
+  // together.
+  if (GEP.getNumIndices() == 1 &&
+      match(GEP.getOperand(1),
+            m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+                                 m_Shl(m_Value(), m_ConstantInt())))))
+    return true;
+
+  // gep (gep %p, C1), %x, C2 is expanded so the two constants can
+  // possibly be merged together.
+  auto PtrOpGep = dyn_cast<GEPOperator>(PtrOp);
+  return PtrOpGep && PtrOpGep->hasAllConstantIndices() &&
+         any_of(GEP.indices(), [](Value *V) {
+           const APInt *C;
+           return match(V, m_APInt(C)) && !C->isZero();
+         });
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2812,19 +2842,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                        GEP.getNoWrapFlags()));
   }

-  // Canonicalize
-  // - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
-  //   This has better support in BasicAA.
-  // - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
-  //   multiplies together.
-  if (GEPEltType->isScalableTy() ||
-      (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
-       match(GEP.getOperand(1),
-             m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
-                                  m_Shl(m_Value(), m_ConstantInt())))))) {
+  if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
     Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
-    return replaceInstUsesWith(
-        GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
+    Value *NewGEP =
+        Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
+    return replaceInstUsesWith(GEP, NewGEP);
   }

   // Check to see if the inputs to the PHI node are getelementptr instructions.
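The other non-scalable case the helper matches is the pre-existing single-index mul/shl one; as a rough IR sketch (the i32 element type and the constant 5 are illustrative):

  ; gep i32 p, mul(O, C) is turned into gep i8, p, mul(O, C*4), so the two
  ; multiplies can fold together:
  %o = mul i64 %n, 5
  %q = getelementptr i32, ptr %p, i64 %o
  ; becomes roughly
  %o.scaled = mul i64 %n, 20        ; 5 * 4 bytes per i32
  %q = getelementptr i8, ptr %p, i64 %o.scaled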

llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

Lines changed: 10 additions & 4 deletions
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
 ; CHECK-LABEL: define ptr @x1y(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -55,8 +59,10 @@ define i32 @twoloads(i64 %x) {
 ; CHECK-LABEL: define i32 @twoloads(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
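The new constant offsets in these checks can be derived from the element sizes of [10 x [10 x [10 x i32]]], the type the old checks use for @glob; for @x12, for instance:

  ; 36 (old constant base) + 1 * 40 (sizeof [10 x i32]) + 2 * 4 (sizeof i32) = 84
  ; the remaining variable index %x scales by 400 = sizeof([10 x [10 x i32]]),
  ; which is the mul nsw i64 %x, 400 in the new output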

llvm/test/Transforms/InstCombine/gep-merge-constant-indices.ll

Lines changed: 6 additions & 7 deletions
@@ -140,26 +140,25 @@ define ptr @notDivisible(ptr %p) {
   ret ptr %2
 }

-; Negative test. Two GEP should not be merged if not both offsets are constant
-; or divisible by the other's size.
 define ptr @partialConstant2(ptr %p, i64 %a) {
 ; CHECK-LABEL: @partialConstant2(
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[A:%.*]], i64 2
+; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[A:%.*]], 5
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 20
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 [[DOTIDX]]
 ; CHECK-NEXT:    ret ptr [[TMP2]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1
   %2 = getelementptr inbounds [4 x i64], ptr %1, i64 %a, i64 2
   ret ptr %2
 }

-; Negative test. Two GEP should not be merged if there is another use of the
-; first GEP by the second GEP.
 define ptr @partialConstant3(ptr %p) {
 ; CHECK-LABEL: @partialConstant3(
 ; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P:%.*]], i64 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [4 x i64], ptr [[TMP1]], i64 [[TMP2]], i64 2
+; CHECK-NEXT:    [[DOTIDX:%.*]] = shl nsw i64 [[TMP2]], 5
+; CHECK-NEXT:    [[DOTOFFS:%.*]] = or disjoint i64 [[DOTIDX]], 16
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[DOTOFFS]]
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
   %1 = getelementptr inbounds i32, ptr %p, i64 1