
Commit 8d20eac

[InstCombine] Canonicalize more geps with constant gep bases and constant offsets.
This is another small, and hopefully not performance-negative, step towards canonicalizing to i8 geps. We look for geps whose base pointer is a constant-offset gep, of the form `gep (gep @glob, C1), x, C2`, and expand the gep instruction so that the constants can hopefully be combined (or the offset can be computed in common).
1 parent: edd4566
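
For illustration, in the x12 test below the second-stage gep

    %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 1, i64 2

is now expanded to the following (value names adapted from the FileCheck captures):

    %gep.idx = mul nsw i64 %x, 400
    %gep = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 %gep.idx

The constant indices (1*40 + 2*4 = 48 bytes) fold into the constant base (36 + 48 = 84), leaving only the variable index as a multiply.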

File tree: 3 files changed, +45 -17 lines changed

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 2 additions & 1 deletion
@@ -238,6 +238,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
   convertOrOfShiftsToFunnelShift(Instruction &Or);
 
+  Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
+
 private:
   bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
   bool isDesirableIntType(unsigned BitWidth) const;
@@ -374,7 +376,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
     }
   }
 
-  Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
   Instruction *foldCastedBitwiseLogic(BinaryOperator &I);

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 33 additions & 12 deletions
@@ -2736,6 +2736,35 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
+// Return true if we should canonicalize the gep to a i8 ptradd.
+static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
+  Value *PtrOp = GEP.getOperand(0);
+  Type *GEPEltType = GEP.getSourceElementType();
+  if (GEPEltType->isIntegerTy(8))
+    return false;
+
+  // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+  // intrinsic. This has better support in BasicAA.
+  if (GEPEltType->isScalableTy())
+    return true;
+
+  // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
+  // together.
+  if (GEP.getNumIndices() == 1 &&
+      match(GEP.getOperand(1),
+            m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+                                 m_Shl(m_Value(), m_ConstantInt())))))
+    return true;
+
+  // gep (gep @global, C1), %x, C2 is expanded so the two constants can
+  // possibly be merged together.
+  return isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
+         any_of(GEP.indices(), [](Value *V) {
+           const APInt *C;
+           return match(V, m_APInt(C)) && !C->isZero();
+         });
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2817,19 +2846,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                           GEP.getNoWrapFlags()));
   }
 
-  // Canonicalize
-  //  - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
-  //    This has better support in BasicAA.
-  //  - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
-  //    multiplies together.
-  if (GEPEltType->isScalableTy() ||
-      (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
-       match(GEP.getOperand(1),
-             m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
-                                  m_Shl(m_Value(), m_ConstantInt())))))) {
+  if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
     Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
-    return replaceInstUsesWith(
-        GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
+    Value *NewGEP =
+        Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
+    return replaceInstUsesWith(GEP, NewGEP);
   }
 
   // Check to see if the inputs to the PHI node are getelementptr instructions.
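
When shouldCanonicalizeGEPToPtrAdd fires for a scalable gep, EmitGEPOffset materializes the offset via llvm.vscale. A rough sketch, not taken from this patch's tests (the exact instruction sequence InstCombine emits may differ):

    %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 %i

becomes something like

    %vscale = call i64 @llvm.vscale.i64()
    %elsize = shl i64 %vscale, 4        ; element size = vscale * 16 bytes
    %offset = mul i64 %elsize, %i
    %gep = getelementptr i8, ptr %p, i64 %offset

which exposes the offset as explicit arithmetic that BasicAA can reason about.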

llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

Lines changed: 10 additions & 4 deletions
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
 ; CHECK-LABEL: define ptr @x1y(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -55,8 +59,10 @@ define i32 @twoloads(i64 %x) {
 ; CHECK-LABEL: define i32 @twoloads(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
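
A rough sanity check on the twoloads output above, using the [10 x [10 x [10 x i32]]] strides of 400, 40 and 4 bytes visible in the old CHECK lines: GEP1's constant part folds as 50 + 2*40 + 1*4 = 134 and GEP2's as 36 + 2*40 + 4*4 = 132, so each constant merges into the base constant gep, and the two remaining offsets share the identical `mul nsw i64 %x, 400`, which can now be computed in common.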
