
Commit 8d20eac

[InstCombine] Canonicalize more geps with constant gep bases and constant offsets.
This is another small, and hopefully not performance-negative, step towards canonicalizing to i8 geps. We look for geps whose base pointer is a constant-offset gep, of the form `gep (gep @glob, C1), x, C2`, and expand the gep instruction so that the constants can hopefully be combined (or the offset can be computed in common).
1 parent: edd4566
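
For illustration, in the x12 test below the second-stage gep

    %gep = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 %x, i64 1, i64 2

is now expanded to the following (value names adapted from the FileCheck captures):

    %gep.idx = mul nsw i64 %x, 400
    %gep = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 %gep.idx

The constant indices (1*40 + 2*4 = 48 bytes) fold into the constant base (36 + 48 = 84), leaving only the variable index as a multiply.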

File tree: 3 files changed, +45 -17 lines changed

llvm/lib/Transforms/InstCombine/InstCombineInternal.h

Lines changed: 2 additions & 1 deletion
@@ -238,6 +238,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   std::optional<std::pair<Intrinsic::ID, SmallVector<Value *, 3>>>
   convertOrOfShiftsToFunnelShift(Instruction &Or);
 
+  Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
+
 private:
   bool annotateAnyAllocSite(CallBase &Call, const TargetLibraryInfo *TLI);
   bool isDesirableIntType(unsigned BitWidth) const;
@@ -374,7 +376,6 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
     }
   }
 
-  Value *EmitGEPOffset(GEPOperator *GEP, bool RewriteGEP = false);
   Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
   Instruction *foldBitcastExtElt(ExtractElementInst &ExtElt);
   Instruction *foldCastedBitwiseLogic(BinaryOperator &I);

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 33 additions & 12 deletions
@@ -2736,6 +2736,35 @@ Value *InstCombiner::getFreelyInvertedImpl(Value *V, bool WillInvertAllUses,
   return nullptr;
 }
 
+// Return true if we should canonicalize the gep to a i8 ptradd.
+static bool shouldCanonicalizeGEPToPtrAdd(GetElementPtrInst &GEP) {
+  Value *PtrOp = GEP.getOperand(0);
+  Type *GEPEltType = GEP.getSourceElementType();
+  if (GEPEltType->isIntegerTy(8))
+    return false;
+
+  // Canonicalize scalable GEPs to an explicit offset using the llvm.vscale
+  // intrinsic. This has better support in BasicAA.
+  if (GEPEltType->isScalableTy())
+    return true;
+
+  // gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two multiplies
+  // together.
+  if (GEP.getNumIndices() == 1 &&
+      match(GEP.getOperand(1),
+            m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
+                                 m_Shl(m_Value(), m_ConstantInt())))))
+    return true;
+
+  // gep (gep @global, C1), %x, C2 is expanded so the two constants can
+  // possibly be merged together.
+  return isa<GEPOperator>(PtrOp) && isa<ConstantExpr>(PtrOp) &&
+         any_of(GEP.indices(), [](Value *V) {
+           const APInt *C;
+           return match(V, m_APInt(C)) && !C->isZero();
+         });
+}
+
 Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   Value *PtrOp = GEP.getOperand(0);
   SmallVector<Value *, 8> Indices(GEP.indices());
@@ -2817,19 +2846,11 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                           GEP.getNoWrapFlags()));
   }
 
-  // Canonicalize
-  //  - scalable GEPs to an explicit offset using the llvm.vscale intrinsic.
-  //    This has better support in BasicAA.
-  //  - gep i32 p, mul(O, C) -> gep i8, p, mul(O, C*4) to fold the two
-  //    multiplies together.
-  if (GEPEltType->isScalableTy() ||
-      (!GEPEltType->isIntegerTy(8) && GEP.getNumIndices() == 1 &&
-       match(GEP.getOperand(1),
-             m_OneUse(m_CombineOr(m_Mul(m_Value(), m_ConstantInt()),
-                                  m_Shl(m_Value(), m_ConstantInt())))))) {
+  if (shouldCanonicalizeGEPToPtrAdd(GEP)) {
     Value *Offset = EmitGEPOffset(cast<GEPOperator>(&GEP));
-    return replaceInstUsesWith(
-        GEP, Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags()));
+    Value *NewGEP =
+        Builder.CreatePtrAdd(PtrOp, Offset, "", GEP.getNoWrapFlags());
+    return replaceInstUsesWith(GEP, NewGEP);
   }
 
   // Check to see if the inputs to the PHI node are getelementptr instructions.
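
When shouldCanonicalizeGEPToPtrAdd fires for a scalable gep, EmitGEPOffset materializes the offset via llvm.vscale. A rough sketch, not taken from this patch's tests (the exact instruction sequence InstCombine emits may differ):

    %gep = getelementptr <vscale x 4 x i32>, ptr %p, i64 %i

becomes something like

    %vscale = call i64 @llvm.vscale.i64()
    %elsize = shl i64 %vscale, 4        ; element size = vscale * 16 bytes
    %offset = mul i64 %elsize, %i
    %gep = getelementptr i8, ptr %p, i64 %offset

which exposes the offset as explicit arithmetic that BasicAA can reason about.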

llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll

Lines changed: 10 additions & 4 deletions
@@ -7,7 +7,8 @@ define ptr @x12(i64 %x) {
 ; CHECK-LABEL: define ptr @x12(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 1, i64 2
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 84), i64 [[GEP_IDX]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -19,7 +20,10 @@ define ptr @x1y(i64 %x, i64 %y) {
 ; CHECK-LABEL: define ptr @x1y(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 [[Y]]
+; CHECK-NEXT:    [[GEP_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP_IDX1:%.*]] = shl nsw i64 [[Y]], 2
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 116), i64 [[GEP_IDX]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[GEP_IDX1]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
@@ -55,8 +59,10 @@ define i32 @twoloads(i64 %x) {
 ; CHECK-LABEL: define i32 @twoloads(
 ; CHECK-SAME: i64 [[X:%.*]]) {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 50), i64 0, i64 [[X]], i64 2, i64 1
-; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [10 x [10 x [10 x i32]]], ptr getelementptr inbounds (i8, ptr @glob, i64 36), i64 0, i64 [[X]], i64 2, i64 4
+; CHECK-NEXT:    [[GEP1_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 134), i64 [[GEP1_IDX]]
+; CHECK-NEXT:    [[GEP2_IDX:%.*]] = mul nsw i64 [[X]], 400
+; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr i8, ptr getelementptr inbounds (i8, ptr @glob, i64 132), i64 [[GEP2_IDX]]
 ; CHECK-NEXT:    [[A:%.*]] = load i32, ptr [[GEP1]], align 4
 ; CHECK-NEXT:    [[B:%.*]] = load i32, ptr [[GEP2]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
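
A rough sanity check on the twoloads output above, using the [10 x [10 x [10 x i32]]] strides of 400, 40 and 4 bytes visible in the old CHECK lines: GEP1's constant part folds as 50 + 2*40 + 1*4 = 134 and GEP2's as 36 + 2*40 + 4*4 = 132, so each constant merges into the base constant gep, and the two remaining offsets share the identical `mul nsw i64 %x, 400`, which can now be computed in common.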
