[InstCombine] Support ptrtoint of gep folds for chain of geps #137323
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Nikita Popov (nikic)

Changes:

Support the ptrtoint(gep null, x) -> x and ptrtoint(gep inttoptr(x), y) -> x+y folds for the case where there is a chain of geps that ends in null or inttoptr. This avoids some regressions from #137297. While here, also be a bit more careful about edge cases like pointer-to-vector splats and mismatched pointer and index sizes.

Full diff: https://github.com/llvm/llvm-project/pull/137323.diff (3 files affected).
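As a quick illustration (hypothetical IR, assuming the 32-bit pointer/index layout the tests use; the new tests in cast_ptr.ll below exercise the full chained cases), the fold rewrites

  %ptr = inttoptr i32 %x to ptr
  %ptr2 = getelementptr i16, ptr %ptr, i32 %y
  %r = ptrtoint ptr %ptr2 to i32

into pure integer arithmetic:

  %ptr2.idx = shl i32 %y, 1   ; %y * sizeof(i16), emitted by EmitGEPOffset
  %r = add i32 %x, %ptr2.idx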
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 1a95636f37ed7..eb3676a0fc47e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -2067,6 +2067,42 @@ Instruction *InstCombinerImpl::visitIntToPtr(IntToPtrInst &CI) {
return nullptr;
}
+Value *InstCombinerImpl::foldPtrToIntOfGEP(Type *IntTy, Value *Ptr) {
+ // Look through a chain of one-use GEPs.
+ Type *PtrTy = Ptr->getType();
+ SmallVector<GEPOperator *> GEPs;
+ while (true) {
+ auto *GEP = dyn_cast<GEPOperator>(Ptr);
+ if (!GEP || !GEP->hasOneUse())
+ break;
+ GEPs.push_back(GEP);
+ Ptr = GEP->getPointerOperand();
+ }
+
+ // Don't handle case where GEP converts from pointer to vector.
+ if (GEPs.empty() || PtrTy != Ptr->getType())
+ return nullptr;
+
+ // Check whether we know the integer value of the base pointer.
+ Value *Res;
+ Type *IdxTy = DL.getIndexType(PtrTy);
+ if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
+ Res->getType() == IntTy && IntTy == IdxTy) {
+ // pass
+ } else if (isa<ConstantPointerNull>(Ptr)) {
+ Res = Constant::getNullValue(IdxTy);
+ } else {
+ return nullptr;
+ }
+
+ // Perform the entire operation on integers instead.
+ for (GEPOperator *GEP : reverse(GEPs)) {
+ Value *Offset = EmitGEPOffset(GEP);
+ Res = Builder.CreateAdd(Res, Offset, "", GEP->hasNoUnsignedWrap());
+ }
+ return Builder.CreateZExtOrTrunc(Res, IntTy);
+}
+
Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
// If the destination integer type is not the intptr_t type for this target,
// do a ptrtoint to intptr_t then do a trunc or zext. This allows the cast
@@ -2093,29 +2129,8 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
Mask->getType() == Ty)
return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
- if (auto *GEP = dyn_cast<GEPOperator>(SrcOp)) {
- // Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
- // While this can increase the number of instructions it doesn't actually
- // increase the overall complexity since the arithmetic is just part of
- // the GEP otherwise.
- if (GEP->hasOneUse() &&
- isa<ConstantPointerNull>(GEP->getPointerOperand())) {
- return replaceInstUsesWith(CI,
- Builder.CreateIntCast(EmitGEPOffset(GEP), Ty,
- /*isSigned=*/false));
- }
-
- // (ptrtoint (gep (inttoptr Base), ...)) -> Base + Offset
- Value *Base;
- if (GEP->hasOneUse() &&
- match(GEP->getPointerOperand(), m_OneUse(m_IntToPtr(m_Value(Base)))) &&
- Base->getType() == Ty) {
- Value *Offset = EmitGEPOffset(GEP);
- auto *NewOp = BinaryOperator::CreateAdd(Base, Offset);
- NewOp->setHasNoUnsignedWrap(GEP->hasNoUnsignedWrap());
- return NewOp;
- }
- }
+ if (Value *V = foldPtrToIntOfGEP(Ty, SrcOp))
+ return replaceInstUsesWith(CI, V);
Value *Vec, *Scalar, *Index;
if (match(SrcOp, m_OneUse(m_InsertElt(m_IntToPtr(m_Value(Vec)),
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9923719c3443d..324738ef8c88e 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -661,6 +661,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// folded operation.
void PHIArgMergedDebugLoc(Instruction *Inst, PHINode &PN);
+ Value *foldPtrToIntOfGEP(Type *IntTy, Value *Ptr);
Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS, CmpPredicate Cond,
Instruction &I);
Instruction *foldSelectICmp(CmpPredicate Pred, SelectInst *SI, Value *RHS,
diff --git a/llvm/test/Transforms/InstCombine/cast_ptr.ll b/llvm/test/Transforms/InstCombine/cast_ptr.ll
index 40ba21b24435b..1cf3e32556b38 100644
--- a/llvm/test/Transforms/InstCombine/cast_ptr.ll
+++ b/llvm/test/Transforms/InstCombine/cast_ptr.ll
@@ -2,7 +2,7 @@
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
-target datalayout = "p:32:32-p1:32:32-p2:16:16"
+target datalayout = "p:32:32-p1:32:32-p2:16:16-p3:32:32:32:16"
@global = global i8 0
@@ -432,3 +432,103 @@ define i32 @ptr_add_in_int_extra_use2(i32 %x) {
%r = ptrtoint ptr %p2 to i32
ret i32 %r
}
+
+define i32 @ptrtoint_of_inttoptr_multiple_gep(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @ptrtoint_of_inttoptr_multiple_gep(
+; CHECK-NEXT: [[PTR2_IDX:%.*]] = shl nuw i32 [[Y:%.*]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[X:%.*]], [[PTR2_IDX]]
+; CHECK-NEXT: [[PTR3_IDX:%.*]] = shl i32 [[Z:%.*]], 2
+; CHECK-NEXT: [[R:%.*]] = add i32 [[TMP1]], [[PTR3_IDX]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr = inttoptr i32 %x to ptr
+ %ptr2 = getelementptr nuw i16, ptr %ptr, i32 %y
+ %ptr3 = getelementptr i32, ptr %ptr2, i32 %z
+ %r = ptrtoint ptr %ptr3 to i32
+ ret i32 %r
+}
+
+define i32 @ptrtoint_of_inttoptr_multiple_gep_extra_use(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @ptrtoint_of_inttoptr_multiple_gep_extra_use(
+; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr [[PTR]], i32 [[Y:%.*]]
+; CHECK-NEXT: call void @use_ptr(ptr [[PTR2]])
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr i32, ptr [[PTR2]], i32 [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[PTR3]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr = inttoptr i32 %x to ptr
+ %ptr2 = getelementptr i16, ptr %ptr, i32 %y
+ call void @use_ptr(ptr %ptr2)
+ %ptr3 = getelementptr i32, ptr %ptr2, i32 %z
+ %r = ptrtoint ptr %ptr3 to i32
+ ret i32 %r
+}
+
+define i32 @ptrtoint_of_inttoptr_index_type(i32 %x, i16 %y) {
+; CHECK-LABEL: @ptrtoint_of_inttoptr_index_type(
+; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr addrspace(3)
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr addrspace(3) [[PTR]], i16 [[Y:%.*]]
+; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr addrspace(3) [[PTR2]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr = inttoptr i32 %x to ptr addrspace(3)
+ %ptr2 = getelementptr i16, ptr addrspace(3) %ptr, i16 %y
+ %r = ptrtoint ptr addrspace(3) %ptr2 to i32
+ ret i32 %r
+}
+
+define i32 @ptrtoint_of_null_multiple_gep(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @ptrtoint_of_null_multiple_gep(
+; CHECK-NEXT: [[PTR2_IDX:%.*]] = shl i32 [[X:%.*]], 1
+; CHECK-NEXT: [[PTR3_IDX:%.*]] = shl nuw i32 [[Y:%.*]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[PTR2_IDX]], [[PTR3_IDX]]
+; CHECK-NEXT: [[PTR4_IDX:%.*]] = shl i32 [[Z:%.*]], 3
+; CHECK-NEXT: [[R:%.*]] = add i32 [[TMP1]], [[PTR4_IDX]]
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr2 = getelementptr i16, ptr null, i32 %x
+ %ptr3 = getelementptr nuw i32, ptr %ptr2, i32 %y
+ %ptr4 = getelementptr i64, ptr %ptr3, i32 %z
+ %r = ptrtoint ptr %ptr4 to i32
+ ret i32 %r
+}
+
+define i32 @ptrtoint_of_null_multiple_gep_extra_use(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @ptrtoint_of_null_multiple_gep_extra_use(
+; CHECK-NEXT: [[PTR2:%.*]] = getelementptr i16, ptr null, i32 [[X:%.*]]
+; CHECK-NEXT: call void @use_ptr(ptr [[PTR2]])
+; CHECK-NEXT: [[PTR3:%.*]] = getelementptr nuw i32, ptr [[PTR2]], i32 [[Y:%.*]]
+; CHECK-NEXT: [[PTR4:%.*]] = getelementptr i64, ptr [[PTR3]], i32 [[Z:%.*]]
+; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[PTR4]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr2 = getelementptr i16, ptr null, i32 %x
+ call void @use_ptr(ptr %ptr2)
+ %ptr3 = getelementptr nuw i32, ptr %ptr2, i32 %y
+ %ptr4 = getelementptr i64, ptr %ptr3, i32 %z
+ %r = ptrtoint ptr %ptr4 to i32
+ ret i32 %r
+}
+
+define i32 @ptrtoint_of_null_index_type(i16 %x) {
+; CHECK-LABEL: @ptrtoint_of_null_index_type(
+; CHECK-NEXT: [[PTR_IDX:%.*]] = shl i16 [[X:%.*]], 1
+; CHECK-NEXT: [[R:%.*]] = zext i16 [[PTR_IDX]] to i32
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %ptr = getelementptr i16, ptr addrspace(3) null, i16 %x
+ %r = ptrtoint ptr addrspace(3) %ptr to i32
+ ret i32 %r
+}
+
+define <2 x i32> @ptrtoint_of_null_splat(<2 x i16> %x) {
+; CHECK-LABEL: @ptrtoint_of_null_splat(
+; CHECK-NEXT: [[PTR:%.*]] = getelementptr i16, ptr addrspace(3) null, <2 x i16> [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = ptrtoint <2 x ptr addrspace(3)> [[PTR]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[R]]
+;
+ %ptr = getelementptr i16, ptr addrspace(3) null, <2 x i16> %x
+ %r = ptrtoint <2 x ptr addrspace(3)> %ptr to <2 x i32>
+ ret <2 x i32> %r
+}
The branch was force-pushed from f6c97e3 to cd7497b.
  // Check whether we know the integer value of the base pointer.
  Value *Res;
  Type *IdxTy = DL.getIndexType(PtrTy);
  if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
Missing test for the multi-use case. Do we need a one-use constraint on inttoptr?
This is pre-existing code, tested here (llvm/test/Transforms/InstCombine/cast_ptr.ll, lines 406 to 419 at cd7497b):
define i32 @ptr_add_in_int_extra_use1(i32 %x) {
; CHECK-LABEL: @ptr_add_in_int_extra_use1(
; CHECK-NEXT: [[PTR:%.*]] = inttoptr i32 [[X:%.*]] to ptr
; CHECK-NEXT: call void @use_ptr(ptr [[PTR]])
; CHECK-NEXT: [[P2:%.*]] = getelementptr inbounds nuw i8, ptr [[PTR]], i32 4096
; CHECK-NEXT: [[R:%.*]] = ptrtoint ptr [[P2]] to i32
; CHECK-NEXT: ret i32 [[R]]
;
  %ptr = inttoptr i32 %x to ptr
  call void @use_ptr(ptr %ptr)
  %p2 = getelementptr inbounds i8, ptr %ptr, i32 4096
  %r = ptrtoint ptr %p2 to i32
  ret i32 %r
}
I agree that this one-use requirement is unnecessary.
It looks like removing this one-use check has some negative interaction with the indexed compare fold.
  Value *Res;
  Type *IdxTy = DL.getIndexType(PtrTy);
  if (match(Ptr, m_OneUse(m_IntToPtr(m_Value(Res)))) &&
      Res->getType() == IntTy && IntTy == IdxTy) {
Suggested change:
-      Res->getType() == IntTy && IntTy == IdxTy) {
+      Res->getType() == IdxTy) {
If IntTy != IdxTy, we will zext or trunc the offset accumulation into IntTy.
I think this is correct, but it's worth noting that we can only run into this if the inttoptr hasn't been canonicalized yet (otherwise it would be guaranteed to use IntTy), so it's not really possible to test this case.
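For example (a hypothetical, not-yet-canonicalized input, assuming 32-bit pointers so the canonical integer type is i32):

  ; %x is i64 rather than the canonical i32, so Res->getType() != IdxTy
  ; here; instcombine would normally shrink the inttoptr source first.
  %p = inttoptr i64 %x to ptr
  %p2 = getelementptr i8, ptr %p, i32 %y
  %r = ptrtoint ptr %p2 to i32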