[SeparateConstOffsetFromGEP] Support GEP reordering for different types #90802


Merged: 9 commits into llvm:main on May 9, 2024

Conversation

@jrbyrnes (Contributor) commented May 2, 2024

Reordering GEPs with different source element types doesn't show up in existing lit tests, but it has an impact on real code -- especially after the canonicalization of GEPs to i8.

Alive2 tests for the inbounds handling:

Type shrink case: https://alive2.llvm.org/ce/z/VQTji8

Type expand case: https://alive2.llvm.org/ce/z/5oodLN
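For illustration, here is a minimal before/after sketch of the shrink case, mirroring the new reorder-gep-inbounds.ll test (names are placeholders and %nneg stands for a known non-negative index):

  ; before: the constant-offset GEP is innermost, in i16 units
  %const1 = getelementptr inbounds i16, ptr %in.ptr, i32 1024
  %idx1 = getelementptr inbounds i8, ptr %const1, i32 %nneg

  ; after: the variable index moves next to the base, and the constant is
  ; rescaled to i8 units (1024 i16 = 2048 i8) where later passes can fold
  ; it into addressing-mode immediates
  %0 = getelementptr inbounds i8, ptr %in.ptr, i32 %nneg
  %1 = getelementptr i8, ptr %0, i32 2048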

@llvmbot (Member) commented May 2, 2024

@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-backend-amdgpu

Author: Jeffrey Byrnes (jrbyrnes)

Changes

Reordering GEPs with different source element types doesn't show up in existing lit tests, but it has an impact on real code -- especially after the canonicalization of GEPs to i8.

Alive2 tests for the inbounds handling:

Type shrink case: https://alive2.llvm.org/ce/z/VQTji8

Type expand case: https://alive2.llvm.org/ce/z/5oodLN


Patch is 26.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90802.diff

4 Files Affected:

  • (modified) llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp (+49-5)
  • (modified) llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll (+34)
  • (modified) llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll (+429)
  • (modified) llvm/test/Transforms/SeparateConstOffsetFromGEP/reorder-gep.ll (+63)
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index c54a956fc7e243..e9ff227b6a5c98 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -985,9 +985,10 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
   if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1)
     return false;
 
-  // TODO: support reordering for non-trivial GEP chains
-  if (PtrGEPType != GEPType ||
-      PtrGEP->getSourceElementType() != GEP->getSourceElementType())
+  bool GEPIsPtr = GEPType->getScalarType()->isPointerTy();
+  bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPointerTy();
+
+  if (GEPIsPtr != PtrGEPIsPtr)
     return false;
 
   bool NestedNeedsExtraction;
@@ -1002,8 +1003,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
                                  /*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
     return false;
 
-  IRBuilder<> Builder(GEP);
-  Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
   bool GEPInBounds = GEP->isInBounds();
   bool PtrGEPInBounds = PtrGEP->isInBounds();
   bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
@@ -1017,6 +1016,50 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
       IsChainInBounds &= KnownPtrGEPIdx.isNonNegative();
     }
   }
+  TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getSourceElementType());
+  TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getSourceElementType());
+  IRBuilder<> Builder(GEP);
+  Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
+  if (GEPSize > PtrGEPSize) {
+    if (GEPSize % PtrGEPSize)
+      return false;
+    unsigned Ratio = GEPSize / PtrGEPSize;
+    if (NestedByteOffset % Ratio)
+      return false;
+
+    auto NewGEPOffset = Builder.CreateUDiv(
+        *PtrGEP->indices().begin(),
+        Builder.getIntN(
+            PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(),
+            Ratio));
+    auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(),
+                                    SmallVector<Value *, 4>(GEP->indices()));
+    cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
+    auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset);
+    cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
+    GEP->replaceAllUsesWith(NewGEP);
+    RecursivelyDeleteTriviallyDeadInstructions(GEP);
+    return true;
+  }
+
+  if (GEPSize < PtrGEPSize) {
+    if (PtrGEPSize % GEPSize)
+      return false;
+    unsigned Ratio = PtrGEPSize / GEPSize;
+
+    auto NewGEPOffset = Builder.CreateMul(
+        *PtrGEP->indices().begin(),
+        Builder.getIntN(
+            PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(),
+            Ratio));
+    auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(),
+                                    SmallVector<Value *, 4>(GEP->indices()));
+    cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
+    auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset);
+    GEP->replaceAllUsesWith(NewGEP);
+    RecursivelyDeleteTriviallyDeadInstructions(GEP);
+    return true;
+  }
 
   // For trivial GEP chains, we can swap the indicies.
   auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(),
@@ -1025,6 +1068,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
   auto NewGEP = Builder.CreateGEP(GEPType, NewSrc,
                                   SmallVector<Value *, 4>(PtrGEP->indices()));
   cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
   GEP->replaceAllUsesWith(NewGEP);
   RecursivelyDeleteTriviallyDeadInstructions(GEP);
   return true;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
index c24bbd5f658f94..d5d89d42bc6972 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
@@ -49,3 +49,37 @@ entry:
   %idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg
   ret void
 }
+
+define void @inboundsNonNegativeTypeShrink(ptr %in.ptr, i32 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegativeTypeShrink(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2048
+; CHECK-NEXT:    ret void
+;
+entry:
+  %in.idx1.nneg = and i32 %in.idx1, 2147483647
+  %const1 = getelementptr inbounds i16, ptr %in.ptr, i32 1024
+  %idx1 = getelementptr inbounds i8, ptr %const1, i32 %in.idx1.nneg
+  ret void
+}
+
+define void @inboundsNonNegativeTypeExpand(ptr %in.ptr, i32 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegativeTypeExpand(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
+; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 512
+; CHECK-NEXT:    ret void
+;
+entry:
+  %in.idx1.nneg = and i32 %in.idx1, 2147483647
+  %const1 = getelementptr inbounds i8, ptr %in.ptr, i32 1024
+  %idx1 = getelementptr inbounds i16, ptr %const1, i32 %in.idx1.nneg
+  ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
index 7137f0fb66fdb9..fcf48dc415c033 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
@@ -173,3 +173,432 @@ end:
   call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
   ret void
 }
+
+
+define protected amdgpu_kernel void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_expand:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_add_i32 s0, s0, s1
+; CHECK-NEXT:    s_lshl_b32 s2, s2, 1
+; CHECK-NEXT:    s_add_i32 s0, s0, s2
+; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB2_2
+; CHECK-NEXT:  ; %bb.1: ; %bb.1
+; CHECK-NEXT:    v_mov_b32_e32 v12, s0
+; CHECK-NEXT:    ds_read_b128 v[0:3], v12
+; CHECK-NEXT:    ds_read_b128 v[4:7], v12 offset:256
+; CHECK-NEXT:    ds_read_b128 v[8:11], v12 offset:512
+; CHECK-NEXT:    ds_read_b128 v[12:15], v12 offset:768
+; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[4:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[8:11]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[12:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:  .LBB2_2: ; %end
+; CHECK-NEXT:    s_add_i32 s1, s0, 0x100
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_add_i32 s2, s0, 0x200
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s1
+; CHECK-NEXT:    s_add_i32 s3, s0, 0x300
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
+entry:
+  %base = getelementptr i8, ptr addrspace(3) %in.ptr, i32 %in.idx0
+  %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
+  %const1 = getelementptr i8, ptr addrspace(3) %base, i32 256
+  %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
+  %const2 = getelementptr i8, ptr addrspace(3) %base, i32 512
+  %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
+  %const3 = getelementptr i8, ptr addrspace(3) %base, i32 768
+  %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
+  %cmp0 = icmp eq i32 %in.idx0, 0
+  br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+  %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+  %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+  %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+  %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+  br label %end
+
+end:
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+  ret void
+}
+
+define protected amdgpu_kernel void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_shrink:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_lshl_b32 s3, s1, 3
+; CHECK-NEXT:    s_add_i32 s0, s0, s3
+; CHECK-NEXT:    s_lshl_b32 s2, s2, 1
+; CHECK-NEXT:    s_add_i32 s0, s0, s2
+; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB3_2
+; CHECK-NEXT:  ; %bb.1: ; %bb.1
+; CHECK-NEXT:    v_mov_b32_e32 v12, s0
+; CHECK-NEXT:    ds_read_b128 v[0:3], v12
+; CHECK-NEXT:    ds_read_b128 v[4:7], v12 offset:2048
+; CHECK-NEXT:    ds_read_b128 v[8:11], v12 offset:4096
+; CHECK-NEXT:    ds_read_b128 v[12:15], v12 offset:6144
+; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[4:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[8:11]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[12:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:  .LBB3_2: ; %end
+; CHECK-NEXT:    s_add_i32 s1, s0, 0x800
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_add_i32 s2, s0, 0x1000
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s1
+; CHECK-NEXT:    s_add_i32 s3, s0, 0x1800
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
+entry:
+  %base = getelementptr i64, ptr addrspace(3) %in.ptr, i32 %in.idx0
+  %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
+  %const1 = getelementptr i64, ptr addrspace(3) %base, i32 256
+  %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
+  %const2 = getelementptr i64, ptr addrspace(3) %base, i32 512
+  %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
+  %const3 = getelementptr i64, ptr addrspace(3) %base, i32 768
+  %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
+  %cmp0 = icmp eq i32 %in.idx0, 0
+  br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+  %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+  %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+  %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+  %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+  br label %end
+
+end:
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+  ret void
+}
+
+define protected amdgpu_kernel void @reorder_shrink2(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_shrink2:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_lshl_b32 s3, s1, 1
+; CHECK-NEXT:    s_add_i32 s0, s0, s3
+; CHECK-NEXT:    s_add_i32 s0, s0, s2
+; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB4_2
+; CHECK-NEXT:  ; %bb.1: ; %bb.1
+; CHECK-NEXT:    v_mov_b32_e32 v12, s0
+; CHECK-NEXT:    ds_read_b128 v[0:3], v12
+; CHECK-NEXT:    ds_read_b128 v[4:7], v12 offset:512
+; CHECK-NEXT:    ds_read_b128 v[8:11], v12 offset:1024
+; CHECK-NEXT:    ds_read_b128 v[12:15], v12 offset:1536
+; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[4:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[8:11]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[12:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:  .LBB4_2: ; %end
+; CHECK-NEXT:    s_add_i32 s1, s0, 0x200
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_add_i32 s2, s0, 0x400
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s1
+; CHECK-NEXT:    s_add_i32 s3, s0, 0x600
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
+entry:
+  %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
+  %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
+  %const1 = getelementptr half, ptr addrspace(3) %base, i32 256
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
+  %const2 = getelementptr half, ptr addrspace(3) %base, i32 512
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
+  %const3 = getelementptr half, ptr addrspace(3) %base, i32 768
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+  %cmp0 = icmp eq i32 %in.idx0, 0
+  br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+  %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+  %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+  %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+  %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+  br label %end
+
+end:
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+  ret void
+}
+
+
+
+define protected amdgpu_kernel void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: bad_index:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_lshl_b32 s3, s1, 1
+; CHECK-NEXT:    s_add_i32 s0, s0, s3
+; CHECK-NEXT:    s_add_i32 s0, s0, s2
+; CHECK-NEXT:    s_cmp_lg_u32 s1, 0
+; CHECK-NEXT:    s_cbranch_scc1 .LBB5_2
+; CHECK-NEXT:  ; %bb.1: ; %bb.1
+; CHECK-NEXT:    v_mov_b32_e32 v12, s0
+; CHECK-NEXT:    ds_read_b128 v[0:3], v12
+; CHECK-NEXT:    ds_read_b128 v[4:7], v12 offset:2
+; CHECK-NEXT:    ds_read_b128 v[8:11], v12 offset:4
+; CHECK-NEXT:    ds_read_b128 v[12:15], v12 offset:6
+; CHECK-NEXT:    s_waitcnt lgkmcnt(3)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[0:3]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(2)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[4:7]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(1)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[8:11]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v[12:15]
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:  .LBB5_2: ; %end
+; CHECK-NEXT:    s_add_i32 s1, s0, 2
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    s_add_i32 s2, s0, 4
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s1
+; CHECK-NEXT:    s_add_i32 s3, s0, 6
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s2
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    v_mov_b32_e32 v0, s3
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use v0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_endpgm
+entry:
+  %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
+  %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
+  %const1 = getelementptr half, ptr addrspace(3) %base, i32 1
+  %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
+  %const2 = getelementptr half, ptr addrspace(3) %base, i32 2
+  %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
+  %const3 = getelementptr half, ptr addrspace(3) %base, i32 3
+  %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+  %cmp0 = icmp eq i32 %in.idx0, 0
+  br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+  %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+  %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+  %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+  %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+  call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+  br label %end
+
+end:
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+  call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+  ret void
+}
+
+
+%struct.Packed = type <{ [8 x i8], [4 x half] }>
+define protected amdgpu_kernel void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: struct_type:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_load_dwordx4 s[4:7], s[6:7], 0x0
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_lshl_b32 s0, s5, 14
+; CHECK-NEXT:    s_add_i32 s3, s4, s0
+; CHECK-NEXT:    s_add_i32 s3, s3, s6
+; CHECK-NEXT:    s_add_i32 s2, s3, 0x400000
+; CHECK-NEXT:    s_add_i32 s1, s3, 0x800000
+; CHECK-NEXT:    s_add_i32 s0, s3, 0xc00000
+; C...
[truncated]
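In short: when the two GEPs in a trivial chain disagree on source element type, the pass now rescales the inner constant index by the element-size ratio instead of bailing out -- multiplying it when the outer type is narrower, dividing it when the outer type is wider. The dividing direction must still bail out when the nested byte offset is not a multiple of the ratio, since such an offset has no exact index in the wider type. A hypothetical sketch of a chain that is left alone for that reason:

  ; ratio i16/i8 = 2, but no i16 index equals 1023 bytes, so no reordering
  %c = getelementptr i8, ptr %p, i32 1023
  %v = getelementptr i16, ptr %c, i32 %i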

@jrbyrnes (Contributor, Author) commented May 7, 2024

New Alive2 Tests:

Case 1: https://alive2.llvm.org/ce/z/6bfFY3
Case 2: https://alive2.llvm.org/ce/z/DkLMLF
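As before, the reordered GEPs are only marked inbounds when both original GEPs were inbounds and the variable index is known non-negative. A small sketch of the bail-out side of that rule (hypothetical values):

  ; %i has unknown sign, so the swapped chain drops inbounds even though
  ; both inputs carried it
  %a = getelementptr inbounds i16, ptr %p, i32 1024
  %b = getelementptr inbounds i8, ptr %a, i32 %i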

jrbyrnes added 2 commits May 7, 2024 19:01
@nikic (Contributor) left a comment:

This looks good to me now.

jrbyrnes added 2 commits May 8, 2024 08:05
@jrbyrnes (Contributor, Author) commented May 8, 2024

Thanks -- will commit after tests finish.

@jrbyrnes (Contributor, Author) commented May 9, 2024

I did notice a failure in our internal tests, but after looking at it more closely, it doesn't seem to be related to this PR.

jrbyrnes merged commit f865dbf into llvm:main on May 9, 2024
4 checks passed
bjacob added a commit to iree-org/llvm-project that referenced this pull request May 14, 2024
ingomueller-net pushed a commit to iree-org/llvm-project that referenced this pull request May 15, 2024
bangtianliu pushed a commit to bangtianliu/iree that referenced this pull request Jun 5, 2024
This allows dropping our existing local-revert of
llvm/llvm-project#89131 and cherry-pick of
llvm/llvm-project#91654 which we had introduced
in the earlier integrate iree-org#17330.

This locally reverts llvm/llvm-project#90802
because it causes numerical errors, reported at
llvm/llvm-project#90802 (comment).
LLITCHEV pushed a commit to LLITCHEV/iree that referenced this pull request Jul 30, 2024
jrbyrnes added a commit to jrbyrnes/llvm-project that referenced this pull request Oct 4, 2024
…es (llvm#90802)

rocm-ci pushed a commit to ROCm/llvm-project that referenced this pull request Dec 20, 2024
…es (llvm#90802)
