-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[SeparateConstOffsetFromGEP] Support GEP reordering for different types #90802
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…ypes. Change-Id: Ic3ecd2c39f49b50ba7486a4223332771f8d672c0
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-backend-amdgpu Author: Jeffrey Byrnes (jrbyrnes) Changes: This doesn't show up in existing lit tests, but has an impact on real code -- especially after the canonicalization of GEPs to i8. Alive2 tests for the inbounds handling: Type shrink case: https://alive2.llvm.org/ce/z/VQTji8 Type expand case: https://alive2.llvm.org/ce/z/5oodLN Patch is 26.26 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/90802.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index c54a956fc7e243..e9ff227b6a5c98 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -985,9 +985,10 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
if (PtrGEPType->isAggregateType() || PtrGEP->getNumIndices() != 1)
return false;
- // TODO: support reordering for non-trivial GEP chains
- if (PtrGEPType != GEPType ||
- PtrGEP->getSourceElementType() != GEP->getSourceElementType())
+ bool GEPIsPtr = GEPType->getScalarType()->isPointerTy();
+ bool PtrGEPIsPtr = PtrGEPType->getScalarType()->isPointerTy();
+
+ if (GEPIsPtr != PtrGEPIsPtr)
return false;
bool NestedNeedsExtraction;
@@ -1002,8 +1003,6 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
/*HasBaseReg=*/true, /*Scale=*/0, AddrSpace))
return false;
- IRBuilder<> Builder(GEP);
- Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
bool GEPInBounds = GEP->isInBounds();
bool PtrGEPInBounds = PtrGEP->isInBounds();
bool IsChainInBounds = GEPInBounds && PtrGEPInBounds;
@@ -1017,6 +1016,50 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
IsChainInBounds &= KnownPtrGEPIdx.isNonNegative();
}
}
+ TypeSize GEPSize = DL->getTypeSizeInBits(GEP->getSourceElementType());
+ TypeSize PtrGEPSize = DL->getTypeSizeInBits(PtrGEP->getSourceElementType());
+ IRBuilder<> Builder(GEP);
+ Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
+ if (GEPSize > PtrGEPSize) {
+ if (GEPSize % PtrGEPSize)
+ return false;
+ unsigned Ratio = GEPSize / PtrGEPSize;
+ if (NestedByteOffset % Ratio)
+ return false;
+
+ auto NewGEPOffset = Builder.CreateUDiv(
+ *PtrGEP->indices().begin(),
+ Builder.getIntN(
+ PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(),
+ Ratio));
+ auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(),
+ SmallVector<Value *, 4>(GEP->indices()));
+ cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
+ auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset);
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
+ GEP->replaceAllUsesWith(NewGEP);
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+ return true;
+ }
+
+ if (GEPSize < PtrGEPSize) {
+ if (PtrGEPSize % GEPSize)
+ return false;
+ unsigned Ratio = PtrGEPSize / GEPSize;
+
+ auto NewGEPOffset = Builder.CreateMul(
+ *PtrGEP->indices().begin(),
+ Builder.getIntN(
+ PtrGEP->indices().begin()->get()->getType()->getScalarSizeInBits(),
+ Ratio));
+ auto NewSrc = Builder.CreateGEP(GEPType, PtrGEP->getPointerOperand(),
+ SmallVector<Value *, 4>(GEP->indices()));
+ cast<GetElementPtrInst>(NewSrc)->setIsInBounds(IsChainInBounds);
+ auto NewGEP = Builder.CreateGEP(GEPType, NewSrc, NewGEPOffset);
+ GEP->replaceAllUsesWith(NewGEP);
+ RecursivelyDeleteTriviallyDeadInstructions(GEP);
+ return true;
+ }
// For trivial GEP chains, we can swap the indices.
auto NewSrc = Builder.CreateGEP(PtrGEPType, PtrGEP->getPointerOperand(),
@@ -1025,6 +1068,7 @@ bool SeparateConstOffsetFromGEP::reorderGEP(GetElementPtrInst *GEP,
auto NewGEP = Builder.CreateGEP(GEPType, NewSrc,
SmallVector<Value *, 4>(PtrGEP->indices()));
cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
+ cast<GetElementPtrInst>(NewGEP)->setIsInBounds(IsChainInBounds);
GEP->replaceAllUsesWith(NewGEP);
RecursivelyDeleteTriviallyDeadInstructions(GEP);
return true;
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
index c24bbd5f658f94..d5d89d42bc6972 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
@@ -49,3 +49,37 @@ entry:
%idx1 = getelementptr <2 x i8>, ptr %const1, i32 %in.idx1.nneg
ret void
}
+
+define void @inboundsNonNegativeTypeShrink(ptr %in.ptr, i32 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegativeTypeShrink(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[TMP0]], i32 2048
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i32 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds i16, ptr %in.ptr, i32 1024
+ %idx1 = getelementptr inbounds i8, ptr %const1, i32 %in.idx1.nneg
+ ret void
+}
+
+define void @inboundsNonNegativeTypeExpand(ptr %in.ptr, i32 %in.idx1) {
+; CHECK-LABEL: define void @inboundsNonNegativeTypeExpand(
+; CHECK-SAME: ptr [[IN_PTR:%.*]], i32 [[IN_IDX1:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[IN_IDX1_NNEG:%.*]] = and i32 [[IN_IDX1]], 2147483647
+; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[IN_IDX1_NNEG]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[IN_PTR]], i64 [[IDXPROM]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 512
+; CHECK-NEXT: ret void
+;
+entry:
+ %in.idx1.nneg = and i32 %in.idx1, 2147483647
+ %const1 = getelementptr inbounds i8, ptr %in.ptr, i32 1024
+ %idx1 = getelementptr inbounds i16, ptr %const1, i32 %in.idx1.nneg
+ ret void
+}
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
index 7137f0fb66fdb9..fcf48dc415c033 100644
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
+++ b/llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
@@ -173,3 +173,432 @@ end:
call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
ret void
}
+
+
+define protected amdgpu_kernel void @reorder_expand(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_expand:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_add_i32 s0, s0, s1
+; CHECK-NEXT: s_lshl_b32 s2, s2, 1
+; CHECK-NEXT: s_add_i32 s0, s0, s2
+; CHECK-NEXT: s_cmp_lg_u32 s1, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB2_2
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v12, s0
+; CHECK-NEXT: ds_read_b128 v[0:3], v12
+; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:256
+; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:512
+; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:768
+; CHECK-NEXT: s_waitcnt lgkmcnt(3)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(2)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[4:7]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(1)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[8:11]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[12:15]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB2_2: ; %end
+; CHECK-NEXT: s_add_i32 s1, s0, 0x100
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_add_i32 s2, s0, 0x200
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: s_add_i32 s3, s0, 0x300
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_endpgm
+entry:
+ %base = getelementptr i8, ptr addrspace(3) %in.ptr, i32 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
+ %const1 = getelementptr i8, ptr addrspace(3) %base, i32 256
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
+ %const2 = getelementptr i8, ptr addrspace(3) %base, i32 512
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
+ %const3 = getelementptr i8, ptr addrspace(3) %base, i32 768
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
+ %cmp0 = icmp eq i32 %in.idx0, 0
+ br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+ %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+ %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+ %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+ %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+ br label %end
+
+end:
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+ ret void
+}
+
+define protected amdgpu_kernel void @reorder_shrink(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_shrink:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_lshl_b32 s3, s1, 3
+; CHECK-NEXT: s_add_i32 s0, s0, s3
+; CHECK-NEXT: s_lshl_b32 s2, s2, 1
+; CHECK-NEXT: s_add_i32 s0, s0, s2
+; CHECK-NEXT: s_cmp_lg_u32 s1, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB3_2
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v12, s0
+; CHECK-NEXT: ds_read_b128 v[0:3], v12
+; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2048
+; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4096
+; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6144
+; CHECK-NEXT: s_waitcnt lgkmcnt(3)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(2)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[4:7]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(1)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[8:11]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[12:15]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB3_2: ; %end
+; CHECK-NEXT: s_add_i32 s1, s0, 0x800
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_add_i32 s2, s0, 0x1000
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: s_add_i32 s3, s0, 0x1800
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_endpgm
+entry:
+ %base = getelementptr i64, ptr addrspace(3) %in.ptr, i32 %in.idx0
+ %idx0 = getelementptr half, ptr addrspace(3) %base, i32 %in.idx1
+ %const1 = getelementptr i64, ptr addrspace(3) %base, i32 256
+ %idx1 = getelementptr half, ptr addrspace(3) %const1, i32 %in.idx1
+ %const2 = getelementptr i64, ptr addrspace(3) %base, i32 512
+ %idx2 = getelementptr half, ptr addrspace(3) %const2, i32 %in.idx1
+ %const3 = getelementptr i64, ptr addrspace(3) %base, i32 768
+ %idx3 = getelementptr half, ptr addrspace(3) %const3, i32 %in.idx1
+ %cmp0 = icmp eq i32 %in.idx0, 0
+ br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+ %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+ %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+ %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+ %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+ br label %end
+
+end:
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+ ret void
+}
+
+define protected amdgpu_kernel void @reorder_shrink2(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: reorder_shrink2:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_lshl_b32 s3, s1, 1
+; CHECK-NEXT: s_add_i32 s0, s0, s3
+; CHECK-NEXT: s_add_i32 s0, s0, s2
+; CHECK-NEXT: s_cmp_lg_u32 s1, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB4_2
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v12, s0
+; CHECK-NEXT: ds_read_b128 v[0:3], v12
+; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:512
+; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:1024
+; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:1536
+; CHECK-NEXT: s_waitcnt lgkmcnt(3)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(2)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[4:7]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(1)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[8:11]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[12:15]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB4_2: ; %end
+; CHECK-NEXT: s_add_i32 s1, s0, 0x200
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_add_i32 s2, s0, 0x400
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: s_add_i32 s3, s0, 0x600
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_endpgm
+entry:
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
+ %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
+ %const1 = getelementptr half, ptr addrspace(3) %base, i32 256
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
+ %const2 = getelementptr half, ptr addrspace(3) %base, i32 512
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
+ %const3 = getelementptr half, ptr addrspace(3) %base, i32 768
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+ %cmp0 = icmp eq i32 %in.idx0, 0
+ br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+ %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+ %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+ %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+ %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+ br label %end
+
+end:
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+ ret void
+}
+
+
+
+define protected amdgpu_kernel void @bad_index(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: bad_index:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_lshl_b32 s3, s1, 1
+; CHECK-NEXT: s_add_i32 s0, s0, s3
+; CHECK-NEXT: s_add_i32 s0, s0, s2
+; CHECK-NEXT: s_cmp_lg_u32 s1, 0
+; CHECK-NEXT: s_cbranch_scc1 .LBB5_2
+; CHECK-NEXT: ; %bb.1: ; %bb.1
+; CHECK-NEXT: v_mov_b32_e32 v12, s0
+; CHECK-NEXT: ds_read_b128 v[0:3], v12
+; CHECK-NEXT: ds_read_b128 v[4:7], v12 offset:2
+; CHECK-NEXT: ds_read_b128 v[8:11], v12 offset:4
+; CHECK-NEXT: ds_read_b128 v[12:15], v12 offset:6
+; CHECK-NEXT: s_waitcnt lgkmcnt(3)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[0:3]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(2)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[4:7]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(1)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[8:11]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v[12:15]
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: .LBB5_2: ; %end
+; CHECK-NEXT: s_add_i32 s1, s0, 2
+; CHECK-NEXT: v_mov_b32_e32 v0, s0
+; CHECK-NEXT: s_add_i32 s2, s0, 4
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s1
+; CHECK-NEXT: s_add_i32 s3, s0, 6
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s2
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: v_mov_b32_e32 v0, s3
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use v0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_endpgm
+entry:
+ %base = getelementptr half, ptr addrspace(3) %in.ptr, i32 %in.idx0
+ %idx0 = getelementptr i8, ptr addrspace(3) %base, i32 %in.idx1
+ %const1 = getelementptr half, ptr addrspace(3) %base, i32 1
+ %idx1 = getelementptr i8, ptr addrspace(3) %const1, i32 %in.idx1
+ %const2 = getelementptr half, ptr addrspace(3) %base, i32 2
+ %idx2 = getelementptr i8, ptr addrspace(3) %const2, i32 %in.idx1
+ %const3 = getelementptr half, ptr addrspace(3) %base, i32 3
+ %idx3 = getelementptr i8, ptr addrspace(3) %const3, i32 %in.idx1
+ %cmp0 = icmp eq i32 %in.idx0, 0
+ br i1 %cmp0, label %bb.1, label %end
+
+bb.1:
+ %val0 = load <8 x half>, ptr addrspace(3) %idx0, align 16
+ %val1 = load <8 x half>, ptr addrspace(3) %idx1, align 16
+ %val2 = load <8 x half>, ptr addrspace(3) %idx2, align 16
+ %val3 = load <8 x half>, ptr addrspace(3) %idx3, align 16
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val0)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val1)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val2)
+ call void asm sideeffect "; use $0", "v"(<8 x half> %val3)
+ br label %end
+
+end:
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx0)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx1)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx2)
+ call void asm sideeffect "; use $0", "v"(ptr addrspace(3) %idx3)
+ ret void
+}
+
+
+%struct.Packed = type <{ [8 x i8], [4 x half] }>
+define protected amdgpu_kernel void @struct_type(ptr addrspace(3) %in.ptr, i32 %in.idx0, i32 %in.idx1) {
+; CHECK-LABEL: struct_type:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_load_dwordx4 s[4:7], s[6:7], 0x0
+; CHECK-NEXT: s_waitcnt lgkmcnt(0)
+; CHECK-NEXT: s_lshl_b32 s0, s5, 14
+; CHECK-NEXT: s_add_i32 s3, s4, s0
+; CHECK-NEXT: s_add_i32 s3, s3, s6
+; CHECK-NEXT: s_add_i32 s2, s3, 0x400000
+; CHECK-NEXT: s_add_i32 s1, s3, 0x800000
+; CHECK-NEXT: s_add_i32 s0, s3, 0xc00000
+; C...
[truncated]
|
Change-Id: I3bd435e1baa27a36402cb06977c60662bda5059b
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep.ll
Outdated
Show resolved
Hide resolved
Change-Id: If84c0b348407e40dee488145d575497f687c56d3
Change-Id: Ic94d65538a02cb73d12d461ec513b915dafe711d
New Alive2 Tests: Case 1: https://alive2.llvm.org/ce/z/6bfFY3 |
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
Show resolved
Hide resolved
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/reorder-gep-inbounds.ll
Outdated
Show resolved
Hide resolved
Change-Id: I66bdfcfd7ce3d53c28b4439bd8ebd65905574560
Change-Id: I8fdfcb81082fa2e868bae101eef40237a21d8e37
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good to me now.
Change-Id: Ifa15c2b28da6efde3ee98562cf97a688f4228366
Change-Id: If4eed6fa854bba99f0f86153e5e6224a5d21a805
Thanks -- will commit after tests finish. |
I did notice a failure on our internal tests -- but after looking more closely at it, I can say it doesn't seem to be related to this PR. |
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
…rent types (llvm#90802)" This reverts commit f865dbf.
This allows dropping our existing local-revert of llvm/llvm-project#89131 and cherry-pick of llvm/llvm-project#91654 which we had introduced in the earlier integrate iree-org#17330. This locally reverts llvm/llvm-project#90802 because it causes numerical errors, reported at llvm/llvm-project#90802 (comment).
This allows dropping our existing local-revert of llvm/llvm-project#89131 and cherry-pick of llvm/llvm-project#91654 which we had introduced in the earlier integrate iree-org#17330. This locally reverts llvm/llvm-project#90802 because it causes numerical errors, reported at llvm/llvm-project#90802 (comment). Signed-off-by: Lubo Litchev <[email protected]>
…es (llvm#90802) This doesn't show up in existing lit tests, but has an impact on real code -- especially after the canonicalization of GEPs to i8. Alive2 tests for the inbounds handling: Case 1: https://alive2.llvm.org/ce/z/6bfFY3 Case 2: https://alive2.llvm.org/ce/z/DkLMLF Change-Id: I7584e86959b2ebebc23d4b0576bcdd1251f41375
…es (llvm#90802) This doesn't show up in existing lit tests, but has an impact on real code -- especially after the canonicalization of GEPs to i8. Alive2 tests for the inbounds handling: Case 1: https://alive2.llvm.org/ce/z/6bfFY3 Case 2: https://alive2.llvm.org/ce/z/DkLMLF Change-Id: I76b46f94b746032e2be4855361d143852c7c631a
This doesn't show up in existing lit tests, but has an impact on real code -- especially after the canonicalization of GEPs to i8.
Alive2 tests for the inbounds handling:
Type shrink case: https://alive2.llvm.org/ce/z/VQTji8
Type expand case: https://alive2.llvm.org/ce/z/5oodLN