Skip to content

Commit c7d65e4

Browse files
[IR] Enable load/store/alloca for arrays of scalable vectors.
Differential Revision: https://reviews.llvm.org/D158517
1 parent 02d27ea commit c7d65e4

22 files changed

+324
-56
lines changed

llvm/docs/LangRef.rst

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -742,16 +742,16 @@ an optional list of attached :ref:`metadata <metadata>`.
742742
Variables and aliases can have a
743743
:ref:`Thread Local Storage Model <tls_model>`.
744744

745-
:ref:`Scalable vectors <t_vector>` cannot be global variables or members of
746-
arrays because their size is unknown at compile time. They are allowed in
747-
structs to facilitate intrinsics returning multiple values. Generally, structs
748-
containing scalable vectors are not considered "sized" and cannot be used in
749-
loads, stores, allocas, or GEPs. The only exception to this rule is for structs
750-
that contain scalable vectors of the same type (e.g. ``{<vscale x 2 x i32>,
751-
<vscale x 2 x i32>}`` contains the same type while ``{<vscale x 2 x i32>,
752-
<vscale x 2 x i64>}`` doesn't). These kinds of structs (we may call them
753-
homogeneous scalable vector structs) are considered sized and can be used in
754-
loads, stores, allocas, but not GEPs.
745+
Globals cannot be or contain :ref:`Scalable vectors <t_vector>` because their
746+
size is unknown at compile time. Scalable vectors are allowed in structs to facilitate
747+
intrinsics returning multiple values. Generally, structs containing scalable
748+
vectors are not considered "sized" and cannot be used in loads, stores, allocas,
749+
or GEPs. The only exception to this rule is for structs that contain scalable
750+
vectors of the same type (e.g. ``{<vscale x 2 x i32>, <vscale x 2 x i32>}``
751+
contains the same type while ``{<vscale x 2 x i32>, <vscale x 2 x i64>}``
752+
doesn't). These kinds of structs (we may call them homogeneous scalable vector
753+
structs) are considered sized and can be used in loads, stores, allocas, but
754+
not GEPs.
755755

756756
Syntax::
757757

llvm/include/llvm/IR/Type.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -209,8 +209,7 @@ class Type {
209209
/// Return true if this is a target extension type with a scalable layout.
210210
bool isScalableTargetExtTy() const;
211211

212-
/// Return true if this is a scalable vector type or a target extension type
213-
/// with a scalable layout.
212+
/// Return true if this is a type whose size is a known multiple of vscale.
214213
bool isScalableTy() const;
215214

216215
/// Return true if this is a FP type or a vector of FP.

llvm/lib/Analysis/InstructionSimplify.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4934,7 +4934,7 @@ static Value *simplifyGEPInst(Type *SrcTy, Value *Ptr,
49344934
return UndefValue::get(GEPTy);
49354935

49364936
bool IsScalableVec =
4937-
isa<ScalableVectorType>(SrcTy) || any_of(Indices, [](const Value *V) {
4937+
SrcTy->isScalableTy() || any_of(Indices, [](const Value *V) {
49384938
return isa<ScalableVectorType>(V->getType());
49394939
});
49404940

llvm/lib/IR/Operator.cpp

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -127,9 +127,7 @@ bool GEPOperator::accumulateConstantOffset(
127127
auto end = generic_gep_type_iterator<decltype(Index.end())>::end(Index.end());
128128
for (auto GTI = begin, GTE = end; GTI != GTE; ++GTI) {
129129
// Scalable vectors are multiplied by a runtime constant.
130-
bool ScalableType = false;
131-
if (isa<ScalableVectorType>(GTI.getIndexedType()))
132-
ScalableType = true;
130+
bool ScalableType = GTI.getIndexedType()->isScalableTy();
133131

134132
Value *V = GTI.getOperand();
135133
StructType *STy = GTI.getStructTypeOrNull();
@@ -189,7 +187,7 @@ bool GEPOperator::collectOffset(
189187
for (gep_type_iterator GTI = gep_type_begin(this), GTE = gep_type_end(this);
190188
GTI != GTE; ++GTI) {
191189
// Scalable vectors are multiplied by a runtime constant.
192-
bool ScalableType = isa<ScalableVectorType>(GTI.getIndexedType());
190+
bool ScalableType = GTI.getIndexedType()->isScalableTy();
193191

194192
Value *V = GTI.getOperand();
195193
StructType *STy = GTI.getStructTypeOrNull();

llvm/lib/IR/Type.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ bool Type::isIntegerTy(unsigned Bitwidth) const {
5858
}
5959

6060
bool Type::isScalableTy() const {
61+
if (const auto *ATy = dyn_cast<ArrayType>(this))
62+
return ATy->getElementType()->isScalableTy();
6163
if (const auto *STy = dyn_cast<StructType>(this)) {
6264
SmallPtrSet<Type *, 4> Visited;
6365
return STy->containsScalableVectorType(&Visited);
@@ -658,8 +660,7 @@ ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
658660
bool ArrayType::isValidElementType(Type *ElemTy) {
659661
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
660662
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy() &&
661-
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy() &&
662-
!isa<ScalableVectorType>(ElemTy);
663+
!ElemTy->isTokenTy() && !ElemTy->isX86_AMXTy();
663664
}
664665

665666
//===----------------------------------------------------------------------===//

llvm/lib/IR/Verifier.cpp

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -850,17 +850,9 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
850850
}
851851

852852
// Scalable types cannot be global variables, since we don't know
853-
// the runtime size. If the global is an array containing scalable vectors,
854-
// that will be caught by the isValidElementType methods in StructType or
855-
// ArrayType instead.
856-
Check(!isa<ScalableVectorType>(GV.getValueType()),
857-
"Globals cannot contain scalable vectors", &GV);
858-
859-
if (auto *STy = dyn_cast<StructType>(GV.getValueType())) {
860-
SmallPtrSet<Type *, 4> Visited;
861-
Check(!STy->containsScalableVectorType(&Visited),
862-
"Globals cannot contain scalable vectors", &GV);
863-
}
853+
// the runtime size.
854+
Check(!GV.getValueType()->isScalableTy(),
855+
"Globals cannot contain scalable types", &GV);
864856

865857
// Check if it's a target extension type that disallows being used as a
866858
// global.

llvm/lib/Transforms/IPO/GlobalOpt.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,7 +390,7 @@ static bool collectSRATypes(DenseMap<uint64_t, GlobalPart> &Parts,
390390
}
391391

392392
// Scalable types not currently supported.
393-
if (isa<ScalableVectorType>(Ty))
393+
if (Ty->isScalableTy())
394394
return false;
395395

396396
auto IsStored = [](Value *V, Constant *Initializer) {

llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -804,25 +804,25 @@ static Instruction *unpackLoadToAggregate(InstCombinerImpl &IC, LoadInst &LI) {
804804
return nullptr;
805805

806806
const DataLayout &DL = IC.getDataLayout();
807-
auto EltSize = DL.getTypeAllocSize(ET);
807+
TypeSize EltSize = DL.getTypeAllocSize(ET);
808808
const auto Align = LI.getAlign();
809809

810810
auto *Addr = LI.getPointerOperand();
811811
auto *IdxType = Type::getInt64Ty(T->getContext());
812812
auto *Zero = ConstantInt::get(IdxType, 0);
813813

814814
Value *V = PoisonValue::get(T);
815-
uint64_t Offset = 0;
815+
TypeSize Offset = TypeSize::get(0, ET->isScalableTy());
816816
for (uint64_t i = 0; i < NumElements; i++) {
817817
Value *Indices[2] = {
818818
Zero,
819819
ConstantInt::get(IdxType, i),
820820
};
821821
auto *Ptr = IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices),
822822
Name + ".elt");
823+
auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
823824
auto *L = IC.Builder.CreateAlignedLoad(AT->getElementType(), Ptr,
824-
commonAlignment(Align, Offset),
825-
Name + ".unpack");
825+
EltAlign, Name + ".unpack");
826826
L->setAAMetadata(LI.getAAMetadata());
827827
V = IC.Builder.CreateInsertValue(V, L, i);
828828
Offset += EltSize;
@@ -957,7 +957,7 @@ static bool canReplaceGEPIdxWithZero(InstCombinerImpl &IC,
957957
Type *SourceElementType = GEPI->getSourceElementType();
958958
// Size information about scalable vectors is not available, so we cannot
959959
// deduce whether indexing at n is undefined behaviour or not. Bail out.
960-
if (isa<ScalableVectorType>(SourceElementType))
960+
if (SourceElementType->isScalableTy())
961961
return false;
962962

963963
Type *AllocTy = GetElementPtrInst::getIndexedType(SourceElementType, Ops);
@@ -1323,7 +1323,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
13231323
return false;
13241324

13251325
const DataLayout &DL = IC.getDataLayout();
1326-
auto EltSize = DL.getTypeAllocSize(AT->getElementType());
1326+
TypeSize EltSize = DL.getTypeAllocSize(AT->getElementType());
13271327
const auto Align = SI.getAlign();
13281328

13291329
SmallString<16> EltName = V->getName();
@@ -1335,7 +1335,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
13351335
auto *IdxType = Type::getInt64Ty(T->getContext());
13361336
auto *Zero = ConstantInt::get(IdxType, 0);
13371337

1338-
uint64_t Offset = 0;
1338+
TypeSize Offset = TypeSize::get(0, AT->getElementType()->isScalableTy());
13391339
for (uint64_t i = 0; i < NumElements; i++) {
13401340
Value *Indices[2] = {
13411341
Zero,
@@ -1344,7 +1344,7 @@ static bool unpackStoreToAggregate(InstCombinerImpl &IC, StoreInst &SI) {
13441344
auto *Ptr =
13451345
IC.Builder.CreateInBoundsGEP(AT, Addr, ArrayRef(Indices), AddrName);
13461346
auto *Val = IC.Builder.CreateExtractValue(V, i, EltName);
1347-
auto EltAlign = commonAlignment(Align, Offset);
1347+
auto EltAlign = commonAlignment(Align, Offset.getKnownMinValue());
13481348
Instruction *NS = IC.Builder.CreateAlignedStore(Val, Ptr, EltAlign);
13491349
NS->setAAMetadata(SI.getAAMetadata());
13501350
Offset += EltSize;

llvm/lib/Transforms/InstCombine/InstructionCombining.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2005,7 +2005,7 @@ Instruction *InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
20052005
APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), 0);
20062006
if (NumVarIndices != Src->getNumIndices()) {
20072007
// FIXME: getIndexedOffsetInType() does not handle scalable vectors.
2008-
if (isa<ScalableVectorType>(BaseType))
2008+
if (BaseType->isScalableTy())
20092009
return nullptr;
20102010

20112011
SmallVector<Value *> ConstantIndices;
@@ -2118,7 +2118,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
21182118
SmallVector<Value *, 8> Indices(GEP.indices());
21192119
Type *GEPType = GEP.getType();
21202120
Type *GEPEltType = GEP.getSourceElementType();
2121-
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
2121+
bool IsGEPSrcEleScalable = GEPEltType->isScalableTy();
21222122
if (Value *V = simplifyGEPInst(GEPEltType, PtrOp, Indices, GEP.isInBounds(),
21232123
SQ.getWithInstruction(&GEP)))
21242124
return replaceInstUsesWith(GEP, V);

llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -830,7 +830,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
830830
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
831831
if (GTI.isSequential()) {
832832
// Constant offsets of scalable types are not really constant.
833-
if (isa<ScalableVectorType>(GTI.getIndexedType()))
833+
if (GTI.getIndexedType()->isScalableTy())
834834
continue;
835835

836836
// Tries to extract a constant offset from this GEP index.
@@ -1019,7 +1019,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
10191019
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
10201020
if (GTI.isSequential()) {
10211021
// Constant offsets of scalable types are not really constant.
1022-
if (isa<ScalableVectorType>(GTI.getIndexedType()))
1022+
if (GTI.getIndexedType()->isScalableTy())
10231023
continue;
10241024

10251025
// Splits this GEP index into a variadic part and a constant offset, and
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc < %s | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
5+
6+
%my_subtype = type <vscale x 2 x double>
7+
%my_type = type [3 x %my_subtype]
8+
9+
define void @array_1D(ptr %addr) #0 {
10+
; CHECK-LABEL: array_1D:
11+
; CHECK: // %bb.0: // %entry
12+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
13+
; CHECK-NEXT: addvl sp, sp, #-3
14+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
15+
; CHECK-NEXT: .cfi_offset w29, -16
16+
; CHECK-NEXT: ptrue p0.d
17+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
18+
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
19+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
20+
; CHECK-NEXT: st1d { z2.d }, p0, [sp]
21+
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
22+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
23+
; CHECK-NEXT: addvl sp, sp, #3
24+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
25+
; CHECK-NEXT: ret
26+
entry:
27+
%ret = alloca %my_type, align 8
28+
%val = load %my_type, ptr %addr
29+
store %my_type %val, ptr %ret, align 8
30+
ret void
31+
}
32+
33+
define %my_subtype @array_1D_extract(ptr %addr) #0 {
34+
; CHECK-LABEL: array_1D_extract:
35+
; CHECK: // %bb.0: // %entry
36+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
37+
; CHECK-NEXT: addvl sp, sp, #-3
38+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
39+
; CHECK-NEXT: .cfi_offset w29, -16
40+
; CHECK-NEXT: ptrue p0.d
41+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
42+
; CHECK-NEXT: addvl sp, sp, #3
43+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
44+
; CHECK-NEXT: ret
45+
entry:
46+
%ret = alloca %my_type, align 8
47+
%val = load %my_type, ptr %addr
48+
%elt = extractvalue %my_type %val, 1
49+
ret %my_subtype %elt
50+
}
51+
52+
define void @array_1D_insert(ptr %addr, %my_subtype %elt) #0 {
53+
; CHECK-LABEL: array_1D_insert:
54+
; CHECK: // %bb.0: // %entry
55+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
56+
; CHECK-NEXT: addvl sp, sp, #-3
57+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
58+
; CHECK-NEXT: .cfi_offset w29, -16
59+
; CHECK-NEXT: ptrue p0.d
60+
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
61+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #2, mul vl]
62+
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl]
63+
; CHECK-NEXT: st1d { z1.d }, p0, [sp]
64+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
65+
; CHECK-NEXT: addvl sp, sp, #3
66+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
67+
; CHECK-NEXT: ret
68+
entry:
69+
%ret = alloca %my_type, align 8
70+
%val = load %my_type, ptr %addr
71+
%ins = insertvalue %my_type %val, %my_subtype %elt, 1
72+
store %my_type %ins, ptr %ret, align 8
73+
ret void
74+
}
75+
76+
define void @array_2D(ptr %addr) #0 {
77+
; CHECK-LABEL: array_2D:
78+
; CHECK: // %bb.0: // %entry
79+
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
80+
; CHECK-NEXT: addvl sp, sp, #-6
81+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
82+
; CHECK-NEXT: .cfi_offset w29, -16
83+
; CHECK-NEXT: ptrue p0.d
84+
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
85+
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, #2, mul vl]
86+
; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0, #3, mul vl]
87+
; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #4, mul vl]
88+
; CHECK-NEXT: ld1d { z4.d }, p0/z, [x0, #5, mul vl]
89+
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x0]
90+
; CHECK-NEXT: st1d { z5.d }, p0, [sp]
91+
; CHECK-NEXT: st1d { z4.d }, p0, [sp, #5, mul vl]
92+
; CHECK-NEXT: st1d { z3.d }, p0, [sp, #4, mul vl]
93+
; CHECK-NEXT: st1d { z2.d }, p0, [sp, #3, mul vl]
94+
; CHECK-NEXT: st1d { z1.d }, p0, [sp, #2, mul vl]
95+
; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl]
96+
; CHECK-NEXT: addvl sp, sp, #6
97+
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
98+
; CHECK-NEXT: ret
99+
entry:
100+
%ret = alloca [2 x %my_type], align 8
101+
%val = load [2 x %my_type], ptr %addr
102+
store [2 x %my_type] %val, ptr %ret, align 8
103+
ret void
104+
}
105+
106+
attributes #0 = { "target-features"="+sve" }
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2+
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs --riscv-no-aliases < %s | FileCheck %s
3+
4+
target triple = "riscv64-unknown-unknown-elf"
5+
6+
%my_type = type [3 x <vscale x 1 x double>]
7+
8+
define void @test(ptr %addr) {
9+
; CHECK-LABEL: test:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: addi sp, sp, -16
12+
; CHECK-NEXT: .cfi_def_cfa_offset 16
13+
; CHECK-NEXT: csrrs a1, vlenb, zero
14+
; CHECK-NEXT: slli a1, a1, 2
15+
; CHECK-NEXT: sub sp, sp, a1
16+
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
17+
; CHECK-NEXT: csrrs a1, vlenb, zero
18+
; CHECK-NEXT: add a2, a0, a1
19+
; CHECK-NEXT: vl1re64.v v8, (a2)
20+
; CHECK-NEXT: slli a2, a1, 1
21+
; CHECK-NEXT: vl1re64.v v9, (a0)
22+
; CHECK-NEXT: add a0, a0, a2
23+
; CHECK-NEXT: vl1re64.v v10, (a0)
24+
; CHECK-NEXT: addi a0, sp, 16
25+
; CHECK-NEXT: vs1r.v v9, (a0)
26+
; CHECK-NEXT: add a2, a0, a2
27+
; CHECK-NEXT: vs1r.v v10, (a2)
28+
; CHECK-NEXT: add a0, a0, a1
29+
; CHECK-NEXT: vs1r.v v8, (a0)
30+
; CHECK-NEXT: csrrs a0, vlenb, zero
31+
; CHECK-NEXT: slli a0, a0, 2
32+
; CHECK-NEXT: add sp, sp, a0
33+
; CHECK-NEXT: addi sp, sp, 16
34+
; CHECK-NEXT: jalr zero, 0(ra)
35+
entry:
36+
%ret = alloca %my_type, align 8
37+
%val = load %my_type, ptr %addr
38+
store %my_type %val, ptr %ret, align 8
39+
ret void
40+
}

llvm/test/Other/scalable-vector-array.ll

Lines changed: 0 additions & 8 deletions
This file was deleted.

llvm/test/Transforms/GVN/opaque-ptr.ll

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
5252
; CHECK-NEXT: call void @use(ptr [[GEP5]])
5353
; CHECK-NEXT: call void @use(ptr [[GEP5_SAME]])
5454
; CHECK-NEXT: call void @use(ptr [[GEP5_DIFFERENT]])
55+
; CHECK-NEXT: [[GEP6:%.*]] = getelementptr [4 x <vscale x 4 x i32>], ptr [[P]], i64 [[IDX]], i64 1
56+
; CHECK-NEXT: [[GEP6_SAME:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX]], i64 1
57+
; CHECK-NEXT: [[GEP6_DIFFERENT:%.*]] = getelementptr [4 x <vscale x 4 x float>], ptr [[P]], i64 [[IDX2]], i64 1
58+
; CHECK-NEXT: call void @use(ptr [[GEP6]])
59+
; CHECK-NEXT: call void @use(ptr [[GEP6_SAME]])
60+
; CHECK-NEXT: call void @use(ptr [[GEP6_DIFFERENT]])
5561
; CHECK-NEXT: ret void
5662
;
5763
%gep1 = getelementptr i64, ptr %p, i64 1
@@ -89,6 +95,12 @@ define void @gep_cse_offset_canonicalization(ptr %p, i64 %idx, i64 %idx2) {
8995
call void @use(ptr %gep5)
9096
call void @use(ptr %gep5.same)
9197
call void @use(ptr %gep5.different)
98+
%gep6 = getelementptr [4 x <vscale x 4 x i32>], ptr %p, i64 %idx, i64 1
99+
%gep6.same = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx, i64 1
100+
%gep6.different = getelementptr [4 x <vscale x 4 x float>], ptr %p, i64 %idx2, i64 1
101+
call void @use(ptr %gep6)
102+
call void @use(ptr %gep6.same)
103+
call void @use(ptr %gep6.different)
92104
ret void
93105
}
94106

0 commit comments

Comments
 (0)