Skip to content

Commit 9061e6e

Browse files
author
Thorsten Schütt
authored
[GlobalISel][AArch64] Legalize G_EXTRACT_VECTOR_ELT for SVE (#115161)
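AArch64InstrGISel.td defines: def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>; There are already many SVE selection patterns written for vector_extract, so once G_EXTRACT_VECTOR_ELT is legal for scalable vectors, GlobalISel can reuse those patterns. Let's exploit that fact.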
1 parent d233fed commit 9061e6e

File tree

4 files changed

+132
-9
lines changed

4 files changed

+132
-9
lines changed

llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3227,8 +3227,10 @@ bool IRTranslator::translateExtractElement(const User &U,
32273227
MachineIRBuilder &MIRBuilder) {
32283228
// If it is a <1 x Ty> vector, use the scalar as it is
32293229
// not a legal vector type in LLT.
3230-
if (cast<FixedVectorType>(U.getOperand(0)->getType())->getNumElements() == 1)
3231-
return translateCopy(U, *U.getOperand(0), MIRBuilder);
3230+
if (const FixedVectorType *FVT =
3231+
dyn_cast<FixedVectorType>(U.getOperand(0)->getType()))
3232+
if (FVT->getNumElements() == 1)
3233+
return translateCopy(U, *U.getOperand(0), MIRBuilder);
32323234

32333235
Register Res = getOrCreateVReg(U);
32343236
Register Val = getOrCreateVReg(*U.getOperand(0));

llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5316,7 +5316,9 @@ bool AArch64InstructionSelector::selectUSMovFromExtend(
53165316
return false;
53175317
Register Src0 = Extract->getOperand(1).getReg();
53185318

5319-
const LLT &VecTy = MRI.getType(Src0);
5319+
const LLT VecTy = MRI.getType(Src0);
5320+
if (VecTy.isScalableVector())
5321+
return false;
53205322

53215323
if (VecTy.getSizeInBits() != 128) {
53225324
const MachineInstr *ScalarToVector = emitScalarToVector(

llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -933,9 +933,16 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
933933
});
934934
}
935935

936+
// TODO : nxv4s16, nxv2s16, nxv2s32
936937
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
938+
.legalFor(HasSVE, {{s16, nxv16s8, s64},
939+
{s16, nxv8s16, s64},
940+
{s32, nxv4s32, s64},
941+
{s64, nxv2s64, s64}})
937942
.unsupportedIf([=](const LegalityQuery &Query) {
938943
const LLT &EltTy = Query.Types[1].getElementType();
944+
if (Query.Types[1].isScalableVector())
945+
return false;
939946
return Query.Types[0] != EltTy;
940947
})
941948
.minScalar(2, s64)
@@ -949,22 +956,26 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
949956
[=](const LegalityQuery &Query) {
950957
// We want to promote <M x s1> to <M x s64> if that wouldn't
951958
// cause the total vec size to be > 128b.
952-
return Query.Types[1].getNumElements() <= 2;
959+
return Query.Types[1].isFixedVector() &&
960+
Query.Types[1].getNumElements() <= 2;
953961
},
954962
0, s64)
955963
.minScalarOrEltIf(
956964
[=](const LegalityQuery &Query) {
957-
return Query.Types[1].getNumElements() <= 4;
965+
return Query.Types[1].isFixedVector() &&
966+
Query.Types[1].getNumElements() <= 4;
958967
},
959968
0, s32)
960969
.minScalarOrEltIf(
961970
[=](const LegalityQuery &Query) {
962-
return Query.Types[1].getNumElements() <= 8;
971+
return Query.Types[1].isFixedVector() &&
972+
Query.Types[1].getNumElements() <= 8;
963973
},
964974
0, s16)
965975
.minScalarOrEltIf(
966976
[=](const LegalityQuery &Query) {
967-
return Query.Types[1].getNumElements() <= 16;
977+
return Query.Types[1].isFixedVector() &&
978+
Query.Types[1].getNumElements() <= 16;
968979
},
969980
0, s8)
970981
.minScalarOrElt(0, s8) // Worst case, we need at least s8.
@@ -2178,11 +2189,14 @@ bool AArch64LegalizerInfo::legalizeMemOps(MachineInstr &MI,
21782189

21792190
bool AArch64LegalizerInfo::legalizeExtractVectorElt(
21802191
MachineInstr &MI, MachineRegisterInfo &MRI, LegalizerHelper &Helper) const {
2181-
assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
2192+
const GExtractVectorElement *Element = cast<GExtractVectorElement>(&MI);
21822193
auto VRegAndVal =
2183-
getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2194+
getIConstantVRegValWithLookThrough(Element->getIndexReg(), MRI);
21842195
if (VRegAndVal)
21852196
return true;
2197+
LLT VecTy = MRI.getType(Element->getVectorReg());
2198+
if (VecTy.isScalableVector())
2199+
return true;
21862200
return Helper.lowerExtractInsertVectorElt(MI) !=
21872201
LegalizerHelper::LegalizeResult::UnableToLegalize;
21882202
}

llvm/test/CodeGen/AArch64/extract-vector-elt-sve.ll

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,3 +121,108 @@ entry:
121121
%d = insertelement <vscale x 16 x i8> %vec, i8 %elt, i64 %idx
122122
ret <vscale x 16 x i8> %d
123123
}
124+
125+
define i64 @extract_vscale_2_i64(<vscale x 2 x i64> %vec, i64 %idx) {
126+
; CHECK-SD-LABEL: extract_vscale_2_i64:
127+
; CHECK-SD: // %bb.0: // %entry
128+
; CHECK-SD-NEXT: whilels p0.d, xzr, x0
129+
; CHECK-SD-NEXT: lastb x0, p0, z0.d
130+
; CHECK-SD-NEXT: ret
131+
;
132+
; CHECK-GI-LABEL: extract_vscale_2_i64:
133+
; CHECK-GI: // %bb.0: // %entry
134+
; CHECK-GI-NEXT: whilels p0.d, xzr, x0
135+
; CHECK-GI-NEXT: lastb d0, p0, z0.d
136+
; CHECK-GI-NEXT: fmov x0, d0
137+
; CHECK-GI-NEXT: ret
138+
entry:
139+
%d = extractelement <vscale x 2 x i64> %vec, i64 %idx
140+
ret i64 %d
141+
}
142+
143+
define i64 @extract_vscale_2_i64_zero(<vscale x 2 x i64> %vec, i64 %idx) {
144+
; CHECK-LABEL: extract_vscale_2_i64_zero:
145+
; CHECK: // %bb.0: // %entry
146+
; CHECK-NEXT: fmov x0, d0
147+
; CHECK-NEXT: ret
148+
entry:
149+
%d = extractelement <vscale x 2 x i64> %vec, i64 0
150+
ret i64 %d
151+
}
152+
153+
define i32 @extract_vscale_4_i32(<vscale x 4 x i32> %vec, i64 %idx) {
154+
; CHECK-SD-LABEL: extract_vscale_4_i32:
155+
; CHECK-SD: // %bb.0: // %entry
156+
; CHECK-SD-NEXT: whilels p0.s, xzr, x0
157+
; CHECK-SD-NEXT: lastb w0, p0, z0.s
158+
; CHECK-SD-NEXT: ret
159+
;
160+
; CHECK-GI-LABEL: extract_vscale_4_i32:
161+
; CHECK-GI: // %bb.0: // %entry
162+
; CHECK-GI-NEXT: whilels p0.s, xzr, x0
163+
; CHECK-GI-NEXT: lastb s0, p0, z0.s
164+
; CHECK-GI-NEXT: fmov w0, s0
165+
; CHECK-GI-NEXT: ret
166+
entry:
167+
%d = extractelement <vscale x 4 x i32> %vec, i64 %idx
168+
ret i32 %d
169+
}
170+
171+
define i32 @extract_vscale_4_i32_zero(<vscale x 4 x i32> %vec, i64 %idx) {
172+
; CHECK-LABEL: extract_vscale_4_i32_zero:
173+
; CHECK: // %bb.0: // %entry
174+
; CHECK-NEXT: fmov w0, s0
175+
; CHECK-NEXT: ret
176+
entry:
177+
%d = extractelement <vscale x 4 x i32> %vec, i64 0
178+
ret i32 %d
179+
}
180+
181+
define i16 @extract_vscale_8_i16(<vscale x 8 x i16> %vec, i64 %idx) {
182+
; CHECK-SD-LABEL: extract_vscale_8_i16:
183+
; CHECK-SD: // %bb.0: // %entry
184+
; CHECK-SD-NEXT: whilels p0.h, xzr, x0
185+
; CHECK-SD-NEXT: lastb w0, p0, z0.h
186+
; CHECK-SD-NEXT: ret
187+
;
188+
; CHECK-GI-LABEL: extract_vscale_8_i16:
189+
; CHECK-GI: // %bb.0: // %entry
190+
; CHECK-GI-NEXT: whilels p0.h, xzr, x0
191+
; CHECK-GI-NEXT: lastb h0, p0, z0.h
192+
; CHECK-GI-NEXT: fmov w0, s0
193+
; CHECK-GI-NEXT: ret
194+
entry:
195+
%d = extractelement <vscale x 8 x i16> %vec, i64 %idx
196+
ret i16 %d
197+
}
198+
199+
define i16 @extract_vscale_8_i16_zero(<vscale x 8 x i16> %vec, i64 %idx) {
200+
; CHECK-LABEL: extract_vscale_8_i16_zero:
201+
; CHECK: // %bb.0: // %entry
202+
; CHECK-NEXT: fmov w0, s0
203+
; CHECK-NEXT: ret
204+
entry:
205+
%d = extractelement <vscale x 8 x i16> %vec, i64 0
206+
ret i16 %d
207+
}
208+
209+
define i8 @extract_vscale_16_i8(<vscale x 16 x i8> %vec, i64 %idx) {
210+
; CHECK-LABEL: extract_vscale_16_i8:
211+
; CHECK: // %bb.0: // %entry
212+
; CHECK-NEXT: whilels p0.b, xzr, x0
213+
; CHECK-NEXT: lastb w0, p0, z0.b
214+
; CHECK-NEXT: ret
215+
entry:
216+
%d = extractelement <vscale x 16 x i8> %vec, i64 %idx
217+
ret i8 %d
218+
}
219+
220+
define i8 @extract_vscale_16_i8_zero(<vscale x 16 x i8> %vec, i64 %idx) {
221+
; CHECK-LABEL: extract_vscale_16_i8_zero:
222+
; CHECK: // %bb.0: // %entry
223+
; CHECK-NEXT: fmov w0, s0
224+
; CHECK-NEXT: ret
225+
entry:
226+
%d = extractelement <vscale x 16 x i8> %vec, i64 0
227+
ret i8 %d
228+
}

0 commit comments

Comments
 (0)