Skip to content

Commit 6f0589b

Browse files
[LLVM][CodeGen][SVE] Improve custom lowering for EXTRACT_SUBVECTOR.
We can extract any legal fixed-length vector from a scalable vector by using VECTOR_SPLICE. I've also taken the time to simplify the code a little.
1 parent 5c1e440 commit 6f0589b

File tree

4 files changed

+77
-202
lines changed

4 files changed

+77
-202
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 33 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -13897,45 +13897,52 @@ AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
1389713897

1389813898
SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
1389913899
SelectionDAG &DAG) const {
13900-
assert(Op.getValueType().isFixedLengthVector() &&
13900+
EVT VT = Op.getValueType();
13901+
assert(VT.isFixedLengthVector() &&
1390113902
"Only cases that extract a fixed length vector are supported!");
13902-
1390313903
EVT InVT = Op.getOperand(0).getValueType();
13904-
unsigned Idx = Op.getConstantOperandVal(1);
13905-
unsigned Size = Op.getValueSizeInBits();
1390613904

1390713905
// If we don't have legal types yet, do nothing
13908-
if (!DAG.getTargetLoweringInfo().isTypeLegal(InVT))
13906+
if (!isTypeLegal(InVT))
1390913907
return SDValue();
1391013908

13911-
if (InVT.isScalableVector()) {
13912-
// This will be matched by custom code during ISelDAGToDAG.
13913-
if (Idx == 0 && isPackedVectorType(InVT, DAG))
13909+
if (InVT.is128BitVector()) {
13910+
assert(VT.is64BitVector() && "Extracting unexpected vector type!");
13911+
unsigned Idx = Op.getConstantOperandVal(1);
13912+
13913+
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
13914+
if (Idx == 0)
1391413915
return Op;
1391513916

13916-
return SDValue();
13917+
// If this is extracting the upper 64-bits of a 128-bit vector, we match
13918+
// that directly.
13919+
if (Idx * InVT.getScalarSizeInBits() == 64 && Subtarget->isNeonAvailable())
13920+
return Op;
1391713921
}
1391813922

13919-
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
13920-
if (Idx == 0 && InVT.getSizeInBits() <= 128)
13921-
return Op;
13922-
13923-
// If this is extracting the upper 64-bits of a 128-bit vector, we match
13924-
// that directly.
13925-
if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
13926-
InVT.getSizeInBits() == 128 && Subtarget->isNeonAvailable())
13927-
return Op;
13928-
13929-
if (useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
13923+
if (InVT.isScalableVector() ||
13924+
useSVEForFixedLengthVectorVT(InVT, !Subtarget->isNeonAvailable())) {
1393013925
SDLoc DL(Op);
13926+
SDValue Vec = Op.getOperand(0);
13927+
SDValue Idx = Op.getOperand(1);
1393113928

13932-
EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
13933-
SDValue NewInVec =
13934-
convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
13929+
EVT PackedVT = getPackedSVEVectorVT(InVT.getVectorElementType());
13930+
if (PackedVT != InVT) {
13931+
// Pack input into the bottom part of an SVE register and try again.
13932+
SDValue Container = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PackedVT,
13933+
DAG.getUNDEF(PackedVT), Vec,
13934+
DAG.getVectorIdxConstant(0, DL));
13935+
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Container, Idx);
13936+
}
13937+
13938+
// This will get matched by custom code during ISelDAGToDAG.
13939+
if (isNullConstant(Idx))
13940+
return Op;
1393513941

13936-
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ContainerVT, NewInVec,
13937-
NewInVec, DAG.getConstant(Idx, DL, MVT::i64));
13938-
return convertFromScalableVector(DAG, Op.getValueType(), Splice);
13942+
assert(InVT.isScalableVector() && "Unexpected vector type!");
13943+
// Move requested subvector to the start of the vector and try again.
13944+
SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, InVT, Vec, Vec, Idx);
13945+
return convertFromScalableVector(DAG, VT, Splice);
1393913946
}
1394013947

1394113948
return SDValue();

llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll

Lines changed: 11 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -143,15 +143,8 @@ define <4 x float> @extract_v4f32_nxv16f32_12(<vscale x 16 x float> %arg) {
143143
define <2 x float> @extract_v2f32_nxv16f32_2(<vscale x 16 x float> %arg) {
144144
; CHECK-LABEL: extract_v2f32_nxv16f32_2:
145145
; CHECK: // %bb.0:
146-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
147-
; CHECK-NEXT: addvl sp, sp, #-1
148-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
149-
; CHECK-NEXT: .cfi_offset w29, -16
150-
; CHECK-NEXT: ptrue p0.s
151-
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
152-
; CHECK-NEXT: ldr d0, [sp, #8]
153-
; CHECK-NEXT: addvl sp, sp, #1
154-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
146+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
147+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
155148
; CHECK-NEXT: ret
156149
%ext = call <2 x float> @llvm.vector.extract.v2f32.nxv16f32(<vscale x 16 x float> %arg, i64 2)
157150
ret <2 x float> %ext
@@ -274,15 +267,8 @@ define <4 x i3> @extract_v4i3_nxv32i3_16(<vscale x 32 x i3> %arg) {
274267
define <2 x i32> @extract_v2i32_nxv16i32_2(<vscale x 16 x i32> %arg) {
275268
; CHECK-LABEL: extract_v2i32_nxv16i32_2:
276269
; CHECK: // %bb.0:
277-
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
278-
; CHECK-NEXT: addvl sp, sp, #-1
279-
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
280-
; CHECK-NEXT: .cfi_offset w29, -16
281-
; CHECK-NEXT: ptrue p0.s
282-
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
283-
; CHECK-NEXT: ldr d0, [sp, #8]
284-
; CHECK-NEXT: addvl sp, sp, #1
285-
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
270+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
271+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
286272
; CHECK-NEXT: ret
287273
%ext = call <2 x i32> @llvm.vector.extract.v2i32.nxv16i32(<vscale x 16 x i32> %arg, i64 2)
288274
ret <2 x i32> %ext
@@ -314,16 +300,9 @@ define <4 x half> @extract_v4f16_nxv2f16_0(<vscale x 2 x half> %arg) {
314300
; CHECK-NEXT: addvl sp, sp, #-1
315301
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
316302
; CHECK-NEXT: .cfi_offset w29, -16
317-
; CHECK-NEXT: cntd x8
318303
; CHECK-NEXT: ptrue p0.d
319-
; CHECK-NEXT: addpl x9, sp, #6
320-
; CHECK-NEXT: subs x8, x8, #4
321-
; CHECK-NEXT: csel x8, xzr, x8, lo
322-
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
323-
; CHECK-NEXT: cmp x8, #0
324-
; CHECK-NEXT: csel x8, x8, xzr, lo
325-
; CHECK-NEXT: lsl x8, x8, #1
326-
; CHECK-NEXT: ldr d0, [x9, x8]
304+
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
305+
; CHECK-NEXT: ldr d0, [sp]
327306
; CHECK-NEXT: addvl sp, sp, #1
328307
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
329308
; CHECK-NEXT: ret
@@ -338,17 +317,12 @@ define <4 x half> @extract_v4f16_nxv2f16_4(<vscale x 2 x half> %arg) {
338317
; CHECK-NEXT: addvl sp, sp, #-1
339318
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
340319
; CHECK-NEXT: .cfi_offset w29, -16
341-
; CHECK-NEXT: cntd x8
342-
; CHECK-NEXT: mov w9, #4 // =0x4
343320
; CHECK-NEXT: ptrue p0.d
344-
; CHECK-NEXT: subs x8, x8, #4
345-
; CHECK-NEXT: csel x8, xzr, x8, lo
346-
; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl]
347-
; CHECK-NEXT: cmp x8, #4
348-
; CHECK-NEXT: csel x8, x8, x9, lo
349-
; CHECK-NEXT: addpl x9, sp, #6
350-
; CHECK-NEXT: lsl x8, x8, #1
351-
; CHECK-NEXT: ldr d0, [x9, x8]
321+
; CHECK-NEXT: ptrue p1.h
322+
; CHECK-NEXT: st1h { z0.d }, p0, [sp]
323+
; CHECK-NEXT: ld1h { z0.h }, p1/z, [sp]
324+
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
325+
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
352326
; CHECK-NEXT: addvl sp, sp, #1
353327
; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
354328
; CHECK-NEXT: ret

0 commit comments

Comments
 (0)