@@ -2855,21 +2855,10 @@ static Value *interleaveVectors(IRBuilderBase &Builder, ArrayRef<Value *> Vals,
2855
2855
// Scalable vectors cannot use arbitrary shufflevectors (only splats), so
2856
2856
// must use intrinsics to interleave.
2857
2857
if (VecTy->isScalableTy ()) {
2858
- assert (isPowerOf2_32 (Factor) && " Unsupported interleave factor for "
2859
- " scalable vectors, must be power of 2" );
2860
- SmallVector<Value *> InterleavingValues (Vals);
2861
- // When interleaving, the number of values will be shrunk until we have the
2862
- // single final interleaved value.
2863
- auto *InterleaveTy = cast<VectorType>(InterleavingValues[0 ]->getType ());
2864
- for (unsigned Midpoint = Factor / 2 ; Midpoint > 0 ; Midpoint /= 2 ) {
2865
- InterleaveTy = VectorType::getDoubleElementsVectorType (InterleaveTy);
2866
- for (unsigned I = 0 ; I < Midpoint; ++I)
2867
- InterleavingValues[I] = Builder.CreateIntrinsic (
2868
- InterleaveTy, Intrinsic::vector_interleave2,
2869
- {InterleavingValues[I], InterleavingValues[Midpoint + I]},
2870
- /* FMFSource=*/ nullptr , Name);
2871
- }
2872
- return InterleavingValues[0 ];
2858
+ VectorType *WideVecTy = VectorType::getDoubleElementsVectorType (VecTy);
2859
+ return Builder.CreateIntrinsic (WideVecTy, Intrinsic::vector_interleave2,
2860
+ Vals,
2861
+ /* FMFSource=*/ nullptr , Name);
2873
2862
}
2874
2863
2875
2864
// Fixed length. Start by concatenating all vectors into a wide vector.
@@ -2955,11 +2944,15 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2955
2944
&InterleaveFactor](Value *MaskForGaps) -> Value * {
2956
2945
if (State.VF .isScalable ()) {
2957
2946
assert (!MaskForGaps && " Interleaved groups with gaps are not supported." );
2958
- assert (isPowerOf2_32 ( InterleaveFactor) &&
2947
+ assert (InterleaveFactor == 2 &&
2959
2948
" Unsupported deinterleave factor for scalable vectors" );
2960
2949
auto *ResBlockInMask = State.get (BlockInMask);
2961
- SmallVector<Value *> Ops (InterleaveFactor, ResBlockInMask);
2962
- return interleaveVectors (State.Builder , Ops, " interleaved.mask" );
2950
+ SmallVector<Value *, 2 > Ops = {ResBlockInMask, ResBlockInMask};
2951
+ auto *MaskTy = VectorType::get (State.Builder .getInt1Ty (),
2952
+ State.VF .getKnownMinValue () * 2 , true );
2953
+ return State.Builder .CreateIntrinsic (
2954
+ MaskTy, Intrinsic::vector_interleave2, Ops,
2955
+ /* FMFSource=*/ nullptr , " interleaved.mask" );
2963
2956
}
2964
2957
2965
2958
if (!BlockInMask)
@@ -2999,48 +2992,22 @@ void VPInterleaveRecipe::execute(VPTransformState &State) {
2999
2992
ArrayRef<VPValue *> VPDefs = definedValues ();
3000
2993
const DataLayout &DL = State.CFG .PrevBB ->getDataLayout ();
3001
2994
if (VecTy->isScalableTy ()) {
3002
- assert (isPowerOf2_32 ( InterleaveFactor) &&
2995
+ assert (InterleaveFactor == 2 &&
3003
2996
" Unsupported deinterleave factor for scalable vectors" );
3004
2997
3005
- // Scalable vectors cannot use arbitrary shufflevectors (only splats),
3006
- // so must use intrinsics to deinterleave.
3007
- SmallVector<Value *> DeinterleavedValues (InterleaveFactor);
3008
- DeinterleavedValues[0 ] = NewLoad;
3009
- // For the case of InterleaveFactor > 2, we will have to do recursive
3010
- // deinterleaving, because the current available deinterleave intrinsic
3011
- // supports only Factor of 2, otherwise it will bailout after first
3012
- // iteration.
3013
- // When deinterleaving, the number of values will double until we
3014
- // have "InterleaveFactor".
3015
- for (unsigned NumVectors = 1 ; NumVectors < InterleaveFactor;
3016
- NumVectors *= 2 ) {
3017
- // Deinterleave the elements within the vector
3018
- SmallVector<Value *> TempDeinterleavedValues (NumVectors);
3019
- for (unsigned I = 0 ; I < NumVectors; ++I) {
3020
- auto *DiTy = DeinterleavedValues[I]->getType ();
3021
- TempDeinterleavedValues[I] = State.Builder .CreateIntrinsic (
3022
- Intrinsic::vector_deinterleave2, DiTy, DeinterleavedValues[I],
3023
- /* FMFSource=*/ nullptr , " strided.vec" );
3024
- }
3025
- // Extract the deinterleaved values:
3026
- for (unsigned I = 0 ; I < 2 ; ++I)
3027
- for (unsigned J = 0 ; J < NumVectors; ++J)
3028
- DeinterleavedValues[NumVectors * I + J] =
3029
- State.Builder .CreateExtractValue (TempDeinterleavedValues[J], I);
3030
- }
3031
-
3032
- #ifndef NDEBUG
3033
- for (Value *Val : DeinterleavedValues)
3034
- assert (Val && " NULL Deinterleaved Value" );
3035
- #endif
3036
- for (unsigned I = 0 , J = 0 ; I < InterleaveFactor; ++I) {
2998
+ // Scalable vectors cannot use arbitrary shufflevectors (only splats),
2999
+ // so must use intrinsics to deinterleave.
3000
+ Value *DI = State.Builder .CreateIntrinsic (
3001
+ Intrinsic::vector_deinterleave2, VecTy, NewLoad,
3002
+ /* FMFSource=*/ nullptr , " strided.vec" );
3003
+ unsigned J = 0 ;
3004
+ for (unsigned I = 0 ; I < InterleaveFactor; ++I) {
3037
3005
Instruction *Member = Group->getMember (I);
3038
- Value *StridedVec = DeinterleavedValues[I];
3039
- if (!Member) {
3040
- // This value is not needed as it's not used
3041
- cast<Instruction>(StridedVec)->eraseFromParent ();
3006
+
3007
+ if (!Member)
3042
3008
continue ;
3043
- }
3009
+
3010
+ Value *StridedVec = State.Builder .CreateExtractValue (DI, I);
3044
3011
// If this member has a different type, cast the result type.
3045
3012
if (Member->getType () != ScalarTy) {
3046
3013
VectorType *OtherVTy = VectorType::get (Member->getType (), State.VF );
0 commit comments