@@ -525,6 +525,121 @@ SDValue LoongArchTargetLowering::lowerBITREVERSE(SDValue Op,
525
525
}
526
526
}
527
527
528
+ // / Attempts to match a shuffle mask against the VBSLL, VBSRL, VSLLI and VSRLI
529
+ // / instruction.
530
+ // The funciton matches elements from one of the input vector shuffled to the
531
+ // left or right with zeroable elements 'shifted in'. It handles both the
532
+ // strictly bit-wise element shifts and the byte shfit across an entire 128-bit
533
+ // lane.
534
+ // Mostly copied from X86.
535
+ static int matchShuffleAsShift (MVT &ShiftVT, unsigned &Opcode,
536
+ unsigned ScalarSizeInBits, ArrayRef<int > Mask,
537
+ int MaskOffset, const APInt &Zeroable) {
538
+ int Size = Mask.size ();
539
+ unsigned SizeInBits = Size * ScalarSizeInBits;
540
+
541
+ auto CheckZeros = [&](int Shift, int Scale, bool Left) {
542
+ for (int i = 0 ; i < Size ; i += Scale)
543
+ for (int j = 0 ; j < Shift; ++j)
544
+ if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
545
+ return false ;
546
+
547
+ return true ;
548
+ };
549
+
550
+ auto isSequentialOrUndefInRange = [&](unsigned Pos, unsigned Size , int Low,
551
+ int Step = 1 ) {
552
+ for (unsigned i = Pos, e = Pos + Size ; i != e; ++i, Low += Step)
553
+ if (!(Mask[i] == -1 || Mask[i] == Low))
554
+ return false ;
555
+ return true ;
556
+ };
557
+
558
+ auto MatchShift = [&](int Shift, int Scale, bool Left) {
559
+ for (int i = 0 ; i != Size ; i += Scale) {
560
+ unsigned Pos = Left ? i + Shift : i;
561
+ unsigned Low = Left ? i : i + Shift;
562
+ unsigned Len = Scale - Shift;
563
+ if (!isSequentialOrUndefInRange (Pos, Len, Low + MaskOffset))
564
+ return -1 ;
565
+ }
566
+
567
+ int ShiftEltBits = ScalarSizeInBits * Scale;
568
+ bool ByteShift = ShiftEltBits > 64 ;
569
+ Opcode = Left ? (ByteShift ? LoongArchISD::VBSLL : LoongArchISD::VSLLI)
570
+ : (ByteShift ? LoongArchISD::VBSRL : LoongArchISD::VSRLI);
571
+ int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1 );
572
+
573
+ // Normalize the scale for byte shifts to still produce an i64 element
574
+ // type.
575
+ Scale = ByteShift ? Scale / 2 : Scale;
576
+
577
+ // We need to round trip through the appropriate type for the shift.
578
+ MVT ShiftSVT = MVT::getIntegerVT (ScalarSizeInBits * Scale);
579
+ ShiftVT = ByteShift ? MVT::getVectorVT (MVT::i8, SizeInBits / 8 )
580
+ : MVT::getVectorVT (ShiftSVT, Size / Scale);
581
+ return (int )ShiftAmt;
582
+ };
583
+
584
+ unsigned MaxWidth = 128 ;
585
+ for (int Scale = 2 ; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2 )
586
+ for (int Shift = 1 ; Shift != Scale; ++Shift)
587
+ for (bool Left : {true , false })
588
+ if (CheckZeros (Shift, Scale, Left)) {
589
+ int ShiftAmt = MatchShift (Shift, Scale, Left);
590
+ if (0 < ShiftAmt)
591
+ return ShiftAmt;
592
+ }
593
+
594
+ // no match
595
+ return -1 ;
596
+ }
597
+
598
+ // / Lower VECTOR_SHUFFLE as shift (if possible).
599
+ // /
600
+ // / For example:
601
+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
602
+ // / <4 x i32> <i32 4, i32 0, i32 1, i32 2>
603
+ // / is lowered to:
604
+ // / (VBSLL_V $v0, $v0, 4)
605
+ // /
606
+ // / %2 = shufflevector <4 x i32> %0, <4 x i32> zeroinitializer,
607
+ // / <4 x i32> <i32 4, i32 0, i32 4, i32 2>
608
+ // / is lowered to:
609
+ // / (VSLLI_D $v0, $v0, 32)
610
+ static SDValue lowerVECTOR_SHUFFLEAsShift (const SDLoc &DL, ArrayRef<int > Mask,
611
+ MVT VT, SDValue V1, SDValue V2,
612
+ SelectionDAG &DAG,
613
+ const APInt &Zeroable) {
614
+ int Size = Mask.size ();
615
+ assert (Size == (int )VT.getVectorNumElements () && " Unexpected mask size" );
616
+
617
+ MVT ShiftVT;
618
+ SDValue V = V1;
619
+ unsigned Opcode;
620
+
621
+ // Try to match shuffle against V1 shift.
622
+ int ShiftAmt = matchShuffleAsShift (ShiftVT, Opcode, VT.getScalarSizeInBits (),
623
+ Mask, 0 , Zeroable);
624
+
625
+ // If V1 failed, try to match shuffle against V2 shift.
626
+ if (ShiftAmt < 0 ) {
627
+ ShiftAmt = matchShuffleAsShift (ShiftVT, Opcode, VT.getScalarSizeInBits (),
628
+ Mask, Size , Zeroable);
629
+ V = V2;
630
+ }
631
+
632
+ if (ShiftAmt < 0 )
633
+ return SDValue ();
634
+
635
+ assert (DAG.getTargetLoweringInfo ().isTypeLegal (ShiftVT) &&
636
+ " Illegal integer vector type" );
637
+ V = DAG.getBitcast (ShiftVT, V);
638
+ V = DAG.getNode (Opcode, DL, ShiftVT, V,
639
+ DAG.getConstant (ShiftAmt, DL, MVT::i64));
640
+ return DAG.getBitcast (VT, V);
641
+ }
642
+
528
643
/// Determine whether a range fits a regular pattern of values.
529
644
/// This function accounts for the possibility of jumping over the End iterator.
530
645
template <typename ValType>
@@ -593,14 +708,12 @@ static void computeZeroableShuffleElements(ArrayRef<int> Mask, SDValue V1,
593
708
static SDValue lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (const SDLoc &DL,
594
709
ArrayRef<int > Mask, MVT VT,
595
710
SDValue V1, SDValue V2,
596
- SelectionDAG &DAG) {
711
+ SelectionDAG &DAG,
712
+ const APInt &Zeroable) {
597
713
int Bits = VT.getSizeInBits ();
598
714
int EltBits = VT.getScalarSizeInBits ();
599
715
int NumElements = VT.getVectorNumElements ();
600
716
601
- APInt KnownUndef, KnownZero;
602
- computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
603
- APInt Zeroable = KnownUndef | KnownZero;
604
717
if (Zeroable.isAllOnes ())
605
718
return DAG.getConstant (0 , DL, VT);
606
719
@@ -1062,6 +1175,10 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1062
1175
" Unexpected mask size for shuffle!" );
1063
1176
assert (Mask.size () % 2 == 0 && " Expected even mask size." );
1064
1177
1178
+ APInt KnownUndef, KnownZero;
1179
+ computeZeroableShuffleElements (Mask, V1, V2, KnownUndef, KnownZero);
1180
+ APInt Zeroable = KnownUndef | KnownZero;
1181
+
1065
1182
SDValue Result;
1066
1183
// TODO: Add more comparison patterns.
1067
1184
if (V2.isUndef ()) {
@@ -1089,12 +1206,14 @@ static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1089
1206
return Result;
1090
1207
if ((Result = lowerVECTOR_SHUFFLE_VPICKOD (DL, Mask, VT, V1, V2, DAG)))
1091
1208
return Result;
1209
+ if ((Result = lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG,
1210
+ Zeroable)))
1211
+ return Result;
1092
1212
if ((Result =
1093
- lowerVECTOR_SHUFFLEAsZeroOrAnyExtend (DL, Mask, VT, V1, V2, DAG)))
1213
+ lowerVECTOR_SHUFFLEAsShift (DL, Mask, VT, V1, V2, DAG, Zeroable )))
1094
1214
return Result;
1095
1215
if ((Result = lowerVECTOR_SHUFFLE_VSHUF (DL, Mask, VT, V1, V2, DAG)))
1096
1216
return Result;
1097
-
1098
1217
return SDValue ();
1099
1218
}
1100
1219
@@ -1495,6 +1614,10 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1495
1614
SmallVector<int > NewMask (Mask);
1496
1615
canonicalizeShuffleVectorByLane (DL, NewMask, VT, V1, V2, DAG);
1497
1616
1617
+ APInt KnownUndef, KnownZero;
1618
+ computeZeroableShuffleElements (NewMask, V1, V2, KnownUndef, KnownZero);
1619
+ APInt Zeroable = KnownUndef | KnownZero;
1620
+
1498
1621
SDValue Result;
1499
1622
// TODO: Add more comparison patterns.
1500
1623
if (V2.isUndef ()) {
@@ -1522,6 +1645,9 @@ static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
1522
1645
return Result;
1523
1646
if ((Result = lowerVECTOR_SHUFFLE_XVPICKOD (DL, NewMask, VT, V1, V2, DAG)))
1524
1647
return Result;
1648
+ if ((Result =
1649
+ lowerVECTOR_SHUFFLEAsShift (DL, NewMask, VT, V1, V2, DAG, Zeroable)))
1650
+ return Result;
1525
1651
if ((Result = lowerVECTOR_SHUFFLE_XVSHUF (DL, NewMask, VT, V1, V2, DAG)))
1526
1652
return Result;
1527
1653
@@ -5041,6 +5167,10 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
5041
5167
NODE_NAME_CASE (VANY_NONZERO)
5042
5168
NODE_NAME_CASE (FRECIPE)
5043
5169
NODE_NAME_CASE (FRSQRTE)
5170
+ NODE_NAME_CASE (VSLLI)
5171
+ NODE_NAME_CASE (VSRLI)
5172
+ NODE_NAME_CASE (VBSLL)
5173
+ NODE_NAME_CASE (VBSRL)
5044
5174
}
5045
5175
#undef NODE_NAME_CASE
5046
5176
return nullptr ;
0 commit comments