@@ -13589,6 +13589,52 @@ static bool matchIndexAsShuffle(EVT VT, SDValue Index, SDValue Mask,
  return ActiveLanes.all();
}

+ /// Match the index of a gather or scatter operation as an operation
+ /// with twice the element width and half the number of elements. This is
+ /// generally profitable (if legal) because these operations are linear
+ /// in VL, so even if we cause some extra VTYPE/VL toggles, we still
+ /// come out ahead.
+ static bool matchIndexAsWiderOp(EVT VT, SDValue Index, SDValue Mask,
+                                 Align BaseAlign, const RISCVSubtarget &ST) {
+   if (!ISD::isConstantSplatVectorAllOnes(Mask.getNode()))
+     return false;
+   if (!ISD::isBuildVectorOfConstantSDNodes(Index.getNode()))
+     return false;
+
+   // Attempt a doubling. If we can use an element type 4x or 8x in
+   // size, this will happen via multiple iterations of the transform.
+   const unsigned NumElems = VT.getVectorNumElements();
+   if (NumElems % 2 != 0)
+     return false;
+
+   const unsigned ElementSize = VT.getScalarStoreSize();
+   const unsigned WiderElementSize = ElementSize * 2;
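+   // The doubled element must still fit in a single vector element, i.e. it
+   // may be at most ELEN bits wide (getELen() is in bits, hence the /8).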
+   if (WiderElementSize > ST.getELen()/8)
+     return false;
+
+   if (!ST.enableUnalignedVectorMem() && BaseAlign < WiderElementSize)
+     return false;
+
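+   // Check that the constant indices pair up: every index must be a multiple
+   // of the narrow element size, and each odd-positioned index must equal its
+   // even neighbour plus ElementSize, so each pair covers one contiguous
+   // wider element.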
+   for (unsigned i = 0; i < Index->getNumOperands(); i++) {
+     // TODO: We've found an active bit of UB, and could be
+     // more aggressive here if desired.
+     if (Index->getOperand(i)->isUndef())
+       return false;
+     // TODO: This offset check is too strict if we support fully
+     // misaligned memory operations.
+     uint64_t C = Index->getConstantOperandVal(i);
+     if (C % ElementSize != 0)
+       return false;
+     if (i % 2 == 0)
+       continue;
+     uint64_t Last = Index->getConstantOperandVal(i-1);
+     if (C != Last + ElementSize)
+       return false;
+   }
+   return true;
+ }
+
+

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
@@ -14020,6 +14066,36 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
          DAG.getVectorShuffle(VT, DL, Load, DAG.getUNDEF(VT), ShuffleMask);
      return DAG.getMergeValues({Shuffle, Load.getValue(1)}, DL);
    }
+
+     if (MGN->getExtensionType() == ISD::NON_EXTLOAD &&
+         matchIndexAsWiderOp(VT, Index, MGN->getMask(),
+                             MGN->getMemOperand()->getBaseAlign(), Subtarget)) {
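+       // matchIndexAsWiderOp proved the constant indices come in adjacent
+       // pairs, so keep only every second index; each kept index now selects
+       // one element of twice the width.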
+       SmallVector<SDValue> NewIndices;
+       for (unsigned i = 0; i < Index->getNumOperands(); i += 2)
+         NewIndices.push_back(Index.getOperand(i));
+       EVT IndexVT = Index.getValueType()
+                         .getHalfNumVectorElementsVT(*DAG.getContext());
+       Index = DAG.getBuildVector(IndexVT, DL, NewIndices);
+
+       unsigned ElementSize = VT.getScalarStoreSize();
+       EVT WideScalarVT = MVT::getIntegerVT(ElementSize * 8 * 2);
+       auto EltCnt = VT.getVectorElementCount();
+       assert(EltCnt.isKnownEven() && "Splitting vector, but not in half!");
+       EVT WideVT = EVT::getVectorVT(*DAG.getContext(), WideScalarVT,
+                                     EltCnt.divideCoefficientBy(2));
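+       // Reuse the original passthru by reinterpreting it at the wider type.
+       // matchIndexAsWiderOp only fires for an all-ones mask, so an all-ones
+       // mask at the halved element count is equivalent.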
+       SDValue Passthru = DAG.getBitcast(WideVT, MGN->getPassThru());
+       EVT MaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     EltCnt.divideCoefficientBy(2));
+       SDValue Mask = DAG.getSplat(MaskVT, DL, DAG.getConstant(1, DL, MVT::i1));
+
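+       // Emit the gather at the doubled element width and half the element
+       // count, then bitcast the result back to the original narrow type.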
+       SDValue Gather =
+           DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), WideVT, DL,
+                               {MGN->getChain(), Passthru, Mask, MGN->getBasePtr(),
+                                Index, ScaleOp},
+                               MGN->getMemOperand(), IndexType, ISD::NON_EXTLOAD);
+       SDValue Result = DAG.getBitcast(VT, Gather.getValue(0));
+       return DAG.getMergeValues({Result, Gather.getValue(1)}, DL);
+     }
    break;
  }
  case ISD::MSCATTER:{