@@ -15756,48 +15756,51 @@ bool AArch64TargetLowering::shouldSinkOperands(
15756
15756
return false;
15757
15757
}
15758
15758
15759
- static bool createTblShuffleForZExt(ZExtInst *ZExt, FixedVectorType *DstTy,
15760
- bool IsLittleEndian) {
15761
- Value *Op = ZExt->getOperand(0);
15762
- auto *SrcTy = cast<FixedVectorType>(Op->getType());
15763
- auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15764
- auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15759
// Documentation for the added (+) version of createTblShuffleMask.
//
// Fills Mask with NumElts * (DstWidth / SrcWidth) shuffle indices. Within each
// group of Factor consecutive entries exactly one entry holds a source lane
// index (0, 1, ..., NumElts - 1 in order); every other entry holds NumElts.
// On little-endian the source index is the first entry of its group, on
// big-endian it is the last entry.
//
// SrcWidth / DstWidth: element bit widths of the source and destination.
// NumElts:             number of source lanes; also used as the filler index.
// IsLittleEndian:      selects where in each group the source index is placed.
// Mask:                output; any previous contents are discarded.
//
// Returns false (without touching Mask) when the width combination is
// rejected by the checks below, true otherwise.
+ static bool createTblShuffleMask(unsigned SrcWidth, unsigned DstWidth,
15760
+ unsigned NumElts, bool IsLittleEndian,
15761
+ SmallVectorImpl<int> &Mask) {
15765
15762
// Only destination widths that are a multiple of 8 and strictly between 16
// and 64 pass this check, i.e. 24, 32, 40, 48 or 56 bits.
if (DstWidth % 8 != 0 || DstWidth <= 16 || DstWidth >= 64)
15766
15763
return false;
15767
15764
15768
- assert(DstWidth % SrcWidth == 0 &&
15769
- "TBL lowering is not supported for a ZExt instruction with this "
15770
- "source & destination element type.");
15771
- unsigned ZExtFactor = DstWidth / SrcWidth;
15765
// The removed version asserted divisibility; the added version reports the
// unsupported combination to the caller by returning false instead.
+ if (DstWidth % SrcWidth != 0)
15766
+ return false;
15767
+
15768
+ unsigned Factor = DstWidth / SrcWidth;
15769
+ unsigned MaskLen = NumElts * Factor;
15770
+
15771
+ Mask.clear();
15772
// Pre-fill every slot with the filler index NumElts; the loop below then
// overwrites one slot per group with the real source lane index.
+ Mask.resize(MaskLen, NumElts);
15773
+
15774
+ unsigned SrcIndex = 0;
15775
// Little-endian layout: source lane I goes to the first slot of group I.
+ for (unsigned I = 0; I < MaskLen; I += Factor)
15776
+ Mask[I] = SrcIndex++;
15777
+
15778
// Big-endian layout: rotating the reversed range left by Factor - 1 moves
// every entry Factor - 1 positions toward the end, so each source index
// becomes the last slot of its group; the trailing Factor - 1 filler entries
// wrap around to the front.
+ if (!IsLittleEndian)
15779
+ std::rotate(Mask.rbegin(), Mask.rbegin() + Factor - 1, Mask.rend());
15780
+
15781
+ return true;
15782
+ }
15783
+
15784
// Documentation for the added (+) version of createTblShuffleForZExt.
//
// Emits, through Builder, a shufflevector + bitcast sequence that widens the
// lanes of Op to the element type of DstTy, followed by a zext to ZExtTy when
// ZExtTy differs from DstTy.
//
// Builder:        where the new instructions are created.
// Op:             value to extend; its type must be a FixedVectorType with
//                 integer elements (both are enforced by cast<>).
// ZExtTy:         type of the value that is finally returned.
// DstTy:          type the shuffle result is bitcast to.
// IsLittleEndian: forwarded to createTblShuffleMask.
//
// Returns the new value, or nullptr when createTblShuffleMask rejects the
// source/destination widths. Unlike the removed (-) version, this function
// does not replace or erase any existing instruction.
+ static Value *createTblShuffleForZExt(IRBuilderBase &Builder, Value *Op,
15785
+ FixedVectorType *ZExtTy,
15786
+ FixedVectorType *DstTy,
15787
+ bool IsLittleEndian) {
15788
+ auto *SrcTy = cast<FixedVectorType>(Op->getType());
15772
15789
unsigned NumElts = SrcTy->getNumElements();
15773
- IRBuilder<> Builder(ZExt);
15790
+ auto SrcWidth = cast<IntegerType>(SrcTy->getElementType())->getBitWidth();
15791
+ auto DstWidth = cast<IntegerType>(DstTy->getElementType())->getBitWidth();
15792
+
15774
15793
SmallVector<int> Mask;
15775
- // Create a mask that selects <0,...,Op[i]> for each lane of the destination
15776
- // vector to replace the original ZExt. This can later be lowered to a set of
15777
- // tbl instructions.
15778
- for (unsigned i = 0; i < NumElts * ZExtFactor; i++) {
15779
- if (IsLittleEndian) {
15780
- if (i % ZExtFactor == 0)
15781
- Mask.push_back(i / ZExtFactor);
15782
- else
15783
- Mask.push_back(NumElts);
15784
- } else {
15785
- if ((i + 1) % ZExtFactor == 0)
15786
- Mask.push_back((i - ZExtFactor + 1) / ZExtFactor);
15787
- else
15788
- Mask.push_back(NumElts);
15789
- }
15790
- }
15794
// Mask construction now lives in createTblShuffleMask; bail out before
// emitting any instruction if the widths are not supported.
+ if (!createTblShuffleMask(SrcWidth, DstWidth, NumElts, IsLittleEndian, Mask))
15795
+ return nullptr;
15791
15796
15792
15797
// Second shuffle operand: a vector of SrcTy whose lane 0 is zero. The filler
// mask index NumElts selects exactly that lane.
// NOTE(review): getInt8(0) is an i8 constant, so this appears to assume SrcTy
// has i8 elements even though SrcWidth is read generically above; confirm
// that every caller guarantees an i8 source.
auto *FirstEltZero = Builder.CreateInsertElement(
15793
15798
PoisonValue::get(SrcTy), Builder.getInt8(0), uint64_t(0));
15794
15799
Value *Result = Builder.CreateShuffleVector(Op, FirstEltZero, Mask);
15795
15800
// The shuffle result has one entry per mask slot in the source element type;
// the bitcast reinterprets it as DstTy (presumably each group of mask slots
// becomes one DstTy lane; confirm that DstTy has NumElts lanes).
Result = Builder.CreateBitCast(Result, DstTy);
15796
- if (DstTy != ZExt->getType())
15797
- Result = Builder.CreateZExt(Result, ZExt->getType());
15798
- ZExt->replaceAllUsesWith(Result);
15799
- ZExt->eraseFromParent();
15800
- return true;
15801
// The target type is now passed in as ZExtTy instead of being read from the
// ZExt instruction; replacing uses of the original is left to the caller.
+ if (DstTy != ZExtTy)
15802
+ Result = Builder.CreateZExt(Result, ZExtTy);
15803
+ return Result;
15801
15804
}
15802
15805
15803
15806
static void createTblForTrunc(TruncInst *TI, bool IsLittleEndian) {
@@ -15962,21 +15965,30 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
15962
15965
15963
15966
DstTy = TruncDstType;
15964
15967
}
15965
-
15966
- return createTblShuffleForZExt(ZExt, DstTy, Subtarget->isLittleEndian());
15968
+ IRBuilder<> Builder(ZExt);
15969
+ Value *Result = createTblShuffleForZExt(
15970
+ Builder, ZExt->getOperand(0), cast<FixedVectorType>(ZExt->getType()),
15971
+ DstTy, Subtarget->isLittleEndian());
15972
+ if (!Result)
15973
+ return false;
15974
+ ZExt->replaceAllUsesWith(Result);
15975
+ ZExt->eraseFromParent();
15976
+ return true;
15967
15977
}
15968
15978
15969
15979
auto *UIToFP = dyn_cast<UIToFPInst>(I);
15970
15980
if (UIToFP && SrcTy->getElementType()->isIntegerTy(8) &&
15971
15981
DstTy->getElementType()->isFloatTy()) {
15972
15982
IRBuilder<> Builder(I);
15973
- auto *ZExt = cast<ZExtInst>(
15974
- Builder.CreateZExt(I->getOperand(0), VectorType::getInteger(DstTy)));
15983
+ Value *ZExt = createTblShuffleForZExt(
15984
+ Builder, I->getOperand(0), FixedVectorType::getInteger(DstTy),
15985
+ FixedVectorType::getInteger(DstTy), Subtarget->isLittleEndian());
15986
+ if (!ZExt)
15987
+ return false;
15975
15988
auto *UI = Builder.CreateUIToFP(ZExt, DstTy);
15976
15989
I->replaceAllUsesWith(UI);
15977
15990
I->eraseFromParent();
15978
- return createTblShuffleForZExt(ZExt, cast<FixedVectorType>(ZExt->getType()),
15979
- Subtarget->isLittleEndian());
15991
+ return true;
15980
15992
}
15981
15993
15982
15994
// Convert 'fptoui <(8|16) x float> to <(8|16) x i8>' to a wide fptoui
0 commit comments