@@ -379,6 +379,11 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
379
379
return ConstantInt::get (GEP->getContext (), Quot);
380
380
}
381
381
382
+ struct MemTransferInfo { // Constant vector indices recorded per MemTransferInst while the uses of the alloca are visited.
383
+ ConstantInt *SrcIndex = nullptr ; // Index computed for getSource(); remains null until the source operand use has been resolved to a constant index.
384
+ ConstantInt *DestIndex = nullptr ; // Index computed for getDest(); remains null until the destination operand use has been resolved to a constant index.
385
+ }; // Promotion to a vector is abandoned when either index is still null after all uses are processed.
386
+
382
387
static bool tryPromoteAllocaToVector (AllocaInst *Alloca, const DataLayout &DL,
383
388
unsigned MaxVGPRs) {
384
389
@@ -419,11 +424,15 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
419
424
420
425
std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
421
426
SmallVector<Instruction *> WorkList;
427
+ SmallVector<Instruction *> DeferredInsts;
422
428
SmallVector<Use *, 8 > Uses;
429
+ DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;
430
+
423
431
for (Use &U : Alloca->uses ())
424
432
Uses.push_back (&U);
425
433
426
434
Type *VecEltTy = VectorTy->getElementType ();
435
+ unsigned ElementSize = DL.getTypeSizeInBits (VecEltTy) / 8 ;
427
436
while (!Uses.empty ()) {
428
437
Use *U = Uses.pop_back_val ();
429
438
Instruction *Inst = dyn_cast<Instruction>(U->getUser ());
@@ -476,6 +485,47 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
476
485
continue ;
477
486
}
478
487
488
+ if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
489
+ if (TransferInst->isVolatile ())
490
+ return false ;
491
+
492
+ ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength ());
493
+ if (!Len || !!(Len->getZExtValue () % ElementSize))
494
+ return false ;
495
+
496
+ if (!TransferInfo.count (TransferInst)) {
497
+ DeferredInsts.push_back (Inst);
498
+ WorkList.push_back (Inst);
499
+ TransferInfo[TransferInst] = MemTransferInfo ();
500
+ }
501
+
502
+ auto getPointerIndexOfAlloca = [&](Value *Ptr ) -> ConstantInt * {
503
+ GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr );
504
+ if (Ptr != Alloca && !GEPVectorIdx.count (GEP))
505
+ return nullptr ;
506
+
507
+ return dyn_cast<ConstantInt>(calculateVectorIndex (Ptr , GEPVectorIdx));
508
+ };
509
+
510
+ unsigned OpNum = U->getOperandNo ();
511
+ MemTransferInfo *TI = &TransferInfo[TransferInst];
512
+ if (OpNum == 0 ) {
513
+ Value *Dest = TransferInst->getDest ();
514
+ ConstantInt *Index = getPointerIndexOfAlloca (Dest);
515
+ if (!Index)
516
+ return false ;
517
+ TI->DestIndex = Index;
518
+ } else {
519
+ assert (OpNum == 1 );
520
+ Value *Src = TransferInst->getSource ();
521
+ ConstantInt *Index = getPointerIndexOfAlloca (Src);
522
+ if (!Index)
523
+ return false ;
524
+ TI->SrcIndex = Index;
525
+ }
526
+ continue ;
527
+ }
528
+
479
529
// Ignore assume-like intrinsics and comparisons used in assumes.
480
530
if (isAssumeLikeIntrinsic (Inst))
481
531
continue ;
@@ -489,6 +539,16 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
489
539
return false ;
490
540
}
491
541
542
+ while (!DeferredInsts.empty ()) {
543
+ Instruction *Inst = DeferredInsts.pop_back_val ();
544
+ MemTransferInst *TransferInst = cast<MemTransferInst>(Inst);
545
+ // TODO: Support the case where the pointers are from different allocas or
546
+ // from different address spaces.
547
+ MemTransferInfo &Info = TransferInfo[TransferInst];
548
+ if (!Info.SrcIndex || !Info.DestIndex )
549
+ return false ;
550
+ }
551
+
492
552
LLVM_DEBUG (dbgs () << " Converting alloca to vector " << *AllocaTy << " -> "
493
553
<< *VectorTy << ' \n ' );
494
554
@@ -523,6 +583,35 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
523
583
Inst->eraseFromParent ();
524
584
break ;
525
585
}
586
+ case Instruction::Call: {
587
+ if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) {
588
+ ConstantInt *Length = cast<ConstantInt>(MTI->getLength ());
589
+ unsigned NumCopied = Length->getZExtValue () / ElementSize;
590
+ MemTransferInfo *TI = &TransferInfo[cast<MemTransferInst>(Inst)];
591
+ unsigned SrcBegin = TI->SrcIndex ->getZExtValue ();
592
+ unsigned DestBegin = TI->DestIndex ->getZExtValue ();
593
+
594
+ SmallVector<int > Mask;
595
+ for (unsigned Idx = 0 ; Idx < VectorTy->getNumElements (); ++Idx) {
596
+ if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
597
+ Mask.push_back (SrcBegin++);
598
+ } else {
599
+ Mask.push_back (Idx);
600
+ }
601
+ }
602
+ Type *VecPtrTy = VectorTy->getPointerTo (Alloca->getAddressSpace ());
603
+ Value *BitCast = Builder.CreateBitCast (Alloca, VecPtrTy);
604
+ Value *VecValue =
605
+ Builder.CreateAlignedLoad (VectorTy, BitCast, Alloca->getAlign ());
606
+ Value *NewVecValue = Builder.CreateShuffleVector (VecValue, Mask);
607
+ Builder.CreateAlignedStore (NewVecValue, BitCast, Alloca->getAlign ());
608
+
609
+ Inst->eraseFromParent ();
610
+ } else {
611
+ llvm_unreachable (" Unsupported call when promoting alloca to vector" );
612
+ }
613
+ break ;
614
+ }
526
615
527
616
default :
528
617
llvm_unreachable (" Inconsistency in instructions promotable to vector" );
0 commit comments