Skip to content

Commit 14d597f

Browse files
ruiling authored and dstutt committed
AMDGPU: Promote array alloca if used by memmove/memcpy
Reviewed by: arsenm
Differential Revision: https://reviews.llvm.org/D140599
Change-Id: If4627299a1d12195cf1ccd6f39b77b21b571f54d
1 parent 39dbf45 commit 14d597f

File tree

2 files changed

+377
-95
lines changed

2 files changed

+377
-95
lines changed

llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp

Lines changed: 89 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -379,6 +379,11 @@ static Value *GEPToVectorIndex(GetElementPtrInst *GEP, AllocaInst *Alloca,
379379
return ConstantInt::get(GEP->getContext(), Quot);
380380
}
381381

382+
// Per-memcpy/memmove bookkeeping used by tryPromoteAllocaToVector.
// Each field records the constant vector index that one pointer operand of a
// MemTransferInst resolves to. Both fields start out null and are filled in
// as the corresponding use of the alloca is visited; promotion is abandoned
// if either is still null once all uses have been scanned.
struct MemTransferInfo {
383+
// Vector index of the transfer's source pointer (operand 1); null until a
// constant index has been computed for it.
ConstantInt *SrcIndex = nullptr;
384+
// Vector index of the transfer's destination pointer (operand 0); null until
// a constant index has been computed for it.
ConstantInt *DestIndex = nullptr;
385+
};
386+
382387
static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
383388
unsigned MaxVGPRs) {
384389

@@ -419,11 +424,15 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
419424

420425
std::map<GetElementPtrInst*, Value*> GEPVectorIdx;
421426
SmallVector<Instruction *> WorkList;
427+
SmallVector<Instruction *> DeferredInsts;
422428
SmallVector<Use *, 8> Uses;
429+
DenseMap<MemTransferInst *, MemTransferInfo> TransferInfo;
430+
423431
for (Use &U : Alloca->uses())
424432
Uses.push_back(&U);
425433

426434
Type *VecEltTy = VectorTy->getElementType();
435+
unsigned ElementSize = DL.getTypeSizeInBits(VecEltTy) / 8;
427436
while (!Uses.empty()) {
428437
Use *U = Uses.pop_back_val();
429438
Instruction *Inst = dyn_cast<Instruction>(U->getUser());
@@ -476,6 +485,47 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
476485
continue;
477486
}
478487

488+
if (MemTransferInst *TransferInst = dyn_cast<MemTransferInst>(Inst)) {
489+
if (TransferInst->isVolatile())
490+
return false;
491+
492+
ConstantInt *Len = dyn_cast<ConstantInt>(TransferInst->getLength());
493+
if (!Len || !!(Len->getZExtValue() % ElementSize))
494+
return false;
495+
496+
if (!TransferInfo.count(TransferInst)) {
497+
DeferredInsts.push_back(Inst);
498+
WorkList.push_back(Inst);
499+
TransferInfo[TransferInst] = MemTransferInfo();
500+
}
501+
502+
auto getPointerIndexOfAlloca = [&](Value *Ptr) -> ConstantInt * {
503+
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
504+
if (Ptr != Alloca && !GEPVectorIdx.count(GEP))
505+
return nullptr;
506+
507+
return dyn_cast<ConstantInt>(calculateVectorIndex(Ptr, GEPVectorIdx));
508+
};
509+
510+
unsigned OpNum = U->getOperandNo();
511+
MemTransferInfo *TI = &TransferInfo[TransferInst];
512+
if (OpNum == 0) {
513+
Value *Dest = TransferInst->getDest();
514+
ConstantInt *Index = getPointerIndexOfAlloca(Dest);
515+
if (!Index)
516+
return false;
517+
TI->DestIndex = Index;
518+
} else {
519+
assert(OpNum == 1);
520+
Value *Src = TransferInst->getSource();
521+
ConstantInt *Index = getPointerIndexOfAlloca(Src);
522+
if (!Index)
523+
return false;
524+
TI->SrcIndex = Index;
525+
}
526+
continue;
527+
}
528+
479529
// Ignore assume-like intrinsics and comparisons used in assumes.
480530
if (isAssumeLikeIntrinsic(Inst))
481531
continue;
@@ -489,6 +539,16 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
489539
return false;
490540
}
491541

542+
while (!DeferredInsts.empty()) {
543+
Instruction *Inst = DeferredInsts.pop_back_val();
544+
MemTransferInst *TransferInst = cast<MemTransferInst>(Inst);
545+
// TODO: Support the case if the pointers are from different alloca or
546+
// from different address spaces.
547+
MemTransferInfo &Info = TransferInfo[TransferInst];
548+
if (!Info.SrcIndex || !Info.DestIndex)
549+
return false;
550+
}
551+
492552
LLVM_DEBUG(dbgs() << " Converting alloca to vector " << *AllocaTy << " -> "
493553
<< *VectorTy << '\n');
494554

@@ -523,6 +583,35 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca, const DataLayout &DL,
523583
Inst->eraseFromParent();
524584
break;
525585
}
586+
case Instruction::Call: {
587+
if (const MemTransferInst *MTI = dyn_cast<MemTransferInst>(Inst)) {
588+
ConstantInt *Length = cast<ConstantInt>(MTI->getLength());
589+
unsigned NumCopied = Length->getZExtValue() / ElementSize;
590+
MemTransferInfo *TI = &TransferInfo[cast<MemTransferInst>(Inst)];
591+
unsigned SrcBegin = TI->SrcIndex->getZExtValue();
592+
unsigned DestBegin = TI->DestIndex->getZExtValue();
593+
594+
SmallVector<int> Mask;
595+
for (unsigned Idx = 0; Idx < VectorTy->getNumElements(); ++Idx) {
596+
if (Idx >= DestBegin && Idx < DestBegin + NumCopied) {
597+
Mask.push_back(SrcBegin++);
598+
} else {
599+
Mask.push_back(Idx);
600+
}
601+
}
602+
Type *VecPtrTy = VectorTy->getPointerTo(Alloca->getAddressSpace());
603+
Value *BitCast = Builder.CreateBitCast(Alloca, VecPtrTy);
604+
Value *VecValue =
605+
Builder.CreateAlignedLoad(VectorTy, BitCast, Alloca->getAlign());
606+
Value *NewVecValue = Builder.CreateShuffleVector(VecValue, Mask);
607+
Builder.CreateAlignedStore(NewVecValue, BitCast, Alloca->getAlign());
608+
609+
Inst->eraseFromParent();
610+
} else {
611+
llvm_unreachable("Unsupported call when promoting alloca to vector");
612+
}
613+
break;
614+
}
526615

527616
default:
528617
llvm_unreachable("Inconsistency in instructions promotable to vector");

0 commit comments

Comments
 (0)