Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 1e086c7

Browse files
committed
[SLPVectorizer] Add initial alternate opcode support for cast instructions. (REAPPLIED-2)
We currently only support binary instructions in the alternate opcode shuffles. This patch is an initial attempt at adding cast instructions as well; this raises several issues that we probably want to address as we continue to generalize the alternate mechanism: 1 - Duplication of cost determination - we should probably add scalar/vector cost helper functions and have BoUpSLP::getEntryCost use them instead of determining costs directly. 2 - Support alternate instructions with the same opcode (e.g. casts with different src types) - alternate vectorization of calls with different IntrinsicIDs will require this. 3 - Allow alternates to be a different instruction type - mixing binary/cast/call etc. 4 - Allow passthrough of unsupported alternate instructions - related to PR30787/D28907 'copyable' elements. Reapplied with a fix to accept two different casts only if they come from the same source type (PR38154). Differential Revision: https://reviews.llvm.org/D49135 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336989 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 0bc0c53 commit 1e086c7

File tree

2 files changed

+361
-102
lines changed

2 files changed

+361
-102
lines changed

lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 72 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -353,23 +353,39 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
353353
if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
354354
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
355355

356+
bool IsCastOp = isa<CastInst>(VL[BaseIndex]);
356357
bool IsBinOp = isa<BinaryOperator>(VL[BaseIndex]);
357358
unsigned Opcode = cast<Instruction>(VL[BaseIndex])->getOpcode();
358359
unsigned AltOpcode = Opcode;
359360
unsigned AltIndex = BaseIndex;
360361

361362
// Check for one alternate opcode from another BinaryOperator.
362-
// TODO - can we support other operators (casts etc.)?
363+
// TODO - generalize to support all operators (types, calls etc.).
363364
for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
364365
unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
365-
if (InstOpcode != Opcode && InstOpcode != AltOpcode) {
366-
if (Opcode == AltOpcode && IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
366+
if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
367+
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
368+
continue;
369+
if (Opcode == AltOpcode) {
367370
AltOpcode = InstOpcode;
368371
AltIndex = Cnt;
369372
continue;
370373
}
371-
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
372-
}
374+
} else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
375+
Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
376+
Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
377+
if (Ty0 == Ty1) {
378+
if (InstOpcode == Opcode || InstOpcode == AltOpcode)
379+
continue;
380+
if (Opcode == AltOpcode) {
381+
AltOpcode = InstOpcode;
382+
AltIndex = Cnt;
383+
continue;
384+
}
385+
}
386+
} else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
387+
continue;
388+
return InstructionsState(VL[BaseIndex], nullptr, nullptr);
373389
}
374390

375391
return InstructionsState(VL[BaseIndex], cast<Instruction>(VL[BaseIndex]),
@@ -2363,32 +2379,45 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
23632379
return ReuseShuffleCost + VecCallCost - ScalarCallCost;
23642380
}
23652381
case Instruction::ShuffleVector: {
2366-
assert(S.isAltShuffle() && Instruction::isBinaryOp(S.getOpcode()) &&
2367-
Instruction::isBinaryOp(S.getAltOpcode()) &&
2382+
assert(S.isAltShuffle() &&
2383+
((Instruction::isBinaryOp(S.getOpcode()) &&
2384+
Instruction::isBinaryOp(S.getAltOpcode())) ||
2385+
(Instruction::isCast(S.getOpcode()) &&
2386+
Instruction::isCast(S.getAltOpcode()))) &&
23682387
"Invalid Shuffle Vector Operand");
23692388
int ScalarCost = 0;
23702389
if (NeedToShuffleReuses) {
23712390
for (unsigned Idx : E->ReuseShuffleIndices) {
23722391
Instruction *I = cast<Instruction>(VL[Idx]);
2373-
ReuseShuffleCost -=
2374-
TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy);
2392+
ReuseShuffleCost -= TTI->getInstructionCost(
2393+
I, TargetTransformInfo::TCK_RecipThroughput);
23752394
}
23762395
for (Value *V : VL) {
23772396
Instruction *I = cast<Instruction>(V);
2378-
ReuseShuffleCost +=
2379-
TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy);
2397+
ReuseShuffleCost += TTI->getInstructionCost(
2398+
I, TargetTransformInfo::TCK_RecipThroughput);
23802399
}
23812400
}
23822401
int VecCost = 0;
23832402
for (Value *i : VL) {
23842403
Instruction *I = cast<Instruction>(i);
23852404
assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
2386-
ScalarCost += TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy);
2405+
ScalarCost += TTI->getInstructionCost(
2406+
I, TargetTransformInfo::TCK_RecipThroughput);
23872407
}
23882408
// VecCost is equal to sum of the cost of creating 2 vectors
23892409
// and the cost of creating shuffle.
2390-
VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
2391-
VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
2410+
if (Instruction::isBinaryOp(S.getOpcode())) {
2411+
VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
2412+
VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
2413+
} else {
2414+
Type *Src0SclTy = S.MainOp->getOperand(0)->getType();
2415+
Type *Src1SclTy = S.AltOp->getOperand(0)->getType();
2416+
VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
2417+
VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
2418+
VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty);
2419+
VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty);
2420+
}
23922421
VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
23932422
return ReuseShuffleCost + VecCost - ScalarCost;
23942423
}
@@ -3470,30 +3499,47 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
34703499
}
34713500
case Instruction::ShuffleVector: {
34723501
ValueList LHSVL, RHSVL;
3473-
assert(S.isAltShuffle() && Instruction::isBinaryOp(S.getOpcode()) &&
3474-
Instruction::isBinaryOp(S.getAltOpcode()) &&
3502+
assert(S.isAltShuffle() &&
3503+
((Instruction::isBinaryOp(S.getOpcode()) &&
3504+
Instruction::isBinaryOp(S.getAltOpcode())) ||
3505+
(Instruction::isCast(S.getOpcode()) &&
3506+
Instruction::isCast(S.getAltOpcode()))) &&
34753507
"Invalid Shuffle Vector Operand");
3476-
reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
3477-
setInsertPointAfterBundle(E->Scalars, S);
34783508

3479-
Value *LHS = vectorizeTree(LHSVL);
3480-
Value *RHS = vectorizeTree(RHSVL);
3509+
Value *LHS, *RHS;
3510+
if (Instruction::isBinaryOp(S.getOpcode())) {
3511+
reorderAltShuffleOperands(S, E->Scalars, LHSVL, RHSVL);
3512+
setInsertPointAfterBundle(E->Scalars, S);
3513+
LHS = vectorizeTree(LHSVL);
3514+
RHS = vectorizeTree(RHSVL);
3515+
} else {
3516+
ValueList INVL;
3517+
for (Value *V : E->Scalars)
3518+
INVL.push_back(cast<Instruction>(V)->getOperand(0));
3519+
setInsertPointAfterBundle(E->Scalars, S);
3520+
LHS = vectorizeTree(INVL);
3521+
}
34813522

34823523
if (E->VectorizedValue) {
34833524
LLVM_DEBUG(dbgs() << "SLP: Diamond merged for " << *VL0 << ".\n");
34843525
return E->VectorizedValue;
34853526
}
34863527

3487-
// Create a vector of LHS op1 RHS
3488-
Value *V0 = Builder.CreateBinOp(
3528+
Value *V0, *V1;
3529+
if (Instruction::isBinaryOp(S.getOpcode())) {
3530+
V0 = Builder.CreateBinOp(
34893531
static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
3490-
3491-
// Create a vector of LHS op2 RHS
3492-
Value *V1 = Builder.CreateBinOp(
3532+
V1 = Builder.CreateBinOp(
34933533
static_cast<Instruction::BinaryOps>(S.getAltOpcode()), LHS, RHS);
3534+
} else {
3535+
V0 = Builder.CreateCast(
3536+
static_cast<Instruction::CastOps>(S.getOpcode()), LHS, VecTy);
3537+
V1 = Builder.CreateCast(
3538+
static_cast<Instruction::CastOps>(S.getAltOpcode()), LHS, VecTy);
3539+
}
34943540

34953541
// Create shuffle to take alternate operations from the vector.
3496-
// Also, gather up odd and even scalar ops to propagate IR flags to
3542+
// Also, gather up main and alt scalar ops to propagate IR flags to
34973543
// each vector operation.
34983544
ValueList OpScalars, AltScalars;
34993545
unsigned e = E->Scalars.size();

0 commit comments

Comments
 (0)