Skip to content

Commit 35e7eeb

Browse files
committed
Addressing comments
* Mask is optional for vplan recipe * Left load+update to DCE * Documentation for new recipe * Stored histogram info by reference * Allow GEPs with more than one index value, provided that all but the last are constant. * Cleanup to remove some unnecessary local variables. * Minor nits addressed.
1 parent 7d1fa7c commit 35e7eeb

File tree

6 files changed

+332
-59
lines changed

6 files changed

+332
-59
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1050,7 +1050,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10501050
return true;
10511051
}
10521052

1053-
/// Find Histogram counts that match high-level code in loops:
1053+
/// Find histogram operations that match high-level code in loops:
10541054
/// \code
10551055
/// buckets[indices[i]]+=step;
10561056
/// \endcode
@@ -1063,10 +1063,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
10631063
/// regardless of hardware support. When there is support, it additionally
10641064
/// stores the BinOp/Load pairs in \p HistogramCounts, as well the pointers
10651065
/// used to update histogram in \p HistogramPtrs.
1066-
1067-
static bool findHistograms(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1068-
const PredicatedScalarEvolution &PSE,
1069-
SmallVectorImpl<HistogramInfo> &Histograms) {
1066+
static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
1067+
const PredicatedScalarEvolution &PSE,
1068+
SmallVectorImpl<HistogramInfo> &Histograms) {
10701069

10711070
// Store value must come from a Binary Operation.
10721071
Instruction *HPtrInstr = nullptr;
@@ -1088,23 +1087,27 @@ static bool findHistograms(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
10881087
return false;
10891088

10901089
// The address to store is calculated through a GEP Instruction.
1091-
// FIXME: Support GEPs with more operands.
1092-
GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
1093-
if (!HPtr || HPtr->getNumOperands() > 2)
1090+
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(HPtrInstr);
1091+
if (!GEP)
1092+
return false;
1093+
1094+
// Restrict address calculation to constant indices except for the last term.
1095+
Value *HIdx = nullptr;
1096+
for (Value *Index : GEP->indices()) {
1097+
if (HIdx)
1098+
return false;
1099+
if (!isa<ConstantInt>(Index))
1100+
HIdx = Index;
1101+
}
1102+
1103+
if (!HIdx)
10941104
return false;
10951105

10961106
// Check that the index is calculated by loading from another array. Ignore
10971107
// any extensions.
10981108
// FIXME: Support indices from other sources that a linear load from memory?
1099-
Value *HIdx = HPtr->getOperand(1);
1100-
Instruction *IdxInst = nullptr;
1101-
if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Instruction(IdxInst))))
1102-
return false;
1103-
1104-
// Currently restricting this to linear addressing when loading indices.
1105-
LoadInst *VLoad = dyn_cast<LoadInst>(IdxInst);
11061109
Value *VPtrVal;
1107-
if (!VLoad || !match(VLoad, m_Load(m_Value(VPtrVal))))
1110+
if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
11081111
return false;
11091112

11101113
if (!isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal)))
@@ -1134,7 +1137,7 @@ bool LoopVectorizationLegality::canVectorizeUnknownDependences() {
11341137
LAI = &LAIs.getInfo(*TheLoop);
11351138
const MemoryDepChecker &DepChecker = LAI->getDepChecker();
11361139
const auto *Deps = DepChecker.getDependences();
1137-
if (!Deps || Deps->size() > 1)
1140+
if (!Deps || Deps->size() != 1)
11381141
return false;
11391142

11401143
const MemoryDepChecker::Dependence &Dep = (*Deps).front();
@@ -1151,7 +1154,7 @@ bool LoopVectorizationLegality::canVectorizeUnknownDependences() {
11511154
return false;
11521155

11531156
LLVM_DEBUG(dbgs() << "LV: Checking for a histogram on: " << *SI << "\n");
1154-
return findHistograms(LI, SI, TheLoop, LAI->getPSE(), Histograms);
1157+
return findHistogram(LI, SI, TheLoop, LAI->getPSE(), Histograms);
11551158
}
11561159

11571160
bool LoopVectorizationLegality::canVectorizeMemory() {

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -4660,10 +4660,7 @@ bool LoopVectorizationPlanner::isCandidateForEpilogueVectorization(
46604660
return false;
46614661

46624662
// Loops containing histograms are not currently supported.
4663-
if (!Legal->getHistograms().empty())
4664-
return false;
4665-
4666-
return true;
4663+
return Legal->getHistograms().empty();
46674664
}
46684665

46694666
bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
@@ -8296,17 +8293,11 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
82968293
HGramOps.push_back(getVPValueOrAddLiveIn(HI->Update->getOperand(1), Plan));
82978294

82988295
// In case of predicated execution (due to tail-folding, or conditional
8299-
// execution, or both), pass the relevant mask. When there is no such mask,
8300-
// generate an all-true mask.
8301-
VPValue *Mask = nullptr;
8296+
// execution, or both), pass the relevant mask.
83028297
if (Legal->isMaskRequired(HI->Store))
8303-
Mask = getBlockInMask(HI->Store->getParent());
8304-
else
8305-
Mask = Plan.getOrAddLiveIn(
8306-
ConstantInt::getTrue(IntegerType::getInt1Ty(HI->Load->getContext())));
8307-
HGramOps.push_back(Mask);
8298+
HGramOps.push_back(getBlockInMask(HI->Store->getParent()));
83088299

8309-
return new VPHistogramRecipe(HI, Opcode,
8300+
return new VPHistogramRecipe(*HI, Opcode,
83108301
make_range(HGramOps.begin(), HGramOps.end()),
83118302
HI->Store->getDebugLoc());
83128303
}
@@ -8711,15 +8702,6 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
87118702
Operands = {OpRange.begin(), OpRange.end()};
87128703
}
87138704

8714-
// If this is a load instruction or a binop associated with a histogram,
8715-
// leave it until the store instruction to emit a combined intrinsic.
8716-
// Note that if the initial VF is scalar, we need to generate the normal
8717-
// clone recipe for these instructions. A histogram recipe will only be
8718-
// generated when minVF > 1.
8719-
if (Legal->getHistogramInfo(Instr) && !isa<StoreInst>(Instr) &&
8720-
!Range.Start.isScalar())
8721-
continue;
8722-
87238705
// Invariant stores inside loop will be deleted and a single store
87248706
// with the final reduction value will be added to the exit block
87258707
StoreInst *SI;

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,13 +1577,18 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
15771577
#endif
15781578
};
15791579

1580+
/// A recipe representing a sequence of load -> update -> store as part of
1581+
/// a histogram operation. This means there may be aliasing between vector
1582+
/// lanes, which is handled by the llvm.experimental.vector.histogram family
1583+
/// of intrinsics. The only update operations currently supported are
1584+
/// 'add' and 'sub' where the other term is loop-invariant.
15801585
class VPHistogramRecipe : public VPRecipeBase {
1581-
const HistogramInfo *Info;
1586+
const HistogramInfo &Info;
15821587
unsigned Opcode;
15831588

15841589
public:
15851590
template <typename IterT>
1586-
VPHistogramRecipe(const HistogramInfo *HI, unsigned Opcode,
1591+
VPHistogramRecipe(const HistogramInfo &HI, unsigned Opcode,
15871592
iterator_range<IterT> Operands, DebugLoc DL = {})
15881593
: VPRecipeBase(VPDef::VPHistogramSC, Operands, DL), Info(HI),
15891594
Opcode(Opcode) {}
@@ -1596,12 +1601,20 @@ class VPHistogramRecipe : public VPRecipeBase {
15961601

15971602
VP_CLASSOF_IMPL(VPDef::VPHistogramSC);
15981603

1599-
// Produce a histogram operation with widened ingredients
1604+
/// Produce a vectorized histogram operation.
16001605
void execute(VPTransformState &State) override;
16011606

16021607
unsigned getOpcode() const { return Opcode; }
16031608

1604-
const HistogramInfo *getHistogramInfo() const { return Info; }
1609+
const HistogramInfo &getHistogramInfo() const { return Info; }
1610+
1611+
/// Return the mask operand if one was provided, or a null pointer if all
1612+
/// lanes should be executed unconditionally.
1613+
VPValue *getMask() const {
1614+
if (getNumOperands() == 3)
1615+
return getOperand(2);
1616+
return nullptr;
1617+
}
16051618

16061619
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16071620
/// Print the recipe

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ static Instruction *getInstructionForCost(const VPRecipeBase *R) {
284284
// FIXME: Override the cost method properly to take gather/scatter cost
285285
// into account, instead of just the intrinsic via the legacy model.
286286
if (auto *HG = dyn_cast<VPHistogramRecipe>(R))
287-
return HG->getHistogramInfo()->Update;
287+
return HG->getHistogramInfo().Update;
288288
return nullptr;
289289
}
290290

@@ -973,7 +973,17 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
973973
for (unsigned Part = 0; Part < State.UF; ++Part) {
974974
Value *Address = State.get(getOperand(0), Part);
975975
Value *IncVec = State.get(getOperand(1), Part);
976-
Value *Mask = State.get(getOperand(2), Part);
976+
VectorType *VTy = cast<VectorType>(Address->getType());
977+
978+
// The histogram intrinsic requires a mask even if the recipe doesn't;
979+
// if the mask operand was omitted then all lanes should be executed and
980+
// we just need to synthesize an all-true mask.
981+
Value *Mask = nullptr;
982+
if (VPValue *VPMask = getMask())
983+
Mask = State.get(VPMask, Part);
984+
else
985+
Mask = Builder.CreateVectorSplat(
986+
VTy->getElementCount(), ConstantInt::getTrue(Builder.getInt1Ty()));
977987

978988
// Not sure how to make IncAmt stay scalar yet. For now just extract the
979989
// first element and tidy up later.
@@ -985,9 +995,11 @@ void VPHistogramRecipe::execute(VPTransformState &State) {
985995
// add a separate intrinsic in future, but for now we'll try this.
986996
if (Opcode == Instruction::Sub)
987997
IncAmt = Builder.CreateNeg(IncAmt);
998+
else
999+
assert(Opcode == Instruction::Add);
9881000

9891001
State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
990-
{Address->getType(), IncAmt->getType()},
1002+
{VTy, IncAmt->getType()},
9911003
{Address, IncAmt, Mask});
9921004
}
9931005
}
@@ -997,10 +1009,19 @@ void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
9971009
VPSlotTracker &SlotTracker) const {
9981010
O << Indent << "WIDEN-HISTOGRAM buckets: ";
9991011
getOperand(0)->printAsOperand(O, SlotTracker);
1000-
O << ", inc: ";
1012+
1013+
if (Opcode == Instruction::Sub)
1014+
O << ", dec: ";
1015+
else {
1016+
assert(Opcode == Instruction::Add);
1017+
O << ", inc: ";
1018+
}
10011019
getOperand(1)->printAsOperand(O, SlotTracker);
1002-
O << ", mask: ";
1003-
getOperand(2)->printAsOperand(O, SlotTracker);
1020+
1021+
if (VPValue *Mask = getMask()) {
1022+
O << ", mask: ";
1023+
Mask->printAsOperand(O, SlotTracker);
1024+
}
10041025
}
10051026

10061027
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,

0 commit comments

Comments
 (0)