Skip to content

Commit 031bb9e

Browse files
committed
Rebase, fix up tests, address comments:
* Check the AddRec's loop against the current loop
* Remove stray struct token declaration
* Add comment on the Opcode field of VPHistogramRecipe
* Use Ctx.Types.inferScalarType when computing cost
* Check for a LiveIn value if there isn't an underlying value for costing
* Tolerate nulls for both the underlying value and the LiveIn value, just in case
1 parent 131acee commit 031bb9e

File tree

5 files changed

+16
-13
lines changed

5 files changed

+16
-13
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,8 +1119,9 @@ static bool findHistogram(LoadInst *LI, StoreInst *HSt, Loop *TheLoop,
11191119
if (!match(HIdx, m_ZExtOrSExtOrSelf(m_Load(m_Value(VPtrVal)))))
11201120
return false;
11211121

1122-
if (!isa<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal)) ||
1123-
TheLoop->isLoopInvariant(VPtrVal))
1122+
// Make sure the index address varies in this loop, not an outer loop.
1123+
const auto *AR = dyn_cast<SCEVAddRecExpr>(PSE.getSE()->getSCEV(VPtrVal));
1124+
if (!AR || AR->getLoop() != TheLoop)
11241125
return false;
11251126

11261127
// Ensure we'll have the same mask by checking that all parts of the histogram

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,6 @@ class LoopVectorizationCostModel;
6969
class LoopVersioning;
7070

7171
struct VPCostContext;
72-
struct HistogramInfo;
7372

7473
namespace Intrinsic {
7574
typedef unsigned ID;
@@ -1698,6 +1697,7 @@ class VPWidenCallRecipe : public VPSingleDefRecipe {
16981697
/// of intrinsics. The only update operations currently supported are
16991698
/// 'add' and 'sub' where the other term is loop-invariant.
17001699
class VPHistogramRecipe : public VPRecipeBase {
1700+
/// Opcode of the update operation, currently either add or sub.
17011701
unsigned Opcode;
17021702

17031703
public:

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1090,20 +1090,24 @@ InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
10901090
// whether we can use base + vec-of-smaller-indices or just
10911091
// vec-of-pointers.
10921092
assert(VF.isVector() && "Invalid VF for histogram cost");
1093-
Value *Address = getOperand(0)->getUnderlyingValue();
1094-
Value *IncAmt = getOperand(1)->getUnderlyingValue();
1095-
Type *IncTy = IncAmt->getType();
1093+
Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
1094+
VPValue *IncAmt = getOperand(1);
1095+
Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
10961096
VectorType *VTy = VectorType::get(IncTy, VF);
10971097

10981098
// Assume that a non-constant update value (or a constant != 1) requires
10991099
// a multiply, and add that into the cost.
1100+
Value *RHS = IncAmt->getUnderlyingValue();
1101+
// The underlying value may be null; if so, check for a live-in instead.
1102+
if (!RHS && IncAmt->isLiveIn())
1103+
RHS = IncAmt->getLiveInIRValue();
11001104
InstructionCost MulCost = TTI::TCC_Free;
1101-
ConstantInt *RHS = dyn_cast<ConstantInt>(IncAmt);
1102-
if (!RHS || RHS->getZExtValue() != 1)
1105+
ConstantInt *CI = dyn_cast_if_present<ConstantInt>(RHS);
1106+
if (!CI || CI->getZExtValue() != 1)
11031107
MulCost = Ctx.TTI.getArithmeticInstrCost(Instruction::Mul, VTy);
11041108

11051109
// Find the cost of the histogram operation itself.
1106-
Type *PtrTy = VectorType::get(Address->getType(), VF);
1110+
Type *PtrTy = VectorType::get(AddressTy, VF);
11071111
Type *MaskTy = VectorType::get(Type::getInt1Ty(Ctx.LLVMCtx), VF);
11081112
IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_histogram_add,
11091113
Type::getVoidTy(Ctx.LLVMCtx),

llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-too-many-deps.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,7 @@ define void @many_deps(ptr noalias %buckets, ptr %array, ptr %indices, ptr %othe
7272
; NORMAL_DEP_LIMIT-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
7373
; NORMAL_DEP_LIMIT-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2
7474
; NORMAL_DEP_LIMIT-NEXT: [[TMP7:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
75-
; NORMAL_DEP_LIMIT-NEXT: [[TMP8:%.*]] = call i32 @llvm.vscale.i32()
76-
; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = shl nuw nsw i32 [[TMP8]], 2
75+
; NORMAL_DEP_LIMIT-NEXT: [[TMP9:%.*]] = trunc nuw nsw i64 [[TMP6]] to i32
7776
; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP9]], i64 0
7877
; NORMAL_DEP_LIMIT-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
7978
; NORMAL_DEP_LIMIT-NEXT: br label [[FOR_BODY:%.*]]

llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -732,8 +732,7 @@ define void @simple_histogram_rtdepcheck(ptr noalias %buckets, ptr %array, ptr %
732732
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
733733
; CHECK-NEXT: [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
734734
; CHECK-NEXT: [[TMP9:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
735-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
736-
; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i32 [[TMP10]], 2
735+
; CHECK-NEXT: [[TMP11:%.*]] = trunc nuw nsw i64 [[TMP8]] to i32
737736
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP11]], i64 0
738737
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
739738
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]

0 commit comments

Comments
 (0)