-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[LV][AArch64] LoopVectorizer allows scalable frem instructions #76247
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
92c8ef7
d872bc4
ca9f165
ca78dd0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6951,10 +6951,26 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF, | |
Op2Info.Kind = TargetTransformInfo::OK_UniformValue; | ||
|
||
SmallVector<const Value *, 4> Operands(I->operand_values()); | ||
return TTI.getArithmeticInstrCost( | ||
auto InstrCost = TTI.getArithmeticInstrCost( | ||
I->getOpcode(), VectorTy, CostKind, | ||
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, | ||
Op2Info, Operands, I); | ||
|
||
// Some targets replace frem with vector library calls. | ||
if (I->getOpcode() == Instruction::FRem) { | ||
LibFunc Func; | ||
if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func)) { | ||
if (TLI->isFunctionVectorizable(TLI->getName(Func))) { | ||
SmallVector<Type *, 4> OpTypes; | ||
for (auto &Op : I->operands()) | ||
OpTypes.push_back(Op->getType()); | ||
auto CallCost = | ||
TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind); | ||
return std::min(InstrCost, CallCost); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I am not sure if that is correct. Otherwise we can have a pathological case when:
In my opinion that is more misleading than just returning CallCost here. @huntergr-arm what do you think? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @mgabka If CallCost is invalid because there's no available mapping or very high because it's suboptimal and the target in question (not AArch64, at least at present) has an frem instruction, we would want to return InstrCost. If replace-with-veclib were to override that, I think it would be a bug in that pass. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The below PR will bring such functionality to |
||
} | ||
} | ||
} | ||
return InstrCost; | ||
} | ||
case Instruction::FNeg: { | ||
return TTI.getArithmeticInstrCost( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,98 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "cost.*vscale.*frem" --version 4 | ||
paschalis-mpeis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
; RUN: opt -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=NO-VECLIB | ||
paschalis-mpeis marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
; RUN: opt -vector-library=sleefgnuabi -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SLEEF | ||
|
||
; RUN: opt -vector-library=sleefgnuabi -mattr=+sve -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SLEEF-TAILFOLD | ||
|
||
; RUN: opt -vector-library=ArmPL -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=ARMPL | ||
|
||
; RUN: opt -vector-library=ArmPL -mattr=+sve -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=ARMPL-TAILFOLD | ||
|
||
target triple = "aarch64-unknown-linux-gnu" | ||
|
||
; Scalar loop over iv = 0..999 that loads a double from %in.ptr[iv], computes
; frem of that value with itself, and stores the result to %out.ptr[iv].
; Both pointer arguments are noalias. The check lines below expect the frem
; cost at scalable VFs to be Invalid when no vector library is given, and 10
; when sleefgnuabi or ArmPL is selected (with and without tail folding).
define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) { | ||
; NO-VECLIB-LABEL: 'frem_f64' | ||
; NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in | ||
; NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem double %in, %in | ||
; NO-VECLIB: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): frem %res = frem double %in, %in | ||
; | ||
; SLEEF-LABEL: 'frem_f64' | ||
; SLEEF: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in | ||
; SLEEF: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in | ||
; | ||
; SLEEF-TAILFOLD-LABEL: 'frem_f64' | ||
; SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in | ||
; SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in | ||
; | ||
; ARMPL-LABEL: 'frem_f64' | ||
; ARMPL: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in | ||
; ARMPL: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in | ||
; | ||
; ARMPL-TAILFOLD-LABEL: 'frem_f64' | ||
; ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in | ||
; ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in | ||
; | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv | ||
%in = load double, ptr %in.gep, align 8 | ||
%res = frem double %in, %in | ||
%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv | ||
store double %res, ptr %out.gep, align 8 | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%exitcond = icmp eq i64 %iv.next, 1000 | ||
br i1 %exitcond, label %for.end, label %for.body | ||
|
||
for.end: | ||
ret void | ||
} | ||
|
||
; Scalar loop over iv = 0..999 that loads a float from %in.ptr[iv], computes
; frem of that value with itself, and stores the result to %out.ptr[iv].
; Both pointer arguments are noalias. The check lines below expect the frem
; cost at scalable VFs to be Invalid when no vector library is given, and 10
; when sleefgnuabi or ArmPL is selected (with and without tail folding).
define void @frem_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) { | ||
; NO-VECLIB-LABEL: 'frem_f32' | ||
; NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in | ||
; NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in | ||
; NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = frem float %in, %in | ||
; NO-VECLIB: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): frem %res = frem float %in, %in | ||
; | ||
; SLEEF-LABEL: 'frem_f32' | ||
; SLEEF: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in | ||
; SLEEF: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in | ||
; SLEEF: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in | ||
; | ||
; SLEEF-TAILFOLD-LABEL: 'frem_f32' | ||
; SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in | ||
; SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in | ||
; SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in | ||
; | ||
; ARMPL-LABEL: 'frem_f32' | ||
; ARMPL: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in | ||
; ARMPL: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in | ||
; ARMPL: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in | ||
; | ||
; ARMPL-TAILFOLD-LABEL: 'frem_f32' | ||
; ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in | ||
; ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in | ||
; ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in | ||
; | ||
entry: | ||
br label %for.body | ||
|
||
for.body: | ||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] | ||
%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv | ||
; Consecutive float elements are 4 bytes apart, so the load can only claim
; 4-byte alignment (same as the store below).
%in = load float, ptr %in.gep, align 4 | ||
%res = frem float %in, %in | ||
%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv | ||
store float %res, ptr %out.gep, align 4 | ||
%iv.next = add nuw nsw i64 %iv, 1 | ||
%exitcond = icmp eq i64 %iv.next, 1000 | ||
br i1 %exitcond, label %for.end, label %for.body | ||
|
||
for.end: | ||
ret void | ||
} |
Uh oh!
There was an error while loading. Please reload this page.