Skip to content

Commit 37c87d5

Browse files
[LV][AArch64] LoopVectorizer allows scalable frem instructions (#76247)
LoopVectorizer is aware when a target can replace a scalable frem instruction with a vector library call for a given VF, and in that case it returns the relevant cost. Otherwise, it returns an invalid cost (as previously). Add tests that check the costs on AArch64, both when no vector library is available and when one is (with and without tail-folding). NOTE: Invoking CostModel directly (not through LV) would still return invalid costs.
1 parent 9ca1a08 commit 37c87d5

File tree

2 files changed

+172
-1
lines changed

2 files changed

+172
-1
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6949,10 +6949,25 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
69496949
Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
69506950

69516951
SmallVector<const Value *, 4> Operands(I->operand_values());
6952-
return TTI.getArithmeticInstrCost(
6952+
auto InstrCost = TTI.getArithmeticInstrCost(
69536953
I->getOpcode(), VectorTy, CostKind,
69546954
{TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
69556955
Op2Info, Operands, I);
6956+
6957+
// Some targets can replace frem with vector library calls.
6958+
InstructionCost VecCallCost = InstructionCost::getInvalid();
6959+
if (I->getOpcode() == Instruction::FRem) {
6960+
LibFunc Func;
6961+
if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func) &&
6962+
TLI->isFunctionVectorizable(TLI->getName(Func), VF)) {
6963+
SmallVector<Type *, 4> OpTypes;
6964+
for (auto &Op : I->operands())
6965+
OpTypes.push_back(Op->getType());
6966+
VecCallCost =
6967+
TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind);
6968+
}
6969+
}
6970+
return std::min(InstrCost, VecCallCost);
69566971
}
69576972
case Instruction::FNeg: {
69586973
return TTI.getArithmeticInstrCost(
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "estimated cost.*frem" --version 4
2+
3+
; RUN: opt -mattr=+neon -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=NEON-NO-VECLIB
4+
5+
; RUN: opt -mattr=+neon -vector-library=sleefgnuabi -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=NEON-SLEEF
6+
7+
; RUN: opt -mattr=+neon -vector-library=ArmPL -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=NEON-ARMPL
8+
9+
; RUN: opt -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SVE-NO-VECLIB
10+
11+
; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SVE-SLEEF
12+
13+
; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SVE-SLEEF-TAILFOLD
14+
15+
; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SVE-ARMPL
16+
17+
; RUN: opt -mattr=+sve -vector-library=ArmPL -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SVE-ARMPL-TAILFOLD
18+
19+
; REQUIRES: asserts
20+
21+
target triple = "aarch64-unknown-linux-gnu"
22+
23+
define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
24+
; NEON-NO-VECLIB-LABEL: 'frem_f64'
25+
; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
26+
; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
27+
;
28+
; NEON-SLEEF-LABEL: 'frem_f64'
29+
; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
30+
; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
31+
;
32+
; NEON-ARMPL-LABEL: 'frem_f64'
33+
; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
34+
; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
35+
;
36+
; SVE-NO-VECLIB-LABEL: 'frem_f64'
37+
; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
38+
; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
39+
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
40+
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem double %in, %in
41+
;
42+
; SVE-SLEEF-LABEL: 'frem_f64'
43+
; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
44+
; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
45+
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
46+
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
47+
;
48+
; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f64'
49+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
50+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
51+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
52+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
53+
;
54+
; SVE-ARMPL-LABEL: 'frem_f64'
55+
; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
56+
; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
57+
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
58+
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
59+
;
60+
; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f64'
61+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem double %in, %in
62+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem double %in, %in
63+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
64+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
65+
;
66+
entry:
67+
br label %for.body
68+
69+
for.body:
70+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
71+
%in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
72+
%in = load double, ptr %in.gep, align 8
73+
%res = frem double %in, %in
74+
%out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
75+
store double %res, ptr %out.gep, align 8
76+
%iv.next = add nuw nsw i64 %iv, 1
77+
%exitcond = icmp eq i64 %iv.next, 1000
78+
br i1 %exitcond, label %for.end, label %for.body
79+
80+
for.end:
81+
ret void
82+
}
83+
84+
define void @frem_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
85+
; NEON-NO-VECLIB-LABEL: 'frem_f32'
86+
; NEON-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
87+
; NEON-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
88+
; NEON-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in
89+
;
90+
; NEON-SLEEF-LABEL: 'frem_f32'
91+
; NEON-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
92+
; NEON-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
93+
; NEON-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
94+
;
95+
; NEON-ARMPL-LABEL: 'frem_f32'
96+
; NEON-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
97+
; NEON-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
98+
; NEON-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
99+
;
100+
; SVE-NO-VECLIB-LABEL: 'frem_f32'
101+
; SVE-NO-VECLIB: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
102+
; SVE-NO-VECLIB: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
103+
; SVE-NO-VECLIB: LV: Found an estimated cost of 20 for VF 4 For instruction: %res = frem float %in, %in
104+
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
105+
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
106+
; SVE-NO-VECLIB: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = frem float %in, %in
107+
;
108+
; SVE-SLEEF-LABEL: 'frem_f32'
109+
; SVE-SLEEF: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
110+
; SVE-SLEEF: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
111+
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
112+
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
113+
; SVE-SLEEF: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
114+
; SVE-SLEEF: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
115+
;
116+
; SVE-SLEEF-TAILFOLD-LABEL: 'frem_f32'
117+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
118+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
119+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
120+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
121+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
122+
; SVE-SLEEF-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
123+
;
124+
; SVE-ARMPL-LABEL: 'frem_f32'
125+
; SVE-ARMPL: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
126+
; SVE-ARMPL: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
127+
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
128+
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
129+
; SVE-ARMPL: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
130+
; SVE-ARMPL: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
131+
;
132+
; SVE-ARMPL-TAILFOLD-LABEL: 'frem_f32'
133+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 2 for VF 1 For instruction: %res = frem float %in, %in
134+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 8 for VF 2 For instruction: %res = frem float %in, %in
135+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF 4 For instruction: %res = frem float %in, %in
136+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
137+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
138+
; SVE-ARMPL-TAILFOLD: LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
139+
;
140+
entry:
141+
br label %for.body
142+
143+
for.body:
144+
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
145+
%in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
146+
%in = load float, ptr %in.gep, align 8
147+
%res = frem float %in, %in
148+
%out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
149+
store float %res, ptr %out.gep, align 4
150+
%iv.next = add nuw nsw i64 %iv, 1
151+
%exitcond = icmp eq i64 %iv.next, 1000
152+
br i1 %exitcond, label %for.end, label %for.body
153+
154+
for.end:
155+
ret void
156+
}

0 commit comments

Comments
 (0)