Skip to content

Commit 8e85c51

Browse files
[LV][AArch64] LoopVectorizer allows scalable frem instructions
On AArch64, an 'frem' instruction that operates on scalable vectors is replaced with a vector library call. The AArch64 cost model now accounts for this, so the LoopVectorizer no longer sees an invalid cost for scalable 'frem' instructions. When the type is not scalable, the default cost is returned, which is delegated to the BaseT TTI implementation.
1 parent ffabf73 commit 8e85c51

File tree

4 files changed

+28
-13
lines changed

4 files changed

+28
-13
lines changed

llvm/lib/Analysis/TargetTransformInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
#include "llvm/IR/IntrinsicInst.h"
1818
#include "llvm/IR/Module.h"
1919
#include "llvm/IR/Operator.h"
20-
#include "llvm/IR/PatternMatch.h"
2120
#include "llvm/InitializePasses.h"
2221
#include "llvm/Support/CommandLine.h"
2322
#include <optional>

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2902,6 +2902,21 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
29022902
if (!Ty->getScalarType()->isFP128Ty())
29032903
return LT.first;
29042904
[[fallthrough]];
2905+
case ISD::FREM: {
2906+
// Scalable frem instructions will be replaced with Vector library calls.
2907+
if (Ty->isScalableTy()) {
2908+
SmallVector<Type *, 4> OpTypes;
2909+
for (auto &Op : CxtI->operands())
2910+
OpTypes.push_back(Op->getType());
2911+
2912+
InstructionCost ScalableCost =
2913+
getCallInstrCost(nullptr, Ty, OpTypes, CostKind);
2914+
return ScalableCost;
2915+
} else {
2916+
return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
2917+
Op2Info);
2918+
}
2919+
}
29052920
case ISD::FMUL:
29062921
case ISD::FDIV:
29072922
// These nodes are marked as 'custom' just to lower them to SVE.

llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
2-
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s
1+
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
2+
; RUN: opt -mattr=+sve -mattr=+fullfp16 -enable-no-nans-fp-math -disable-output -passes="print<cost-model>" %s 2>&1 | FileCheck %s
3+
4+
target triple = "aarch64-unknown-linux-gnu"
35

4-
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
56

67
define void @fadd() {
78
; CHECK-LABEL: 'fadd'
@@ -137,14 +138,14 @@ define void @fdiv() {
137138

138139
define void @frem() {
139140
; CHECK-LABEL: 'frem'
140-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
141-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
142-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
143-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
144-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
145-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
146-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
147-
; CHECK-NEXT: Cost Model: Invalid cost for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
141+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
142+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
143+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
144+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
145+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
146+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
147+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
148+
; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
148149
; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
149150
;
150151
%V4F16 = frem <vscale x 4 x half> undef, undef

llvm/test/Analysis/CostModel/AArch64/arith-fp.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
22
; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
33

4-
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
4+
target triple = "aarch64-unknown-linux-gnu"
55

66
define i32 @fadd(i32 %arg) {
77
; CHECK-LABEL: 'fadd'

0 commit comments

Comments
 (0)