[LV][AArch64] LoopVectorizer allows scalable frem instructions

paschalis-mpeis · paschalis-mpeis · commit 8e85c511665a · 2023-12-22T15:23:39.000Z
In AArch64, when an 'frem' instruction uses scalable vectors, it will be
replaced with a vector library call. LoopVectorize is now aware of that
so it no longer returns invalid costs.

When it is not scalable, it returns the default costs, which are
delegated to the BaseT TTI Implementation.
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -17,7 +17,6 @@
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Operator.h"
-#include "llvm/IR/PatternMatch.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include <optional>
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2902,6 +2902,21 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
     if (!Ty->getScalarType()->isFP128Ty())
       return LT.first;
     [[fallthrough]];
+  case ISD::FREM: {
+    // Scalable frem instructions will be replaced with Vector library calls.
+    if (Ty->isScalableTy()) {
+      SmallVector<Type *, 4> OpTypes;
+      for (auto &Op : CxtI->operands())
+        OpTypes.push_back(Op->getType());
+
+      InstructionCost ScalableCost =
+          getCallInstrCost(nullptr, Ty, OpTypes, CostKind);
+      return ScalableCost;
+    } else {
+      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info,
+                                           Op2Info);
+    }
+  }
   case ISD::FMUL:
   case ISD::FDIV:
     // These nodes are marked as 'custom' just to lower them to SVE.
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve.ll
@@ -1,7 +1,8 @@
-; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -enable-no-nans-fp-math -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 -mattr=+sve | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -mattr=+sve -mattr=+fullfp16 -enable-no-nans-fp-math -disable-output -passes="print<cost-model>" %s 2>&1 | FileCheck %s
+
+target triple = "aarch64-unknown-linux-gnu"
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 
 define void @fadd() {
 ; CHECK-LABEL: 'fadd'
@@ -137,14 +138,14 @@ define void @fdiv() {
 
 define void @frem() {
 ; CHECK-LABEL: 'frem'
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
-; CHECK-NEXT:  Cost Model: Invalid cost for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F16 = frem <vscale x 4 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F16 = frem <vscale x 8 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V16F16 = frem <vscale x 16 x half> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = frem <vscale x 2 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F32 = frem <vscale x 4 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V8F32 = frem <vscale x 8 x float> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = frem <vscale x 2 x double> undef, undef
+; CHECK-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = frem <vscale x 4 x double> undef, undef
 ; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %V4F16 = frem <vscale x 4 x half> undef, undef
diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; RUN: opt < %s -enable-no-nans-fp-math  -passes="print<cost-model>" 2>&1 -disable-output -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s
 
-target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
 
 define i32 @fadd(i32 %arg) {
 ; CHECK-LABEL: 'fadd'