llvm · paschalis-mpeis · Jan 18, 2024 · Dec 21, 2023 · Jan 3, 2024 · Jan 3, 2024
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -6951,10 +6951,26 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, ElementCount VF,
       Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
 
     SmallVector<const Value *, 4> Operands(I->operand_values());
-    return TTI.getArithmeticInstrCost(
+    auto InstrCost = TTI.getArithmeticInstrCost(
         I->getOpcode(), VectorTy, CostKind,
         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
         Op2Info, Operands, I);
+
+    // Some targets replace frem with vector library calls.
+    if (I->getOpcode() == Instruction::FRem) {
+      LibFunc Func;
+      if (TLI->getLibFunc(I->getOpcode(), I->getType(), Func)) {
+        if (TLI->isFunctionVectorizable(TLI->getName(Func))) {
+          SmallVector<Type *, 4> OpTypes;
+          for (auto &Op : I->operands())
+            OpTypes.push_back(Op->getType());
+          auto CallCost =
+              TTI.getCallInstrCost(nullptr, VectorTy, OpTypes, CostKind);
+          return std::min(InstrCost, CallCost);
+        }
+      }
+    }
+    return InstrCost;
   }
   case Instruction::FNeg: {
     return TTI.getArithmeticInstrCost(

diff --git a/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve-frem.ll b/llvm/test/Analysis/CostModel/AArch64/arith-fp-sve-frem.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "cost.*vscale.*frem" --version 4
+
+; RUN: opt -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=NO-VECLIB
+
+; RUN: opt -vector-library=sleefgnuabi -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SLEEF
+
+; RUN: opt -vector-library=sleefgnuabi -mattr=+sve -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=SLEEF-TAILFOLD
+
+; RUN: opt -vector-library=ArmPL -mattr=+sve -passes=loop-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=ARMPL
+
+; RUN: opt -vector-library=ArmPL -mattr=+sve -passes=loop-vectorize -prefer-predicate-over-epilogue=predicate-dont-vectorize -debug-only=loop-vectorize -disable-output -S < %s 2>&1 | FileCheck %s --check-prefix=ARMPL-TAILFOLD
+
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @frem_f64(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; NO-VECLIB-LABEL: 'frem_f64'
+; NO-VECLIB:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem double %in, %in
+; NO-VECLIB:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem double %in, %in
+; NO-VECLIB:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): frem %res = frem double %in, %in
+;
+; SLEEF-LABEL: 'frem_f64'
+; SLEEF:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in
+; SLEEF:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
+;
+; SLEEF-TAILFOLD-LABEL: 'frem_f64'
+; SLEEF-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in
+; SLEEF-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
+;
+; ARMPL-LABEL: 'frem_f64'
+; ARMPL:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in
+; ARMPL:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
+;
+; ARMPL-TAILFOLD-LABEL: 'frem_f64'
+; ARMPL-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem double %in, %in
+; ARMPL-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem double %in, %in
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds double, ptr %in.ptr, i64 %iv
+  %in = load double, ptr %in.gep, align 8
+  %res = frem double %in, %in
+  %out.gep = getelementptr inbounds double, ptr %out.ptr, i64 %iv
+  store double %res, ptr %out.gep, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @frem_f32(ptr noalias %in.ptr, ptr noalias %out.ptr) {
+; NO-VECLIB-LABEL: 'frem_f32'
+; NO-VECLIB:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %res = frem float %in, %in
+; NO-VECLIB:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %res = frem float %in, %in
+; NO-VECLIB:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %res = frem float %in, %in
+; NO-VECLIB:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): frem %res = frem float %in, %in
+;
+; SLEEF-LABEL: 'frem_f32'
+; SLEEF:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in
+; SLEEF:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in
+; SLEEF:  LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
+;
+; SLEEF-TAILFOLD-LABEL: 'frem_f32'
+; SLEEF-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in
+; SLEEF-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in
+; SLEEF-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
+;
+; ARMPL-LABEL: 'frem_f32'
+; ARMPL:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in
+; ARMPL:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in
+; ARMPL:  LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
+;
+; ARMPL-TAILFOLD-LABEL: 'frem_f32'
+; ARMPL-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 1 For instruction: %res = frem float %in, %in
+; ARMPL-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 2 For instruction: %res = frem float %in, %in
+; ARMPL-TAILFOLD:  LV: Found an estimated cost of 10 for VF vscale x 4 For instruction: %res = frem float %in, %in
+;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %in.gep = getelementptr inbounds float, ptr %in.ptr, i64 %iv
+  %in = load float, ptr %in.gep, align 8
+  %res = frem float %in, %in
+  %out.gep = getelementptr inbounds float, ptr %out.ptr, i64 %iv
+  store float %res, ptr %out.gep, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}