llvm · lntue · Jun 7, 2024 · Jun 6, 2024 · Jun 6, 2024 · Jun 7, 2024
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -515,6 +515,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.fminimum_magf16
     libc.src.math.fminimum_mag_numf16
     libc.src.math.fminimum_numf16
+    libc.src.math.fmodf16
     libc.src.math.fromfpf16
     libc.src.math.fromfpxf16
     libc.src.math.llrintf16

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -548,6 +548,7 @@ if(LIBC_TYPES_HAS_FLOAT16)
     libc.src.math.fminimum_magf16
     libc.src.math.fminimum_mag_numf16
     libc.src.math.fminimum_numf16
+    libc.src.math.fmodf16
     libc.src.math.fromfpf16
     libc.src.math.fromfpxf16
     libc.src.math.llrintf16

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -156,7 +156,7 @@ Basic Operations
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fminimum_num     | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.12.9              | F.10.9.5                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
-| fmod             | |check|          | |check|         | |check|                |                      | |check|                | 7.12.10.1              | F.10.7.1                   |
+| fmod             | |check|          | |check|         | |check|                | |check|              | |check|                | 7.12.10.1              | F.10.7.1                   |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+
 | fmul             | N/A              |                 |                        | N/A                  |                        | 7.12.14.3              | F.10.11                    |
 +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -478,6 +478,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"fmod", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
           FunctionSpec<"fmodf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
           FunctionSpec<"fmodl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
+          GuardedFunctionSpec<"fmodf16", RetValSpec<Float16Type>, [ArgSpec<Float16Type>, ArgSpec<Float16Type>], "LIBC_TYPES_HAS_FLOAT16">,
           GuardedFunctionSpec<"fmodf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<Float128Type>], "LIBC_TYPES_HAS_FLOAT128">,
 
           FunctionSpec<"frexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntPtr>]>,

diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
@@ -744,7 +744,7 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
     if (LIBC_LIKELY(ep >= 0)) {
       // Implicit number bit will be removed by mask
       result.set_significand(number);
-      result.set_biased_exponent(ep + 1);
+      result.set_biased_exponent(static_cast<StorageType>(ep + 1));
     } else {
       result.set_significand(number >> -ep);
     }

diff --git a/libc/src/__support/FPUtil/generic/FMod.h b/libc/src/__support/FPUtil/generic/FMod.h
@@ -210,7 +210,9 @@ class FMod {
                     e_x - e_y <= int(FPB::EXP_LEN))) {
       StorageType m_x = sx.get_explicit_mantissa();
       StorageType m_y = sy.get_explicit_mantissa();
-      StorageType d = (e_x == e_y) ? (m_x - m_y) : (m_x << (e_x - e_y)) % m_y;
+      StorageType d = (e_x == e_y)
+                          ? (m_x - m_y)
+                          : static_cast<StorageType>(m_x << (e_x - e_y)) % m_y;
       if (d == 0)
         return FPB::zero();
       // iy - 1 because of "zero power" for number with power 1

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -183,6 +183,7 @@ add_math_entrypoint_object(fminimum_mag_numf128)
 add_math_entrypoint_object(fmod)
 add_math_entrypoint_object(fmodf)
 add_math_entrypoint_object(fmodl)
+add_math_entrypoint_object(fmodf16)
 add_math_entrypoint_object(fmodf128)
 
 add_math_entrypoint_object(frexp)

diff --git a/libc/src/math/fmodf16.h b/libc/src/math/fmodf16.h
@@ -0,0 +1,20 @@
+//===-- Implementation header for fmodf16 -----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_FMODF16_H
+#define LLVM_LIBC_SRC_MATH_FMODF16_H
+
+#include "src/__support/macros/properties/types.h"
+
+namespace LIBC_NAMESPACE {
+
+float16 fmodf16(float16 x, float16 y);
+
+} // namespace LIBC_NAMESPACE
+
+#endif // LLVM_LIBC_SRC_MATH_FMODF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -2887,6 +2887,19 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object(
+  fmodf16
+  SRCS
+    fmodf16.cpp
+  HDRS
+    ../fmodf16.h
+  DEPENDS
+    libc.src.__support.macros.properties.types
+    libc.src.__support.FPUtil.generic.fmod
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   fmodf128
   SRCS

diff --git a/libc/src/math/generic/fmodf16.cpp b/libc/src/math/generic/fmodf16.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of fmodf16 function --------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/fmodf16.h"
+#include "src/__support/FPUtil/generic/FMod.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float16, fmodf16, (float16 x, float16 y)) {
+  return fputil::generic::FMod<float16, uint32_t>::eval(x, y);
+}
+
+} // namespace LIBC_NAMESPACE
diff --git a/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h b/libc/test/src/math/performance_testing/BinaryOpSingleOutputPerf.h
@@ -9,6 +9,7 @@
 #include "src/__support/FPUtil/FPBits.h"
 #include "test/src/math/performance_testing/Timer.h"
 
+#include <cstddef>
 #include <fstream>
 
 namespace LIBC_NAMESPACE {
@@ -25,21 +26,26 @@ template <typename T> class BinaryOpSingleOutputPerf {
 
   static void run_perf_in_range(Func myFunc, Func otherFunc,
                                 StorageType startingBit, StorageType endingBit,
-                                StorageType N, std::ofstream &log) {
+                                size_t N, size_t rounds, std::ofstream &log) {
+    if (endingBit - startingBit < N)
+      N = endingBit - startingBit;
+
     auto runner = [=](Func func) {
       volatile T result;
       if (endingBit < startingBit) {
         return;
       }
 
       StorageType step = (endingBit - startingBit) / N;
-      for (StorageType bitsX = startingBit, bitsY = endingBit;;
-           bitsX += step, bitsY -= step) {
-        T x = FPBits(bitsX).get_val();
-        T y = FPBits(bitsY).get_val();
-        result = func(x, y);
-        if (endingBit - bitsX < step) {
-          break;
+      for (size_t i = 0; i < rounds; i++) {
+        for (StorageType bitsX = startingBit, bitsY = endingBit;;
+             bitsX += step, bitsY -= step) {
+          T x = FPBits(bitsX).get_val();
+          T y = FPBits(bitsY).get_val();
+          result = func(x, y);
+          if (endingBit - bitsX < step) {
+            break;
+          }
         }
       }
     };
@@ -49,7 +55,7 @@ template <typename T> class BinaryOpSingleOutputPerf {
     runner(myFunc);
     timer.stop();
 
-    double my_average = static_cast<double>(timer.nanoseconds()) / N;
+    double my_average = static_cast<double>(timer.nanoseconds()) / N / rounds;
     log << "-- My function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << my_average << " ns/op \n";
@@ -60,7 +66,8 @@ template <typename T> class BinaryOpSingleOutputPerf {
     runner(otherFunc);
     timer.stop();
 
-    double other_average = static_cast<double>(timer.nanoseconds()) / N;
+    double other_average =
+        static_cast<double>(timer.nanoseconds()) / N / rounds;
     log << "-- Other function --\n";
     log << "     Total time      : " << timer.nanoseconds() << " ns \n";
     log << "     Average runtime : " << other_average << " ns/op \n";
@@ -71,22 +78,24 @@ template <typename T> class BinaryOpSingleOutputPerf {
     log << "     Mine / Other's  : " << my_average / other_average << " \n";
   }
 
-  static void run_perf(Func myFunc, Func otherFunc, const char *logFile) {
+  static void run_perf(Func myFunc, Func otherFunc, int rounds,
+                       const char *logFile) {
     std::ofstream log(logFile);
     log << " Performance tests with inputs in denormal range:\n";
     run_perf_in_range(myFunc, otherFunc, /* startingBit= */ StorageType(0),
                       /* endingBit= */ FPBits::max_subnormal().uintval(),
-                      10'000'001, log);
+                      1'000'001, rounds, log);
     log << "\n Performance tests with inputs in normal range:\n";
     run_perf_in_range(myFunc, otherFunc,
                       /* startingBit= */ FPBits::min_normal().uintval(),
                       /* endingBit= */ FPBits::max_normal().uintval(),
-                      10'000'001, log);
+                      1'000'001, rounds, log);
     log << "\n Performance tests with inputs in normal range with exponents "
            "close to each other:\n";
-    run_perf_in_range(
-        myFunc, otherFunc, /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
-        /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(), 1'001'001, log);
+    run_perf_in_range(myFunc, otherFunc,
+                      /* startingBit= */ FPBits(T(0x1.0p-10)).uintval(),
+                      /* endingBit= */ FPBits(T(0x1.0p+10)).uintval(),
+                      1'000'001, rounds, log);
   }
 
   static void run_diff(Func myFunc, Func otherFunc, const char *logFile) {
@@ -117,6 +126,15 @@ template <typename T> class BinaryOpSingleOutputPerf {
 #define BINARY_OP_SINGLE_OUTPUT_PERF(T, myFunc, otherFunc, filename)           \
   int main() {                                                                 \
     LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
-        &myFunc, &otherFunc, filename);                                        \
+        &myFunc, &otherFunc, 1, filename);                                     \
     return 0;                                                                  \
   }
+
+#define BINARY_OP_SINGLE_OUTPUT_PERF_EX(T, myFunc, otherFunc, rounds,          \
+                                        filename)                              \
+  {                                                                            \
+    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
+        &myFunc, &otherFunc, rounds, filename);                                \
+    LIBC_NAMESPACE::testing::BinaryOpSingleOutputPerf<T>::run_perf(            \
+        &myFunc, &otherFunc, rounds, filename);                                \
+  }
diff --git a/libc/test/src/math/performance_testing/CMakeLists.txt b/libc/test/src/math/performance_testing/CMakeLists.txt
@@ -88,6 +88,8 @@ add_header_library(
   binary_op_single_output_diff
   HDRS
     BinaryOpSingleOutputPerf.h
+  DEPENDS
+    libc.src.__support.FPUtil.fp_bits
 )
 
 add_perf_binary(
@@ -343,6 +345,17 @@ add_perf_binary(
     -fno-builtin
 )
 
+add_perf_binary(
+  fmodf16_perf
+  SRCS
+    fmodf16_perf.cpp
+  DEPENDS
+    .binary_op_single_output_diff
+    libc.src.math.fmodf16
+    libc.src.__support.FPUtil.generic.fmod
+    libc.src.__support.macros.properties.types
+)
+
 add_perf_binary(
   fmodf128_perf
   SRCS

diff --git a/libc/test/src/math/performance_testing/fmodf16_perf.cpp b/libc/test/src/math/performance_testing/fmodf16_perf.cpp
@@ -0,0 +1,27 @@
+//===-- Performance test for fmodf16 --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "BinaryOpSingleOutputPerf.h"
+
+#include "src/__support/FPUtil/generic/FMod.h"
+#include "src/__support/macros/properties/types.h"
+
+#include <stdint.h>
+
+#define FMOD_FUNC(U) (LIBC_NAMESPACE::fputil::generic::FMod<float16, U>::eval)
+
+int main() {
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t),
+                                  FMOD_FUNC(uint32_t), 5000,
+                                  "fmodf16_u16_vs_u32_perf.log")
+
+  BINARY_OP_SINGLE_OUTPUT_PERF_EX(float16, FMOD_FUNC(uint16_t),
+                                  FMOD_FUNC(uint64_t), 5000,
+                                  "fmodf16_u16_vs_u64_perf.log")
+  return 0;
+}
diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt
@@ -3111,10 +3111,10 @@ add_fp_unittest(
   HDRS
     FModTest.h
   DEPENDS
+    libc.hdr.fenv_macros
     libc.src.errno.errno
     libc.src.math.fmodf
-    libc.src.__support.FPUtil.basic_operations
-    libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.FPUtil.fenv_impl
   # FIXME: Currently fails on the GPU build.
   UNIT_TEST_ONLY
 )
@@ -3128,10 +3128,10 @@ add_fp_unittest(
   HDRS
     FModTest.h
   DEPENDS
+    libc.hdr.fenv_macros
     libc.src.errno.errno
     libc.src.math.fmod
-    libc.src.__support.FPUtil.basic_operations
-    libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.FPUtil.fenv_impl
   # FIXME: Currently fails on the GPU build.
   UNIT_TEST_ONLY
 )
@@ -3145,10 +3145,27 @@ add_fp_unittest(
   HDRS
     FModTest.h
   DEPENDS
+    libc.hdr.fenv_macros
     libc.src.errno.errno
     libc.src.math.fmodl
-    libc.src.__support.FPUtil.basic_operations
-    libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.FPUtil.fenv_impl
+  # FIXME: Currently fails on the GPU build.
+  UNIT_TEST_ONLY
+)
+
+add_fp_unittest(
+  fmodf16_test
+  SUITE
+    libc-math-smoke-tests
+  SRCS
+    fmodf16_test.cpp
+  HDRS
+    FModTest.h
+  DEPENDS
+    libc.hdr.fenv_macros
+    libc.src.errno.errno
+    libc.src.math.fmodf16
+    libc.src.__support.FPUtil.fenv_impl
   # FIXME: Currently fails on the GPU build.
   UNIT_TEST_ONLY
 )
@@ -3162,10 +3179,10 @@ add_fp_unittest(
   HDRS
     FModTest.h
   DEPENDS
+    libc.hdr.fenv_macros
     libc.src.errno.errno
     libc.src.math.fmodf128
-    libc.src.__support.FPUtil.basic_operations
-    libc.src.__support.FPUtil.nearest_integer_operations
+    libc.src.__support.FPUtil.fenv_impl
   # FIXME: Currently fails on the GPU build.
   UNIT_TEST_ONLY
 )