Skip to content

[libc][math] Improve the error analysis and accuracy for pow function. #102098

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Aug 6, 2024

Conversation

lntue
Copy link
Contributor

@lntue lntue commented Aug 6, 2024

No description provided.

@llvmbot
Copy link
Member

llvmbot commented Aug 6, 2024

@llvm/pr-subscribers-libc

Author: None (lntue)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/102098.diff

2 Files Affected:

  • (modified) libc/src/math/generic/pow.cpp (+44-23)
  • (modified) libc/test/src/math/pow_test.cpp (+10-4)
diff --git a/libc/src/math/generic/pow.cpp b/libc/src/math/generic/pow.cpp
index d2c5f7535082c..5f906994f76fe 100644
--- a/libc/src/math/generic/pow.cpp
+++ b/libc/src/math/generic/pow.cpp
@@ -358,42 +358,63 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
   // Then m_x = (1 + dx) / r, and
   //   log2(m_x) = log2( (1 + dx) / r )
   //             = log2(1 + dx) - log2(r).
-  // Perform exact range reduction
+
+  // In order for the overall computations x^y = 2^(y * log2(x)) to has relative
+  // errors < 2^-52 (1ULP), we will need evaluate the exponent part y * log2(x)
+  // with absolute errors < 2^52 (or better, 2^-53).  Since the whole exponent
+  // range for double precision is bounded by |y * log2(x)| < 1076 ~ 2^10, we
+  // need to evaluate log2(x) with absolute errors < 2^-53 * 2^-10 = 2^-63.
+
+  // With that requirement, we use the following degree-6 polynomial
+  // approximation:
+  //   P(dx) ~ log2(1 + dx) / dx
+  // Generated by Sollya with:
+  // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
+  // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
+  //   0x1.d03cc...p-66
+  constexpr double COEFFS[] = {0x1.71547652b82fep0,  -0x1.71547652b82e7p-1,
+                               0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2,
+                               0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3,
+                               0x1.9c4775eccf524p-3};
+  // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
+  // Extra errors from various computations and rounding directions, the overall
+  // errors we can be bounded by 2^-65.
+
   double dx;
+  DoubleDouble dx_c0;
+
+  // Perform exact range reduction and exact product dx * c0.
 #ifdef LIBC_TARGET_CPU_HAS_FMA
   dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
+  dx_c0 = fputil::exact_mult(COEFFS[0], dx);
 #else
   double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
   dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
+  dx_c0 = fputil::exact_mult<true>(COEFFS[0], dx);
 #endif // LIBC_TARGET_CPU_HAS_FMA
 
-  // Degree-5 polynomial approximation:
-  //   dx * P(dx) ~ log2(1 + dx)
-  // Generated by Sollya with:
-  // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
-  // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
-  //   0x1.653...p-52
-  constexpr double COEFFS[] = {0x1.71547652b82fep0,  -0x1.71547652b7a07p-1,
-                               0x1.ec709dc458db1p-2, -0x1.715479c2266c9p-2,
-                               0x1.2776ae1ddf8fp-2,  -0x1.e7b2178870157p-3};
-
-  double dx2 = dx * dx; // Exact
-  double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
-  double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
-  double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
+  double dx2 = dx * dx;
+  double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]);
+  double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]);
+  double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]);
 
   double p = fputil::polyeval(dx2, c0, c1, c2);
 
   // s = e_x - log2(r) + dx * P(dx)
   // Absolute error bound:
-  //   |log2(x) - log2_x.hi - log2_x.lo| < 2^-58.
-  // Relative error bound:
-  //   |(log2_x.hi + log2_x.lo)/log2(x) - 1| < 2^-51.
-  double log2_x_hi = e_x + LOG2_R_DD[idx_x].hi; // Exact
-  // Error
-  double log2_x_lo = fputil::multiply_add(dx, p, LOG2_R_DD[idx_x].lo);
-
-  DoubleDouble log2_x = fputil::exact_add(log2_x_hi, log2_x_lo);
+  //   |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
+
+  // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
+  // e_x - log2(r).hi and the high part of the product dx * c0:
+  //   log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
+  DoubleDouble log2_x_hi =
+      fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi);
+  // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
+  double log2_x_lo =
+      fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo);
+  // Perform accurate sums.
+  DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo);
+  log2_x.lo += log2_x_hi.lo;
 
   // To compute 2^(y * log2(x)), we break the exponent into 3 parts:
   //   y * log(2) = hi + mid + lo, where
diff --git a/libc/test/src/math/pow_test.cpp b/libc/test/src/math/pow_test.cpp
index 468a6bbee4b2c..20e3ddfc8fbe5 100644
--- a/libc/test/src/math/pow_test.cpp
+++ b/libc/test/src/math/pow_test.cpp
@@ -18,15 +18,21 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
 
 TEST_F(LlvmLibcPowTest, TrickyInputs) {
   constexpr mpfr::BinaryInput<double> INPUTS[] = {
-      {0x1.0853408534085p-2, 0x1.0D148E03BCBA8p-1},
-      {0x1.65FBD65FBD657p-1, 0x1.F10D148E03BB6p+1},
+      {0x1.0853408534085p-2, 0x1.0d148e03bcba8p-1},
+      {0x1.65fbd65fbd657p-1, 0x1.f10d148e03bb6p+1},
+      {0x1.c046a084d2e12p-1, 0x1.1f9p+12},
+      {0x1.ae37ed1670326p-1, 0x1.f967df66a202p-1},
+      {0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
+      {0x1.f558a88a8aadep-1, 0x1.88ap+12},
+      {0x1.e84d32731e593p-1, 0x1.2cb8p+13},
+      {0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
   };
 
   for (auto input : INPUTS) {
     double x = input.x;
     double y = input.y;
-    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input,
-                                   LIBC_NAMESPACE::pow(x, y), 0.5);
+    EXPECT_MPFR_MATCH(mpfr::Operation::Pow, input, LIBC_NAMESPACE::pow(x, y),
+                      1.5);
   }
 }
 

@lntue lntue merged commit f133dd9 into llvm:main Aug 6, 2024
4 of 5 checks passed
@lntue lntue deleted the pow branch August 6, 2024 18:08
@lntue lntue mentioned this pull request Aug 12, 2024
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants