Skip to content

Commit f133dd9

Browse files
authored
[libc][math] Improve the error analysis and accuracy for pow function. (#102098)
1 parent e77ac42 commit f133dd9

File tree

2 files changed

+55
-27
lines changed

2 files changed

+55
-27
lines changed

libc/src/math/generic/pow.cpp

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -358,42 +358,64 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
358358
// Then m_x = (1 + dx) / r, and
359359
// log2(m_x) = log2( (1 + dx) / r )
360360
// = log2(1 + dx) - log2(r).
361-
// Perform exact range reduction
361+
362+
// In order for the overall computations x^y = 2^(y * log2(x)) to have the
363+
// relative errors < 2^-52 (1ULP), we will need to evaluate the exponent part
364+
// y * log2(x) with absolute errors < 2^-52 (or better, 2^-53). Since the
365+
// whole exponent range for double precision is bounded by
366+
// |y * log2(x)| < 1076 ~ 2^10, we need to evaluate log2(x) with absolute
367+
// errors < 2^-53 * 2^-10 = 2^-63.
368+
369+
// With that requirement, we use the following degree-6 polynomial
370+
// approximation:
371+
// P(dx) ~ log2(1 + dx) / dx
372+
// Generated by Sollya with:
373+
// > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
374+
// > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
375+
// 0x1.d03cc...p-66
376+
constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b82e7p-1,
377+
0x1.ec709dc3b1fd5p-2, -0x1.7154766124215p-2,
378+
0x1.2776bd90259d8p-2, -0x1.ec586c6f3d311p-3,
379+
0x1.9c4775eccf524p-3};
380+
// Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
381+
// Extra errors from various computations and rounding directions, the overall
382+
// errors we can be bounded by 2^-65.
383+
362384
double dx;
385+
DoubleDouble dx_c0;
386+
387+
// Perform exact range reduction and exact product dx * c0.
363388
#ifdef LIBC_TARGET_CPU_HAS_FMA
364389
dx = fputil::multiply_add(RD[idx_x], m_x.get_val(), -1.0); // Exact
390+
dx_c0 = fputil::exact_mult(COEFFS[0], dx);
365391
#else
366392
double c = FPBits(m_x.uintval() & 0x3fff'e000'0000'0000).get_val();
367393
dx = fputil::multiply_add(RD[idx_x], m_x.get_val() - c, CD[idx_x]); // Exact
394+
dx_c0 = fputil::exact_mult<true>(COEFFS[0], dx);
368395
#endif // LIBC_TARGET_CPU_HAS_FMA
369396

370-
// Degree-5 polynomial approximation:
371-
// dx * P(dx) ~ log2(1 + dx)
372-
// Generated by Sollya with:
373-
// > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
374-
// > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
375-
// 0x1.653...p-52
376-
constexpr double COEFFS[] = {0x1.71547652b82fep0, -0x1.71547652b7a07p-1,
377-
0x1.ec709dc458db1p-2, -0x1.715479c2266c9p-2,
378-
0x1.2776ae1ddf8fp-2, -0x1.e7b2178870157p-3};
379-
380-
double dx2 = dx * dx; // Exact
381-
double c0 = fputil::multiply_add(dx, COEFFS[1], COEFFS[0]);
382-
double c1 = fputil::multiply_add(dx, COEFFS[3], COEFFS[2]);
383-
double c2 = fputil::multiply_add(dx, COEFFS[5], COEFFS[4]);
397+
double dx2 = dx * dx;
398+
double c0 = fputil::multiply_add(dx, COEFFS[2], COEFFS[1]);
399+
double c1 = fputil::multiply_add(dx, COEFFS[4], COEFFS[3]);
400+
double c2 = fputil::multiply_add(dx, COEFFS[6], COEFFS[5]);
384401

385402
double p = fputil::polyeval(dx2, c0, c1, c2);
386403

387404
// s = e_x - log2(r) + dx * P(dx)
388405
// Absolute error bound:
389-
// |log2(x) - log2_x.hi - log2_x.lo| < 2^-58.
390-
// Relative error bound:
391-
// |(log2_x.hi + log2_x.lo)/log2(x) - 1| < 2^-51.
392-
double log2_x_hi = e_x + LOG2_R_DD[idx_x].hi; // Exact
393-
// Error
394-
double log2_x_lo = fputil::multiply_add(dx, p, LOG2_R_DD[idx_x].lo);
395-
396-
DoubleDouble log2_x = fputil::exact_add(log2_x_hi, log2_x_lo);
406+
// |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
407+
408+
// Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
409+
// e_x - log2(r).hi and the high part of the product dx * c0:
410+
// log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
411+
DoubleDouble log2_x_hi =
412+
fputil::exact_add(e_x + LOG2_R_DD[idx_x].hi, dx_c0.hi);
413+
// The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
414+
double log2_x_lo =
415+
fputil::multiply_add(dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo);
416+
// Perform accurate sums.
417+
DoubleDouble log2_x = fputil::exact_add(log2_x_hi.hi, log2_x_lo);
418+
log2_x.lo += log2_x_hi.lo;
397419

398420
// To compute 2^(y * log2(x)), we break the exponent into 3 parts:
399421
// y * log(2) = hi + mid + lo, where

libc/test/src/math/pow_test.cpp

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,21 @@ namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
1818

1919
TEST_F(LlvmLibcPowTest, TrickyInputs) {
2020
constexpr mpfr::BinaryInput<double> INPUTS[] = {
21-
{0x1.0853408534085p-2, 0x1.0D148E03BCBA8p-1},
22-
{0x1.65FBD65FBD657p-1, 0x1.F10D148E03BB6p+1},
21+
{0x1.0853408534085p-2, 0x1.0d148e03bcba8p-1},
22+
{0x1.65fbd65fbd657p-1, 0x1.f10d148e03bb6p+1},
23+
{0x1.c046a084d2e12p-1, 0x1.1f9p+12},
24+
{0x1.ae37ed1670326p-1, 0x1.f967df66a202p-1},
25+
{0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
26+
{0x1.f558a88a8aadep-1, 0x1.88ap+12},
27+
{0x1.e84d32731e593p-1, 0x1.2cb8p+13},
28+
{0x1.ffffffffffffcp-1, 0x1.fffffffffffffp-2},
2329
};
2430

2531
for (auto input : INPUTS) {
2632
double x = input.x;
2733
double y = input.y;
28-
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Pow, input,
29-
LIBC_NAMESPACE::pow(x, y), 0.5);
34+
EXPECT_MPFR_MATCH(mpfr::Operation::Pow, input, LIBC_NAMESPACE::pow(x, y),
35+
1.5);
3036
}
3137
}
3238

0 commit comments

Comments
 (0)