@@ -358,42 +358,64 @@ LLVM_LIBC_FUNCTION(double, pow, (double x, double y)) {
358
358
// Then m_x = (1 + dx) / r, and
359
359
// log2(m_x) = log2( (1 + dx) / r )
360
360
// = log2(1 + dx) - log2(r).
361
- // Perform exact range reduction
361
+
362
+ // In order for the overall computations x^y = 2^(y * log2(x)) to have the
363
+ // relative errors < 2^-52 (1ULP), we will need to evaluate the exponent part
364
+ // y * log2(x) with absolute errors < 2^-52 (or better, 2^-53). Since the
365
+ // whole exponent range for double precision is bounded by
366
+ // |y * log2(x)| < 1076 ~ 2^10, we need to evaluate log2(x) with absolute
367
+ // errors < 2^-53 * 2^-10 = 2^-63.
368
+
369
+ // With that requirement, we use the following degree-6 polynomial
370
+ // approximation:
371
+ // P(dx) ~ log2(1 + dx) / dx
372
+ // Generated by Sollya with:
373
+ // > P = fpminimax(log2(1 + x)/x, 6, [|D...|], [-2^-8, 2^-7]); P;
374
+ // > dirtyinfnorm(log2(1 + x) - x*P, [-2^-8, 2^-7]);
375
+ // 0x1.d03cc...p-66
376
+ constexpr double COEFFS[] = {0x1 .71547652b82fep0, -0x1 .71547652b82e7p-1 ,
377
+ 0x1 .ec709dc3b1fd5p -2 , -0x1 .7154766124215p-2 ,
378
+ 0x1 .2776bd90259d8p-2 , -0x1 .ec586c6f3d311p -3 ,
379
+ 0x1 .9c4775eccf524p-3 };
380
+ // Error: ulp(dx^2) <= (2^-7)^2 * 2^-52 = 2^-66
381
+ // Extra errors from various computations and rounding directions, the overall
382
+ // errors we can be bounded by 2^-65.
383
+
362
384
double dx;
385
+ DoubleDouble dx_c0;
386
+
387
+ // Perform exact range reduction and exact product dx * c0.
363
388
#ifdef LIBC_TARGET_CPU_HAS_FMA
364
389
dx = fputil::multiply_add (RD[idx_x], m_x.get_val (), -1.0 ); // Exact
390
+ dx_c0 = fputil::exact_mult (COEFFS[0 ], dx);
365
391
#else
366
392
double c = FPBits (m_x.uintval () & 0x3fff'e000'0000'0000 ).get_val ();
367
393
dx = fputil::multiply_add (RD[idx_x], m_x.get_val () - c, CD[idx_x]); // Exact
394
+ dx_c0 = fputil::exact_mult<true >(COEFFS[0 ], dx);
368
395
#endif // LIBC_TARGET_CPU_HAS_FMA
369
396
370
- // Degree-5 polynomial approximation:
371
- // dx * P(dx) ~ log2(1 + dx)
372
- // Generated by Sollya with:
373
- // > P = fpminimax(log2(1 + x)/x, 5, [|D...|], [-2^-8, 2^-7]);
374
- // > dirtyinfnorm(log2(1 + x)/x - P, [-2^-8, 2^-7]);
375
- // 0x1.653...p-52
376
- constexpr double COEFFS[] = {0x1 .71547652b82fep0, -0x1 .71547652b7a07p-1 ,
377
- 0x1 .ec709dc458db1p -2 , -0x1 .715479c2266c9p-2 ,
378
- 0x1 .2776ae1ddf8fp-2 , -0x1 .e7b2178870157p -3 };
379
-
380
- double dx2 = dx * dx; // Exact
381
- double c0 = fputil::multiply_add (dx, COEFFS[1 ], COEFFS[0 ]);
382
- double c1 = fputil::multiply_add (dx, COEFFS[3 ], COEFFS[2 ]);
383
- double c2 = fputil::multiply_add (dx, COEFFS[5 ], COEFFS[4 ]);
397
+ double dx2 = dx * dx;
398
+ double c0 = fputil::multiply_add (dx, COEFFS[2 ], COEFFS[1 ]);
399
+ double c1 = fputil::multiply_add (dx, COEFFS[4 ], COEFFS[3 ]);
400
+ double c2 = fputil::multiply_add (dx, COEFFS[6 ], COEFFS[5 ]);
384
401
385
402
double p = fputil::polyeval (dx2, c0, c1, c2);
386
403
387
404
// s = e_x - log2(r) + dx * P(dx)
388
405
// Absolute error bound:
389
- // |log2(x) - log2_x.hi - log2_x.lo| < 2^-58.
390
- // Relative error bound:
391
- // |(log2_x.hi + log2_x.lo)/log2(x) - 1| < 2^-51.
392
- double log2_x_hi = e_x + LOG2_R_DD[idx_x].hi ; // Exact
393
- // Error
394
- double log2_x_lo = fputil::multiply_add (dx, p, LOG2_R_DD[idx_x].lo );
395
-
396
- DoubleDouble log2_x = fputil::exact_add (log2_x_hi, log2_x_lo);
406
+ // |log2(x) - log2_x.hi - log2_x.lo| < 2^-65.
407
+
408
+ // Notice that e_x - log2(r).hi is exact, so we perform an exact sum of
409
+ // e_x - log2(r).hi and the high part of the product dx * c0:
410
+ // log2_x_hi.hi + log2_x_hi.lo = e_x - log2(r).hi + (dx * c0).hi
411
+ DoubleDouble log2_x_hi =
412
+ fputil::exact_add (e_x + LOG2_R_DD[idx_x].hi , dx_c0.hi );
413
+ // The low part is dx^2 * p + low part of (dx * c0) + low part of -log2(r).
414
+ double log2_x_lo =
415
+ fputil::multiply_add (dx2, p, dx_c0.lo + LOG2_R_DD[idx_x].lo );
416
+ // Perform accurate sums.
417
+ DoubleDouble log2_x = fputil::exact_add (log2_x_hi.hi , log2_x_lo);
418
+ log2_x.lo += log2_x_hi.lo ;
397
419
398
420
// To compute 2^(y * log2(x)), we break the exponent into 3 parts:
399
421
// y * log(2) = hi + mid + lo, where
0 commit comments