Skip to content

Commit bc7a3bd

Browse files
authored
[libc][math] Implement powf function correctly rounded to all rounding modes. (#71188)
We compute `pow(x, y)` using the formula ``` pow(x, y) = x^y = 2^(y * log2(x)) ``` We follow similar steps as in `log2f(x)` and `exp2f(x)`, by breaking down into `hi + mid + lo` parts, in which `hi` parts are computed using the exponent field directly, `mid` parts will use look-up tables, and `lo` parts are approximated by polynomials. We add some speedup for common use-cases: ``` pow(2, y) = exp2(y) pow(10, y) = exp10(y) pow(x, 2) = x * x pow(x, 1/2) = sqrt(x) pow(x, -1/2) = rsqrt(x) - to be added ```
1 parent 0156b6e commit bc7a3bd

23 files changed

+1665
-325
lines changed

libc/config/darwin/arm/entrypoints.txt

+1
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ set(TARGET_LIBM_ENTRYPOINTS
199199
libc.src.math.nextafter
200200
libc.src.math.nextafterf
201201
libc.src.math.nextafterl
202+
libc.src.math.powf
202203
libc.src.math.remainderf
203204
libc.src.math.remainder
204205
libc.src.math.remainderl

libc/config/linux/aarch64/entrypoints.txt

+1
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,7 @@ set(TARGET_LIBM_ENTRYPOINTS
316316
libc.src.math.nextafter
317317
libc.src.math.nextafterf
318318
libc.src.math.nextafterl
319+
libc.src.math.powf
319320
libc.src.math.remainderf
320321
libc.src.math.remainder
321322
libc.src.math.remainderl

libc/config/linux/riscv/entrypoints.txt

+1
Original file line numberDiff line numberDiff line change
@@ -325,6 +325,7 @@ set(TARGET_LIBM_ENTRYPOINTS
325325
libc.src.math.nextafter
326326
libc.src.math.nextafterf
327327
libc.src.math.nextafterl
328+
libc.src.math.powf
328329
libc.src.math.remainderf
329330
libc.src.math.remainder
330331
libc.src.math.remainderl

libc/config/linux/x86_64/entrypoints.txt

+1
Original file line numberDiff line numberDiff line change
@@ -329,6 +329,7 @@ set(TARGET_LIBM_ENTRYPOINTS
329329
libc.src.math.nextafter
330330
libc.src.math.nextafterf
331331
libc.src.math.nextafterl
332+
libc.src.math.powf
332333
libc.src.math.remainderf
333334
libc.src.math.remainder
334335
libc.src.math.remainderl

libc/config/windows/entrypoints.txt

+1
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ set(TARGET_LIBM_ENTRYPOINTS
198198
libc.src.math.nextafter
199199
libc.src.math.nextafterf
200200
libc.src.math.nextafterl
201+
libc.src.math.powf
201202
libc.src.math.remainderf
202203
libc.src.math.remainder
203204
libc.src.math.remainderl

libc/docs/math/index.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -420,7 +420,7 @@ Higher Math Functions
420420
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
421421
| pow | | | | | | | | | | | | |
422422
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
423-
| powf | | | | | | | | | | | | |
423+
| powf | |check| | |check| | | |check| | |check| | | | |check| | | | | |
424424
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
425425
| powl | | | | | | | | | | | | |
426426
+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
@@ -493,6 +493,7 @@ log |check| |check|
493493
log10 |check| |check|
494494
log1p |check| |check|
495495
log2 |check| |check|
496+
pow |check|
496497
sin |check| large
497498
sincos |check| large
498499
sinh |check|

libc/spec/stdc.td

+1
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,7 @@ def StdC : StandardSpec<"stdc"> {
491491
FunctionSpec<"nextafter", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
492492
FunctionSpec<"nextafterl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<LongDoubleType>]>,
493493

494+
FunctionSpec<"powf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<FloatType>]>,
494495
FunctionSpec<"pow", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<DoubleType>]>,
495496

496497
FunctionSpec<"coshf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

libc/src/math/generic/CMakeLists.txt

+60-10
Original file line numberDiff line numberDiff line change
@@ -625,12 +625,10 @@ add_entrypoint_object(
625625
-O3
626626
)
627627

628-
add_entrypoint_object(
629-
exp2f
630-
SRCS
631-
exp2f.cpp
628+
add_header_library(
629+
exp2f_impl
632630
HDRS
633-
../exp2f.h
631+
exp2f_impl.h
634632
DEPENDS
635633
.explogxf
636634
libc.src.__support.FPUtil.except_value_utils
@@ -641,9 +639,20 @@ add_entrypoint_object(
641639
libc.src.__support.FPUtil.polyeval
642640
libc.src.__support.FPUtil.rounding_mode
643641
libc.src.__support.macros.optimization
642+
libc.src.__support.common
644643
libc.include.errno
645644
libc.src.errno.errno
646645
libc.include.math
646+
)
647+
648+
add_entrypoint_object(
649+
exp2f
650+
SRCS
651+
exp2f.cpp
652+
HDRS
653+
../exp2f.h
654+
DEPENDS
655+
.exp2f_impl
647656
COMPILE_OPTIONS
648657
-O3
649658
)
@@ -675,12 +684,10 @@ add_entrypoint_object(
675684
-O3
676685
)
677686

678-
add_entrypoint_object(
679-
exp10f
680-
SRCS
681-
exp10f.cpp
687+
add_header_library(
688+
exp10f_impl
682689
HDRS
683-
../exp10f.h
690+
exp10f_impl.h
684691
DEPENDS
685692
.explogxf
686693
libc.src.__support.FPUtil.fenv_impl
@@ -690,13 +697,26 @@ add_entrypoint_object(
690697
libc.src.__support.FPUtil.polyeval
691698
libc.src.__support.FPUtil.rounding_mode
692699
libc.src.__support.macros.optimization
700+
libc.src.__support.common
693701
libc.include.errno
694702
libc.src.errno.errno
695703
libc.include.math
696704
COMPILE_OPTIONS
697705
-O3
698706
)
699707

708+
add_entrypoint_object(
709+
exp10f
710+
SRCS
711+
exp10f.cpp
712+
HDRS
713+
../exp10f.h
714+
DEPENDS
715+
.exp10f_impl
716+
COMPILE_OPTIONS
717+
-O3
718+
)
719+
700720
add_entrypoint_object(
701721
expm1
702722
SRCS
@@ -747,6 +767,36 @@ add_entrypoint_object(
747767
-O3
748768
)
749769

770+
add_entrypoint_object(
771+
powf
772+
SRCS
773+
powf.cpp
774+
HDRS
775+
../powf.h
776+
DEPENDS
777+
.common_constants
778+
.explogxf
779+
.exp2f_impl
780+
.exp10f_impl
781+
libc.src.__support.builtin_wrappers
782+
libc.src.__support.CPP.bit
783+
libc.src.__support.CPP.optional
784+
libc.src.__support.FPUtil.fenv_impl
785+
libc.src.__support.FPUtil.fp_bits
786+
libc.src.__support.FPUtil.multiply_add
787+
libc.src.__support.FPUtil.nearest_integer
788+
libc.src.__support.FPUtil.polyeval
789+
libc.src.__support.FPUtil.rounding_mode
790+
libc.src.__support.FPUtil.sqrt
791+
libc.src.__support.FPUtil.triple_double
792+
libc.src.__support.macros.optimization
793+
libc.include.errno
794+
libc.src.errno.errno
795+
libc.include.math
796+
COMPILE_OPTIONS
797+
-O3
798+
)
799+
750800
add_entrypoint_object(
751801
copysign
752802
SRCS

libc/src/math/generic/common_constants.cpp

+87
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,51 @@ alignas(64) const double LOG_R[128] = {
227227
0x1.5707a26bb8c66p-1, 0x1.5af405c3649ep-1, 0x1.5af405c3649ep-1,
228228
0x1.5ee82aa24192p-1, 0x0.000000000000p0};
229229

230+
alignas(64) const double LOG2_R[128] = {
231+
0x0.0000000000000p+0, 0x1.72c7ba20f7327p-7, 0x1.743ee861f3556p-6,
232+
0x1.184b8e4c56af8p-5, 0x1.77394c9d958d5p-5, 0x1.d6ebd1f1febfep-5,
233+
0x1.1bb32a600549dp-4, 0x1.4c560fe68af88p-4, 0x1.7d60496cfbb4cp-4,
234+
0x1.960caf9abb7cap-4, 0x1.c7b528b70f1c5p-4, 0x1.f9c95dc1d1165p-4,
235+
0x1.097e38ce60649p-3, 0x1.22dadc2ab3497p-3, 0x1.3c6fb650cde51p-3,
236+
0x1.494f863b8df35p-3, 0x1.633a8bf437ce1p-3, 0x1.7046031c79f85p-3,
237+
0x1.8a8980abfbd32p-3, 0x1.97c1cb13c7ec1p-3, 0x1.b2602497d5346p-3,
238+
0x1.bfc67a7fff4ccp-3, 0x1.dac22d3e441d3p-3, 0x1.e857d3d361368p-3,
239+
0x1.01d9bbcfa61d4p-2, 0x1.08bce0d95fa38p-2, 0x1.169c05363f158p-2,
240+
0x1.1d982c9d52708p-2, 0x1.249cd2b13cd6cp-2, 0x1.32bfee370ee68p-2,
241+
0x1.39de8e1559f6fp-2, 0x1.4106017c3eca3p-2, 0x1.4f6fbb2cec598p-2,
242+
0x1.56b22e6b578e5p-2, 0x1.5dfdcf1eeae0ep-2, 0x1.6552b49986277p-2,
243+
0x1.6cb0f6865c8eap-2, 0x1.7b89f02cf2aadp-2, 0x1.8304d90c11fd3p-2,
244+
0x1.8a8980abfbd32p-2, 0x1.921800924dd3bp-2, 0x1.99b072a96c6b2p-2,
245+
0x1.a8ff971810a5ep-2, 0x1.b0b67f4f4681p-2, 0x1.b877c57b1b07p-2,
246+
0x1.c043859e2fdb3p-2, 0x1.c819dc2d45fe4p-2, 0x1.cffae611ad12bp-2,
247+
0x1.d7e6c0abc3579p-2, 0x1.dfdd89d586e2bp-2, 0x1.e7df5fe538ab3p-2,
248+
0x1.efec61b011f85p-2, 0x1.f804ae8d0cd02p-2, 0x1.0014332be0033p-1,
249+
0x1.042bd4b9a7c99p-1, 0x1.08494c66b8efp-1, 0x1.0c6caaf0c5597p-1,
250+
0x1.1096015dee4dap-1, 0x1.14c560fe68af9p-1, 0x1.18fadb6e2d3c2p-1,
251+
0x1.1d368296b5255p-1, 0x1.217868b0c37e8p-1, 0x1.25c0a0463bebp-1,
252+
0x1.2a0f3c340705cp-1, 0x1.2e644fac04fd8p-1, 0x1.2e644fac04fd8p-1,
253+
0x1.32bfee370ee68p-1, 0x1.37222bb70747cp-1, 0x1.3b8b1c68fa6edp-1,
254+
0x1.3ffad4e74f1d6p-1, 0x1.44716a2c08262p-1, 0x1.44716a2c08262p-1,
255+
0x1.48eef19317991p-1, 0x1.4d7380dcc422dp-1, 0x1.51ff2e30214bcp-1,
256+
0x1.5692101d9b4a6p-1, 0x1.5b2c3da19723bp-1, 0x1.5b2c3da19723bp-1,
257+
0x1.5fcdce2727ddbp-1, 0x1.6476d98ad990ap-1, 0x1.6927781d932a8p-1,
258+
0x1.6927781d932a8p-1, 0x1.6ddfc2a78fc63p-1, 0x1.729fd26b707c8p-1,
259+
0x1.7767c12967a45p-1, 0x1.7767c12967a45p-1, 0x1.7c37a9227e7fbp-1,
260+
0x1.810fa51bf65fdp-1, 0x1.810fa51bf65fdp-1, 0x1.85efd062c656dp-1,
261+
0x1.8ad846cf369a4p-1, 0x1.8ad846cf369a4p-1, 0x1.8fc924c89ac84p-1,
262+
0x1.94c287492c4dbp-1, 0x1.94c287492c4dbp-1, 0x1.99c48be2063c8p-1,
263+
0x1.9ecf50bf43f13p-1, 0x1.9ecf50bf43f13p-1, 0x1.a3e2f4ac43f6p-1,
264+
0x1.a8ff971810a5ep-1, 0x1.a8ff971810a5ep-1, 0x1.ae255819f022dp-1,
265+
0x1.b35458761d479p-1, 0x1.b35458761d479p-1, 0x1.b88cb9a2ab521p-1,
266+
0x1.b88cb9a2ab521p-1, 0x1.bdce9dcc96187p-1, 0x1.c31a27dd00b4ap-1,
267+
0x1.c31a27dd00b4ap-1, 0x1.c86f7b7ea4a89p-1, 0x1.c86f7b7ea4a89p-1,
268+
0x1.cdcebd2373995p-1, 0x1.d338120a6dd9dp-1, 0x1.d338120a6dd9dp-1,
269+
0x1.d8aba045b01c8p-1, 0x1.d8aba045b01c8p-1, 0x1.de298ec0bac0dp-1,
270+
0x1.de298ec0bac0dp-1, 0x1.e3b20546f554ap-1, 0x1.e3b20546f554ap-1,
271+
0x1.e9452c8a71028p-1, 0x1.e9452c8a71028p-1, 0x1.eee32e2aeccbfp-1,
272+
0x1.eee32e2aeccbfp-1, 0x1.f48c34bd1e96fp-1, 0x1.f48c34bd1e96fp-1,
273+
0x1.fa406bd2443dfp-1, 0x1.0000000000000p0};
274+
230275
// Generated by Sollya with:
231276
// for i from 0 to 127 do {
232277
// r = 2^-8 * ceil( 2^8 * (1 - 2^(-8)) / (1 + i*2^-7) );
@@ -395,6 +440,48 @@ alignas(64) const int S2[193] = {
395440
-0x1cd, -0x1d1, -0x1d5, -0x1d9, -0x1dd, -0x1e0, -0x1e4, -0x1e8, -0x1ec,
396441
-0x1f0, -0x1f4, -0x1f8, -0x1fc};
397442

443+
alignas(64) const double R2[193] = {
444+
0x1.0101p0, 0x1.00fdp0, 0x1.00f9p0, 0x1.00f5p0, 0x1.00f1p0,
445+
0x1.00edp0, 0x1.00e9p0, 0x1.00e5p0, 0x1.00e1p0, 0x1.00ddp0,
446+
0x1.00d9p0, 0x1.00d5p0, 0x1.00d1p0, 0x1.00cdp0, 0x1.00c9p0,
447+
0x1.00c5p0, 0x1.00c1p0, 0x1.00bdp0, 0x1.00b9p0, 0x1.00b4p0,
448+
0x1.00bp0, 0x1.00acp0, 0x1.00a8p0, 0x1.00a4p0, 0x1.00ap0,
449+
0x1.009cp0, 0x1.0098p0, 0x1.0094p0, 0x1.009p0, 0x1.008cp0,
450+
0x1.0088p0, 0x1.0084p0, 0x1.008p0, 0x1.007cp0, 0x1.0078p0,
451+
0x1.0074p0, 0x1.007p0, 0x1.006cp0, 0x1.0068p0, 0x1.0064p0,
452+
0x1.006p0, 0x1.005cp0, 0x1.0058p0, 0x1.0054p0, 0x1.005p0,
453+
0x1.004cp0, 0x1.0048p0, 0x1.0044p0, 0x1.004p0, 0x1.003cp0,
454+
0x1.0038p0, 0x1.0034p0, 0x1.003p0, 0x1.002cp0, 0x1.0028p0,
455+
0x1.0024p0, 0x1.002p0, 0x1.001cp0, 0x1.0018p0, 0x1.0014p0,
456+
0x1.001p0, 0x1.000cp0, 0x1.0008p0, 0x1.0004p0, 0x1p0,
457+
0x1.fff8p-1, 0x1.fffp-1, 0x1.ffe8p-1, 0x1.ffep-1, 0x1.ffd8p-1,
458+
0x1.ffdp-1, 0x1.ffc8p-1, 0x1.ffcp-1, 0x1.ffb8p-1, 0x1.ffbp-1,
459+
0x1.ffa8p-1, 0x1.ffap-1, 0x1.ff98p-1, 0x1.ff9p-1, 0x1.ff88p-1,
460+
0x1.ff8p-1, 0x1.ff78p-1, 0x1.ff7p-1, 0x1.ff68p-1, 0x1.ff6p-1,
461+
0x1.ff58p-1, 0x1.ff5p-1, 0x1.ff48p-1, 0x1.ff4p-1, 0x1.ff38p-1,
462+
0x1.ff3p-1, 0x1.ff28p-1, 0x1.ff2p-1, 0x1.ff18p-1, 0x1.ff1p-1,
463+
0x1.ff08p-1, 0x1.ffp-1, 0x1.fef8p-1, 0x1.fefp-1, 0x1.fee8p-1,
464+
0x1.feep-1, 0x1.fed8p-1, 0x1.fedp-1, 0x1.fec8p-1, 0x1.fecp-1,
465+
0x1.feb8p-1, 0x1.febp-1, 0x1.fea8p-1, 0x1.feap-1, 0x1.fe98p-1,
466+
0x1.fe92p-1, 0x1.fe8ap-1, 0x1.fe82p-1, 0x1.fe7ap-1, 0x1.fe72p-1,
467+
0x1.fe6ap-1, 0x1.fe62p-1, 0x1.fe5ap-1, 0x1.fe52p-1, 0x1.fe4ap-1,
468+
0x1.fe42p-1, 0x1.fe3ap-1, 0x1.fe32p-1, 0x1.fe2ap-1, 0x1.fe22p-1,
469+
0x1.fe1ap-1, 0x1.fe12p-1, 0x1.fe0ap-1, 0x1.fe02p-1, 0x1.fdfap-1,
470+
0x1.fdf2p-1, 0x1.fdeap-1, 0x1.fde2p-1, 0x1.fddap-1, 0x1.fdd2p-1,
471+
0x1.fdcap-1, 0x1.fdc2p-1, 0x1.fdbap-1, 0x1.fdb2p-1, 0x1.fdaap-1,
472+
0x1.fda2p-1, 0x1.fd9ap-1, 0x1.fd92p-1, 0x1.fd8cp-1, 0x1.fd84p-1,
473+
0x1.fd7cp-1, 0x1.fd74p-1, 0x1.fd6cp-1, 0x1.fd64p-1, 0x1.fd5cp-1,
474+
0x1.fd54p-1, 0x1.fd4cp-1, 0x1.fd44p-1, 0x1.fd3cp-1, 0x1.fd34p-1,
475+
0x1.fd2cp-1, 0x1.fd24p-1, 0x1.fd1cp-1, 0x1.fd14p-1, 0x1.fd0cp-1,
476+
0x1.fd04p-1, 0x1.fcfcp-1, 0x1.fcf4p-1, 0x1.fcecp-1, 0x1.fce4p-1,
477+
0x1.fcdcp-1, 0x1.fcd6p-1, 0x1.fccep-1, 0x1.fcc6p-1, 0x1.fcbep-1,
478+
0x1.fcb6p-1, 0x1.fcaep-1, 0x1.fca6p-1, 0x1.fc9ep-1, 0x1.fc96p-1,
479+
0x1.fc8ep-1, 0x1.fc86p-1, 0x1.fc7ep-1, 0x1.fc76p-1, 0x1.fc6ep-1,
480+
0x1.fc66p-1, 0x1.fc5ep-1, 0x1.fc56p-1, 0x1.fc4ep-1, 0x1.fc46p-1,
481+
0x1.fc4p-1, 0x1.fc38p-1, 0x1.fc3p-1, 0x1.fc28p-1, 0x1.fc2p-1,
482+
0x1.fc18p-1, 0x1.fc1p-1, 0x1.fc08p-1,
483+
};
484+
398485
// Logarithm range reduction - Step 3:
399486
// r(k) = 2^-21 round(2^21 / (1 + k*2^-21)) for k = -80 .. 80.
400487
// Output range:

libc/src/math/generic/common_constants.h

+5
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ extern const double CD[128];
3434
extern const double LOG_R[128];
3535
extern const NumberPair<double> LOG_R_DD[128];
3636

37+
// Lookup table for -log2(r)
38+
extern const double LOG2_R[128];
39+
3740
// Minimax polynomial for (log(1 + x) - x)/x^2, generated by sollya with:
3841
// > P = fpminimax((log(1 + x) - x)/x^2, 5, [|D...|], [-2^-8, 2^-7]);
3942
constexpr double LOG_COEFFS[6] = {-0x1.fffffffffffffp-2, 0x1.5555555554a9bp-2,
@@ -45,6 +48,8 @@ extern const int S2[193];
4548
extern const int S3[161];
4649
extern const int S4[130];
4750

51+
extern const double R2[193];
52+
4853
// log(2) generated by Sollya with:
4954
// > a = 2^-43 * nearestint(2^43*log(2));
5055
// LSB = 2^-43 is chosen so that e_x * LOG_2_HI is exact for -1075 < e_x < 1024.

0 commit comments

Comments
 (0)