Skip to content

Commit 131dda9

Browse files
committed
[libc] Implement sincosf function correctly rounded to all rounding modes.
Refactor common range reductions and evaluations for sinf, cosf, and sincosf. Added exhaustive tests for sincosf. Performance before the patch: ``` System LIBC reciprocal throughput : 30.205 LIBC reciprocal throughput : 30.533 System LIBC latency : 67.961 LIBC latency : 61.564 ``` Performance after the patch: ``` System LIBC reciprocal throughput : 30.409 LIBC reciprocal throughput : 20.273 System LIBC latency : 67.527 LIBC latency : 61.959 ``` Reviewed By: orex Differential Revision: https://reviews.llvm.org/D130901
1 parent 19bb535 commit 131dda9

File tree

13 files changed

+508
-405
lines changed

13 files changed

+508
-405
lines changed

libc/docs/math.rst

+3-1
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,7 @@ log10 |check|
166166
log1p |check|
167167
log2 |check|
168168
sin |check| large
169-
sincos 0.776 ULPs large
169+
sincos |check| large
170170
sinh |check|
171171
sqrt |check| |check| |check|
172172
tanh |check|
@@ -232,6 +232,8 @@ Performance
232232
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
233233
| sinf | 13 | 25 | 54 | 57 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
234234
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
235+
| sincosf | 20 | 30 | 62 | 68 | :math:`[-\pi, \pi]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
236+
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
235237
| sinhf | 23 | 64 | 73 | 141 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |
236238
+--------------+-----------+-------------------+-----------+-------------------+-------------------------------------+------------+-------------------------+--------------+---------------+
237239
| tanhf | 25 | 59 | 95 | 125 | :math:`[-10, 10]` | Ryzen 1700 | Ubuntu 20.04 LTS x86_64 | Clang 12.0.0 | FMA |

libc/src/math/generic/CMakeLists.txt

+24-12
Original file line numberDiff line numberDiff line change
@@ -46,14 +46,26 @@ add_object_library(
4646
libc.src.errno.errno
4747
)
4848

49-
add_object_library(
49+
add_header_library(
50+
range_reduction
51+
HDRS
52+
range_reduction.h
53+
range_reduction_fma.h
54+
DEPENDS
55+
libc.src.__support.FPUtil.fputil
56+
libc.src.__support.FPUtil.fma
57+
libc.src.__support.FPUtil.multiply_add
58+
libc.src.__support.FPUtil.nearest_integer
59+
)
60+
61+
add_header_library(
5062
sincosf_utils
5163
HDRS
5264
sincosf_utils.h
53-
SRCS
54-
sincosf_data.cpp
5565
DEPENDS
56-
.math_utils
66+
.range_reduction
67+
libc.src.__support.FPUtil.fputil
68+
libc.src.__support.FPUtil.polyeval
5769
)
5870

5971
add_entrypoint_object(
@@ -62,16 +74,13 @@ add_entrypoint_object(
6274
cosf.cpp
6375
HDRS
6476
../cosf.h
65-
range_reduction.h
66-
range_reduction_fma.h
6777
DEPENDS
68-
.common_constants
78+
.sincosf_utils
6979
libc.include.math
7080
libc.src.errno.errno
7181
libc.src.__support.FPUtil.fputil
7282
libc.src.__support.FPUtil.fma
7383
libc.src.__support.FPUtil.multiply_add
74-
libc.src.__support.FPUtil.nearest_integer
7584
libc.src.__support.FPUtil.polyeval
7685
COMPILE_OPTIONS
7786
-O3
@@ -83,16 +92,14 @@ add_entrypoint_object(
8392
sinf.cpp
8493
HDRS
8594
../sinf.h
86-
range_reduction.h
87-
range_reduction_fma.h
8895
DEPENDS
89-
.common_constants
96+
.range_reduction
97+
.sincosf_utils
9098
libc.include.math
9199
libc.src.errno.errno
92100
libc.src.__support.FPUtil.fputil
93101
libc.src.__support.FPUtil.fma
94102
libc.src.__support.FPUtil.multiply_add
95-
libc.src.__support.FPUtil.nearest_integer
96103
libc.src.__support.FPUtil.polyeval
97104
COMPILE_OPTIONS
98105
-O3
@@ -105,9 +112,14 @@ add_entrypoint_object(
105112
HDRS
106113
../sincosf.h
107114
DEPENDS
115+
.range_reduction
108116
.sincosf_utils
109117
libc.include.math
110118
libc.src.errno.errno
119+
libc.src.__support.FPUtil.fputil
120+
libc.src.__support.FPUtil.fma
121+
libc.src.__support.FPUtil.multiply_add
122+
libc.src.__support.FPUtil.polyeval
111123
COMPILE_OPTIONS
112124
-O3
113125
)

libc/src/math/generic/common_constants.cpp

-17
Original file line numberDiff line numberDiff line change
@@ -240,21 +240,4 @@ const double EXP_2_POW[EXP_num_p] = {
240240
0x1.13821818624b4p-2, 0x1.2ff6b54d8a89cp-2, 0x1.4d0ad5a753e07p-2,
241241
0x1.6ac1f752150a5p-2, 0x1.891fac0e95613p-2};
242242

243-
// Lookup table for sin(k * pi / 16) with k = 0, ..., 31.
244-
// Table is generated with Sollya as follow:
245-
// > display = hexadecimal;
246-
// > for k from 0 to 31 do { D(sin(k * pi/16)); };
247-
const double SIN_K_PI_OVER_16[32] = {
248-
0x0.0000000000000p+0, 0x1.8f8b83c69a60bp-3, 0x1.87de2a6aea963p-2,
249-
0x1.1c73b39ae68c8p-1, 0x1.6a09e667f3bcdp-1, 0x1.a9b66290ea1a3p-1,
250-
0x1.d906bcf328d46p-1, 0x1.f6297cff75cb0p-1, 0x1.0000000000000p+0,
251-
0x1.f6297cff75cb0p-1, 0x1.d906bcf328d46p-1, 0x1.a9b66290ea1a3p-1,
252-
0x1.6a09e667f3bcdp-1, 0x1.1c73b39ae68c8p-1, 0x1.87de2a6aea963p-2,
253-
0x1.8f8b83c69a60bp-3, 0x0.0000000000000p+0, -0x1.8f8b83c69a60bp-3,
254-
-0x1.87de2a6aea963p-2, -0x1.1c73b39ae68c8p-1, -0x1.6a09e667f3bcdp-1,
255-
-0x1.a9b66290ea1a3p-1, -0x1.d906bcf328d46p-1, -0x1.f6297cff75cb0p-1,
256-
-0x1.0000000000000p+0, -0x1.f6297cff75cb0p-1, -0x1.d906bcf328d46p-1,
257-
-0x1.a9b66290ea1a3p-1, -0x1.6a09e667f3bcdp-1, -0x1.1c73b39ae68c8p-1,
258-
-0x1.87de2a6aea963p-2, -0x1.8f8b83c69a60bp-3};
259-
260243
} // namespace __llvm_libc

libc/src/math/generic/common_constants.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -31,18 +31,13 @@ extern const double EXP_M1[195];
3131
// > for i from 0 to 127 do { D(exp(i / 128)); };
3232
extern const double EXP_M2[128];
3333

34-
// Lookup table for sin(k * pi / 16) with k = 0, ..., 31.
35-
// Table is generated with Sollya as follow:
36-
// > display = hexadecimal;
37-
// > for k from 0 to 31 do { D(sin(k * pi/16)); };
38-
extern const double SIN_K_PI_OVER_16[32];
39-
4034
static constexpr int EXP_bits_p = 5;
4135
static constexpr int EXP_num_p = 1 << EXP_bits_p;
4236

4337
// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27]
4438
// printf("%.13a,\n", d[i]);
4539
extern const double EXP_2_POW[EXP_num_p];
40+
4641
} // namespace __llvm_libc
4742

4843
#endif // LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H

libc/src/math/generic/cosf.cpp

+13-60
Original file line numberDiff line numberDiff line change
@@ -7,31 +7,16 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "src/math/cosf.h"
10-
#include "common_constants.h"
10+
#include "sincosf_utils.h"
1111
#include "src/__support/FPUtil/BasicOperations.h"
1212
#include "src/__support/FPUtil/FEnvImpl.h"
1313
#include "src/__support/FPUtil/FPBits.h"
14-
#include "src/__support/FPUtil/PolyEval.h"
1514
#include "src/__support/FPUtil/except_value_utils.h"
1615
#include "src/__support/FPUtil/multiply_add.h"
1716
#include "src/__support/common.h"
1817

1918
#include <errno.h>
2019

21-
#if defined(LIBC_TARGET_HAS_FMA)
22-
#include "range_reduction_fma.h"
23-
// using namespace __llvm_libc::fma;
24-
using __llvm_libc::fma::FAST_PASS_BOUND;
25-
using __llvm_libc::fma::large_range_reduction;
26-
using __llvm_libc::fma::small_range_reduction;
27-
#else
28-
#include "range_reduction.h"
29-
// using namespace __llvm_libc::generic;
30-
using __llvm_libc::generic::FAST_PASS_BOUND;
31-
using __llvm_libc::generic::large_range_reduction;
32-
using __llvm_libc::generic::small_range_reduction;
33-
#endif
34-
3520
namespace __llvm_libc {
3621

3722
// Exceptional cases for cosf.
@@ -132,58 +117,26 @@ LLVM_LIBC_FUNCTION(float, cosf, (float x)) {
132117
return result;
133118
}
134119

135-
// TODO(lntue): refactor range reduction and most of polynomial approximation
136-
// to share between sinf, cosf, and sincosf.
137-
int k;
138-
double y;
139-
140-
if (likely(x_abs < FAST_PASS_BOUND)) {
141-
k = small_range_reduction(xd, y);
142-
} else {
143-
// x is inf or nan.
144-
if (unlikely(x_abs >= 0x7f80'0000U)) {
145-
if (x_abs == 0x7f80'0000U) {
146-
errno = EDOM;
147-
fputil::set_except(FE_INVALID);
148-
}
149-
return x +
150-
FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
120+
// x is inf or nan.
121+
if (unlikely(x_abs >= 0x7f80'0000U)) {
122+
if (x_abs == 0x7f80'0000U) {
123+
errno = EDOM;
124+
fputil::set_except(FE_INVALID);
151125
}
152-
153-
k = large_range_reduction(xd, xbits.get_exponent(), y);
126+
return x +
127+
FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1));
154128
}
155129

156-
// After range reduction, k = round(x * 16 / pi) and y = (x * 16 / pi) - k.
157-
// So k is an integer and -0.5 <= y <= 0.5.
158-
// Then cos(x) = cos((k + y)*pi/16)
159-
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
160-
161-
double ysq = y * y;
162-
163-
// Degree-6 minimax even polynomial for sin(y*pi/16)/y generated by Sollya
164-
// with:
165-
// > Q = fpminimax(sin(y*pi/16)/y, [|0, 2, 4, 6|], [|D...|], [0, 0.5]);
166-
double sin_y =
167-
y * fputil::polyeval(ysq, 0x1.921fb54442d17p-3, -0x1.4abbce6256adp-10,
168-
0x1.466bc5a5ac6b3p-19, -0x1.32bdcb4207562p-29);
169-
// Degree-8 minimax even polynomial for cos(y*pi/16) generated by Sollya with:
170-
// > P = fpminimax(cos(x*pi/16), [|0, 2, 4, 6, 8|], [|1, D...|], [0, 0.5]);
171-
// Note that cosm1_y = cos(y*pi/16) - 1.
172-
double cosm1_y =
173-
ysq * fputil::polyeval(ysq, -0x1.3bd3cc9be45dcp-6, 0x1.03c1f081b08ap-14,
174-
-0x1.55d3c6fb0fb6ep-24, 0x1.e1d3d60f58873p-35);
175-
176-
double sin_k = -SIN_K_PI_OVER_16[k & 31];
177-
// cos(k * pi/16) = sin(k * pi/16 + pi/2) = sin((k + 8) * pi/16).
178-
// cos_k = y * cos(k * pi/16)
179-
double cos_k = SIN_K_PI_OVER_16[(k + 8) & 31];
180-
181130
// Combine the results with the sine of sum formula:
182131
// cos(x) = cos((k + y)*pi/16)
183132
// = cos(y*pi/16) * cos(k*pi/16) - sin(y*pi/16) * sin(k*pi/16)
184133
// = cosm1_y * cos_k + sin_y * sin_k
185134
// = (cosm1_y * cos_k + cos_k) + sin_y * sin_k
186-
return fputil::multiply_add(sin_y, sin_k,
135+
double sin_k, cos_k, sin_y, cosm1_y;
136+
137+
sincosf_eval(xd, x_abs, sin_k, cos_k, sin_y, cosm1_y);
138+
139+
return fputil::multiply_add(sin_y, -sin_k,
187140
fputil::multiply_add(cosm1_y, cos_k, cos_k));
188141
}
189142

0 commit comments

Comments
 (0)