Skip to content

Commit c0a5677

Browse files
committed
[libclc] Update uses of fma to __clc_fma in CLC functions
1 parent b09ec4e commit c0a5677

10 files changed

+156
-122
lines changed

libclc/generic/lib/math/clc_exp10.cl

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clc_convert.h>
2525
#include <clc/clcmacro.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -123,23 +124,25 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
123124
int j = n & 0x3f;
124125
int m = n >> 6;
125126

126-
double r =
127-
R_LN10 * fma(-R_LOG10_2_BY_64_TL, dn, fma(-R_LOG10_2_BY_64_LD, dn, x));
127+
double r = R_LN10 * __clc_fma(-R_LOG10_2_BY_64_TL, dn,
128+
__clc_fma(-R_LOG10_2_BY_64_LD, dn, x));
128129

129130
// 6 term tail of Taylor expansion of e^r
130131
double z2 =
131-
r *
132-
fma(r,
133-
fma(r,
134-
fma(r,
135-
fma(r, fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
136-
0x1.5555555555555p-5),
137-
0x1.5555555555555p-3),
138-
0x1.0000000000000p-1),
139-
1.0);
132+
r * __clc_fma(
133+
r,
134+
__clc_fma(r,
135+
__clc_fma(r,
136+
__clc_fma(r,
137+
__clc_fma(r, 0x1.6c16c16c16c17p-10,
138+
0x1.1111111111111p-7),
139+
0x1.5555555555555p-5),
140+
0x1.5555555555555p-3),
141+
0x1.0000000000000p-1),
142+
1.0);
140143

141144
double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
142-
z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
145+
z2 = __clc_fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
143146

144147
int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
145148

libclc/generic/lib/math/clc_fmod.cl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include <clc/clcmacro.h>
2626
#include <clc/integer/clc_clz.h>
2727
#include <clc/math/clc_floor.h>
28+
#include <clc/math/clc_fma.h>
2829
#include <clc/math/clc_subnormal_config.h>
2930
#include <clc/math/clc_trunc.h>
3031
#include <clc/math/math.h>
@@ -124,7 +125,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
124125

125126
// Compute w * t in quad precision
126127
p = w * t;
127-
pp = fma(w, t, -p);
128+
pp = __clc_fma(w, t, -p);
128129

129130
// Subtract w * t from dx
130131
v = dx - p;
@@ -144,7 +145,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
144145
int todd = lt & 1;
145146

146147
p = w * t;
147-
pp = fma(w, t, -p);
148+
pp = __clc_fma(w, t, -p);
148149
v = dx - p;
149150
dx = v + (((dx - v) - p) - pp);
150151
i = dx < 0.0;

libclc/generic/lib/math/clc_hypot.cl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
2525
#include <clc/integer/clc_abs.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
8081
double ay = y * preadjust;
8182

8283
// The post adjust may overflow, but this can't be avoided in any case
83-
double r = sqrt(fma(ax, ax, ay * ay)) * postadjust;
84+
double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
8485

8586
// If the difference in exponents between x and y is large
8687
double s = x + y;

libclc/generic/lib/math/clc_pow.cl

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include <clc/clc_convert.h>
2525
#include <clc/clcmacro.h>
2626
#include <clc/math/clc_fabs.h>
27+
#include <clc/math/clc_fma.h>
2728
#include <clc/math/clc_mad.h>
2829
#include <clc/math/clc_subnormal_config.h>
2930
#include <clc/math/math.h>
@@ -283,26 +284,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
283284
double log_t = tv.s1;
284285
double f_inv = (log_h + log_t) * f;
285286
double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
286-
double r2 = fma(-F, r1, f) * (log_h + log_t);
287+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
287288
double r = r1 + r2;
288289

289-
double poly = fma(
290-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
290+
double poly = __clc_fma(
291+
r,
292+
__clc_fma(r,
293+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
294+
1.0 / 4.0),
291295
1.0 / 3.0);
292296
poly = poly * r * r * r;
293297

294298
double hr1r1 = 0.5 * r1 * r1;
295299
double poly0h = r1 + hr1r1;
296300
double poly0t = r1 - poly0h + hr1r1;
297-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
301+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
298302

299303
tv = USE_TABLE(powlog_tbl, index);
300304
log_h = tv.s0;
301305
log_t = tv.s1;
302306

303-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
307+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
304308
double resT = resT_t - poly0h;
305-
double resH = fma(xexp, real_log2_lead, log_h);
309+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
306310
double resT_h = poly0h;
307311

308312
double H = resT + resH;
@@ -313,9 +317,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
313317
double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
314318
double y_tail = y - y_head;
315319

316-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
317-
v = fma(y_head, H, temp);
318-
vt = fma(y_head, H, -v) + temp;
320+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
321+
v = __clc_fma(y_head, H, temp);
322+
vt = __clc_fma(y_head, H, -v) + temp;
319323
}
320324

321325
// Now calculate exp of (v,vt)
@@ -339,21 +343,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
339343
double f2 = tv.s1;
340344
double f = f1 + f2;
341345

342-
double r1 = fma(dn, -lnof2_by_64_head, v);
346+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
343347
double r2 = dn * lnof2_by_64_tail;
344348
double r = (r1 + r2) + vt;
345349

346-
double q = fma(
347-
r,
348-
fma(r,
349-
fma(r,
350-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
351-
4.16666666662260795726e-02),
352-
1.66666666665260878863e-01),
353-
5.00000000000000008883e-01);
354-
q = fma(r * r, q, r);
355-
356-
expv = fma(f, q, f2) + f1;
350+
double q =
351+
__clc_fma(r,
352+
__clc_fma(r,
353+
__clc_fma(r,
354+
__clc_fma(r, 1.38889490863777199667e-03,
355+
8.33336798434219616221e-03),
356+
4.16666666662260795726e-02),
357+
1.66666666665260878863e-01),
358+
5.00000000000000008883e-01);
359+
q = __clc_fma(r * r, q, r);
360+
361+
expv = __clc_fma(f, q, f2) + f1;
357362
expv = ldexp(expv, m);
358363

359364
expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;

libclc/generic/lib/math/clc_pown.cl

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include <clc/clc_convert.h>
2525
#include <clc/clcmacro.h>
2626
#include <clc/math/clc_fabs.h>
27+
#include <clc/math/clc_fma.h>
2728
#include <clc/math/clc_mad.h>
2829
#include <clc/math/clc_subnormal_config.h>
2930
#include <clc/math/math.h>
@@ -267,26 +268,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
267268
double log_t = tv.s1;
268269
double f_inv = (log_h + log_t) * f;
269270
double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
270-
double r2 = fma(-F, r1, f) * (log_h + log_t);
271+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
271272
double r = r1 + r2;
272273

273-
double poly = fma(
274-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
274+
double poly = __clc_fma(
275+
r,
276+
__clc_fma(r,
277+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
278+
1.0 / 4.0),
275279
1.0 / 3.0);
276280
poly = poly * r * r * r;
277281

278282
double hr1r1 = 0.5 * r1 * r1;
279283
double poly0h = r1 + hr1r1;
280284
double poly0t = r1 - poly0h + hr1r1;
281-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
285+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
282286

283287
tv = USE_TABLE(powlog_tbl, index);
284288
log_h = tv.s0;
285289
log_t = tv.s1;
286290

287-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
291+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
288292
double resT = resT_t - poly0h;
289-
double resH = fma(xexp, real_log2_lead, log_h);
293+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
290294
double resT_h = poly0h;
291295

292296
double H = resT + resH;
@@ -303,9 +307,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
303307
double y_tail1 = (double)nyt;
304308
y_tail = mask_2_24 ? y_tail1 : y_tail;
305309

306-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
307-
v = fma(y_head, H, temp);
308-
vt = fma(y_head, H, -v) + temp;
310+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
311+
v = __clc_fma(y_head, H, temp);
312+
vt = __clc_fma(y_head, H, -v) + temp;
309313
}
310314

311315
// Now calculate exp of (v,vt)
@@ -329,21 +333,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
329333
double f2 = tv.s1;
330334
double f = f1 + f2;
331335

332-
double r1 = fma(dn, -lnof2_by_64_head, v);
336+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
333337
double r2 = dn * lnof2_by_64_tail;
334338
double r = (r1 + r2) + vt;
335339

336-
double q = fma(
337-
r,
338-
fma(r,
339-
fma(r,
340-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
341-
4.16666666662260795726e-02),
342-
1.66666666665260878863e-01),
343-
5.00000000000000008883e-01);
344-
q = fma(r * r, q, r);
345-
346-
expv = fma(f, q, f2) + f1;
340+
double q =
341+
__clc_fma(r,
342+
__clc_fma(r,
343+
__clc_fma(r,
344+
__clc_fma(r, 1.38889490863777199667e-03,
345+
8.33336798434219616221e-03),
346+
4.16666666662260795726e-02),
347+
1.66666666665260878863e-01),
348+
5.00000000000000008883e-01);
349+
q = __clc_fma(r * r, q, r);
350+
351+
expv = __clc_fma(f, q, f2) + f1;
347352
expv = ldexp(expv, m);
348353

349354
expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;

libclc/generic/lib/math/clc_powr.cl

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include <clc/clc_convert.h>
2525
#include <clc/clcmacro.h>
2626
#include <clc/math/clc_fabs.h>
27+
#include <clc/math/clc_fma.h>
2728
#include <clc/math/clc_mad.h>
2829
#include <clc/math/clc_subnormal_config.h>
2930
#include <clc/math/math.h>
@@ -270,26 +271,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
270271
double log_t = tv.s1;
271272
double f_inv = (log_h + log_t) * f;
272273
double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
273-
double r2 = fma(-F, r1, f) * (log_h + log_t);
274+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
274275
double r = r1 + r2;
275276

276-
double poly = fma(
277-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
277+
double poly = __clc_fma(
278+
r,
279+
__clc_fma(r,
280+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
281+
1.0 / 4.0),
278282
1.0 / 3.0);
279283
poly = poly * r * r * r;
280284

281285
double hr1r1 = 0.5 * r1 * r1;
282286
double poly0h = r1 + hr1r1;
283287
double poly0t = r1 - poly0h + hr1r1;
284-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
288+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
285289

286290
tv = USE_TABLE(powlog_tbl, index);
287291
log_h = tv.s0;
288292
log_t = tv.s1;
289293

290-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
294+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
291295
double resT = resT_t - poly0h;
292-
double resH = fma(xexp, real_log2_lead, log_h);
296+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
293297
double resT_h = poly0h;
294298

295299
double H = resT + resH;
@@ -300,9 +304,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
300304
double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
301305
double y_tail = y - y_head;
302306

303-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
304-
v = fma(y_head, H, temp);
305-
vt = fma(y_head, H, -v) + temp;
307+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
308+
v = __clc_fma(y_head, H, temp);
309+
vt = __clc_fma(y_head, H, -v) + temp;
306310
}
307311

308312
// Now calculate exp of (v,vt)
@@ -326,21 +330,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
326330
double f2 = tv.s1;
327331
double f = f1 + f2;
328332

329-
double r1 = fma(dn, -lnof2_by_64_head, v);
333+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
330334
double r2 = dn * lnof2_by_64_tail;
331335
double r = (r1 + r2) + vt;
332336

333-
double q = fma(
334-
r,
335-
fma(r,
336-
fma(r,
337-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
338-
4.16666666662260795726e-02),
339-
1.66666666665260878863e-01),
340-
5.00000000000000008883e-01);
341-
q = fma(r * r, q, r);
342-
343-
expv = fma(f, q, f2) + f1;
337+
double q =
338+
__clc_fma(r,
339+
__clc_fma(r,
340+
__clc_fma(r,
341+
__clc_fma(r, 1.38889490863777199667e-03,
342+
8.33336798434219616221e-03),
343+
4.16666666662260795726e-02),
344+
1.66666666665260878863e-01),
345+
5.00000000000000008883e-01);
346+
q = __clc_fma(r * r, q, r);
347+
348+
expv = __clc_fma(f, q, f2) + f1;
344349
expv = ldexp(expv, m);
345350

346351
expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;

libclc/generic/lib/math/clc_remainder.cl

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@
2525
#include <clc/clcmacro.h>
2626
#include <clc/integer/clc_clz.h>
2727
#include <clc/math/clc_floor.h>
28+
#include <clc/math/clc_fma.h>
2829
#include <clc/math/clc_subnormal_config.h>
2930
#include <clc/math/clc_trunc.h>
3031
#include <clc/math/math.h>
@@ -136,7 +137,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
136137

137138
// Compute w * t in quad precision
138139
p = w * t;
139-
pp = fma(w, t, -p);
140+
pp = __clc_fma(w, t, -p);
140141

141142
// Subtract w * t from dx
142143
v = dx - p;
@@ -156,7 +157,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
156157
int todd = lt & 1;
157158

158159
p = w * t;
159-
pp = fma(w, t, -p);
160+
pp = __clc_fma(w, t, -p);
160161
v = dx - p;
161162
dx = v + (((dx - v) - p) - pp);
162163
i = dx < 0.0;
@@ -197,7 +198,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
197198
c &= (yexp<1023 & 2.0 * dx> dy) | (dx > 0.5 * dy);
198199
// we could use a conversion here instead since qsgn = +-1
199200
p = qsgn == 1 ? -1.0 : 1.0;
200-
t = fma(y, p, x);
201+
t = __clc_fma(y, p, x);
201202
ret = c ? t : ret;
202203

203204
// We don't need anything special for |x| == 0

0 commit comments

Comments
 (0)