Skip to content

Commit 44b5c06

Browse files
committed
[libclc] Update uses of fma to __clc_fma in CLC functions
1 parent 7f3b8ec commit 44b5c06

10 files changed: 159 additions & 124 deletions

libclc/generic/lib/math/clc_exp10.cl

Lines changed: 15 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222

2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
25+
#include <clc/math/clc_fma.h>
2526
#include <clc/math/clc_mad.h>
2627
#include <clc/math/clc_subnormal_config.h>
2728
#include <clc/math/math.h>
@@ -122,23 +123,25 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
122123
int j = n & 0x3f;
123124
int m = n >> 6;
124125

125-
double r =
126-
R_LN10 * fma(-R_LOG10_2_BY_64_TL, dn, fma(-R_LOG10_2_BY_64_LD, dn, x));
126+
double r = R_LN10 * __clc_fma(-R_LOG10_2_BY_64_TL, dn,
127+
__clc_fma(-R_LOG10_2_BY_64_LD, dn, x));
127128

128129
// 6 term tail of Taylor expansion of e^r
129130
double z2 =
130-
r *
131-
fma(r,
132-
fma(r,
133-
fma(r,
134-
fma(r, fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
135-
0x1.5555555555555p-5),
136-
0x1.5555555555555p-3),
137-
0x1.0000000000000p-1),
138-
1.0);
131+
r * __clc_fma(
132+
r,
133+
__clc_fma(r,
134+
__clc_fma(r,
135+
__clc_fma(r,
136+
__clc_fma(r, 0x1.6c16c16c16c17p-10,
137+
0x1.1111111111111p-7),
138+
0x1.5555555555555p-5),
139+
0x1.5555555555555p-3),
140+
0x1.0000000000000p-1),
141+
1.0);
139142

140143
double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
141-
z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
144+
z2 = __clc_fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
142145

143146
int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
144147

libclc/generic/lib/math/clc_fmod.cl

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#include <clc/clcmacro.h>
2525
#include <clc/integer/clc_clz.h>
2626
#include <clc/math/clc_floor.h>
27+
#include <clc/math/clc_fma.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/clc_trunc.h>
2930
#include <clc/math/math.h>
@@ -123,7 +124,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
123124

124125
// Compute w * t in quad precision
125126
p = w * t;
126-
pp = fma(w, t, -p);
127+
pp = __clc_fma(w, t, -p);
127128

128129
// Subtract w * t from dx
129130
v = dx - p;
@@ -143,7 +144,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
143144
int todd = lt & 1;
144145

145146
p = w * t;
146-
pp = fma(w, t, -p);
147+
pp = __clc_fma(w, t, -p);
147148
v = dx - p;
148149
dx = v + (((dx - v) - p) - pp);
149150
i = dx < 0.0;

libclc/generic/lib/math/clc_hypot.cl

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
2525
#include <clc/integer/clc_abs.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
8081
double ay = y * preadjust;
8182

8283
// The post adjust may overflow, but this can't be avoided in any case
83-
double r = sqrt(fma(ax, ax, ay * ay)) * postadjust;
84+
double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
8485

8586
// If the difference in exponents between x and y is large
8687
double s = x + y;

libclc/generic/lib/math/clc_pow.cl

Lines changed: 26 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
2525
#include <clc/math/clc_fabs.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -281,26 +282,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
281282
double log_t = tv.s1;
282283
double f_inv = (log_h + log_t) * f;
283284
double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
284-
double r2 = fma(-F, r1, f) * (log_h + log_t);
285+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
285286
double r = r1 + r2;
286287

287-
double poly = fma(
288-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
288+
double poly = __clc_fma(
289+
r,
290+
__clc_fma(r,
291+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
292+
1.0 / 4.0),
289293
1.0 / 3.0);
290294
poly = poly * r * r * r;
291295

292296
double hr1r1 = 0.5 * r1 * r1;
293297
double poly0h = r1 + hr1r1;
294298
double poly0t = r1 - poly0h + hr1r1;
295-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
299+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
296300

297301
tv = USE_TABLE(powlog_tbl, index);
298302
log_h = tv.s0;
299303
log_t = tv.s1;
300304

301-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
305+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
302306
double resT = resT_t - poly0h;
303-
double resH = fma(xexp, real_log2_lead, log_h);
307+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
304308
double resT_h = poly0h;
305309

306310
double H = resT + resH;
@@ -311,9 +315,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
311315
double y_head = as_double(uy & 0xfffffffff8000000L);
312316
double y_tail = y - y_head;
313317

314-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
315-
v = fma(y_head, H, temp);
316-
vt = fma(y_head, H, -v) + temp;
318+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
319+
v = __clc_fma(y_head, H, temp);
320+
vt = __clc_fma(y_head, H, -v) + temp;
317321
}
318322

319323
// Now calculate exp of (v,vt)
@@ -337,21 +341,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
337341
double f2 = tv.s1;
338342
double f = f1 + f2;
339343

340-
double r1 = fma(dn, -lnof2_by_64_head, v);
344+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
341345
double r2 = dn * lnof2_by_64_tail;
342346
double r = (r1 + r2) + vt;
343347

344-
double q = fma(
345-
r,
346-
fma(r,
347-
fma(r,
348-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
349-
4.16666666662260795726e-02),
350-
1.66666666665260878863e-01),
351-
5.00000000000000008883e-01);
352-
q = fma(r * r, q, r);
353-
354-
expv = fma(f, q, f2) + f1;
348+
double q =
349+
__clc_fma(r,
350+
__clc_fma(r,
351+
__clc_fma(r,
352+
__clc_fma(r, 1.38889490863777199667e-03,
353+
8.33336798434219616221e-03),
354+
4.16666666662260795726e-02),
355+
1.66666666665260878863e-01),
356+
5.00000000000000008883e-01);
357+
q = __clc_fma(r * r, q, r);
358+
359+
expv = __clc_fma(f, q, f2) + f1;
355360
expv = ldexp(expv, m);
356361

357362
expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;

libclc/generic/lib/math/clc_pown.cl

Lines changed: 27 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
2525
#include <clc/math/clc_fabs.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -265,26 +266,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
265266
double log_t = tv.s1;
266267
double f_inv = (log_h + log_t) * f;
267268
double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
268-
double r2 = fma(-F, r1, f) * (log_h + log_t);
269+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
269270
double r = r1 + r2;
270271

271-
double poly = fma(
272-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
272+
double poly = __clc_fma(
273+
r,
274+
__clc_fma(r,
275+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
276+
1.0 / 4.0),
273277
1.0 / 3.0);
274278
poly = poly * r * r * r;
275279

276280
double hr1r1 = 0.5 * r1 * r1;
277281
double poly0h = r1 + hr1r1;
278282
double poly0t = r1 - poly0h + hr1r1;
279-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
283+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
280284

281285
tv = USE_TABLE(powlog_tbl, index);
282286
log_h = tv.s0;
283287
log_t = tv.s1;
284288

285-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
289+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
286290
double resT = resT_t - poly0h;
287-
double resH = fma(xexp, real_log2_lead, log_h);
291+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
288292
double resT_h = poly0h;
289293

290294
double H = resT + resH;
@@ -301,9 +305,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
301305
double y_tail1 = (double)nyt;
302306
y_tail = mask_2_24 ? y_tail1 : y_tail;
303307

304-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
305-
v = fma(y_head, H, temp);
306-
vt = fma(y_head, H, -v) + temp;
308+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
309+
v = __clc_fma(y_head, H, temp);
310+
vt = __clc_fma(y_head, H, -v) + temp;
307311
}
308312

309313
// Now calculate exp of (v,vt)
@@ -327,21 +331,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
327331
double f2 = tv.s1;
328332
double f = f1 + f2;
329333

330-
double r1 = fma(dn, -lnof2_by_64_head, v);
334+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
331335
double r2 = dn * lnof2_by_64_tail;
332336
double r = (r1 + r2) + vt;
333337

334-
double q = fma(
335-
r,
336-
fma(r,
337-
fma(r,
338-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
339-
4.16666666662260795726e-02),
340-
1.66666666665260878863e-01),
341-
5.00000000000000008883e-01);
342-
q = fma(r * r, q, r);
343-
344-
expv = fma(f, q, f2) + f1;
338+
double q =
339+
__clc_fma(r,
340+
__clc_fma(r,
341+
__clc_fma(r,
342+
__clc_fma(r, 1.38889490863777199667e-03,
343+
8.33336798434219616221e-03),
344+
4.16666666662260795726e-02),
345+
1.66666666665260878863e-01),
346+
5.00000000000000008883e-01);
347+
q = __clc_fma(r * r, q, r);
348+
349+
expv = __clc_fma(f, q, f2) + f1;
345350
expv = ldexp(expv, m);
346351

347352
expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
@@ -388,7 +393,7 @@ _CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_pown, double, int)
388393
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
389394

390395
_CLC_OVERLOAD _CLC_DEF half __clc_pown(half x, int y) {
391-
return (half)__clc_pown((float)x, y);
396+
return (half)__clc_pown((float)x, y);
392397
}
393398

394399
_CLC_BINARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, half, __clc_pown, half, int);

libclc/generic/lib/math/clc_powr.cl

Lines changed: 28 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
#include <clc/clc.h>
2424
#include <clc/clcmacro.h>
2525
#include <clc/math/clc_fabs.h>
26+
#include <clc/math/clc_fma.h>
2627
#include <clc/math/clc_mad.h>
2728
#include <clc/math/clc_subnormal_config.h>
2829
#include <clc/math/math.h>
@@ -268,26 +269,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
268269
double log_t = tv.s1;
269270
double f_inv = (log_h + log_t) * f;
270271
double r1 = as_double(as_long(f_inv) & 0xfffffffff8000000L);
271-
double r2 = fma(-F, r1, f) * (log_h + log_t);
272+
double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
272273
double r = r1 + r2;
273274

274-
double poly = fma(
275-
r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
275+
double poly = __clc_fma(
276+
r,
277+
__clc_fma(r,
278+
__clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
279+
1.0 / 4.0),
276280
1.0 / 3.0);
277281
poly = poly * r * r * r;
278282

279283
double hr1r1 = 0.5 * r1 * r1;
280284
double poly0h = r1 + hr1r1;
281285
double poly0t = r1 - poly0h + hr1r1;
282-
poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
286+
poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
283287

284288
tv = USE_TABLE(powlog_tbl, index);
285289
log_h = tv.s0;
286290
log_t = tv.s1;
287291

288-
double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
292+
double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
289293
double resT = resT_t - poly0h;
290-
double resH = fma(xexp, real_log2_lead, log_h);
294+
double resH = __clc_fma(xexp, real_log2_lead, log_h);
291295
double resT_h = poly0h;
292296

293297
double H = resT + resH;
@@ -298,9 +302,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
298302
double y_head = as_double(uy & 0xfffffffff8000000L);
299303
double y_tail = y - y_head;
300304

301-
double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
302-
v = fma(y_head, H, temp);
303-
vt = fma(y_head, H, -v) + temp;
305+
double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
306+
v = __clc_fma(y_head, H, temp);
307+
vt = __clc_fma(y_head, H, -v) + temp;
304308
}
305309

306310
// Now calculate exp of (v,vt)
@@ -324,21 +328,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
324328
double f2 = tv.s1;
325329
double f = f1 + f2;
326330

327-
double r1 = fma(dn, -lnof2_by_64_head, v);
331+
double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
328332
double r2 = dn * lnof2_by_64_tail;
329333
double r = (r1 + r2) + vt;
330334

331-
double q = fma(
332-
r,
333-
fma(r,
334-
fma(r,
335-
fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
336-
4.16666666662260795726e-02),
337-
1.66666666665260878863e-01),
338-
5.00000000000000008883e-01);
339-
q = fma(r * r, q, r);
340-
341-
expv = fma(f, q, f2) + f1;
335+
double q =
336+
__clc_fma(r,
337+
__clc_fma(r,
338+
__clc_fma(r,
339+
__clc_fma(r, 1.38889490863777199667e-03,
340+
8.33336798434219616221e-03),
341+
4.16666666662260795726e-02),
342+
1.66666666665260878863e-01),
343+
5.00000000000000008883e-01);
344+
q = __clc_fma(r * r, q, r);
345+
346+
expv = __clc_fma(f, q, f2) + f1;
342347
expv = ldexp(expv, m);
343348

344349
expv = v > max_exp_arg ? as_double(0x7FF0000000000000L) : expv;
@@ -391,5 +396,6 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
391396

392397
return as_double(ret);
393398
}
394-
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double, double)
399+
_CLC_BINARY_VECTORIZE(_CLC_DEF _CLC_OVERLOAD, double, __clc_powr, double,
400+
double)
395401
#endif

0 commit comments

Comments
 (0)