[libclc] Update uses of fma to __clc_fma in CLC functions

frasercrmck · frasercrmck · commit c0a5677639c1 · 2025-02-19T11:34:01.000Z
diff --git a/libclc/generic/lib/math/clc_exp10.cl b/libclc/generic/lib/math/clc_exp10.cl
@@ -23,6 +23,7 @@
 #include <clc/clc.h>
 #include <clc/clc_convert.h>
 #include <clc/clcmacro.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
@@ -123,23 +124,25 @@ _CLC_DEF _CLC_OVERLOAD double __clc_exp10(double x) {
   int j = n & 0x3f;
   int m = n >> 6;
 
-  double r =
-      R_LN10 * fma(-R_LOG10_2_BY_64_TL, dn, fma(-R_LOG10_2_BY_64_LD, dn, x));
+  double r = R_LN10 * __clc_fma(-R_LOG10_2_BY_64_TL, dn,
+                                __clc_fma(-R_LOG10_2_BY_64_LD, dn, x));
 
   // 6 term tail of Taylor expansion of e^r
   double z2 =
-      r *
-      fma(r,
-          fma(r,
-              fma(r,
-                  fma(r, fma(r, 0x1.6c16c16c16c17p-10, 0x1.1111111111111p-7),
-                      0x1.5555555555555p-5),
-                  0x1.5555555555555p-3),
-              0x1.0000000000000p-1),
-          1.0);
+      r * __clc_fma(
+              r,
+              __clc_fma(r,
+                        __clc_fma(r,
+                                  __clc_fma(r,
+                                            __clc_fma(r, 0x1.6c16c16c16c17p-10,
+                                                      0x1.1111111111111p-7),
+                                            0x1.5555555555555p-5),
+                                  0x1.5555555555555p-3),
+                        0x1.0000000000000p-1),
+              1.0);
 
   double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j);
-  z2 = fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
+  z2 = __clc_fma(tv.s0 + tv.s1, z2, tv.s1) + tv.s0;
 
   int small_value = (m < -1022) || ((m == -1022) && (z2 < 1.0));
 
diff --git a/libclc/generic/lib/math/clc_fmod.cl b/libclc/generic/lib/math/clc_fmod.cl
@@ -25,6 +25,7 @@
 #include <clc/clcmacro.h>
 #include <clc/integer/clc_clz.h>
 #include <clc/math/clc_floor.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
@@ -124,7 +125,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
 
     // Compute w * t in quad precision
     p = w * t;
-    pp = fma(w, t, -p);
+    pp = __clc_fma(w, t, -p);
 
     // Subtract w * t from dx
     v = dx - p;
@@ -144,7 +145,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_fmod(double x, double y) {
   int todd = lt & 1;
 
   p = w * t;
-  pp = fma(w, t, -p);
+  pp = __clc_fma(w, t, -p);
   v = dx - p;
   dx = v + (((dx - v) - p) - pp);
   i = dx < 0.0;
diff --git a/libclc/generic/lib/math/clc_hypot.cl b/libclc/generic/lib/math/clc_hypot.cl
@@ -23,6 +23,7 @@
 #include <clc/clc.h>
 #include <clc/clcmacro.h>
 #include <clc/integer/clc_abs.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
@@ -80,7 +81,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_hypot(double x, double y) {
   double ay = y * preadjust;
 
   // The post adjust may overflow, but this can't be avoided in any case
-  double r = sqrt(fma(ax, ax, ay * ay)) * postadjust;
+  double r = sqrt(__clc_fma(ax, ax, ay * ay)) * postadjust;
 
   // If the difference in exponents between x and y is large
   double s = x + y;
diff --git a/libclc/generic/lib/math/clc_pow.cl b/libclc/generic/lib/math/clc_pow.cl
@@ -24,6 +24,7 @@
 #include <clc/clc_convert.h>
 #include <clc/clcmacro.h>
 #include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
@@ -283,26 +284,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
     double log_t = tv.s1;
     double f_inv = (log_h + log_t) * f;
     double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
-    double r2 = fma(-F, r1, f) * (log_h + log_t);
+    double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
     double r = r1 + r2;
 
-    double poly = fma(
-        r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
+    double poly = __clc_fma(
+        r,
+        __clc_fma(r,
+                  __clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
+                  1.0 / 4.0),
         1.0 / 3.0);
     poly = poly * r * r * r;
 
     double hr1r1 = 0.5 * r1 * r1;
     double poly0h = r1 + hr1r1;
     double poly0t = r1 - poly0h + hr1r1;
-    poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
+    poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
 
     tv = USE_TABLE(powlog_tbl, index);
     log_h = tv.s0;
     log_t = tv.s1;
 
-    double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
+    double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
     double resT = resT_t - poly0h;
-    double resH = fma(xexp, real_log2_lead, log_h);
+    double resH = __clc_fma(xexp, real_log2_lead, log_h);
     double resT_h = poly0h;
 
     double H = resT + resH;
@@ -313,9 +317,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
     double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
     double y_tail = y - y_head;
 
-    double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
-    v = fma(y_head, H, temp);
-    vt = fma(y_head, H, -v) + temp;
+    double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
+    v = __clc_fma(y_head, H, temp);
+    vt = __clc_fma(y_head, H, -v) + temp;
   }
 
   // Now calculate exp of (v,vt)
@@ -339,21 +343,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pow(double x, double y) {
     double f2 = tv.s1;
     double f = f1 + f2;
 
-    double r1 = fma(dn, -lnof2_by_64_head, v);
+    double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
     double r2 = dn * lnof2_by_64_tail;
     double r = (r1 + r2) + vt;
 
-    double q = fma(
-        r,
-        fma(r,
-            fma(r,
-                fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
-                4.16666666662260795726e-02),
-            1.66666666665260878863e-01),
-        5.00000000000000008883e-01);
-    q = fma(r * r, q, r);
-
-    expv = fma(f, q, f2) + f1;
+    double q =
+        __clc_fma(r,
+                  __clc_fma(r,
+                            __clc_fma(r,
+                                      __clc_fma(r, 1.38889490863777199667e-03,
+                                                8.33336798434219616221e-03),
+                                      4.16666666662260795726e-02),
+                            1.66666666665260878863e-01),
+                  5.00000000000000008883e-01);
+    q = __clc_fma(r * r, q, r);
+
+    expv = __clc_fma(f, q, f2) + f1;
     expv = ldexp(expv, m);
 
     expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
diff --git a/libclc/generic/lib/math/clc_pown.cl b/libclc/generic/lib/math/clc_pown.cl
@@ -24,6 +24,7 @@
 #include <clc/clc_convert.h>
 #include <clc/clcmacro.h>
 #include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
@@ -267,26 +268,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
     double log_t = tv.s1;
     double f_inv = (log_h + log_t) * f;
     double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
-    double r2 = fma(-F, r1, f) * (log_h + log_t);
+    double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
     double r = r1 + r2;
 
-    double poly = fma(
-        r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
+    double poly = __clc_fma(
+        r,
+        __clc_fma(r,
+                  __clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
+                  1.0 / 4.0),
         1.0 / 3.0);
     poly = poly * r * r * r;
 
     double hr1r1 = 0.5 * r1 * r1;
     double poly0h = r1 + hr1r1;
     double poly0t = r1 - poly0h + hr1r1;
-    poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
+    poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
 
     tv = USE_TABLE(powlog_tbl, index);
     log_h = tv.s0;
     log_t = tv.s1;
 
-    double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
+    double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
     double resT = resT_t - poly0h;
-    double resH = fma(xexp, real_log2_lead, log_h);
+    double resH = __clc_fma(xexp, real_log2_lead, log_h);
     double resT_h = poly0h;
 
     double H = resT + resH;
@@ -303,9 +307,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
     double y_tail1 = (double)nyt;
     y_tail = mask_2_24 ? y_tail1 : y_tail;
 
-    double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
-    v = fma(y_head, H, temp);
-    vt = fma(y_head, H, -v) + temp;
+    double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
+    v = __clc_fma(y_head, H, temp);
+    vt = __clc_fma(y_head, H, -v) + temp;
   }
 
   // Now calculate exp of (v,vt)
@@ -329,21 +333,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_pown(double x, int ny) {
     double f2 = tv.s1;
     double f = f1 + f2;
 
-    double r1 = fma(dn, -lnof2_by_64_head, v);
+    double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
     double r2 = dn * lnof2_by_64_tail;
     double r = (r1 + r2) + vt;
 
-    double q = fma(
-        r,
-        fma(r,
-            fma(r,
-                fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
-                4.16666666662260795726e-02),
-            1.66666666665260878863e-01),
-        5.00000000000000008883e-01);
-    q = fma(r * r, q, r);
-
-    expv = fma(f, q, f2) + f1;
+    double q =
+        __clc_fma(r,
+                  __clc_fma(r,
+                            __clc_fma(r,
+                                      __clc_fma(r, 1.38889490863777199667e-03,
+                                                8.33336798434219616221e-03),
+                                      4.16666666662260795726e-02),
+                            1.66666666665260878863e-01),
+                  5.00000000000000008883e-01);
+    q = __clc_fma(r * r, q, r);
+
+    expv = __clc_fma(f, q, f2) + f1;
     expv = ldexp(expv, m);
 
     expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
diff --git a/libclc/generic/lib/math/clc_powr.cl b/libclc/generic/lib/math/clc_powr.cl
@@ -24,6 +24,7 @@
 #include <clc/clc_convert.h>
 #include <clc/clcmacro.h>
 #include <clc/math/clc_fabs.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_mad.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/math.h>
@@ -270,26 +271,29 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
     double log_t = tv.s1;
     double f_inv = (log_h + log_t) * f;
     double r1 = __clc_as_double(__clc_as_long(f_inv) & 0xfffffffff8000000L);
-    double r2 = fma(-F, r1, f) * (log_h + log_t);
+    double r2 = __clc_fma(-F, r1, f) * (log_h + log_t);
     double r = r1 + r2;
 
-    double poly = fma(
-        r, fma(r, fma(r, fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0), 1.0 / 4.0),
+    double poly = __clc_fma(
+        r,
+        __clc_fma(r,
+                  __clc_fma(r, __clc_fma(r, 1.0 / 7.0, 1.0 / 6.0), 1.0 / 5.0),
+                  1.0 / 4.0),
         1.0 / 3.0);
     poly = poly * r * r * r;
 
     double hr1r1 = 0.5 * r1 * r1;
     double poly0h = r1 + hr1r1;
     double poly0t = r1 - poly0h + hr1r1;
-    poly = fma(r1, r2, fma(0.5 * r2, r2, poly)) + r2 + poly0t;
+    poly = __clc_fma(r1, r2, __clc_fma(0.5 * r2, r2, poly)) + r2 + poly0t;
 
     tv = USE_TABLE(powlog_tbl, index);
     log_h = tv.s0;
     log_t = tv.s1;
 
-    double resT_t = fma(xexp, real_log2_tail, +log_t) - poly;
+    double resT_t = __clc_fma(xexp, real_log2_tail, +log_t) - poly;
     double resT = resT_t - poly0h;
-    double resH = fma(xexp, real_log2_lead, log_h);
+    double resH = __clc_fma(xexp, real_log2_lead, log_h);
     double resT_h = poly0h;
 
     double H = resT + resH;
@@ -300,9 +304,9 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
     double y_head = __clc_as_double(uy & 0xfffffffff8000000L);
     double y_tail = y - y_head;
 
-    double temp = fma(y_tail, H, fma(y_head, T, y_tail * T));
-    v = fma(y_head, H, temp);
-    vt = fma(y_head, H, -v) + temp;
+    double temp = __clc_fma(y_tail, H, __clc_fma(y_head, T, y_tail * T));
+    v = __clc_fma(y_head, H, temp);
+    vt = __clc_fma(y_head, H, -v) + temp;
   }
 
   // Now calculate exp of (v,vt)
@@ -326,21 +330,22 @@ _CLC_DEF _CLC_OVERLOAD double __clc_powr(double x, double y) {
     double f2 = tv.s1;
     double f = f1 + f2;
 
-    double r1 = fma(dn, -lnof2_by_64_head, v);
+    double r1 = __clc_fma(dn, -lnof2_by_64_head, v);
     double r2 = dn * lnof2_by_64_tail;
     double r = (r1 + r2) + vt;
 
-    double q = fma(
-        r,
-        fma(r,
-            fma(r,
-                fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03),
-                4.16666666662260795726e-02),
-            1.66666666665260878863e-01),
-        5.00000000000000008883e-01);
-    q = fma(r * r, q, r);
-
-    expv = fma(f, q, f2) + f1;
+    double q =
+        __clc_fma(r,
+                  __clc_fma(r,
+                            __clc_fma(r,
+                                      __clc_fma(r, 1.38889490863777199667e-03,
+                                                8.33336798434219616221e-03),
+                                      4.16666666662260795726e-02),
+                            1.66666666665260878863e-01),
+                  5.00000000000000008883e-01);
+    q = __clc_fma(r * r, q, r);
+
+    expv = __clc_fma(f, q, f2) + f1;
     expv = ldexp(expv, m);
 
     expv = v > max_exp_arg ? __clc_as_double(0x7FF0000000000000L) : expv;
diff --git a/libclc/generic/lib/math/clc_remainder.cl b/libclc/generic/lib/math/clc_remainder.cl
@@ -25,6 +25,7 @@
 #include <clc/clcmacro.h>
 #include <clc/integer/clc_clz.h>
 #include <clc/math/clc_floor.h>
+#include <clc/math/clc_fma.h>
 #include <clc/math/clc_subnormal_config.h>
 #include <clc/math/clc_trunc.h>
 #include <clc/math/math.h>
@@ -136,7 +137,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
 
     // Compute w * t in quad precision
     p = w * t;
-    pp = fma(w, t, -p);
+    pp = __clc_fma(w, t, -p);
 
     // Subtract w * t from dx
     v = dx - p;
@@ -156,7 +157,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
   int todd = lt & 1;
 
   p = w * t;
-  pp = fma(w, t, -p);
+  pp = __clc_fma(w, t, -p);
   v = dx - p;
   dx = v + (((dx - v) - p) - pp);
   i = dx < 0.0;
@@ -197,7 +198,7 @@ _CLC_DEF _CLC_OVERLOAD double __clc_remainder(double x, double y) {
   c &= (yexp<1023 & 2.0 * dx> dy) | (dx > 0.5 * dy);
   // we could use a conversion here instead since qsgn = +-1
   p = qsgn == 1 ? -1.0 : 1.0;
-  t = fma(y, p, x);
+  t = __clc_fma(y, p, x);
   ret = c ? t : ret;
 
   // We don't need anything special for |x| == 0
diff --git a/libclc/generic/lib/math/clc_remquo.cl b/libclc/generic/lib/math/clc_remquo.cl
diff --git a/libclc/generic/lib/math/clc_rootn.cl b/libclc/generic/lib/math/clc_rootn.cl
diff --git a/libclc/generic/lib/math/sincos_helpers.cl b/libclc/generic/lib/math/sincos_helpers.cl