Rollup merge of #41509 - froydnj:float-stack-reduction, r=nagisa

frewsxcv · web-flow · commit 3c1070689b72 · 2017-04-29T23:44:27.000-04:00
reduce stack requirements for floating-point formatting

Doing this speeds up float formatting by roughly 10%, and also makes the formatting code more suitable for embedded environments, where stack space is at a premium.
diff --git a/src/libcore/benches/num/flt2dec/mod.rs b/src/libcore/benches/num/flt2dec/mod.rs
@@ -13,6 +13,10 @@ mod strategy {
     mod grisu;
 }
 
+use std::f64;
+use std::io::Write;
+use std::vec::Vec;
+use test::Bencher;
 use core::num::flt2dec::{decode, DecodableFloat, FullDecoded, Decoded};
 use core::num::flt2dec::MAX_SIG_DIGITS;
 
@@ -22,3 +26,23 @@ pub fn decode_finite<T: DecodableFloat>(v: T) -> Decoded {
         full_decoded => panic!("expected finite, got {:?} instead", full_decoded)
     }
 }
+
+#[bench] // Times `{}` formatting (no precision given) of a short f64 literal into a Vec<u8>.
+fn bench_small_shortest(b: &mut Bencher) {
+    let mut buf = Vec::with_capacity(20); // created once, outside the timed closure
+
+    b.iter(|| {
+        buf.clear(); // drop the previous output; Vec::clear keeps the allocation
+        write!(&mut buf, "{}", 3.1415926f64).unwrap() // io::Write into the Vec; panics on Err
+    });
+}
+
+#[bench] // Times `{}` formatting (no precision given) of f64::MAX into a Vec<u8>.
+fn bench_big_shortest(b: &mut Bencher) {
+    let mut buf = Vec::with_capacity(300); // NOTE(review): output looks > 300 bytes; confirm
+
+    b.iter(|| {
+        buf.clear(); // drop the previous output; Vec::clear keeps the allocation
+        write!(&mut buf, "{}", f64::MAX).unwrap() // io::Write into the Vec; panics on Err
+    });
+}
diff --git a/src/libcore/fmt/float.rs b/src/libcore/fmt/float.rs
@@ -9,8 +9,43 @@
 // except according to those terms.
 
 use fmt::{Formatter, Result, LowerExp, UpperExp, Display, Debug};
+use mem;
 use num::flt2dec;
 
+// Explicit-precision path of `float_to_decimal_common`. Never inlined, so callers don't use
+// the stack space this function requires (a 1024-byte buffer) unless they have to.
+#[inline(never)]
+fn float_to_decimal_common_exact<T>(fmt: &mut Formatter, num: &T,
+                                    sign: flt2dec::Sign, precision: usize) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    unsafe { // NOTE(review): mem::uninitialized is deprecated in newer Rust (see MaybeUninit)
+        let mut buf: [u8; 1024] = mem::uninitialized(); // no zero fill; enough for f32 and f64
+        let mut parts: [flt2dec::Part; 5] = mem::uninitialized(); // assumes <= 5 parts; confirm
+        let formatted = flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact,
+                                                    *num, sign, precision,
+                                                    false, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted) // `formatted` borrows buf/parts, so use it in here
+    }
+}
+
+// No-precision path of `float_to_decimal_common`. Never inlined, so a caller that reaches both
+// this and the exact variant won't wind up using the combined stack space of both functions.
+#[inline(never)]
+fn float_to_decimal_common_shortest<T>(fmt: &mut Formatter,
+                                       num: &T, sign: flt2dec::Sign) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    unsafe { // NOTE(review): mem::uninitialized is deprecated in newer Rust (see MaybeUninit)
+        // MAX_SIG_DIGITS bytes, left uninitialized (no zero fill); enough for f32 and f64
+        let mut buf: [u8; flt2dec::MAX_SIG_DIGITS] = mem::uninitialized();
+        let mut parts: [flt2dec::Part; 5] = mem::uninitialized(); // assumes <= 5 parts; confirm
+        let formatted = flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest,
+                                                 *num, sign, 0, false, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted) // `formatted` borrows buf/parts, so use it in here
+    }
+}
+
 // Common code of floating point Debug and Display.
 fn float_to_decimal_common<T>(fmt: &mut Formatter, num: &T, negative_zero: bool) -> Result
     where T: flt2dec::DecodableFloat
@@ -23,16 +58,48 @@ fn float_to_decimal_common<T>(fmt: &mut Formatter, num: &T, negative_zero: bool)
         (true,  true)  => flt2dec::Sign::MinusPlusRaw,
     };
 
-    let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
-    let formatted = if let Some(precision) = fmt.precision {
-        flt2dec::to_exact_fixed_str(flt2dec::strategy::grisu::format_exact, *num, sign,
-                                    precision, false, &mut buf, &mut parts)
+    if let Some(precision) = fmt.precision {
+        float_to_decimal_common_exact(fmt, num, sign, precision)
     } else {
-        flt2dec::to_shortest_str(flt2dec::strategy::grisu::format_shortest, *num, sign,
-                                 0, false, &mut buf, &mut parts)
-    };
-    fmt.pad_formatted_parts(&formatted)
+        float_to_decimal_common_shortest(fmt, num, sign)
+    }
+}
+
+// Explicit-precision path of `float_to_exponential_common`. Never inlined, so callers don't
+// use the stack space this function requires (a 1024-byte buffer) unless they have to.
+#[inline(never)]
+fn float_to_exponential_common_exact<T>(fmt: &mut Formatter, num: &T,
+                                        sign: flt2dec::Sign, precision: usize, // total digits
+                                        upper: bool) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    unsafe { // NOTE(review): mem::uninitialized is deprecated in newer Rust (see MaybeUninit)
+        let mut buf: [u8; 1024] = mem::uninitialized(); // no zero fill; enough for f32 and f64
+        let mut parts: [flt2dec::Part; 7] = mem::uninitialized(); // assumes <= 7 parts; confirm
+        let formatted = flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact,
+                                                  *num, sign, precision,
+                                                  upper, &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted) // `formatted` borrows buf/parts, so use it in here
+    }
+}
+
+// No-precision path of `float_to_exponential_common`. Never inlined, so a caller reaching both
+// this and the exact variant won't wind up using the combined stack space of both functions.
+#[inline(never)]
+fn float_to_exponential_common_shortest<T>(fmt: &mut Formatter,
+                                           num: &T, sign: flt2dec::Sign,
+                                           upper: bool) -> Result
+    where T: flt2dec::DecodableFloat
+{
+    unsafe { // NOTE(review): mem::uninitialized is deprecated in newer Rust (see MaybeUninit)
+        // MAX_SIG_DIGITS bytes, left uninitialized (no zero fill); enough for f32 and f64
+        let mut buf: [u8; flt2dec::MAX_SIG_DIGITS] = mem::uninitialized();
+        let mut parts: [flt2dec::Part; 7] = mem::uninitialized(); // assumes <= 7 parts; confirm
+        let formatted = flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest,
+                                                     *num, sign, (0, 0), upper,
+                                                     &mut buf, &mut parts);
+        fmt.pad_formatted_parts(&formatted) // `formatted` borrows buf/parts, so use it in here
+    }
 }
 
 // Common code of floating point LowerExp and UpperExp.
@@ -45,17 +112,12 @@ fn float_to_exponential_common<T>(fmt: &mut Formatter, num: &T, upper: bool) ->
         true  => flt2dec::Sign::MinusPlus,
     };
 
-    let mut buf = [0; 1024]; // enough for f32 and f64
-    let mut parts = [flt2dec::Part::Zero(0); 16];
-    let formatted = if let Some(precision) = fmt.precision {
+    if let Some(precision) = fmt.precision {
         // 1 integral digit + `precision` fractional digits = `precision + 1` total digits
-        flt2dec::to_exact_exp_str(flt2dec::strategy::grisu::format_exact, *num, sign,
-                                  precision + 1, upper, &mut buf, &mut parts)
+        float_to_exponential_common_exact(fmt, num, sign, precision + 1, upper)
     } else {
-        flt2dec::to_shortest_exp_str(flt2dec::strategy::grisu::format_shortest, *num, sign,
-                                     (0, 0), upper, &mut buf, &mut parts)
-    };
-    fmt.pad_formatted_parts(&formatted)
+        float_to_exponential_common_shortest(fmt, num, sign, upper)
+    }
 }
 
 macro_rules! floating {