dec2flt: Refactor float traits

tgross35 · tgross35 · commit 5d3c3a115459 · 2025-03-01T22:31:36.000Z
A lot of the magic constants can be turned into expressions. This
reduces some code duplication.

Additionally, add traits to make these operations fully generic. This
will make it easier to support `f16` and `f128`.
diff --git a/library/core/src/num/dec2flt/float.rs b/library/core/src/num/dec2flt/float.rs
@@ -1,14 +1,57 @@
 //! Helper trait for generic float types.
 
+use core::f64;
+
 use crate::fmt::{Debug, LowerExp};
 use crate::num::FpCategory;
-use crate::ops::{Add, Div, Mul, Neg};
+use crate::ops::{self, Add, Div, Mul, Neg};
+
+/// Lossy `as` casting between two types.
+pub trait CastInto<T: Copy>: Copy {
+    fn cast(self) -> T;
+}
+
+/// Collection of traits that allow us to be generic over integer size.
+pub trait Integer:
+    Sized
+    + Clone
+    + Copy
+    + Debug
+    + ops::Shr<u32, Output = Self>
+    + ops::Shl<u32, Output = Self>
+    + ops::BitAnd<Output = Self>
+    + ops::BitOr<Output = Self>
+    + PartialEq
+    + CastInto<i16>
+{
+    const ZERO: Self;
+    const ONE: Self;
+}
 
-/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
+macro_rules! int {
+    ($($ty:ty),+) => {
+        $(
+            impl CastInto<i16> for $ty {
+                fn cast(self) -> i16 {
+                    self as i16
+                }
+            }
+
+            impl Integer for $ty {
+                const ZERO: Self = 0;
+                const ONE: Self = 1;
+            }
+        )+
+    }
+}
+
+int!(u32, u64);
+
+/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
 ///
 /// See the parent module's doc comment for why this is necessary.
 ///
-/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
+/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
 #[doc(hidden)]
 pub trait RawFloat:
     Sized
@@ -24,62 +67,93 @@ pub trait RawFloat:
     + Copy
     + Debug
 {
+    /// The unsigned integer with the same size as the float
+    type Int: Integer + Into<u64>;
+
+    /* general constants */
+
     const INFINITY: Self;
     const NEG_INFINITY: Self;
     const NAN: Self;
     const NEG_NAN: Self;
 
+    /// Bit width of the float
+    const BITS: u32;
+
+    /// Mantissa digits including the hidden bit (provided by core)
+    const MANTISSA_BITS: u32;
+
+    const EXPONENT_MASK: Self::Int;
+    const MANTISSA_MASK: Self::Int;
+
     /// The number of bits in the significand, *excluding* the hidden bit.
-    const MANTISSA_EXPLICIT_BITS: usize;
-
-    // Round-to-even only happens for negative values of q
-    // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
-    // the 32-bitcase.
-    //
-    // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
-    // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
-    // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
-    //
-    // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
-    // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
-    // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
-    // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
-    // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
-    //
-    // Thus we have that we only need to round ties to even when
-    // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
-    // (in the 32-bit case). In both cases,the power of five(5^|q|)
-    // fits in a 64-bit word.
+    const MANTISSA_EXPLICIT_BITS: u32 = Self::MANTISSA_BITS - 1;
+
+    /// Bits for the exponent
+    const EXPONENT_BITS: u32 = Self::BITS - Self::MANTISSA_EXPLICIT_BITS - 1;
+
+    /// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
+    const MINIMUM_EXPONENT: i32 = -(1 << (Self::EXPONENT_BITS - 1)) + 1;
+
+    /// All-ones exponent field value, `(1 << EXP_BITS) - 1` (same value as `INFINITE_POWER`)
+    const MAXIMUM_EXPONENT: u32 = (1 << Self::EXPONENT_BITS) - 1;
+
+    /// The exponent bias value
+    const EXPONENT_BIAS: u32 = Self::MAXIMUM_EXPONENT >> 1;
+
+    /// Largest exponent value `(1 << EXP_BITS) - 1`.
+    const INFINITE_POWER: i32 = (1 << Self::EXPONENT_BITS) - 1;
+
+    /// Round-to-even only happens for negative values of q
+    /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
+    /// the 32-bit case.
+    ///
+    /// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
+    /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
+    /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
+    ///
+    /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
+    /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
+    /// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
+    /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
+    /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
+    ///
+    /// Thus we have that we only need to round ties to even when
+    /// we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
+    /// (in the 32-bit case). In both cases, the power of five (5^|q|)
+    /// fits in a 64-bit word.
     const MIN_EXPONENT_ROUND_TO_EVEN: i32;
     const MAX_EXPONENT_ROUND_TO_EVEN: i32;
 
-    // Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
-    const MIN_EXPONENT_FAST_PATH: i64;
-
-    // Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
-    const MAX_EXPONENT_FAST_PATH: i64;
+    /* limits related to Fast pathing */
 
-    // Maximum exponent that can be represented for a disguised-fast path case.
-    // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
-    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
+    /// Largest decimal exponent for a non-infinite value.
+    ///
+    /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
+    /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
+    const LARGEST_POWER_OF_TEN: i32 =
+        ((Self::EXPONENT_BIAS as f64 + 1.0) / f64::consts::LOG2_10) as i32;
 
-    // Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
-    const MINIMUM_EXPONENT: i32;
+    /// Smallest decimal exponent for a non-zero value; anything below `10^SMALLEST_POWER_OF_TEN`
+    /// rounds to zero and is fast-pathed. Uses the explicit mantissa width (`-342` for `f64`).
+    const SMALLEST_POWER_OF_TEN: i32 = -(((Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS + 64)
+        as f64) / f64::consts::LOG2_10) as i32;
 
-    // Largest exponent value `(1 << EXP_BITS) - 1`.
-    const INFINITE_POWER: i32;
+    /// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
+    // assuming FLT_EVAL_METHOD = 0
+    const MAX_EXPONENT_FAST_PATH: i64 =
+        ((Self::MANTISSA_BITS as f64) / (f64::consts::LOG2_10 - 1.0)) as i64;
 
-    // Index (in bits) of the sign.
-    const SIGN_INDEX: usize;
+    /// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
+    const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
 
-    // Smallest decimal exponent for a non-zero value.
-    const SMALLEST_POWER_OF_TEN: i32;
+    /// Maximum exponent that can be represented for a disguised-fast path case.
+    /// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
+    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
+        Self::MAX_EXPONENT_FAST_PATH + (Self::MANTISSA_BITS as f64 / f64::consts::LOG2_10) as i64;
 
-    // Largest decimal exponent for a non-infinite value.
-    const LARGEST_POWER_OF_TEN: i32;
-
-    // Maximum mantissa for the fast-path (`1 << 53` for f64).
-    const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
+    /// Maximum mantissa for the fast-path (`1 << 53` for f64).
+    const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::MANTISSA_BITS;
 
     /// Converts integer into float through an as cast.
     /// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +170,45 @@ pub trait RawFloat:
     /// Returns the category that this number falls into.
     fn classify(self) -> FpCategory;
 
+    /// Transmute to the integer representation
+    fn to_bits(self) -> Self::Int;
+
     /// Returns the mantissa, exponent and sign as integers.
-    fn integer_decode(self) -> (u64, i16, i8);
+    ///
+    /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
+    /// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS)`, which is the
+    /// minimum subnormal power.
+    fn integer_decode(self) -> (u64, i16, i8) {
+        let bits = self.to_bits();
+        let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
+        let mut exponent: i16 =
+            ((bits & Self::EXPONENT_MASK) >> Self::MANTISSA_EXPLICIT_BITS).cast();
+        let mantissa = if exponent == 0 {
+            (bits & Self::MANTISSA_MASK) << 1
+        } else {
+            (bits & Self::MANTISSA_MASK) | (Self::Int::ONE << Self::MANTISSA_EXPLICIT_BITS)
+        };
+        // Exponent bias + mantissa shift
+        exponent -= (Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS) as i16;
+        (mantissa.into(), exponent, sign)
+    }
 }
 
 impl RawFloat for f32 {
+    type Int = u32;
+
     const INFINITY: Self = f32::INFINITY;
     const NEG_INFINITY: Self = f32::NEG_INFINITY;
     const NAN: Self = f32::NAN;
     const NEG_NAN: Self = -f32::NAN;
 
-    const MANTISSA_EXPLICIT_BITS: usize = 23;
+    const BITS: u32 = 32;
+    const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
+    const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
+    const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
+
     const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
     const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
-    const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
-    const MAX_EXPONENT_FAST_PATH: i64 = 10;
-    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
-    const MINIMUM_EXPONENT: i32 = -127;
-    const INFINITE_POWER: i32 = 0xFF;
-    const SIGN_INDEX: usize = 31;
-    const SMALLEST_POWER_OF_TEN: i32 = -65;
-    const LARGEST_POWER_OF_TEN: i32 = 38;
 
     #[inline]
     fn from_u64(v: u64) -> Self {
@@ -136,16 +228,8 @@ impl RawFloat for f32 {
         TABLE[exponent & 15]
     }
 
-    /// Returns the mantissa, exponent and sign as integers.
-    fn integer_decode(self) -> (u64, i16, i8) {
-        let bits = self.to_bits();
-        let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
-        let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
-        let mantissa =
-            if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
-        // Exponent bias + mantissa shift
-        exponent -= 127 + 23;
-        (mantissa as u64, exponent, sign)
+    fn to_bits(self) -> Self::Int {
+        self.to_bits()
     }
 
     fn classify(self) -> FpCategory {
@@ -154,22 +238,20 @@ impl RawFloat for f32 {
 }
 
 impl RawFloat for f64 {
-    const INFINITY: Self = f64::INFINITY;
-    const NEG_INFINITY: Self = f64::NEG_INFINITY;
-    const NAN: Self = f64::NAN;
-    const NEG_NAN: Self = -f64::NAN;
+    type Int = u64;
+
+    const INFINITY: Self = Self::INFINITY;
+    const NEG_INFINITY: Self = Self::NEG_INFINITY;
+    const NAN: Self = Self::NAN;
+    const NEG_NAN: Self = -Self::NAN;
+
+    const BITS: u32 = 64;
+    const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
+    const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
+    const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
 
-    const MANTISSA_EXPLICIT_BITS: usize = 52;
     const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
     const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
-    const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
-    const MAX_EXPONENT_FAST_PATH: i64 = 22;
-    const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
-    const MINIMUM_EXPONENT: i32 = -1023;
-    const INFINITE_POWER: i32 = 0x7FF;
-    const SIGN_INDEX: usize = 63;
-    const SMALLEST_POWER_OF_TEN: i32 = -342;
-    const LARGEST_POWER_OF_TEN: i32 = 308;
 
     #[inline]
     fn from_u64(v: u64) -> Self {
@@ -190,19 +272,8 @@ impl RawFloat for f64 {
         TABLE[exponent & 31]
     }
 
-    /// Returns the mantissa, exponent and sign as integers.
-    fn integer_decode(self) -> (u64, i16, i8) {
-        let bits = self.to_bits();
-        let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
-        let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
-        let mantissa = if exponent == 0 {
-            (bits & 0xfffffffffffff) << 1
-        } else {
-            (bits & 0xfffffffffffff) | 0x10000000000000
-        };
-        // Exponent bias + mantissa shift
-        exponent -= 1023 + 52;
-        (mantissa, exponent, sign)
+    fn to_bits(self) -> Self::Int {
+        self.to_bits()
     }
 
     fn classify(self) -> FpCategory {
diff --git a/library/core/src/num/dec2flt/lemire.rs b/library/core/src/num/dec2flt/lemire.rs
@@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
     // Normalize our significant digits, so the most-significant bit is set.
     let lz = w.leading_zeros();
     w <<= lz;
-    let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
+    let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS as usize + 3);
     if lo == 0xFFFF_FFFF_FFFF_FFFF {
         // If we have failed to approximate w x 5^-q with our 128-bit value.
         // Since the addition of 1 could lead to an overflow which could then
@@ -89,7 +89,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
     if lo <= 1
         && q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
         && q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
-        && mantissa & 3 == 1
+        && mantissa & 0b11 == 0b01
         && (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
     {
         // Zero the lowest bit, so we don't round up.
diff --git a/library/core/src/num/dec2flt/slow.rs b/library/core/src/num/dec2flt/slow.rs
@@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
     }
     // Shift the decimal to the hidden bit, and then round the value
     // to get the high mantissa+1 bits.
-    d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);
+    d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);
     let mut mantissa = d.round();
     if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {
         // Rounding up overflowed to the carry bit, need to
diff --git a/src/etc/test-float-parse/src/traits.rs b/src/etc/test-float-parse/src/traits.rs
@@ -147,12 +147,12 @@ pub trait Float:
 }
 
 macro_rules! impl_float {
-    ($($fty:ty, $ity:ty, $bits:literal);+) => {
+    ($($fty:ty, $ity:ty);+) => {
         $(
             impl Float for $fty {
                 type Int = $ity;
                 type SInt = <Self::Int as Int>::Signed;
-                const BITS: u32 = $bits;
+                const BITS: u32 = <$ity>::BITS;
                 const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1;
                 const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE;
                 const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1);
@@ -168,7 +168,7 @@ macro_rules! impl_float {
     }
 }
 
-impl_float!(f32, u32, 32; f64, u64, 64);
+impl_float!(f32, u32; f64, u64);
 
 /// A test generator. Should provide an iterator that produces unique patterns to parse.
 ///

Original file line number	Diff line number	Diff line change
`@@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {`
`87`	`87`	`}`
`88`	`88`	`// Shift the decimal to the hidden bit, and then round the value`
`89`	`89`	`// to get the high mantissa+1 bits.`
`90`		`- d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);`
	`90`	`+ d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);`
`91`	`91`	`let mut mantissa = d.round();`
`92`	`92`	`if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {`
`93`	`93`	`// Rounding up overflowed to the carry bit, need to`