Skip to content

Commit 596bc61

Browse files
committed
dec2flt: Refactor float traits
A lot of the magic constants can be turned into expressions. This reduces some code duplication. Additionally, add traits to make these operations fully generic. This will make it easier to support `f16` and `f128`.
1 parent f0f8c89 commit 596bc61

File tree

4 files changed

+166
-96
lines changed

4 files changed

+166
-96
lines changed

library/core/src/num/dec2flt/float.rs

+160-90
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,56 @@
11
//! Helper trait for generic float types.
22
3+
use core::f64;
4+
35
use crate::fmt::{Debug, LowerExp};
46
use crate::num::FpCategory;
5-
use crate::ops::{Add, Div, Mul, Neg};
7+
use crate::ops::{self, Add, Div, Mul, Neg};
8+
9+
pub trait CastInto<T: Copy>: Copy {
10+
fn cast(self) -> T;
11+
}
12+
13+
pub trait Integer:
14+
Sized
15+
+ Clone
16+
+ Copy
17+
+ Debug
18+
+ ops::Shr<u32, Output = Self>
19+
+ ops::Shl<u32, Output = Self>
20+
+ ops::BitAnd<Output = Self>
21+
+ ops::BitOr<Output = Self>
22+
+ PartialEq
23+
+ CastInto<i16>
24+
{
25+
const ZERO: Self;
26+
const ONE: Self;
27+
}
628

7-
/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
29+
macro_rules! int {
30+
($($ty:ty),+) => {
31+
$(
32+
impl CastInto<i16> for $ty {
33+
fn cast(self) -> i16 {
34+
self as i16
35+
}
36+
}
37+
38+
39+
impl Integer for $ty {
40+
const ZERO: Self = 0;
41+
const ONE: Self = 1;
42+
}
43+
)+
44+
}
45+
}
46+
47+
int!(u32, u64);
48+
49+
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
850
///
951
/// See the parent module's doc comment for why this is necessary.
1052
///
11-
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
53+
/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
1254
#[doc(hidden)]
1355
pub trait RawFloat:
1456
Sized
@@ -24,62 +66,93 @@ pub trait RawFloat:
2466
+ Copy
2567
+ Debug
2668
{
69+
/// The unsigned integer with the same size as the float
70+
type Int: Integer + Into<u64>;
71+
72+
/* general constants */
73+
2774
const INFINITY: Self;
2875
const NEG_INFINITY: Self;
2976
const NAN: Self;
3077
const NEG_NAN: Self;
3178

79+
/// Bit width of the float
80+
const BITS: u32;
81+
82+
/// Mantissa digits including the hidden bit (provided by core)
83+
const MANTISSA_BITS: u32;
84+
85+
const EXPONENT_MASK: Self::Int;
86+
const MANTISSA_MASK: Self::Int;
87+
3288
/// The number of bits in the significand, *excluding* the hidden bit.
33-
const MANTISSA_EXPLICIT_BITS: usize;
34-
35-
// Round-to-even only happens for negative values of q
36-
// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37-
// the 32-bitcase.
38-
//
39-
// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40-
// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41-
// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42-
//
43-
// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44-
// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45-
// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46-
// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47-
// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48-
//
49-
// Thus we have that we only need to round ties to even when
50-
// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51-
// (in the 32-bit case). In both cases,the power of five(5^|q|)
52-
// fits in a 64-bit word.
89+
const MANTISSA_EXPLICIT_BITS: u32 = Self::MANTISSA_BITS - 1;
90+
91+
/// Bits for the exponent
92+
const EXPONENT_BITS: u32 = Self::BITS - Self::MANTISSA_EXPLICIT_BITS - 1;
93+
94+
/// Minimum exponent value `-(1 << (EXPONENT_BITS - 1)) + 1`.
95+
const MINIMUM_EXPONENT: i32 = -(1 << (Self::EXPONENT_BITS - 1)) + 1;
96+
97+
/// Maximum value of the biased exponent field, `(1 << EXPONENT_BITS) - 1` (all exponent bits set); same value as `INFINITE_POWER`.
98+
const MAXIMUM_EXPONENT: u32 = (1 << Self::EXPONENT_BITS) - 1;
99+
100+
/// The exponent bias value
101+
const EXPONENT_BIAS: u32 = Self::MAXIMUM_EXPONENT >> 1;
102+
103+
/// Largest exponent value `(1 << EXPONENT_BITS) - 1`.
104+
const INFINITE_POWER: i32 = (1 << Self::EXPONENT_BITS) - 1;
105+
106+
/// Round-to-even only happens for negative values of q
107+
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
108+
/// the 32-bit case.
109+
///
110+
/// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
111+
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
112+
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
113+
///
114+
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
115+
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
116+
/// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
117+
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
118+
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
119+
///
120+
/// Thus we have that we only need to round ties to even when
121+
/// we have that q ∈ [−4,23] (in the 64-bit case) or q ∈ [−17,10]
122+
/// (in the 32-bit case). In both cases, the power of five (5^|q|)
123+
/// fits in a 64-bit word.
53124
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
54125
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
55126

56-
// Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57-
const MIN_EXPONENT_FAST_PATH: i64;
58-
59-
// Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60-
const MAX_EXPONENT_FAST_PATH: i64;
127+
/* limits related to Fast pathing */
61128

62-
// Maximum exponent that can be represented for a disguised-fast path case.
63-
// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
129+
/// Largest decimal exponent for a non-infinite value.
130+
///
131+
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
132+
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
133+
const LARGEST_POWER_OF_TEN: i32 =
134+
((Self::EXPONENT_BIAS as f64 + 1.0) / f64::consts::LOG2_10) as i32;
65135

66-
// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67-
const MINIMUM_EXPONENT: i32;
136+
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
137+
/// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
138+
const SMALLEST_POWER_OF_TEN: i32 =
139+
-(((Self::EXPONENT_BIAS + Self::MANTISSA_BITS + 64) as f64) / f64::consts::LOG2_10) as i32;
68140

69-
// Largest exponent value `(1 << EXP_BITS) - 1`.
70-
const INFINITE_POWER: i32;
141+
/// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
142+
// assuming FLT_EVAL_METHOD = 0
143+
const MAX_EXPONENT_FAST_PATH: i64 =
144+
((Self::MANTISSA_BITS as f64) / (f64::consts::LOG2_10 - 1.0)) as i64;
71145

72-
// Index (in bits) of the sign.
73-
const SIGN_INDEX: usize;
146+
/// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
147+
const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
74148

75-
// Smallest decimal exponent for a non-zero value.
76-
const SMALLEST_POWER_OF_TEN: i32;
149+
/// Maximum exponent that can be represented for a disguised-fast path case.
150+
/// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
151+
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
152+
Self::MAX_EXPONENT_FAST_PATH + (Self::MANTISSA_BITS as f64 / f64::consts::LOG2_10) as i64;
77153

78-
// Largest decimal exponent for a non-infinite value.
79-
const LARGEST_POWER_OF_TEN: i32;
80-
81-
// Maximum mantissa for the fast-path (`1 << 53` for f64).
82-
const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
154+
/// Maximum mantissa for the fast-path (`1 << 53` for f64).
155+
const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::MANTISSA_BITS;
83156

84157
/// Converts integer into float through an as cast.
85158
/// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +169,45 @@ pub trait RawFloat:
96169
/// Returns the category that this number falls into.
97170
fn classify(self) -> FpCategory;
98171

172+
/// Transmute to the integer representation
173+
fn to_bits(self) -> Self::Int;
174+
99175
/// Returns the mantissa, exponent and sign as integers.
100-
fn integer_decode(self) -> (u64, i16, i8);
176+
///
177+
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
178+
/// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS`, which is the
179+
/// minimum subnormal power.
180+
fn integer_decode(self) -> (u64, i16, i8) {
181+
let bits = self.to_bits();
182+
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
183+
let mut exponent: i16 =
184+
((bits & Self::EXPONENT_MASK) >> Self::MANTISSA_EXPLICIT_BITS).cast();
185+
let mantissa = if exponent == 0 {
186+
(bits & Self::MANTISSA_MASK) << 1
187+
} else {
188+
(bits & Self::MANTISSA_MASK) | (Self::Int::ONE << Self::MANTISSA_EXPLICIT_BITS)
189+
};
190+
// Exponent bias + mantissa shift
191+
exponent -= (Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS) as i16;
192+
(mantissa.into(), exponent, sign)
193+
}
101194
}
102195

103196
impl RawFloat for f32 {
197+
type Int = u32;
198+
104199
const INFINITY: Self = f32::INFINITY;
105200
const NEG_INFINITY: Self = f32::NEG_INFINITY;
106201
const NAN: Self = f32::NAN;
107202
const NEG_NAN: Self = -f32::NAN;
108203

109-
const MANTISSA_EXPLICIT_BITS: usize = 23;
204+
const BITS: u32 = 32;
205+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
206+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
207+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
208+
110209
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111210
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112-
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113-
const MAX_EXPONENT_FAST_PATH: i64 = 10;
114-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115-
const MINIMUM_EXPONENT: i32 = -127;
116-
const INFINITE_POWER: i32 = 0xFF;
117-
const SIGN_INDEX: usize = 31;
118-
const SMALLEST_POWER_OF_TEN: i32 = -65;
119-
const LARGEST_POWER_OF_TEN: i32 = 38;
120211

121212
#[inline]
122213
fn from_u64(v: u64) -> Self {
@@ -136,16 +227,8 @@ impl RawFloat for f32 {
136227
TABLE[exponent & 15]
137228
}
138229

139-
/// Returns the mantissa, exponent and sign as integers.
140-
fn integer_decode(self) -> (u64, i16, i8) {
141-
let bits = self.to_bits();
142-
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
143-
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
144-
let mantissa =
145-
if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
146-
// Exponent bias + mantissa shift
147-
exponent -= 127 + 23;
148-
(mantissa as u64, exponent, sign)
230+
fn to_bits(self) -> Self::Int {
231+
self.to_bits()
149232
}
150233

151234
fn classify(self) -> FpCategory {
@@ -154,22 +237,20 @@ impl RawFloat for f32 {
154237
}
155238

156239
impl RawFloat for f64 {
157-
const INFINITY: Self = f64::INFINITY;
158-
const NEG_INFINITY: Self = f64::NEG_INFINITY;
159-
const NAN: Self = f64::NAN;
160-
const NEG_NAN: Self = -f64::NAN;
240+
type Int = u64;
241+
242+
const INFINITY: Self = Self::INFINITY;
243+
const NEG_INFINITY: Self = Self::NEG_INFINITY;
244+
const NAN: Self = Self::NAN;
245+
const NEG_NAN: Self = -Self::NAN;
246+
247+
const BITS: u32 = 64;
248+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
249+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
250+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
161251

162-
const MANTISSA_EXPLICIT_BITS: usize = 52;
163252
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
164253
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
165-
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
166-
const MAX_EXPONENT_FAST_PATH: i64 = 22;
167-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
168-
const MINIMUM_EXPONENT: i32 = -1023;
169-
const INFINITE_POWER: i32 = 0x7FF;
170-
const SIGN_INDEX: usize = 63;
171-
const SMALLEST_POWER_OF_TEN: i32 = -342;
172-
const LARGEST_POWER_OF_TEN: i32 = 308;
173254

174255
#[inline]
175256
fn from_u64(v: u64) -> Self {
@@ -190,19 +271,8 @@ impl RawFloat for f64 {
190271
TABLE[exponent & 31]
191272
}
192273

193-
/// Returns the mantissa, exponent and sign as integers.
194-
fn integer_decode(self) -> (u64, i16, i8) {
195-
let bits = self.to_bits();
196-
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
197-
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
198-
let mantissa = if exponent == 0 {
199-
(bits & 0xfffffffffffff) << 1
200-
} else {
201-
(bits & 0xfffffffffffff) | 0x10000000000000
202-
};
203-
// Exponent bias + mantissa shift
204-
exponent -= 1023 + 52;
205-
(mantissa, exponent, sign)
274+
fn to_bits(self) -> Self::Int {
275+
self.to_bits()
206276
}
207277

208278
fn classify(self) -> FpCategory {

library/core/src/num/dec2flt/lemire.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
3838
// Normalize our significant digits, so the most-significant bit is set.
3939
let lz = w.leading_zeros();
4040
w <<= lz;
41-
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
41+
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS as usize + 3);
4242
if lo == 0xFFFF_FFFF_FFFF_FFFF {
4343
// If we have failed to approximate w x 5^-q with our 128-bit value.
4444
// Since the addition of 1 could lead to an overflow which could then
@@ -89,7 +89,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
8989
if lo <= 1
9090
&& q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
9191
&& q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
92-
&& mantissa & 3 == 1
92+
&& mantissa & 0b11 == 0b01
9393
&& (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
9494
{
9595
// Zero the lowest bit, so we don't round up.

library/core/src/num/dec2flt/slow.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
8787
}
8888
// Shift the decimal to the hidden bit, and then round the value
8989
// to get the high mantissa+1 bits.
90-
d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);
90+
d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);
9191
let mut mantissa = d.round();
9292
if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {
9393
// Rounding up overflowed to the carry bit, need to

src/etc/test-float-parse/src/traits.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ pub trait Float:
147147
}
148148

149149
macro_rules! impl_float {
150-
($($fty:ty, $ity:ty, $bits:literal);+) => {
150+
($($fty:ty, $ity:ty);+) => {
151151
$(
152152
impl Float for $fty {
153153
type Int = $ity;
154154
type SInt = <Self::Int as Int>::Signed;
155-
const BITS: u32 = $bits;
155+
const BITS: u32 = <$ity>::BITS;
156156
const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1;
157157
const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE;
158158
const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1);
@@ -168,7 +168,7 @@ macro_rules! impl_float {
168168
}
169169
}
170170

171-
impl_float!(f32, u32, 32; f64, u64, 64);
171+
impl_float!(f32, u32; f64, u64);
172172

173173
/// A test generator. Should provide an iterator that produces unique patterns to parse.
174174
///

0 commit comments

Comments
 (0)