Skip to content

Commit 5d3c3a1

Browse files
committed
dec2flt: Refactor float traits
A lot of the magic constants can be turned into expressions. This reduces some code duplication. Additionally, add traits to make these operations fully generic. This will make it easier to support `f16` and `f128`.
1 parent 126fd49 commit 5d3c3a1

File tree

4 files changed

+167
-96
lines changed

4 files changed

+167
-96
lines changed

library/core/src/num/dec2flt/float.rs

+161-90
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,57 @@
11
//! Helper trait for generic float types.
22
3+
use core::f64;
4+
35
use crate::fmt::{Debug, LowerExp};
46
use crate::num::FpCategory;
5-
use crate::ops::{Add, Div, Mul, Neg};
7+
use crate::ops::{self, Add, Div, Mul, Neg};
8+
9+
/// Lossy `as` casting between two types.
10+
pub trait CastInto<T: Copy>: Copy {
11+
fn cast(self) -> T;
12+
}
13+
14+
/// Collection of traits that allow us to be generic over integer size.
15+
pub trait Integer:
16+
Sized
17+
+ Clone
18+
+ Copy
19+
+ Debug
20+
+ ops::Shr<u32, Output = Self>
21+
+ ops::Shl<u32, Output = Self>
22+
+ ops::BitAnd<Output = Self>
23+
+ ops::BitOr<Output = Self>
24+
+ PartialEq
25+
+ CastInto<i16>
26+
{
27+
const ZERO: Self;
28+
const ONE: Self;
29+
}
630

7-
/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
31+
macro_rules! int {
32+
($($ty:ty),+) => {
33+
$(
34+
impl CastInto<i16> for $ty {
35+
fn cast(self) -> i16 {
36+
self as i16
37+
}
38+
}
39+
40+
impl Integer for $ty {
41+
const ZERO: Self = 0;
42+
const ONE: Self = 1;
43+
}
44+
)+
45+
}
46+
}
47+
48+
int!(u32, u64);
49+
50+
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
851
///
952
/// See the parent module's doc comment for why this is necessary.
1053
///
11-
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
54+
/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
1255
#[doc(hidden)]
1356
pub trait RawFloat:
1457
Sized
@@ -24,62 +67,93 @@ pub trait RawFloat:
2467
+ Copy
2568
+ Debug
2669
{
70+
/// The unsigned integer with the same size as the float
71+
type Int: Integer + Into<u64>;
72+
73+
/* general constants */
74+
2775
const INFINITY: Self;
2876
const NEG_INFINITY: Self;
2977
const NAN: Self;
3078
const NEG_NAN: Self;
3179

80+
/// Bit width of the float
81+
const BITS: u32;
82+
83+
/// Mantissa digits including the hidden bit (provided by core)
84+
const MANTISSA_BITS: u32;
85+
86+
const EXPONENT_MASK: Self::Int;
87+
const MANTISSA_MASK: Self::Int;
88+
3289
/// The number of bits in the significand, *excluding* the hidden bit.
33-
const MANTISSA_EXPLICIT_BITS: usize;
34-
35-
// Round-to-even only happens for negative values of q
36-
// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37-
// the 32-bitcase.
38-
//
39-
// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40-
// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41-
// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42-
//
43-
// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44-
// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45-
// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46-
// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47-
// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48-
//
49-
// Thus we have that we only need to round ties to even when
50-
// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51-
// (in the 32-bit case). In both cases,the power of five(5^|q|)
52-
// fits in a 64-bit word.
90+
const MANTISSA_EXPLICIT_BITS: u32 = Self::MANTISSA_BITS - 1;
91+
92+
/// Bits for the exponent
93+
const EXPONENT_BITS: u32 = Self::BITS - Self::MANTISSA_EXPLICIT_BITS - 1;
94+
95+
/// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
96+
const MINIMUM_EXPONENT: i32 = -(1 << (Self::EXPONENT_BITS - 1)) + 1;
97+
98+
/// Maximum exponent without overflowing to infinity
99+
const MAXIMUM_EXPONENT: u32 = (1 << Self::EXPONENT_BITS) - 1;
100+
101+
/// The exponent bias value
102+
const EXPONENT_BIAS: u32 = Self::MAXIMUM_EXPONENT >> 1;
103+
104+
/// Largest exponent value `(1 << EXP_BITS) - 1`.
105+
const INFINITE_POWER: i32 = (1 << Self::EXPONENT_BITS) - 1;
106+
107+
/// Round-to-even only happens for negative values of q
108+
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
109+
/// the 32-bit case.
110+
///
111+
/// When q ≥ 0, we have that 5^q ≤ 2m+1. In the 64-bit case, we
112+
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case, we have
113+
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
114+
///
115+
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
116+
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
117+
/// or 2m+1 > 2^24 (32-bit case). Hence, we must have 2^53×5^−q < 2^64
118+
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
119+
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bit case).
120+
///
121+
/// Thus we have that we only need to round ties to even when
122+
/// we have that q ∈ [−4, 23] (in the 64-bit case) or q ∈ [−17, 10]
123+
/// (in the 32-bit case). In both cases, the power of five (5^|q|)
124+
/// fits in a 64-bit word.
53125
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
54126
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
55127

56-
// Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57-
const MIN_EXPONENT_FAST_PATH: i64;
58-
59-
// Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60-
const MAX_EXPONENT_FAST_PATH: i64;
128+
/* limits related to Fast pathing */
61129

62-
// Maximum exponent that can be represented for a disguised-fast path case.
63-
// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
130+
/// Largest decimal exponent for a non-infinite value.
131+
///
132+
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
133+
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
134+
const LARGEST_POWER_OF_TEN: i32 =
135+
((Self::EXPONENT_BIAS as f64 + 1.0) / f64::consts::LOG2_10) as i32;
65136

66-
// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67-
const MINIMUM_EXPONENT: i32;
137+
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
138+
/// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
139+
const SMALLEST_POWER_OF_TEN: i32 =
140+
-(((Self::EXPONENT_BIAS + Self::MANTISSA_BITS + 64) as f64) / f64::consts::LOG2_10) as i32;
68141

69-
// Largest exponent value `(1 << EXP_BITS) - 1`.
70-
const INFINITE_POWER: i32;
142+
/// Maximum exponent for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
143+
// assuming FLT_EVAL_METHOD = 0
144+
const MAX_EXPONENT_FAST_PATH: i64 =
145+
((Self::MANTISSA_BITS as f64) / (f64::consts::LOG2_10 - 1.0)) as i64;
71146

72-
// Index (in bits) of the sign.
73-
const SIGN_INDEX: usize;
147+
/// Minimum exponent for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
148+
const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
74149

75-
// Smallest decimal exponent for a non-zero value.
76-
const SMALLEST_POWER_OF_TEN: i32;
150+
/// Maximum exponent that can be represented for a disguised-fast path case.
151+
/// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
152+
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
153+
Self::MAX_EXPONENT_FAST_PATH + (Self::MANTISSA_BITS as f64 / f64::consts::LOG2_10) as i64;
77154

78-
// Largest decimal exponent for a non-infinite value.
79-
const LARGEST_POWER_OF_TEN: i32;
80-
81-
// Maximum mantissa for the fast-path (`1 << 53` for f64).
82-
const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
155+
/// Maximum mantissa for the fast-path (`1 << 53` for f64).
156+
const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::MANTISSA_BITS;
83157

84158
/// Converts integer into float through an `as` cast.
85159
/// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +170,45 @@ pub trait RawFloat:
96170
/// Returns the category that this number falls into.
97171
fn classify(self) -> FpCategory;
98172

173+
/// Transmute to the integer representation
174+
fn to_bits(self) -> Self::Int;
175+
99176
/// Returns the mantissa, exponent and sign as integers.
100-
fn integer_decode(self) -> (u64, i16, i8);
177+
///
178+
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
179+
/// For 0, the exponent will be `-(EXPONENT_BIAS + MANTISSA_EXPLICIT_BITS)`, which is the
180+
/// minimum subnormal power.
181+
fn integer_decode(self) -> (u64, i16, i8) {
182+
let bits = self.to_bits();
183+
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
184+
let mut exponent: i16 =
185+
((bits & Self::EXPONENT_MASK) >> Self::MANTISSA_EXPLICIT_BITS).cast();
186+
let mantissa = if exponent == 0 {
187+
(bits & Self::MANTISSA_MASK) << 1
188+
} else {
189+
(bits & Self::MANTISSA_MASK) | (Self::Int::ONE << Self::MANTISSA_EXPLICIT_BITS)
190+
};
191+
// Exponent bias + mantissa shift
192+
exponent -= (Self::EXPONENT_BIAS + Self::MANTISSA_EXPLICIT_BITS) as i16;
193+
(mantissa.into(), exponent, sign)
194+
}
101195
}
102196

103197
impl RawFloat for f32 {
198+
type Int = u32;
199+
104200
const INFINITY: Self = f32::INFINITY;
105201
const NEG_INFINITY: Self = f32::NEG_INFINITY;
106202
const NAN: Self = f32::NAN;
107203
const NEG_NAN: Self = -f32::NAN;
108204

109-
const MANTISSA_EXPLICIT_BITS: usize = 23;
205+
const BITS: u32 = 32;
206+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
207+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
208+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
209+
110210
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111211
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112-
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113-
const MAX_EXPONENT_FAST_PATH: i64 = 10;
114-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115-
const MINIMUM_EXPONENT: i32 = -127;
116-
const INFINITE_POWER: i32 = 0xFF;
117-
const SIGN_INDEX: usize = 31;
118-
const SMALLEST_POWER_OF_TEN: i32 = -65;
119-
const LARGEST_POWER_OF_TEN: i32 = 38;
120212

121213
#[inline]
122214
fn from_u64(v: u64) -> Self {
@@ -136,16 +228,8 @@ impl RawFloat for f32 {
136228
TABLE[exponent & 15]
137229
}
138230

139-
/// Returns the mantissa, exponent and sign as integers.
140-
fn integer_decode(self) -> (u64, i16, i8) {
141-
let bits = self.to_bits();
142-
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
143-
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
144-
let mantissa =
145-
if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
146-
// Exponent bias + mantissa shift
147-
exponent -= 127 + 23;
148-
(mantissa as u64, exponent, sign)
231+
fn to_bits(self) -> Self::Int {
232+
self.to_bits()
149233
}
150234

151235
fn classify(self) -> FpCategory {
@@ -154,22 +238,20 @@ impl RawFloat for f32 {
154238
}
155239

156240
impl RawFloat for f64 {
157-
const INFINITY: Self = f64::INFINITY;
158-
const NEG_INFINITY: Self = f64::NEG_INFINITY;
159-
const NAN: Self = f64::NAN;
160-
const NEG_NAN: Self = -f64::NAN;
241+
type Int = u64;
242+
243+
const INFINITY: Self = Self::INFINITY;
244+
const NEG_INFINITY: Self = Self::NEG_INFINITY;
245+
const NAN: Self = Self::NAN;
246+
const NEG_NAN: Self = -Self::NAN;
247+
248+
const BITS: u32 = 64;
249+
const MANTISSA_BITS: u32 = Self::MANTISSA_DIGITS;
250+
const EXPONENT_MASK: Self::Int = Self::EXP_MASK;
251+
const MANTISSA_MASK: Self::Int = Self::MAN_MASK;
161252

162-
const MANTISSA_EXPLICIT_BITS: usize = 52;
163253
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
164254
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
165-
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
166-
const MAX_EXPONENT_FAST_PATH: i64 = 22;
167-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
168-
const MINIMUM_EXPONENT: i32 = -1023;
169-
const INFINITE_POWER: i32 = 0x7FF;
170-
const SIGN_INDEX: usize = 63;
171-
const SMALLEST_POWER_OF_TEN: i32 = -342;
172-
const LARGEST_POWER_OF_TEN: i32 = 308;
173255

174256
#[inline]
175257
fn from_u64(v: u64) -> Self {
@@ -190,19 +272,8 @@ impl RawFloat for f64 {
190272
TABLE[exponent & 31]
191273
}
192274

193-
/// Returns the mantissa, exponent and sign as integers.
194-
fn integer_decode(self) -> (u64, i16, i8) {
195-
let bits = self.to_bits();
196-
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
197-
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
198-
let mantissa = if exponent == 0 {
199-
(bits & 0xfffffffffffff) << 1
200-
} else {
201-
(bits & 0xfffffffffffff) | 0x10000000000000
202-
};
203-
// Exponent bias + mantissa shift
204-
exponent -= 1023 + 52;
205-
(mantissa, exponent, sign)
275+
fn to_bits(self) -> Self::Int {
276+
self.to_bits()
206277
}
207278

208279
fn classify(self) -> FpCategory {

library/core/src/num/dec2flt/lemire.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
3838
// Normalize our significant digits, so the most-significant bit is set.
3939
let lz = w.leading_zeros();
4040
w <<= lz;
41-
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS + 3);
41+
let (lo, hi) = compute_product_approx(q, w, F::MANTISSA_EXPLICIT_BITS as usize + 3);
4242
if lo == 0xFFFF_FFFF_FFFF_FFFF {
4343
// If we have failed to approximate w x 5^-q with our 128-bit value.
4444
// Since the addition of 1 could lead to an overflow which could then
@@ -89,7 +89,7 @@ pub fn compute_float<F: RawFloat>(q: i64, mut w: u64) -> BiasedFp {
8989
if lo <= 1
9090
&& q >= F::MIN_EXPONENT_ROUND_TO_EVEN as i64
9191
&& q <= F::MAX_EXPONENT_ROUND_TO_EVEN as i64
92-
&& mantissa & 3 == 1
92+
&& mantissa & 0b11 == 0b01
9393
&& (mantissa << (upperbit + 64 - F::MANTISSA_EXPLICIT_BITS as i32 - 3)) == hi
9494
{
9595
// Zero the lowest bit, so we don't round up.

library/core/src/num/dec2flt/slow.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ pub(crate) fn parse_long_mantissa<F: RawFloat>(s: &[u8]) -> BiasedFp {
8787
}
8888
// Shift the decimal to the hidden bit, and then round the value
8989
// to get the high mantissa+1 bits.
90-
d.left_shift(F::MANTISSA_EXPLICIT_BITS + 1);
90+
d.left_shift(F::MANTISSA_EXPLICIT_BITS as usize + 1);
9191
let mut mantissa = d.round();
9292
if mantissa >= (1_u64 << (F::MANTISSA_EXPLICIT_BITS + 1)) {
9393
// Rounding up overflowed to the carry bit, need to

src/etc/test-float-parse/src/traits.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -147,12 +147,12 @@ pub trait Float:
147147
}
148148

149149
macro_rules! impl_float {
150-
($($fty:ty, $ity:ty, $bits:literal);+) => {
150+
($($fty:ty, $ity:ty);+) => {
151151
$(
152152
impl Float for $fty {
153153
type Int = $ity;
154154
type SInt = <Self::Int as Int>::Signed;
155-
const BITS: u32 = $bits;
155+
const BITS: u32 = <$ity>::BITS;
156156
const MAN_BITS: u32 = Self::MANTISSA_DIGITS - 1;
157157
const MAN_MASK: Self::Int = (Self::Int::ONE << Self::MAN_BITS) - Self::Int::ONE;
158158
const SIGN_MASK: Self::Int = Self::Int::ONE << (Self::BITS-1);
@@ -168,7 +168,7 @@ macro_rules! impl_float {
168168
}
169169
}
170170

171-
impl_float!(f32, u32, 32; f64, u64, 64);
171+
impl_float!(f32, u32; f64, u64);
172172

173173
/// A test generator. Should provide an iterator that produces unique patterns to parse.
174174
///

0 commit comments

Comments
 (0)