Skip to content

Commit 19a909a

Browse files
committed
dec2flt: Refactor float traits
A lot of the magic constants can be turned into expressions. This reduces some code duplication. Additionally, add traits to make these operations fully generic. This will make it easier to support `f16` and `f128`.
1 parent 6c34daf commit 19a909a

File tree

8 files changed

+267
-114
lines changed

8 files changed

+267
-114
lines changed

library/core/src/num/dec2flt/float.rs

+182-90
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,57 @@
11
//! Helper trait for generic float types.
22
3+
use core::f64;
4+
35
use crate::fmt::{Debug, LowerExp};
46
use crate::num::FpCategory;
5-
use crate::ops::{Add, Div, Mul, Neg};
7+
use crate::ops::{self, Add, Div, Mul, Neg};
8+
9+
/// Lossy `as` casting between two types.
10+
pub trait CastInto<T: Copy>: Copy {
11+
fn cast(self) -> T;
12+
}
13+
14+
/// Collection of traits that allow us to be generic over integer size.
15+
pub trait Integer:
16+
Sized
17+
+ Clone
18+
+ Copy
19+
+ Debug
20+
+ ops::Shr<u32, Output = Self>
21+
+ ops::Shl<u32, Output = Self>
22+
+ ops::BitAnd<Output = Self>
23+
+ ops::BitOr<Output = Self>
24+
+ PartialEq
25+
+ CastInto<i16>
26+
{
27+
const ZERO: Self;
28+
const ONE: Self;
29+
}
30+
31+
macro_rules! int {
32+
($($ty:ty),+) => {
33+
$(
34+
impl CastInto<i16> for $ty {
35+
fn cast(self) -> i16 {
36+
self as i16
37+
}
38+
}
39+
40+
impl Integer for $ty {
41+
const ZERO: Self = 0;
42+
const ONE: Self = 1;
43+
}
44+
)+
45+
}
46+
}
47+
48+
int!(u32, u64);
649

7-
/// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64`.
50+
/// A helper trait to avoid duplicating basically all the conversion code for IEEE floats.
851
///
952
/// See the parent module's doc comment for why this is necessary.
1053
///
11-
/// Should **never ever** be implemented for other types or be used outside the dec2flt module.
54+
/// Should **never ever** be implemented for other types or be used outside the `dec2flt` module.
1255
#[doc(hidden)]
1356
pub trait RawFloat:
1457
Sized
@@ -24,62 +67,107 @@ pub trait RawFloat:
2467
+ Copy
2568
+ Debug
2669
{
70+
/// The unsigned integer with the same size as the float
71+
type Int: Integer + Into<u64>;
72+
73+
/* general constants */
74+
2775
const INFINITY: Self;
2876
const NEG_INFINITY: Self;
2977
const NAN: Self;
3078
const NEG_NAN: Self;
3179

32-
/// The number of bits in the significand, *excluding* the hidden bit.
33-
const MANTISSA_EXPLICIT_BITS: usize;
34-
35-
// Round-to-even only happens for negative values of q
36-
// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37-
// the 32-bitcase.
38-
//
39-
// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40-
// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41-
// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42-
//
43-
// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44-
// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45-
// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46-
// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47-
// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48-
//
49-
// Thus we have that we only need to round ties to even when
50-
// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51-
// (in the 32-bit case). In both cases,the power of five(5^|q|)
52-
// fits in a 64-bit word.
53-
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
54-
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
80+
/// Bit width of the float
81+
const BITS: u32;
5582

56-
// Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57-
const MIN_EXPONENT_FAST_PATH: i64;
83+
/// The number of bits in the significand, *including* the hidden bit.
84+
const SIG_TOTAL_BITS: u32;
5885

59-
// Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60-
const MAX_EXPONENT_FAST_PATH: i64;
86+
const EXP_MASK: Self::Int;
87+
const SIG_MASK: Self::Int;
6188

62-
// Maximum exponent that can be represented for a disguised-fast path case.
63-
// This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64;
65-
66-
// Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67-
const MINIMUM_EXPONENT: i32;
89+
/// The number of bits in the significand, *excluding* the hidden bit.
90+
const SIG_BITS: u32 = Self::SIG_TOTAL_BITS - 1;
91+
92+
/// Number of bits in the exponent.
93+
const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
94+
95+
/// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96+
/// representation.
97+
///
98+
/// This shifted fully right, use `EXP_MASK` for the shifted value.
99+
const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
100+
101+
/// Signed version of `EXP_SAT` since we convert a lot.
102+
const INFINITE_POWER: i32 = Self::EXP_SAT as i32;
103+
104+
/// The exponent bias value. This is also the maximum value of the exponent.
105+
const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
106+
107+
/// Minimum exponent value of normal values.
108+
const EXP_MIN: i32 = -(Self::EXP_BIAS as i32 - 1);
109+
110+
/// Round-to-even only happens for negative values of q
111+
/// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112+
/// the 32-bitcase.
113+
///
114+
/// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115+
/// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116+
/// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117+
///
118+
/// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119+
/// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120+
/// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121+
/// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122+
/// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
123+
///
124+
/// Thus we have that we only need to round ties to even when
125+
/// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126+
/// (in the 32-bit case). In both cases,the power of five(5^|q|)
127+
/// fits in a 64-bit word.
128+
const MIN_EXPONENT_ROUND_TO_EVEN: i32;
129+
const MAX_EXPONENT_ROUND_TO_EVEN: i32;
68130

69-
// Largest exponent value `(1 << EXP_BITS) - 1`.
70-
const INFINITE_POWER: i32;
131+
/* limits related to Fast pathing */
132+
133+
/// Largest decimal exponent for a non-infinite value.
134+
///
135+
/// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136+
/// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137+
const LARGEST_POWER_OF_TEN: i32 = {
138+
let largest_pow2 = Self::EXP_BIAS + 1;
139+
pow2_to_pow10(largest_pow2 as i64) as i32
140+
};
141+
142+
/// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143+
/// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144+
///
145+
/// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146+
/// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values
147+
/// that are representable by the intermediate storage format. I don't actually know _why_
148+
/// the storage format is relevant here.
149+
///
150+
/// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151+
/// compile time since intermediates exceed the range of an `f64`.
152+
const SMALLEST_POWER_OF_TEN: i32;
71153

72-
// Index (in bits) of the sign.
73-
const SIGN_INDEX: usize;
154+
/// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155+
// assuming FLT_EVAL_METHOD = 0
156+
const MAX_EXPONENT_FAST_PATH: i64 = {
157+
let log2_5 = f64::consts::LOG2_10 - 1.0;
158+
(Self::SIG_TOTAL_BITS as f64 / log2_5) as i64
159+
};
74160

75-
// Smallest decimal exponent for a non-zero value.
76-
const SMALLEST_POWER_OF_TEN: i32;
161+
/// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162+
const MIN_EXPONENT_FAST_PATH: i64 = -Self::MAX_EXPONENT_FAST_PATH;
77163

78-
// Largest decimal exponent for a non-infinite value.
79-
const LARGEST_POWER_OF_TEN: i32;
164+
/// Maximum exponent that can be represented for a disguised-fast path case.
165+
/// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166+
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 =
167+
Self::MAX_EXPONENT_FAST_PATH + (Self::SIG_TOTAL_BITS as f64 / f64::consts::LOG2_10) as i64;
80168

81-
// Maximum mantissa for the fast-path (`1 << 53` for f64).
82-
const MAX_MANTISSA_FAST_PATH: u64 = 2_u64 << Self::MANTISSA_EXPLICIT_BITS;
169+
/// Maximum mantissa for the fast-path (`1 << 53` for f64).
170+
const MAX_MANTISSA_FAST_PATH: u64 = 1 << Self::SIG_TOTAL_BITS;
83171

84172
/// Converts integer into float through an as cast.
85173
/// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +184,51 @@ pub trait RawFloat:
96184
/// Returns the category that this number falls into.
97185
fn classify(self) -> FpCategory;
98186

187+
/// Transmute to the integer representation
188+
fn to_bits(self) -> Self::Int;
189+
99190
/// Returns the mantissa, exponent and sign as integers.
100-
fn integer_decode(self) -> (u64, i16, i8);
191+
///
192+
/// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
193+
/// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
194+
/// minimum subnormal power.
195+
fn integer_decode(self) -> (u64, i16, i8) {
196+
let bits = self.to_bits();
197+
let sign: i8 = if bits >> (Self::BITS - 1) == Self::Int::ZERO { 1 } else { -1 };
198+
let mut exponent: i16 = ((bits & Self::EXP_MASK) >> Self::SIG_BITS).cast();
199+
let mantissa = if exponent == 0 {
200+
(bits & Self::SIG_MASK) << 1
201+
} else {
202+
(bits & Self::SIG_MASK) | (Self::Int::ONE << Self::SIG_BITS)
203+
};
204+
// Exponent bias + mantissa shift
205+
exponent -= (Self::EXP_BIAS + Self::SIG_BITS) as i16;
206+
(mantissa.into(), exponent, sign)
207+
}
208+
}
209+
210+
/// Solve for `b` in `10^b = 2^a`
211+
const fn pow2_to_pow10(a: i64) -> i64 {
212+
let res = (a as f64) / f64::consts::LOG2_10;
213+
res as i64
101214
}
102215

103216
impl RawFloat for f32 {
217+
type Int = u32;
218+
104219
const INFINITY: Self = f32::INFINITY;
105220
const NEG_INFINITY: Self = f32::NEG_INFINITY;
106221
const NAN: Self = f32::NAN;
107222
const NEG_NAN: Self = -f32::NAN;
108223

109-
const MANTISSA_EXPLICIT_BITS: usize = 23;
224+
const BITS: u32 = 32;
225+
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
226+
const EXP_MASK: Self::Int = Self::EXP_MASK;
227+
const SIG_MASK: Self::Int = Self::MAN_MASK;
228+
110229
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -17;
111230
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 10;
112-
const MIN_EXPONENT_FAST_PATH: i64 = -10; // assuming FLT_EVAL_METHOD = 0
113-
const MAX_EXPONENT_FAST_PATH: i64 = 10;
114-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 17;
115-
const MINIMUM_EXPONENT: i32 = -127;
116-
const INFINITE_POWER: i32 = 0xFF;
117-
const SIGN_INDEX: usize = 31;
118231
const SMALLEST_POWER_OF_TEN: i32 = -65;
119-
const LARGEST_POWER_OF_TEN: i32 = 38;
120232

121233
#[inline]
122234
fn from_u64(v: u64) -> Self {
@@ -136,16 +248,8 @@ impl RawFloat for f32 {
136248
TABLE[exponent & 15]
137249
}
138250

139-
/// Returns the mantissa, exponent and sign as integers.
140-
fn integer_decode(self) -> (u64, i16, i8) {
141-
let bits = self.to_bits();
142-
let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 };
143-
let mut exponent: i16 = ((bits >> 23) & 0xff) as i16;
144-
let mantissa =
145-
if exponent == 0 { (bits & 0x7fffff) << 1 } else { (bits & 0x7fffff) | 0x800000 };
146-
// Exponent bias + mantissa shift
147-
exponent -= 127 + 23;
148-
(mantissa as u64, exponent, sign)
251+
fn to_bits(self) -> Self::Int {
252+
self.to_bits()
149253
}
150254

151255
fn classify(self) -> FpCategory {
@@ -154,22 +258,21 @@ impl RawFloat for f32 {
154258
}
155259

156260
impl RawFloat for f64 {
157-
const INFINITY: Self = f64::INFINITY;
158-
const NEG_INFINITY: Self = f64::NEG_INFINITY;
159-
const NAN: Self = f64::NAN;
160-
const NEG_NAN: Self = -f64::NAN;
261+
type Int = u64;
262+
263+
const INFINITY: Self = Self::INFINITY;
264+
const NEG_INFINITY: Self = Self::NEG_INFINITY;
265+
const NAN: Self = Self::NAN;
266+
const NEG_NAN: Self = -Self::NAN;
267+
268+
const BITS: u32 = 64;
269+
const SIG_TOTAL_BITS: u32 = Self::MANTISSA_DIGITS;
270+
const EXP_MASK: Self::Int = Self::EXP_MASK;
271+
const SIG_MASK: Self::Int = Self::MAN_MASK;
161272

162-
const MANTISSA_EXPLICIT_BITS: usize = 52;
163273
const MIN_EXPONENT_ROUND_TO_EVEN: i32 = -4;
164274
const MAX_EXPONENT_ROUND_TO_EVEN: i32 = 23;
165-
const MIN_EXPONENT_FAST_PATH: i64 = -22; // assuming FLT_EVAL_METHOD = 0
166-
const MAX_EXPONENT_FAST_PATH: i64 = 22;
167-
const MAX_EXPONENT_DISGUISED_FAST_PATH: i64 = 37;
168-
const MINIMUM_EXPONENT: i32 = -1023;
169-
const INFINITE_POWER: i32 = 0x7FF;
170-
const SIGN_INDEX: usize = 63;
171275
const SMALLEST_POWER_OF_TEN: i32 = -342;
172-
const LARGEST_POWER_OF_TEN: i32 = 308;
173276

174277
#[inline]
175278
fn from_u64(v: u64) -> Self {
@@ -190,19 +293,8 @@ impl RawFloat for f64 {
190293
TABLE[exponent & 31]
191294
}
192295

193-
/// Returns the mantissa, exponent and sign as integers.
194-
fn integer_decode(self) -> (u64, i16, i8) {
195-
let bits = self.to_bits();
196-
let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 };
197-
let mut exponent: i16 = ((bits >> 52) & 0x7ff) as i16;
198-
let mantissa = if exponent == 0 {
199-
(bits & 0xfffffffffffff) << 1
200-
} else {
201-
(bits & 0xfffffffffffff) | 0x10000000000000
202-
};
203-
// Exponent bias + mantissa shift
204-
exponent -= 1023 + 52;
205-
(mantissa, exponent, sign)
296+
fn to_bits(self) -> Self::Int {
297+
self.to_bits()
206298
}
207299

208300
fn classify(self) -> FpCategory {

0 commit comments

Comments
 (0)