1
1
//! Helper trait for generic float types.
2
2
3
+ use core:: f64;
4
+
3
5
use crate :: fmt:: { Debug , LowerExp } ;
4
6
use crate :: num:: FpCategory ;
5
- use crate :: ops:: { Add , Div , Mul , Neg } ;
7
+ use crate :: ops:: { self , Add , Div , Mul , Neg } ;
8
+
9
+ /// Lossy `as` casting between two types.
10
+ pub trait CastInto < T : Copy > : Copy {
11
+ fn cast ( self ) -> T ;
12
+ }
13
+
14
+ /// Collection of traits that allow us to be generic over integer size.
15
+ pub trait Integer :
16
+ Sized
17
+ + Clone
18
+ + Copy
19
+ + Debug
20
+ + ops:: Shr < u32 , Output = Self >
21
+ + ops:: Shl < u32 , Output = Self >
22
+ + ops:: BitAnd < Output = Self >
23
+ + ops:: BitOr < Output = Self >
24
+ + PartialEq
25
+ + CastInto < i16 >
26
+ {
27
+ const ZERO : Self ;
28
+ const ONE : Self ;
29
+ }
30
+
31
+ macro_rules! int {
32
+ ( $( $ty: ty) ,+) => {
33
+ $(
34
+ impl CastInto <i16 > for $ty {
35
+ fn cast( self ) -> i16 {
36
+ self as i16
37
+ }
38
+ }
39
+
40
+ impl Integer for $ty {
41
+ const ZERO : Self = 0 ;
42
+ const ONE : Self = 1 ;
43
+ }
44
+ ) +
45
+ }
46
+ }
47
+
48
+ int ! ( u32 , u64 ) ;
6
49
7
- /// A helper trait to avoid duplicating basically all the conversion code for `f32` and `f64` .
50
+ /// A helper trait to avoid duplicating basically all the conversion code for IEEE floats .
8
51
///
9
52
/// See the parent module's doc comment for why this is necessary.
10
53
///
11
- /// Should **never ever** be implemented for other types or be used outside the dec2flt module.
54
+ /// Should **never ever** be implemented for other types or be used outside the ` dec2flt` module.
12
55
#[ doc( hidden) ]
13
56
pub trait RawFloat :
14
57
Sized
@@ -24,62 +67,107 @@ pub trait RawFloat:
24
67
+ Copy
25
68
+ Debug
26
69
{
70
+ /// The unsigned integer with the same size as the float
71
+ type Int : Integer + Into < u64 > ;
72
+
73
+ /* general constants */
74
+
27
75
const INFINITY : Self ;
28
76
const NEG_INFINITY : Self ;
29
77
const NAN : Self ;
30
78
const NEG_NAN : Self ;
31
79
32
- /// The number of bits in the significand, *excluding* the hidden bit.
33
- const MANTISSA_EXPLICIT_BITS : usize ;
34
-
35
- // Round-to-even only happens for negative values of q
36
- // when q ≥ −4 in the 64-bit case and when q ≥ −17 in
37
- // the 32-bitcase.
38
- //
39
- // When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
40
- // have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
41
- // 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
42
- //
43
- // When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
44
- // so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
45
- // or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
46
- // (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
47
- // or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
48
- //
49
- // Thus we have that we only need to round ties to even when
50
- // we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
51
- // (in the 32-bit case). In both cases,the power of five(5^|q|)
52
- // fits in a 64-bit word.
53
- const MIN_EXPONENT_ROUND_TO_EVEN : i32 ;
54
- const MAX_EXPONENT_ROUND_TO_EVEN : i32 ;
80
+ /// Bit width of the float
81
+ const BITS : u32 ;
55
82
56
- // Minimum exponent that for a fast path case, or `-⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
57
- const MIN_EXPONENT_FAST_PATH : i64 ;
83
+ /// The number of bits in the significand, *including* the hidden bit.
84
+ const SIG_TOTAL_BITS : u32 ;
58
85
59
- // Maximum exponent that for a fast path case, or `⌊(MANTISSA_EXPLICIT_BITS+1)/log2(5)⌋`
60
- const MAX_EXPONENT_FAST_PATH : i64 ;
86
+ const EXP_MASK : Self :: Int ;
87
+ const SIG_MASK : Self :: Int ;
61
88
62
- // Maximum exponent that can be represented for a disguised-fast path case.
63
- // This is `MAX_EXPONENT_FAST_PATH + ⌊(MANTISSA_EXPLICIT_BITS+1)/log2(10)⌋`
64
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 ;
65
-
66
- // Minimum exponent value `-(1 << (EXP_BITS - 1)) + 1`.
67
- const MINIMUM_EXPONENT : i32 ;
89
+ /// The number of bits in the significand, *excluding* the hidden bit.
90
+ const SIG_BITS : u32 = Self :: SIG_TOTAL_BITS - 1 ;
91
+
92
+ /// Number of bits in the exponent.
93
+ const EXP_BITS : u32 = Self :: BITS - Self :: SIG_BITS - 1 ;
94
+
95
+ /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
96
+ /// representation.
97
+ ///
98
+ /// This shifted fully right, use `EXP_MASK` for the shifted value.
99
+ const EXP_SAT : u32 = ( 1 << Self :: EXP_BITS ) - 1 ;
100
+
101
+ /// Signed version of `EXP_SAT` since we convert a lot.
102
+ const INFINITE_POWER : i32 = Self :: EXP_SAT as i32 ;
103
+
104
+ /// The exponent bias value. This is also the maximum value of the exponent.
105
+ const EXP_BIAS : u32 = Self :: EXP_SAT >> 1 ;
106
+
107
+ /// Minimum exponent value of normal values.
108
+ const EXP_MIN : i32 = -( Self :: EXP_BIAS as i32 - 1 ) ;
109
+
110
+ /// Round-to-even only happens for negative values of q
111
+ /// when q ≥ −4 in the 64-bit case and when q ≥ −17 in
112
+ /// the 32-bitcase.
113
+ ///
114
+ /// When q ≥ 0,we have that 5^q ≤ 2m+1. In the 64-bit case,we
115
+ /// have 5^q ≤ 2m+1 ≤ 2^54 or q ≤ 23. In the 32-bit case,we have
116
+ /// 5^q ≤ 2m+1 ≤ 2^25 or q ≤ 10.
117
+ ///
118
+ /// When q < 0, we have w ≥ (2m+1)×5^−q. We must have that w < 2^64
119
+ /// so (2m+1)×5^−q < 2^64. We have that 2m+1 > 2^53 (64-bit case)
120
+ /// or 2m+1 > 2^24 (32-bit case). Hence,we must have 2^53×5^−q < 2^64
121
+ /// (64-bit) and 2^24×5^−q < 2^64 (32-bit). Hence we have 5^−q < 2^11
122
+ /// or q ≥ −4 (64-bit case) and 5^−q < 2^40 or q ≥ −17 (32-bitcase).
123
+ ///
124
+ /// Thus we have that we only need to round ties to even when
125
+ /// we have that q ∈ [−4,23](in the 64-bit case) or q∈[−17,10]
126
+ /// (in the 32-bit case). In both cases,the power of five(5^|q|)
127
+ /// fits in a 64-bit word.
128
+ const MIN_EXPONENT_ROUND_TO_EVEN : i32 ;
129
+ const MAX_EXPONENT_ROUND_TO_EVEN : i32 ;
68
130
69
- // Largest exponent value `(1 << EXP_BITS) - 1`.
70
- const INFINITE_POWER : i32 ;
131
+ /* limits related to Fast pathing */
132
+
133
+ /// Largest decimal exponent for a non-infinite value.
134
+ ///
135
+ /// This is the max exponent in binary converted to the max exponent in decimal. Allows fast
136
+ /// pathing anything larger than `10^LARGEST_POWER_OF_TEN`, which will round to infinity.
137
+ const LARGEST_POWER_OF_TEN : i32 = {
138
+ let largest_pow2 = Self :: EXP_BIAS + 1 ;
139
+ pow2_to_pow10 ( largest_pow2 as i64 ) as i32
140
+ } ;
141
+
142
+ /// Smallest decimal exponent for a non-zero value. This allows for fast pathing anything
143
+ /// smaller than `10^SMALLEST_POWER_OF_TEN`, which will round to zero.
144
+ ///
145
+ /// The smallest power of ten is represented by `⌊log10(2^-n / (2^64 - 1))⌋`, where `n` is
146
+ /// the smallest power of two. The `2^64 - 1)` denomenator comes from the number of values
147
+ /// that are representable by the intermediate storage format. I don't actually know _why_
148
+ /// the storage format is relevant here.
149
+ ///
150
+ /// The values may be calculated using the formula. Unfortunately we cannot calculate them at
151
+ /// compile time since intermediates exceed the range of an `f64`.
152
+ const SMALLEST_POWER_OF_TEN : i32 ;
71
153
72
- // Index (in bits) of the sign.
73
- const SIGN_INDEX : usize ;
154
+ /// Maximum exponent for a fast path case, or `⌊(SIG_BITS+1)/log2(5)⌋`
155
+ // assuming FLT_EVAL_METHOD = 0
156
+ const MAX_EXPONENT_FAST_PATH : i64 = {
157
+ let log2_5 = f64:: consts:: LOG2_10 - 1.0 ;
158
+ ( Self :: SIG_TOTAL_BITS as f64 / log2_5) as i64
159
+ } ;
74
160
75
- // Smallest decimal exponent for a non-zero value.
76
- const SMALLEST_POWER_OF_TEN : i32 ;
161
+ /// Minimum exponent for a fast path case, or `-⌊(SIG_BITS+1)/log2(5)⌋`
162
+ const MIN_EXPONENT_FAST_PATH : i64 = - Self :: MAX_EXPONENT_FAST_PATH ;
77
163
78
- // Largest decimal exponent for a non-infinite value.
79
- const LARGEST_POWER_OF_TEN : i32 ;
164
+ /// Maximum exponent that can be represented for a disguised-fast path case.
165
+ /// This is `MAX_EXPONENT_FAST_PATH + ⌊(SIG_BITS+1)/log2(10)⌋`
166
+ const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 =
167
+ Self :: MAX_EXPONENT_FAST_PATH + ( Self :: SIG_TOTAL_BITS as f64 / f64:: consts:: LOG2_10 ) as i64 ;
80
168
81
- // Maximum mantissa for the fast-path (`1 << 53` for f64).
82
- const MAX_MANTISSA_FAST_PATH : u64 = 2_u64 << Self :: MANTISSA_EXPLICIT_BITS ;
169
+ /// Maximum mantissa for the fast-path (`1 << 53` for f64).
170
+ const MAX_MANTISSA_FAST_PATH : u64 = 1 << Self :: SIG_TOTAL_BITS ;
83
171
84
172
/// Converts integer into float through an as cast.
85
173
/// This is only called in the fast-path algorithm, and therefore
@@ -96,27 +184,51 @@ pub trait RawFloat:
96
184
/// Returns the category that this number falls into.
97
185
fn classify ( self ) -> FpCategory ;
98
186
187
+ /// Transmute to the integer representation
188
+ fn to_bits ( self ) -> Self :: Int ;
189
+
99
190
/// Returns the mantissa, exponent and sign as integers.
100
- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) ;
191
+ ///
192
+ /// That is, this returns `(m, p, s)` such that `s * m * 2^p` represents the original float.
193
+ /// For 0, the exponent will be `-(EXP_BIAS + SIG_BITS`, which is the
194
+ /// minimum subnormal power.
195
+ fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
196
+ let bits = self . to_bits ( ) ;
197
+ let sign: i8 = if bits >> ( Self :: BITS - 1 ) == Self :: Int :: ZERO { 1 } else { -1 } ;
198
+ let mut exponent: i16 = ( ( bits & Self :: EXP_MASK ) >> Self :: SIG_BITS ) . cast ( ) ;
199
+ let mantissa = if exponent == 0 {
200
+ ( bits & Self :: SIG_MASK ) << 1
201
+ } else {
202
+ ( bits & Self :: SIG_MASK ) | ( Self :: Int :: ONE << Self :: SIG_BITS )
203
+ } ;
204
+ // Exponent bias + mantissa shift
205
+ exponent -= ( Self :: EXP_BIAS + Self :: SIG_BITS ) as i16 ;
206
+ ( mantissa. into ( ) , exponent, sign)
207
+ }
208
+ }
209
+
210
+ /// Solve for `b` in `10^b = 2^a`
211
+ const fn pow2_to_pow10 ( a : i64 ) -> i64 {
212
+ let res = ( a as f64 ) / f64:: consts:: LOG2_10 ;
213
+ res as i64
101
214
}
102
215
103
216
impl RawFloat for f32 {
217
+ type Int = u32 ;
218
+
104
219
const INFINITY : Self = f32:: INFINITY ;
105
220
const NEG_INFINITY : Self = f32:: NEG_INFINITY ;
106
221
const NAN : Self = f32:: NAN ;
107
222
const NEG_NAN : Self = -f32:: NAN ;
108
223
109
- const MANTISSA_EXPLICIT_BITS : usize = 23 ;
224
+ const BITS : u32 = 32 ;
225
+ const SIG_TOTAL_BITS : u32 = Self :: MANTISSA_DIGITS ;
226
+ const EXP_MASK : Self :: Int = Self :: EXP_MASK ;
227
+ const SIG_MASK : Self :: Int = Self :: MAN_MASK ;
228
+
110
229
const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -17 ;
111
230
const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 10 ;
112
- const MIN_EXPONENT_FAST_PATH : i64 = -10 ; // assuming FLT_EVAL_METHOD = 0
113
- const MAX_EXPONENT_FAST_PATH : i64 = 10 ;
114
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 17 ;
115
- const MINIMUM_EXPONENT : i32 = -127 ;
116
- const INFINITE_POWER : i32 = 0xFF ;
117
- const SIGN_INDEX : usize = 31 ;
118
231
const SMALLEST_POWER_OF_TEN : i32 = -65 ;
119
- const LARGEST_POWER_OF_TEN : i32 = 38 ;
120
232
121
233
#[ inline]
122
234
fn from_u64 ( v : u64 ) -> Self {
@@ -136,16 +248,8 @@ impl RawFloat for f32 {
136
248
TABLE [ exponent & 15 ]
137
249
}
138
250
139
- /// Returns the mantissa, exponent and sign as integers.
140
- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
141
- let bits = self . to_bits ( ) ;
142
- let sign: i8 = if bits >> 31 == 0 { 1 } else { -1 } ;
143
- let mut exponent: i16 = ( ( bits >> 23 ) & 0xff ) as i16 ;
144
- let mantissa =
145
- if exponent == 0 { ( bits & 0x7fffff ) << 1 } else { ( bits & 0x7fffff ) | 0x800000 } ;
146
- // Exponent bias + mantissa shift
147
- exponent -= 127 + 23 ;
148
- ( mantissa as u64 , exponent, sign)
251
+ fn to_bits ( self ) -> Self :: Int {
252
+ self . to_bits ( )
149
253
}
150
254
151
255
fn classify ( self ) -> FpCategory {
@@ -154,22 +258,21 @@ impl RawFloat for f32 {
154
258
}
155
259
156
260
impl RawFloat for f64 {
157
- const INFINITY : Self = f64:: INFINITY ;
158
- const NEG_INFINITY : Self = f64:: NEG_INFINITY ;
159
- const NAN : Self = f64:: NAN ;
160
- const NEG_NAN : Self = -f64:: NAN ;
261
+ type Int = u64 ;
262
+
263
+ const INFINITY : Self = Self :: INFINITY ;
264
+ const NEG_INFINITY : Self = Self :: NEG_INFINITY ;
265
+ const NAN : Self = Self :: NAN ;
266
+ const NEG_NAN : Self = -Self :: NAN ;
267
+
268
+ const BITS : u32 = 64 ;
269
+ const SIG_TOTAL_BITS : u32 = Self :: MANTISSA_DIGITS ;
270
+ const EXP_MASK : Self :: Int = Self :: EXP_MASK ;
271
+ const SIG_MASK : Self :: Int = Self :: MAN_MASK ;
161
272
162
- const MANTISSA_EXPLICIT_BITS : usize = 52 ;
163
273
const MIN_EXPONENT_ROUND_TO_EVEN : i32 = -4 ;
164
274
const MAX_EXPONENT_ROUND_TO_EVEN : i32 = 23 ;
165
- const MIN_EXPONENT_FAST_PATH : i64 = -22 ; // assuming FLT_EVAL_METHOD = 0
166
- const MAX_EXPONENT_FAST_PATH : i64 = 22 ;
167
- const MAX_EXPONENT_DISGUISED_FAST_PATH : i64 = 37 ;
168
- const MINIMUM_EXPONENT : i32 = -1023 ;
169
- const INFINITE_POWER : i32 = 0x7FF ;
170
- const SIGN_INDEX : usize = 63 ;
171
275
const SMALLEST_POWER_OF_TEN : i32 = -342 ;
172
- const LARGEST_POWER_OF_TEN : i32 = 308 ;
173
276
174
277
#[ inline]
175
278
fn from_u64 ( v : u64 ) -> Self {
@@ -190,19 +293,8 @@ impl RawFloat for f64 {
190
293
TABLE [ exponent & 31 ]
191
294
}
192
295
193
- /// Returns the mantissa, exponent and sign as integers.
194
- fn integer_decode ( self ) -> ( u64 , i16 , i8 ) {
195
- let bits = self . to_bits ( ) ;
196
- let sign: i8 = if bits >> 63 == 0 { 1 } else { -1 } ;
197
- let mut exponent: i16 = ( ( bits >> 52 ) & 0x7ff ) as i16 ;
198
- let mantissa = if exponent == 0 {
199
- ( bits & 0xfffffffffffff ) << 1
200
- } else {
201
- ( bits & 0xfffffffffffff ) | 0x10000000000000
202
- } ;
203
- // Exponent bias + mantissa shift
204
- exponent -= 1023 + 52 ;
205
- ( mantissa, exponent, sign)
296
+ fn to_bits ( self ) -> Self :: Int {
297
+ self . to_bits ( )
206
298
}
207
299
208
300
fn classify ( self ) -> FpCategory {
0 commit comments