9
9
#ifndef LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
10
10
#define LLVM_LIBC_SRC___SUPPORT_FPUTIL_GENERIC_DIV_H
11
11
12
- #include " hdr/errno_macros.h"
13
12
#include " hdr/fenv_macros.h"
14
13
#include " src/__support/CPP/bit.h"
15
14
#include " src/__support/CPP/type_traits.h"
16
15
#include " src/__support/FPUtil/BasicOperations.h"
17
16
#include " src/__support/FPUtil/FEnvImpl.h"
18
17
#include " src/__support/FPUtil/FPBits.h"
19
18
#include " src/__support/FPUtil/dyadic_float.h"
20
- #include " src/__support/FPUtil/rounding_mode.h"
21
19
#include " src/__support/macros/attributes.h"
22
20
#include " src/__support/macros/optimization.h"
23
21
@@ -35,14 +33,6 @@ div(InType x, InType y) {
35
33
using InStorageType = typename InFPBits::StorageType;
36
34
using DyadicFloat =
37
35
DyadicFloat<cpp::bit_ceil (static_cast <size_t >(InFPBits::FRACTION_LEN))>;
38
- using DyadicMantissaType = typename DyadicFloat::MantissaType;
39
-
40
- // +1 for the implicit bit.
41
- constexpr int DYADIC_EXTRA_MANTISSA_LEN =
42
- DyadicMantissaType::BITS - (InFPBits::FRACTION_LEN + 1 );
43
- // +1 for the extra fractional bit in q.
44
- constexpr int Q_EXTRA_FRACTION_LEN =
45
- InFPBits::FRACTION_LEN + 1 - OutFPBits::FRACTION_LEN;
46
36
47
37
InFPBits x_bits (x);
48
38
InFPBits y_bits (y);
@@ -104,120 +94,33 @@ div(InType x, InType y) {
104
94
DyadicFloat xd (x);
105
95
DyadicFloat yd (y);
106
96
107
- bool would_q_be_subnormal = xd.mantissa < yd.mantissa ;
108
- int q_exponent = xd.get_unbiased_exponent () - yd.get_unbiased_exponent () -
109
- would_q_be_subnormal;
110
-
111
- if (q_exponent + OutFPBits::EXP_BIAS >= OutFPBits::MAX_BIASED_EXPONENT) {
112
- set_errno_if_required (ERANGE);
113
- raise_except_if_required (FE_OVERFLOW | FE_INEXACT);
97
+ // Number of iterations = full output precision + 1 rounding bit + 1 potential
98
+ // leading 0.
99
+ constexpr size_t NUM_ITERS = OutFPBits::FRACTION_LEN + 3 ;
100
+ int result_exp = xd.exponent - yd.exponent - (NUM_ITERS - 1 );
114
101
115
- switch (get_round ()) {
116
- case FE_TONEAREST:
117
- return OutFPBits::inf (result_sign).get_val ();
118
- case FE_DOWNWARD:
119
- if (result_sign.is_pos ())
120
- return OutFPBits::max_normal (result_sign).get_val ();
121
- return OutFPBits::inf (result_sign).get_val ();
122
- case FE_UPWARD:
123
- if (result_sign.is_pos ())
124
- return OutFPBits::inf (result_sign).get_val ();
125
- return OutFPBits::max_normal (result_sign).get_val ();
126
- default :
127
- return OutFPBits::max_normal (result_sign).get_val ();
128
- }
129
- }
102
+ InStorageType q = 0 ;
103
+ InStorageType r = static_cast <InStorageType>(xd.mantissa >> 2 );
104
+ InStorageType yd_mant_in = static_cast <InStorageType>(yd.mantissa >> 1 );
130
105
131
- if (q_exponent < -OutFPBits::EXP_BIAS - OutFPBits::FRACTION_LEN) {
132
- set_errno_if_required (ERANGE);
133
- raise_except_if_required (FE_UNDERFLOW | FE_INEXACT);
134
-
135
- switch (quick_get_round ()) {
136
- case FE_DOWNWARD:
137
- if (result_sign.is_pos ())
138
- return OutFPBits::zero (result_sign).get_val ();
139
- return OutFPBits::min_subnormal (result_sign).get_val ();
140
- case FE_UPWARD:
141
- if (result_sign.is_pos ())
142
- return OutFPBits::min_subnormal (result_sign).get_val ();
143
- return OutFPBits::zero (result_sign).get_val ();
144
- default :
145
- return OutFPBits::zero (result_sign).get_val ();
146
- }
147
- }
148
-
149
- InStorageType q = 1 ;
150
- InStorageType xd_mant_in = static_cast <InStorageType>(
151
- xd.mantissa >> (DYADIC_EXTRA_MANTISSA_LEN - would_q_be_subnormal));
152
- InStorageType yd_mant_in =
153
- static_cast <InStorageType>(yd.mantissa >> DYADIC_EXTRA_MANTISSA_LEN);
154
- InStorageType r = xd_mant_in - yd_mant_in;
155
-
156
- for (size_t i = 0 ; i < InFPBits::FRACTION_LEN + 1 ; i++) {
106
+ for (size_t i = 0 ; i < NUM_ITERS; ++i) {
157
107
q <<= 1 ;
158
- InStorageType t = r << 1 ;
159
- if (t < yd_mant_in) {
160
- r = t;
161
- } else {
108
+ r <<= 1 ;
109
+ if (r >= yd_mant_in) {
162
110
q += 1 ;
163
- r = t - yd_mant_in;
111
+ r -= yd_mant_in;
164
112
}
165
113
}
166
114
167
- bool round ;
168
- bool sticky;
169
- OutStorageType result;
170
-
171
- if (q_exponent > -OutFPBits::EXP_BIAS) {
172
- // Result is normal.
173
-
174
- InStorageType round_mask = InStorageType (1 ) << (Q_EXTRA_FRACTION_LEN - 1 );
175
- round = (q & round_mask) != 0 ;
176
- InStorageType sticky_mask = round_mask - 1 ;
177
- sticky = (q & sticky_mask) != 0 ;
178
-
179
- result = OutFPBits::create_value (
180
- result_sign,
181
- static_cast <OutStorageType>(q_exponent + OutFPBits::EXP_BIAS),
182
- static_cast <OutStorageType>(q >> Q_EXTRA_FRACTION_LEN))
183
- .uintval ();
184
-
185
- } else {
186
- // Result is subnormal.
115
+ DyadicFloat result (result_sign, result_exp, q);
116
+ result.mantissa += r != 0 ;
187
117
188
- // +1 because the leading bit is now part of the fraction.
189
- int extra_fraction_len =
190
- Q_EXTRA_FRACTION_LEN + 1 - q_exponent - OutFPBits::EXP_BIAS;
118
+ OutType output = static_cast <OutType>(result);
191
119
192
- InStorageType round_mask = InStorageType (1 ) << (extra_fraction_len - 1 );
193
- round = (q & round_mask) != 0 ;
194
- InStorageType sticky_mask = round_mask - 1 ;
195
- sticky = (q & sticky_mask) != 0 ;
196
-
197
- result = OutFPBits::create_value (
198
- result_sign, 0 ,
199
- static_cast <OutStorageType>(q >> extra_fraction_len))
200
- .uintval ();
201
- }
202
-
203
- if (round || sticky)
204
- raise_except_if_required (FE_INEXACT);
205
-
206
- bool lsb = (result & 1 ) != 0 ;
207
-
208
- switch (quick_get_round ()) {
209
- case FE_TONEAREST:
210
- if (round && (lsb || sticky))
211
- ++result;
212
- break ;
213
- case FE_UPWARD:
214
- ++result;
215
- break ;
216
- default :
217
- break ;
218
- }
120
+ if (test_except (FE_OVERFLOW | FE_UNDERFLOW) != 0 )
121
+ set_errno_if_required (ERANGE);
219
122
220
- return cpp::bit_cast<OutType>(result) ;
123
+ return output ;
221
124
}
222
125
223
126
} // namespace LIBC_NAMESPACE::fputil::generic
0 commit comments