36
36
#include "private.h" /* For _bc_rm_leading_zeros() */
37
37
#include "zend_alloc.h"
38
38
39
- /* Recursive vs non-recursive multiply crossover ranges. */
40
- #if defined(MULDIGITS )
41
- #include "muldigits.h"
39
+
40
+ #if SIZEOF_SIZE_T >= 8
41
+ # define BC_MUL_UINT_DIGITS 8
42
+ # define BC_MUL_UINT_OVERFLOW 100000000
42
43
#else
43
- #define MUL_BASE_DIGITS 80
44
+ # define BC_MUL_UINT_DIGITS 4
45
+ # define BC_MUL_UINT_OVERFLOW 10000
44
46
#endif
45
47
46
- int mul_base_digits = MUL_BASE_DIGITS ;
47
- #define MUL_SMALL_DIGITS mul_base_digits/4
48
48
49
49
/* Multiply utility routines */
50
50
51
- static bc_num new_sub_num (size_t length , size_t scale , char * value )
52
- {
53
- bc_num temp = (bc_num ) emalloc (sizeof (bc_struct ));
54
-
55
- temp -> n_sign = PLUS ;
56
- temp -> n_len = length ;
57
- temp -> n_scale = scale ;
58
- temp -> n_refs = 1 ;
59
- temp -> n_value = value ;
60
- return temp ;
61
- }
62
-
63
- static void _bc_simp_mul (bc_num n1 , size_t n1len , bc_num n2 , int n2len , bc_num * prod )
51
+ /*
52
+ * Converts BCD to uint, going backwards from pointer n by the number of
53
+ * characters specified by len.
54
+ */
55
+ static inline BC_UINT_T bc_partial_convert_to_uint (const char * n , size_t len )
64
56
{
65
- char * n1ptr , * n2ptr , * pvptr ;
66
- char * n1end , * n2end ; /* To the end of n1 and n2. */
67
- int sum = 0 ;
57
+ BC_UINT_T num = 0 ;
58
+ BC_UINT_T base = 1 ;
68
59
69
- int prodlen = n1len + n2len + 1 ;
60
+ for (size_t i = 0 ; i < len ; i ++ ) {
61
+ num += * n * base ;
62
+ base *= BASE ;
63
+ n -- ;
64
+ }
70
65
71
- * prod = bc_new_num_nonzeroed (prodlen , 0 );
66
+ return num ;
67
+ }
72
68
73
- n1end = (char * ) (n1 -> n_value + n1len - 1 );
74
- n2end = (char * ) (n2 -> n_value + n2len - 1 );
75
- pvptr = (char * ) ((* prod )-> n_value + prodlen - 1 );
76
-
77
- /* Here is the loop... */
78
- for (int index = 0 ; index < prodlen - 1 ; index ++ ) {
79
- n1ptr = (char * ) (n1end - MAX (0 , index - n2len + 1 ));
80
- n2ptr = (char * ) (n2end - MIN (index , n2len - 1 ));
81
- while ((n1ptr >= n1 -> n_value ) && (n2ptr <= n2end )) {
82
- sum += * n1ptr * * n2ptr ;
83
- n1ptr -- ;
84
- n2ptr ++ ;
85
- }
86
- * pvptr -- = sum % BASE ;
87
- sum = sum / BASE ;
69
+ static inline void bc_convert_to_uint (BC_UINT_T * n_uint , const char * nend , size_t nlen )
70
+ {
71
+ size_t i = 0 ;
72
+ while (nlen > 0 ) {
73
+ size_t len = MIN (BC_MUL_UINT_DIGITS , nlen );
74
+ n_uint [i ] = bc_partial_convert_to_uint (nend , len );
75
+ nend -= len ;
76
+ nlen -= len ;
77
+ i ++ ;
88
78
}
89
- * pvptr = sum ;
90
79
}
91
80
92
-
93
- /* A special adder/subtractor for the recursive divide and conquer
94
- multiply algorithm. Note: if sub is called, accum must
95
- be larger that what is being subtracted. Also, accum and val
96
- must have n_scale = 0. (e.g. they must look like integers. *) */
97
- static void _bc_shift_addsub (bc_num accum , bc_num val , int shift , bool sub )
81
+ /*
82
+ * If the n_values of n1 and n2 are both 4 (32-bit) or 8 (64-bit) digits or less,
83
+ * the calculation will be performed at high speed without using an array.
84
+ */
85
+ static inline void bc_fast_mul (bc_num n1 , size_t n1len , bc_num n2 , int n2len , bc_num * prod )
98
86
{
99
- signed char * accp , * valp ;
100
- unsigned int carry = 0 ;
101
- size_t count = val -> n_len ;
87
+ const char * n1end = n1 -> n_value + n1len - 1 ;
88
+ const char * n2end = n2 -> n_value + n2len - 1 ;
102
89
103
- if (val -> n_value [0 ] == 0 ) {
104
- count -- ;
105
- }
106
- assert (accum -> n_len + accum -> n_scale >= shift + count );
107
-
108
- /* Set up pointers and others */
109
- accp = (signed char * ) (accum -> n_value + accum -> n_len + accum -> n_scale - shift - 1 );
110
- valp = (signed char * ) (val -> n_value + val -> n_len - 1 );
111
-
112
- if (sub ) {
113
- /* Subtraction, carry is really borrow. */
114
- while (count -- ) {
115
- * accp -= * valp -- + carry ;
116
- if (* accp < 0 ) {
117
- carry = 1 ;
118
- * accp -- += BASE ;
119
- } else {
120
- carry = 0 ;
121
- accp -- ;
122
- }
123
- }
124
- while (carry ) {
125
- * accp -= carry ;
126
- if (* accp < 0 ) {
127
- * accp -- += BASE ;
128
- } else {
129
- carry = 0 ;
130
- }
131
- }
132
- } else {
133
- /* Addition */
134
- while (count -- ) {
135
- * accp += * valp -- + carry ;
136
- if (* accp > (BASE - 1 )) {
137
- carry = 1 ;
138
- * accp -- -= BASE ;
139
- } else {
140
- carry = 0 ;
141
- accp -- ;
142
- }
143
- }
144
- while (carry ) {
145
- * accp += carry ;
146
- if (* accp > (BASE - 1 )) {
147
- * accp -- -= BASE ;
148
- } else {
149
- carry = 0 ;
150
- }
151
- }
90
+ BC_UINT_T n1_uint = bc_partial_convert_to_uint (n1end , n1len );
91
+ BC_UINT_T n2_uint = bc_partial_convert_to_uint (n2end , n2len );
92
+ BC_UINT_T prod_uint = n1_uint * n2_uint ;
93
+
94
+ size_t prodlen = n1len + n2len ;
95
+ * prod = bc_new_num_nonzeroed (prodlen , 0 );
96
+ char * pptr = (* prod )-> n_value ;
97
+ char * pend = pptr + prodlen - 1 ;
98
+
99
+ while (pend >= pptr ) {
100
+ * pend -- = prod_uint % BASE ;
101
+ prod_uint /= BASE ;
152
102
}
153
103
}
154
104
155
- /* Recursive divide and conquer multiply algorithm.
156
- Based on
157
- Let u = u0 + u1*(b^n)
158
- Let v = v0 + v1*(b^n)
159
- Then uv = (B^2n+B^n)*u1*v1 + B^n*(u1-u0)*(v0-v1) + (B^n+1)*u0*v0
160
-
161
- B is the base of storage, number of digits in u1,u0 close to equal .
162
- */
163
- static void _bc_rec_mul (bc_num u , size_t ulen , bc_num v , size_t vlen , bc_num * prod )
105
+ /*
106
+ * Converts the BCD of bc_num by 4 (32 bits) or 8 (64 bits) digits to an array of BC_UINT_Ts.
107
+ * The array is generated starting with the smaller digits.
108
+ * e.g. 12345678901234567890 => {34567890, 56789012, 1234}
109
+ *
110
+ * Multiply and add these groups of numbers to perform multiplication fast.
111
+ * How much to shift the digits when adding values can be calculated from the index of the array .
112
+ */
113
+ static void bc_standard_mul (bc_num n1 , size_t n1len , bc_num n2 , size_t n2len , bc_num * prod )
164
114
{
165
- bc_num u0 , u1 , v0 , v1 ;
166
- bc_num m1 , m2 , m3 ;
167
- size_t n ;
168
- bool m1zero ;
169
-
170
- /* Base case? */
171
- if ((ulen + vlen ) < mul_base_digits
172
- || ulen < MUL_SMALL_DIGITS
173
- || vlen < MUL_SMALL_DIGITS
174
- ) {
175
- _bc_simp_mul (u , ulen , v , vlen , prod );
176
- return ;
115
+ size_t i ;
116
+ const char * n1end = n1 -> n_value + n1len - 1 ;
117
+ const char * n2end = n2 -> n_value + n2len - 1 ;
118
+ size_t prodlen = n1len + n2len ;
119
+
120
+ size_t n1_arr_size = (n1len + BC_MUL_UINT_DIGITS - 1 ) / BC_MUL_UINT_DIGITS ;
121
+ size_t n2_arr_size = (n2len + BC_MUL_UINT_DIGITS - 1 ) / BC_MUL_UINT_DIGITS ;
122
+ size_t prod_arr_size = n1_arr_size + n2_arr_size - 1 ;
123
+
124
+ /*
125
+ * let's say that N is the max of n1len and n2len (and a multiple of BC_MUL_UINT_DIGITS for simplicity),
126
+ * then this sum is <= N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS + N/BC_MUL_UINT_DIGITS - 1
127
+ * which is equal to N - 1 if BC_MUL_UINT_DIGITS is 4, and N/2 - 1 if BC_MUL_UINT_DIGITS is 8.
128
+ */
129
+ BC_UINT_T * buf = safe_emalloc (n1_arr_size + n2_arr_size + prod_arr_size , sizeof (BC_UINT_T ), 0 );
130
+
131
+ BC_UINT_T * n1_uint = buf ;
132
+ BC_UINT_T * n2_uint = buf + n1_arr_size ;
133
+ BC_UINT_T * prod_uint = n2_uint + n2_arr_size ;
134
+
135
+ for (i = 0 ; i < prod_arr_size ; i ++ ) {
136
+ prod_uint [i ] = 0 ;
177
137
}
178
138
179
- /* Calculate n -- the u and v split point in digits. */
180
- n = (MAX (ulen , vlen ) + 1 ) / 2 ;
139
+ /* Convert to uint[] */
140
+ bc_convert_to_uint (n1_uint , n1end , n1len );
141
+ bc_convert_to_uint (n2_uint , n2end , n2len );
181
142
182
- /* Split u and v. */
183
- if (ulen < n ) {
184
- u1 = bc_copy_num (BCG (_zero_ ));
185
- u0 = new_sub_num (ulen , 0 , u -> n_value );
186
- } else {
187
- u1 = new_sub_num (ulen - n , 0 , u -> n_value );
188
- u0 = new_sub_num (n , 0 , u -> n_value + ulen - n );
189
- }
190
- if (vlen < n ) {
191
- v1 = bc_copy_num (BCG (_zero_ ));
192
- v0 = new_sub_num (vlen , 0 , v -> n_value );
193
- } else {
194
- v1 = new_sub_num (vlen - n , 0 , v -> n_value );
195
- v0 = new_sub_num (n , 0 , v -> n_value + vlen - n );
143
+ /* Multiplication and addition */
144
+ for (i = 0 ; i < n1_arr_size ; i ++ ) {
145
+ for (size_t j = 0 ; j < n2_arr_size ; j ++ ) {
146
+ prod_uint [i + j ] += n1_uint [i ] * n2_uint [j ];
147
+ }
196
148
}
197
- _bc_rm_leading_zeros (u1 );
198
- _bc_rm_leading_zeros (u0 );
199
- _bc_rm_leading_zeros (v1 );
200
- _bc_rm_leading_zeros (v0 );
201
-
202
- m1zero = bc_is_zero (u1 ) || bc_is_zero (v1 );
203
-
204
- /* Calculate sub results ... */
205
-
206
- bc_num d1 = bc_sub (u1 , u0 , 0 );
207
- bc_num d2 = bc_sub (v0 , v1 , 0 );
208
149
209
-
210
- /* Do recursive multiplies and shifted adds. */
211
- if (m1zero ) {
212
- m1 = bc_copy_num (BCG (_zero_ ));
213
- } else {
214
- _bc_rec_mul (u1 , u1 -> n_len , v1 , v1 -> n_len , & m1 );
150
+ /*
151
+ * Move a value exceeding 4/8 digits by carrying to the next digit.
152
+ * However, the last digit does nothing.
153
+ */
154
+ for (i = 0 ; i < prod_arr_size - 1 ; i ++ ) {
155
+ prod_uint [i + 1 ] += prod_uint [i ] / BC_MUL_UINT_OVERFLOW ;
156
+ prod_uint [i ] %= BC_MUL_UINT_OVERFLOW ;
215
157
}
216
158
217
- if (bc_is_zero (d1 ) || bc_is_zero (d2 )) {
218
- m2 = bc_copy_num (BCG (_zero_ ));
219
- } else {
220
- _bc_rec_mul (d1 , d1 -> n_len , d2 , d2 -> n_len , & m2 );
159
+ /* Convert to bc_num */
160
+ * prod = bc_new_num_nonzeroed (prodlen , 0 );
161
+ char * pptr = (* prod )-> n_value ;
162
+ char * pend = pptr + prodlen - 1 ;
163
+ i = 0 ;
164
+ while (i < prod_arr_size - 1 ) {
165
+ for (size_t j = 0 ; j < BC_MUL_UINT_DIGITS ; j ++ ) {
166
+ * pend -- = prod_uint [i ] % BASE ;
167
+ prod_uint [i ] /= BASE ;
168
+ }
169
+ i ++ ;
221
170
}
222
171
223
- if (bc_is_zero (u0 ) || bc_is_zero (v0 )) {
224
- m3 = bc_copy_num (BCG (_zero_ ));
225
- } else {
226
- _bc_rec_mul (u0 , u0 -> n_len , v0 , v0 -> n_len , & m3 );
172
+ /*
173
+ * The last digit may carry over.
174
+ * Also need to fill it to the end with zeros, so loop until the end of the string.
175
+ */
176
+ while (pend >= pptr ) {
177
+ * pend -- = prod_uint [i ] % BASE ;
178
+ prod_uint [i ] /= BASE ;
227
179
}
228
180
229
- /* Initialize product */
230
- * prod = bc_new_num (ulen + vlen + 1 , 0 );
231
-
232
- if (!m1zero ) {
233
- _bc_shift_addsub (* prod , m1 , 2 * n , false);
234
- _bc_shift_addsub (* prod , m1 , n , false);
235
- }
236
- _bc_shift_addsub (* prod , m3 , n , false);
237
- _bc_shift_addsub (* prod , m3 , 0 , false);
238
- _bc_shift_addsub (* prod , m2 , n , d1 -> n_sign != d2 -> n_sign );
239
-
240
- /* Now clean up! */
241
- bc_free_num (& u1 );
242
- bc_free_num (& u0 );
243
- bc_free_num (& v1 );
244
- bc_free_num (& m1 );
245
- bc_free_num (& v0 );
246
- bc_free_num (& m2 );
247
- bc_free_num (& m3 );
248
- bc_free_num (& d1 );
249
- bc_free_num (& d2 );
181
+ efree (buf );
250
182
}
251
183
252
184
/* The multiply routine. N2 times N1 is put int PROD with the scale of
@@ -255,26 +187,28 @@ static void _bc_rec_mul(bc_num u, size_t ulen, bc_num v, size_t vlen, bc_num *pr
255
187
256
188
bc_num bc_multiply (bc_num n1 , bc_num n2 , size_t scale )
257
189
{
258
- bc_num pval ;
259
- size_t len1 , len2 ;
260
- size_t full_scale , prod_scale ;
190
+ bc_num prod ;
261
191
262
192
/* Initialize things. */
263
- len1 = n1 -> n_len + n1 -> n_scale ;
264
- len2 = n2 -> n_len + n2 -> n_scale ;
265
- full_scale = n1 -> n_scale + n2 -> n_scale ;
266
- prod_scale = MIN (full_scale , MAX (scale , MAX (n1 -> n_scale , n2 -> n_scale )));
193
+ size_t len1 = n1 -> n_len + n1 -> n_scale ;
194
+ size_t len2 = n2 -> n_len + n2 -> n_scale ;
195
+ size_t full_scale = n1 -> n_scale + n2 -> n_scale ;
196
+ size_t prod_scale = MIN (full_scale , MAX (scale , MAX (n1 -> n_scale , n2 -> n_scale )));
267
197
268
198
/* Do the multiply */
269
- _bc_rec_mul (n1 , len1 , n2 , len2 , & pval );
199
+ if (len1 <= BC_MUL_UINT_DIGITS && len2 <= BC_MUL_UINT_DIGITS ) {
200
+ bc_fast_mul (n1 , len1 , n2 , len2 , & prod );
201
+ } else {
202
+ bc_standard_mul (n1 , len1 , n2 , len2 , & prod );
203
+ }
270
204
271
205
/* Assign to prod and clean up the number. */
272
- pval -> n_sign = (n1 -> n_sign == n2 -> n_sign ? PLUS : MINUS );
273
- pval -> n_len = len2 + len1 + 1 - full_scale ;
274
- pval -> n_scale = prod_scale ;
275
- _bc_rm_leading_zeros (pval );
276
- if (bc_is_zero (pval )) {
277
- pval -> n_sign = PLUS ;
206
+ prod -> n_sign = (n1 -> n_sign == n2 -> n_sign ? PLUS : MINUS );
207
+ prod -> n_len -= full_scale ;
208
+ prod -> n_scale = prod_scale ;
209
+ _bc_rm_leading_zeros (prod );
210
+ if (bc_is_zero (prod )) {
211
+ prod -> n_sign = PLUS ;
278
212
}
279
- return pval ;
213
+ return prod ;
280
214
}
0 commit comments