Skip to content

Commit 294fbbd

Browse files
committed
Refactor _bc_do_sub
_bc_do_sub has been modified to use SWAR (SIMD within a register) to perform faster calculations.
1 parent 34e0c77 commit 294fbbd

File tree

1 file changed

+59
-28
lines changed

1 file changed

+59
-28
lines changed

ext/bcmath/libbcmath/src/doaddsub.c

Lines changed: 59 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,6 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
5151
sum_digits = MAX (n1->n_len, n2->n_len) + 1;
5252
sum = bc_new_num (sum_digits, MAX(sum_scale, scale_min));
5353

54-
/* Zero extra digits made by scale_min. */
55-
if (scale_min > sum_scale) {
56-
sumptr = (char *) (sum->n_value + sum_scale + sum_digits);
57-
for (int count = scale_min - sum_scale; count > 0; count--) {
58-
*sumptr++ = 0;
59-
}
60-
}
61-
6254
/* Start with the fraction part. Initialize the pointers. */
6355
n1bytes = n1->n_scale;
6456
n2bytes = n2->n_scale;
@@ -132,35 +124,25 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
132124
bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
133125
{
134126
bc_num diff;
135-
size_t diff_scale, diff_len;
136-
size_t min_scale, min_len;
137-
size_t borrow, count;
127+
128+
size_t diff_scale = MAX(n1->n_scale, n2->n_scale);
129+
size_t diff_len = MAX(n1->n_len, n2->n_len);
130+
size_t min_scale = MIN(n1->n_scale, n2->n_scale);
131+
size_t min_len = MIN(n1->n_len, n2->n_len);
132+
size_t min_bytes = min_len + min_scale;
133+
size_t borrow = 0;
134+
size_t count;
138135
int val;
139136
char *n1ptr, *n2ptr, *diffptr;
140137

141138
/* Allocate temporary storage. */
142-
diff_len = MAX(n1->n_len, n2->n_len);
143-
diff_scale = MAX(n1->n_scale, n2->n_scale);
144-
min_len = MIN(n1->n_len, n2->n_len);
145-
min_scale = MIN(n1->n_scale, n2->n_scale);
146-
diff = bc_new_num (diff_len, MAX(diff_scale, scale_min));
147-
148-
/* Zero extra digits made by scale_min. */
149-
if (scale_min > diff_scale) {
150-
diffptr = (char *) (diff->n_value + diff_len + diff_scale);
151-
for (count = scale_min - diff_scale; count > 0; count--) {
152-
*diffptr++ = 0;
153-
}
154-
}
139+
diff = bc_new_num (n1->n_len, MAX(diff_scale, scale_min));
155140

156141
/* Initialize the subtract. */
157142
n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1);
158143
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
159144
diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1);
160145

161-
/* Subtract the numbers. */
162-
borrow = 0;
163-
164146
/* Take care of the longer scaled number. */
165147
if (n1->n_scale != min_scale) {
166148
/* n1 has the longer scale */
@@ -182,7 +164,56 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
182164
}
183165

184166
/* Now do the equal length scale and integer parts. */
185-
for (count = 0; count < min_len + min_scale; count++) {
167+
count = 0;
168+
if (min_bytes >= sizeof(BC_UINT_T)) {
169+
diffptr++;
170+
n1ptr++;
171+
n2ptr++;
172+
while (count + sizeof(BC_UINT_T) =< min_bytes) {
173+
diffptr -= sizeof(BC_UINT_T);
174+
n1ptr -= sizeof(BC_UINT_T);
175+
n2ptr -= sizeof(BC_UINT_T);
176+
177+
BC_UINT_T n1bytes;
178+
BC_UINT_T n2bytes;
179+
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
180+
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
181+
182+
#if BC_LITTLE_ENDIAN
183+
/* Bytes swap */
184+
n1bytes = BC_BSWAP(n1bytes);
185+
n2bytes = BC_BSWAP(n2bytes);
186+
#endif
187+
188+
n1bytes -= (n2bytes + borrow);
189+
/* If the upper 4 bits of the most significant byte are not 0, a carry-down (borrow) out of this word has occurred. */
190+
bool tmp_borrow = n1bytes >= ((BC_UINT_T) 0x10 << (8 * (sizeof(BC_UINT_T) - 1)));
191+
192+
/*
193+
* For each byte of the word, if any of its upper 4 bits is 1, subtract 6 from that byte.
194+
* The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when
195+
* the hexadecimal number is carried down, there is a difference of 6 from the decimal
196+
* calculation, so 6 is subtracted.
197+
* Also, set all upper 4 bits to 0.
198+
*/
199+
BC_UINT_T borrow_mask = (((n1bytes | (n1bytes >> 1) | (n1bytes >> 2) | (n1bytes >> 3)) & SWAR_REPEAT(0x10)) * 0x06) >> 4;
200+
n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask;
201+
202+
#if BC_LITTLE_ENDIAN
203+
/* Bytes swap */
204+
n1bytes = BC_BSWAP(n1bytes);
205+
memcpy(diffptr, &n1bytes, sizeof(n1bytes));
206+
#endif
207+
208+
borrow = tmp_borrow;
209+
count += sizeof(BC_UINT_T);
210+
}
211+
diffptr--;
212+
n1ptr--;
213+
n2ptr--;
214+
}
215+
216+
for (; count < min_bytes; count++) {
186217
val = *n1ptr-- - *n2ptr-- - borrow;
187218
if (val < 0) {
188219
val += BASE;

0 commit comments

Comments
 (0)