Skip to content

Commit 077989c

Browse files
committed
Refactor _bc_do_sub
_bc_do_sub has been modified to use SWAR (SIMD within a register) to perform faster calculations.
1 parent 536d2f7 commit 077989c

File tree

1 file changed

+61
-15
lines changed

1 file changed

+61
-15
lines changed

ext/bcmath/libbcmath/src/doaddsub.c

Lines changed: 61 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -124,36 +124,33 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min)
124124
bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
125125
{
126126
bc_num diff;
127-
size_t diff_scale, diff_len;
128-
size_t min_scale, min_len;
129-
size_t borrow, count;
127+
128+
size_t diff_scale = MAX(n1->n_scale, n2->n_scale);
129+
size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len;
130+
size_t min_scale = MIN(n1->n_scale, n2->n_scale);
131+
size_t min_len = EXPECTED(n1->n_len <= n2->n_len) ? n2->n_len : n1->n_len;
132+
size_t min_bytes = min_len + min_scale;
133+
size_t borrow = 0;
130134
int val;
131135
char *n1ptr, *n2ptr, *diffptr;
132136

133137
/* Allocate temporary storage. */
134-
diff_len = MAX(n1->n_len, n2->n_len);
135-
diff_scale = MAX(n1->n_scale, n2->n_scale);
136-
min_len = MIN(n1->n_len, n2->n_len);
137-
min_scale = MIN(n1->n_scale, n2->n_scale);
138-
diff = bc_new_num (diff_len, MAX(diff_scale, scale_min));
138+
diff = bc_new_num (n1->n_len, MAX(diff_scale, scale_min));
139139

140140
/* Initialize the subtract. */
141141
n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1);
142142
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1);
143143
diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1);
144144

145-
/* Subtract the numbers. */
146-
borrow = 0;
147-
148145
/* Take care of the longer scaled number. */
149146
if (n1->n_scale != min_scale) {
150147
/* n1 has the longer scale */
151-
for (count = n1->n_scale - min_scale; count > 0; count--) {
148+
for (size_t count = n1->n_scale - min_scale; count > 0; count--) {
152149
*diffptr-- = *n1ptr--;
153150
}
154151
} else {
155152
/* n2 has the longer scale */
156-
for (count = n2->n_scale - min_scale; count > 0; count--) {
153+
for (size_t count = n2->n_scale - min_scale; count > 0; count--) {
157154
val = -*n2ptr-- - borrow;
158155
if (val < 0) {
159156
val += BASE;
@@ -166,7 +163,56 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
166163
}
167164

168165
/* Now do the equal length scale and integer parts. */
169-
for (count = 0; count < min_len + min_scale; count++) {
166+
size_t sub_count = 0;
167+
if (min_bytes >= sizeof(BC_UINT_T)) {
168+
diffptr++;
169+
n1ptr++;
170+
n2ptr++;
171+
while (sub_count + sizeof(BC_UINT_T) <= min_bytes) {
172+
diffptr -= sizeof(BC_UINT_T);
173+
n1ptr -= sizeof(BC_UINT_T);
174+
n2ptr -= sizeof(BC_UINT_T);
175+
176+
BC_UINT_T n1bytes;
177+
BC_UINT_T n2bytes;
178+
memcpy(&n1bytes, n1ptr, sizeof(n1bytes));
179+
memcpy(&n2bytes, n2ptr, sizeof(n2bytes));
180+
181+
#if BC_LITTLE_ENDIAN
182+
/* Bytes swap */
183+
n1bytes = BC_BSWAP(n1bytes);
184+
n2bytes = BC_BSWAP(n2bytes);
185+
#endif
186+
187+
n1bytes -= (n2bytes + borrow);
188+
/* If the upper 4 bits of the most significant byte are not 0, a borrow out of this word has occurred. */
189+
bool tmp_borrow = n1bytes >= ((BC_UINT_T) 0x10 << (8 * (sizeof(BC_UINT_T) - 1)));
190+
191+
/*
192+
* If any of the upper 4 bits of a byte in the word is set, subtract 6 from that byte.
193+
* Non-zero upper 4 bits mean that the byte borrowed from the next digit. A binary borrow
194+
* wraps the low 4 bits modulo 16 rather than modulo 10, which leaves the digit 6 larger
195+
* than the decimal result, so 6 is subtracted.
196+
* Also, set the upper 4 bits of every byte to 0.
197+
*/
198+
BC_UINT_T borrow_mask = (((n1bytes | (n1bytes >> 1) | (n1bytes >> 2) | (n1bytes >> 3)) & SWAR_REPEAT(0x10)) * 0x06) >> 4;
199+
n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask;
200+
201+
#if BC_LITTLE_ENDIAN
202+
/* Bytes swap */
203+
n1bytes = BC_BSWAP(n1bytes);
204+
memcpy(diffptr, &n1bytes, sizeof(n1bytes));
205+
#endif
206+
207+
borrow = tmp_borrow;
208+
sub_count += sizeof(BC_UINT_T);
209+
}
210+
diffptr--;
211+
n1ptr--;
212+
n2ptr--;
213+
}
214+
215+
for (; sub_count < min_bytes; sub_count++) {
170216
val = *n1ptr-- - *n2ptr-- - borrow;
171217
if (val < 0) {
172218
val += BASE;
@@ -179,7 +225,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min)
179225

180226
/* If n1 has more digits than n2, we now do that subtract. */
181227
if (diff_len != min_len) {
182-
for (count = diff_len - min_len; count > 0; count--) {
228+
for (size_t count = diff_len - min_len; count > 0; count--) {
183229
val = *n1ptr-- - borrow;
184230
if (val < 0) {
185231
val += BASE;

0 commit comments

Comments
 (0)