-
Notifications
You must be signed in to change notification settings - Fork 7.9k
Refactor BCMath _bc_do_sub
#14132
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor BCMath _bc_do_sub
#14132
Changes from 2 commits
536d2f7
670eada
9a715bb
63f6968
1dede5a
6a29fb7
ce78b14
2b6cbad
318601c
86a0084
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -124,36 +124,33 @@ bc_num _bc_do_add(bc_num n1, bc_num n2, size_t scale_min) | |||||||||
bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) | ||||||||||
{ | ||||||||||
bc_num diff; | ||||||||||
size_t diff_scale, diff_len; | ||||||||||
size_t min_scale, min_len; | ||||||||||
size_t borrow, count; | ||||||||||
|
||||||||||
size_t diff_scale = MAX(n1->n_scale, n2->n_scale); | ||||||||||
size_t diff_len = EXPECTED(n1->n_len >= n2->n_len) ? n1->n_len : n2->n_len; | ||||||||||
size_t min_scale = MIN(n1->n_scale, n2->n_scale); | ||||||||||
size_t min_len = n1->n_len >= n2->n_len ? n2->n_len : n1->n_len; | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Interestingly, using |
||||||||||
size_t min_bytes = min_len + min_scale; | ||||||||||
size_t borrow = 0; | ||||||||||
int val; | ||||||||||
char *n1ptr, *n2ptr, *diffptr; | ||||||||||
|
||||||||||
/* Allocate temporary storage. */ | ||||||||||
diff_len = MAX(n1->n_len, n2->n_len); | ||||||||||
diff_scale = MAX(n1->n_scale, n2->n_scale); | ||||||||||
min_len = MIN(n1->n_len, n2->n_len); | ||||||||||
min_scale = MIN(n1->n_scale, n2->n_scale); | ||||||||||
diff = bc_new_num (diff_len, MAX(diff_scale, scale_min)); | ||||||||||
diff = bc_new_num (n1->n_len, MAX(diff_scale, scale_min)); | ||||||||||
|
||||||||||
/* Initialize the subtract. */ | ||||||||||
n1ptr = (char *) (n1->n_value + n1->n_len + n1->n_scale - 1); | ||||||||||
n2ptr = (char *) (n2->n_value + n2->n_len + n2->n_scale - 1); | ||||||||||
diffptr = (char *) (diff->n_value + diff_len + diff_scale - 1); | ||||||||||
|
||||||||||
/* Subtract the numbers. */ | ||||||||||
borrow = 0; | ||||||||||
|
||||||||||
/* Take care of the longer scaled number. */ | ||||||||||
if (n1->n_scale != min_scale) { | ||||||||||
/* n1 has the longer scale */ | ||||||||||
for (count = n1->n_scale - min_scale; count > 0; count--) { | ||||||||||
for (size_t count = n1->n_scale - min_scale; count > 0; count--) { | ||||||||||
*diffptr-- = *n1ptr--; | ||||||||||
} | ||||||||||
} else { | ||||||||||
/* n2 has the longer scale */ | ||||||||||
for (count = n2->n_scale - min_scale; count > 0; count--) { | ||||||||||
for (size_t count = n2->n_scale - min_scale; count > 0; count--) { | ||||||||||
val = -*n2ptr-- - borrow; | ||||||||||
if (val < 0) { | ||||||||||
val += BASE; | ||||||||||
|
@@ -166,7 +163,57 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) | |||||||||
} | ||||||||||
|
||||||||||
/* Now do the equal length scale and integer parts. */ | ||||||||||
for (count = 0; count < min_len + min_scale; count++) { | ||||||||||
size_t sub_count = 0; | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Analyzing the measurement results, it seems that sharing the count here and resetting it once is more expensive than using a separate variable. Perhaps it's a scope issue? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems odd, did you check via hyperfine? Perhaps that's just coincidental noise, hyperfine can filter that out. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think I measured it using hyperfine, but I'll separate the commits and measure them again individually. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not much changed. In fact, it was a little late. Therefore, I will not make this change. |
||||||||||
if (min_bytes >= sizeof(BC_UINT_T)) { | ||||||||||
diffptr++; | ||||||||||
n1ptr++; | ||||||||||
n2ptr++; | ||||||||||
while (sub_count + sizeof(BC_UINT_T) <= min_bytes) { | ||||||||||
diffptr -= sizeof(BC_UINT_T); | ||||||||||
n1ptr -= sizeof(BC_UINT_T); | ||||||||||
n2ptr -= sizeof(BC_UINT_T); | ||||||||||
|
||||||||||
BC_UINT_T n1bytes; | ||||||||||
BC_UINT_T n2bytes; | ||||||||||
memcpy(&n1bytes, n1ptr, sizeof(n1bytes)); | ||||||||||
memcpy(&n2bytes, n2ptr, sizeof(n2bytes)); | ||||||||||
|
||||||||||
#if BC_LITTLE_ENDIAN | ||||||||||
/* Bytes swap */ | ||||||||||
n1bytes = BC_BSWAP(n1bytes); | ||||||||||
n2bytes = BC_BSWAP(n2bytes); | ||||||||||
#endif | ||||||||||
|
||||||||||
n1bytes -= (n2bytes + borrow); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You don't need the parentheses here
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed it |
||||||||||
/* If the most significant 4 bits of the 8 bytes are not 0, a carry-down has occurred. */ | ||||||||||
bool tmp_borrow = n1bytes >= ((BC_UINT_T) 0x10 << (8 * (sizeof(BC_UINT_T) - 1))); | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. On first glance this can be simplified I think. Isn't it sufficient to only check the most significant bit instead? If that's correct, then it would be a faster check:
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is good idea. Fixed it |
||||||||||
|
||||||||||
/* | ||||||||||
* If any one of the upper 4 bits of each of the 8 bytes is 1, subtract 6 from that byte. | ||||||||||
* The fact that the upper 4 bits are not 0 means that a carry-down has occurred, and when | ||||||||||
* the hexadecimal number is carried down, there is a difference of 6 from the decimal | ||||||||||
* calculation, so 6 is subtracted. | ||||||||||
* Also, set all upper 4 bits to 0. | ||||||||||
*/ | ||||||||||
BC_UINT_T borrow_mask = (((n1bytes | (n1bytes >> 1) | (n1bytes >> 2) | (n1bytes >> 3)) & SWAR_REPEAT(0x10)) * 0x06) >> 4; | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not sure the following would be faster in practice, this should be measured, but what do you think of this. It avoids an additional shift.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm and actually, doesn't the same thing hold true here. Why do we have to check the top 4 bits, isn't just the top bit enough? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Indeed. I didn't notice that, thanks! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed it |
||||||||||
n1bytes = (n1bytes & SWAR_REPEAT(0x0F)) - borrow_mask; | ||||||||||
|
||||||||||
#if BC_LITTLE_ENDIAN | ||||||||||
/* Bytes swap */ | ||||||||||
n1bytes = BC_BSWAP(n1bytes); | ||||||||||
#endif | ||||||||||
|
||||||||||
memcpy(diffptr, &n1bytes, sizeof(n1bytes)); | ||||||||||
|
||||||||||
borrow = tmp_borrow; | ||||||||||
sub_count += sizeof(BC_UINT_T); | ||||||||||
} | ||||||||||
diffptr--; | ||||||||||
n1ptr--; | ||||||||||
n2ptr--; | ||||||||||
} | ||||||||||
|
||||||||||
for (; sub_count < min_bytes; sub_count++) { | ||||||||||
val = *n1ptr-- - *n2ptr-- - borrow; | ||||||||||
if (val < 0) { | ||||||||||
val += BASE; | ||||||||||
|
@@ -179,7 +226,7 @@ bc_num _bc_do_sub(bc_num n1, bc_num n2, size_t scale_min) | |||||||||
|
||||||||||
/* If n1 has more digits than n2, we now do that subtract. */ | ||||||||||
if (diff_len != min_len) { | ||||||||||
for (count = diff_len - min_len; count > 0; count--) { | ||||||||||
for (size_t count = diff_len - min_len; count > 0; count--) { | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why did you change this back to what it was previously? Did the change have negative performance impact? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ah, indeed. Separate the commits and measure each one. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I fixed it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Combining EXPECTED with a comparison was faster than using ZEND_ASSERT. |
||||||||||
val = *n1ptr-- - borrow; | ||||||||||
if (val < 0) { | ||||||||||
val += BASE; | ||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps this swar and byte magic stuff should be in a separate header, since it's really internal stuff. I'm not entirely sure though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it ok to duplicate this in each file, or should I have a separate header file?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think a new internal BC Math header is better, such headers exist for various extensions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thx, do you have a good name idea?
my ideas:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The PDO drivers and GMP use the
*_int.h
convention, Phar and ext/com use the*_internal.h
convention, MySQLi and MySQLnd use the*_priv.h
convention, and ext/filter and ext/curl use the*_private.h
convention.I think thus either using
bcmath_internal.h
orbcmath_private.h
for the name is the best idea.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I got it.
I just noticed that
private.h
already exists in bcmath, is it reasonable to rename it tobcmath_private.h
and use this? Or do you think it's better to create a newbcmath_internal.h
since that is just for libbcmath?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Aren't these new routines just used within libbcmath? In which case reusing the existing
private.h
header makes sense to meThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Okay, thx!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I moved some constants and macros to
private.h