Skip to content

Commit 7101d42

Browse files
committed
Add an ASCII fast path to the NEON UTF-8 check.
If no byte in the SIMD register is greater than 0x7F, reset the previous struct and skip to the next block.
1 parent ac74550 commit 7101d42

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

ext/mbstring/mbstring.c

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4895,9 +4895,6 @@ static bool mb_fast_check_utf8_default(zend_string *str)
48954895
size_t len = ZSTR_LEN(str);
48964896

48974897
static const int8_t _verror[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1};
4898-
static const int8_t _prev_not_ascii[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -16, -32, -64};
4899-
4900-
int8x16_t bad_mask_prev_not_ascii = vld1q_s8(_prev_not_ascii);
49014898

49024899
/* error flag vector */
49034900
int8x16_t has_error = vdupq_n_s8(0);
@@ -4907,13 +4904,13 @@ static bool mb_fast_check_utf8_default(zend_string *str)
49074904
if (len >= 16) {
49084905
for (; i <= len - 16; i += 16) {
49094906
int8x16_t current_bytes = vld1q_s8((int8_t *)(p + i));
4910-
/* top bit is all 0, it is ASCII */
4907+
/* No byte is greater than 0x7F, so the whole block is ASCII */
49114908
int8x16_t is_ascii = vreinterpretq_s8_u8(vqsubq_u8(vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0x7F)));
49124909
if (vmaxvq_u8(is_ascii) == 0) {
4913-
int8x16_t bad = vceqq_s8(vandq_s8(previous.rawbytes, bad_mask_prev_not_ascii), bad_mask_prev_not_ascii);
4914-
if (vmaxvq_u8(vreinterpretq_u8_s8(bad)) != 0) {
4915-
return false;
4916-
}
4910+
previous.rawbytes = vdupq_n_s8(0);
4911+
previous.high_nibbles = vdupq_n_s8(0);
4912+
previous.carried_continuations = vdupq_n_s8(0);
4913+
continue;
49174914
}
49184915
neon_check_utf8_bytes(current_bytes, &previous, &has_error);
49194916
}

0 commit comments

Comments
 (0)