Skip to content

Commit 5ed2099

Browse files
committed
[WIP] ASCII check
1 parent 13b4162 commit 5ed2099

File tree

1 file changed

+10
-0
lines changed

1 file changed

+10
-0
lines changed

ext/mbstring/mbstring.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4895,6 +4895,9 @@ static bool mb_fast_check_utf8_default(zend_string *str)
48954895
size_t len = ZSTR_LEN(str);
48964896

48974897
static const int8_t _verror[] = {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1};
4898+
static const int8_t _prev_not_ascii[] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -16, -32, -64};
4899+
4900+
int8x16_t bad_mask_prev_not_ascii = vld1q_s8(_prev_not_ascii);
48984901

48994902
/* error flag vector */
49004903
int8x16_t has_error = vdupq_n_s8(0);
@@ -4904,6 +4907,13 @@ static bool mb_fast_check_utf8_default(zend_string *str)
49044907
if (len >= 16) {
49054908
for (; i <= len - 16; i += 16) {
49064909
int8x16_t current_bytes = vld1q_s8((int8_t *)(p + i));
4910+
/* If the top bit of every byte is 0, the whole 16-byte block is ASCII */
4911+
if (vmaxvq_u8(vreinterpretq_u8_s8(vshrq_n_s8(current_bytes, 8))) == 0) {
4912+
int8x16_t bad = vceqq_s8(vandq_s8(previous.rawbytes, bad_mask_prev_not_ascii), bad_mask_prev_not_ascii);
4913+
if (vmaxvq_u8(vreinterpretq_u8_s8(bad))) {
4914+
return false;
4915+
}
4916+
}
49074917
neon_check_utf8_bytes(current_bytes, &previous, &has_error);
49084918
}
49094919
}

0 commit comments

Comments
 (0)