@@ -4788,6 +4788,7 @@ bool utf8_range(const unsigned char *data, size_t len)
4788
4788
const uint8x16_t const_1 = vdupq_n_u8 (1 );
4789
4789
const uint8x16_t const_2 = vdupq_n_u8 (2 );
4790
4790
const uint8x16_t const_e0 = vdupq_n_u8 (0xE0 );
4791
+ const uint8x16_t const_7f = vdupq_n_u8 (0x7F );
4791
4792
4792
4793
/* We use two error registers to remove a dependency. */
4793
4794
uint8x16_t error1 = vdupq_n_u8 (0 );
@@ -4799,6 +4800,29 @@ bool utf8_range(const unsigned char *data, size_t len)
4799
4800
const uint8x16_t input_3 = vld1q_u8 (data + 32 );
4800
4801
const uint8x16_t input_4 = vld1q_u8 (data + 48 );
4801
4802
4803
+ uint64_t ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (prev_first_len ), 0 );
4804
+ if (ascii_paired == 0 ) {
4805
+ uint8x16_t is_ascii_0 = vorrq_u8 (input_1 , input_2 );
4806
+ is_ascii_0 = vorrq_u8 (is_ascii_0 , input_3 );
4807
+ is_ascii_0 = vorrq_u8 (is_ascii_0 , input_4 );
4808
+
4809
+ uint8x16_t is_ascii = vqsubq_u8 (is_ascii_0 , const_7f );
4810
+ uint64_t is_ascii_paired = vgetq_lane_u64 (vreinterpretq_u64_u8 (is_ascii ), 0 );
4811
+
4812
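+ /* ASCII fast path: taken when 64-bit lane 0 of the saturating subtract is zero, i.e. no byte in that lane of input_1..input_4 exceeds 0x7F; the whole 64-byte chunk is then skipped. NOTE(review): only lane 0 is inspected here -- confirm the other 64-bit lane is also covered. */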
4813
+ if (is_ascii_paired == 0 ) {
4814
+ const uint8x16_t high_nibbles_4 = vshrq_n_u8 (input_4 , 4 );
4815
+ const uint8x16_t first_len_4 = vqtbl1q_u8 (first_len_tbl , high_nibbles_4 );
4816
+
4817
+ prev_input = input_4 ;
4818
+ prev_first_len = first_len_4 ;
4819
+
4820
+ data += 64 ;
4821
+ len -= 64 ;
4822
+ continue ;
4823
+ }
4824
+ }
4825
+
4802
4826
/* high_nibbles = input >> 4 */
4803
4827
const uint8x16_t high_nibbles_1 = vshrq_n_u8 (input_1 , 4 );
4804
4828
const uint8x16_t high_nibbles_2 = vshrq_n_u8 (input_2 , 4 );
@@ -4947,8 +4971,7 @@ bool utf8_range(const unsigned char *data, size_t len)
4947
4971
/* first_len = legal character length minus 1 */
4948
4972
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
4949
4973
/* first_len = first_len_tbl[high_nibbles] */
4950
- const uint8x16_t first_len =
4951
- vqtbl1q_u8 (first_len_tbl , high_nibbles );
4974
+ const uint8x16_t first_len = vqtbl1q_u8 (first_len_tbl , high_nibbles );
4952
4975
4953
4976
/* First Byte: set range index to 8 for bytes within 0xC0 ~ 0xFF */
4954
4977
/* range = first_range_tbl[high_nibbles] */
@@ -4957,8 +4980,7 @@ bool utf8_range(const unsigned char *data, size_t len)
4957
4980
/* Second Byte: set range index to first_len */
4958
4981
/* 0 for 00~7F, 1 for C0~DF, 2 for E0~EF, 3 for F0~FF */
4959
4982
/* range |= (first_len, prev_first_len) << 1 byte */
4960
- range =
4961
- vorrq_u8 (range , vextq_u8 (prev_first_len , first_len , 15 ));
4983
+ range = vorrq_u8 (range , vextq_u8 (prev_first_len , first_len , 15 ));
4962
4984
4963
4985
/* Third Byte: set range index to saturate_sub(first_len, 1) */
4964
4986
/* 0 for 00~7F, 0 for C0~DF, 1 for E0~EF, 2 for F0~FF */
@@ -5033,8 +5055,9 @@ bool utf8_range(const unsigned char *data, size_t len)
5033
5055
/* Merge our error counters together */
5034
5056
error1 = vorrq_u8 (error1 , error2 );
5035
5057
5058
+ uint64_t error_raw_last = vgetq_lane_u64 (vreinterpretq_u64_u8 (error1 ), 0 );
5036
5059
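/* Delay error check till loop ends. NOTE(review): the test below reads only 64-bit lane 0 of error1 -- confirm error bits in the other lane cannot be missed. */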
5037
- if (vmaxvq_u8 ( error1 ) ) {
5060
+ if (error_raw_last != 0 ) {
5038
5061
return false;
5039
5062
}
5040
5063
0 commit comments