Skip to content

Commit 98e5308

Browse files
authored
Update Lexbor (#16288)
Sync up to lexbor/lexbor@72236d3. Reason: pulling in mainly lexbor/lexbor@cbf1263 for the WHATWG encoding update.
1 parent 564db54 commit 98e5308

File tree

9 files changed

+2201
-2097
lines changed

9 files changed

+2201
-2097
lines changed

ext/dom/lexbor/lexbor/core/swar.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,35 @@ lexbor_swar_seek4(const lxb_char_t *data, const lxb_char_t *end,
5959
return data;
6060
}
6161

62+
lxb_inline const lxb_char_t *
63+
lexbor_swar_seek3(const lxb_char_t *data, const lxb_char_t *end,
64+
lxb_char_t c1, lxb_char_t c2, lxb_char_t c3)
65+
{
66+
size_t bytes, matches, t1, t2, t3;
67+
68+
if (LEXBOR_SWAR_IS_LITTLE_ENDIAN) {
69+
while (data + sizeof(size_t) <= end) {
70+
memcpy(&bytes, data, sizeof(size_t));
71+
72+
t1 = bytes ^ LEXBOR_SWAR_REPEAT(c1);
73+
t2 = bytes ^ LEXBOR_SWAR_REPEAT(c2);
74+
t3 = bytes ^ LEXBOR_SWAR_REPEAT(c3);
75+
matches = LEXBOR_SWAR_HAS_ZERO(t1) | LEXBOR_SWAR_HAS_ZERO(t2)
76+
| LEXBOR_SWAR_HAS_ZERO(t3);
77+
78+
if (matches) {
79+
data += ((((matches - 1) & LEXBOR_SWAR_ONES) * LEXBOR_SWAR_ONES)
80+
>> (sizeof(size_t) * 8 - 8)) - 1;
81+
break;
82+
} else {
83+
data += sizeof(size_t);
84+
}
85+
}
86+
}
87+
88+
return data;
89+
}
90+
6291

6392
#ifdef __cplusplus
6493
} /* extern "C" */

ext/dom/lexbor/lexbor/encoding/big5.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019 Alexander Borisov
2+
* Copyright (C) 2024 Alexander Borisov
33
*
44
* Author: Alexander Borisov <[email protected]>
55
*/

ext/dom/lexbor/lexbor/encoding/decode.c

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2955,6 +2955,77 @@ lxb_encoding_decode_valid_utf_8_single(const lxb_char_t **data,
29552955
return cp;
29562956
}
29572957

2958+
lxb_codepoint_t
2959+
lxb_encoding_decode_valid_utf_8_single_reverse(const lxb_char_t **end,
2960+
const lxb_char_t *begin)
2961+
{
2962+
lxb_codepoint_t cp;
2963+
const lxb_char_t *p = *end;
2964+
2965+
while (p > begin) {
2966+
p -= 1;
2967+
2968+
if (*p < 0x80){
2969+
cp = (lxb_codepoint_t) *p;
2970+
2971+
(*end) = p;
2972+
return cp;
2973+
}
2974+
else if ((*p & 0xe0) == 0xc0) {
2975+
/* 110xxxxx 10xxxxxx */
2976+
2977+
if (*end - p < 2) {
2978+
*end = p;
2979+
return LXB_ENCODING_DECODE_ERROR;
2980+
}
2981+
2982+
cp = (p[0] ^ (0xC0 & p[0])) << 6;
2983+
cp |= (p[1] ^ (0x80 & p[1]));
2984+
2985+
(*end) = p;
2986+
return cp;
2987+
}
2988+
else if ((*p & 0xf0) == 0xe0) {
2989+
/* 1110xxxx 10xxxxxx 10xxxxxx */
2990+
2991+
if (*end - p < 3) {
2992+
*end = p;
2993+
return LXB_ENCODING_DECODE_ERROR;
2994+
}
2995+
2996+
cp = (p[0] ^ (0xE0 & p[0])) << 12;
2997+
cp |= (p[1] ^ (0x80 & p[1])) << 6;
2998+
cp |= (p[2] ^ (0x80 & p[2]));
2999+
3000+
(*end) = p;
3001+
return cp;
3002+
}
3003+
else if ((*p & 0xf8) == 0xf0) {
3004+
/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
3005+
3006+
if (*end - p < 4) {
3007+
*end = p;
3008+
return LXB_ENCODING_DECODE_ERROR;
3009+
}
3010+
3011+
cp = (p[0] ^ (0xF0 & p[0])) << 18;
3012+
cp |= (p[1] ^ (0x80 & p[1])) << 12;
3013+
cp |= (p[2] ^ (0x80 & p[2])) << 6;
3014+
cp |= (p[3] ^ (0x80 & p[3]));
3015+
3016+
(*end) = p;
3017+
return cp;
3018+
}
3019+
else if (*end - p >= 4) {
3020+
break;
3021+
}
3022+
}
3023+
3024+
*end = p;
3025+
3026+
return LXB_ENCODING_DECODE_ERROR;
3027+
}
3028+
29583029
uint8_t
29593030
lxb_encoding_decode_utf_8_length(lxb_char_t data)
29603031
{

ext/dom/lexbor/lexbor/encoding/decode.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,10 @@ LXB_API lxb_codepoint_t
306306
lxb_encoding_decode_valid_utf_8_single(const lxb_char_t **data,
307307
const lxb_char_t *end);
308308

309+
/*
 * Decodes one UTF-8 code point scanning backwards from *end, never reading
 * before begin.  On return *end is moved to the last byte examined (the
 * first byte of the decoded sequence on success).  Returns the code point,
 * or LXB_ENCODING_DECODE_ERROR if no complete sequence is found.
 */
LXB_API lxb_codepoint_t
310+
lxb_encoding_decode_valid_utf_8_single_reverse(const lxb_char_t **end,
311+
const lxb_char_t *begin);
312+
309313
LXB_API uint8_t
310314
lxb_encoding_decode_utf_8_length(lxb_char_t data);
311315

ext/dom/lexbor/lexbor/encoding/euc_kr.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2019 Alexander Borisov
2+
* Copyright (C) 2024 Alexander Borisov
33
*
44
* Author: Alexander Borisov <[email protected]>
55
*/

0 commit comments

Comments
 (0)