Skip to content

Commit 4df90af

Browse files
authored
Update Lexbor (#14729)
1 parent 62a3bbd commit 4df90af

File tree

7 files changed

+89
-9
lines changed

7 files changed

+89
-9
lines changed

ext/dom/lexbor/lexbor/core/diyfp.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ lexbor_cached_power_bin(int exp, int *dec_exp);
7171
/*
7272
* Inline functions
7373
*/
74-
#if (LEXBOR_HAVE_BUILTIN_CLZLL)
74+
#ifdef LEXBOR_HAVE_BUILTIN_CLZLL
7575
#define nxt_leading_zeros64(x) (((x) == 0) ? 64 : __builtin_clzll(x))
7676

7777
#else
@@ -199,7 +199,7 @@ lexbor_diyfp_sub(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
199199
lxb_inline lexbor_diyfp_t
200200
lexbor_diyfp_mul(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
201201
{
202-
#if (LEXBOR_HAVE_UNSIGNED_INT128)
202+
#ifdef LEXBOR_HAVE_UNSIGNED_INT128
203203

204204
uint64_t l, h;
205205
lxb_uint128_t u128;

ext/dom/lexbor/lexbor/core/perf.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) 2018 Alexander Borisov
2+
* Copyright (C) 2018-2024 Alexander Borisov
33
*
44
* Author: Alexander Borisov <[email protected]>
55
*/
@@ -14,9 +14,6 @@ extern "C" {
1414
#include "lexbor/core/base.h"
1515

1616

17-
#ifdef LEXBOR_WITH_PERF
18-
19-
2017
LXB_API void *
2118
lexbor_perf_create(void);
2219

@@ -36,8 +33,6 @@ LXB_API double
3633
lexbor_perf_in_sec(void *perf);
3734

3835

39-
#endif /* LEXBOR_WITH_PERF */
40-
4136
#ifdef __cplusplus
4237
} /* extern "C" */
4338
#endif

ext/dom/lexbor/lexbor/core/swar.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright (C) 2024 Alexander Borisov
3+
*
4+
* Author: Niels Dossche <[email protected]>
5+
*/
6+
7+
#ifndef LEXBOR_SWAR_H
8+
#define LEXBOR_SWAR_H
9+
10+
#ifdef __cplusplus
11+
extern "C" {
12+
#endif
13+
14+
15+
#include "lexbor/core/base.h"
16+
17+
18+
/*
 * SWAR (SIMD within a register) helpers operating on one size_t word.
 * The zero-byte test follows "Determine if a word has a zero byte" from
 * https://graphics.stanford.edu/~seander/bithacks.html
 *
 * LEXBOR_SWAR_ONES             - word with 0x01 in every byte.
 * LEXBOR_SWAR_REPEAT(x)        - word with the byte value x (0..0xFF)
 *                                replicated into every byte.
 * LEXBOR_SWAR_HAS_ZERO(v)      - non-zero exactly when some byte of v is
 *                                0x00; the lowest set bit of the result is
 *                                the 0x80 bit of the lowest-order zero byte.
 *                                Note: v is evaluated twice.
 * LEXBOR_SWAR_IS_LITTLE_ENDIAN - run-time probe: reads the first byte in
 *                                memory of a uint16_t holding the value 1.
 */
#define LEXBOR_SWAR_ONES (((size_t) -1) / 0xFF)
#define LEXBOR_SWAR_REPEAT(x) ((x) * LEXBOR_SWAR_ONES)
#define LEXBOR_SWAR_HAS_ZERO(v) (~(v) & ((v) - LEXBOR_SWAR_ONES) & LEXBOR_SWAR_REPEAT(0x80))
#define LEXBOR_SWAR_IS_LITTLE_ENDIAN (((unsigned char *) &(uint16_t){1})[0])
25+
26+
27+
/*
28+
* When handling hot loops that search for a set of characters,
29+
* this function can be used to quickly move the data pointer much
30+
* closer to the first occurrence of such a character.
31+
*/
32+
lxb_inline const lxb_char_t *
33+
lexbor_swar_seek4(const lxb_char_t *data, const lxb_char_t *end,
34+
lxb_char_t c1, lxb_char_t c2, lxb_char_t c3, lxb_char_t c4)
35+
{
36+
size_t bytes, matches, t1, t2, t3, t4;
37+
38+
if (LEXBOR_SWAR_IS_LITTLE_ENDIAN) {
39+
while (data + sizeof(size_t) <= end) {
40+
memcpy(&bytes, data, sizeof(size_t));
41+
42+
t1 = bytes ^ LEXBOR_SWAR_REPEAT(c1);
43+
t2 = bytes ^ LEXBOR_SWAR_REPEAT(c2);
44+
t3 = bytes ^ LEXBOR_SWAR_REPEAT(c3);
45+
t4 = bytes ^ LEXBOR_SWAR_REPEAT(c4);
46+
matches = LEXBOR_SWAR_HAS_ZERO(t1) | LEXBOR_SWAR_HAS_ZERO(t2)
47+
| LEXBOR_SWAR_HAS_ZERO(t3) | LEXBOR_SWAR_HAS_ZERO(t4);
48+
49+
if (matches) {
50+
data += ((((matches - 1) & LEXBOR_SWAR_ONES) * LEXBOR_SWAR_ONES)
51+
>> (sizeof(size_t) * 8 - 8)) - 1;
52+
break;
53+
} else {
54+
data += sizeof(size_t);
55+
}
56+
}
57+
}
58+
59+
return data;
60+
}
61+
62+
63+
#ifdef __cplusplus
64+
} /* extern "C" */
65+
#endif
66+
67+
#endif /* LEXBOR_SWAR_H */
68+

ext/dom/lexbor/lexbor/css/parser.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,16 @@ lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz)
6262
parser->rules_end = parser->rules_begin + lxb_rules_length;
6363
parser->rules = parser->rules_begin;
6464

65+
/*
66+
* Zero those parameters that can be used (passed to the function).
67+
* The parser->rules->phase parameter will be assigned at the end of the
68+
* parsing.
69+
*
70+
* The point is that parser->rules[0] is used as a stub before exiting
71+
* parsing.
72+
*/
73+
parser->rules->context = NULL;
74+
6575
/* Temp */
6676
parser->pos = NULL;
6777
parser->str.length = 0;

ext/dom/lexbor/lexbor/html/encoding.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,8 @@ lxb_html_get_attribute(const lxb_char_t *data, const lxb_char_t *end,
477477
data++;
478478
}
479479

480+
*name_end = data;
481+
480482
spaces_state:
481483

482484
data = lxb_html_encoding_skip_spaces(data, end);

ext/dom/lexbor/lexbor/html/tokenizer/state.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LEXBOR_STR_RES_MAP_HEX
1616
#define LEXBOR_STR_RES_MAP_NUM
1717
#include "lexbor/core/str_res.h"
18+
#include "lexbor/core/swar.h"
1819

1920
#define LXB_HTML_TOKENIZER_RES_ENTITIES_SBST
2021
#include "lexbor/html/tokenizer/res.h"
@@ -226,6 +227,8 @@ lxb_html_tokenizer_state_data(lxb_html_tokenizer_t *tkz,
226227
{
227228
lxb_html_tokenizer_state_begin_set(tkz, data);
228229

230+
data = lexbor_swar_seek4(data, end, 0x3C, 0x26, 0x0D, 0x00);
231+
229232
while (data != end) {
230233
switch (*data) {
231234
/* U+003C LESS-THAN SIGN (<) */
@@ -906,6 +909,8 @@ lxb_html_tokenizer_state_attribute_value_double_quoted(lxb_html_tokenizer_t *tkz
906909

907910
lxb_html_tokenizer_state_begin_set(tkz, data);
908911

912+
data = lexbor_swar_seek4(data, end, 0x22, 0x26, 0x0D, 0x00);
913+
909914
while (data != end) {
910915
switch (*data) {
911916
/* U+0022 QUOTATION MARK (") */

ext/dom/lexbor/lexbor/html/tree/insertion_mode/in_head.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
175175
* We can create function for this, but...
176176
*
177177
* The "in head noscript" insertion mode use this
178-
* is you change this code, please, change it in in head noscript" mode
178+
* if you change this code, please, change it in the "in head noscript" mode too
179179
*/
180180
case LXB_TAG__TEXT: {
181181
lxb_html_token_t ws_token = {0};

0 commit comments

Comments
 (0)