Skip to content

Update Lexbor #14729

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jun 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ext/dom/lexbor/lexbor/core/diyfp.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ lexbor_cached_power_bin(int exp, int *dec_exp);
/*
* Inline functions
*/
#if (LEXBOR_HAVE_BUILTIN_CLZLL)
#ifdef LEXBOR_HAVE_BUILTIN_CLZLL
#define nxt_leading_zeros64(x) (((x) == 0) ? 64 : __builtin_clzll(x))

#else
Expand Down Expand Up @@ -199,7 +199,7 @@ lexbor_diyfp_sub(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
lxb_inline lexbor_diyfp_t
lexbor_diyfp_mul(lexbor_diyfp_t lhs, lexbor_diyfp_t rhs)
{
#if (LEXBOR_HAVE_UNSIGNED_INT128)
#ifdef LEXBOR_HAVE_UNSIGNED_INT128

uint64_t l, h;
lxb_uint128_t u128;
Expand Down
7 changes: 1 addition & 6 deletions ext/dom/lexbor/lexbor/core/perf.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (C) 2018 Alexander Borisov
* Copyright (C) 2018-2024 Alexander Borisov
*
* Author: Alexander Borisov <[email protected]>
*/
Expand All @@ -14,9 +14,6 @@ extern "C" {
#include "lexbor/core/base.h"


#ifdef LEXBOR_WITH_PERF


LXB_API void *
lexbor_perf_create(void);

Expand All @@ -36,8 +33,6 @@ LXB_API double
lexbor_perf_in_sec(void *perf);


#endif /* LEXBOR_WITH_PERF */

#ifdef __cplusplus
} /* extern "C" */
#endif
Expand Down
68 changes: 68 additions & 0 deletions ext/dom/lexbor/lexbor/core/swar.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (C) 2024 Alexander Borisov
*
* Author: Niels Dossche <[email protected]>
*/

#ifndef LEXBOR_SWAR_H
#define LEXBOR_SWAR_H

#ifdef __cplusplus
extern "C" {
#endif


#include "lexbor/core/base.h"


/*
* SWAR ("SIMD within a register") helpers: treat one size_t as a vector of
* bytes and test all of them with a handful of integer operations.
*
* Based on techniques from https://graphics.stanford.edu/~seander/bithacks.html
*/

/* A word with the lowest bit of every byte set (0x0101...01). */
#define LEXBOR_SWAR_ONES (~((size_t) 0) / 0xFF)

/* Replicates the byte value x (0..0xFF) into every byte of a word. */
#define LEXBOR_SWAR_REPEAT(x) (LEXBOR_SWAR_ONES * (x))

/*
* Non-zero if and only if some byte of v is zero. The lowest set bit of the
* result is the high bit of the lowest zero byte; bits in higher bytes may be
* set spuriously because the subtraction borrows across byte boundaries, so
* only the lowest set bit should be trusted as a position.
*/
#define LEXBOR_SWAR_HAS_ZERO(v) (((v) - LEXBOR_SWAR_ONES) & ~(v) & LEXBOR_SWAR_REPEAT(0x80))

/*
* Reads the first byte in memory of a uint16_t holding 1: yields 1 on a
* little-endian target and 0 on a big-endian one.
* NOTE(review): relies on a C99 compound literal; confirm it is accepted by
* every compiler that includes this header from C++.
*/
#define LEXBOR_SWAR_IS_LITTLE_ENDIAN (*(unsigned char *) &(uint16_t){1})


/*
* When handling hot loops that search for a set of characters,
* this function can be used to quickly move the data pointer much
* closer to the first occurrence of such a character.
*/
/*
 * Scans [data, end) one size_t word at a time looking for any of the four
 * bytes c1, c2, c3, c4.
 *
 * Returns:
 *   - a pointer to the first matching byte, if a match lies inside one of the
 *     full words that were examined;
 *   - otherwise a pointer to the first byte that was not examined, which
 *     leaves fewer than sizeof(size_t) bytes before end.
 *
 * The returned pointer is therefore only a starting point: the caller must
 * still inspect the bytes from the returned position up to end.
 * On big-endian targets no scanning is done and data is returned unchanged.
 */
lxb_inline const lxb_char_t *
lexbor_swar_seek4(const lxb_char_t *data, const lxb_char_t *end,
                  lxb_char_t c1, lxb_char_t c2, lxb_char_t c3, lxb_char_t c4)
{
    size_t bytes, matches, t1, t2, t3, t4;

    if (LEXBOR_SWAR_IS_LITTLE_ENDIAN) {
        /*
         * Compare the remaining length instead of computing
         * data + sizeof(size_t): forming a pointer more than one past the
         * end of the buffer is undefined behaviour and may wrap around.
         */
        while (data < end && (size_t) (end - data) >= sizeof(size_t)) {
            /* memcpy keeps the load free of alignment and aliasing issues. */
            memcpy(&bytes, data, sizeof(size_t));

            /* XOR turns every byte equal to cN into a zero byte. */
            t1 = bytes ^ LEXBOR_SWAR_REPEAT(c1);
            t2 = bytes ^ LEXBOR_SWAR_REPEAT(c2);
            t3 = bytes ^ LEXBOR_SWAR_REPEAT(c3);
            t4 = bytes ^ LEXBOR_SWAR_REPEAT(c4);
            matches = LEXBOR_SWAR_HAS_ZERO(t1) | LEXBOR_SWAR_HAS_ZERO(t2)
                      | LEXBOR_SWAR_HAS_ZERO(t3) | LEXBOR_SWAR_HAS_ZERO(t4);

            if (matches) {
                /*
                 * (matches - 1) sets every bit below the lowest match flag.
                 * Masking with ONES keeps one bit per byte up to and
                 * including the matching byte; multiplying by ONES sums
                 * those bits into the top byte. That sum is the index of
                 * the first matching byte plus one.
                 */
                data += ((((matches - 1) & LEXBOR_SWAR_ONES) * LEXBOR_SWAR_ONES)
                         >> (sizeof(size_t) * 8 - 8)) - 1;
                break;
            }

            data += sizeof(size_t);
        }
    }

    return data;
}


#ifdef __cplusplus
} /* extern "C" */
#endif

#endif /* LEXBOR_SWAR_H */

10 changes: 10 additions & 0 deletions ext/dom/lexbor/lexbor/css/parser.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,16 @@ lxb_css_parser_init(lxb_css_parser_t *parser, lxb_css_syntax_tokenizer_t *tkz)
parser->rules_end = parser->rules_begin + lxb_rules_length;
parser->rules = parser->rules_begin;

/*
* Zero those parameters that can be used (passed to the function).
* The parser->rules->phase parameter will be assigned at the end of the
* parsing.
*
* The point is that parser->rules[0] is used as a stub before exiting
* parsing.
*/
parser->rules->context = NULL;

/* Temp */
parser->pos = NULL;
parser->str.length = 0;
Expand Down
2 changes: 2 additions & 0 deletions ext/dom/lexbor/lexbor/html/encoding.c
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,8 @@ lxb_html_get_attribute(const lxb_char_t *data, const lxb_char_t *end,
data++;
}

*name_end = data;

spaces_state:

data = lxb_html_encoding_skip_spaces(data, end);
Expand Down
5 changes: 5 additions & 0 deletions ext/dom/lexbor/lexbor/html/tokenizer/state.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#define LEXBOR_STR_RES_MAP_HEX
#define LEXBOR_STR_RES_MAP_NUM
#include "lexbor/core/str_res.h"
#include "lexbor/core/swar.h"

#define LXB_HTML_TOKENIZER_RES_ENTITIES_SBST
#include "lexbor/html/tokenizer/res.h"
Expand Down Expand Up @@ -226,6 +227,8 @@ lxb_html_tokenizer_state_data(lxb_html_tokenizer_t *tkz,
{
lxb_html_tokenizer_state_begin_set(tkz, data);

data = lexbor_swar_seek4(data, end, 0x3C, 0x26, 0x0D, 0x00);

while (data != end) {
switch (*data) {
/* U+003C LESS-THAN SIGN (<) */
Expand Down Expand Up @@ -906,6 +909,8 @@ lxb_html_tokenizer_state_attribute_value_double_quoted(lxb_html_tokenizer_t *tkz

lxb_html_tokenizer_state_begin_set(tkz, data);

data = lexbor_swar_seek4(data, end, 0x22, 0x26, 0x0D, 0x00);

while (data != end) {
switch (*data) {
/* U+0022 QUOTATION MARK (") */
Expand Down
2 changes: 1 addition & 1 deletion ext/dom/lexbor/lexbor/html/tree/insertion_mode/in_head.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ lxb_html_tree_insertion_mode_in_head_open(lxb_html_tree_t *tree,
* We can create function for this, but...
*
* The "in head noscript" insertion mode use this
* is you change this code, please, change it in in head noscript" mode
* if you change this code, please change it in the "in head noscript" mode too
*/
case LXB_TAG__TEXT: {
lxb_html_token_t ws_token = {0};
Expand Down
Loading