Skip to content

Commit 033a66b

Browse files
committed
Merge branch 'PHP-8.4'
* PHP-8.4:
  Fix lowercase HTML attribute exceptions
  Fix GH-17802: \Dom\HTMLDocument querySelector attribute name is case sensitive in HTML
2 parents 8af9042 + 74df3e0 commit 033a66b

File tree

3 files changed

+147
-4
lines changed

3 files changed

+147
-4
lines changed

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 83 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,21 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xml
6565

6666
static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
6767
{
68-
const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name);
68+
const xmlAttr *attr = NULL;
69+
ZEND_ASSERT(node->doc != NULL);
70+
if (php_dom_ns_is_html_and_document_is_html(node)) {
71+
/* No need to handle DTD entities as we're in HTML. */
72+
size_t name_bound = strlen((const char *) name) + 1;
73+
for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) {
74+
if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) {
75+
attr = cur;
76+
break;
77+
}
78+
}
79+
} else {
80+
attr = xmlHasProp(node, (const xmlChar *) name);
81+
}
82+
6983
if (attr != NULL && attr->ns != NULL) {
7084
return NULL;
7185
}
@@ -85,8 +99,67 @@ static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(c
8599
return ret;
86100
}
87101

102+
/* Checks whether the attribute name of `selector` equals `name`, a string of `len` bytes.
 * The lengths must be equal; only then are the bytes compared via lexbor_str_data_nlocmp_right().
 * All callers in this file pass a lowercase string literal through ZEND_STRL().
 * NOTE(review): presumably lexbor_str_data_nlocmp_right() lowercases only its second argument
 * (here the selector name), which would require `name` to already be lowercase - confirm against lexbor. */
static bool lxb_selectors_attrib_name_cmp(const lxb_css_selector_t *selector, const char *name, size_t len)
103+
{
104+
return selector->name.length == len && lexbor_str_data_nlocmp_right((const lxb_char_t *) name, selector->name.data, len);
105+
}
106+
107+
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors
108+
* "Attribute selectors on an HTML element in an HTML document must treat the values of attributes with the following names as ASCII case-insensitive:"
*
* Returns true when the attribute name of `selector` is one of the names listed below.
* This function only looks at the name: the "HTML element in an HTML document" condition
* from the quote is not checked here and has to be verified by the code that consumes the result. */
109+
static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_t *selector)
110+
{
111+
return lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept"))
112+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept-charset"))
113+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("align"))
114+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("alink"))
115+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("axis"))
116+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("bgcolor"))
117+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("charset"))
118+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("checked"))
119+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("clear"))
120+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("codetype"))
121+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("color"))
122+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("compact"))
123+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("declare"))
124+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("defer"))
125+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("dir"))
126+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("direction"))
127+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("disabled"))
128+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("enctype"))
129+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("face"))
130+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("frame"))
131+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("hreflang"))
132+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("http-equiv"))
133+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("lang"))
134+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("language"))
135+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("link"))
136+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("media"))
137+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("method"))
138+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("multiple"))
139+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nohref"))
140+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noresize"))
141+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noshade"))
142+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nowrap"))
143+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("readonly"))
144+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rel"))
145+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rev"))
146+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rules"))
147+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scope"))
148+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scrolling"))
149+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("selected"))
150+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("shape"))
151+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("target"))
152+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("text"))
153+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("type"))
154+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valign"))
155+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valuetype"))
156+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("vlink"));
157+
}
158+
88159
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
89160
{
161+
entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector);
162+
90163
if (node->doc != NULL && node->doc->dict != NULL) {
91164
const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
92165
if (interned != NULL) {
@@ -1287,10 +1360,10 @@ lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
12871360
}
12881361

12891362
static bool
1290-
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src)
1363+
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, bool force_modifier_i, const lexbor_str_t *trg, const lexbor_str_t *src)
12911364
{
12921365
bool res;
1293-
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I;
1366+
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I || force_modifier_i;
12941367

12951368
switch (attr->match) {
12961369
case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */
@@ -1402,7 +1475,13 @@ lxb_selectors_match_attribute(const lxb_css_selector_t *selector,
14021475
}
14031476

14041477
dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
1405-
bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src);
1478+
ZEND_ASSERT(node->doc != NULL);
1479+
bool res = lxb_selectors_match_attribute_value(
1480+
attr,
1481+
entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node),
1482+
&trg.str,
1483+
src
1484+
);
14061485
dom_lxb_str_wrapper_release(&trg);
14071486
return res;
14081487
}

ext/dom/lexbor/lexbor/selectors-adapted/selectors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ typedef lxb_selectors_entry_t *
7878
/* Per-entry identifier data used by the adapted selector matching code.
 * NOTE(review): `name` and `interned` presumably hold the selector name and whether it was
 * found in the document's xmlDict - confirm in lxb_selectors_adapted_set_entry_id_ex(). */
typedef struct {
7979
const xmlChar *name;
8080
bool interned;
81+
/* Set in lxb_selectors_adapted_set_entry_id_ex() from lxb_selectors_is_lowercased_html_attrib_name().
 * When true, and the matched node is an HTML element in an HTML document, lxb_selectors_match_attribute()
 * forces case-insensitive comparison of the attribute value. */
bool attr_case_insensitive;
8182
} lxb_selectors_adapted_id;
8283

8384
struct lxb_selectors_entry {
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
--TEST--
2+
GH-17802 (\Dom\HTMLDocument querySelector attribute name is case sensitive in HTML)
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$text = <<<TEXT
9+
<html>
10+
<head>
11+
<meta charset="Windows-1252">
12+
</head>
13+
<body>
14+
</body>
15+
</html>
16+
TEXT;
17+
18+
$dom = \Dom\HTMLDocument::createFromString($text, options: LIBXML_NOERROR);
19+
// Add a second <meta> element in a non-HTML namespace ('urn:x') so the queries below
// can show the difference between HTML elements and foreign elements.
$meta2 = $dom->head->appendChild($dom->createElementNS('urn:x', 'meta'));
20+
$meta2->setAttribute('charset', 'x');
21+
echo $dom->saveHtml(), "\n";
22+
23+
echo "--- charseT ---\n";
24+
25+
// Mixed-case attribute name: expected to match only the parsed HTML <meta>, not the 'urn:x' one.
foreach ($dom->querySelectorAll('meta[charseT]') as $entry) {
26+
var_dump($dom->saveHtml($entry));
27+
}
28+
29+
echo "--- charset ---\n";
30+
31+
// Exact lowercase attribute name: expected to match both <meta> elements.
foreach ($dom->querySelectorAll('meta[charset]') as $entry) {
32+
var_dump($dom->saveHtml($entry));
33+
}
34+
35+
echo "--- charseT and lowercase value ---\n";
36+
37+
// Mixed-case name plus a lowercase value: expected to match the HTML <meta> whose value is "Windows-1252".
foreach ($dom->querySelectorAll('meta[charseT="windows-1252"]') as $entry) {
38+
var_dump($dom->saveHtml($entry));
39+
}
40+
41+
echo "--- charset and lowercase value ---\n";
42+
43+
// Lowercase name plus a lowercase value: the value of "charset" is expected to be compared case-insensitively.
foreach ($dom->querySelectorAll('meta[charset="windows-1252"]') as $entry) {
44+
var_dump($dom->saveHtml($entry));
45+
}
46+
47+
?>
48+
--EXPECT--
49+
<html><head>
50+
<meta charset="Windows-1252">
51+
<meta charset="x"></meta></head>
52+
<body>
53+
54+
</body></html>
55+
--- charseT ---
56+
string(29) "<meta charset="Windows-1252">"
57+
--- charset ---
58+
string(29) "<meta charset="Windows-1252">"
59+
string(25) "<meta charset="x"></meta>"
60+
--- charseT and lowercase value ---
61+
string(29) "<meta charset="Windows-1252">"
62+
--- charset and lowercase value ---
63+
string(29) "<meta charset="Windows-1252">"

0 commit comments

Comments
 (0)