Skip to content

Commit 3a4a320

Browse files
committed
Fix GH-17802: \Dom\HTMLDocument querySelector attribute name is case sensitive in HTML
According to https://html.spec.whatwg.org/#case-sensitivity-of-selectors, the attribute name in a CSS selector must be converted to lowercase when matching HTML elements, and then compared case-sensitively to the attribute name on the element. We implement this not by doing the explicit conversion, but by a manual loop using a function that first converts the rhs characters to lowercase and keeps the lhs characters the same, achieving the same effect.
1 parent ddde315 commit 3a4a320

File tree

2 files changed

+62
-1
lines changed

2 files changed

+62
-1
lines changed

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,21 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xml
6565

6666
static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
6767
{
68-
const xmlAttr *attr = xmlHasProp(node, (const xmlChar *) name);
68+
const xmlAttr *attr = NULL;
69+
ZEND_ASSERT(node->doc != NULL);
70+
if (php_dom_ns_is_html_and_document_is_html(node)) {
71+
/* No need to handle DTD entities as we're in HTML. */
72+
size_t name_bound = strlen((const char *) name) + 1;
73+
for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) {
74+
if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) {
75+
attr = cur;
76+
break;
77+
}
78+
}
79+
} else {
80+
attr = xmlHasProp(node, (const xmlChar *) name);
81+
}
82+
6983
if (attr != NULL && attr->ns != NULL) {
7084
return NULL;
7185
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
--TEST--
2+
GH-17802 (\Dom\HTMLDocument querySelector attribute name is case sensitive in HTML)
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$text = <<<TEXT
9+
<html>
10+
<head>
11+
<meta charset="Windows-1252">
12+
</head>
13+
<body>
14+
</body>
15+
</html>
16+
TEXT;
17+
18+
$dom = \Dom\HTMLDocument::createFromString($text, options: LIBXML_NOERROR);
19+
$meta2 = $dom->head->appendChild($dom->createElementNS('urn:x', 'meta'));
20+
$meta2->setAttribute('charset', 'x');
21+
echo $dom->saveHtml(), "\n";
22+
23+
echo "--- charseT ---\n";
24+
25+
foreach ($dom->querySelectorAll('meta[charseT]') as $entry) {
26+
var_dump($dom->saveHtml($entry));
27+
}
28+
29+
echo "--- charset ---\n";
30+
31+
foreach ($dom->querySelectorAll('meta[charset]') as $entry) {
32+
var_dump($dom->saveHtml($entry));
33+
}
34+
35+
?>
36+
--EXPECT--
37+
<html><head>
38+
<meta charset="Windows-1252">
39+
<meta charset="x"></meta></head>
40+
<body>
41+
42+
</body></html>
43+
--- charseT ---
44+
string(29) "<meta charset="Windows-1252">"
45+
--- charset ---
46+
string(29) "<meta charset="Windows-1252">"
47+
string(25) "<meta charset="x"></meta>"

0 commit comments

Comments
 (0)