Skip to content

Commit 74df3e0

Browse files
committed
Fix lowercase HTML attribute exceptions
Closes GH-17815.
1 parent 3a4a320 commit 74df3e0

File tree

4 files changed

+87
-3
lines changed

4 files changed

+87
-3
lines changed

NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ PHP NEWS
2121
- DOM:
2222
. Fixed bug GH-17609 (Typo in error message: Dom\NO_DEFAULT_NS instead of
2323
Dom\HTML_NO_DEFAULT_NS). (nielsdos)
24+
. Fixed bug GH-17802 (\Dom\HTMLDocument querySelector attribute name is case
25+
sensitive in HTML). (nielsdos)
2426

2527
- GD:
2628
. Fixed bug GH-17703 (imagescale with both width and height negative values

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 68 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,67 @@ static zend_always_inline dom_lxb_str_wrapper lxb_selectors_adapted_attr_value(c
9999
return ret;
100100
}
101101

102+
/* Returns true when the selector's name is exactly `len` bytes long and
 * lexbor_str_data_nlocmp_right() reports a match between `name` and the selector's name.
 * NOTE(review): presumably an ASCII case-insensitive comparison that expects `name`
 * to already be lowercase -- confirm against the lexbor string API. */
static bool lxb_selectors_attrib_name_cmp(const lxb_css_selector_t *selector, const char *name, size_t len)
103+
{
104+
return selector->name.length == len && lexbor_str_data_nlocmp_right((const lxb_char_t *) name, selector->name.data, len);
105+
}
106+
107+
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors
108+
* "Attribute selectors on an HTML element in an HTML document must treat the values of attributes with the following names as ASCII case-insensitive:" */
109+
/* Returns true when the selector's attribute name matches one of the attribute
 * names listed below, as decided by lxb_selectors_attrib_name_cmp().
 * The result is cached on the selector entry as `attr_case_insensitive` and later
 * used to force case-insensitive matching of the attribute value. */
static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_t *selector)
110+
{
111+
return lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept"))
112+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("accept-charset"))
113+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("align"))
114+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("alink"))
115+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("axis"))
116+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("bgcolor"))
117+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("charset"))
118+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("checked"))
119+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("clear"))
120+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("codetype"))
121+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("color"))
122+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("compact"))
123+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("declare"))
124+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("defer"))
125+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("dir"))
126+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("direction"))
127+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("disabled"))
128+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("enctype"))
129+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("face"))
130+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("frame"))
131+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("hreflang"))
132+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("http-equiv"))
133+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("lang"))
134+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("language"))
135+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("link"))
136+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("media"))
137+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("method"))
138+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("multiple"))
139+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nohref"))
140+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noresize"))
141+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("noshade"))
142+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("nowrap"))
143+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("readonly"))
144+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rel"))
145+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rev"))
146+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("rules"))
147+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scope"))
148+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("scrolling"))
149+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("selected"))
150+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("shape"))
151+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("target"))
152+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("text"))
153+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("type"))
154+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valign"))
155+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("valuetype"))
156+
|| lxb_selectors_attrib_name_cmp(selector, ZEND_STRL("vlink"));
157+
}
158+
102159
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
103160
{
161+
entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector);
162+
104163
if (node->doc != NULL && node->doc->dict != NULL) {
105164
const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
106165
if (interned != NULL) {
@@ -1304,10 +1363,10 @@ lxb_selectors_match_class(const lexbor_str_t *target, const lexbor_str_t *src,
13041363
}
13051364

13061365
static bool
1307-
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, const lexbor_str_t *trg, const lexbor_str_t *src)
1366+
lxb_selectors_match_attribute_value(const lxb_css_selector_attribute_t *attr, bool force_modifier_i, const lexbor_str_t *trg, const lexbor_str_t *src)
13081367
{
13091368
bool res;
1310-
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I;
1369+
bool ins = attr->modifier == LXB_CSS_SELECTOR_MODIFIER_I || force_modifier_i;
13111370

13121371
switch (attr->match) {
13131372
case LXB_CSS_SELECTOR_MATCH_EQUAL: /* = */
@@ -1419,7 +1478,13 @@ lxb_selectors_match_attribute(const lxb_css_selector_t *selector,
14191478
}
14201479

14211480
dom_lxb_str_wrapper trg = lxb_selectors_adapted_attr_value(dom_attr);
1422-
bool res = lxb_selectors_match_attribute_value(attr, &trg.str, src);
1481+
ZEND_ASSERT(node->doc != NULL);
1482+
bool res = lxb_selectors_match_attribute_value(
1483+
attr,
1484+
entry->id.attr_case_insensitive && php_dom_ns_is_html_and_document_is_html(node),
1485+
&trg.str,
1486+
src
1487+
);
14231488
dom_lxb_str_wrapper_release(&trg);
14241489
return res;
14251490
}

ext/dom/lexbor/lexbor/selectors-adapted/selectors.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ typedef lxb_selectors_entry_t *
7878
/* Per-entry identifier data for the adapted selector matcher (reached as `entry->id`). */
typedef struct {
7979
const xmlChar *name;
8080
/* NOTE(review): presumably records whether `name` came from the document's
 * xmlDict (see the xmlDictExists() lookup in set_entry_id_ex) -- confirm. */
bool interned;
81+
/* Set from lxb_selectors_is_lowercased_html_attrib_name(); when true and the node is
 * an HTML element in an HTML document, attribute values are matched case-insensitively. */
bool attr_case_insensitive;
8182
} lxb_selectors_adapted_id;
8283

8384
struct lxb_selectors_entry {

ext/dom/tests/modern/css_selectors/gh17802.phpt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,18 @@ foreach ($dom->querySelectorAll('meta[charset]') as $entry) {
3232
var_dump($dom->saveHtml($entry));
3333
}
3434

35+
echo "--- charseT and lowercase value ---\n";
36+
37+
foreach ($dom->querySelectorAll('meta[charseT="windows-1252"]') as $entry) {
38+
var_dump($dom->saveHtml($entry));
39+
}
40+
41+
echo "--- charset and lowercase value ---\n";
42+
43+
foreach ($dom->querySelectorAll('meta[charset="windows-1252"]') as $entry) {
44+
var_dump($dom->saveHtml($entry));
45+
}
46+
3547
?>
3648
--EXPECT--
3749
<html><head>
@@ -45,3 +57,7 @@ string(29) "<meta charset="Windows-1252">"
4557
--- charset ---
4658
string(29) "<meta charset="Windows-1252">"
4759
string(25) "<meta charset="x"></meta>"
60+
--- charseT and lowercase value ---
61+
string(29) "<meta charset="Windows-1252">"
62+
--- charset and lowercase value ---
63+
string(29) "<meta charset="Windows-1252">"

0 commit comments

Comments
 (0)