Skip to content

Commit a411d78

Browse files
committed
Fix GH-20395: \Dom\ParentNode::querySelector and \Dom\ParentNode::querySelectorAll requires elements in $selectors to be lowercase
The selector needs to be lowercased before it is compared against the element name. This also almost completely obsoletes the interned string optimization, so get rid of that for simplicity's sake. While there is still a theoretical benefit, it is only 1-2% in my random tests, which is not worth it anymore.
1 parent ca084ac commit a411d78

File tree

3 files changed

+80
-48
lines changed

3 files changed

+80
-48
lines changed

ext/dom/lexbor/lexbor/selectors-adapted/selectors.c

Lines changed: 52 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,24 @@ static void dom_lxb_str_wrapper_release(dom_lxb_str_wrapper *wrapper)
3535
}
3636
}
3737

38-
static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_literal(const xmlNode *node, const char *name)
38+
static bool lxb_selectors_str_cmp_loright(const char *lhs, const char *rhs)
3939
{
40-
return strcmp((const char *) node->name, name) == 0;
40+
while (true) {
41+
if (*rhs != zend_tolower_ascii(*lhs)) {
42+
return false;
43+
}
44+
if (!*lhs) {
45+
return true;
46+
}
47+
++rhs;
48+
++lhs;
49+
}
50+
}
51+
52+
/* `name` is lowercase */
53+
static zend_always_inline bool lxb_selectors_cmp_html_name_lit(const xmlNode *node, const char *name)
54+
{
55+
return lxb_selectors_str_cmp_loright((const char *) node->name, name);
4156
}
4257

4358
static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, const xmlNode *b)
@@ -46,16 +61,18 @@ static zend_always_inline bool lxb_selectors_adapted_cmp_ns(const xmlNode *a, co
4661
return a->ns == b->ns || (a->ns != NULL && b->ns != NULL && xmlStrEqual(a->ns->href, b->ns->href));
4762
}
4863

64+
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors */
4965
static zend_always_inline bool lxb_selectors_adapted_cmp_local_name_id(const xmlNode *node, const lxb_selectors_adapted_id *id)
5066
{
51-
uintptr_t ptr = (uintptr_t) node->name;
52-
if (id->interned && (ptr & (ZEND_MM_ALIGNMENT - 1)) != 0) {
53-
/* It cannot be a heap-allocated string because the pointer is not properly aligned for a heap allocation.
54-
* Therefore, it must be interned into the dictionary pool. */
55-
return node->name == id->name;
67+
ZEND_ASSERT(node->doc != NULL);
68+
if (php_dom_ns_is_html_and_document_is_html(node)) {
69+
/* From https://html.spec.whatwg.org/#case-sensitivity-of-selectors:
70+
* The element name must be compared case sensitively _after_ converting the selector to lowercase.
71+
* E.g. selector "DIV" must match element "div" but not "Div". */
72+
return lxb_selectors_str_cmp_loright((const char *) id->name, (const char *) node->name);
73+
} else {
74+
return strcmp((const char *) node->name, (const char *) id->name) == 0;
5675
}
57-
58-
return strcmp((const char *) node->name, (const char *) id->name) == 0;
5976
}
6077

6178
static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNode *node, const lxb_char_t *name)
@@ -64,9 +81,8 @@ static zend_always_inline const xmlAttr *lxb_selectors_adapted_attr(const xmlNod
6481
ZEND_ASSERT(node->doc != NULL);
6582
if (php_dom_ns_is_html_and_document_is_html(node)) {
6683
/* No need to handle DTD entities as we're in HTML. */
67-
size_t name_bound = strlen((const char *) name) + 1;
6884
for (const xmlAttr *cur = node->properties; cur != NULL; cur = cur->next) {
69-
if (lexbor_str_data_nlocmp_right(cur->name, name, name_bound)) {
85+
if (lxb_selectors_str_cmp_loright((const char *) name, (const char *) cur->name)) {
7086
attr = cur;
7187
break;
7288
}
@@ -154,18 +170,7 @@ static bool lxb_selectors_is_lowercased_html_attrib_name(const lxb_css_selector_
154170
static void lxb_selectors_adapted_set_entry_id_ex(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
155171
{
156172
entry->id.attr_case_insensitive = lxb_selectors_is_lowercased_html_attrib_name(selector);
157-
158-
if (node->doc != NULL && node->doc->dict != NULL) {
159-
const xmlChar *interned = xmlDictExists(node->doc->dict, selector->name.data, selector->name.length);
160-
if (interned != NULL) {
161-
entry->id.name = interned;
162-
entry->id.interned = true;
163-
return;
164-
}
165-
}
166-
167173
entry->id.name = selector->name.data;
168-
entry->id.interned = false;
169174
}
170175

171176
static zend_always_inline void lxb_selectors_adapted_set_entry_id(lxb_selectors_entry_t *entry, const lxb_css_selector_t *selector, const xmlNode *node)
@@ -1686,8 +1691,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
16861691
case LXB_CSS_SELECTOR_PSEUDO_CLASS_ANY_LINK:
16871692
/* https://drafts.csswg.org/selectors/#the-any-link-pseudo */
16881693
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1689-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1690-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1694+
&& (lxb_selectors_cmp_html_name_lit(node, "a")
1695+
|| lxb_selectors_cmp_html_name_lit(node, "area")))
16911696
{
16921697
return lxb_selectors_adapted_has_attr(node, "href");
16931698
}
@@ -1705,7 +1710,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
17051710
if (!php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
17061711
return false;
17071712
}
1708-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")) {
1713+
if (lxb_selectors_cmp_html_name_lit(node, "input")) {
17091714
const xmlAttr *dom_attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "type");
17101715
if (dom_attr == NULL) {
17111716
return false;
@@ -1729,7 +1734,7 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
17291734

17301735
return res;
17311736
}
1732-
else if(lxb_selectors_adapted_cmp_local_name_literal(node, "option")) {
1737+
else if(lxb_selectors_cmp_html_name_lit(node, "option")) {
17331738
return lxb_selectors_adapted_has_attr(node, "selected");
17341739
}
17351740

@@ -1802,8 +1807,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18021807
case LXB_CSS_SELECTOR_PSEUDO_CLASS_LINK:
18031808
/* https://html.spec.whatwg.org/multipage/semantics-other.html#selector-link */
18041809
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1805-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "a")
1806-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "area")))
1810+
&& (lxb_selectors_cmp_html_name_lit(node, "a")
1811+
|| lxb_selectors_cmp_html_name_lit(node, "area")))
18071812
{
18081813
return lxb_selectors_adapted_has_attr(node, "href");
18091814
}
@@ -1823,9 +1828,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18231828

18241829
case LXB_CSS_SELECTOR_PSEUDO_CLASS_OPTIONAL:
18251830
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1826-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1827-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1828-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1831+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1832+
|| lxb_selectors_cmp_html_name_lit(node, "select")
1833+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18291834
{
18301835
return !lxb_selectors_adapted_has_attr(node, "required");
18311836
}
@@ -1840,8 +1845,8 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18401845

18411846
case LXB_CSS_SELECTOR_PSEUDO_CLASS_PLACEHOLDER_SHOWN:
18421847
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1843-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1844-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1848+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1849+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18451850
{
18461851
return lxb_selectors_adapted_has_attr(node, "placeholder");
18471852
}
@@ -1856,9 +1861,9 @@ lxb_selectors_pseudo_class(const lxb_css_selector_t *selector,
18561861

18571862
case LXB_CSS_SELECTOR_PSEUDO_CLASS_REQUIRED:
18581863
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
1859-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
1860-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
1861-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")))
1864+
&& (lxb_selectors_cmp_html_name_lit(node, "input")
1865+
|| lxb_selectors_cmp_html_name_lit(node, "select")
1866+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")))
18621867
{
18631868
return lxb_selectors_adapted_has_attr(node, "required");
18641869
}
@@ -2104,32 +2109,32 @@ lxb_selectors_pseudo_class_disabled(const xmlNode *node)
21042109
}
21052110

21062111
if (lxb_selectors_adapted_has_attr(node, "disabled")
2107-
&& (lxb_selectors_adapted_cmp_local_name_literal(node, "button")
2108-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "input")
2109-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "select")
2110-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")
2111-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "optgroup")
2112-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")))
2112+
&& (lxb_selectors_cmp_html_name_lit(node, "button")
2113+
|| lxb_selectors_cmp_html_name_lit(node, "input")
2114+
|| lxb_selectors_cmp_html_name_lit(node, "select")
2115+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")
2116+
|| lxb_selectors_cmp_html_name_lit(node, "optgroup")
2117+
|| lxb_selectors_cmp_html_name_lit(node, "fieldset")))
21132118
{
21142119
return true;
21152120
}
21162121

2117-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")) {
2122+
if (lxb_selectors_cmp_html_name_lit(node, "fieldset")) {
21182123
const xmlNode *fieldset = node;
21192124
node = node->parent;
21202125

21212126
while (node != NULL && CMP_NODE_TYPE(node, XML_ELEMENT_NODE)) {
21222127
/* node is a disabled fieldset that is an ancestor of fieldset */
21232128
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)
2124-
&& lxb_selectors_adapted_cmp_local_name_literal(node, "fieldset")
2129+
&& lxb_selectors_cmp_html_name_lit(node, "fieldset")
21252130
&& lxb_selectors_adapted_has_attr(node, "disabled"))
21262131
{
21272132
/* Search first legend child and figure out if fieldset is a descendent from that. */
21282133
const xmlNode *search_current = node->children;
21292134
do {
21302135
if (search_current->type == XML_ELEMENT_NODE
21312136
&& php_dom_ns_is_fast(search_current, php_dom_ns_is_html_magic_token)
2132-
&& lxb_selectors_adapted_cmp_local_name_literal(search_current, "legend")) {
2137+
&& lxb_selectors_cmp_html_name_lit(search_current, "legend")) {
21332138
/* search_current is a legend element. */
21342139
const xmlNode *inner_search_current = fieldset;
21352140

@@ -2235,8 +2240,8 @@ static bool
22352240
lxb_selectors_pseudo_class_read_write(const xmlNode *node)
22362241
{
22372242
if (php_dom_ns_is_fast(node, php_dom_ns_is_html_magic_token)) {
2238-
if (lxb_selectors_adapted_cmp_local_name_literal(node, "input")
2239-
|| lxb_selectors_adapted_cmp_local_name_literal(node, "textarea")) {
2243+
if (lxb_selectors_cmp_html_name_lit(node, "input")
2244+
|| lxb_selectors_cmp_html_name_lit(node, "textarea")) {
22402245
return !lxb_selectors_adapted_has_attr(node, "readonly") && !lxb_selectors_adapted_has_attr(node, "disabled");
22412246
} else {
22422247
const xmlAttr *attr = lxb_selectors_adapted_attr(node, (const lxb_char_t *) "contenteditable");

ext/dom/lexbor/lexbor/selectors-adapted/selectors.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ typedef lxb_selectors_entry_t *
7777

7878
typedef struct {
7979
const xmlChar *name;
80-
bool interned;
8180
bool attr_case_insensitive;
8281
} lxb_selectors_adapted_id;
8382

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
--TEST--
2+
GH-20395 (\Dom\ParentNode::querySelector and \Dom\ParentNode::querySelectorAll requires elements in $selectors to be lowercase)
3+
--EXTENSIONS--
4+
dom
5+
--CREDITS--
6+
DeveloperRob
7+
--FILE--
8+
<?php
9+
10+
$html = '<!doctype html><html><head></head><body></body></html>';
11+
$dom = Dom\HtmlDocument::createFromString($html);
12+
var_dump(is_null($dom->querySelector('html')));
13+
var_dump(is_null($dom->querySelector('Html')));
14+
var_dump(is_null($dom->querySelector('HTML')));
15+
16+
$dom->body->appendChild($dom->createElement('div'));
17+
$dom->body->appendChild($dom->createElementNS('http://www.w3.org/1999/xhtml', 'Div'));
18+
19+
foreach ($dom->querySelectorAll('div') as $div) {
20+
var_dump($div->localName);
21+
}
22+
23+
?>
24+
--EXPECT--
25+
bool(false)
26+
bool(false)
27+
bool(false)
28+
string(3) "div"

0 commit comments

Comments
 (0)