Skip to content

Commit a38e3c9

Browse files
committed
Fix #79700: Bad performance with namespaced nodes due to wrong libxml assumption
* Use a prepending strategy instead of appending in dom_set_old_ns() Looping to the end of the list is wasteful. We can just put the new nodes at the front of the list. I don't believe we can fully prepend, because libxml2 may assume that the xml namespace is the first one, so we'll put the new ones as the second one. * Reuse namespaces from doc->oldNs if possible in dom_get_ns() * Add a test for reconciling a reused namespace * Explain why there can't be a cycle between oldNs and nsDef Closes GH-11376. Also fixes #77894.
1 parent 50b4df1 commit a38e3c9

File tree

3 files changed

+85
-16
lines changed

3 files changed

+85
-16
lines changed

NEWS

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ PHP NEWS
66
. Fix GH-11388 (Allow "final" modifier when importing a method from a trait).
77
(nielsdos)
88

9+
- DOM:
10+
. Fix #79700 (wrong use of libxml oldNs leads to performance problem).
11+
(nielsdos)
12+
. Fix #77894 (DOMNode::C14N() very slow on generated DOMDocuments even after
13+
normalisation). (nielsdos)
14+
915
08 Jun 2023, PHP 8.3.0alpha1
1016

1117
- CLI:

ext/dom/php_dom.c

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1369,11 +1369,16 @@ void dom_normalize (xmlNodePtr nodep)
13691369

13701370
/* {{{ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) */
13711371
void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) {
1372-
xmlNs *cur;
1373-
13741372
if (doc == NULL)
13751373
return;
13761374

1375+
ZEND_ASSERT(ns->next == NULL);
1376+
1377+
/* Note: we'll use a prepend strategy instead of append to
1378+
* make sure we don't lose performance when the list is long.
1379+
* As libxml2 could assume the xml node is the first one, we'll place our
1380+
* new entries after the first one. */
1381+
13771382
if (doc->oldNs == NULL) {
13781383
doc->oldNs = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
13791384
if (doc->oldNs == NULL) {
@@ -1383,13 +1388,10 @@ void dom_set_old_ns(xmlDoc *doc, xmlNs *ns) {
13831388
doc->oldNs->type = XML_LOCAL_NAMESPACE;
13841389
doc->oldNs->href = xmlStrdup(XML_XML_NAMESPACE);
13851390
doc->oldNs->prefix = xmlStrdup((const xmlChar *)"xml");
1391+
} else {
1392+
ns->next = doc->oldNs->next;
13861393
}
1387-
1388-
cur = doc->oldNs;
1389-
while (cur->next != NULL) {
1390-
cur = cur->next;
1391-
}
1392-
cur->next = ns;
1394+
doc->oldNs->next = ns;
13931395
}
13941396
/* }}} end dom_set_old_ns */
13951397

@@ -1411,6 +1413,9 @@ static void dom_reconcile_ns_internal(xmlDocPtr doc, xmlNodePtr nodep)
14111413
} else {
14121414
prevns->next = nsdftptr;
14131415
}
1416+
/* Note: we can't get here if the ns is already on the oldNs list.
1417+
* This is because in that case the definition won't be on the node, and
1418+
* therefore won't be in the nodep->nsDef list. */
14141419
dom_set_old_ns(doc, curns);
14151420
curns = prevns;
14161421
}
@@ -1509,22 +1514,38 @@ NAMESPACE_ERR: Raised if
15091514

15101515
/* {{{ xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) */
15111516
xmlNsPtr dom_get_ns(xmlNodePtr nodep, char *uri, int *errorcode, char *prefix) {
1512-
xmlNsPtr nsptr = NULL;
1513-
1514-
*errorcode = 0;
1517+
xmlNsPtr nsptr;
15151518

15161519
if (! ((prefix && !strcmp (prefix, "xml") && strcmp(uri, (char *)XML_XML_NAMESPACE)) ||
15171520
(prefix && !strcmp (prefix, "xmlns") && strcmp(uri, (char *)DOM_XMLNS_NAMESPACE)) ||
15181521
(prefix && !strcmp(uri, (char *)DOM_XMLNS_NAMESPACE) && strcmp (prefix, "xmlns")))) {
1522+
/* Reuse the old namespaces from doc->oldNs if possible, before creating a new one.
1523+
* This will prevent the oldNs list from growing with duplicates. */
1524+
xmlDocPtr doc = nodep->doc;
1525+
if (doc && doc->oldNs != NULL) {
1526+
nsptr = doc->oldNs;
1527+
do {
1528+
if (xmlStrEqual(nsptr->prefix, (xmlChar *)prefix) && xmlStrEqual(nsptr->href, (xmlChar *)uri)) {
1529+
goto out;
1530+
}
1531+
nsptr = nsptr->next;
1532+
} while (nsptr);
1533+
}
1534+
/* Couldn't reuse one, create a new one. */
15191535
nsptr = xmlNewNs(nodep, (xmlChar *)uri, (xmlChar *)prefix);
1536+
if (UNEXPECTED(nsptr == NULL)) {
1537+
goto err;
1538+
}
1539+
} else {
1540+
goto err;
15201541
}
15211542

1522-
if (nsptr == NULL) {
1523-
*errorcode = NAMESPACE_ERR;
1524-
}
1525-
1543+
out:
1544+
*errorcode = 0;
15261545
return nsptr;
1527-
1546+
err:
1547+
*errorcode = NAMESPACE_ERR;
1548+
return NULL;
15281549
}
15291550
/* }}} end dom_get_ns */
15301551

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
--TEST--
2+
Reconcile a reused namespace from doc->oldNs
3+
--EXTENSIONS--
4+
dom
5+
--FILE--
6+
<?php
7+
8+
$dom = new DOMDocument();
9+
$root = $dom->createElementNS('http://www.w3.org/2000/xhtml', 'html');
10+
11+
$dom->loadXML(<<<XML
12+
<?xml version="1.0"?>
13+
<html
14+
xmlns="http://www.w3.org/2000/xhtml"
15+
xmlns:a="http://example.com/A"
16+
xmlns:b="http://example.com/B"
17+
/>
18+
XML);
19+
$root = $dom->firstElementChild;
20+
21+
echo "Add first\n";
22+
$element = $dom->createElementNS('http://example.com/B', 'p', 'Hello World');
23+
$root->appendChild($element);
24+
25+
echo "Add second\n";
26+
$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World');
27+
$root->appendChild($element);
28+
29+
echo "Add third\n";
30+
$element = $dom->createElementNS('http://example.com/A', 'p', 'Hello World');
31+
$root->appendChild($element);
32+
33+
var_dump($dom->saveXML());
34+
35+
?>
36+
--EXPECT--
37+
Add first
38+
Add second
39+
Add third
40+
string(201) "<?xml version="1.0"?>
41+
<html xmlns="http://www.w3.org/2000/xhtml" xmlns:a="http://example.com/A" xmlns:b="http://example.com/B"><b:p>Hello World</b:p><a:p>Hello World</a:p><a:p>Hello World</a:p></html>
42+
"

0 commit comments

Comments
 (0)