Skip to content

Commit b2e4802

Browse files
committed
Speed up setting attributes on etree implementations
1 parent b075e51 commit b2e4802

File tree

2 files changed

+55
-30
lines changed

2 files changed

+55
-30
lines changed

html5lib/treebuilders/etree.py

+15-12
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,8 @@
55

66
import re
77

8+
from copy import copy
9+
810
from . import base
911
from .. import _ihatexml
1012
from .. import constants
@@ -61,16 +63,17 @@ def _getAttributes(self):
6163
return self._element.attrib
6264

6365
def _setAttributes(self, attributes):
64-
# Delete existing attributes first
65-
# XXX - there may be a better way to do this...
66-
for key in list(self._element.attrib.keys()):
67-
del self._element.attrib[key]
68-
for key, value in attributes.items():
69-
if isinstance(key, tuple):
70-
name = "{%s}%s" % (key[2], key[1])
71-
else:
72-
name = key
73-
self._element.set(name, value)
66+
el_attrib = self._element.attrib
67+
el_attrib.clear()
68+
if attributes:
69+
# calling .items _always_ allocates, and the above truthy check is cheaper than the
70+
# allocation on average
71+
for key, value in attributes.items():
72+
if isinstance(key, tuple):
73+
name = "{%s}%s" % (key[2], key[1])
74+
else:
75+
name = key
76+
el_attrib[name] = value
7477

7578
attributes = property(_getAttributes, _setAttributes)
7679

@@ -129,8 +132,8 @@ def insertText(self, data, insertBefore=None):
129132

130133
def cloneNode(self):
131134
element = type(self)(self.name, self.namespace)
132-
for name, value in self.attributes.items():
133-
element.attributes[name] = value
135+
if self._element.attrib:
136+
element._element.attrib = copy(self._element.attrib)
134137
return element
135138

136139
def reparentChildren(self, newParent):

html5lib/treebuilders/etree_lxml.py

+40-18
Original file line number | Diff line number | Diff line change
@@ -16,13 +16,19 @@
1616
import re
1717
import sys
1818

19+
try:
20+
from collections.abc import MutableMapping
21+
except ImportError:
22+
from collections import MutableMapping
23+
1924
from . import base
2025
from ..constants import DataLossWarning
2126
from .. import constants
2227
from . import etree as etree_builders
2328
from .. import _ihatexml
2429

2530
import lxml.etree as etree
31+
from six import PY3, binary_type
2632

2733

2834
fullTree = True
@@ -189,26 +195,37 @@ def __init__(self, namespaceHTMLElements, fullTree=False):
189195
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
190196
self.namespaceHTMLElements = namespaceHTMLElements
191197

192-
class Attributes(dict):
193-
def __init__(self, element, value=None):
194-
if value is None:
195-
value = {}
198+
class Attributes(MutableMapping):
199+
def __init__(self, element):
196200
self._element = element
197-
dict.__init__(self, value) # pylint:disable=non-parent-init-called
198-
for key, value in self.items():
199-
if isinstance(key, tuple):
200-
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
201-
else:
202-
name = infosetFilter.coerceAttribute(key)
203-
self._element._element.attrib[name] = value
204201

205-
def __setitem__(self, key, value):
206-
dict.__setitem__(self, key, value)
202+
def _coerceKey(self, key):
207203
if isinstance(key, tuple):
208204
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
209205
else:
210206
name = infosetFilter.coerceAttribute(key)
211-
self._element._element.attrib[name] = value
207+
return name
208+
209+
def __getitem__(self, key):
210+
value = self._element._element.attrib[self._coerceKey(key)]
211+
if not PY3 and isinstance(value, binary_type):
212+
value = value.decode("ascii")
213+
return value
214+
215+
def __setitem__(self, key, value):
216+
self._element._element.attrib[self._coerceKey(key)] = value
217+
218+
def __delitem__(self, key):
219+
del self._element._element.attrib[self._coerceKey(key)]
220+
221+
def __iter__(self):
222+
return iter(self._element._element.attrib)
223+
224+
def __len__(self):
225+
return len(self._element._element.attrib)
226+
227+
def clear(self):
228+
return self._element._element.attrib.clear()
212229

213230
class Element(builder.Element):
214231
def __init__(self, name, namespace):
@@ -229,17 +246,22 @@ def _getName(self):
229246
def _getAttributes(self):
230247
return self._attributes
231248

232-
def _setAttributes(self, attributes):
233-
self._attributes = Attributes(self, attributes)
249+
def _setAttributes(self, value):
250+
attributes = self.attributes
251+
attributes.clear()
252+
attributes.update(value)
234253

235254
attributes = property(_getAttributes, _setAttributes)
236255

237256
def insertText(self, data, insertBefore=None):
238257
data = infosetFilter.coerceCharacters(data)
239258
builder.Element.insertText(self, data, insertBefore)
240259

241-
def appendChild(self, child):
242-
builder.Element.appendChild(self, child)
260+
def cloneNode(self):
261+
element = type(self)(self.name, self.namespace)
262+
if self._element.attrib:
263+
element._element.attrib.update(self._element.attrib)
264+
return element
243265

244266
class Comment(builder.Comment):
245267
def __init__(self, data):

0 commit comments

Comments
 (0)