Closed
Description
Hi,
The code below fails with "AttributeError: 'unicode' object has no attribute 'tag'":
import html5lib
# Shared lxml-backed parser, serializer and tree walker used for both inputs.
parser = html5lib.HTMLParser(
    tree=html5lib.treebuilders.getTreeBuilder("lxml"),
    namespaceHTMLElements=False
)
serializer = html5lib.serializer.HTMLSerializer(omit_optional_tags=False)
walker = html5lib.treewalkers.getTreeWalker("lxml")

# Round-trip each fragment: parse it inside a <div> container, walk the
# resulting tree, serialize the token stream and print it.
for src in (
    u"experiences",     # works
    u"exp\xe9riences",  # Doesn't work
):
    tree = parser.parseFragment(src, container="div")
    stream = walker(tree)
    output = serializer.serialize(stream)
    print("\n".join(output))
I think the error lies in how the isstring attribute is set in the FragmentWrapper class in treewalkers/lxmletree.py.
Changing:
def ensure_str(s):
    """Return *s* as text.

    ``None`` and ``text_type`` instances are returned unchanged; any other
    value is decoded with ``s.decode("utf-8", "strict")``.
    """
    # Nothing to convert: either there is no value or it is already text.
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("utf-8", "strict")
class FragmentWrapper(object):
    # Excerpt of the current implementation; the statements replaced by
    # `...` below are elided in this snippet.
    def __init__(self, fragment_root, obj):
        ...
        # Flag the wrapped object as a string only when it is a `str` or
        # `bytes` instance.
        # NOTE(review): on Python 2 a `unicode` object is neither, so it is
        # not flagged here -- presumably why the non-ASCII fragment later
        # fails with "'unicode' object has no attribute 'tag'"; confirm.
        self.isstring = isinstance(obj, str) or isinstance(obj, bytes)
        # Support for bytes here is Py2
        if self.isstring:
            # Normalise the stored object to text via ensure_str.
            # (self.obj is presumably assigned in the elided code above.)
            self.obj = ensure_str(self.obj)
to
def ensure_str(s):
    """Return *s* as text.

    ``None`` and ``text_type`` instances are returned unchanged; any other
    value is decoded with ``s.decode("utf-8", "strict")``.
    """
    # Nothing to convert: either there is no value or it is already text.
    if s is None or isinstance(s, text_type):
        return s
    return s.decode("utf-8", "strict")
class FragmentWrapper(object):
    # Excerpt of the proposed implementation; the statements replaced by
    # `...` below are elided in this snippet.
    def __init__(self, fragment_root, obj):
        ...
        # Flag the wrapped object as a string when it is a `str`, `bytes`
        # or `text_type` instance. The `text_type` check is the proposed
        # addition.
        self.isstring = isinstance(obj, str) or isinstance(obj, bytes) or isinstance(obj, text_type)
        # Support for bytes here is Py2
        if self.isstring:
            # Normalise the stored object to text via ensure_str.
            # (self.obj is presumably assigned in the elided code above.)
            self.obj = ensure_str(self.obj)
seems to do the job... What do you think?
Metadata
Metadata
Assignees
Labels
No labels