Skip to content

Commit 461bda3

Browse files
willkg, gsnedders
authored and committed
First pass at documenting serializer (#376)
1 parent ed8e017 commit 461bda3

File tree

1 file changed

+119
-44
lines changed

1 file changed

+119
-44
lines changed

html5lib/serializer.py

+119-44
Original file line numberDiff line numberDiff line change
@@ -68,10 +68,33 @@ def htmlentityreplace_errors(exc):
6868
else:
6969
return xmlcharrefreplace_errors(exc)
7070

71+
7172
register_error("htmlentityreplace", htmlentityreplace_errors)
7273

7374

7475
def serialize(input, tree="etree", encoding=None, **serializer_opts):
76+
"""Serializes the input token stream using the specified treewalker
77+
78+
:arg input: the token stream to serialize
79+
80+
:arg tree: the treewalker to use
81+
82+
:arg encoding: the encoding to use
83+
84+
:arg serializer_opts: any options to pass to the
85+
:py:class:`html5lib.serializer.HTMLSerializer` that gets created
86+
87+
:returns: the tree serialized as a string
88+
89+
Example:
90+
91+
>>> from html5lib.html5parser import parse
92+
>>> from html5lib.serializer import serialize
93+
>>> token_stream = parse('<html><body><p>Hi!</p></body></html>')
94+
>>> serialize(token_stream, omit_optional_tags=False)
95+
'<html><head></head><body><p>Hi!</p></body></html>'
96+
97+
"""
7598
# XXX: Should we cache this?
7699
walker = treewalkers.getTreeWalker(tree)
77100
s = HTMLSerializer(**serializer_opts)
@@ -110,50 +133,83 @@ class HTMLSerializer(object):
110133
"strip_whitespace", "sanitize")
111134

112135
def __init__(self, **kwargs):
113-
"""Initialize HTMLSerializer.
114-
115-
Keyword options (default given first unless specified) include:
116-
117-
inject_meta_charset=True|False
118-
Whether it insert a meta element to define the character set of the
119-
document.
120-
quote_attr_values="legacy"|"spec"|"always"
121-
Whether to quote attribute values that don't require quoting
122-
per legacy browser behaviour, when required by the standard, or always.
123-
quote_char=u'"'|u"'"
124-
Use given quote character for attribute quoting. Default is to
125-
use double quote unless attribute value contains a double quote,
126-
in which case single quotes are used instead.
127-
escape_lt_in_attrs=False|True
128-
Whether to escape < in attribute values.
129-
escape_rcdata=False|True
130-
Whether to escape characters that need to be escaped within normal
131-
elements within rcdata elements such as style.
132-
resolve_entities=True|False
133-
Whether to resolve named character entities that appear in the
134-
source tree. The XML predefined entities &lt; &gt; &amp; &quot; &apos;
135-
are unaffected by this setting.
136-
strip_whitespace=False|True
137-
Whether to remove semantically meaningless whitespace. (This
138-
compresses all whitespace to a single space except within pre.)
139-
minimize_boolean_attributes=True|False
140-
Shortens boolean attributes to give just the attribute value,
141-
for example <input disabled="disabled"> becomes <input disabled>.
142-
use_trailing_solidus=False|True
143-
Includes a close-tag slash at the end of the start tag of void
144-
elements (empty elements whose end tag is forbidden). E.g. <hr/>.
145-
space_before_trailing_solidus=True|False
146-
Places a space immediately before the closing slash in a tag
147-
using a trailing solidus. E.g. <hr />. Requires use_trailing_solidus.
148-
sanitize=False|True
149-
Strip all unsafe or unknown constructs from output.
150-
See `html5lib user documentation`_
151-
omit_optional_tags=True|False
152-
Omit start/end tags that are optional.
153-
alphabetical_attributes=False|True
154-
Reorder attributes to be in alphabetical order.
155-
156-
.. _html5lib user documentation: http://code.google.com/p/html5lib/wiki/UserDocumentation
136+
"""Initialize HTMLSerializer
137+
138+
:arg inject_meta_charset: Whether or not to inject the meta charset.
139+
140+
Defaults to ``True``.
141+
142+
:arg quote_attr_values: Whether to quote attribute values that don't
143+
require quoting per legacy browser behavior (``"legacy"``), when
144+
required by the standard (``"spec"``), or always (``"always"``).
145+
146+
Defaults to ``"legacy"``.
147+
148+
:arg quote_char: Use given quote character for attribute quoting.
149+
150+
Defaults to ``"`` which will use double quotes unless attribute
151+
value contains a double quote, in which case single quotes are
152+
used.
153+
154+
:arg escape_lt_in_attrs: Whether or not to escape ``<`` in attribute
155+
values.
156+
157+
Defaults to ``False``.
158+
159+
:arg escape_rcdata: Whether to escape characters that need to be
160+
escaped within normal elements within rcdata elements such as
161+
style.
162+
163+
Defaults to ``False``.
164+
165+
:arg resolve_entities: Whether to resolve named character entities that
166+
appear in the source tree. The XML predefined entities &lt; &gt;
167+
&amp; &quot; &apos; are unaffected by this setting.
168+
169+
Defaults to ``True``.
170+
171+
:arg strip_whitespace: Whether to remove semantically meaningless
172+
whitespace. (This compresses all whitespace to a single space
173+
except within ``pre``.)
174+
175+
Defaults to ``False``.
176+
177+
:arg minimize_boolean_attributes: Shortens boolean attributes to give
178+
just the attribute value, for example::
179+
180+
<input disabled="disabled">
181+
182+
becomes::
183+
184+
<input disabled>
185+
186+
Defaults to ``True``.
187+
188+
:arg use_trailing_solidus: Includes a close-tag slash at the end of the
189+
start tag of void elements (empty elements whose end tag is
190+
forbidden). E.g. ``<hr/>``.
191+
192+
Defaults to ``False``.
193+
194+
:arg space_before_trailing_solidus: Places a space immediately before
195+
the closing slash in a tag using a trailing solidus. E.g.
196+
``<hr />``. Requires ``use_trailing_solidus=True``.
197+
198+
Defaults to ``True``.
199+
200+
:arg sanitize: Strip all unsafe or unknown constructs from output.
201+
See :py:class:`html5lib.filters.sanitizer.Filter`.
202+
203+
Defaults to ``False``.
204+
205+
:arg omit_optional_tags: Omit start/end tags that are optional.
206+
207+
Defaults to ``True``.
208+
209+
:arg alphabetical_attributes: Reorder attributes to be in alphabetical order.
210+
211+
Defaults to ``False``.
212+
157213
"""
158214
unexpected_args = frozenset(kwargs) - frozenset(self.options)
159215
if len(unexpected_args) > 0:
@@ -317,6 +373,25 @@ def serialize(self, treewalker, encoding=None):
317373
self.serializeError(token["data"])
318374

319375
def render(self, treewalker, encoding=None):
376+
"""Serializes the stream from the treewalker into a string
377+
378+
:arg treewalker: the treewalker to serialize
379+
380+
:arg encoding: the string encoding to use
381+
382+
:returns: the serialized tree
383+
384+
Example:
385+
386+
>>> from html5lib import parse, getTreeWalker
387+
>>> from html5lib.serializer import HTMLSerializer
388+
>>> token_stream = parse('<html><body>Hi!</body></html>')
389+
>>> walker = getTreeWalker('etree')
390+
>>> serializer = HTMLSerializer(omit_optional_tags=False)
391+
>>> serializer.render(walker(token_stream))
392+
'<html><head></head><body>Hi!</body></html>'
393+
394+
"""
320395
if encoding:
321396
return b"".join(list(self.serialize(treewalker, encoding)))
322397
else:

0 commit comments

Comments
 (0)