Skip to content

First pass at documenting html5lib.filters #375

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions html5lib/filters/alphabeticalattributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def _attr_key(attr):


class Filter(base.Filter):
"""Alphabetizes attributes for elements"""
def __iter__(self):
for token in base.Filter.__iter__(self):
if token["type"] in ("StartTag", "EmptyTag"):
Expand Down
8 changes: 8 additions & 0 deletions html5lib/filters/inject_meta_charset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,15 @@


class Filter(base.Filter):
"""Injects ``<meta charset=ENCODING>`` tag into head of document"""
def __init__(self, source, encoding):
"""Creates a Filter

:arg source: the source token stream

:arg encoding: the encoding to set

"""
base.Filter.__init__(self, source)
self.encoding = encoding

Expand Down
12 changes: 12 additions & 0 deletions html5lib/filters/lint.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,19 @@


class Filter(base.Filter):
"""Lints the token stream for errors

If it finds any errors, it'll raise an ``AssertionError``.

"""
def __init__(self, source, require_matching_tags=True):
"""Creates a Filter

:arg source: the source token stream

:arg require_matching_tags: whether or not to require matching tags

"""
super(Filter, self).__init__(source)
self.require_matching_tags = require_matching_tags

Expand Down
1 change: 1 addition & 0 deletions html5lib/filters/optionaltags.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@


class Filter(base.Filter):
"""Removes optional tags from the token stream"""
def slider(self):
previous1 = previous2 = None
for token in self.source:
Expand Down
43 changes: 37 additions & 6 deletions html5lib/filters/sanitizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,7 +705,7 @@


class Filter(base.Filter):
""" sanitization of XHTML+MathML+SVG and of inline style attributes."""
"""Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes"""
def __init__(self,
source,
allowed_elements=allowed_elements,
Expand All @@ -718,6 +718,37 @@ def __init__(self,
attr_val_is_uri=attr_val_is_uri,
svg_attr_val_allows_ref=svg_attr_val_allows_ref,
svg_allow_local_href=svg_allow_local_href):
"""Creates a Filter

:arg allowed_elements: set of elements to allow--everything else will
be escaped

:arg allowed_attributes: set of attributes to allow in
elements--everything else will be stripped

:arg allowed_css_properties: set of CSS properties to allow--everything
else will be stripped

:arg allowed_css_keywords: set of CSS keywords to allow--everything
else will be stripped

:arg allowed_svg_properties: set of SVG properties to allow--everything
else will be removed

:arg allowed_protocols: set of allowed protocols for URIs

:arg allowed_content_types: set of allowed content types for ``data`` URIs.

:arg attr_val_is_uri: set of attributes that have URI values--values
that have a scheme not listed in ``allowed_protocols`` are removed

:arg svg_attr_val_allows_ref: set of SVG attributes that can have
references

:arg svg_allow_local_href: set of SVG elements that can have local
hrefs--these are removed

"""
super(Filter, self).__init__(source)
self.allowed_elements = allowed_elements
self.allowed_attributes = allowed_attributes
Expand All @@ -737,11 +768,11 @@ def __iter__(self):
yield token

# Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and
# stripping out all # attributes not in ALLOWED_ATTRIBUTES. Style
# attributes are parsed, and a restricted set, # specified by
# ALLOWED_CSS_PROPERTIES and ALLOWED_CSS_KEYWORDS, are allowed through.
# attributes in ATTR_VAL_IS_URI are scanned, and only URI schemes specified
# in ALLOWED_PROTOCOLS are allowed.
# stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes
# are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and
# ALLOWED_CSS_KEYWORDS, are allowed through. Attributes in ATTR_VAL_IS_URI
# are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are
# allowed.
#
# sanitize_html('<script> do_nasty_stuff() </script>')
# => &lt;script> do_nasty_stuff() &lt;/script>
Expand Down
2 changes: 1 addition & 1 deletion html5lib/filters/whitespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@


class Filter(base.Filter):

"""Collapses whitespace except in pre, textarea, and script elements"""
spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements))

def __iter__(self):
Expand Down