html5lib · jayvdb · Jul 26, 2016
diff --git a/.pylintrc b/.pylintrc
@@ -3,7 +3,7 @@ ignore=tests
 
 [MESSAGES CONTROL]
 # messages up to fixme should probably be fixed somehow
-disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda
+disable = redefined-builtin,attribute-defined-outside-init,anomalous-backslash-in-string,no-self-use,redefined-outer-name,bad-continuation,wrong-import-order,superfluous-parens,no-member,duplicate-code,super-init-not-called,abstract-method,property-on-old-class,wrong-import-position,no-name-in-module,no-init,bad-mcs-classmethod-argument,bad-classmethod-argument,fixme,invalid-name,import-error,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-boolean-expressions,too-many-branches,too-many-instance-attributes,too-many-locals,too-many-lines,too-many-public-methods,too-many-return-statements,too-many-statements,missing-docstring,line-too-long,locally-disabled,locally-enabled,bad-builtin,deprecated-lambda,bad-option-value,star-args,abstract-class-little-used,abstract-class-not-used
 
 [FORMAT]
 max-line-length=139

diff --git a/.travis.yml b/.travis.yml
@@ -27,6 +27,7 @@ script:
   - if [[ $TRAVIS_PYTHON_VERSION == pypy* ]]; then py.test; fi
   - if [[ $TRAVIS_PYTHON_VERSION != pypy* ]]; then coverage run -m pytest; fi
   - bash flake8-run.sh
+  - pylint --rcfile=.pylintrc html5lib
 
 after_script:
   - python debug-info.py

diff --git a/debug-info.py b/debug-info.py
@@ -1,4 +1,4 @@
-from __future__ import print_function, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import platform
 import sys

diff --git a/flake8-run.sh b/flake8-run.sh
@@ -5,5 +5,6 @@ if [[ ! -x $(which flake8) ]]; then
   exit 1
 fi
 
+flake8 --version
 flake8 `dirname $0`
 exit $?
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
@@ -238,8 +238,9 @@ def position(self):
         return (line + 1, col)
 
     def char(self):
-        """ Read one character from the stream or queue if available. Return
-            EOF when EOF is reached.
+        """Read one character from the stream or queue if available.
+
+        Return EOF when EOF is reached.
         """
         # Read a new chunk from the input stream if necessary
         if self.chunkOffset >= self.chunkSize:
@@ -318,7 +319,7 @@ def characterErrorsUCS2(self, data):
                 self.errors.append("invalid-codepoint")
 
     def charsUntil(self, characters, opposite=False):
-        """ Returns a string of characters from the stream up to but not
+        """Returns a string of characters from the stream up to but not
         including any character in 'characters' or EOF. 'characters' must be
         a container that supports the 'in' method and iteration over its
         characters.
@@ -330,7 +331,7 @@ def charsUntil(self, characters, opposite=False):
         except KeyError:
             if __debug__:
                 for c in characters:
-                    assert(ord(c) < 128)
+                    assert ord(c) < 128
             regex = "".join(["\\x%02x" % ord(c) for c in characters])
             if not opposite:
                 regex = "^%s" % regex
@@ -449,7 +450,7 @@ def openStream(self, source):
 
         try:
             stream.seek(stream.tell())
-        except:  # pylint:disable=bare-except
+        except Exception:  # pylint: disable=broad-except
             stream = BufferedStream(stream)
 
         return stream
@@ -567,8 +568,7 @@ def detectBOM(self):
             return None
 
     def detectEncodingMeta(self):
-        """Report the encoding declared by the meta element
-        """
+        """Report the encoding declared by the meta element."""
         buffer = self.rawStream.read(self.numBytesMeta)
         assert isinstance(buffer, bytes)
         parser = EncodingParser(buffer)
@@ -686,10 +686,12 @@ def jumpTo(self, bytes):
 
 
 class EncodingParser(object):
-    """Mini parser for detecting character encoding from meta elements"""
+    """Mini parser for detecting character encoding from meta elements."""
 
     def __init__(self, data):
-        """string - the data to work on for encoding detection"""
+        """Constructor.
+
+        data - the data to work on for encoding detection"""
         self.data = EncodingBytes(data)
         self.encoding = None
 

diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
@@ -19,7 +19,7 @@
 
 
 class HTMLTokenizer(object):
-    """ This class takes care of tokenizing HTML.
+    """This class takes care of tokenizing HTML.
 
     * self.currentToken
       Holds the token that is currently being processed.
@@ -47,7 +47,7 @@ def __init__(self, stream, parser=None, **kwargs):
         super(HTMLTokenizer, self).__init__()
 
     def __iter__(self):
-        """ This is where the magic happens.
+        """This is where the magic happens.
 
         We do our usually processing through the states and when we have a token
         to return we yield the token which pauses processing until the next token
@@ -215,8 +215,7 @@ def consumeEntity(self, allowedChar=None, fromAttribute=False):
             self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output})
 
     def processEntityInAttribute(self, allowedChar):
-        """This method replaces the need for "entityInAttributeValueState".
-        """
+        """This method replaces the need for "entityInAttributeValueState"."""
         self.consumeEntity(allowedChar=allowedChar, fromAttribute=True)
 
     def emitCurrentToken(self):
@@ -1686,8 +1685,7 @@ def bogusDoctypeState(self):
             self.stream.unget(data)
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
-        else:
-            pass
+
         return True
 
     def cdataSectionState(self):

diff --git a/html5lib/_utils.py b/html5lib/_utils.py
@@ -30,14 +30,14 @@
         # We need this with u"" because of http://bugs.jython.org/issue2039
         _x = eval('u"\\uD800"')  # pylint:disable=eval-used
         assert isinstance(_x, text_type)
-except:  # pylint:disable=bare-except
+except Exception:  # pylint: disable=broad-except
     supports_lone_surrogates = False
 else:
     supports_lone_surrogates = True
 
 
 class MethodDispatcher(dict):
-    """Dict with 2 special properties:
+    """Dict with 2 special properties.
 
     On initiation, keys that are lists, sets or tuples are converted to
     multiple keys so accessing any one of the items in the original

diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
@@ -705,7 +705,7 @@
 
 
 class Filter(base.Filter):
-    """ sanitization of XHTML+MathML+SVG and of inline style attributes."""
+    """Sanitization of XHTML+MathML+SVG and of inline style attributes."""
     def __init__(self,
                  source,
                  allowed_elements=allowed_elements,

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
@@ -55,10 +55,11 @@ def __new__(meta, classname, bases, classDict):
 
 class HTMLParser(object):
     """HTML parser. Generates a tree structure from a stream of (possibly
-        malformed) HTML"""
+       malformed) HTML"""
 
     def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
-        """
+        """Constructor.
+
         strict - raise an exception when a parse error is encountered
 
         tree - a treebuilder class controlling the type of tree that will be
@@ -108,10 +109,9 @@ def reset(self):
                 self.tokenizer.state = self.tokenizer.rawtextState
             elif self.innerHTML == 'plaintext':
                 self.tokenizer.state = self.tokenizer.plaintextState
-            else:
-                # state already is data state
-                # self.tokenizer.state = self.tokenizer.dataState
-                pass
+            # Otherwise no change is needed: the tokenizer state is already
+            # the data state (self.tokenizer.dataState).
+
             self.phase = self.phases["beforeHtml"]
             self.phase.insertHtmlElement()
             self.resetInsertionMode()
@@ -262,7 +262,7 @@ def parseError(self, errorcode="XXX-undefined-error", datavars=None):
             raise ParseError(E[errorcode] % datavars)
 
     def normalizeToken(self, token):
-        """ HTML5 specific normalizations to the token stream """
+        """HTML5 specific normalizations to the token stream."""
 
         if token["type"] == tokenTypes["StartTag"]:
             raw = token["data"]
@@ -358,10 +358,7 @@ def log(function):
         def wrapped(self, *args, **kwargs):
             if function.__name__.startswith("process") and len(args) > 0:
                 token = args[0]
-                try:
-                    info = {"type": type_names[token['type']]}
-                except:
-                    raise
+                info = {"type": type_names[token['type']]}
                 if token['type'] in tagTokenTypes:
                     info["name"] = token['name']
 
@@ -383,8 +380,7 @@ def getMetaclass(use_metaclass, metaclass_func):
 
     # pylint:disable=unused-argument
     class Phase(with_metaclass(getMetaclass(debug, log))):
-        """Base class for helper object that implements each phase of processing
-        """
+        """Base class for helper object that implements each phase of processing."""
 
         def __init__(self, parser, tree):
             self.parser = parser
@@ -1285,7 +1281,7 @@ def startTagSvg(self, token):
                 token["selfClosingAcknowledged"] = True
 
         def startTagMisplaced(self, token):
-            """ Elements that should be children of other elements that have a
+            """Elements that should be children of other elements that have a
             different insertion mode; here they are ignored
             "caption", "col", "colgroup", "frame", "frameset", "head",
             "option", "optgroup", "tbody", "td", "tfoot", "th", "thead",
@@ -2730,4 +2726,3 @@ def impliedTagToken(name, type="EndTag", attributes=None,
 
 class ParseError(Exception):
     """Error in parsed document"""
-    pass
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
@@ -166,14 +166,14 @@ def __init__(self, **kwargs):
         self.strict = False
 
     def encode(self, string):
-        assert(isinstance(string, text_type))
+        assert isinstance(string, text_type)
         if self.encoding:
             return string.encode(self.encoding, "htmlentityreplace")
         else:
             return string
 
     def encodeStrict(self, string):
-        assert(isinstance(string, text_type))
+        assert isinstance(string, text_type)
         if self.encoding:
             return string.encode(self.encoding, "strict")
         else:
@@ -331,4 +331,3 @@ def serializeError(self, data="XXX ERROR MESSAGE NEEDED"):
 
 class SerializeError(Exception):
     """Error in serialized tree"""
-    pass
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
@@ -1,3 +1,5 @@
+from __future__ import absolute_import, division, unicode_literals
+
 import os.path
 
 import pkg_resources

diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
@@ -49,7 +49,8 @@
         }
 
 try:
-    import lxml.etree as lxml  # noqa
+    import lxml.etree as lxml
+    del lxml
 except ImportError:
     treeTypes['lxml'] = None
 else:
@@ -60,7 +61,8 @@
 
 # Genshi impls
 try:
-    import genshi  # noqa
+    import genshi
+    del genshi
 except ImportError:
     treeTypes["genshi"] = None
 else:
@@ -132,7 +134,7 @@ def normaliseOutput(self, data):
 
 def convert(stripChars):
     def convertData(data):
-        """convert the output of str(document) to the format used in the testcases"""
+        """Convert the output of str(document) to the format used in the testcases"""
         data = data.split("\n")
         rv = []
         for line in data:

diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals
 
 import os
 
@@ -105,7 +105,8 @@ def test_encoding():
 
 # pylint:disable=wrong-import-position
 try:
-    import chardet  # noqa
+    import chardet
+    del chardet
 except ImportError:
     print("chardet not found, skipping chardet tests")
 else:

diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
@@ -4,11 +4,15 @@
 
 import io
 
-from . import support  # noqa
+from . import support
+
 
 from html5lib.constants import namespaces, tokenTypes
 from html5lib import parse, parseFragment, HTMLParser
 
+# support is imported only for its side-effects; del it so the import counts as used
+del support
+
 
 # tests that aren't autogenerated from text files
 def test_assertDoctypeCloneable():

diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
@@ -74,7 +74,7 @@ def _convertAttrib(self, attribs):
         attrs = {}
         for attrib in attribs:
             name = (attrib["namespace"], attrib["name"])
-            assert(name not in attrs)
+            assert name not in attrs
             attrs[name] = attrib["value"]
         return attrs
 
@@ -93,7 +93,7 @@ def runSerializerTest(input, expected, options):
     encoding = options.get("encoding", None)
 
     if encoding:
-        expected = list(map(lambda x: x.encode(encoding), expected))
+        expected = list(x.encode(encoding) for x in expected)
 
     result = serialize_html(input, options)
     if len(expected) == 1:

diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
@@ -1,7 +1,5 @@
 from __future__ import absolute_import, division, unicode_literals
 
-from . import support  # noqa
-
 import codecs
 import sys
 from io import BytesIO, StringIO
@@ -11,10 +9,15 @@
 import six
 from six.moves import http_client, urllib
 
+from . import support
+
 from html5lib._inputstream import (BufferedStream, HTMLInputStream,
                                    HTMLUnicodeInputStream, HTMLBinaryInputStream)
 from html5lib._utils import supports_lone_surrogates
 
+# support is imported only for its side-effects; del it so the import counts as used
+del support
+
 
 def test_basic():
     s = b"abc"
@@ -182,8 +185,8 @@ def test_position2():
 
 
 def test_python_issue_20007():
-    """
-    Make sure we have a work-around for Python bug #20007
+    """Ensure we have a work-around for Python bug #20007.
+
     http://bugs.python.org/issue20007
     """
     class FakeSocket(object):
@@ -198,8 +201,8 @@ def makefile(self, _mode, _bufsize=None):
 
 
 def test_python_issue_20007_b():
-    """
-    Make sure we have a work-around for Python bug #20007
+    """Ensure we have a work-around for Python bug #20007 (part b).
+
     http://bugs.python.org/issue20007
     """
     if six.PY2:
-Original file line number
+Diff line change
@@ Expand Up / @@ -5,5 +5,6 @@ if [[ ! -x $(which flake8) ]]; then @@
       exit 1
     fi
+    flake8 --version
     flake8 `dirname $0`
     exit $?