html5lib · gsnedders · Nov 30, 2017 · Nov 30, 2017
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
@@ -1,29 +1,32 @@
-"""A collection of modules for building different kinds of tree from
-HTML documents.
+"""A collection of modules for building different kinds of trees from HTML
+documents.
 
 To create a treebuilder for a new type of tree, you need to do
 implement several things:
 
-1) A set of classes for various types of elements: Document, Doctype,
-Comment, Element. These must implement the interface of
-_base.treebuilders.Node (although comment nodes have a different
-signature for their constructor, see treebuilders.etree.Comment)
-Textual content may also be implemented as another node type, or not, as
-your tree implementation requires.
-
-2) A treebuilder object (called TreeBuilder by convention) that
-inherits from treebuilders._base.TreeBuilder. This has 4 required attributes:
-documentClass - the class to use for the bottommost node of a document
-elementClass - the class to use for HTML Elements
-commentClass - the class to use for comments
-doctypeClass - the class to use for doctypes
-It also has one required method:
-getDocument - Returns the root node of the complete document tree
-
-3) If you wish to run the unit tests, you must also create a
-testSerializer method on your treebuilder which accepts a node and
-returns a string containing Node and its children serialized according
-to the format used in the unittests
+1. A set of classes for various types of elements: Document, Doctype, Comment,
+   Element. These must implement the interface of ``treebuilders.base.Node``
+   (although comment nodes have a different signature for their constructor,
+   see ``treebuilders.etree.Comment``). Textual content may also be implemented
+   as another node type, or not, as your tree implementation requires.
+
+2. A treebuilder object (called ``TreeBuilder`` by convention) that inherits
+   from ``treebuilders.base.TreeBuilder``. This has 4 required attributes:
+
+   * ``documentClass`` - the class to use for the bottommost node of a document
+   * ``elementClass`` - the class to use for HTML Elements
+   * ``commentClass`` - the class to use for comments
+   * ``doctypeClass`` - the class to use for doctypes
+
+   It also has one required method:
+
+   * ``getDocument`` - Returns the root node of the complete document tree
+
+3. If you wish to run the unit tests, you must also create a ``testSerializer``
+   method on your treebuilder which accepts a node and returns a string
+   containing the node and its children serialized according to the format
+   used in the unit tests.
+
 """
 
 from __future__ import absolute_import, division, unicode_literals
@@ -34,23 +37,32 @@
 
 
 def getTreeBuilder(treeType, implementation=None, **kwargs):
-    """Get a TreeBuilder class for various types of tree with built-in support
-
-    treeType - the name of the tree type required (case-insensitive). Supported
-               values are:
-
-               "dom" - A generic builder for DOM implementations, defaulting to
-                       a xml.dom.minidom based implementation.
-               "etree" - A generic builder for tree implementations exposing an
-                         ElementTree-like interface, defaulting to
-                         xml.etree.cElementTree if available and
-                         xml.etree.ElementTree if not.
-               "lxml" - A etree-based builder for lxml.etree, handling
-                        limitations of lxml's implementation.
-
-    implementation - (Currently applies to the "etree" and "dom" tree types). A
-                      module implementing the tree type e.g.
-                      xml.etree.ElementTree or xml.etree.cElementTree."""
+    """Get a TreeBuilder class for various types of trees with built-in support
+
+    :arg treeType: the name of the tree type required (case-insensitive). Supported
+        values are:
+
+        * "dom" - A generic builder for DOM implementations, defaulting to a
+          xml.dom.minidom based implementation.
+        * "etree" - A generic builder for tree implementations exposing an
+          ElementTree-like interface, defaulting to xml.etree.cElementTree if
+          available and xml.etree.ElementTree if not.
+        * "lxml" - An etree-based builder for lxml.etree, handling limitations
+          of lxml's implementation.
+
+    :arg implementation: (Currently applies to the "etree" and "dom" tree
+        types). A module implementing the tree type e.g. xml.etree.ElementTree
+        or xml.etree.cElementTree.
+
+    :arg kwargs: Any additional options to pass to the TreeBuilder when
+        creating it.
+
+    Example:
+
+    >>> from html5lib.treebuilders import getTreeBuilder
+    >>> builder = getTreeBuilder('etree')
+
+    """
 
     treeType = treeType.lower()
     if treeType not in treeBuilderCache:

diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
@@ -21,22 +21,25 @@
 
 
 class Node(object):
+    """Represents an item in the tree"""
     def __init__(self, name):
-        """Node representing an item in the tree.
-        name - The tag name associated with the node
-        parent - The parent of the current node (or None for the document node)
-        value - The value of the current node (applies to text nodes and
-        comments
-        attributes - a dict holding name, value pairs for attributes of the node
-        childNodes - a list of child nodes of the current node. This must
-        include all elements but not necessarily other node types
-        _flags - A list of miscellaneous flags that can be set on the node
+        """Creates a Node
+
+        :arg name: The tag name associated with the node
+
         """
+        # The tag name associated with the node
         self.name = name
+        # The parent of the current node (or None for the document node)
         self.parent = None
+        # The value of the current node (applies to text nodes and comments)
         self.value = None
+        # A dict holding name -> value pairs for attributes of the node
         self.attributes = {}
+        # A list of child nodes of the current node. This must include all
+        # elements but not necessarily other node types.
         self.childNodes = []
+        # A list of miscellaneous flags that can be set on the node.
         self._flags = []
 
     def __str__(self):
@@ -53,30 +56,51 @@ def __repr__(self):
 
     def appendChild(self, node):
         """Insert node as a child of the current node
+
+        :arg node: the node to insert
+
         """
         raise NotImplementedError
 
     def insertText(self, data, insertBefore=None):
         """Insert data as text in the current node, positioned before the
         start of node insertBefore or to the end of the node's text.
+
+        :arg data: the data to insert
+
+        :arg insertBefore: the node to insert the text before; if ``None``,
+            the text is added to the end of the node's text
+
         """
         raise NotImplementedError
 
     def insertBefore(self, node, refNode):
         """Insert node as a child of the current node, before refNode in the
         list of child nodes. Raises ValueError if refNode is not a child of
-        the current node"""
+        the current node
+
+        :arg node: the node to insert
+
+        :arg refNode: the child node to insert the node before
+
+        """
         raise NotImplementedError
 
     def removeChild(self, node):
         """Remove node from the children of the current node
+
+        :arg node: the child node to remove
+
         """
         raise NotImplementedError
 
     def reparentChildren(self, newParent):
         """Move all the children of the current node to newParent.
         This is needed so that trees that don't store text as nodes move the
         text in the correct way
+
+        :arg newParent: the node to move all this node's children to
+
         """
         # XXX - should this method be made more general?
         for child in self.childNodes:
@@ -121,10 +145,12 @@ def nodesEqual(self, node1, node2):
 
 class TreeBuilder(object):
     """Base treebuilder implementation
-    documentClass - the class to use for the bottommost node of a document
-    elementClass - the class to use for HTML Elements
-    commentClass - the class to use for comments
-    doctypeClass - the class to use for doctypes
+
+    * documentClass - the class to use for the bottommost node of a document
+    * elementClass - the class to use for HTML Elements
+    * commentClass - the class to use for comments
+    * doctypeClass - the class to use for doctypes
+
     """
     # pylint:disable=not-callable
 
@@ -144,6 +170,11 @@ class TreeBuilder(object):
     fragmentClass = None
 
     def __init__(self, namespaceHTMLElements):
+        """Create a TreeBuilder
+
+        :arg namespaceHTMLElements: whether or not to namespace HTML elements
+
+        """
         if namespaceHTMLElements:
             self.defaultNamespace = "http://www.w3.org/1999/xhtml"
         else:
@@ -367,17 +398,20 @@ def generateImpliedEndTags(self, exclude=None):
             self.generateImpliedEndTags(exclude)
 
     def getDocument(self):
-        "Return the final tree"
+        """Return the final tree"""
         return self.document
 
     def getFragment(self):
-        "Return the final fragment"
+        """Return the final fragment"""
         # assert self.innerHTML
         fragment = self.fragmentClass()
         self.openElements[0].reparentChildren(fragment)
         return fragment
 
     def testSerializer(self, node):
         """Serialize the subtree of node in the format required by unit tests
-        node - the node from which to start serializing"""
+
+        :arg node: the node from which to start serializing
+
+        """
         raise NotImplementedError
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
@@ -309,7 +309,6 @@ def insertCommentMain(self, data, parent=None):
         super(TreeBuilder, self).insertComment(data, parent)
 
     def insertRoot(self, token):
-        """Create the document root"""
         # Because of the way libxml2 works, it doesn't seem to be possible to
         # alter information like the doctype after the tree has been parsed.
         # Therefore we need to use the built-in parser to create our initial