-
Notifications
You must be signed in to change notification settings - Fork 294
Get rid of getPhases #272
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Get rid of getPhases #272
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,5 @@ | ||
from __future__ import absolute_import, division, unicode_literals | ||
from six import with_metaclass, viewkeys | ||
|
||
import types | ||
from six import viewkeys | ||
|
||
from . import _inputstream | ||
from . import _tokenizer | ||
|
@@ -13,7 +11,7 @@ | |
from .constants import ( | ||
spaceCharacters, asciiUpper2Lower, | ||
specialElements, headingElements, cdataElements, rcdataElements, | ||
tokenTypes, tagTokenTypes, | ||
tokenTypes, | ||
namespaces, | ||
htmlIntegrationPointElements, mathmlTextIntegrationPointElements, | ||
adjustForeignAttributes as adjustForeignAttributesMap, | ||
|
@@ -71,18 +69,6 @@ def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElemen | |
return p.parseFragment(doc, container=container, **kwargs) | ||
|
||
|
||
def method_decorator_metaclass(function): | ||
class Decorated(type): | ||
def __new__(meta, classname, bases, classDict): | ||
for attributeName, attribute in classDict.items(): | ||
if isinstance(attribute, types.FunctionType): | ||
attribute = function(attribute) | ||
|
||
classDict[attributeName] = attribute | ||
return type.__new__(meta, classname, bases, classDict) | ||
return Decorated | ||
|
||
|
||
class HTMLParser(object): | ||
"""HTML parser | ||
|
||
|
@@ -112,14 +98,15 @@ def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=Fa | |
|
||
# Raise an exception on the first error encountered | ||
self.strict = strict | ||
self.debug = debug | ||
|
||
if tree is None: | ||
tree = treebuilders.getTreeBuilder("etree") | ||
self.tree = tree(namespaceHTMLElements) | ||
self.errors = [] | ||
|
||
self.phases = {name: cls(self, self.tree) for name, cls in | ||
getPhases(debug).items()} | ||
_phases.items()} | ||
|
||
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): | ||
|
||
|
@@ -201,6 +188,9 @@ def mainLoop(self): | |
DoctypeToken = tokenTypes["Doctype"] | ||
ParseErrorToken = tokenTypes["ParseError"] | ||
|
||
type_names = {value: key for key, value in tokenTypes.items()} | ||
debug = self.debug | ||
|
||
for token in self.tokenizer: | ||
prev_token = None | ||
new_token = token | ||
|
@@ -232,6 +222,17 @@ def mainLoop(self): | |
else: | ||
phase = self.phases["inForeignContent"] | ||
|
||
if debug: | ||
info = {"type": type_names[type]} | ||
if type in (StartTagToken, EndTagToken): | ||
info["name"] = new_token['name'] | ||
|
||
self.log.append((self.tokenizer.state.__name__, | ||
self.phase.__class__.__name__, | ||
phase.__class__.__name__, | ||
"process" + info["type"], | ||
info)) | ||
|
||
if type == CharactersToken: | ||
new_token = phase.processCharacters(new_token) | ||
elif type == SpaceCharactersToken: | ||
|
@@ -393,37 +394,7 @@ def parseRCDataRawtext(self, token, contentType): | |
self.phase = self.phases["text"] | ||
|
||
|
||
@_utils.memoize | ||
def getPhases(debug): | ||
def log(function): | ||
"""Logger that records which phase processes each token""" | ||
type_names = {value: key for key, value in tokenTypes.items()} | ||
|
||
def wrapped(self, *args, **kwargs): | ||
if function.__name__.startswith("process") and len(args) > 0: | ||
token = args[0] | ||
info = {"type": type_names[token['type']]} | ||
if token['type'] in tagTokenTypes: | ||
info["name"] = token['name'] | ||
|
||
self.parser.log.append((self.parser.tokenizer.state.__name__, | ||
self.parser.phase.__class__.__name__, | ||
self.__class__.__name__, | ||
function.__name__, | ||
info)) | ||
return function(self, *args, **kwargs) | ||
else: | ||
return function(self, *args, **kwargs) | ||
return wrapped | ||
|
||
def getMetaclass(use_metaclass, metaclass_func): | ||
if use_metaclass: | ||
return method_decorator_metaclass(metaclass_func) | ||
else: | ||
return type | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice :) I get some minor worries about difficult-to-debug future issues when built-in Python keywords like |
||
|
||
# pylint:disable=unused-argument | ||
class Phase(with_metaclass(getMetaclass(debug, log))): | ||
class Phase(object): | ||
"""Base class for helper object that implements each phase of processing | ||
""" | ||
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") | ||
|
@@ -495,6 +466,7 @@ def processEndTag(self, token): | |
self.__endTagCache.pop(next(iter(self.__endTagCache))) | ||
return func(token) | ||
|
||
|
||
class InitialPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -625,6 +597,7 @@ def processEOF(self): | |
self.anythingElse() | ||
return True | ||
|
||
|
||
class BeforeHtmlPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -662,6 +635,7 @@ def processEndTag(self, token): | |
self.insertHtmlElement() | ||
return token | ||
|
||
|
||
class BeforeHeadPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -707,6 +681,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InHeadPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -809,6 +784,7 @@ def anythingElse(self): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InHeadNoscriptPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -872,6 +848,7 @@ def anythingElse(self): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class AfterHeadPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -938,6 +915,7 @@ def anythingElse(self): | |
endTagHtmlBodyBr)]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InBodyPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody | ||
# the really-really-really-very crazy mode | ||
|
@@ -1662,6 +1640,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class TextPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -1695,6 +1674,7 @@ def endTagOther(self, token): | |
("script", endTagScript)]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InTablePhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table | ||
__slots__ = tuple() | ||
|
@@ -1840,6 +1820,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InTableTextPhase(Phase): | ||
__slots__ = ("originalPhase", "characterTokens") | ||
|
||
|
@@ -1887,6 +1868,7 @@ def processEndTag(self, token): | |
self.parser.phase = self.originalPhase | ||
return token | ||
|
||
|
||
class InCaptionPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption | ||
__slots__ = tuple() | ||
|
@@ -1957,6 +1939,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InColumnGroupPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-column | ||
__slots__ = tuple() | ||
|
@@ -2021,6 +2004,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InTableBodyPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0 | ||
__slots__ = tuple() | ||
|
@@ -2119,6 +2103,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InRowPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-row | ||
__slots__ = tuple() | ||
|
@@ -2208,6 +2193,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InCellPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell | ||
__slots__ = tuple() | ||
|
@@ -2284,6 +2270,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InSelectPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2383,6 +2370,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InSelectInTablePhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2421,6 +2409,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InForeignContentPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2535,6 +2524,7 @@ def processEndTag(self, token): | |
break | ||
return new_token | ||
|
||
|
||
class AfterBodyPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2581,6 +2571,7 @@ def endTagOther(self, token): | |
endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class InFramesetPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset | ||
__slots__ = tuple() | ||
|
@@ -2637,6 +2628,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class AfterFramesetPhase(Phase): | ||
# http://www.whatwg.org/specs/web-apps/current-work/#after3 | ||
__slots__ = tuple() | ||
|
@@ -2673,6 +2665,7 @@ def endTagOther(self, token): | |
]) | ||
endTagHandler.default = endTagOther | ||
|
||
|
||
class AfterAfterBodyPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2710,6 +2703,7 @@ def processEndTag(self, token): | |
]) | ||
startTagHandler.default = startTagOther | ||
|
||
|
||
class AfterAfterFramesetPhase(Phase): | ||
__slots__ = tuple() | ||
|
||
|
@@ -2747,7 +2741,8 @@ def processEndTag(self, token): | |
|
||
# pylint:enable=unused-argument | ||
|
||
return { | ||
|
||
_phases = { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The core of the improvement - looks good generally. What do you think about going one step further and moving this to the initialization of the |
||
"initial": InitialPhase, | ||
"beforeHtml": BeforeHtmlPhase, | ||
"beforeHead": BeforeHeadPhase, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -68,7 +68,6 @@ def test_debug_log(): | |
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'p', 'type': 'StartTag'}), | ||
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}), | ||
('dataState', 'InBodyPhase', 'InBodyPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}), | ||
('dataState', 'InBodyPhase', 'InHeadPhase', 'processStartTag', {'name': 'script', 'type': 'StartTag'}), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch! How did you discover this duplicate entry, out of interest? |
||
('scriptDataState', 'TextPhase', 'TextPhase', 'processCharacters', {'type': 'Characters'}), | ||
('dataState', 'TextPhase', 'TextPhase', 'processEndTag', {'name': 'script', 'type': 'EndTag'}), | ||
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'}), | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@gsnedders Is there a reason to bring
tokenTypes
andself.debug
into local method variables here?At L225 we could compare using
if self.debug
and similarly at L226 we could useinfo = {"type": tokenTypes[type]}
.(am guessing it's possibly an artifact of some IDE-based refactoring? Hopefully a straightforward cleanup either way)