html5lib · jayaddison · Jan 9, 2021 · Sep 20, 2021
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
@@ -7,7 +7,7 @@
 
 from .constants import spaceCharacters
 from .constants import entities
-from .constants import asciiLetters, asciiUpper2Lower
+from .constants import asciiLetters
 from .constants import digits, hexDigits, EOF
 from .constants import tokenTypes, tagTokenTypes
 from .constants import replacementCharacters
@@ -233,7 +233,7 @@ def emitCurrentToken(self):
         token = self.currentToken
         # Add token to the queue to be yielded
         if (token["type"] in tagTokenTypes):
-            token["name"] = token["name"].translate(asciiUpper2Lower)
+            token["name"] = token["name"].lower()
             if token["type"] == tokenTypes["StartTag"]:
                 raw = token["data"]
                 data = attributeMap(raw)
@@ -927,7 +927,7 @@ def attributeNameState(self):
             # start tag token is emitted so values can still be safely appended
             # to attributes, but we do want to report the parse error in time.
             self.currentToken["data"][-1][0] = (
-                self.currentToken["data"][-1][0].translate(asciiUpper2Lower))
+                self.currentToken["data"][-1][0].lower())
             for name, _ in self.currentToken["data"][:-1]:
                 if self.currentToken["data"][-1][0] == name:
                     self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
@@ -1348,10 +1348,10 @@ def beforeDoctypeNameState(self):
     def doctypeNameState(self):
         data = self.stream.char()
         if data in spaceCharacters:
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = self.currentToken["name"].lower()
             self.state = self.afterDoctypeNameState
         elif data == ">":
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = self.currentToken["name"].lower()
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         elif data == "\u0000":
@@ -1363,7 +1363,7 @@ def doctypeNameState(self):
             self.tokenQueue.append({"type": tokenTypes["ParseError"], "data":
                                     "eof-in-doctype-name"})
             self.currentToken["correct"] = False
-            self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower)
+            self.currentToken["name"] = self.currentToken["name"].lower()
             self.tokenQueue.append(self.currentToken)
             self.state = self.dataState
         else:

diff --git a/html5lib/constants.py b/html5lib/constants.py
@@ -538,14 +538,11 @@
     "tr"
 ])
 
-asciiLowercase = frozenset(string.ascii_lowercase)
 asciiUppercase = frozenset(string.ascii_uppercase)
 asciiLetters = frozenset(string.ascii_letters)
 digits = frozenset(string.digits)
 hexDigits = frozenset(string.hexdigits)
 
-asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
-
 # Heading elements need to be ordered
 headingElements = (
     "h1",

diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
@@ -11,7 +11,7 @@
 
 from . import _utils
 from .constants import (
-    spaceCharacters, asciiUpper2Lower,
+    spaceCharacters,
     specialElements, headingElements, cdataElements, rcdataElements,
     tokenTypes, tagTokenTypes,
     namespaces,
@@ -183,8 +183,7 @@ def isHTMLIntegrationPoint(self, element):
         if (element.name == "annotation-xml" and
                 element.namespace == namespaces["mathml"]):
             return ("encoding" in element.attributes and
-                    element.attributes["encoding"].translate(
-                        asciiUpper2Lower) in
+                    element.attributes["encoding"].lower() in
                     ("text/html", "application/xhtml+xml"))
         else:
             return (element.namespace, element.name) in htmlIntegrationPointElements
@@ -520,7 +519,7 @@ def processDoctype(self, token):
             self.tree.insertDoctype(token)
 
             if publicId != "":
-                publicId = publicId.translate(asciiUpper2Lower)
+                publicId = publicId.lower()
 
             if (not correct or token["name"] != "html" or
                     publicId.startswith(
@@ -1165,7 +1164,7 @@ def startTagInput(self, token):
             framesetOK = self.parser.framesetOK
             self.startTagVoidFormatting(token)
             if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    token["data"]["type"].lower() == "hidden"):
                 # input type=hidden doesn't change framesetOK
                 self.parser.framesetOK = framesetOK
 
@@ -1771,7 +1770,7 @@ def startTagStyleScript(self, token):
 
         def startTagInput(self, token):
             if ("type" in token["data"] and
-                    token["data"]["type"].translate(asciiUpper2Lower) == "hidden"):
+                    token["data"]["type"].lower() == "hidden"):
                 self.parser.parseError("unexpected-hidden-input-in-table")
                 self.tree.insertElement(token)
                 # XXX associate with form
@@ -2512,11 +2511,11 @@ def processStartTag(self, token):
         def processEndTag(self, token):
             nodeIndex = len(self.tree.openElements) - 1
             node = self.tree.openElements[-1]
-            if node.name.translate(asciiUpper2Lower) != token["name"]:
+            if node.name.lower() != token["name"]:
                 self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
 
             while True:
-                if node.name.translate(asciiUpper2Lower) == token["name"]:
+                if node.name.lower() == token["name"]:
                     # XXX this isn't in the spec but it seems necessary
                     if self.parser.phase == self.parser.phases["inTableText"]:
                         self.parser.phase.flushCharacters()