Skip to content

"Unterminated character set" exception when using extra extension #1444

Closed
@noamgat

Description

@noamgat

Hi,

When using the "extra" extension, some invalid Markdown input (I think it is invalid?) raises an exception instead of being returned as a plain-text line.

Example:

txt = """
*[^1^]: This is going to crash if extra extension is enabled
"""

import markdown
ok = markdown.markdown(txt, extensions=[])
not_ok = markdown.markdown(txt, extensions=['extra'])

Is this expected behavior?

Full exception stack trace:

Cell In[29], line 7
      5 import markdown
      6 ok = markdown.markdown(txt, extensions=[])
----> 7 not_ok = markdown.markdown(txt, extensions=['extra'])

File lib/python3.10/site-packages/markdown/core.py:482, in markdown(text, **kwargs)
    464 """
    465 Convert a markdown string to HTML and return HTML as a Unicode string.
    466 
   (...)
    479 
    480 """
    481 md = Markdown(**kwargs)
--> 482 return md.convert(text)

File lib/python3.10/site-packages/markdown/core.py:357, in Markdown.convert(self, source)
    354     self.lines = prep.run(self.lines)
    356 # Parse the high-level elements.
--> 357 root = self.parser.parseDocument(self.lines).getroot()
    359 # Run the tree-processors
    360 for treeprocessor in self.treeprocessors:

File lib/python3.10/site-packages/markdown/blockparser.py:117, in BlockParser.parseDocument(self, lines)
    115 # Create an `ElementTree` from the lines
    116 self.root = etree.Element(self.md.doc_tag)
--> 117 self.parseChunk(self.root, '\n'.join(lines))
    118 return etree.ElementTree(self.root)

File lib/python3.10/site-packages/markdown/blockparser.py:136, in BlockParser.parseChunk(self, parent, text)
    120 def parseChunk(self, parent: etree.Element, text: str) -> None:
    121     """ Parse a chunk of Markdown text and attach to given `etree` node.
    122 
    123     While the `text` argument is generally assumed to contain multiple
   (...)
    134 
    135     """
--> 136     self.parseBlocks(parent, text.split('\n\n'))

File lib/python3.10/site-packages/markdown/blockparser.py:158, in BlockParser.parseBlocks(self, parent, blocks)
    156 for processor in self.blockprocessors:
    157     if processor.test(parent, blocks[0]):
--> 158         if processor.run(parent, blocks) is not False:
    159             # run returns True or None
    160             break

File lib/python3.10/site-packages/markdown/extensions/abbr.py:61, in AbbrPreprocessor.run(self, parent, blocks)
     58 abbr = m.group('abbr').strip()
     59 title = m.group('title').strip()
     60 self.parser.md.inlinePatterns.register(
---> 61     AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
     62 )
     63 if block[m.end():].strip():
     64     # Add any content after match back to blocks as separate block
     65     blocks.insert(0, block[m.end():].lstrip('\n'))

File lib/python3.10/site-packages/markdown/extensions/abbr.py:94, in AbbrInlineProcessor.__init__(self, pattern, title)
     93 def __init__(self, pattern: str, title: str):
---> 94     super().__init__(pattern)
     95     self.title = title

File lib/python3.10/site-packages/markdown/inlinepatterns.py:297, in InlineProcessor.__init__(self, pattern, md)
    287 """
    288 Create an instant of an inline processor.
    289 
   (...)
    294 
    295 """
    296 self.pattern = pattern
--> 297 self.compiled_re = re.compile(pattern, re.DOTALL | re.UNICODE)
    299 # API for Markdown to pass `safe_mode` into instance
    300 self.safe_mode = False

File lib/python3.10/re.py:251, in compile(pattern, flags)
    249 def compile(pattern, flags=0):
    250     "Compile a regular expression pattern, returning a Pattern object."
--> 251     return _compile(pattern, flags)

File lib/python3.10/re.py:303, in _compile(pattern, flags)
    301 if not sre_compile.isstring(pattern):
    302     raise TypeError("first argument must be string or compiled pattern")
--> 303 p = sre_compile.compile(pattern, flags)
    304 if not (flags & DEBUG):
    305     if len(_cache) >= _MAXCACHE:
    306         # Drop the oldest item

File lib/python3.10/sre_compile.py:788, in compile(p, flags)
    786 if isstring(p):
    787     pattern = p
--> 788     p = sre_parse.parse(p, flags)
    789 else:
    790     pattern = None

File lib/python3.10/sre_parse.py:955, in parse(str, flags, state)
    952 state.str = str
    954 try:
--> 955     p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)
    956 except Verbose:
    957     # the VERBOSE flag was switched on inside the pattern.  to be
    958     # on the safe side, we'll parse the whole thing again...
    959     state = State()

File lib/python3.10/sre_parse.py:444, in _parse_sub(source, state, verbose, nested)
    442 start = source.tell()
    443 while True:
--> 444     itemsappend(_parse(source, state, verbose, nested + 1,
    445                        not nested and not items))
    446     if not sourcematch("|"):
    447         break

File lib/python3.10/sre_parse.py:841, in _parse(source, state, verbose, nested, first)
    838         raise source.error(err.msg, len(name) + 1) from None
    839 sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and
    840                not (del_flags & SRE_FLAG_VERBOSE))
--> 841 p = _parse_sub(source, state, sub_verbose, nested + 1)
    842 if not source.match(")"):
    843     raise source.error("missing ), unterminated subpattern",
    844                        source.tell() - start)

File lib/python3.10/sre_parse.py:444, in _parse_sub(source, state, verbose, nested)
    442 start = source.tell()
    443 while True:
--> 444     itemsappend(_parse(source, state, verbose, nested + 1,
    445                        not nested and not items))
    446     if not sourcematch("|"):
    447         break

File lib/python3.10/sre_parse.py:550, in _parse(source, state, verbose, nested, first)
    548 this = sourceget()
    549 if this is None:
--> 550     raise source.error("unterminated character set",
    551                        source.tell() - here)
    552 if this == "]" and set:
    553     break

error: unterminated character set at position 17

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Bug report. | confirmed: Confirmed bug report or approved feature request. | extension: Related to one or more of the included extensions.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions