mirror of
https://github.com/djohnlewis/stackdump
synced 2025-01-23 07:01:41 +00:00
99 lines
3.4 KiB
Python
99 lines
3.4 KiB
Python
|
|
import util
|
|
import odict
|
|
|
|
class State(list):
    """ Stack of parser states used to follow nested block contexts.

    The BlockParser keeps one of these so that block processors can record
    which kind of block they are currently inside, across any depth of
    nesting. It is a thin, named API over a plain list: setting a state
    pushes it onto the end of the list, and resetting removes the most
    recently pushed state again.

    Every ``set`` made on the way into a nested block has to be paired with
    a ``reset`` on the way back out; otherwise the recorded state no longer
    matches the real nesting level.

    All regular list methods remain available, but the three methods below
    are the only ones that need to be used.

    """

    def set(self, state):
        """ Push ``state`` so that it becomes the current state. """
        self.append(state)

    def reset(self):
        """ Discard the current state, returning to the enclosing one.

        Raises IndexError (from ``list.pop``) if no state is set.

        """
        self.pop()

    def isstate(self, state):
        """ Tell whether the current (innermost) state equals ``state``.

        When no state has been set at all the answer is ``False``.

        """
        if not self:
            # Nothing pushed yet, so no state can match.
            return False
        return self[-1] == state
|
|
|
|
class BlockParser:
    """ Turn blocks of Markdown text into an ElementTree.

    The parser itself knows nothing about Markdown syntax. It holds a
    collection of BlockProcessors in ``self.blockprocessors`` and drives
    them over the text, letting each one attach elements to the tree.

    """

    def __init__(self, markdown):
        # Registry of block processors; starts empty and is filled by the
        # code that configures the parser.
        self.blockprocessors = odict.OrderedDict()
        # Stack of nested-block states shared with the processors.
        self.state = State()
        # Owning Markdown instance (``doc_tag`` is read from it later).
        self.markdown = markdown

    def parseDocument(self, lines):
        """ Build a complete ElementTree from the lines of a document.

        A fresh root element (tagged ``self.markdown.doc_tag``) is created
        and stored on ``self.root``. The ``lines`` are joined with newlines
        and parsed with that root as the parent, and the root is then
        wrapped in an ElementTree object, which is returned.

        Intended for whole documents only, not for fragments.

        """
        root = util.etree.Element(self.markdown.doc_tag)
        self.root = root
        document = '\n'.join(lines)
        self.parseChunk(root, document)
        return util.etree.ElementTree(self.root)

    def parseChunk(self, parent, text):
        """ Parse a piece of markdown text onto an existing etree node.

        ``text`` is split into blocks at blank lines (the two-character
        sequence ``'\\n\\n'``); it will usually hold several blocks but may
        hold just one. This is the entry point extensions would normally
        use when they need block-level parsing.

        ``parent`` is modified in place. Nothing is returned.

        """
        pieces = text.split('\n\n')
        self.parseBlocks(parent, pieces)

    def parseBlocks(self, parent, blocks):
        """ Run the block processors over ``blocks`` until it is empty.

        For the block at the head of the list, the processors are tried in
        the order given by ``self.blockprocessors.values()``. The first one
        whose ``test`` accepts the block and whose ``run`` does not return
        ``False`` ends that round, and the next round starts again from the
        first processor.

        The method is public so that BlockProcessors (including ones added
        or replaced by extensions) can call it recursively to parse the
        content of a nested block, though it is mostly used internally.

        NOTE: this method never removes anything from ``blocks`` itself;
        the loop only ends because processors consume blocks in ``run``.
        If no processor consumes the head block, the loop does not
        terminate.

        """
        while blocks:
            for handler in self.blockprocessors.values():
                if not handler.test(parent, blocks[0]):
                    # Not applicable to the current head block.
                    continue
                if handler.run(parent, blocks) is False:
                    # An explicit False means "declined"; try the next one.
                    continue
                # run returned True or None: the block was handled.
                break
|
|
|
|
|