mirror of
https://github.com/djohnlewis/stackdump
synced 2025-12-14 20:03:26 +00:00
Added markdown parsing for comments so links in comments now appear properly.
Also rewrote part of the HTML rewriting code so it no longer introduces an additional wrapping element in the output, which had been added because of an html5lib requirement on its input.
This commit is contained in:
101
python/packages/markdown/postprocessors.py
Normal file
101
python/packages/markdown/postprocessors.py
Normal file
@@ -0,0 +1,101 @@
|
||||
"""
|
||||
POST-PROCESSORS
|
||||
=============================================================================
|
||||
|
||||
Markdown also allows post-processors, which are similar to preprocessors in
|
||||
that they need to implement a "run" method. However, they are run after core
|
||||
processing.
|
||||
|
||||
"""
|
||||
|
||||
import re
|
||||
import util
|
||||
import odict
|
||||
|
||||
def build_postprocessors(md_instance, **kwargs):
    """ Build the default postprocessors for Markdown.

    Returns an ordered dict mapping each postprocessor's name to a fresh
    instance, inserted in the order "raw_html", "amp_substitute",
    "unescape". Only the raw-html postprocessor receives `md_instance`;
    extra keyword arguments are accepted but not used here.

    """
    defaults = (
        ("raw_html", RawHtmlPostprocessor(md_instance)),
        ("amp_substitute", AndSubstitutePostprocessor()),
        ("unescape", UnescapePostprocessor()),
    )
    registry = odict.OrderedDict()
    for name, processor in defaults:
        registry[name] = processor
    return registry
|
||||
|
||||
|
||||
class Postprocessor(util.Processor):
    """
    Postprocessors are run after the ElementTree is converted back into text.

    Each Postprocessor implements a "run" method that takes a text string,
    modifies it as necessary and returns a text string.

    Postprocessors must extend markdown.Postprocessor.

    """

    def run(self, text):
        """
        Hook for subclasses: receive the html document as a single text
        string and return a (possibly modified) string.

        The base implementation does nothing and returns None.

        """
|
||||
|
||||
|
||||
class RawHtmlPostprocessor(Postprocessor):
    """ Restore raw html to the document. """

    def run(self, text):
        """ Iterate over html stash and restore "safe" html.

        For every stashed block, the placeholder in `text` is replaced by
        the stashed html. When safe mode is on and the block is not marked
        safe, the html is first escaped ('escape'), dropped ('remove') or
        swapped for the configured replacement text (any other mode).

        Returns the text with all placeholders substituted.

        """
        stash = self.markdown.htmlStash
        safe_mode = self.markdown.safeMode
        # Normalised once; only consulted when safe mode is truthy.
        mode_name = str(safe_mode).lower()

        for i in range(stash.html_counter):
            html, safe = stash.rawHtmlBlocks[i]
            placeholder = stash.get_placeholder(i)

            if safe_mode and not safe:
                if mode_name == 'escape':
                    html = self.escape(html)
                elif mode_name == 'remove':
                    html = ''
                else:
                    html = self.markdown.html_replacement_text

            # Block-level html that is being emitted verbatim must not stay
            # wrapped in the <p> that was generated around its placeholder.
            if self.isblocklevel(html) and (safe or not safe_mode):
                text = text.replace("<p>%s</p>" % (placeholder),
                                    html + "\n")

            # Any remaining occurrence (inline, or not unwrapped above).
            text = text.replace(placeholder, html)
        return text

    def escape(self, html):
        """ Basic html escaping.

        Replaces `&`, `<`, `>` and `"` with their entity references. The
        ampersand is handled first so the entities produced by the later
        replacements are not escaped a second time.

        """
        html = html.replace('&', '&amp;')
        html = html.replace('<', '&lt;')
        html = html.replace('>', '&gt;')
        return html.replace('"', '&quot;')

    def isblocklevel(self, html):
        """ Return whether `html` starts with a block-level construct.

        The name following the leading `<` (or `</`) is extracted; names
        starting with `!`, `?`, `@` or `%` are always treated as block
        level, everything else is delegated to `util.isBlockLevel`. Text
        that does not start with a tag is not block level.

        """
        m = re.match(r'^\<\/?([^ >]+)', html)
        if not m:
            return False
        tag = m.group(1)
        if tag[0] in ('!', '?', '@', '%'):
            # Comment, php etc...
            return True
        return util.isBlockLevel(tag)
|
||||
|
||||
|
||||
class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """

    def run(self, text):
        """ Swap every `util.AMP_SUBSTITUTE` marker in `text` for "&". """
        return text.replace(util.AMP_SUBSTITUTE, "&")
|
||||
|
||||
|
||||
class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars """

    # Matches a decimal number enclosed between the STX and ETX markers.
    # Raw string so that `\d` reaches the regex engine untouched.
    RE = re.compile(r'%s(\d+)%s' % (util.STX, util.ETX))

    def unescape(self, m):
        """ Return the character whose code point is the matched number. """
        code_point = int(m.group(1))
        try:
            return unichr(code_point)
        except NameError:
            # `unichr` is not defined on Python 3, where `chr` is the
            # unicode-aware equivalent.
            return chr(code_point)

    def run(self, text):
        """ Replace every STX<number>ETX marker in `text` with its char. """
        return self.RE.sub(self.unescape, text)
|
||||
Reference in New Issue
Block a user