Added markdown parsing for comments so links in comments now appear properly.

Also rewrote part of the HTML rewriting code so it no longer introduces an additional wrapping element in the output; that element was added because of an html5lib requirement on its input.
2025-12-14 20:03:26 +00:00 · 2012-12-15 21:43:06 +11:00
parent 5ac8492f38
commit 993bee4fc1
33 changed files with 5217 additions and 10 deletions
--- a/python/packages/markdown/postprocessors.py
+++ b/python/packages/markdown/postprocessors.py
@@ -0,0 +1,101 @@
+"""
+POST-PROCESSORS
+=============================================================================
+
+Markdown also allows post-processors, which are similar to preprocessors in
+that they need to implement a "run" method. However, they are run after core
+processing.
+
+"""
+
+import re
+import util
+import odict
+
+def build_postprocessors(md_instance, **kwargs):  # kwargs accepted but unused here
+    """ Return an odict.OrderedDict of the default postprocessors, keyed by name. """
+    postprocessors = odict.OrderedDict()
+    postprocessors["raw_html"] = RawHtmlPostprocessor(md_instance)
+    postprocessors["amp_substitute"] = AndSubstitutePostprocessor()
+    postprocessors["unescape"] = UnescapePostprocessor()
+    return postprocessors
+
+
+class Postprocessor(util.Processor):
+    """
+    Postprocessors are run after the ElementTree is converted back into text.
+
+    Each Postprocessor implements a "run" method that takes a text string,
+    modifies it as necessary and returns a text string.
+
+    Postprocessors must extend markdown.Postprocessor.
+
+    """
+
+    def run(self, text):
+        """
+        Subclasses of Postprocessor should implement a `run` method, which
+        takes the html document as a single text string and returns a
+        (possibly modified) string.
+
+        """
+        pass  # base implementation is a no-op and returns None
+
+
+class RawHtmlPostprocessor(Postprocessor):
+    """ Replace html-stash placeholders in the text with the stashed raw html. """
+
+    def run(self, text):
+        """ Swap every stashed block back in, applying safeMode to unsafe html. """
+        for i in range(self.markdown.htmlStash.html_counter):
+            html, safe  = self.markdown.htmlStash.rawHtmlBlocks[i]  # (html, safe flag)
+            if self.markdown.safeMode and not safe:  # unsafe html while in safe mode
+                if str(self.markdown.safeMode).lower() == 'escape':
+                    html = self.escape(html)  # show the markup as literal text
+                elif str(self.markdown.safeMode).lower() == 'remove':
+                    html = ''  # drop the markup entirely
+                else:  # any other truthy safeMode value
+                    html = self.markdown.html_replacement_text
+            if self.isblocklevel(html) and (safe or not self.markdown.safeMode):
+                text = text.replace("<p>%s</p>" % 
+                            (self.markdown.htmlStash.get_placeholder(i)),
+                            html + "\n")  # block-level html replaces its <p> wrapper too
+            text =  text.replace(self.markdown.htmlStash.get_placeholder(i), 
+                                 html)  # then any remaining bare placeholder
+        return text
+
+    def escape(self, html):
+        """ Escape the characters &, <, > and " as html entities. """
+        html = html.replace('&', '&amp;')  # first, so later entities keep their &
+        html = html.replace('<', '&lt;')
+        html = html.replace('>', '&gt;')
+        return html.replace('"', '&quot;')
+
+    def isblocklevel(self, html):  # does html start with a block-level tag?
+        m = re.match(r'^\<\/?([^ >]+)', html)  # group 1: tag name after '<' or '</'
+        if m:
+            if m.group(1)[0] in ('!', '?', '@', '%'):
+                # Comment, php etc...
+                return True
+            return util.isBlockLevel(m.group(1))  # decision delegated to util
+        return False  # text does not start with a tag
+
+
+class AndSubstitutePostprocessor(Postprocessor):
+    """ Replace each util.AMP_SUBSTITUTE marker in the text with "&". """
+
+    def run(self, text):
+        text =  text.replace(util.AMP_SUBSTITUTE, "&")
+        return text
+
+
+class UnescapePostprocessor(Postprocessor):
+    """ Replace STX<digits>ETX sequences with the character of that code. """
+
+    RE = re.compile('%s(\d+)%s' % (util.STX, util.ETX))  # group 1: the digits
+
+    def unescape(self, m):
+        return unichr(int(m.group(1)))  # digits -> code point -> character
+
+    def run(self, text):
+        return self.RE.sub(self.unescape, text)