mirror of
https://github.com/djohnlewis/stackdump
synced 2025-12-15 04:13:24 +00:00
Added markdown parsing for comments so links in comments now appear properly.
Also rewrote part of the HTML rewriting code so it no longer introduces an additional wrapping element in the output; that element had been added because of html5lib's requirements on input.
This commit is contained in:
114
python/packages/markdown/util.py
Normal file
114
python/packages/markdown/util.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import re
|
||||
from logging import CRITICAL
|
||||
|
||||
import etree_loader
|
||||
|
||||
|
||||
"""
|
||||
CONSTANTS
|
||||
=============================================================================
|
||||
"""
|
||||
|
||||
"""
|
||||
Constants you might want to modify
|
||||
-----------------------------------------------------------------------------
|
||||
"""
|
||||
|
||||
# Case-insensitive, whole-string match against the tag names that are
# treated as block level.  The alternatives are joined with "|" in the
# order listed, anchored with "^(" ... ")$".
BLOCK_LEVEL_ELEMENTS = re.compile(
    "^(" + "|".join((
        "p", "div", "h[1-6]", "blockquote", "pre", "table", "dl", "ol", "ul",
        "script", "noscript", "form", "fieldset", "iframe", "math",
        "hr", "hr/", "style", "li", "dt", "dd", "thead", "tbody",
        "tr", "th", "td", "section", "footer", "header", "group", "figure",
        "figcaption", "aside", "article", "canvas", "output",
        "progress", "video",
    )) + ")$",
    re.IGNORECASE,
)
|
||||
# Placeholders
# Every placeholder is wrapped in a pair of control characters.
STX = u'\u0002'  # "Start of text": opens a placeholder
ETX = u'\u0003'  # "End of text": closes a placeholder
# NOTE(review): presumably a stand-in for "&" -- confirm against the callers.
AMP_SUBSTITUTE = STX + "amp" + ETX
INLINE_PLACEHOLDER_PREFIX = STX + "klzzwxh:"
# Template with one "%s" slot between the prefix and the closing ETX.
INLINE_PLACEHOLDER = "".join((INLINE_PLACEHOLDER_PREFIX, "%s", ETX))
# Matches a complete inline placeholder and captures its four-digit id.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
|
||||
|
||||
"""
|
||||
Constants you probably do not need to change
|
||||
-----------------------------------------------------------------------------
|
||||
"""
|
||||
|
||||
# Each entry is a (first, last) pair of characters delimiting one range.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF), Syriac (0700-074F),
    # Arabic supplement (0750-077F), Thaana (0780-07BF), Nko (07C0-07FF).
    (u'\u0590', u'\u07FF'),
    # Tifinagh
    (u'\u2D30', u'\u2D7F'),
)
|
||||
|
||||
# Extensions should take ElementTree from this module -- either as
# "markdown.util.etree" or via `from markdown.util import etree` -- and
# should not import it by themselves.
etree = etree_loader.importETree()
|
||||
|
||||
"""
|
||||
AUXILIARY GLOBAL FUNCTIONS
|
||||
=============================================================================
|
||||
"""
|
||||
|
||||
|
||||
def isBlockLevel(tag):
    """Tell whether `tag` names a block level HTML element.

    For a string, the result of matching it against BLOCK_LEVEL_ELEMENTS
    is returned (a match object, or None when it does not match).  For
    anything else the result is False.
    """
    if not isinstance(tag, basestring):
        # Some ElementTree tags are not strings; those never count as
        # block level.
        return False
    return BLOCK_LEVEL_ELEMENTS.match(tag)
|
||||
|
||||
"""
|
||||
MISC AUXILIARY CLASSES
|
||||
=============================================================================
|
||||
"""
|
||||
|
||||
class AtomicString(unicode):
    """Text that must be left as it is and not processed any further."""
|
||||
|
||||
|
||||
class Processor:
    """Base class that can keep a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Remember `markdown_instance` as `self.markdown` when one is given.

        The attribute is left unset when the argument is missing or falsy.
        """
        if not markdown_instance:
            return
        self.markdown = markdown_instance
|
||||
|
||||
|
||||
class HtmlStash:
    """
    Stash for HTML objects that are extracted in the beginning and
    replaced with place-holders.
    """

    def __init__(self):
        """Set up an empty stash."""
        self.rawHtmlBlocks = []
        self.html_counter = 0  # number of html segments stored so far

    def store(self, html, safe=False):
        """
        Keep an HTML segment so that it can be reinserted later.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : the placeholder string to insert into the document in
        place of the segment

        """
        self.rawHtmlBlocks.append((html, safe))
        # The segment's key is the counter value from before this call.
        index = self.html_counter
        marker = self.get_placeholder(index)
        self.html_counter = index + 1
        return marker

    def reset(self):
        """Drop every stored segment and restart the counter at zero."""
        self.html_counter, self.rawHtmlBlocks = 0, []

    def get_placeholder(self, key):
        """Build the placeholder string for the segment numbered `key`."""
        parts = (STX, key, ETX)
        return "%swzxhzdk:%d%s" % parts
|
||||
|
||||
Reference in New Issue
Block a user