mirror of
https://github.com/djohnlewis/stackdump
synced 2025-01-23 07:01:41 +00:00
115 lines
3.4 KiB
Python
115 lines
3.4 KiB
Python
|
# -*- coding: utf-8 -*-
|
||
|
import re
|
||
|
from logging import CRITICAL
|
||
|
|
||
|
import etree_loader
|
||
|
|
||
|
|
||
"""
CONSTANTS
=============================================================================
"""
|
||
|
|
||
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
|
||
|
|
||
|
# Case-insensitive pattern that matches a complete tag name (anchored with
# ^ and $) when that name is one of the block-level elements listed below.
BLOCK_LEVEL_ELEMENTS = re.compile("^(p|div|h[1-6]|blockquote|pre|table|dl|ol|ul"
                                  "|script|noscript|form|fieldset|iframe|math"
                                  "|hr|hr/|style|li|dt|dd|thead|tbody"
                                  "|tr|th|td|section|footer|header|group|figure"
                                  "|figcaption|aside|article|canvas|output"
                                  "|progress|video)$", re.IGNORECASE)
|
||
|
# Placeholders
#
# Every placeholder string is delimited by the STX and ETX control characters.
STX = u'\u0002'  # Use STX ("Start of text") for start-of-placeholder
ETX = u'\u0003'  # Use ETX ("End of text") for end-of-placeholder
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
# Template: fill the "%s" slot with the placeholder id.
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
# Matches an inline placeholder whose id is exactly four decimal digits;
# group 1 captures the id.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]{4})')
AMP_SUBSTITUTE = STX+"amp"+ETX
|
||
|
|
||
"""
Constants you probably do not need to change
-----------------------------------------------------------------------------
"""
|
||
|
|
||
|
# Each entry is a (start, end) pair of characters delimiting a run of the
# Unicode blocks named in the comments beside it.
RTL_BIDI_RANGES = (
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    (u'\u0590', u'\u07FF'),
    (u'\u2D30', u'\u2D7F'),  # Tifinagh
)
|
||
|
|
||
|
# Extensions should use "markdown.util.etree" instead of "etree" (or do `from
# markdown.util import etree`).  Do not import it by yourself.

etree = etree_loader.importETree()
|
||
|
|
||
"""
AUXILIARY GLOBAL FUNCTIONS
=============================================================================
"""
|
||
|
|
||
|
|
||
|
def isBlockLevel(tag):
    """Check if the tag is a block level HTML tag.

    Keyword arguments:

    * tag: a tag name; may also be a non-string object.

    Returns: for a string, the result of matching it against
    BLOCK_LEVEL_ELEMENTS (a match object, or None when it does not match);
    for anything that is not a string, False.

    """
    if isinstance(tag, basestring):
        return BLOCK_LEVEL_ELEMENTS.match(tag)
    # Some ElementTree tags are not strings, so return False.
    return False
|
||
|
|
||
"""
MISC AUXILIARY CLASSES
=============================================================================
"""
|
||
|
|
||
|
class AtomicString(unicode):
    """A string which should not be further processed.

    Marker subclass of ``unicode``; it adds no attributes or methods of its
    own, so it can be told apart from ordinary text only by its type.
    """
    pass
|
||
|
|
||
|
|
||
|
class Processor:
    """Object that can hold a reference to a Markdown instance."""

    def __init__(self, markdown_instance=None):
        """Remember the given Markdown instance.

        Keyword arguments:

        * markdown_instance: stored as ``self.markdown`` when it is truthy.

        When the argument is omitted (or otherwise falsy) the ``markdown``
        attribute is not created at all.

        """
        if markdown_instance:
            self.markdown = markdown_instance
|
||
|
|
||
|
|
||
|
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create a HtmlStash. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks = []  # list of (html, safe) tuples, in store order

    def store(self, html, safe=False):
        """
        Saves an HTML segment for later reinsertion.  Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:

        * html: an html segment
        * safe: label an html segment as safe for safemode

        Returns : a placeholder string

        """
        self.rawHtmlBlocks.append((html, safe))
        # The placeholder carries the counter value from before the
        # increment, i.e. the index at which the segment was just appended
        # (as long as the counter and the list are only changed through
        # store() and reset()).
        placeholder = self.get_placeholder(self.html_counter)
        self.html_counter += 1
        return placeholder

    def reset(self):
        """ Discard all stashed segments and restart the counter at 0. """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key):
        """ Return the placeholder string for the integer `key`. """
        return "%swzxhzdk:%d%s" % (STX, key, ETX)
|
||
|
|