1
0
mirror of https://github.com/djohnlewis/stackdump synced 2025-01-22 22:51:36 +00:00
stackdump/python/packages/markdown/extensions/html_tidy.py
Samuel Lai 993bee4fc1 Added markdown parsing for comments so links in comments now appear properly.
Also rewrote part of the HTML rewriting code so it doesn't introduce an additional wrapping element in the output which was added due to a html5lib requirements on input.
2012-12-15 21:43:06 +11:00

70 lines
2.2 KiB
Python

#!/usr/bin/env python
"""
HTML Tidy Extension for Python-Markdown
=======================================
Runs [HTML Tidy][] on the output of Python-Markdown using the [uTidylib][]
Python wrapper. Both libtidy and uTidylib must be installed on your system.
Note than any Tidy [options][] can be passed in as extension configs. So,
for example, to output HTML rather than XHTML, set ``output_xhtml=0``. To
indent the output, set ``indent=auto`` and to have Tidy wrap the output in
``<html>`` and ``<body>`` tags, set ``show_body_only=0``.
[HTML Tidy]: http://tidy.sourceforge.net/
[uTidylib]: http://utidylib.berlios.de/
[options]: http://tidy.sourceforge.net/docs/quickref.html
Copyright (c)2008 [Waylan Limberg](http://achinghead.com)
License: [BSD](http://www.opensource.org/licenses/bsd-license.php)
Dependencies:
* [Python2.3+](http://python.org)
* [Markdown 2.0+](http://packages.python.org/Markdown/)
* [HTML Tidy](http://utidylib.berlios.de/)
* [uTidylib](http://utidylib.berlios.de/)
"""
import markdown
try:
import tidy
except ImportError:
tidy = None
class TidyExtension(markdown.Extension):
def __init__(self, configs):
# Set defaults to match typical markdown behavior.
self.config = dict(output_xhtml=1,
show_body_only=1,
char_encoding='utf8'
)
# Merge in user defined configs overriding any present if nessecary.
for c in configs:
self.config[c[0]] = c[1]
def extendMarkdown(self, md, md_globals):
# Save options to markdown instance
md.tidy_options = self.config
# Add TidyProcessor to postprocessors
if tidy:
md.postprocessors['tidy'] = TidyProcessor(md)
class TidyProcessor(markdown.postprocessors.Postprocessor):
def run(self, text):
# Pass text to Tidy. As Tidy does not accept unicode we need to encode
# it and decode its return value.
enc = self.markdown.tidy_options.get('char_encoding', 'utf8')
return unicode(tidy.parseString(text.encode(enc),
**self.markdown.tidy_options),
encoding=enc)
def makeExtension(configs=None):
return TidyExtension(configs=configs)