Added markdown parsing for comments so links in comments now appear properly.

Also rewrote part of the HTML rewriting code so it no longer introduces an additional wrapping element in the output, which was being added because of an html5lib requirement on input.
2025-12-16 21:03:26 +00:00 · 2012-12-15 21:43:06 +11:00
parent 5ac8492f38
commit 993bee4fc1
33 changed files with 5217 additions and 10 deletions
--- a/python/packages/markdown/extensions/headerid.py
+++ b/python/packages/markdown/extensions/headerid.py
@@ -0,0 +1,201 @@
+#!/usr/bin/python
+
+"""
+HeaderID Extension for Python-Markdown
+======================================
+
+Auto-generate id attributes for HTML headers.
+
+Basic usage:
+
+    >>> import markdown
+    >>> text = "# Some Header #"
+    >>> md = markdown.markdown(text, ['headerid'])
+    >>> print md
+    <h1 id="some-header">Some Header</h1>
+
+All header IDs are unique:
+
+    >>> text = '''
+    ... #Header
+    ... #Header
+    ... #Header'''
+    >>> md = markdown.markdown(text, ['headerid'])
+    >>> print md
+    <h1 id="header">Header</h1>
+    <h1 id="header_1">Header</h1>
+    <h1 id="header_2">Header</h1>
+
+To fit within an HTML template's hierarchy, set the header base level:
+
+    >>> text = '''
+    ... #Some Header
+    ... ## Next Level'''
+    >>> md = markdown.markdown(text, ['headerid(level=3)'])
+    >>> print md
+    <h3 id="some-header">Some Header</h3>
+    <h4 id="next-level">Next Level</h4>
+
+Works with inline markup.
+
+    >>> text = '#Some *Header* with [markup](http://example.com).'
+    >>> md = markdown.markdown(text, ['headerid'])
+    >>> print md
+    <h1 id="some-header-with-markup">Some <em>Header</em> with <a href="http://example.com">markup</a>.</h1>
+
+Turn off auto generated IDs:
+
+    >>> text = '''
+    ... # Some Header
+    ... # Another Header'''
+    >>> md = markdown.markdown(text, ['headerid(forceid=False)'])
+    >>> print md
+    <h1>Some Header</h1>
+    <h1>Another Header</h1>
+
+Use with MetaData extension:
+
+    >>> text = '''header_level: 2
+    ... header_forceid: Off
+    ...
+    ... # A Header'''
+    >>> md = markdown.markdown(text, ['headerid', 'meta'])
+    >>> print md
+    <h2>A Header</h2>
+
+Copyright 2007-2011 [Waylan Limberg](http://achinghead.com/).
+
+Project website: <http://packages.python.org/Markdown/extensions/header_id.html>
+Contact: markdown@freewisdom.org
+
+License: BSD (see ../docs/LICENSE for details) 
+
+Dependencies:
+* [Python 2.3+](http://python.org)
+* [Markdown 2.0+](http://packages.python.org/Markdown/)
+
+"""
+
+import markdown
+from markdown.util import etree
+import re
+from string import ascii_lowercase, digits, punctuation
+import logging
+import unicodedata
+
+logger = logging.getLogger('MARKDOWN')  # logger registered under the name 'MARKDOWN'
+
+IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')  # id ending in "_<digits>": group 1 = base, group 2 = counter
+
+
+def slugify(value, separator):
+    """ Slugify a string, to make it URL friendly.
+
+    `value` is NFKD-normalized and reduced to ASCII (characters without an
+    ASCII decomposition are dropped), stripped of everything but word
+    characters, whitespace and hyphens, then trimmed and lower-cased.
+    Finally each run of whitespace and/or `separator` characters is
+    collapsed into a single `separator`.
+
+    """
+    value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
+    value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip().lower()
+    # Escape the separator before placing it in the character class so that
+    # regex metacharacters such as '^' are matched literally, and substitute
+    # through a callable so it is never parsed as a replacement template.
+    pattern = r'[%s\s]+' % re.escape(separator)
+    return re.sub(pattern, lambda match: separator, value)
+
+
+def unique(id, ids):
+    """ Ensure id is unique in set of ids. Append '_1', '_2'... if not.
+
+    `ids` is the collection of ids already in use; it may be a set or a
+    list. The id finally chosen is recorded in `ids` and returned. An empty
+    `id` is never returned as is: it goes through the same suffixing loop,
+    so it becomes '_1' (or the next free '_N').
+
+    """
+    while id in ids or not id:
+        m = IDCOUNT_RE.match(id)
+        if m:
+            # Already carries a numeric suffix: bump the counter.
+            id = '%s_%d' % (m.group(1), int(m.group(2)) + 1)
+        else:
+            id = '%s_%d' % (id, 1)
+    # Sets have no append() and lists have no add(); support both so the
+    # caller is free to track ids in either container.
+    if hasattr(ids, 'add'):
+        ids.add(id)
+    else:
+        ids.append(id)
+    return id
+
+
+def itertext(elem):
+    """ Yield every non-empty text fragment found in elem, in document order.
+
+    Stand-in for the ElementTree method of the same name, which was only
+    added in Python 2.7.
+
+    """
+    leading = elem.text
+    if leading:
+        yield leading
+    for child in elem:
+        # Text inside the child comes first, then the text that follows it.
+        for fragment in itertext(child):
+            yield fragment
+        trailing = child.tail
+        if trailing:
+            yield trailing
+
+
+class HeaderIdTreeprocessor(markdown.treeprocessors.Treeprocessor):
+    """ Assign IDs to headers.
+
+    Expects `md` (the Markdown instance) and `config` (a dict with the keys
+    'level', 'forceid', 'separator' and 'slugify') to be set on the
+    instance before `run` is called.
+
+    """
+
+    # Class-level default only; `run` gives each instance its own list.
+    IDs = []
+
+    def run(self, doc):
+        """ Add an id to, and shift the level of, each header in doc. """
+        if 'IDs' not in self.__dict__:
+            # Never record ids on the class-level default: it is shared by
+            # every instance of the class.
+            self.IDs = []
+        start_level, force_id = self._get_meta()
+        slugify = self.config['slugify']
+        sep = self.config['separator']
+        # getiterator() is the older spelling of iter(); use whichever one
+        # the available ElementTree provides.
+        walk = getattr(doc, 'iter', None) or doc.getiterator
+        for elem in walk():
+            if elem.tag not in ('h1', 'h2', 'h3', 'h4', 'h5', 'h6'):
+                continue
+            if force_id:
+                if "id" in elem.attrib:
+                    # Elements have no `id` attribute of their own; the
+                    # existing value lives in the attribute mapping.
+                    header_id = elem.get('id')
+                else:
+                    header_id = slugify(u''.join(itertext(elem)), sep)
+                elem.set('id', unique(header_id, self.IDs))
+            if start_level:
+                # Keep the shifted level inside the valid h1..h6 range.
+                level = int(elem.tag[-1]) + start_level
+                elem.tag = 'h%d' % max(1, min(6, level))
+
+    def _get_meta(self):
+        """ Return meta data supported by this ext as a tuple.
+
+        The tuple is (level offset, force-id flag). Both come from the
+        extension config and are overridden by the 'header_level' and
+        'header_forceid' keys of `md.Meta` when that attribute exists.
+
+        """
+        level = int(self.config['level']) - 1
+        force = self._str2bool(self.config['forceid'])
+        if hasattr(self.md, 'Meta'):
+            # `in` rather than dict.has_key(), which Python 3 removed.
+            if 'header_level' in self.md.Meta:
+                level = int(self.md.Meta['header_level'][0]) - 1
+            if 'header_forceid' in self.md.Meta:
+                force = self._str2bool(self.md.Meta['header_forceid'][0])
+        return level, force
+
+    def _str2bool(self, s, default=False):
+        """ Convert a string to a boolean value.
+
+        The recognised spellings are matched case-insensitively; anything
+        else yields `default`.
+
+        """
+        s = str(s).lower()
+        if s in ('0', 'f', 'false', 'off', 'no', 'n'):
+            return False
+        if s in ('1', 't', 'true', 'on', 'yes', 'y'):
+            return True
+        return default
+
+
+class HeaderIdExtension (markdown.Extension):
+    """ Extension that registers HeaderIdTreeprocessor with Markdown. """
+
+    def __init__(self, configs):
+        """ Set the default config, then apply the overrides in configs.
+
+        configs is an iterable of (key, value) pairs; None keeps the
+        defaults untouched.
+
+        """
+        # set defaults
+        self.config = {
+                'level' : ['1', 'Base level for headers.'],
+                'forceid' : ['True', 'Force all headers to have an id.'],
+                'separator' : ['-', 'Word separator.'],
+                'slugify' : [slugify, 'Callable to generate anchors'],
+            }
+
+        # makeExtension() defaults configs to None, which cannot be
+        # iterated; treat it as "no overrides".
+        if configs is None:
+            configs = []
+        for key, value in configs:
+            self.setConfig(key, value)
+
+    def extendMarkdown(self, md, md_globals):
+        """ Create the treeprocessor and add it to md.treeprocessors. """
+        md.registerExtension(self)
+        self.processor = HeaderIdTreeprocessor()
+        self.processor.md = md
+        self.processor.config = self.getConfigs()
+        # Added under the name 'headerid' at position '>inline'
+        # (presumably "after the inline treeprocessor" -- confirm against
+        # Markdown's ordering syntax).
+        md.treeprocessors.add('headerid', self.processor, '>inline')
+
+    def reset(self):
+        """ Discard the ids recorded so far by the treeprocessor. """
+        self.processor.IDs = []
+
+
+def makeExtension(configs=None):
+    """ Return a HeaderIdExtension built from configs.
+
+    configs is an iterable of (key, value) pairs; None means no overrides.
+
+    """
+    # The constructor iterates over configs, so never hand it None.
+    if configs is None:
+        configs = []
+    return HeaderIdExtension(configs=configs)
+
+if __name__ == "__main__":  # executed as a script: run the doctests found in this module
+    import doctest
+    doctest.testmod()
+