mirror of
https://github.com/djohnlewis/stackdump
synced 2025-01-23 15:11:36 +00:00
1da980424c
Images are also replaced with a placeholder.
42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
from xml.dom import Node
|
|
|
|
import gettext
|
|
_ = gettext.gettext
|
|
|
|
import _base
|
|
from html5lib.constants import voidElements
|
|
|
|
class TreeWalker(_base.NonRecursiveTreeWalker):
    """Tree walker for ``xml.dom``-style node trees.

    Implements ``getNodeDetails``, ``getFirstChild``, ``getNextSibling`` and
    ``getParentNode`` purely in terms of the standard DOM node attributes
    (``nodeType``, ``firstChild``, ``nextSibling``, ``parentNode``).
    """

    def getNodeDetails(self, node):
        """Describe ``node`` as a tuple tagged with a ``_base`` node kind.

        The first item of the returned tuple is always one of the ``_base``
        constants; the remaining items depend on the node type:

        * doctype: ``(_base.DOCTYPE, name, publicId, systemId)``
        * text / CDATA section: ``(_base.TEXT, nodeValue)``
        * element: ``(_base.ELEMENT, namespaceURI, nodeName, attrs,
          hasChildNodes)`` where ``attrs`` maps ``(namespace, name)`` tuples
          to attribute values
        * comment: ``(_base.COMMENT, nodeValue)``
        * document / document fragment: ``(_base.DOCUMENT,)``
        * anything else: ``(_base.UNKNOWN, nodeType)``
        """
        node_type = node.nodeType

        if node_type == Node.DOCUMENT_TYPE_NODE:
            return _base.DOCTYPE, node.name, node.publicId, node.systemId

        elif node_type in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
            return _base.TEXT, node.nodeValue

        elif node_type == Node.ELEMENT_NODE:
            attrs = {}
            for qname in node.attributes.keys():
                attr = node.getAttributeNode(qname)
                if attr.namespaceURI:
                    key = (attr.namespaceURI, attr.localName)
                else:
                    # Attributes created without a namespace (DOM Level 1
                    # style) have no meaningful localName: the DOM spec
                    # makes it null and minidom strips everything up to the
                    # first colon.  Use the full attribute name so that e.g.
                    # a plain "xml:lang" is not reported as "lang" and
                    # distinct attributes cannot collapse onto one key.
                    key = (None, attr.name)
                attrs[key] = attr.value
            return (_base.ELEMENT, node.namespaceURI, node.nodeName,
                    attrs, node.hasChildNodes())

        elif node_type == Node.COMMENT_NODE:
            return _base.COMMENT, node.nodeValue

        elif node_type in (Node.DOCUMENT_NODE, Node.DOCUMENT_FRAGMENT_NODE):
            return (_base.DOCUMENT,)

        else:
            return _base.UNKNOWN, node_type

    def getFirstChild(self, node):
        """Return ``node.firstChild``."""
        return node.firstChild

    def getNextSibling(self, node):
        """Return ``node.nextSibling``."""
        return node.nextSibling

    def getParentNode(self, node):
        """Return ``node.parentNode``."""
        return node.parentNode
|