mirror of
https://github.com/djohnlewis/stackdump
synced 2025-12-07 08:23:25 +00:00
Links in question view are now parsed and links are re-written where possible to point to the stackdump instance. They are also styled differently to highlight this.
Images are also replaced with a placeholder.
This commit is contained in:
176
python/packages/html5lib/treewalkers/_base.py
Normal file
176
python/packages/html5lib/treewalkers/_base.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import gettext

from html5lib.constants import spaceCharacters, voidElements

# Translation hook applied to the error messages built in this module.
_ = gettext.gettext

# Join the imported whitespace characters into one unicode string so the
# result can be handed straight to lstrip()/rstrip() as the strip set.
spaceCharacters = u"".join(spaceCharacters)
|
||||
|
||||
class TreeWalker(object):
    """Base class for tree walkers.

    A walker wraps a tree and, when iterated, produces token dictionaries.
    Subclasses implement ``__iter__``; the helper methods below build the
    individual token dictionaries.
    """

    def __init__(self, tree):
        """Store *tree*, the object that iteration will walk."""
        self.tree = tree

    def __iter__(self):
        """Yield the tokens for ``self.tree``; subclasses must override."""
        raise NotImplementedError

    def error(self, msg):
        """Return a ``SerializeError`` token whose data is *msg*."""
        return {"type": "SerializeError", "data": msg}

    def _normalizeNamespace(self, namespace):
        """Return *namespace* as unicode, keeping ``None`` as ``None``.

        Calling ``unicode(None)`` would produce the literal string
        ``u"None"``, which is not a meaningful namespace.
        """
        if namespace is None:
            return None
        return unicode(namespace)

    def normalizeAttrs(self, attrs):
        """Return a new dict of attributes with unicode keys and values.

        *attrs* maps ``(namespace, name)`` tuples to values. A falsy
        namespace is normalised to ``None``. A falsy *attrs* (``None`` or
        empty) yields an empty dict.
        """
        newattrs = {}
        if attrs:
            #TODO: treewalkers should always have attrs
            for (namespace, name), value in attrs.iteritems():
                namespace = unicode(namespace) if namespace else None
                newattrs[(namespace, unicode(name))] = unicode(value)
        return newattrs

    def emptyTag(self, namespace, name, attrs, hasChildren=False):
        """Yield an ``EmptyTag`` token for a void element.

        If *hasChildren* is true a ``SerializeError`` token follows, because
        a void element is not expected to have children.
        """
        yield {"type": "EmptyTag",
               "name": unicode(name),
               "namespace": self._normalizeNamespace(namespace),
               "data": self.normalizeAttrs(attrs)}
        if hasChildren:
            yield self.error(_("Void element has children"))

    def startTag(self, namespace, name, attrs):
        """Return a ``StartTag`` token with normalised attributes."""
        return {"type": "StartTag",
                "name": unicode(name),
                "namespace": self._normalizeNamespace(namespace),
                "data": self.normalizeAttrs(attrs)}

    def endTag(self, namespace, name):
        """Return an ``EndTag`` token; its data is always an empty dict."""
        return {"type": "EndTag",
                "name": unicode(name),
                "namespace": self._normalizeNamespace(namespace),
                "data": {}}

    def text(self, data):
        """Yield tokens for a run of text.

        Leading and trailing characters found in ``spaceCharacters`` are
        emitted as separate ``SpaceCharacters`` tokens around an optional
        ``Characters`` token holding the remainder. Empty parts are skipped.
        """
        data = unicode(data)
        middle = data.lstrip(spaceCharacters)
        # Whatever lstrip removed is the leading whitespace.
        left = data[:len(data) - len(middle)]
        if left:
            yield {"type": "SpaceCharacters", "data": left}
        data = middle
        middle = data.rstrip(spaceCharacters)
        # Whatever rstrip removed is the trailing whitespace.
        right = data[len(middle):]
        if middle:
            yield {"type": "Characters", "data": middle}
        if right:
            yield {"type": "SpaceCharacters", "data": right}

    def comment(self, data):
        """Return a ``Comment`` token with *data* converted to unicode."""
        return {"type": "Comment", "data": unicode(data)}

    def doctype(self, name, publicId=None, systemId=None, correct=True):
        """Return a ``Doctype`` token.

        A *name* of ``None`` becomes the empty unicode string; the other
        arguments are stored unchanged.
        """
        return {"type": "Doctype",
                "name": unicode(name) if name is not None else u"",
                "publicId": publicId,
                "systemId": systemId,
                "correct": correct}

    def entity(self, name):
        """Return an ``Entity`` token for *name*."""
        return {"type": "Entity", "name": unicode(name)}

    def unknown(self, nodeType):
        """Return a ``SerializeError`` token reporting *nodeType*.

        *nodeType* is converted to unicode first so that non-string node
        types (for example integers) do not raise ``TypeError`` during the
        concatenation.
        """
        return self.error(_("Unknown node type: ") + unicode(nodeType))
|
||||
|
||||
class RecursiveTreeWalker(TreeWalker):
    """Tree walker base for implementations that recurse into child nodes."""

    def walkChildren(self, node):
        """Yield the tokens for the children of *node*; subclasses must override."""
        raise NotImplementedError

    def element(self, node, namespace, name, attrs, hasChildren):
        """Yield the tokens for one element.

        A name found in ``voidElements`` produces the ``emptyTag`` tokens
        only. Any other element produces a start tag, the tokens from
        ``walkChildren(node)`` when *hasChildren* is true, and an end tag.
        """
        if name in voidElements:
            for token in self.emptyTag(namespace, name, attrs, hasChildren):
                yield token
        else:
            # startTag/endTag take the namespace as their first argument.
            yield self.startTag(namespace, name, attrs)
            if hasChildren:
                for token in self.walkChildren(node):
                    yield token
            yield self.endTag(namespace, name)
|
||||
|
||||
from xml.dom import Node

# Node-type tags compared against the first item of the tuple returned by
# getNodeDetails(). They reuse the numeric constants from xml.dom.Node.
ELEMENT = Node.ELEMENT_NODE
TEXT = Node.TEXT_NODE
ENTITY = Node.ENTITY_NODE
COMMENT = Node.COMMENT_NODE
DOCUMENT = Node.DOCUMENT_NODE
DOCTYPE = Node.DOCUMENT_TYPE_NODE

# Tag for nodes that match none of the types above; a string, so it cannot
# collide with the integer constants.
UNKNOWN = "<#UNKNOWN#>"
|
||||
|
||||
class NonRecursiveTreeWalker(TreeWalker):
    """Tree walker base that traverses the tree iteratively.

    Subclasses supply the four navigation hooks below; ``__iter__`` uses
    them to visit every node under ``self.tree`` without recursion.
    """

    def getNodeDetails(self, node):
        """Return a tuple describing *node*; subclasses must override.

        The first item is the node type (one of the module-level constants
        such as ``ELEMENT`` or ``TEXT``). The remaining items depend on the
        type; for ``ELEMENT`` they are
        ``(namespace, name, attributes, hasChildren)``.
        """
        raise NotImplementedError

    def getFirstChild(self, node):
        """Return the first child of *node*, or ``None``; subclasses must override."""
        raise NotImplementedError

    def getNextSibling(self, node):
        """Return the next sibling of *node*, or ``None``; subclasses must override."""
        raise NotImplementedError

    def getParentNode(self, node):
        """Return the parent of *node*, or ``None``; subclasses must override."""
        raise NotImplementedError

    def __iter__(self):
        """Yield the tokens for every node under ``self.tree`` in document order."""
        currentNode = self.tree
        while currentNode is not None:
            details = self.getNodeDetails(currentNode)
            nodeType, details = details[0], details[1:]
            hasChildren = False

            if nodeType == DOCTYPE:
                yield self.doctype(*details)

            elif nodeType == TEXT:
                for token in self.text(*details):
                    yield token

            elif nodeType == ELEMENT:
                namespace, name, attributes, hasChildren = details
                if name in voidElements:
                    for token in self.emptyTag(namespace, name, attributes,
                                               hasChildren):
                        yield token
                    # Never descend into a void element, even if the tree
                    # claims it has children.
                    hasChildren = False
                else:
                    yield self.startTag(namespace, name, attributes)

            elif nodeType == COMMENT:
                yield self.comment(details[0])

            elif nodeType == ENTITY:
                yield self.entity(details[0])

            elif nodeType == DOCUMENT:
                hasChildren = True

            else:
                yield self.unknown(details[0])

            if hasChildren:
                firstChild = self.getFirstChild(currentNode)
            else:
                firstChild = None

            if firstChild is not None:
                currentNode = firstChild
            else:
                # No children to visit: climb towards the root, closing each
                # non-void element on the way, until a next sibling is found
                # or the root of the walk is reached.
                while currentNode is not None:
                    details = self.getNodeDetails(currentNode)
                    nodeType, details = details[0], details[1:]
                    if nodeType == ELEMENT:
                        namespace, name, _attributes, _hasChildren = details
                        if name not in voidElements:
                            yield self.endTag(namespace, name)
                    if self.tree is currentNode:
                        currentNode = None
                        break
                    nextSibling = self.getNextSibling(currentNode)
                    if nextSibling is not None:
                        currentNode = nextSibling
                        break
                    else:
                        currentNode = self.getParentNode(currentNode)
|
||||
Reference in New Issue
Block a user