mirror of
https://github.com/djohnlewis/stackdump
synced 2025-01-22 22:51:36 +00:00
292 lines
11 KiB
Python
292 lines
11 KiB
Python
|
|
from xml.dom import minidom, Node, XML_NAMESPACE, XMLNS_NAMESPACE
|
|
try:
|
|
from types import ModuleType
|
|
except:
|
|
from new import module as ModuleType
|
|
import re
|
|
import weakref
|
|
|
|
import _base
|
|
from html5lib import constants, ihatexml
|
|
from html5lib.constants import namespaces
|
|
|
|
# Synthetic builder modules already created, keyed by their generated name.
moduleCache = {}


def getDomModule(DomImplementation):
    """Return a module object holding the builder for ``DomImplementation``.

    The module is named ``"_<implementation name>builder"`` and is populated
    with everything ``getDomBuilder`` returns.  It is built on first request
    and served from ``moduleCache`` afterwards.
    """
    moduleName = "_%sbuilder" % DomImplementation.__name__

    # Fast path: this implementation name has been seen before.
    if moduleName in moduleCache:
        return moduleCache[moduleName]

    builderModule = ModuleType(moduleName)
    builderModule.__dict__.update(getDomBuilder(DomImplementation))
    moduleCache[moduleName] = builderModule
    return builderModule
|
|
|
|
def getDomBuilder(DomImplementation):
    """Create the DOM tree-builder objects bound to ``DomImplementation``.

    ``DomImplementation`` must expose ``getDOMImplementation()`` (it is
    called by ``TreeBuilder`` below); ``xml.dom.minidom`` is the
    implementation this file itself passes in.

    Returns the function's ``locals()`` dict, i.e. a mapping with the keys
    ``DomImplementation``, ``Dom``, ``AttrList``, ``NodeBuilder``,
    ``TreeBuilder``, ``testSerializer`` and ``dom2sax``.  Do not introduce
    additional function-level names here: they would leak into that mapping.
    """
    Dom = DomImplementation

    class AttrList(object):
        """Mapping-style view over the attributes of a wrapped DOM element."""

        def __init__(self, element):
            self.element = element

        def __iter__(self):
            # Iterates over (name, value) pairs, not over bare names.
            return iter(self.element.attributes.items())

        def __setitem__(self, name, value):
            self.element.setAttribute(name, value)

        def __len__(self):
            return len(self.element.attributes.items())

        def items(self):
            """Return the attributes as a list of ``(name, value)`` tuples."""
            return [(item[0], item[1]) for item in
                    self.element.attributes.items()]

        def keys(self):
            return self.element.attributes.keys()

        def __getitem__(self, name):
            return self.element.getAttribute(name)

        def __contains__(self, name):
            # Tuple (namespaced) names are not supported for membership tests.
            if isinstance(name, tuple):
                raise NotImplementedError
            return self.element.hasAttribute(name)

    class NodeBuilder(_base.Node):
        """``_base.Node`` adapter around a single DOM node (``self.element``)."""

        def __init__(self, element):
            _base.Node.__init__(self, element.nodeName)
            self.element = element

        # The node's namespace URI, or None when the underlying node has no
        # ``namespaceURI`` attribute or it is empty.
        namespace = property(
            lambda self: getattr(self.element, "namespaceURI", None) or None)

        def appendChild(self, node):
            node.parent = self
            self.element.appendChild(node.element)

        def insertText(self, data, insertBefore=None):
            """Add a text node holding ``data``.

            The text is inserted before ``insertBefore`` when one is given,
            otherwise appended as the last child.
            """
            text = self.element.ownerDocument.createTextNode(data)
            if insertBefore:
                self.element.insertBefore(text, insertBefore.element)
            else:
                self.element.appendChild(text)

        def insertBefore(self, node, refNode):
            self.element.insertBefore(node.element, refNode.element)
            node.parent = self

        def removeChild(self, node):
            # Only detach from the DOM when the node really is our child; the
            # wrapper's parent link is cleared either way.
            if node.element.parentNode == self.element:
                self.element.removeChild(node.element)
            node.parent = None

        def reparentChildren(self, newParent):
            """Move every DOM child of this node to the end of ``newParent``."""
            while self.element.hasChildNodes():
                child = self.element.firstChild
                self.element.removeChild(child)
                newParent.element.appendChild(child)
            self.childNodes = []

        def getAttributes(self):
            return AttrList(self.element)

        def setAttributes(self, attributes):
            """Copy ``attributes`` (a mapping, possibly empty/None) onto the node.

            Keys are either plain attribute names or 3-tuples read as
            ``(prefix, localName, namespaceURI)``.
            """
            if attributes:
                for name, value in attributes.items():
                    if isinstance(name, tuple):
                        if name[0] is not None:
                            qualifiedName = name[0] + ":" + name[1]
                        else:
                            qualifiedName = name[1]
                        self.element.setAttributeNS(name[2], qualifiedName,
                                                    value)
                    else:
                        self.element.setAttribute(name, value)

        attributes = property(getAttributes, setAttributes)

        def cloneNode(self):
            # Shallow clone: children are not copied.
            return NodeBuilder(self.element.cloneNode(False))

        def hasContent(self):
            return self.element.hasChildNodes()

        def getNameTuple(self):
            """Return ``(namespace, name)``, defaulting to the HTML namespace."""
            if self.namespace is None:
                return namespaces["html"], self.name
            return self.namespace, self.name

        nameTuple = property(getNameTuple)

    class TreeBuilder(_base.TreeBuilder):
        """Tree builder that produces a DOM document through ``Dom``."""

        def documentClass(self):
            # The builder itself stands in for the document node; the real DOM
            # document is kept in self.dom.  A weak proxy is handed out
            # (presumably to avoid a builder <-> document reference cycle).
            self.dom = Dom.getDOMImplementation().createDocument(
                None, None, None)
            return weakref.proxy(self)

        def insertDoctype(self, token):
            """Create a doctype node from ``token`` and append it to the document."""
            name = token["name"]
            publicId = token["publicId"]
            systemId = token["systemId"]

            domimpl = Dom.getDOMImplementation()
            doctype = domimpl.createDocumentType(name, publicId, systemId)
            self.document.appendChild(NodeBuilder(doctype))
            if Dom == minidom:
                # minidom-specific fix-up: set the owner document explicitly.
                doctype.ownerDocument = self.dom

        def elementClass(self, name, namespace=None):
            if namespace is None and self.defaultNamespace is None:
                node = self.dom.createElement(name)
            else:
                node = self.dom.createElementNS(namespace, name)

            return NodeBuilder(node)

        def commentClass(self, data):
            return NodeBuilder(self.dom.createComment(data))

        def fragmentClass(self):
            return NodeBuilder(self.dom.createDocumentFragment())

        def appendChild(self, node):
            self.dom.appendChild(node.element)

        def testSerializer(self, element):
            return testSerializer(element)

        def getDocument(self):
            return self.dom

        def getFragment(self):
            return _base.TreeBuilder.getFragment(self).element

        def insertText(self, data, parent=None):
            """Insert text, special-casing text placed directly in the document."""
            # Equality (not identity) on purpose: ``parent`` may be the weak
            # proxy returned by documentClass(), which compares equal to self
            # but is a different object.
            if parent != self:
                _base.TreeBuilder.insertText(self, data, parent)
            else:
                # HACK: allow text nodes as children of the document node
                if hasattr(self.dom, '_child_node_types'):
                    if Node.TEXT_NODE not in self.dom._child_node_types:
                        self.dom._child_node_types = list(
                            self.dom._child_node_types)
                        self.dom._child_node_types.append(Node.TEXT_NODE)
                self.dom.appendChild(self.dom.createTextNode(data))

        name = None

    def testSerializer(element):
        """Serialize ``element`` and its subtree to the indented test format.

        The node is normalized first.  Every line except the
        ``#document`` / ``#document-fragment`` markers starts with ``|``
        followed by two spaces of indentation per nesting level.
        """
        element.normalize()
        rv = []

        def serializeElement(element, indent=0):
            if element.nodeType == Node.DOCUMENT_TYPE_NODE:
                if element.name:
                    if element.publicId or element.systemId:
                        publicId = element.publicId or ""
                        systemId = element.systemId or ""
                        rv.append("""|%s<!DOCTYPE %s "%s" "%s">""" % (
                            ' ' * indent, element.name, publicId, systemId))
                    else:
                        rv.append("|%s<!DOCTYPE %s>" % (' ' * indent,
                                                        element.name))
                else:
                    rv.append("|%s<!DOCTYPE >" % (' ' * indent,))
            elif element.nodeType == Node.DOCUMENT_NODE:
                rv.append("#document")
            elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
                rv.append("#document-fragment")
            elif element.nodeType == Node.COMMENT_NODE:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.nodeValue))
            elif element.nodeType == Node.TEXT_NODE:
                rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
            else:
                if (hasattr(element, "namespaceURI") and
                        element.namespaceURI is not None):
                    name = "%s %s" % (constants.prefixes[element.namespaceURI],
                                      element.nodeName)
                else:
                    name = element.nodeName
                rv.append("|%s<%s>" % (' ' * indent, name))
                if element.hasAttributes():
                    attributes = []
                    # Index-based access (length + item()) is kept so this
                    # works with any DOM NamedNodeMap, not only minidom's.
                    for i in range(len(element.attributes)):
                        attr = element.attributes.item(i)
                        value = attr.value
                        ns = attr.namespaceURI
                        if ns:
                            name = "%s %s" % (constants.prefixes[ns],
                                              attr.localName)
                        else:
                            name = attr.nodeName
                        attributes.append((name, value))

                    # Attributes are emitted in sorted order, one per line.
                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2),
                                                  name, value))
            indent += 2
            for child in element.childNodes:
                serializeElement(child, indent)

        serializeElement(element, 0)

        return "\n".join(rv)

    def dom2sax(node, handler, nsmap={'xml': XML_NAMESPACE}):
        """Walk ``node`` recursively and replay it as SAX events on ``handler``.

        ``nsmap`` maps namespace prefixes (``None`` for the default
        namespace) to URIs.  With an empty ``nsmap`` elements are reported
        through ``startElement``/``endElement``; otherwise the ``*NS``
        variants and prefix-mapping events are used.  The default ``nsmap``
        is shared between calls but never mutated: it is copied before any
        declaration is recorded.

        Text and CDATA nodes become ``characters`` events; documents are
        wrapped in ``startDocument``/``endDocument``; fragments just have
        their children replayed.  All other node types are ignored.
        """
        if node.nodeType == Node.ELEMENT_NODE:
            if not nsmap:
                handler.startElement(node.nodeName, node.attributes)
                for child in node.childNodes:
                    dom2sax(child, handler, nsmap)
                handler.endElement(node.nodeName)
            else:
                # Keyed by (namespaceURI, localName), as itemsNS() yields.
                attributes = dict(node.attributes.itemsNS())

                # gather namespace declarations
                prefixes = []
                for attrname in node.attributes.keys():
                    attr = node.getAttributeNode(attrname)
                    qname = attr.nodeName
                    isDeclaration = (
                        attr.namespaceURI == XMLNS_NAMESPACE or
                        (attr.namespaceURI is None and
                         (qname == 'xmlns' or qname.startswith('xmlns:'))))
                    if not isDeclaration:
                        continue
                    # "xmlns" declares the default namespace (prefix None);
                    # "xmlns:foo" declares the prefix "foo".
                    if qname == 'xmlns':
                        prefix = None
                    else:
                        prefix = qname.split(':', 1)[-1]
                    handler.startPrefixMapping(prefix, attr.nodeValue)
                    prefixes.append(prefix)
                    # Copy so the caller's (or the shared default) map is
                    # left untouched and the mapping is scoped to this element.
                    nsmap = nsmap.copy()
                    nsmap[prefix] = attr.nodeValue
                    # Declarations are not reported as ordinary attributes.
                    attributes.pop((attr.namespaceURI, attr.localName), None)

                # apply namespace declarations
                for attrname in node.attributes.keys():
                    attr = node.getAttributeNode(attrname)
                    if attr.namespaceURI is None and ':' in attr.nodeName:
                        prefix = attr.nodeName.split(':')[0]
                        if prefix in nsmap:
                            # Re-key the attribute under the namespace its
                            # prefix resolves to.
                            attributes.pop(
                                (attr.namespaceURI, attr.localName), None)
                            attributes[(nsmap[prefix], attr.localName)] = \
                                attr.nodeValue

                # SAX events
                ns = node.namespaceURI or nsmap.get(None)
                handler.startElementNS((ns, node.nodeName), node.nodeName,
                                       attributes)
                for child in node.childNodes:
                    dom2sax(child, handler, nsmap)
                handler.endElementNS((ns, node.nodeName), node.nodeName)
                for prefix in prefixes:
                    handler.endPrefixMapping(prefix)

        elif node.nodeType in [Node.TEXT_NODE, Node.CDATA_SECTION_NODE]:
            handler.characters(node.nodeValue)

        elif node.nodeType == Node.DOCUMENT_NODE:
            handler.startDocument()
            for child in node.childNodes:
                dom2sax(child, handler, nsmap)
            handler.endDocument()

        elif node.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
            for child in node.childNodes:
                dom2sax(child, handler, nsmap)

        else:
            # ATTRIBUTE_NODE
            # ENTITY_NODE
            # PROCESSING_INSTRUCTION_NODE
            # COMMENT_NODE
            # DOCUMENT_TYPE_NODE
            # NOTATION_NODE
            pass

    return locals()
|
|
|
|
# Keep backwards compatibility with things that directly load
# classes/functions from this module
for key, value in getDomModule(minidom).__dict__.items():
    # The synthetic module's own metadata (__name__, __doc__, ...) must not
    # overwrite this module's; only the builder objects are re-exported.
    if key.startswith("__") and key.endswith("__"):
        continue
    globals()[key] = value
|