1
0
mirror of https://github.com/djohnlewis/stackdump synced 2025-01-22 22:51:36 +00:00
stackdump/python/packages/formencode/rewritingparser.py

162 lines
4.9 KiB
Python

import cgi
import HTMLParser
import re
from htmlentitydefs import name2codepoint
def html_quote(v):
if v is None:
return ''
elif hasattr(v, '__html__'):
return v.__html__()
elif isinstance(v, basestring):
return cgi.escape(v, 1)
else:
if hasattr(v, '__unicode__'):
v = unicode(v)
else:
v = str(v)
return cgi.escape(v, 1)
class RewritingParser(HTMLParser.HTMLParser):
listener = None
skip_next = False
def __init__(self):
self._content = []
HTMLParser.HTMLParser.__init__(self)
def feed(self, data):
self.data_is_str = isinstance(data, str)
self.source = data
self.lines = data.split('\n')
self.source_pos = 1, 0
if self.listener:
self.listener.reset()
HTMLParser.HTMLParser.feed(self, data)
_entityref_re = re.compile('&([a-zA-Z][-.a-zA-Z\d]*);')
_charref_re = re.compile('&#(\d+|[xX][a-fA-F\d]+);')
def unescape(self, s):
s = self._entityref_re.sub(self._sub_entityref, s)
s = self._charref_re.sub(self._sub_charref, s)
return s
def _sub_entityref(self, match):
name = match.group(1)
if name not in name2codepoint:
# If we don't recognize it, pass it through as though it
# wasn't an entity ref at all
return match.group(0)
return unichr(name2codepoint[name])
def _sub_charref(self, match):
num = match.group(1)
if num.lower().startswith('x'):
num = int(num[1:], 16)
else:
num = int(num)
return unichr(num)
def handle_misc(self, whatever):
self.write_pos()
handle_charref = handle_misc
handle_entityref = handle_misc
handle_data = handle_misc
handle_comment = handle_misc
handle_decl = handle_misc
handle_pi = handle_misc
unknown_decl = handle_misc
handle_endtag = handle_misc
def write_tag(self, tag, attrs, startend=False):
attr_text = ''.join([' %s="%s"' % (n, html_quote(v))
for (n, v) in attrs
if not n.startswith('form:')])
if startend:
attr_text += " /"
self.write_text('<%s%s>' % (tag, attr_text))
def skip_output(self):
return False
def write_pos(self):
cur_line, cur_offset = self.getpos()
if self.skip_output():
self.source_pos = self.getpos()
return
if self.skip_next:
self.skip_next = False
self.source_pos = self.getpos()
return
if cur_line == self.source_pos[0]:
self.write_text(
self.lines[cur_line-1][self.source_pos[1]:cur_offset])
else:
self.write_text(
self.lines[self.source_pos[0]-1][self.source_pos[1]:])
self.write_text('\n')
for i in range(self.source_pos[0]+1, cur_line):
self.write_text(self.lines[i-1])
self.write_text('\n')
self.write_text(self.lines[cur_line-1][:cur_offset])
self.source_pos = self.getpos()
def write_text(self, text):
self._content.append(text)
def get_attr(self, attr, name, default=None):
for n, value in attr:
if n.lower() == name:
return value
return default
def set_attr(self, attr, name, value):
for i in range(len(attr)):
if attr[i][0].lower() == name:
attr[i] = (name, value)
return
attr.append((name, value))
def del_attr(self, attr, name):
for i in range(len(attr)):
if attr[i][0].lower() == name:
del attr[i]
break
def add_class(self, attr, class_name):
current = self.get_attr(attr, 'class', '')
new = current + ' ' + class_name
self.set_attr(attr, 'class', new.strip())
def text(self):
try:
return self._text
except AttributeError:
raise Exception(
"You must .close() a parser instance before getting "
"the text from it")
def _get_text(self):
try:
return ''.join([
t for t in self._content if not isinstance(t, tuple)])
except UnicodeDecodeError, e:
if self.data_is_str:
e.reason += (
" the form was passed in as an encoded string, but"
" some data or error messages were unicode strings;"
" the form should be passed in as a unicode string")
else:
e.reason += (
" the form was passed in as an unicode string, but"
" some data or error message was an encoded string;"
" the data and error messages should be passed in as"
" unicode strings")
raise