1
0
mirror of https://github.com/djohnlewis/stackdump synced 2025-12-07 00:13:33 +00:00

Initial commit. Still building up the env and some parsing code.

This commit is contained in:
Samuel Lai
2011-09-11 14:29:39 +10:00
commit af2eafeccd
301 changed files with 82327 additions and 0 deletions

View File

@@ -0,0 +1 @@
#

View File

@@ -0,0 +1,196 @@
"""
Exports a SQLObject class (possibly annotated) to a CSV file.
"""
import os
import csv
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
import sqlobject
# Public API of the CSV export module; helpers prefixed with ``_`` are private.
__all__ = ['export_csv', 'export_csv_zip']
def export_csv(soClass, select=None, writer=None, connection=None,
               orderBy=None):
    """
    Write the rows of a SQLObject class (or select result) as CSV.

    ``soClass`` is either a SQLObject class or a SelectResult as
    returned by ``.select()``.  When it is a SelectResult, ``select``
    must be left as ``None``; the class is then taken from the
    result's ``sourceClass``.

    ``select`` optionally supplies the rows to export.  When neither
    it nor a SelectResult is given, ``soClass.select()`` is used and,
    if the class has a truthy ``csvOrderBy`` attribute, that ordering
    is applied.  An explicit ``orderBy`` is applied afterwards.

    ``writer`` may be an object with a ``writerow`` method (such as a
    ``csv.writer()``) or a file-like object, which is wrapped in a
    ``csv.writer``.  When no writer is given the CSV text is collected
    in memory and returned as a string; otherwise nothing is returned.

    ``connection``, when given, is applied to the select via
    ``select.connection(connection)``.

    Column selection, titles and ordering are controlled by the
    ``noCSV``/``csvTitle``/``title`` column attributes and the
    ``extraCSVColumns``/``csvColumnOrder`` class attributes; see
    ``_find_columns``.
    """
    in_memory = None
    if not writer:
        # Nothing to write to: collect the output and return it at the end.
        in_memory = StringIO()
        writer = csv.writer(in_memory)
    elif not hasattr(writer, 'writerow'):
        # A plain file-like object was passed; wrap it.
        writer = csv.writer(writer)

    if isinstance(soClass, sqlobject.SQLObject.SelectResultsClass):
        assert select is None, (
            "You cannot pass in a select argument (%r) and a SelectResult argument (%r) for soClass"
            % (select, soClass))
        select, soClass = soClass, soClass.sourceClass
    elif select is None:
        select = soClass.select()
        if getattr(soClass, 'csvOrderBy', None):
            select = select.orderBy(soClass.csvOrderBy)

    if orderBy:
        select = select.orderBy(orderBy)
    if connection:
        select = select.connection(connection)

    _actually_export_csv(soClass, select, writer)

    if not in_memory:
        return None
    # No writer or file object was passed in, so hand back the text.
    return in_memory.getvalue()
def _actually_export_csv(soClass, select, writer):
    """
    Write a title row followed by one row per instance in ``select``.

    The attribute names and titles come from ``_find_columns(soClass)``;
    each data row holds the instance's value for every attribute, in
    that order.
    """
    column_attrs, header = _find_columns(soClass)
    writer.writerow(header)
    for record in select:
        values = []
        for column in column_attrs:
            values.append(getattr(record, column))
        writer.writerow(values)
def _find_columns(soClass):
    """
    Work out which attributes to export for ``soClass`` and their titles.

    Columns come from ``soClass.sqlmeta.columnList``; a column with a
    truthy ``noCSV`` attribute is skipped.  A column's title is its
    ``csvTitle`` attribute if present, else its ``title`` attribute if
    that is not ``None``, else the column name.

    Entries of the optional ``extraCSVColumns`` class attribute are
    appended after the columns.  Each entry is either an attribute
    name (also used as the title) or an ``(attribute, title)`` pair.

    If the class has a ``csvColumnOrder`` attribute, the attributes it
    names come first, followed by the remaining ones in their original
    order.

    Returns ``(order, titles)``: two parallel lists of attribute names
    and column titles.

    Raises ``KeyError`` when ``csvColumnOrder`` names an attribute that
    is not an exported column or extra column (or names one twice).
    """
    order = []
    attrs = {}
    for col in soClass.sqlmeta.columnList:
        if getattr(col, 'noCSV', False):
            continue
        order.append(col.name)
        title = col.name
        if hasattr(col, 'csvTitle'):
            title = col.csvTitle
        elif getattr(col, 'title', None) is not None:
            title = col.title
        attrs[col.name] = title
    for attrDesc in getattr(soClass, 'extraCSVColumns', []):
        if isinstance(attrDesc, (list, tuple)):
            attr, title = attrDesc
        else:
            attr = title = attrDesc
        order.append(attr)
        attrs[attr] = title
    if hasattr(soClass, 'csvColumnOrder'):
        remaining = order
        # Copy the class attribute: extending it in place would modify
        # the class itself, and would fail outright for a tuple.
        order = list(soClass.csvColumnOrder)
        for attr in order:
            if attr not in remaining:
                raise KeyError(
                    "Attribute %r in csvColumnOrder (on class %r) does not exist as a column or in .extraCSVColumns (I have: %r)"
                    % (attr, soClass, remaining))
            remaining.remove(attr)
        order.extend(remaining)
    titles = [attrs[attr] for attr in order]
    return order, titles
def export_csv_zip(soClasses, file=None, zip=None, filename_prefix='',
                   connection=None):
    """
    Export several SQLObject classes into a .zip file.

    Each item in the ``soClasses`` list may be a SQLObject class, a
    select result, or a ``(soClass, select)`` tuple.

    Each file in the zip is named after the class (with ``.csv``
    appended when the name has no extension), or after the class's
    ``.csvFilename`` attribute if it has one.  Every name is prefixed
    with ``filename_prefix`` (which may be a directory name, for
    instance).

    If ``file`` is given, the zip is written to it.  ``file`` may be a
    string (a filename, which is opened here and closed again when the
    export finishes) or a file-like object (which is left open).

    If ``zip`` is given, the files are written into that zip file.  A
    caller-supplied ``zip`` is left open only when ``file`` is not
    given.

    If neither ``file`` nor ``zip`` is given, the zip is built in
    memory and its contents are returned as a string; otherwise the
    function returns ``None``.
    """
    import zipfile
    close_file_when_finished = False
    close_zip_when_finished = True
    return_when_finished = False
    if file:
        if isinstance(file, basestring):
            # We opened it, so we are responsible for closing it.
            close_file_when_finished = True
            file = open(file, 'wb')
    elif zip:
        close_zip_when_finished = False
    else:
        return_when_finished = True
        file = StringIO()
    if not zip:
        zip = zipfile.ZipFile(file, mode='w')
    try:
        _actually_export_classes(soClasses, zip, filename_prefix,
                                 connection)
    finally:
        try:
            if close_zip_when_finished:
                zip.close()
        finally:
            # Release the file handle even if closing the zip failed.
            if close_file_when_finished:
                file.close()
    if return_when_finished:
        return file.getvalue()
def _actually_export_classes(soClasses, zip, filename_prefix,
                             connection):
    """
    Add one CSV member to ``zip`` for every entry of ``soClasses``.

    An entry is a ``(soClass, select)`` pair, a select result (whose
    class is read from ``sourceClass``), or a bare class.  The member
    name is the class's ``csvFilename`` attribute, falling back to the
    class name, with ``.csv`` appended when it has no extension and
    ``filename_prefix`` put in front.
    """
    for entry in soClasses:
        if isinstance(entry, (tuple, list)):
            soClass, select = entry
        elif isinstance(entry, sqlobject.SQLObject.SelectResultsClass):
            select, soClass = entry, entry.sourceClass
        else:
            soClass, select = entry, None
        member_name = getattr(soClass, 'csvFilename', soClass.__name__)
        extension = os.path.splitext(member_name)[1]
        if not extension:
            member_name += '.csv'
        member_name = filename_prefix + member_name
        csv_text = export_csv(soClass, select, connection=connection)
        zip.writestr(member_name, csv_text)

View File

@@ -0,0 +1,349 @@
"""
Import from a CSV file or directory of files.
CSV files should have a header line that lists columns. Headers can
also be appended with ``:type`` to indicate the type of the field.
``escaped`` is the default, though it can be overridden by the importer.
Supported types:
``:python``:
A python expression, run through ``eval()``. This can be a
security risk, pass in ``allow_python=False`` if you don't want to
allow it.
``:int``:
Integer
``:float``:
Float
``:str``:
String
``:escaped``:
A string with backslash escapes (note that you don't put quotation
marks around the value)
``:base64``:
A base64-encoded string
``:date``:
ISO date, like YYYY-MM-DD; this can also be ``NOW+days`` or
``NOW-days``
``:datetime``:
ISO date/time like YYYY-MM-DDTHH:MM:SS (either T or a space can be
used to separate the time, and seconds are optional). This can
also be ``NOW+seconds`` or ``NOW-seconds``
``:bool``:
Converts true/false/yes/no/on/off/1/0 to boolean value
``:ref``:
This will be resolved to the ID of the object named in this column
(None if the column is empty). @@: Since there's no ordering,
there's no way to promise the object already exists.
You can also get back references to the objects if you have a special
``[name]`` column.
Any column named ``[comment]`` or with no name will be ignored.
In any column you can put ``[default]`` to exclude the value and use
whatever default the class wants. ``[null]`` will use NULL.
Lines that begin with ``[comment]`` are ignored.
"""
from datetime import datetime, date, timedelta
import os
import csv
import types
# Public API of the CSV import module.
__all__ = ['load_csv_from_directory',
'load_csv',
'create_data']
# Column type used for headers without a ``:type`` suffix when the caller
# does not pass its own ``default_type``.
DEFAULT_TYPE = 'escaped'
def create_data(data, class_getter, keyorder=None):
    """
    Instantiate objects from ``data`` (the return value of ``load_csv()``).

    ``class_getter`` resolves a class name to a class: it is either a
    callable taking the name, or a module whose attributes are the
    classes.

    ``keyorder`` controls the order in which class names are processed.
    It can be a list/tuple of names (names in ``data`` but not listed
    are processed afterwards) or a comparison function handed to
    ``list.sort``.  If it is not given and ``class_getter`` is a module
    with a ``soClasses`` attribute, the ``__name__`` of each entry of
    that attribute gives the order.  Otherwise class names are sorted.

    ``Reference`` values in a row are replaced by the ``id`` of the
    previously created object with that name.

    Returns a dictionary of ``{object_name: object(s)}`` built from the
    ``[name]`` columns; a name used more than once maps to a list of
    objects rather than a single object.

    Raises ``ValueError`` when a reference has no target yet or refers
    to a name that maps to several objects.
    """
    getter_is_module = isinstance(class_getter, types.ModuleType)
    created = {}
    classnames = data.keys()
    if (not keyorder and getter_is_module
            and hasattr(class_getter, 'soClasses')):
        keyorder = [c.__name__ for c in class_getter.soClasses]

    if not keyorder:
        classnames.sort()
    elif isinstance(keyorder, (list, tuple)):
        available = classnames
        classnames = [name for name in keyorder if name in available]
        # Anything not mentioned in keyorder goes at the end.
        for name in available:
            if name not in classnames:
                classnames.append(name)
    else:
        classnames.sort(keyorder)

    for classname in classnames:
        rows = data[classname]
        if not rows:
            continue
        if getter_is_module:
            soClass = getattr(class_getter, classname)
        else:
            soClass = class_getter(classname)
        for row in rows:
            for key, value in row.items():
                if not isinstance(value, Reference):
                    continue
                target = created.get(value.name)
                if not target:
                    raise ValueError(
                        "Object reference to %r does not have target"
                        % value.name)
                if isinstance(target, list) and len(target) > 1:
                    raise ValueError(
                        "Object reference to %r is ambiguous (got %r)"
                        % (value.name, target))
                row[key] = target.id
            label = None
            if '[name]' in row:
                label = row.pop('[name]').strip()
            instance = soClass(**row)
            if not label:
                continue
            if label not in created:
                created[label] = instance
            elif isinstance(created[label], list):
                created[label].append(instance)
            else:
                created[label] = [created[label], instance]
    return created
def load_csv_from_directory(directory,
                            allow_python=True, default_type=DEFAULT_TYPE,
                            allow_multiple_classes=True):
    """
    Load the data from every ``.csv`` file in ``directory``.

    Files whose extension is not ``.csv`` (compared case-insensitively)
    are skipped.  Each file's base name (without the extension) is
    passed to ``load_csv`` as the default class, and the remaining
    arguments are forwarded unchanged.

    Returns data in the same shape as ``load_csv``: a dictionary
    mapping class names to lists of row dictionaries.  Rows for a class
    that appears in several files are concatenated.

    This might cause problems on case-insensitive filesystems.
    """
    results = {}
    # Sorted so the order of concatenated rows does not depend on the
    # arbitrary order in which the OS lists the directory.
    for filename in sorted(os.listdir(directory)):
        base, ext = os.path.splitext(filename)
        if ext.lower() != '.csv':
            continue
        f = open(os.path.join(directory, filename), 'rb')
        try:
            data = load_csv(csv.reader(f), allow_python=allow_python,
                            default_type=default_type,
                            default_class=base,
                            allow_multiple_classes=allow_multiple_classes)
        finally:
            # Close the file even when the CSV content is rejected.
            f.close()
        for classname, items in data.items():
            results.setdefault(classname, []).extend(items)
    return results
def load_csv(csvreader, allow_python=True, default_type=DEFAULT_TYPE,
             default_class=None, allow_multiple_classes=True):
    """
    Load rows from ``csvreader`` and coerce each cell to its column type.

    ``csvreader`` is any iterable of rows, each row a list of strings
    (for instance a ``csv.reader``).  Rows whose cells are all blank
    are skipped.  A row whose first cell is ``CLASS:`` switches the
    current class to the name in the second cell and resets the
    headers; the next non-blank row is then read as the header row.
    Rows whose first cell is ``[comment]`` are ignored.  Short rows are
    padded with empty strings.  A ``[default]`` cell leaves the column
    out of the row dictionary and ``[null]`` stores ``None``.

    ``allow_python`` -- when false, a ``:python`` column type is
    rejected.
    ``default_type`` -- type for headers without a ``:type`` suffix.
    ``default_class`` -- class name used until a ``CLASS:`` row is seen.
    ``allow_multiple_classes`` -- when false, ``CLASS:`` rows are
    rejected.

    Returns a dictionary mapping each class name to a list of
    ``{column_name: coerced_value}`` dictionaries, one per data row.

    Raises ``ValueError`` for a disallowed or incomplete ``CLASS:``
    row, for data with no class in effect, and for invalid headers.
    """
    current_class = default_class
    current_headers = None
    results = {}
    for row in csvreader:
        if not [cell for cell in row if cell.strip()]:
            # empty row
            continue
        if row and row[0].strip() == 'CLASS:':
            if not allow_multiple_classes:
                raise ValueError(
                    "CLASS: line in CSV file, but multiple classes are not allowed in this file (line: %r)"
                    % row)
            if not row[1:]:
                raise ValueError(
                    "CLASS: in line in CSV file, with no class name in next column (line: %r)"
                    % row)
            current_class = row[1]
            current_headers = None
            continue
        if not current_class:
            raise ValueError(
                "No CLASS: line given, and there is no default class for this file (line: %r"
                % row)
        if current_headers is None:
            current_headers = _parse_headers(row, default_type,
                                             allow_python=allow_python)
            continue
        if row[0] == '[comment]':
            continue
        # Pad a copy with empty strings so the caller's row is untouched.
        cells = list(row) + [''] * (len(current_headers) - len(row))
        row_converted = {}
        for value, (name, coercer, args) in zip(cells, current_headers):
            if name is None:
                # Comment
                continue
            if value == '[default]':
                continue
            if value == '[null]':
                row_converted[name] = None
                continue
            row_converted[name] = coercer(value, *args)
        results.setdefault(current_class, []).append(row_converted)
    return results


def _parse_headers(header_row, default_type, allow_python=True):
    """
    Turn a header row into a list of ``(name, coercer, args)`` tuples.

    Each header is ``name`` or ``name:type``; ``default_type`` is used
    when no type is given.  Headers that are empty or named
    ``[comment]`` yield ``(None, None, None)`` so the column is
    skipped.  A ``[name]`` column is always treated as ``str``.

    Raises ``ValueError`` for a ``:python`` type when ``allow_python``
    is false, for an unterminated ``(...)`` argument, and (through
    ``get_coercer``) for an unknown type.
    """
    headers = []
    for name in header_row:
        original_name = name
        if ':' in name:
            name, type_ = name.split(':', 1)
        else:
            type_ = default_type
        # Compare the normalised type so that variants such as
        # ``: Python`` or ``:python(x)`` cannot slip past the check.
        if (not allow_python
                and type_.split('(', 1)[0].strip().lower() == 'python'):
            raise ValueError(
                ":python header given when python headers are not allowed (with header %r"
                % original_name)
        name = name.strip()
        if name == '[comment]' or not name:
            headers.append((None, None, None))
            continue
        type_ = type_.strip().lower()
        if '(' in type_:
            type_, arg = type_.split('(', 1)
            if not arg.endswith(')'):
                raise ValueError(
                    "Arguments (in ()'s) do not end with ): %r"
                    % original_name)
            # NOTE(review): the parenthesised argument is validated but
            # then discarded -- only the arguments registered with the
            # coercer are used below.  Confirm whether it was meant to
            # be passed on.
        if name == '[name]':
            type_ = 'str'
        coercer, args = get_coercer(type_)
        headers.append((name, coercer, args))
    return headers
# Registry of column types: type name -> (coercer callable, extra args).
_coercers = {}


def get_coercer(type):
    """
    Look up the ``(coercer, args)`` pair registered for ``type``.

    Raises ``ValueError`` listing the known type names when ``type``
    has not been registered.
    """
    try:
        return _coercers[type]
    except KeyError:
        raise ValueError(
            "Coercion type %r not known (I know: %s)"
            % (type, ', '.join(_coercers.keys())))


def register_coercer(type, coercer, *args):
    """
    Register ``coercer`` as the converter for column type ``type``.

    Any extra positional ``args`` are stored alongside it; a later
    registration under the same name replaces the earlier one.
    """
    entry = (coercer, args)
    _coercers[type] = entry
def identity(v):
    """Return ``v`` unchanged; the coercer for plain string columns."""
    return v
# Both spellings of the string type pass the cell through untouched.
register_coercer('str', identity)
register_coercer('string', identity)
def decode_string(v, encoding):
    """Return ``v`` decoded with the codec named ``encoding``."""
    decoded = v.decode(encoding)
    return decoded
# 'escaped' and 'strescaped' are decoded with the 'string_escape' codec,
# 'base64' with the 'base64' codec; the codec name is stored as the
# extra argument handed to decode_string.
register_coercer('escaped', decode_string, 'string_escape')
register_coercer('strescaped', decode_string, 'string_escape')
register_coercer('base64', decode_string, 'base64')
# Numeric columns are converted by the built-in constructors.
register_coercer('int', int)
register_coercer('float', float)
def parse_python(v):
    """
    Evaluate ``v`` as a Python expression and return the result.

    The expression is evaluated with empty global and local
    namespaces.
    """
    # SECURITY: eval() on CSV content runs arbitrary code.  Empty
    # namespaces are not a sandbox (builtins remain reachable), so this
    # coercer must only be used on trusted files.
    empty_globals = {}
    empty_locals = {}
    return eval(v, empty_globals, empty_locals)
# The ':python' column type evaluates the cell with parse_python.
register_coercer('python', parse_python)
def parse_date(v):
    """
    Convert a ``:date`` cell into a ``datetime.date``.

    Surrounding whitespace is ignored.  An empty cell gives ``None``.
    ``NOW+days`` / ``NOW-days`` give today's date shifted by that many
    days.  Anything else must be an ISO date (``YYYY-MM-DD``).

    Raises ``ValueError`` when the value is not in one of those forms.
    """
    v = v.strip()
    if not v:
        return None
    if v.startswith('NOW-') or v.startswith('NOW+'):
        # v[3:] keeps the sign, so int() yields a signed day offset.
        days = int(v[3:])
        return date.today() + timedelta(days)
    # Parse with datetime.strptime: the ``time`` module is not imported
    # here, and this avoids a round trip through a local timestamp.
    return datetime.strptime(v, '%Y-%m-%d').date()
# The ':date' column type is converted by parse_date.
register_coercer('date', parse_date)
def parse_datetime(v):
    """
    Convert a ``:datetime`` cell into a ``datetime.datetime``.

    Surrounding whitespace is ignored.  An empty cell gives ``None``.
    ``NOW+seconds`` / ``NOW-seconds`` give the current local time
    shifted by that many seconds.  Anything else must look like
    ``YYYY-MM-DDTHH:MM:SS``, where the ``T`` may be a space and the
    seconds may be omitted.

    Raises ``ValueError`` when the value matches none of those forms.
    """
    v = v.strip()
    if not v:
        return None
    if v.startswith('NOW-') or v.startswith('NOW+'):
        # v[3:] keeps the sign, so int() yields a signed offset.
        seconds = int(v[3:])
        return datetime.now() + timedelta(0, seconds)
    fmts = ['%Y-%m-%dT%H:%M:%S',
            '%Y-%m-%d %H:%M:%S',
            '%Y-%m-%dT%H:%M',
            '%Y-%m-%d %H:%M']
    # Parse with datetime.strptime: the ``time`` module is not imported
    # here, and this avoids a round trip through a local timestamp.
    for fmt in fmts[:-1]:
        try:
            return datetime.strptime(v, fmt)
        except ValueError:
            pass
    # Let the last format's ValueError propagate to the caller.
    return datetime.strptime(v, fmts[-1])
# The ':datetime' column type is converted by parse_datetime.
register_coercer('datetime', parse_datetime)
class Reference(object):
    """Marker holding the ``name`` of another object, for ``:ref`` cells."""

    def __init__(self, name):
        # Name of the object this reference points at.
        self.name = name
def parse_ref(v):
    """
    Convert a ``:ref`` cell into a ``Reference``.

    A blank (empty or whitespace-only) cell gives ``None``; otherwise
    the cell text, exactly as given, becomes the reference name.
    """
    if v.strip():
        return Reference(v)
    return None
# The ':ref' column type is converted by parse_ref.
register_coercer('ref', parse_ref)
def parse_bool(v):
    """
    Convert a ``:bool`` cell into ``True`` or ``False``.

    The comparison ignores case and surrounding whitespace.
    ``y/yes/t/true/on/1`` give ``True``; ``n/no/f/false/off/0`` give
    ``False``.

    Raises ``ValueError`` for any other value, including an empty cell.
    """
    normalized = v.strip().lower()
    if normalized in ('y', 'yes', 't', 'true', 'on', '1'):
        return True
    if normalized in ('n', 'no', 'f', 'false', 'off', '0'):
        return False
    # Report the cell as it was given, not the normalised form.
    raise ValueError(
        "Value is not boolean-like: %r" % (v,))
# Both spellings of the boolean type are converted by parse_bool.
register_coercer('bool', parse_bool)
register_coercer('boolean', parse_bool)

View File

@@ -0,0 +1,42 @@
import sys
import imp
def load_module(module_name):
    """
    Import ``module_name`` and return the module it names.

    ``__import__`` returns the top-level package for a dotted name, so
    the remaining components are resolved by attribute access.
    """
    module = __import__(module_name)
    for attribute in module_name.split('.')[1:]:
        module = getattr(module, attribute)
    return module
def load_module_from_name(filename, module_name):
    """
    Load the module ``module_name`` from the directory holding ``filename``.

    If the module is already in ``sys.modules`` it is returned as is.
    Otherwise an ``__init__.py`` is created in the directory of
    ``filename`` when one does not exist, parent packages of a dotted
    ``module_name`` are loaded first (from the directory above), and
    the module is located and loaded with ``imp``.

    Raises ``IOError`` when the ``__init__.py`` file cannot be created.
    """
    # Imported locally: this module's header only imports sys and imp.
    import os
    if module_name in sys.modules:
        return sys.modules[module_name]
    directory = os.path.dirname(filename)
    init_filename = os.path.join(directory, '__init__.py')
    if not os.path.exists(init_filename):
        try:
            f = open(init_filename, 'w')
        except (OSError, IOError):
            # sys.exc_info() keeps this independent of the
            # ``except X, e`` / ``except X as e`` syntax difference.
            e = sys.exc_info()[1]
            raise IOError(
                'Cannot write __init__.py file into directory %s (%s)\n'
                % (directory, e))
        try:
            f.write('#\n')
        finally:
            f.close()
    if '.' in module_name:
        parent_name, base_name = module_name.rsplit('.', 1)
        # Called for its side effect of importing the parent package.
        load_module_from_name(directory, parent_name)
    else:
        base_name = module_name
    fp = None
    try:
        fp, pathname, stuff = imp.find_module(base_name, [directory])
        module = imp.load_module(module_name, fp, pathname, stuff)
    finally:
        # find_module may hand back an open file; always release it.
        if fp is not None:
            fp.close()
    return module

View File

@@ -0,0 +1,6 @@
# Expose a ``local`` class for per-thread storage.
try:
    from threading import local
except ImportError:
    # No threads, so "thread local" means process-global: a plain
    # attribute container is enough.
    class local(object):
        pass