Initial commit. Still building up the env and some parsing code.

2026-02-12 17:03:30 +00:00 · 2011-09-11 14:29:39 +10:00
commit af2eafeccd
301 changed files with 82327 additions and 0 deletions
--- a/python/packages/sqlobject/util/init.py
+++ b/python/packages/sqlobject/util/init.py
@@ -0,0 +1 @@
+#
--- a/python/packages/sqlobject/util/csvexport.py
+++ b/python/packages/sqlobject/util/csvexport.py
@@ -0,0 +1,196 @@
+"""
+Exports a SQLObject class (possibly annotated) to a CSV file.
+"""
+import os
+import csv
+try:
+    from cStringIO import StringIO
+except ImportError:
+    from StringIO import StringIO
+import sqlobject
+
+__all__ = ['export_csv', 'export_csv_zip']
+
+def export_csv(soClass, select=None, writer=None, connection=None,
+               orderBy=None):
+    """
+    Write the rows of a SQLObject class (or select result) as CSV.
+
+    ``soClass`` is a SQLObject class or a SelectResult (what
+    ``.select()`` returns).  Given a class and no ``select``, every
+    row is fetched, ordered by the class's ``.csvOrderBy`` attribute
+    when it has one.  Given a SelectResult, ``select`` must be left
+    as None.
+
+    ``select`` supplies the rows explicitly (a SelectResult or a plain
+    list of instances); in that case ``soClass`` must be the class.
+
+    ``writer`` is a ``csv.writer()`` object or a file-like object.
+    When it is omitted the CSV text is returned as a string; otherwise
+    nothing is returned.
+
+    ``orderBy`` and ``connection``, when given, are applied to the
+    select via ``.orderBy()`` and ``.connection()``.
+
+    Column annotations honoured (see ``_find_columns``):
+
+    * ``.csvTitle`` on a column, then ``.title``, then the column
+      name, gives the header text;
+    * a true ``.noCSV`` on a column suppresses it;
+    * ``.extraCSVColumns`` on the class lists extra attributes, each
+      either a string or an ``(attribute, title)`` pair, appended
+      after the real columns and read from each instance;
+    * ``.csvColumnOrder`` on the class lists attribute names in the
+      order they should appear.
+    """
+
+    # Only set when we own the output buffer and must return its text.
+    buffer = None
+    if not writer:
+        buffer = StringIO()
+        writer = csv.writer(buffer)
+    elif not hasattr(writer, 'writerow'):
+        # A file-like object rather than a csv writer: wrap it.
+        writer = csv.writer(writer)
+
+    if isinstance(soClass, sqlobject.SQLObject.SelectResultsClass):
+        assert select is None, (
+            "You cannot pass in a select argument (%r) and a SelectResult argument (%r) for soClass"
+            % (select, soClass))
+        select, soClass = soClass, soClass.sourceClass
+    elif select is None:
+        select = soClass.select()
+        default_order = getattr(soClass, 'csvOrderBy', None)
+        if default_order:
+            select = select.orderBy(default_order)
+
+    if orderBy:
+        select = select.orderBy(orderBy)
+    if connection:
+        select = select.connection(connection)
+
+    _actually_export_csv(soClass, select, writer)
+
+    if buffer:
+        # No writer or file object was passed in, so hand back the
+        # accumulated CSV text.
+        return buffer.getvalue()
+    return None
+
+def _actually_export_csv(soClass, select, writer):
+    """
+    Write a title row, then one row per instance in ``select``.
+
+    The columns and their titles come from ``_find_columns(soClass)``;
+    each cell is read from the instance with ``getattr``.
+    """
+    column_attrs, header = _find_columns(soClass)
+    writer.writerow(header)
+    for record in select:
+        cells = []
+        for attr_name in column_attrs:
+            cells.append(getattr(record, attr_name))
+        writer.writerow(cells)
+
+def _find_columns(soClass):
+    """
+    Return ``(attributes, titles)`` describing the CSV columns.
+
+    ``attributes`` is the ordered list of attribute names to read from
+    each instance and ``titles`` the matching header strings.
+
+    Columns come from ``soClass.sqlmeta.columnList`` (skipping any with
+    a true ``noCSV``), followed by the entries of the optional
+    ``extraCSVColumns`` class attribute.  If the class defines
+    ``csvColumnOrder`` those names come first, in that order, followed
+    by the remaining attributes in their original order.
+
+    Raises ``KeyError`` if ``csvColumnOrder`` names an attribute that
+    is neither an exported column nor in ``extraCSVColumns`` (or names
+    the same attribute twice).
+    """
+    order = []
+    attrs = {}
+    for col in soClass.sqlmeta.columnList:
+        if getattr(col, 'noCSV', False):
+            continue
+        order.append(col.name)
+        # Title precedence: csvTitle, then a non-None title, then name.
+        title = col.name
+        if hasattr(col, 'csvTitle'):
+            title = col.csvTitle
+        elif getattr(col, 'title', None) is not None:
+            title = col.title
+        attrs[col.name] = title
+
+    for attrDesc in getattr(soClass, 'extraCSVColumns', []):
+        if isinstance(attrDesc, (list, tuple)):
+            attr, title = attrDesc
+        else:
+            attr = title = attrDesc
+        order.append(attr)
+        attrs[attr] = title
+
+    if hasattr(soClass, 'csvColumnOrder'):
+        remaining = order
+        # Copy the class attribute: extending it in place would modify
+        # the class itself and would fail outright for a tuple.
+        order = list(soClass.csvColumnOrder)
+        for attr in order:
+            if attr not in remaining:
+                raise KeyError(
+                    "Attribute %r in csvColumnOrder (on class %r) does not exist as a column or in .extraCSVColumns (I have: %r)"
+                    % (attr, soClass, remaining))
+            remaining.remove(attr)
+        order.extend(remaining)
+
+    titles = [attrs[attr] for attr in order]
+    return order, titles
+
+
+def export_csv_zip(soClasses, file=None, zip=None, filename_prefix='',
+                   connection=None):
+    """
+    Export several SQLObject classes into a .zip file.  Each
+    item in the ``soClasses`` list may be a SQLObject class,
+    select result, or ``(soClass, select)`` tuple.
+
+    Each file in the zip will be named after the class name (with
+    ``.csv`` appended), or using the filename in the ``.csvFilename``
+    attribute.
+
+    If ``file`` is given, the zip will be written to that.  ``file``
+    may be a string (a filename) or a file-like object.  A file
+    opened here from a filename is closed before returning; a
+    file-like object passed in is left open.  If neither ``file`` nor
+    ``zip`` is given, the zip data is returned as a string.
+
+    If ``zip`` is given, then the files will be written to that zip
+    file.  When ``zip`` is given without ``file`` it is left open for
+    the caller; in every other case the zip is closed here.
+
+    All filenames will be prefixed with ``filename_prefix`` (which may
+    be a directory name, for instance).
+
+    ``connection`` is passed through to ``export_csv`` for each class.
+    """
+    import zipfile
+    close_file_when_finished = False
+    close_zip_when_finished = True
+    return_when_finished = False
+    if file:
+        if isinstance(file, basestring):
+            # We open the file, so we are responsible for closing it.
+            # (This flag used to be assigned under a misspelled name,
+            # which left the file open.)
+            close_file_when_finished = True
+            file = open(file, 'wb')
+    elif zip:
+        close_zip_when_finished = False
+    else:
+        return_when_finished = True
+        file = StringIO()
+
+    if not zip:
+        zip = zipfile.ZipFile(file, mode='w')
+
+    try:
+        _actually_export_classes(soClasses, zip, filename_prefix,
+                                 connection)
+    finally:
+        # Close the zip first so its directory is written before the
+        # underlying file goes away.
+        if close_zip_when_finished:
+            zip.close()
+        if close_file_when_finished:
+            file.close()
+
+    if return_when_finished:
+        return file.getvalue()
+
+def _actually_export_classes(soClasses, zip, filename_prefix,
+                             connection):
+    """
+    Add one CSV member to ``zip`` for every entry in ``soClasses``.
+
+    An entry is a ``(soClass, select)`` pair, a select result (whose
+    ``sourceClass`` supplies the class), or a bare class.  The member
+    name is the class's ``csvFilename`` attribute or, failing that,
+    its ``__name__``; ``.csv`` is appended when the name has no
+    extension, and ``filename_prefix`` is put in front.
+    """
+    for entry in soClasses:
+        if isinstance(entry, (tuple, list)):
+            soClass, select = entry
+        elif isinstance(entry, sqlobject.SQLObject.SelectResultsClass):
+            soClass, select = entry.sourceClass, entry
+        else:
+            soClass, select = entry, None
+        member = getattr(soClass, 'csvFilename', soClass.__name__)
+        extension = os.path.splitext(member)[1]
+        if not extension:
+            member = member + '.csv'
+        zip.writestr(filename_prefix + member,
+                     export_csv(soClass, select, connection=connection))
--- a/python/packages/sqlobject/util/csvimport.py
+++ b/python/packages/sqlobject/util/csvimport.py
@@ -0,0 +1,349 @@
+"""
+Import from a CSV file or directory of files.
+
+CSV files should have a header line that lists columns.  Headers can
+also be appended with ``:type`` to indicate the type of the field.
+``escaped`` is the default, though it can be overridden by the importer.
+Supported types:
+
+``:python``:
+    A python expression, run through ``eval()``.  This can be a
+    security risk, pass in ``allow_python=False`` if you don't want to
+    allow it.
+    
+``:int``:
+    Integer
+
+``:float``:
+    Float
+
+``:str``:
+    String
+
+``:escaped``:
+    A string with backslash escapes (note that you don't put quotation
+    marks around the value)
+
+``:base64``:
+    A base64-encoded string
+
+``:date``:
+    ISO date, like YYYY-MM-DD; this can also be ``NOW+days`` or
+    ``NOW-days``
+
+``:datetime``:
+    ISO date/time like YYYY-MM-DDTHH:MM:SS (either T or a space can be
+    used to separate the time, and seconds are optional).  This can
+    also be ``NOW+seconds`` or ``NOW-seconds``
+
+``:bool``:
+    Converts true/false/yes/no/on/off/1/0 to boolean value
+
+``:ref``:
+    This will be resolved to the ID of the object named in this column
+    (None if the column is empty).  @@: Since there's no ordering,
+    there's no way to promise the object already exists.
+
+You can also get back references to the objects if you have a special
+``[name]`` column.
+
+Any column named ``[comment]`` or with no name will be ignored.
+
+In any column you can put ``[default]`` to exclude the value and use
+whatever default the class wants.  ``[null]`` will use NULL.
+
+Lines that begin with ``[comment]`` are ignored.
+"""
+
+from datetime import datetime, date, timedelta
+import os
+import csv
+import types
+
+__all__ = ['load_csv_from_directory',
+           'load_csv',
+           'create_data']
+
+DEFAULT_TYPE = 'escaped'
+
+def create_data(data, class_getter, keyorder=None):
+    """
+    Instantiate the objects described by ``data`` (the return value
+    of ``load_csv()``).
+
+    ``class_getter`` resolves class names: if it is a module the class
+    is looked up as an attribute of it, otherwise it is called with
+    the class name.
+
+    Returns ``{object_name: object(s)}`` keyed by the values of the
+    ``[name]`` columns (if any).  A name used more than once maps to a
+    list of objects instead of a single object.
+
+    ``keyorder`` controls the order in which classes are processed.
+    It may be a list/tuple of class names (names it omits are
+    processed afterwards) or a comparison function handed to
+    ``list.sort``.  When it is not given and ``class_getter`` is a
+    module with a ``soClasses`` attribute, the ``__name__`` of each
+    entry of ``soClasses`` is used.  Otherwise class names are sorted.
+
+    Raises ``ValueError`` when a ``:ref`` value names an object that
+    has not been created yet, or one that is ambiguous.
+
+    Note that the row dictionaries inside ``data`` are modified:
+    references are replaced by ids and ``[name]`` entries are removed.
+    """
+    created = {}
+    getter_is_module = isinstance(class_getter, types.ModuleType)
+    classnames = data.keys()
+    if (not keyorder and getter_is_module
+        and hasattr(class_getter, 'soClasses')):
+        keyorder = [cls.__name__ for cls in class_getter.soClasses]
+
+    if not keyorder:
+        classnames.sort()
+    elif isinstance(keyorder, (list, tuple)):
+        available = classnames
+        classnames = [name for name in keyorder if name in available]
+        # Anything the explicit order left out goes at the end.
+        leftovers = [name for name in available
+                     if name not in classnames]
+        classnames += leftovers
+    else:
+        classnames.sort(keyorder)
+
+    for classname in classnames:
+        rows = data[classname]
+        if not rows:
+            continue
+        if getter_is_module:
+            soClass = getattr(class_getter, classname)
+        else:
+            soClass = class_getter(classname)
+        for item in rows:
+            # Swap each Reference for the id of the object it names.
+            for key, value in item.items():
+                if not isinstance(value, Reference):
+                    continue
+                target = created.get(value.name)
+                if not target:
+                    raise ValueError(
+                        "Object reference to %r does not have target"
+                        % value.name)
+                if isinstance(target, list) and len(target) > 1:
+                    raise ValueError(
+                        "Object reference to %r is ambiguous (got %r)"
+                        % (value.name, target))
+                item[key] = target.id
+
+            name = None
+            if '[name]' in item:
+                name = item.pop('[name]').strip()
+            inst = soClass(**item)
+            if not name:
+                continue
+            if name not in created:
+                created[name] = inst
+            elif isinstance(created[name], list):
+                created[name].append(inst)
+            else:
+                created[name] = [created[name], inst]
+    return created
+                
+
+def load_csv_from_directory(directory,
+                            allow_python=True, default_type=DEFAULT_TYPE,
+                            allow_multiple_classes=True):
+    """
+    Load the data from all the ``.csv`` files in a directory.
+
+    Each file's base name (without extension) is passed to
+    ``load_csv`` as the default class for that file; the remaining
+    arguments are passed through unchanged.  Files with any other
+    extension are ignored.
+
+    Returns data just like ``load_csv`` does, with the rows from all
+    files merged per class name.
+
+    This might cause problems on case-insensitive filesystems.
+    """
+    results = {}
+    for filename in os.listdir(directory):
+        base, ext = os.path.splitext(filename)
+        if ext.lower() != '.csv':
+            continue
+        f = open(os.path.join(directory, filename), 'rb')
+        try:
+            # Close the file even when a row fails to parse.
+            data = load_csv(csv.reader(f), allow_python=allow_python,
+                            default_type=default_type,
+                            default_class=base,
+                            allow_multiple_classes=allow_multiple_classes)
+        finally:
+            f.close()
+        for classname, items in data.items():
+            results.setdefault(classname, []).extend(items)
+    return results
+
+def load_csv(csvreader, allow_python=True, default_type=DEFAULT_TYPE,
+             default_class=None, allow_multiple_classes=True):
+    """
+    Load rows from ``csvreader`` and coerce each cell by column type.
+
+    ``csvreader`` is any iterable of rows (sequences of strings), such
+    as a ``csv.reader``.  Returns ``{class_name: [row_dict, ...]}``.
+
+    A row whose first cell is ``CLASS:`` switches the current class to
+    the name in its second cell; the next non-blank row is then read
+    as that class's header row.  Blank rows are skipped, as are data
+    rows whose first cell is ``[comment]``.  Short rows are padded
+    with empty strings; cells beyond the last header are dropped.
+
+    ``allow_python=False`` rejects ``:python`` columns, whose cells
+    are run through ``eval()``.  ``default_type`` is the type used for
+    headers without a ``:type`` suffix, and ``default_class`` the
+    class used until a ``CLASS:`` row appears.
+
+    Raises ``ValueError`` for a ``CLASS:`` row when
+    ``allow_multiple_classes`` is false or the class name is missing,
+    for rows that belong to no class, and for invalid headers.
+    Errors raised by the coercers themselves propagate unchanged.
+    """
+    current_class = default_class
+    current_headers = None
+    results = {}
+
+    for row in csvreader:
+        if not [cell for cell in row if cell.strip()]:
+            # empty row
+            continue
+
+        if row and row[0].strip() == 'CLASS:':
+            if not allow_multiple_classes:
+                raise ValueError(
+                    "CLASS: line in CSV file, but multiple classes are not allowed in this file (line: %r)"
+                    % row)
+            if not row[1:]:
+                raise ValueError(
+                    "CLASS: in line in CSV file, with no class name in next column (line: %r)"
+                    % row)
+            current_class = row[1]
+            current_headers = None
+            continue
+
+        if not current_class:
+            raise ValueError(
+                "No CLASS: line given, and there is no default class for this file (line: %r)"
+                % row)
+
+        if current_headers is None:
+            current_headers = _parse_headers(row, default_type,
+                                             allow_python)
+            continue
+
+        if row[0] == '[comment]':
+            continue
+
+        # Pad with empty strings on a copy, so the caller's row object
+        # is not modified (and a tuple row works too).
+        row = list(row) + [''] * (len(current_headers) - len(row))
+        row_converted = {}
+        for value, (name, coercer, args) in zip(row, current_headers):
+            if name is None:
+                # Comment
+                continue
+            if value == '[default]':
+                # Leave the key out so the class default applies.
+                continue
+            if value == '[null]':
+                row_converted[name] = None
+                continue
+            row_converted[name] = coercer(value, *args)
+
+        results.setdefault(current_class, []).append(row_converted)
+
+    return results
+
+def _parse_headers(header_row, default_type, allow_python=True):
+    """
+    Turn a header row into a list of ``(name, coercer, args)`` tuples.
+
+    Each header is ``name`` or ``name:type``; ``default_type`` is used
+    when no type is given.  Columns named ``[comment]`` or with an
+    empty name yield ``(None, None, None)`` and are ignored by
+    ``load_csv``.  The ``[name]`` column is always treated as ``str``.
+
+    Raises ``ValueError`` for a ``python`` column when
+    ``allow_python`` is false, for an unterminated ``(...)`` argument,
+    and (via ``get_coercer``) for an unknown type.
+    """
+    headers = []
+    for name in header_row:
+        original_name = name
+        if ':' in name:
+            name, type_name = name.split(':', 1)
+        else:
+            type_name = default_type
+        name = name.strip()
+        if name == '[comment]' or not name:
+            headers.append((None, None, None))
+            continue
+        type_name = type_name.strip().lower()
+        if '(' in type_name:
+            type_name, arg = type_name.split('(', 1)
+            if not arg.endswith(')'):
+                raise ValueError(
+                    "Arguments (in ()'s) do not end with ): %r"
+                    % original_name)
+            args = (arg[:-1],)
+        else:
+            args = ()
+        if name == '[name]':
+            type_name = 'str'
+        # Check the normalized type, so that spellings such as
+        # " Python" cannot slip past the restriction.
+        if type_name == 'python' and not allow_python:
+            raise ValueError(
+                ":python header given when python headers are not allowed (with header %r)"
+                % original_name)
+        # NOTE(review): the args registered with the coercer replace
+        # the parenthesized argument parsed above, so that argument is
+        # currently unused -- confirm whether it was meant to be passed.
+        coercer, args = get_coercer(type_name)
+        headers.append((name, coercer, args))
+    return headers
+
+# Registry mapping a type name to ``(coercer, extra_args)``.
+_coercers = {}
+def get_coercer(type):
+    """
+    Return the ``(coercer, args)`` pair registered under ``type``.
+
+    Raises ``ValueError`` naming the known types when ``type`` has not
+    been registered.
+    """
+    try:
+        return _coercers[type]
+    except KeyError:
+        known = ', '.join(_coercers.keys())
+        raise ValueError(
+            "Coercion type %r not known (I know: %s)"
+            % (type, known))
+
+def register_coercer(type, coercer, *args):
+    """
+    Register ``coercer`` under the name ``type``.
+
+    Any extra positional ``args`` are stored alongside it and returned
+    by ``get_coercer``; a later registration under the same name
+    replaces the earlier one.
+    """
+    entry = (coercer, args)
+    _coercers[type] = entry
+
+def identity(v):
+    """Return ``v`` unchanged (the coercer for plain string columns)."""
+    return v
+
+# 'str' and 'string' both leave the cell text as it is.
+register_coercer('str', identity)
+register_coercer('string', identity)
+
+def decode_string(v, encoding):
+    """
+    Return ``v.decode(encoding)``.
+
+    ``encoding`` is the extra argument stored when the coercer is
+    registered below.
+    """
+    return v.decode(encoding)
+
+# NOTE(review): 'string_escape' and 'base64' look like the Python 2
+# str.decode() codec names -- confirm before running on Python 3.
+register_coercer('escaped', decode_string, 'string_escape')
+register_coercer('strescaped', decode_string, 'string_escape')
+register_coercer('base64', decode_string, 'base64')
+
+# The builtin constructors are used directly as coercers.
+register_coercer('int', int)
+register_coercer('float', float)
+
+def parse_python(v):
+    """Evaluate ``v`` as a Python expression and return the result."""
+    # SECURITY: eval() runs whatever expression the CSV cell contains.
+    # The empty globals/locals dicts only hide this module's names.
+    # NOTE(review): builtins presumably remain reachable from the
+    # evaluated expression -- confirm against the eval() documentation
+    # and do not enable this type for untrusted files.
+    return eval(v, {}, {})
+
+register_coercer('python', parse_python)
+
+def parse_date(v):
+    """
+    Convert ``v`` to a ``datetime.date``.
+
+    Accepts an ISO date (``YYYY-MM-DD``) or ``NOW+days`` /
+    ``NOW-days`` relative to today.  Surrounding whitespace is
+    ignored; an empty value gives ``None``.
+
+    Raises ``ValueError`` if the value matches neither form.
+    """
+    v = v.strip()
+    if not v:
+        return None
+    if v.startswith('NOW-') or v.startswith('NOW+'):
+        # v[3:] keeps the sign, e.g. '+3' or '-3'.
+        days = int(v[3:])
+        return date.today() + timedelta(days)
+    # Parse with datetime directly: the ``time`` module used here
+    # previously was never imported.
+    return datetime.strptime(v, '%Y-%m-%d').date()
+
+register_coercer('date', parse_date)
+
+def parse_datetime(v):
+    """
+    Convert ``v`` to a ``datetime.datetime``.
+
+    Accepts ``YYYY-MM-DD`` followed by ``T`` or a space and then
+    ``HH:MM`` or ``HH:MM:SS``, or ``NOW+seconds`` / ``NOW-seconds``
+    relative to the current local time.  Surrounding whitespace is
+    ignored; an empty value gives ``None``.
+
+    Raises ``ValueError`` if the value matches none of these forms.
+    """
+    v = v.strip()
+    if not v:
+        return None
+    if v.startswith('NOW-') or v.startswith('NOW+'):
+        # v[3:] keeps the sign, e.g. '+30' or '-30'.
+        seconds = int(v[3:])
+        return datetime.now() + timedelta(0, seconds)
+    fmts = ('%Y-%m-%dT%H:%M:%S',
+            '%Y-%m-%d %H:%M:%S',
+            '%Y-%m-%dT%H:%M',
+            '%Y-%m-%d %H:%M')
+    # Parse with datetime directly: the ``time`` module used here
+    # previously was never imported.
+    for fmt in fmts:
+        try:
+            return datetime.strptime(v, fmt)
+        except ValueError:
+            pass
+    raise ValueError(
+        "Value is not a date/time like YYYY-MM-DDTHH:MM:SS: %r" % v)
+    
+register_coercer('datetime', parse_datetime)
+
+class Reference(object):
+    """
+    Placeholder for a ``:ref`` cell: holds the ``name`` of another
+    object.  ``create_data`` replaces it with that object's id.
+    """
+    def __init__(self, name):
+        self.name = name
+
+def parse_ref(v):
+    """
+    Wrap a non-blank cell in a ``Reference``; a blank cell gives
+    ``None``.  The value is stored exactly as given (not stripped).
+    """
+    if v.strip():
+        return Reference(v)
+    return None
+
+register_coercer('ref', parse_ref)
+
+def parse_bool(v):
+    """
+    Convert a boolean-like string to ``True`` or ``False``.
+
+    Case and surrounding whitespace are ignored.  ``y``, ``yes``,
+    ``t``, ``true``, ``on`` and ``1`` are true; ``n``, ``no``, ``f``,
+    ``false``, ``off`` and ``0`` are false.
+
+    Raises ``ValueError`` for anything else, including an empty
+    string.
+    """
+    text = v.strip().lower()
+    if text in ('y', 'yes', 't', 'true', 'on', '1'):
+        return True
+    elif text in ('n', 'no', 'f', 'false', 'off', '0'):
+        return False
+    # Report the value as it was given.  (The message used to refer to
+    # an undefined name, so bad input raised NameError instead.)
+    raise ValueError(
+        "Value is not boolean-like: %r" % v)
+
+register_coercer('bool', parse_bool)
+register_coercer('boolean', parse_bool)
--- a/python/packages/sqlobject/util/moduleloader.py
+++ b/python/packages/sqlobject/util/moduleloader.py
@@ -0,0 +1,42 @@
+import sys
+import imp
+
+def load_module(module_name):
+    """
+    Import ``module_name`` (which may be dotted) and return the
+    module it names.
+
+    ``__import__`` is called with the full dotted name and the
+    remaining components are then followed with ``getattr`` from the
+    module it returns.
+    """
+    module = __import__(module_name)
+    for part in module_name.split('.')[1:]:
+        module = getattr(module, part)
+    return module
+
+def load_module_from_name(filename, module_name):
+    """
+    Load the module ``module_name`` from the directory containing
+    ``filename`` and return it.
+
+    If ``module_name`` is already in ``sys.modules`` that module is
+    returned immediately.  Otherwise an ``__init__.py`` is created in
+    the directory when it is missing, the parent package (for a
+    dotted name) is loaded first from the directory one level up, and
+    the module itself is then found and loaded with ``imp``.
+
+    Raises ``IOError`` if ``__init__.py`` cannot be created, and
+    whatever ``imp.find_module`` / ``imp.load_module`` raise if the
+    module cannot be found or imported.
+    """
+    # ``os`` is not imported at the top of this module, so import it
+    # here; without this every call past the cache check failed.
+    import os
+    if module_name in sys.modules:
+        return sys.modules[module_name]
+    directory = os.path.dirname(filename)
+    init_filename = os.path.join(directory, '__init__.py')
+    if not os.path.exists(init_filename):
+        try:
+            f = open(init_filename, 'w')
+        except (OSError, IOError) as e:
+            raise IOError(
+                'Cannot write __init__.py file into directory %s (%s)\n'
+                % (directory, e))
+        try:
+            f.write('#\n')
+        finally:
+            f.close()
+    if '.' in module_name:
+        parent_name, base_name = module_name.rsplit('.', 1)
+        # Load the parent package first; only the side effect of
+        # loading it is needed, not the returned module.
+        load_module_from_name(directory, parent_name)
+    else:
+        base_name = module_name
+    fp = None
+    try:
+        fp, pathname, stuff = imp.find_module(base_name, [directory])
+        module = imp.load_module(module_name, fp, pathname, stuff)
+    finally:
+        # find_module may return an open file; always release it.
+        if fp is not None:
+            fp.close()
+    return module
--- a/python/packages/sqlobject/util/threadinglocal.py
+++ b/python/packages/sqlobject/util/threadinglocal.py
@@ -0,0 +1,6 @@
+# Export ``local``: the class from ``threading`` when it can be
+# imported, otherwise the plain stand-in class defined below.
+try:
+    from threading import local
+except ImportError:
+    # No threads, so "thread local" means process-global
+    class local(object):
+        # Empty class: instances are plain attribute containers.
+        pass