1
0
mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Extracted models out of dataproc/insert.py so they can be reused elsewhere.

This commit is contained in:
Samuel Lai 2011-10-30 17:12:01 +11:00
parent 18850e5bb5
commit a83bed32b5
2 changed files with 47 additions and 41 deletions

View File

@ -10,9 +10,11 @@ import xml.sax
from datetime import datetime
import re
from sqlobject import *
from sqlobject import sqlhub, connectionForURI, AND, OR
from pysolr import Solr
from stackdump.models import Site, Badge, Comment, User
try:
# For Python < 2.6 or people using a newer version of simplejson
import simplejson as json
@ -22,45 +24,6 @@ except ImportError:
# Directory part of the path this script was invoked with (sys.argv[0]).
# os.path.dirname returns '' when the script is started by bare file name,
# so relative paths joined onto this resolve against the current directory.
script_dir = os.path.dirname(sys.argv[0])
# MODELS
class Site(SQLObject):
    """SQLObject model with two Unicode text columns, ``name`` and ``desc``.

    The Badge, Comment and User models each point at a row of this table
    through their ``site`` foreign key.
    """

    # Both columns are declared without arguments, i.e. with SQLObject's
    # default column options.
    name = UnicodeCol()
    desc = UnicodeCol()
class Badge(SQLObject):
    """SQLObject model for a badge row that belongs to a ``Site``."""

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the badge's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    # Plain integer column, not a ForeignKey to User; presumably it matches
    # User.sourceId -- TODO confirm.
    userId = IntCol()

    name = UnicodeCol()
    date = DateTimeCol()
class Comment(SQLObject):
    """SQLObject model for a comment row that belongs to a ``Site``."""

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the comment's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    # Plain integer columns are used for the post and user references rather
    # than ForeignKeys (userId is declared after creationDate, below).
    postId = IntCol()
    score = IntCol()
    text = UnicodeCol()
    creationDate = DateTimeCol()
    userId = IntCol()

    # Ordinary class attribute (not a column): names of the attributes
    # listed for JSON output. Note it lists 'id', not 'sourceId'. The code
    # that consumes this list is not visible here.
    json_fields = [
        'id',
        'score',
        'text',
        'creationDate',
        'userId',
    ]
class User(SQLObject):
    """SQLObject model for a user row that belongs to a ``Site``.

    Every column is declared with default options; the declaration order
    below is the column order of the table.
    """

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the user's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    reputation = IntCol()
    creationDate = DateTimeCol()

    # Profile text fields.
    displayName = UnicodeCol()
    emailHash = UnicodeCol()
    lastAccessDate = DateTimeCol()
    websiteUrl = UnicodeCol()
    location = UnicodeCol()
    age = IntCol()
    aboutMe = UnicodeCol()

    # Integer counters.
    views = IntCol()
    upVotes = IntCol()
    downVotes = IntCol()
# SAX HANDLERS
# strptime/strftime pattern: date and time separated by a literal 'T', with
# fractional seconds (%f), e.g. 2011-10-30T17:12:01.123.
# NOTE(review): presumably used by the handlers below to parse timestamps
# from the XML; the usage is not visible in this excerpt.
ISO_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
@ -505,7 +468,7 @@ if not os.path.exists(xml_root):
print('The given XML root path does not exist.')
sys.exit(1)
db_path = os.path.abspath(os.path.join(script_dir, '../../data/stackdump.sqlite'))
db_path = os.path.abspath(os.path.join(script_dir, '../../../../data/stackdump.sqlite'))
# connect to the database
print('Connecting to the database...')

View File

@ -0,0 +1,43 @@
#!/usr/bin/env python
# This file contains all the model definitions for the database.
from sqlobject import *
class Site(SQLObject):
    """SQLObject model with two Unicode text columns, ``name`` and ``desc``.

    The Badge, Comment and User models each point at a row of this table
    through their ``site`` foreign key.
    """

    # Both columns are declared without arguments, i.e. with SQLObject's
    # default column options.
    name = UnicodeCol()
    desc = UnicodeCol()
class Badge(SQLObject):
    """SQLObject model for a badge row that belongs to a ``Site``."""

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the badge's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    # Plain integer column, not a ForeignKey to User; presumably it matches
    # User.sourceId -- TODO confirm.
    userId = IntCol()

    name = UnicodeCol()
    date = DateTimeCol()
class Comment(SQLObject):
    """SQLObject model for a comment row that belongs to a ``Site``."""

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the comment's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    # Plain integer columns are used for the post and user references rather
    # than ForeignKeys (userId is declared after creationDate, below).
    postId = IntCol()
    score = IntCol()
    text = UnicodeCol()
    creationDate = DateTimeCol()
    userId = IntCol()

    # Ordinary class attribute (not a column): names of the attributes
    # listed for JSON output. Note it lists 'id', not 'sourceId'. The code
    # that consumes this list is not visible here.
    json_fields = [
        'id',
        'score',
        'text',
        'creationDate',
        'userId',
    ]
class User(SQLObject):
    """SQLObject model for a user row that belongs to a ``Site``.

    Every column is declared with default options; the declaration order
    below is the column order of the table.
    """

    # Integer identifier distinct from the database's own ``id`` column;
    # presumably the user's id in the imported data -- verify against the
    # importer.
    sourceId = IntCol()

    # Owning site. With cascade=True, deleting the referenced Site row also
    # deletes this row.
    site = ForeignKey('Site', cascade=True)

    reputation = IntCol()
    creationDate = DateTimeCol()

    # Profile text fields.
    displayName = UnicodeCol()
    emailHash = UnicodeCol()
    lastAccessDate = DateTimeCol()
    websiteUrl = UnicodeCol()
    location = UnicodeCol()
    age = IntCol()
    aboutMe = UnicodeCol()

    # Integer counters.
    views = IntCol()
    upVotes = IntCol()
    downVotes = IntCol()