mirror of
https://github.com/djohnlewis/stackdump
synced 2024-12-04 23:17:37 +00:00
Extracted models out of dataproc/insert.py so they can be reused elsewhere.
This commit is contained in:
parent
18850e5bb5
commit
a83bed32b5
@ -10,9 +10,11 @@ import xml.sax
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
from sqlobject import *
|
||||
from sqlobject import sqlhub, connectionForURI, AND, OR
|
||||
from pysolr import Solr
|
||||
|
||||
from stackdump.models import Site, Badge, Comment, User
|
||||
|
||||
try:
|
||||
# For Python < 2.6 or people using a newer version of simplejson
|
||||
import simplejson as json
|
||||
@ -22,45 +24,6 @@ except ImportError:
|
||||
|
||||
# Directory portion of the path this script was invoked with (sys.argv[0]).
# NOTE: this is an empty string when the script is started by bare file name.
script_dir = os.path.dirname(sys.argv[0])
|
||||
|
||||
# MODELS
|
||||
class Site(SQLObject):
    """Database model for a site, identified by a name and a description."""

    # Both columns hold Unicode text.
    name = UnicodeCol()
    desc = UnicodeCol()
|
||||
|
||||
class Badge(SQLObject):
    """Database model for a badge record that belongs to a Site.

    ``sourceId`` and ``userId`` are plain integer columns rather than
    foreign keys; presumably they carry identifiers taken from the imported
    data -- confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its badges.
    site = ForeignKey('Site', cascade=True)
    userId = IntCol()
    name = UnicodeCol()
    date = DateTimeCol()
|
||||
|
||||
class Comment(SQLObject):
    """Database model for a comment that belongs to a Site.

    ``sourceId``, ``postId`` and ``userId`` are plain integer columns rather
    than foreign keys; presumably they carry identifiers taken from the
    imported data -- confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its comments.
    site = ForeignKey('Site', cascade=True)
    postId = IntCol()
    score = IntCol()
    text = UnicodeCol()
    creationDate = DateTimeCol()
    userId = IntCol()

    # Attribute names exposed for JSON output (presumably consumed by a
    # serializer elsewhere -- confirm). 'id' is not declared above; it is the
    # primary key SQLObject supplies for every model.
    json_fields = ['id', 'score', 'text', 'creationDate', 'userId']
|
||||
|
||||
class User(SQLObject):
    """Database model for a user profile that belongs to a Site.

    ``sourceId`` is a plain integer column rather than a foreign key;
    presumably it carries the identifier taken from the imported data --
    confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its users.
    site = ForeignKey('Site', cascade=True)

    # Account details.
    reputation = IntCol()
    creationDate = DateTimeCol()
    displayName = UnicodeCol()
    emailHash = UnicodeCol()
    lastAccessDate = DateTimeCol()

    # Free-form profile information.
    websiteUrl = UnicodeCol()
    location = UnicodeCol()
    age = IntCol()
    aboutMe = UnicodeCol()

    # Activity counters.
    views = IntCol()
    upVotes = IntCol()
    downVotes = IntCol()
|
||||
|
||||
# SAX HANDLERS
|
||||
# strftime/strptime pattern: date and time joined by a literal 'T', with a
# fractional-seconds part (%f), e.g. 2010-07-28T19:04:21.300
ISO_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
|
||||
|
||||
@ -505,7 +468,7 @@ if not os.path.exists(xml_root):
|
||||
print('The given XML root path does not exist.')
|
||||
sys.exit(1)
|
||||
|
||||
db_path = os.path.abspath(os.path.join(script_dir, '../../data/stackdump.sqlite'))
|
||||
db_path = os.path.abspath(os.path.join(script_dir, '../../../../data/stackdump.sqlite'))
|
||||
|
||||
# connect to the database
|
||||
print('Connecting to the database...')
|
||||
|
43
python/src/stackdump/models.py
Normal file
43
python/src/stackdump/models.py
Normal file
@ -0,0 +1,43 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This file contains all the model definitions for the database.
|
||||
|
||||
from sqlobject import *
|
||||
|
||||
class Site(SQLObject):
    """Database model for a site, identified by a name and a description."""

    # Both columns hold Unicode text.
    name = UnicodeCol()
    desc = UnicodeCol()
|
||||
|
||||
class Badge(SQLObject):
    """Database model for a badge record that belongs to a Site.

    ``sourceId`` and ``userId`` are plain integer columns rather than
    foreign keys; presumably they carry identifiers taken from the imported
    data -- confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its badges.
    site = ForeignKey('Site', cascade=True)
    userId = IntCol()
    name = UnicodeCol()
    date = DateTimeCol()
|
||||
|
||||
class Comment(SQLObject):
    """Database model for a comment that belongs to a Site.

    ``sourceId``, ``postId`` and ``userId`` are plain integer columns rather
    than foreign keys; presumably they carry identifiers taken from the
    imported data -- confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its comments.
    site = ForeignKey('Site', cascade=True)
    postId = IntCol()
    score = IntCol()
    text = UnicodeCol()
    creationDate = DateTimeCol()
    userId = IntCol()

    # Attribute names exposed for JSON output (presumably consumed by a
    # serializer elsewhere -- confirm). 'id' is not declared above; it is the
    # primary key SQLObject supplies for every model.
    json_fields = ['id', 'score', 'text', 'creationDate', 'userId']
|
||||
|
||||
class User(SQLObject):
    """Database model for a user profile that belongs to a Site.

    ``sourceId`` is a plain integer column rather than a foreign key;
    presumably it carries the identifier taken from the imported data --
    confirm against the code that fills this table.
    """

    sourceId = IntCol()
    # cascade=True: deleting the referenced Site also deletes its users.
    site = ForeignKey('Site', cascade=True)

    # Account details.
    reputation = IntCol()
    creationDate = DateTimeCol()
    displayName = UnicodeCol()
    emailHash = UnicodeCol()
    lastAccessDate = DateTimeCol()

    # Free-form profile information.
    websiteUrl = UnicodeCol()
    location = UnicodeCol()
    age = IntCol()
    aboutMe = UnicodeCol()

    # Activity counters.
    views = IntCol()
    upVotes = IntCol()
    downVotes = IntCol()
|
Loading…
Reference in New Issue
Block a user