mirror of
https://github.com/djohnlewis/stackdump
synced 2024-12-04 23:17:37 +00:00
Merge import-perf-improvements branch to default.
This commit is contained in:
commit
a597b2e588
@ -246,6 +246,11 @@ class Solr(object):
|
|||||||
Optionally accepts ``timeout`` for wait seconds until giving up on a
|
Optionally accepts ``timeout`` for wait seconds until giving up on a
|
||||||
request. Default is ``60`` seconds.
|
request. Default is ``60`` seconds.
|
||||||
|
|
||||||
|
Optionally accepts ``assume_clean`` to skip cleaning request of invalid XML
|
||||||
|
characters. This offers a slight performance improvement, but only set this
|
||||||
|
to ``True`` if you know your request is clean (e.g. coming from other XML
|
||||||
|
data). Bad things will happen otherwise. Default is ``False``.
|
||||||
|
|
||||||
Usage::
|
Usage::
|
||||||
|
|
||||||
solr = pysolr.Solr('http://localhost:8983/solr')
|
solr = pysolr.Solr('http://localhost:8983/solr')
|
||||||
@ -253,10 +258,11 @@ class Solr(object):
|
|||||||
solr = pysolr.Solr('http://localhost:8983/solr', timeout=10)
|
solr = pysolr.Solr('http://localhost:8983/solr', timeout=10)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, url, decoder=None, timeout=60):
|
def __init__(self, url, decoder=None, timeout=60, assume_clean=False):
|
||||||
self.decoder = decoder or json.JSONDecoder()
|
self.decoder = decoder or json.JSONDecoder()
|
||||||
self.url = url
|
self.url = url
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
|
self.assume_clean = assume_clean
|
||||||
self.log = self._get_log()
|
self.log = self._get_log()
|
||||||
self.session = requests.Session()
|
self.session = requests.Session()
|
||||||
self.session.stream = False
|
self.session.stream = False
|
||||||
@ -506,7 +512,10 @@ class Solr(object):
|
|||||||
|
|
||||||
value = "{0}".format(value)
|
value = "{0}".format(value)
|
||||||
|
|
||||||
return clean_xml_string(value)
|
if self.assume_clean:
|
||||||
|
return value
|
||||||
|
else:
|
||||||
|
return clean_xml_string(value)
|
||||||
|
|
||||||
def _to_python(self, value):
|
def _to_python(self, value):
|
||||||
"""
|
"""
|
||||||
|
@ -26,7 +26,7 @@ import html5lib
|
|||||||
from html5lib.filters._base import Filter as HTML5LibFilterBase
|
from html5lib.filters._base import Filter as HTML5LibFilterBase
|
||||||
import markdown
|
import markdown
|
||||||
|
|
||||||
from stackdump.models import Site, Badge, Comment, User
|
from stackdump.models import Site, Badge, User
|
||||||
from stackdump import settings
|
from stackdump import settings
|
||||||
|
|
||||||
# STATIC VARIABLES
|
# STATIC VARIABLES
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -19,6 +19,7 @@ SOLR_URL = 'http://localhost:8983/solr/stackdump/'
|
|||||||
|
|
||||||
import os
|
import os
|
||||||
DATABASE_CONN_STR = 'sqlite:///' + os.path.join(os.path.dirname(__file__), '..', '..', '..', 'data', 'stackdump.sqlite')
|
DATABASE_CONN_STR = 'sqlite:///' + os.path.join(os.path.dirname(__file__), '..', '..', '..', 'data', 'stackdump.sqlite')
|
||||||
|
TEMP_COMMENTS_DATABASE_DIR = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'data')
|
||||||
|
|
||||||
# if the website is hosted under a subpath, specify it here. It must end with a
|
# if the website is hosted under a subpath, specify it here. It must end with a
|
||||||
# slash.
|
# slash.
|
||||||
|
@ -5,6 +5,10 @@
|
|||||||
from sqlobject import SQLObject, UnicodeCol, DateTimeCol, IntCol, ForeignKey, \
|
from sqlobject import SQLObject, UnicodeCol, DateTimeCol, IntCol, ForeignKey, \
|
||||||
DatabaseIndex
|
DatabaseIndex
|
||||||
|
|
||||||
|
|
||||||
|
ISO_DATE_FORMAT = '%Y-%m-%dT%H:%M:%S.%f'
|
||||||
|
|
||||||
|
|
||||||
class Site(SQLObject):
|
class Site(SQLObject):
|
||||||
name = UnicodeCol()
|
name = UnicodeCol()
|
||||||
desc = UnicodeCol()
|
desc = UnicodeCol()
|
||||||
@ -15,34 +19,23 @@ class Site(SQLObject):
|
|||||||
|
|
||||||
siteKey_index = DatabaseIndex(key, unique=True)
|
siteKey_index = DatabaseIndex(key, unique=True)
|
||||||
|
|
||||||
|
|
||||||
class Badge(SQLObject):
|
class Badge(SQLObject):
|
||||||
sourceId = IntCol()
|
sourceId = IntCol()
|
||||||
site = ForeignKey('Site', cascade=True)
|
site = ForeignKey('Site', cascade=True)
|
||||||
userId = IntCol()
|
userId = IntCol()
|
||||||
name = UnicodeCol()
|
name = UnicodeCol()
|
||||||
date = DateTimeCol()
|
date = DateTimeCol(datetimeFormat=ISO_DATE_FORMAT)
|
||||||
|
|
||||||
class Comment(SQLObject):
|
|
||||||
sourceId = IntCol()
|
|
||||||
site = ForeignKey('Site', cascade=True)
|
|
||||||
postId = IntCol()
|
|
||||||
score = IntCol()
|
|
||||||
text = UnicodeCol()
|
|
||||||
creationDate = DateTimeCol()
|
|
||||||
userId = IntCol()
|
|
||||||
|
|
||||||
siteId_postId_index = DatabaseIndex(site, postId)
|
|
||||||
|
|
||||||
json_fields = [ 'id', 'score', 'text', 'creationDate', 'userId' ]
|
|
||||||
|
|
||||||
class User(SQLObject):
|
class User(SQLObject):
|
||||||
sourceId = IntCol()
|
sourceId = IntCol()
|
||||||
site = ForeignKey('Site', cascade=True)
|
site = ForeignKey('Site', cascade=True)
|
||||||
reputation = IntCol()
|
reputation = IntCol()
|
||||||
creationDate = DateTimeCol()
|
creationDate = DateTimeCol(datetimeFormat=ISO_DATE_FORMAT)
|
||||||
displayName = UnicodeCol()
|
displayName = UnicodeCol()
|
||||||
emailHash = UnicodeCol()
|
emailHash = UnicodeCol()
|
||||||
lastAccessDate = DateTimeCol()
|
lastAccessDate = DateTimeCol(datetimeFormat=ISO_DATE_FORMAT)
|
||||||
websiteUrl = UnicodeCol()
|
websiteUrl = UnicodeCol()
|
||||||
location = UnicodeCol()
|
location = UnicodeCol()
|
||||||
age = IntCol()
|
age = IntCol()
|
||||||
|
Loading…
Reference in New Issue
Block a user