
Added a 'no data' page with instructions on how to import data into Stackdump.

This commit is contained in:
Samuel Lai 2012-02-12 13:56:37 +11:00
parent adccd41724
commit db3bf11310
3 changed files with 146 additions and 6 deletions

@@ -318,3 +318,30 @@ h1.answers {
    background-color: #CCCCCC;
    padding: 7px 10px;
}

.nodata.row {
    margin-top: 50px;
    margin-bottom: 20px;
}

.nodata h2 {
    margin-top: 25px;
    margin-bottom: 7px;
}

.nodata li {
    color: inherit;
    border-bottom: 1px solid #F2F2F2;
    padding-bottom: 7px;
    margin-bottom: 7px;
}

.nodata li:last-of-type {
    border-bottom: none;
    padding-bottom: inherit;
    margin-bottom: inherit;
}

.nodata pre {
    margin-top: 7px;
}

@@ -13,9 +13,10 @@ except ImportError:
    # For Python >= 2.6
    import json

-from bottle import get, run, static_file, debug, request, HTTPError
+from bottle import get, run, static_file, debug, request, error, HTTPError
from jinja2 import Environment, PackageLoader
from sqlobject import sqlhub, connectionForURI, AND, OR, IN, SQLObjectNotFound
from sqlobject.dberrors import OperationalError
from pysolr import Solr
import iso8601
@@ -191,13 +192,23 @@ def site_logos(site_key):
def serve_static(filename):
    return static_file(filename, root=MEDIA_ROOT)

@error(500)
@uses_templates
def error500(error):
    ex = error.exception
    if isinstance(ex, NoSitesImportedError):
        return render_template('nodata.html')

    # otherwise, return the standard error message
    return repr(error)

@get('/')
@uses_templates
@uses_solr
@uses_db
def index():
    context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()
    context['random_questions'] = get_random_questions()
@@ -210,7 +221,7 @@ def index():
@uses_db
def site_index(site_key):
    context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
@@ -227,7 +238,7 @@ def site_index(site_key):
@uses_db
def search():
    context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

    search_context = perform_search()
    if not search_context:
@@ -244,7 +255,7 @@ def search():
def site_search(site_key):
    context = { }
    # the template uses this to allow searching on other sites
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

    try:
        context['site'] = Site.selectBy(key=site_key).getOne()
@@ -325,6 +336,33 @@ def get_template_settings():
    return template_settings

class NoSitesImportedError(Exception):
    def __init__(self, cause=None):
        self.cause = cause

    def __str__(self):
        s = 'NoSitesImportedError('
        if self.cause:
            s += str(type(self.cause)) + ' ' + str(self.cause)
        s += ')'
        return s

def get_sites():
    '''\
    Retrieves a list of Site objects or if there are none, raises a
    NoSitesImportedError. This error is designed to trigger the 500 error
    handler.
    '''
    try:
        sites = list(Site.select())
        if len(sites) == 0:
            raise NoSitesImportedError()
        return sites
    except OperationalError as e:
        raise NoSitesImportedError(e)

def decode_json_fields(obj):
    '''\
    Looks for keys in obj that end in -json, decodes the corresponding value and

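The two additions above work together: get_sites() raises NoSitesImportedError when the Site table is empty or missing, and the @error(500) handler turns that exception into the new 'nodata' page instead of Bottle's default error output. The following standalone sketch, which is not part of this commit (fetch_sites() and the SITES list are made-up stand-ins for Stackdump's database-backed Site objects), shows the same Bottle pattern in isolation:

# Standalone sketch of the pattern this commit adds: a custom exception raised
# when no sites exist, caught by a Bottle @error(500) handler that serves a
# friendly page instead of a stack trace. Names here are placeholders, not
# Stackdump's real code.
from bottle import error, get, run


class NoSitesImportedError(Exception):
    """Raised when no StackExchange sites have been imported yet."""


SITES = []  # stand-in for the database-backed Site objects


def fetch_sites():
    # Mirrors get_sites(): an empty result triggers the 'no data' page.
    if not SITES:
        raise NoSitesImportedError()
    return SITES


@error(500)
def error500(err):
    # Bottle wraps uncaught route exceptions in an HTTPError whose
    # .exception attribute holds the original exception instance.
    if isinstance(err.exception, NoSitesImportedError):
        return '<h1>No data has been imported... yet.</h1>'
    return repr(err)


@get('/')
def index():
    # Raises NoSitesImportedError until data is imported, so the
    # 500 handler above renders the instructions page instead.
    return ', '.join(fetch_sites())


if __name__ == '__main__':
    run(host='localhost', port=8080)
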
@ -0,0 +1,75 @@
{% extends 'base.html' %}
{% block title %}
Stackdump - Import data instructions
{% endblock %}
{% block body %}
<div class="row nodata">
<div class="span16">
<h1>No data has been imported... yet.</h1>
<p>
Before you can start using Stackdump, you need to import some
<a href="http://www.stackexchange.com">StackExchange</a> data
into it. To do this, you'll need to download the StackExchange
data dump, then run some scripts on the Stackdump server.
</p>
<h2>Get the StackExchange data dump</h2>
<p>
StackExchange data dumps are distributed using BitTorrent. You
will need a BitTorrent client like
<a href="http://www.utorrent.com">uTorrent</a> to download them.
</p>
<p>
The data dumps can be downloaded from
<a href="http://www.clearbits.net/creators/146-stack-exchange-data-dump">http://www.clearbits.net/creators/146-stack-exchange-data-dump</a>.
</p>
<h2>Extract the dump</h2>
<p>
Once downloaded, you will end up with a directory containing
another directory named <em>Content</em>. That
<em>Content</em> directory contains the data dump for each site,
compressed as a <a href="http://www.7-zip.org/">7-zip</a> file.
</p>
<p>
For each of the sites you wish to import into Stackdump, extract
the compressed file to a temporary location (each compressed
file contains another directory inside it with the actual data,
so you can just extract each file into the same temporary
location).
</p>
<h2>Import them into Stackdump</h2>
<p>
This process can take upwards of 10 hours, depending on the
size of the dump you're trying to import.
</p>
<p>
Before you can import data, though, you need to download the
required site metadata so Stackdump can load the dumps properly.
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>
Execute the following command -
<pre>./start_python.sh python/src/stackdump/dataproc/get_sites_info.py</pre>
</li>
</ol>
<p>
Now that you have the site metadata, you can import the dumps.
For each dump you wish to import, do the following -
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>Find the directory containing the data dump XML files. This is likely to be a directory inside the temporary location you extracted to earlier. The directory will contain files like <em>posts.xml</em>, <em>users.xml</em> and <em>comments.xml</em>.</li>
<li>
Execute the following command, replacing <em>path_to_dir_with_xml</em> with the path from the previous step -
<pre>./start_python.sh python/src/stackdump/dataproc/import.py path_to_dir_with_xml</pre>
</li>
</ol>
</div>
</div>
{% endblock %}
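
For reference, the steps in the template above amount to running get_sites_info.py once and then import.py once per extracted dump directory. Where several sites need importing, a small wrapper along the following lines could drive that loop. This is only a rough sketch: the two paths are made-up placeholders, and it assumes the 7-zip archives have already been extracted as described above.

# Hypothetical helper (not shipped with Stackdump) that runs the import
# command shown above for every extracted site dump directory.
import os
import subprocess

STACKDUMP_ROOT = '/opt/stackdump'   # assumption: where Stackdump was extracted
DUMPS_ROOT = '/tmp/se-dumps'        # assumption: one sub-directory per site dump

for name in sorted(os.listdir(DUMPS_ROOT)):
    site_dir = os.path.join(DUMPS_ROOT, name)
    # a real dump directory contains posts.xml, users.xml, comments.xml, etc.
    if not os.path.isfile(os.path.join(site_dir, 'posts.xml')):
        continue
    subprocess.check_call(
        [os.path.join(STACKDUMP_ROOT, 'start_python.sh'),
         'python/src/stackdump/dataproc/import.py', site_dir],
        cwd=STACKDUMP_ROOT,
    )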