mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Added a 'no data' page with instructions on how to import data into Stackdump.

Samuel Lai 2012-02-12 13:56:37 +11:00
parent adccd41724
commit db3bf11310
3 changed files with 146 additions and 6 deletions

View File

@@ -317,4 +317,31 @@ h1.answers {
     border-top: solid 1px #999999;
     background-color: #CCCCCC;
     padding: 7px 10px;
 }
+
+.nodata.row {
+    margin-top: 50px;
+    margin-bottom: 20px;
+}
+
+.nodata h2 {
+    margin-top: 25px;
+    margin-bottom: 7px;
+}
+
+.nodata li {
+    color: inherit;
+    border-bottom: 1px solid #F2F2F2;
+    padding-bottom: 7px;
+    margin-bottom: 7px;
+}
+
+.nodata li:last-of-type {
+    border-bottom: none;
+    padding-bottom: inherit;
+    margin-bottom: inherit;
+}
+
+.nodata pre {
+    margin-top: 7px;
+}

View File

@@ -13,9 +13,10 @@ except ImportError:
     # For Python >= 2.6
     import json

-from bottle import get, run, static_file, debug, request, HTTPError
+from bottle import get, run, static_file, debug, request, error, HTTPError
 from jinja2 import Environment, PackageLoader
-from sqlobject import sqlhub, connectionForURI, AND, OR, IN, SQLObjectNotFound
+from sqlobject import sqlhub, connectionForURI, AND, OR, IN, SQLObjectNotFound
+from sqlobject.dberrors import OperationalError
 from pysolr import Solr
 import iso8601
@@ -191,13 +192,23 @@ def site_logos(site_key):
 def serve_static(filename):
     return static_file(filename, root=MEDIA_ROOT)

+@error(500)
+@uses_templates
+def error500(error):
+    ex = error.exception
+    if isinstance(ex, NoSitesImportedError):
+        return render_template('nodata.html')
+
+    # otherwise, return the standard error message
+    return repr(error)
+
 @get('/')
 @uses_templates
 @uses_solr
 @uses_db
 def index():
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()
     context['random_questions'] = get_random_questions()
@@ -210,7 +221,7 @@ def index():
 @uses_db
 def site_index(site_key):
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     try:
         context['site'] = Site.selectBy(key=site_key).getOne()
@@ -227,7 +238,7 @@ def site_index(site_key):
 @uses_db
 def search():
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     search_context = perform_search()
     if not search_context:
@@ -244,7 +255,7 @@ def search():
 def site_search(site_key):
     context = { }
     # the template uses this to allow searching on other sites
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     try:
         context['site'] = Site.selectBy(key=site_key).getOne()
@@ -325,6 +336,33 @@ def get_template_settings():
     return template_settings

+class NoSitesImportedError(Exception):
+    def __init__(self, cause=None):
+        self.cause = cause
+
+    def __str__(self):
+        s = 'NoSitesImportedError('
+        if self.cause:
+            s += str(type(self.cause)) + ' ' + str(self.cause)
+        s += ')'
+        return s
+
+def get_sites():
+    '''\
+    Retrieves a list of Site objects or if there are none, raises a
+    NoSitesImportedError. This error is designed to trigger the 500 error
+    handler.
+    '''
+    try:
+        sites = list(Site.select())
+        if len(sites) == 0:
+            raise NoSitesImportedError()
+        return sites
+    except OperationalError as e:
+        raise NoSitesImportedError(e)
+
 def decode_json_fields(obj):
     '''\
     Looks for keys in obj that end in -json, decodes the corresponding value and

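How the pieces above fit together: under Bottle's default catchall behaviour, an uncaught exception raised inside a route handler is wrapped in an HTTPError whose .exception attribute holds the original exception, and that HTTPError is passed to the registered @error(500) handler. This is why get_sites() can simply raise NoSitesImportedError and let error500() decide whether to render the 'no data' page. A minimal standalone sketch of the pattern (illustrative only, not code from this commit):

# a minimal sketch of the error-page pattern used above; the route and
# message here are illustrative, not Stackdump's actual code
from bottle import error, get, run

class NoSitesImportedError(Exception):
    pass

@error(500)
def error500(err):
    # Bottle attaches the original exception to the HTTPError it builds
    if isinstance(err.exception, NoSitesImportedError):
        return 'No data has been imported yet.'
    return repr(err)

@get('/')
def index():
    raise NoSitesImportedError()  # simulates an empty database

# run(host='localhost', port=8080)  # visit / to see the custom error page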
View File

@@ -0,0 +1,75 @@
{% extends 'base.html' %}
{% block title %}
Stackdump - Import data instructions
{% endblock %}
{% block body %}
<div class="row nodata">
<div class="span16">
<h1>No data has been imported... yet.</h1>
<p>
Before you can start using Stackdump, you need to import some
<a href="http://www.stackexchange.com">StackExchange</a> data
into it. To do this, you'll need to download the StackExchange
data dump, then run some scripts on the Stackdump server.
</p>
<h2>Get the StackExchange data dump</h2>
<p>
StackExchange data dumps are distributed using BitTorrent. You
will need a BitTorrent client like
<a href="http://www.utorrent.com">uTorrent</a> to download it.
</p>
<p>
The data dumps can be downloaded from
<a href="http://www.clearbits.net/creators/146-stack-exchange-data-dump">http://www.clearbits.net/creators/146-stack-exchange-data-dump</a>.
</p>
<h2>Extract the dump</h2>
<p>
Once downloaded, you will end up with a directory containing
another directory named <em>Content</em>. That <em>Content</em>
directory holds the data dump of each site, compressed in a
<a href="http://www.7-zip.org/">7-zip</a> file.
</p>
<p>
For each of the sites you wish to import into Stackdump, extract
the compressed file to a temporary location (each compressed
file contains another directory inside it with the actual data,
so you can just extract each file into the same temporary
location).
</p>
<h2>Import them into Stackdump</h2>
<p>
This process can take 10 hours or more, depending on the size of
the dump you're trying to import.
</p>
<p>
Before you can import data though, you need to download the
required metadata so Stackdump can load the dumps properly.
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>
Execute the following command -
<pre>./start_python.sh python/src/stackdump/dataproc/get_sites_info.py</pre>
</li>
</ol>
<p>
Now that you have the site metadata, you can import the dumps.
For each dump you wish to import, do the following -
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>Find the directory containing the data dump XML files. This is likely to be a directory inside the temporary location you extracted to earlier. The directory will contain files like <em>posts.xml</em>, <em>users.xml</em> and <em>comments.xml</em>.</li>
<li>
Execute the following command, replacing <em>path_to_dir_with_xml</em> with the path from the previous step -
<pre>./start_python.sh python/src/stackdump/dataproc/import.py path_to_dir_with_xml</pre>
</li>
</ol>
</div>
</div>
{% endblock %}
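
As a companion to the 'Extract the dump' step above, the extraction can be scripted. The helper below is a hypothetical sketch (not part of Stackdump); it assumes the 7z command-line tool is installed and on the PATH, and that the dump's Content directory holds one .7z archive per site:

# hypothetical helper for the 'Extract the dump' step; not part of Stackdump
# assumes the `7z` command-line tool is installed and on the PATH
import glob
import os
import subprocess

def extract_dumps(content_dir, target_dir):
    '''Extract every per-site .7z archive into one temporary location.'''
    for archive in sorted(glob.glob(os.path.join(content_dir, '*.7z'))):
        # each archive contains its own directory of XML files, so all of
        # them can share the same target directory without clobbering
        subprocess.check_call(['7z', 'x', '-o' + target_dir, archive])

# example: extract_dumps('/downloads/dump/Content', '/tmp/stackdump-data')

Each extracted directory can then be passed to import.py as the path_to_dir_with_xml argument described above.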