mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Added a 'no data' page with instructions on how to import data into Stackdump.

Samuel Lai 2012-02-12 13:56:37 +11:00
parent adccd41724
commit db3bf11310
3 changed files with 146 additions and 6 deletions

View File

@@ -317,4 +317,31 @@ h1.answers {
     border-top: solid 1px #999999;
     background-color: #CCCCCC;
     padding: 7px 10px;
 }
+
+.nodata.row {
+    margin-top: 50px;
+    margin-bottom: 20px;
+}
+
+.nodata h2 {
+    margin-top: 25px;
+    margin-bottom: 7px;
+}
+
+.nodata li {
+    color: inherit;
+    border-bottom: 1px solid #F2F2F2;
+    padding-bottom: 7px;
+    margin-bottom: 7px;
+}
+
+.nodata li:last-of-type {
+    border-bottom: none;
+    padding-bottom: inherit;
+    margin-bottom: inherit;
+}
+
+.nodata pre {
+    margin-top: 7px;
+}

View File

@@ -13,9 +13,10 @@ except ImportError:
     # For Python >= 2.6
     import json

-from bottle import get, run, static_file, debug, request, HTTPError
+from bottle import get, run, static_file, debug, request, error, HTTPError
 from jinja2 import Environment, PackageLoader
-from sqlobject import sqlhub, connectionForURI, AND, OR, IN, SQLObjectNotFound
+from sqlobject import sqlhub, connectionForURI, AND, OR, IN, SQLObjectNotFound
+from sqlobject.dberrors import OperationalError
 from pysolr import Solr
 import iso8601
@@ -191,13 +192,23 @@ def site_logos(site_key):
 def serve_static(filename):
     return static_file(filename, root=MEDIA_ROOT)

+@error(500)
+@uses_templates
+def error500(error):
+    ex = error.exception
+    if isinstance(ex, NoSitesImportedError):
+        return render_template('nodata.html')
+
+    # otherwise, return the standard error message
+    return repr(error)
+
 @get('/')
 @uses_templates
 @uses_solr
 @uses_db
 def index():
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()
     context['random_questions'] = get_random_questions()
@@ -210,7 +221,7 @@ def index():
 @uses_db
 def site_index(site_key):
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     try:
         context['site'] = Site.selectBy(key=site_key).getOne()
@@ -227,7 +238,7 @@ def site_index(site_key):
 @uses_db
 def search():
     context = { }
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     search_context = perform_search()
     if not search_context:
@@ -244,7 +255,7 @@ def search():
 def site_search(site_key):
     context = { }
     # the template uses this to allow searching on other sites
-    context['sites'] = Site.select()
+    context['sites'] = get_sites()

     try:
         context['site'] = Site.selectBy(key=site_key).getOne()
@@ -325,6 +336,33 @@ def get_template_settings():
     return template_settings

+class NoSitesImportedError(Exception):
+    def __init__(self, cause=None):
+        self.cause = cause
+
+    def __str__(self):
+        s = 'NoSitesImportedError('
+        if self.cause:
+            s += str(type(self.cause)) + ' ' + str(self.cause)
+        s += ')'
+        return s
+
+def get_sites():
+    '''\
+    Retrieves a list of Site objects or if there are none, raises a
+    NoSitesImportedError. This error is designed to trigger the 500 error
+    handler.
+    '''
+    try:
+        sites = list(Site.select())
+        if len(sites) == 0:
+            raise NoSitesImportedError()
+        return sites
+    except OperationalError as e:
+        raise NoSitesImportedError(e)
+
 def decode_json_fields(obj):
     '''\
     Looks for keys in obj that end in -json, decodes the corresponding value and

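How the pieces above fit together: under Bottle's default catchall behaviour, an uncaught exception raised inside a route handler is wrapped in an HTTPError whose .exception attribute holds the original exception, and that HTTPError is passed to the registered @error(500) handler. This is why get_sites() can simply raise NoSitesImportedError and let error500() decide whether to render the 'no data' page. A minimal standalone sketch of the pattern (illustrative only, not code from this commit):

# a minimal sketch of the error-page pattern used above; the route and
# message here are illustrative, not Stackdump's actual code
from bottle import error, get, run

class NoSitesImportedError(Exception):
    pass

@error(500)
def error500(err):
    # Bottle attaches the original exception to the HTTPError it builds
    if isinstance(err.exception, NoSitesImportedError):
        return 'No data has been imported yet.'
    return repr(err)

@get('/')
def index():
    raise NoSitesImportedError()  # simulates an empty database

# run(host='localhost', port=8080)  # visit / to see the custom error page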
View File

@@ -0,0 +1,75 @@
{% extends 'base.html' %}
{% block title %}
Stackdump - Import data instructions
{% endblock %}
{% block body %}
<div class="row nodata">
<div class="span16">
<h1>No data has been imported... yet.</h1>
<p>
Before you can start using Stackdump, you need to import some
<a href="http://www.stackexchange.com">StackExchange</a> data
into it. To do this, you'll need to download the StackExchange
data dump, then run some scripts on the Stackdump server.
</p>
<h2>Get the StackExchange data dump</h2>
<p>
StackExchange data dumps are distributed using BitTorrent. You
will need a BitTorrent client like
<a href="http://www.utorrent.com">uTorrent</a> to download it.
</p>
<p>
The data dumps can be downloaded from
<a href="http://www.clearbits.net/creators/146-stack-exchange-data-dump">http://www.clearbits.net/creators/146-stack-exchange-data-dump</a>.
</p>
<h2>Extract the dump</h2>
<p>
Once downloaded, you will end up with a directory containing
another directory named <em>Content</em>. That <em>Content</em>
directory holds the data dump of each site, compressed in a
<a href="http://www.7-zip.org/">7-zip</a> file.
</p>
<p>
For each of the sites you wish to import into Stackdump, extract
the compressed file to a temporary location (each compressed
file contains another directory inside it with the actual data,
so you can just extract each file into the same temporary
location).
</p>
<h2>Import them into Stackdump</h2>
<p>
This process can take 10 hours or more, depending on the size of
the dump you're trying to import.
</p>
<p>
Before you can import data though, you need to download the
required metadata so Stackdump can load the dumps properly.
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>
Execute the following command -
<pre>./start_python.sh python/src/stackdump/dataproc/get_sites_info.py</pre>
</li>
</ol>
<p>
Now that you have the site metadata, you can import the dumps.
For each dump you wish to import, do the following -
</p>
<ol>
<li>Fire up a terminal/command prompt and navigate to the directory you extracted Stackdump into.</li>
<li>Find the directory containing the data dump XML files. This is likely to be a directory inside the temporary location you extracted to earlier. The directory will contain files like <em>posts.xml</em>, <em>users.xml</em> and <em>comments.xml</em>.</li>
<li>
Execute the following command, replacing <em>path_to_dir_with_xml</em> with the path from the previous step -
<pre>./start_python.sh python/src/stackdump/dataproc/import.py path_to_dir_with_xml</pre>
</li>
</ol>
</div>
</div>
{% endblock %}
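
As a companion to the 'Extract the dump' step above, the extraction can be scripted. The helper below is a hypothetical sketch (not part of Stackdump); it assumes the 7z command-line tool is installed and on the PATH, and that the dump's Content directory holds one .7z archive per site:

# hypothetical helper for the 'Extract the dump' step; not part of Stackdump
# assumes the `7z` command-line tool is installed and on the PATH
import glob
import os
import subprocess

def extract_dumps(content_dir, target_dir):
    '''Extract every per-site .7z archive into one temporary location.'''
    for archive in sorted(glob.glob(os.path.join(content_dir, '*.7z'))):
        # each archive contains its own directory of XML files, so all of
        # them can share the same target directory without clobbering
        subprocess.check_call(['7z', 'x', '-o' + target_dir, archive])

# example: extract_dumps('/downloads/dump/Content', '/tmp/stackdump-data')

Each extracted directory can then be passed to import.py as the path_to_dir_with_xml argument described above.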