1
0
mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Renamed the commands directory and added a script to make them easier to call.

Also deleted the get_sites script as it wasn't very useful, and renamed others
to be more meaningful.
This commit is contained in:
Samuel Lai 2012-08-18 20:50:13 +10:00
parent e776e95d84
commit e8adaa9b54
5 changed files with 39 additions and 38 deletions

39
manage.sh Executable file
View File

@ -0,0 +1,39 @@
#!/bin/bash
##
# This script makes it easier to execute the management commands for Stackdump.
#
# Run without parameters to get a list of commands.
#
# Usage: manage.sh [command [args...]]
#   command  name of a file in python/src/stackdump/commands without its .py
#            extension; every remaining argument is handed to it unchanged.
#
# Exits with status 1 if the requested command does not exist.
##

# Resolve everything relative to this script so it can be called from anywhere,
# including from install paths that contain spaces.
SCRIPT_DIR=$(dirname -- "$0")
COMMANDS_DIR="$SCRIPT_DIR/python/src/stackdump/commands"

if [ -z "$1" ]
then
    echo "Stackdump management commands:"

    # Glob rather than parse ls output so odd file names stay in one piece.
    for path in "$COMMANDS_DIR"/*
    do
        # An unmatched glob is left as the literal pattern; skip it.
        [ -e "$path" ] || [ -L "$path" ] || continue

        name=${path##*/}
        # Show only the part of the file name before the first dot.
        printf '\t%s\n' "${name%%.*}"
    done

    echo
    echo "Execute $0 command to run it, e.g. $0 manage_sites"
else
    # look for command
    command="$COMMANDS_DIR/$1.py"
    if [ ! -e "$command" ]
    then
        echo "The command $1 does not exist. Run without any parameters to list commands." >&2
        exit 1
    fi

    # shift off the command name so we don't pass it on
    shift

    # "$@" keeps each remaining argument as one word, whitespace included.
    "$SCRIPT_DIR/start_python.sh" "$command" "$@"
fi

View File

@ -1,38 +0,0 @@
#!/usr/bin/env python

# This script reads a directory containing the Stack Exchange data dump and
# returns a list of sites.
#
# It takes exactly one argument: the path to the data dump directory, which
# must contain a 'Content' subdirectory holding the .7z archives.

import sys
import os
import re

# Matches "<site>.7z" as well as split archives named "<site>.7z.NNN"; the
# site name lands in whichever capture group belongs to the matching branch.
CONTENT_FILENAME_RE = re.compile(r'^(.+)\.7z$|^(.+)\.7z\.\d{3}$')


def find_sites(content_path):
    """Return the set of site names found among the archives in content_path.

    Every entry of the directory is matched against CONTENT_FILENAME_RE;
    entries that do not match are ignored, and the parts of a split archive
    collapse into a single site name.
    """
    sites = set()
    for f in os.listdir(content_path):
        match = CONTENT_FILENAME_RE.match(f)
        if match:
            # lastindex is the group of the alternative that matched.
            sites.add(match.group(match.lastindex))
    return sites


def main(argv):
    """Print the sites found in the data dump directory given in argv[1].

    argv is the full argument vector including the program name. Returns the
    process exit status: 0 on success, 1 when the argument count is wrong or
    the expected directories are missing.
    """
    if len(argv) != 2:
        print('One argument is expected - the path to the data dump directory.')
        return 1

    dump_path = argv[1]
    print('Using the data dump path: ' + dump_path + '\n')

    if not os.path.exists(dump_path):
        print('The given data dump path does not exist.')
        return 1

    # we expect it to contain an 'Content' directory
    dump_path = os.path.join(dump_path, 'Content')
    if not os.path.exists(dump_path):
        print('The given data dump path is invalid. The Content subdirectory was expected.')
        return 1

    # Call syntax with a single argument prints the same text on Python 2 and
    # also parses on Python 3, unlike the print statement.
    print(find_sites(dump_path))
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))