diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2a66262 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +^JAVA_CMD$ +^PYTHON_CMD$ + +.DS_Store + +# ignore any data +^data/*$ + +# ignore working bytecode +\.class$ +\.pyc$ + +^datadump/* + +# ignore test and tutorial directories +test/*$ +tests/*$ +testsuite/*$ +tutorial/*$ + +# Solr/Jetty +^java/solr/server/solr-webapp/* +^java/solr/server/logs/* + +# ignore the downloaded logos +^python/media/images/logos/* + +# PyCharm project files +^.idea/ diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..26d3352 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,3 @@ +# Default ignored files +/shelf/ +/workspace.xml diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..1d755ad --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/stackdump.iml b/.idea/stackdump.iml new file mode 100644 index 0000000..d0876a7 --- /dev/null +++ b/.idea/stackdump.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/manage.sh b/manage.sh index e0639cc..ec6bfca 100755 --- a/manage.sh +++ b/manage.sh @@ -35,5 +35,5 @@ else # shift off the command name so we don't pass it on shift - $SCRIPT_DIR/start_python.sh $command "$@" + $SCRIPT_DIR/start_python3.sh $command "$@" fi diff --git a/python/src/stackdump/commands/download_site_info.py b/python/src/stackdump/commands/download_site_info.py index 751fb16..08a379d 100644 --- a/python/src/stackdump/commands/download_site_info.py +++ b/python/src/stackdump/commands/download_site_info.py @@ -6,29 +6,37 @@ import urllib from xml.etree import ElementTree import os import sys +We + +se_dir = os.path.join(os.environ.get('HOME'), 'stackexchange') +sites_path = os.path.join(se_dir, 'Sites.xml') script_dir = os.path.dirname(sys.argv[0]) sites_file_path = os.path.join(script_dir, '../../../../data/sites') -# ensure the data directory exists +# ensure the data directory exists\\\\ if not os.path.exists(os.path.dirname(sites_file_path)): os.mkdir(os.path.dirname(sites_file_path)) # download the sites RSS file -print 'Downloading StackExchange sites RSS file...', -urllib.urlretrieve('http://stackexchange.com/feeds/sites', sites_file_path) -print 'done.' +print('Downloading StackExchange sites XML file...',) +urllib.urlretrieve('https://archive.org/download/stackexchange/Sites.xml', sites_file_path) +print('done.') -print '' +print('') -# parse sites RSS file and download logos -logos_dir_path = os.path.join(script_dir, '../../../media/images/logos') +# parse sites RSS file and download logosc +images_dir_path = os.path.join(script_dir, '../../../media/images/logos') +logos_dir_path = os.path.join(images_dir_path, 'logos') +icons_dir_path = os.path.join(images_dir_path, 'icons') +badgos_dir_path = os.path.join(images_dir_path, 'badgos') if not os.path.exists(logos_dir_path): os.mkdir(logos_dir_path) -with open(sites_file_path) as f: +with open(sites_path) as f: sites_file = ElementTree.parse(f) - entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry') + entries = sites_file.findall('sites/row') + print(entries) for entry in entries: entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text.encode('ascii', 'ignore') @@ -42,6 +50,7 @@ with open(sites_file_path) as f: if site_key.endswith('.stackexchange'): site_key = site_key[:-len('.stackexchange')] - print 'Downloading logo for %s...' % entry_title, - urllib.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key, os.path.join(logos_dir_path, '%s.png' % site_key)) - print 'done.' + print('Downloading logo for %s...' % entry_title, + urllib.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key, + os.path.join(logos_dir_path, '%s.png' % site_key))) + print('done.') diff --git a/start_python3.sh b/start_python3.sh new file mode 100755 index 0000000..4911e4b --- /dev/null +++ b/start_python3.sh @@ -0,0 +1,54 @@ +#!/bin/bash + +## +# This script attempts to find a version of Python on the system PATH, and +# checks that it is 2.5+. +# +# A alternate Python command can be specified in a file named PYTHON_CMD in this +# script's directory. This path will override any lookup on the system PATH. +## + +# FUNCTIONS +function checkPythonVersion { + if [ ! -z "$1" ] + then + PYTHON_VER_MAJOR=`echo $1 | cut -d "." -f 1` + PYTHON_VER_MINOR=`echo $1 | cut -d "." -f 2` + + if [ $PYTHON_VER_MAJOR -eq "3" -a $PYTHON_VER_MINOR -ge "5" ] + then + return 1 + fi + fi + + return 0 +} + +# MAIN +SCRIPT_DIR=`dirname $0` +PYTHON_CMD=python3 + +# if there is a PYTHON_CMD file in the script directory, use that instead +if [ -e "$SCRIPT_DIR/PYTHON_CMD" ] +then + PYTHON_CMD=`cat "$SCRIPT_DIR/PYTHON_CMD"` +fi + +if [ ! -z "`which "$PYTHON_CMD" 2>/dev/null`" ] +then + # check if Python is the right version + PYTHON_VER=`"$PYTHON_CMD" -V 2>&1 | cut -d " " -f 2` + checkPythonVersion "$PYTHON_VER" + if [ $? == 1 ] + then + echo "Using Python `which "$PYTHON_CMD"`" + + # execution ends here if Python is found + PYTHONPATH=$SCRIPT_DIR/python3/packages:$SCRIPT_DIR/python3/src:$PYTHONPATH + env "PYTHONPATH=$PYTHONPATH" "$PYTHON_CMD" "$@" + exit $? + fi +fi + +# if we get here, it means the right version of Python was not found +echo 'No suitable version of Python was found. Python 2.5 or later is required.'