diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..2a66262
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+/JAVA_CMD
+/PYTHON_CMD
+
+.DS_Store
+
+# ignore any data
+/data/
+
+# ignore working bytecode
+*.class
+*.pyc
+
+/datadump/
+
+# ignore test and tutorial directories
+test/
+tests/
+testsuite/
+tutorial/
+
+# Solr/Jetty
+/java/solr/server/solr-webapp/
+/java/solr/server/logs/
+
+# ignore the downloaded logos
+/python/media/images/logos/
+
+# PyCharm project files
+/.idea/
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..26d3352
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,3 @@
+# Default ignored files
+/shelf/
+/workspace.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..1d755ad
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/stackdump.iml b/.idea/stackdump.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/stackdump.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/manage.sh b/manage.sh
index e0639cc..ec6bfca 100755
--- a/manage.sh
+++ b/manage.sh
@@ -35,5 +35,5 @@ else
# shift off the command name so we don't pass it on
shift
- $SCRIPT_DIR/start_python.sh $command "$@"
+ $SCRIPT_DIR/start_python3.sh $command "$@"
fi
diff --git a/python/src/stackdump/commands/download_site_info.py b/python/src/stackdump/commands/download_site_info.py
index 751fb16..08a379d 100644
--- a/python/src/stackdump/commands/download_site_info.py
+++ b/python/src/stackdump/commands/download_site_info.py
@@ -6,29 +6,37 @@ import urllib
from xml.etree import ElementTree
import os
import sys
+import urllib.request
+
+se_dir = os.path.join(os.environ.get('HOME'), 'stackexchange')
+sites_path = os.path.join(se_dir, 'Sites.xml')
script_dir = os.path.dirname(sys.argv[0])
sites_file_path = os.path.join(script_dir, '../../../../data/sites')
-# ensure the data directory exists
+# ensure the data directory exists
if not os.path.exists(os.path.dirname(sites_file_path)):
os.mkdir(os.path.dirname(sites_file_path))
# download the sites RSS file
-print 'Downloading StackExchange sites RSS file...',
-urllib.urlretrieve('http://stackexchange.com/feeds/sites', sites_file_path)
-print 'done.'
+print('Downloading StackExchange sites XML file...', end=' ')  # keep the cursor on this line so 'done.' follows
+urllib.request.urlretrieve('https://archive.org/download/stackexchange/Sites.xml', sites_file_path)  # Python 3 location of urlretrieve
+print('done.')
-print ''
+print('')
-# parse sites RSS file and download logos
-logos_dir_path = os.path.join(script_dir, '../../../media/images/logos')
+# parse the sites XML file and download logos
+images_dir_path = os.path.join(script_dir, '../../../media/images')  # parent directory shared by the logo, icon and badge folders
+logos_dir_path = os.path.join(images_dir_path, 'logos')
+icons_dir_path = os.path.join(images_dir_path, 'icons')
+badgos_dir_path = os.path.join(images_dir_path, 'badgos')
if not os.path.exists(logos_dir_path):
os.mkdir(logos_dir_path)
-with open(sites_file_path) as f:
+with open(sites_path) as f:
sites_file = ElementTree.parse(f)
- entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry')
+ entries = sites_file.findall('sites/row')
+ print(entries)
for entry in entries:
entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text.encode('ascii', 'ignore')
@@ -42,6 +50,7 @@ with open(sites_file_path) as f:
if site_key.endswith('.stackexchange'):
site_key = site_key[:-len('.stackexchange')]
- print 'Downloading logo for %s...' % entry_title,
- urllib.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key, os.path.join(logos_dir_path, '%s.png' % site_key))
- print 'done.'
+ print('Downloading logo for %s...' % entry_title, end=' ')  # progress message only; the download is its own statement
+ urllib.request.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key,
+                            os.path.join(logos_dir_path, '%s.png' % site_key))
+ print('done.')
diff --git a/start_python3.sh b/start_python3.sh
new file mode 100755
index 0000000..4911e4b
--- /dev/null
+++ b/start_python3.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+##
+# This script attempts to find a version of Python on the system PATH, and
+# checks that it is 3.5+.
+#
+# An alternate Python command can be specified in a file named PYTHON_CMD in this
+# script's directory. This path will override any lookup on the system PATH.
+##
+
+# FUNCTIONS
+function checkPythonVersion {  # $1: dotted version string; returns 1 if acceptable, 0 otherwise
+ if [ ! -z "$1" ]  # an empty version string is never acceptable
+ then
+ PYTHON_VER_MAJOR=`echo $1 | cut -d "." -f 1`  # first dot-separated field
+ PYTHON_VER_MINOR=`echo $1 | cut -d "." -f 2`  # second dot-separated field
+
+ if [ $PYTHON_VER_MAJOR -eq "3" -a $PYTHON_VER_MINOR -ge "5" ]  # accept only 3.x with x >= 5
+ then
+ return 1  # NOTE: 1 signals success here; the caller tests for $? == 1
+ fi
+ fi
+
+ return 0  # version string empty or not an accepted version
+}
+
+# MAIN
+SCRIPT_DIR=`dirname $0`
+PYTHON_CMD=python3
+
+# if there is a PYTHON_CMD file in the script directory, use that instead
+if [ -e "$SCRIPT_DIR/PYTHON_CMD" ]
+then
+ PYTHON_CMD=`cat "$SCRIPT_DIR/PYTHON_CMD"`
+fi
+
+if [ ! -z "`which "$PYTHON_CMD" 2>/dev/null`" ]
+then
+ # check if Python is the right version
+ PYTHON_VER=`"$PYTHON_CMD" -V 2>&1 | cut -d " " -f 2`
+ checkPythonVersion "$PYTHON_VER"
+ if [ $? == 1 ]
+ then
+ echo "Using Python `which "$PYTHON_CMD"`"
+
+ # execution ends here if Python is found
+ PYTHONPATH=$SCRIPT_DIR/python3/packages:$SCRIPT_DIR/python3/src:$PYTHONPATH
+ env "PYTHONPATH=$PYTHONPATH" "$PYTHON_CMD" "$@"
+ exit $?
+ fi
+fi
+
+# if we get here, it means the right version of Python was not found
+echo 'No suitable version of Python was found. Python 3.5 or later is required.'