mirror of https://github.com/djohnlewis/stackdump synced 2025-02-23 13:44:46 +00:00

Added scripts for deleting a site from the system, and getting site info from the net.

This commit is contained in:
Samuel Lai 2011-11-01 22:02:25 +11:00
parent 489d9aec22
commit 5e930bbc08
2 changed files with 90 additions and 0 deletions

View File

@ -0,0 +1,47 @@
#!/usr/bin/env python
# This script deletes the site specified by the ID in the first parameter.
import os
import sys
from sqlobject import sqlhub, connectionForURI, AND, OR
from pysolr import Solr
from stackdump.models import Site
def main(argv=None):
    """Delete a site, identified by its numeric ID, from the database and solr.

    argv is the full argument vector (defaults to sys.argv); argv[0] is the
    script path, used to locate the data directory, and argv[1] is the ID of
    the site to delete.

    Returns a process exit code: 0 when the site was deleted, 1 when the ID
    is missing, is not an integer, or does not match an existing site.
    """
    if argv is None:
        argv = sys.argv

    # Validate the arguments before touching any data source, so that a bad
    # invocation fails fast instead of crashing with a traceback later on.
    if len(argv) < 2:
        print('The site ID needs to be specified as the first parameter.')
        return 1

    try:
        site_id = int(argv[1])
    except ValueError:
        print('The site ID must be an integer, got %r.' % argv[1])
        return 1

    # connect to the data sources
    script_dir = os.path.dirname(argv[0])
    db_path = os.path.abspath(
        os.path.join(script_dir, '../../../../data/stackdump.sqlite'))

    # connect to the database
    print('Connecting to the database...')
    conn_str = 'sqlite://' + db_path
    sqlhub.processConnection = connectionForURI(conn_str)

    # connect to solr
    print('Connecting to solr...')
    solr = Solr("http://localhost:8983/solr/")

    site = Site.select(Site.q.id == site_id).getOne(None)
    if site is None:
        print('Site ID %d does not exist.' % site_id)
        return 1

    # Remember the name now; it is still needed for the solr query after the
    # database row has been deleted.
    site_name = site.name

    print('Deleting site "%s" from the database... ' % site_name)
    Site.delete(site.id)  # the relationship cascades, so other rows will be deleted

    print('Deleting site "%s" from solr... ' % site_name)
    solr.delete(q='siteName:"%s"' % site_name)

    return 0


if __name__ == '__main__':
    sys.exit(main())

View File

@ -0,0 +1,43 @@
#!/usr/bin/env python
# This script downloads the StackExchange sites feed (Atom) and the associated site logos from the net.
import urllib
from xml.etree import ElementTree
import os
import sys
# XML namespace prefix of the elements in the sites feed.
ATOM_NS = '{http://www.w3.org/2005/Atom}'
SITES_FEED_URL = 'http://stackexchange.com/feeds/sites'
LOGO_URL_TEMPLATE = 'http://sstatic.net/%s/img/icon-48.png'


def site_key_from_url(site_url):
    """Return the short site key for a site URL taken from the feed.

    The URL scheme, a trailing '.com' and then a trailing '.stackexchange'
    are removed, in that order, e.g. 'http://cooking.stackexchange.com'
    becomes 'cooking' and 'http://stackoverflow.com' becomes 'stackoverflow'.
    Anything that does not match is left untouched.
    """
    site_key = site_url
    for scheme in ('http://', 'https://'):
        if site_key.startswith(scheme):
            site_key = site_key[len(scheme):]
            break
    # Order matters: '.stackexchange' is only a suffix once '.com' is gone.
    for suffix in ('.com', '.stackexchange'):
        if site_key.endswith(suffix):
            site_key = site_key[:-len(suffix)]
    return site_key


def main():
    """Download the sites feed and one logo per site listed in it.

    The feed is saved to the data directory and the logos to the media
    directory, both located relative to the directory of this script.
    Returns 0 on completion; download errors propagate to the caller.
    """
    script_dir = os.path.dirname(sys.argv[0])
    sites_file_path = os.path.join(script_dir, '../../../../data/sites')

    # download the sites feed
    # sys.stdout.write + flush is used instead of print so the progress text
    # stays on one line and is visible while the download is blocking.
    sys.stdout.write('Downloading StackExchange sites RSS file... ')
    sys.stdout.flush()
    urllib.urlretrieve(SITES_FEED_URL, sites_file_path)
    print('done.')
    print('')

    # parse the sites feed and download logos
    logos_dir_path = os.path.join(script_dir, '../../../media/images/logos')
    if not os.path.exists(logos_dir_path):
        # The logos are written into this directory, so it has to exist
        # before the first download.
        os.makedirs(logos_dir_path)

    # Binary mode lets the XML parser apply the encoding declared in the file.
    with open(sites_file_path, 'rb') as f:
        sites_file = ElementTree.parse(f)

    for entry in sites_file.findall(ATOM_NS + 'entry'):
        entry_title = entry.find(ATOM_NS + 'title').text

        # extract the key from the url - remove the http:// and .com
        site_key = site_key_from_url(entry.find(ATOM_NS + 'id').text)

        sys.stdout.write('Downloading logo for %s... ' % entry_title)
        sys.stdout.flush()
        urllib.urlretrieve(
            LOGO_URL_TEMPLATE % site_key,
            os.path.join(logos_dir_path, '%s.png' % site_key))
        print('done.')

    return 0


if __name__ == '__main__':
    sys.exit(main())