Mirror of https://github.com/djohnlewis/stackdump (synced 2024-12-04 23:17:37 +00:00)
Added scripts for deleting a site from the system, and getting site info from the net.
parent 489d9aec22
commit 5e930bbc08
python/src/stackdump/dataproc/delete_site.py (new file, +47 lines)
@@ -0,0 +1,47 @@
#!/usr/bin/env python

# This script deletes the site specified by the ID in the first parameter.

import os
import sys

from sqlobject import sqlhub, connectionForURI
from pysolr import Solr

from stackdump.models import Site

script_dir = os.path.dirname(sys.argv[0])

if len(sys.argv) < 2:
    print('The site ID needs to be specified as the first parameter.')
    sys.exit(1)

# work out the path to the database
db_path = os.path.abspath(os.path.join(script_dir, '../../../../data/stackdump.sqlite'))

# connect to the database
print('Connecting to the database...')
conn_str = 'sqlite://' + db_path
sqlhub.processConnection = connectionForURI(conn_str)
print('Connected.\n')

# connect to solr
print('Connecting to solr...')
solr = Solr("http://localhost:8983/solr/")
print('Connected.\n')

site_id = int(sys.argv[1])
# getOne(None) returns None instead of raising if no row matches
site = Site.select(Site.q.id == site_id).getOne(None)
if not site:
    print('Site ID %d does not exist.' % site_id)
    sys.exit(1)

site_name = site.name
print('Deleting site "%s" from the database... ' % site_name)
sys.stdout.flush()
Site.delete(site.id)  # the relationship cascades, so other rows will be deleted
print('Deleted.\n')

print('Deleting site "%s" from solr... ' % site_name)
solr.delete(q='siteName:"%s"' % site_name)
print('Deleted.\n')
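One caveat worth noting: the final solr.delete() call interpolates the site name straight into a Solr query string, so a name containing a quote or another Lucene query metacharacter would produce a malformed query. Below is a minimal sketch of escaping the value first, assuming the standard Lucene special-character set; the helper name and the exact escape set are illustrative, not part of this commit.

# Illustrative helper (assumption, not in this commit): backslash-escape
# characters that are special in Lucene/Solr query syntax.
SOLR_SPECIAL_CHARS = '\\+-&|!(){}[]^"~*?:/'

def escape_solr_value(value):
    # prefix every special character (including backslash) with a backslash
    return ''.join('\\' + c if c in SOLR_SPECIAL_CHARS else c for c in value)

# usage with the delete query above:
# solr.delete(q='siteName:"%s"' % escape_solr_value(site_name))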
python/src/stackdump/dataproc/get_sites_info.py (new file, +43 lines)
@@ -0,0 +1,43 @@
#!/usr/bin/env python

# This script downloads the sites RSS file and associated logos from the net.

import urllib
from xml.etree import ElementTree
import os
import sys

script_dir = os.path.dirname(sys.argv[0])
sites_file_path = os.path.join(script_dir, '../../../../data/sites')

# download the sites RSS file
print 'Downloading StackExchange sites RSS file...',
urllib.urlretrieve('http://stackexchange.com/feeds/sites', sites_file_path)
print 'done.'

print ''

# parse sites RSS file and download logos
logos_dir_path = os.path.join(script_dir, '../../../media/images/logos')
if not os.path.exists(logos_dir_path):
    os.mkdir(logos_dir_path)

with open(sites_file_path) as f:
    sites_file = ElementTree.parse(f)
    entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry')

    for entry in entries:
        entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text

        # extract the key from the url - strip the http:// prefix and the
        # .com/.stackexchange suffixes
        site_key = entry.find('{http://www.w3.org/2005/Atom}id').text
        if site_key.startswith('http://'):
            site_key = site_key[len('http://'):]
        if site_key.endswith('.com'):
            site_key = site_key[:-len('.com')]
        if site_key.endswith('.stackexchange'):
            site_key = site_key[:-len('.stackexchange')]

        print 'Downloading logo for %s...' % entry_title,
        urllib.urlretrieve('http://sstatic.net/%s/img/icon-48.png' % site_key,
                           os.path.join(logos_dir_path, '%s.png' % site_key))
        print 'done.'
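For reference, the suffix-stripping above turns a feed entry id into the short key used in logo URLs and filenames. Here is a standalone restatement of that logic with example inputs and outputs; the helper name is illustrative, not part of this commit.

# Illustrative restatement (not in this commit) of the key extraction above.
def extract_site_key(site_url):
    """Strip the scheme and known suffixes from a site id URL, e.g.
    'http://stackoverflow.com' -> 'stackoverflow'
    'http://unix.stackexchange.com' -> 'unix'
    """
    key = site_url
    if key.startswith('http://'):
        key = key[len('http://'):]
    for suffix in ('.com', '.stackexchange'):  # same order as the script
        if key.endswith(suffix):
            key = key[:-len(suffix)]
    return key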