1
0
mirror of https://github.com/djohnlewis/stackdump synced 2025-12-06 07:53:28 +00:00

added gitignore

This commit is contained in:
djohnlewis
2021-05-24 09:15:01 +01:00
parent f20e281d3d
commit 20693a8764
9 changed files with 136 additions and 13 deletions

View File

@@ -6,29 +6,37 @@ import urllib
from xml.etree import ElementTree
import os
import sys
We
se_dir = os.path.join(os.environ.get('HOME'), 'stackexchange')
sites_path = os.path.join(se_dir, 'Sites.xml')
script_dir = os.path.dirname(sys.argv[0])
sites_file_path = os.path.join(script_dir, '../../../../data/sites')
# ensure the data directory exists
# ensure the data directory exists
if not os.path.exists(os.path.dirname(sites_file_path)):
os.mkdir(os.path.dirname(sites_file_path))
# download the sites RSS file
print 'Downloading StackExchange sites RSS file...',
urllib.urlretrieve('http://stackexchange.com/feeds/sites', sites_file_path)
print 'done.'
print('Downloading StackExchange sites XML file...',)
urllib.urlretrieve('https://archive.org/download/stackexchange/Sites.xml', sites_file_path)
print('done.')
print ''
print('')
# parse sites RSS file and download logos
logos_dir_path = os.path.join(script_dir, '../../../media/images/logos')
# parse sites RSS file and download logos
images_dir_path = os.path.join(script_dir, '../../../media/images/logos')
logos_dir_path = os.path.join(images_dir_path, 'logos')
icons_dir_path = os.path.join(images_dir_path, 'icons')
badgos_dir_path = os.path.join(images_dir_path, 'badgos')
if not os.path.exists(logos_dir_path):
os.mkdir(logos_dir_path)
with open(sites_file_path) as f:
with open(sites_path) as f:
sites_file = ElementTree.parse(f)
entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry')
entries = sites_file.findall('sites/row')
print(entries)
for entry in entries:
entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text.encode('ascii', 'ignore')
@@ -42,6 +50,7 @@ with open(sites_file_path) as f:
if site_key.endswith('.stackexchange'):
site_key = site_key[:-len('.stackexchange')]
print 'Downloading logo for %s...' % entry_title,
urllib.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key, os.path.join(logos_dir_path, '%s.png' % site_key))
print 'done.'
print('Downloading logo for %s...' % entry_title,
urllib.urlretrieve('http://cdn.sstatic.net/Sites/%s/img/icon-48.png' % site_key,
os.path.join(logos_dir_path, '%s.png' % site_key)))
print('done.')