1
0
mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Fixed a bug where site names with non-ASCII characters caused a crash.

This commit is contained in:
Sam 2013-10-14 07:32:45 +11:00
parent 9cac41929b
commit 706fd5ef9d
2 changed files with 8 additions and 2 deletions

View File

@@ -31,7 +31,7 @@ with open(sites_file_path) as f:
entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry')
for entry in entries:
entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text
entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text.encode('ascii', 'ignore')
# extract the key from the url - remove the http:// and .com
site_key = entry.find('{http://www.w3.org/2005/Atom}id').text

View File

@@ -689,7 +689,13 @@ def import_site(xml_root, site_name, dump_date, site_desc, site_key,
if not site_desc:
site_desc = entry.find('{http://www.w3.org/2005/Atom}summary').text.strip()
print 'Name: %s\nKey: %s\nDescription: %s\nDump Date: %s\nBase URL: %s\n' % (site_name, site_key, site_desc, dump_date, site_base_url)
print 'Name: %s\nKey: %s\nDescription: %s\nDump Date: %s\nBase URL: %s\n' % (
site_name.encode('ascii', 'ignore') if site_name else None,
site_key,
site_desc.encode('ascii', 'ignore') if site_desc else None,
dump_date,
site_base_url
)
# the base URL is optional.
if not (site_name and site_key and site_desc and dump_date):