mirror of
https://github.com/djohnlewis/stackdump
synced 2024-12-04 23:17:37 +00:00
Fixed a bug where site names with non-ASCII characters caused a crash.
This commit is contained in:
parent
9cac41929b
commit
706fd5ef9d
@@ -31,7 +31,7 @@ with open(sites_file_path) as f:
|
||||
entries = sites_file.findall('{http://www.w3.org/2005/Atom}entry')
|
||||
|
||||
for entry in entries:
|
||||
-entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text
|
||||
+entry_title = entry.find('{http://www.w3.org/2005/Atom}title').text.encode('ascii', 'ignore')
|
||||
|
||||
# extract the key from the url - remove the http:// and .com
|
||||
site_key = entry.find('{http://www.w3.org/2005/Atom}id').text
|
||||
|
@@ -689,7 +689,13 @@ def import_site(xml_root, site_name, dump_date, site_desc, site_key,
|
||||
if not site_desc:
|
||||
site_desc = entry.find('{http://www.w3.org/2005/Atom}summary').text.strip()
|
||||
|
||||
-print 'Name: %s\nKey: %s\nDescription: %s\nDump Date: %s\nBase URL: %s\n' % (site_name, site_key, site_desc, dump_date, site_base_url)
|
||||
+print 'Name: %s\nKey: %s\nDescription: %s\nDump Date: %s\nBase URL: %s\n' % (
|
||||
+site_name.encode('ascii', 'ignore') if site_name else None,
|
||||
+site_key,
|
||||
+site_desc.encode('ascii', 'ignore') if site_desc else None,
|
||||
+dump_date,
|
||||
+site_base_url
|
||||
+)
|
||||
|
||||
# the base URL is optional.
|
||||
if not (site_name and site_key and site_desc and dump_date):
|
||||
|
Loading…
Reference in New Issue
Block a user