1
0
mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 23:17:37 +00:00

Fixed an uncaught exception caused by broken URLs.

Examples of fixed pages: photo/11689, stackoverflow/315911
This commit is contained in:
Alexei Baboulevitch 2014-05-12 14:35:28 +02:00
parent a06d2a4c55
commit 7f6ed7b438

View File

@@ -827,27 +827,31 @@ def _rewrite_html(html, app_url_root, sites_by_urls):
internal_link = False internal_link = False
url = t.get('href', None) url = t.get('href', None)
if url: if url:
host = urllib2.Request(url).get_host() try:
site = sites_by_urls.get(host, None) host = urllib2.Request(url).get_host()
if site: except ValueError:
# rewrite this URL for stackdump # invalid URL or local anchor, leaving as-is
question_id = SE_QUESTION_ID_RE.search(url) internal_link = True
if question_id: else:
question_id = question_id.groupdict()['id'] site = sites_by_urls.get(host, None)
url = '%s%s/%s' % (app_url_root, site.key, question_id) if site:
t.set('href', url) # rewrite this URL for stackdump
t.set('class', t.get('class', '') + ' internal-link') question_id = SE_QUESTION_ID_RE.search(url)
internal_link = True if question_id:
question_id = question_id.groupdict()['id']
answer_id = SE_ANSWER_ID_RE.search(url) url = '%s%s/%s' % (app_url_root, site.key, question_id)
if answer_id: t.set('href', url)
answer_id = answer_id.groupdict()['id'] internal_link = True
url = '%s%s/a/%s' % (app_url_root, site.key, answer_id) answer_id = SE_ANSWER_ID_RE.search(url)
t.set('href', url) if answer_id:
t.set('class', t.get('class', '') + ' internal-link') answer_id = answer_id.groupdict()['id']
internal_link = True url = '%s%s/a/%s' % (app_url_root, site.key, answer_id)
t.set('href', url)
internal_link = True
if not internal_link: if internal_link:
t.set('class', t.get('class', '') + ' internal-link')
else:
t.set('class', t.get('class', '') + ' external-link') t.set('class', t.get('class', '') + ' external-link')
# get a string back # get a string back