mirror of
https://github.com/djohnlewis/stackdump
synced 2025-04-07 10:13:27 +00:00
Fixed an uncaught exception caused by broken URLs.
Examples of fixed pages: photo/11689, stackoverflow/315911
This commit is contained in:
parent
a06d2a4c55
commit
7f6ed7b438
@ -827,7 +827,12 @@ def _rewrite_html(html, app_url_root, sites_by_urls):
|
|||||||
internal_link = False
|
internal_link = False
|
||||||
url = t.get('href', None)
|
url = t.get('href', None)
|
||||||
if url:
|
if url:
|
||||||
|
try:
|
||||||
host = urllib2.Request(url).get_host()
|
host = urllib2.Request(url).get_host()
|
||||||
|
except ValueError:
|
||||||
|
# invalid URL or local anchor, leaving as-is
|
||||||
|
internal_link = True
|
||||||
|
else:
|
||||||
site = sites_by_urls.get(host, None)
|
site = sites_by_urls.get(host, None)
|
||||||
if site:
|
if site:
|
||||||
# rewrite this URL for stackdump
|
# rewrite this URL for stackdump
|
||||||
@ -836,18 +841,17 @@ def _rewrite_html(html, app_url_root, sites_by_urls):
|
|||||||
question_id = question_id.groupdict()['id']
|
question_id = question_id.groupdict()['id']
|
||||||
url = '%s%s/%s' % (app_url_root, site.key, question_id)
|
url = '%s%s/%s' % (app_url_root, site.key, question_id)
|
||||||
t.set('href', url)
|
t.set('href', url)
|
||||||
t.set('class', t.get('class', '') + ' internal-link')
|
|
||||||
internal_link = True
|
internal_link = True
|
||||||
|
|
||||||
answer_id = SE_ANSWER_ID_RE.search(url)
|
answer_id = SE_ANSWER_ID_RE.search(url)
|
||||||
if answer_id:
|
if answer_id:
|
||||||
answer_id = answer_id.groupdict()['id']
|
answer_id = answer_id.groupdict()['id']
|
||||||
url = '%s%s/a/%s' % (app_url_root, site.key, answer_id)
|
url = '%s%s/a/%s' % (app_url_root, site.key, answer_id)
|
||||||
t.set('href', url)
|
t.set('href', url)
|
||||||
t.set('class', t.get('class', '') + ' internal-link')
|
|
||||||
internal_link = True
|
internal_link = True
|
||||||
|
|
||||||
if not internal_link:
|
if internal_link:
|
||||||
|
t.set('class', t.get('class', '') + ' internal-link')
|
||||||
|
else:
|
||||||
t.set('class', t.get('class', '') + ' external-link')
|
t.set('class', t.get('class', '') + ' external-link')
|
||||||
|
|
||||||
# get a string back
|
# get a string back
|
||||||
|
Loading…
x
Reference in New Issue
Block a user