1
0
mirror of https://github.com/djohnlewis/stackdump synced 2025-01-22 14:41:39 +00:00

Tags are now parsed during import, and inserted into the index as an array field.

Also changed names of multivalued Solr fields so they are plural.
This commit is contained in:
Samuel Lai 2011-11-06 18:02:06 +11:00
parent 098a4f2fa9
commit 045b50fe6c
4 changed files with 8 additions and 24 deletions

View File

@ -505,13 +505,13 @@
<!-- TODO: the title field should be boosted at index-time -->
<field name="title" type="text_general" indexed="true" stored="false" required="true"/>
<field name="question-json" type="string" indexed="false" stored="true" required="true"/>
<field name="answer-json" type="string" indexed="false" stored="true" multiValued="true"/>
<field name="answers-json" type="string" indexed="false" stored="true" multiValued="true"/>
<field name="ownerUserId" type="tint" indexed="true" stored="true" required="true"/>
<field name="lastEditorUserId" type="tint" indexed="false" stored="false"/>
<field name="lastActivityDate" type="tdate" indexed="true" stored="false"/>
<field name="communityOwnedDate" type="tdate" indexed="false" stored="false"/>
<field name="closedDate" type="tdate" indexed="false" stored="false"/>
<field name="tag" type="string" indexed="true" stored="false" multiValued="true"/>
<field name="tags" type="string" indexed="true" stored="false" multiValued="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema) -->

View File

@ -27,21 +27,6 @@ MEDIA_ROOT = os.path.abspath(BOTTLE_ROOT + '/../../media')
thread_locals = threading.local()
# CUSTOM TEMPLATE TAGS AND FILTERS
def parse_se_tags(value):
    '''\
    Parses the string of tags as given in the StackExchange XML site dump.

    The format is a run of tag names, each wrapped in angle brackets:

        <feature-request><filter>

    which is returned as a list of the names between the brackets, e.g.
    ['feature-request', 'filter']. A string containing no bracketed names
    yields an empty list.

    If the given value is not a string, it is returned unchanged.
    '''
    # `basestring` only exists on Python 2. Fall back to `str` so that this
    # function does not raise a NameError on interpreters without it.
    try:
        string_types = basestring  # noqa: F821
    except NameError:
        string_types = str

    # if it isn't a string, just do nothing
    if not isinstance(value, string_types):
        return value

    # each match is the text between one '<' and the next '>'
    return re.findall(r'<([^>]+)>', value)
# RESOURCE DECORATORS
def uses_templates(fn):
@ -62,7 +47,6 @@ def uses_templates(fn):
# template.
extensions=['jinja2.ext.autoescape']
)
thread_locals.template_env.filters['parse_se_tags'] = parse_se_tags
if not fn:
init_templates()

View File

@ -201,7 +201,7 @@ class PostContentHandler(xml.sax.ContentHandler):
AnswerCount="3" CommentCount="1" FavoriteCount="3" />
"""
TAGS_RE = re.compile(u'&lt;([\w\d\-]+)&gt;')
TAGS_RE = re.compile(u'<([^>]+)>')
def __init__(self, site):
self.site = site
@ -385,7 +385,7 @@ class PostContentHandler(xml.sax.ContentHandler):
doc['text'] = search_text
# serialise answers to JSON
doc['answer-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
doc['answers-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
# map other fields to search index doc
doc['id'] = str(q['id'])
@ -427,7 +427,7 @@ class PostContentHandler(xml.sax.ContentHandler):
question_obj['closedDate'] = q['closedDate']
question_obj['title'] = q['title']
if 'tags' in q:
question_obj['tags'] = q['tags']
question_obj['tags'] = PostContentHandler.TAGS_RE.findall(q['tags'])
question_obj['favoriteCount'] = q['favoriteCount']
question_obj['comments'] = q['comments']

View File

@ -29,8 +29,8 @@
<p>vote{% if r.question.score != 1 %}s{% endif %}</p>
</div>
<div class="post-stat">
<p class="post-stat-value">{{ r.answer|length }}</p>
<p>answer{% if r.answer|length != 1 %}s{% endif %}</p>
<p class="post-stat-value">{{ r.answers|length }}</p>
<p>answer{% if r.answers|length != 1 %}s{% endif %}</p>
</div>
</div>
<div class="post-summary">
@ -41,7 +41,7 @@
<strong>{{ r.question.creationDate }}</strong>.
</p>
<div class="post-tags">
{% for t in r.question.tags|parse_se_tags %}
{% for t in r.question.tags %}
<span class="label">{{ t }}</span>
{% endfor %}
</div>