mirror of
https://github.com/djohnlewis/stackdump
synced 2024-12-04 23:17:37 +00:00
Tags are now parsed during import, and inserted into the index as an array field.
Also changed names of multivalued Solr fields so they are plural.
This commit is contained in:
parent
098a4f2fa9
commit
045b50fe6c
@ -505,13 +505,13 @@
|
||||
<!-- TODO: the title field should be boosted at index-time -->
|
||||
<field name="title" type="text_general" indexed="true" stored="false" required="true"/>
|
||||
<field name="question-json" type="string" indexed="false" stored="true" required="true"/>
|
||||
<field name="answer-json" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<field name="answers-json" type="string" indexed="false" stored="true" multiValued="true"/>
|
||||
<field name="ownerUserId" type="tint" indexed="true" stored="true" required="true"/>
|
||||
<field name="lastEditorUserId" type="tint" indexed="false" stored="false"/>
|
||||
<field name="lastActivityDate" type="tdate" indexed="true" stored="false"/>
|
||||
<field name="communityOwnedDate" type="tdate" indexed="false" stored="false"/>
|
||||
<field name="closedDate" type="tdate" indexed="false" stored="false"/>
|
||||
<field name="tag" type="string" indexed="true" stored="false" multiValued="true"/>
|
||||
<field name="tags" type="string" indexed="true" stored="false" multiValued="true"/>
|
||||
|
||||
<!-- catchall field, containing all other searchable text fields (implemented
|
||||
via copyField further on in this schema) -->
|
||||
|
@ -27,21 +27,6 @@ MEDIA_ROOT = os.path.abspath(BOTTLE_ROOT + '/../../media')
|
||||
thread_locals = threading.local()
|
||||
|
||||
|
||||
# CUSTOM TEMPLATE TAGS AND FILTERS
|
||||
|
||||
def parse_se_tags(value):
|
||||
'''\
|
||||
Parses the string of tags as given in the StackExchange XML site dump. The
|
||||
format is:
|
||||
|
||||
<feature-request><filter>
|
||||
'''
|
||||
# if it isn't a string, just do nothing
|
||||
if not isinstance(value, basestring):
|
||||
return value
|
||||
|
||||
return re.findall(r'<([^>]+)>', value)
|
||||
|
||||
# RESOURCE DECORATORS
|
||||
|
||||
def uses_templates(fn):
|
||||
@ -62,7 +47,6 @@ def uses_templates(fn):
|
||||
# template.
|
||||
extensions=['jinja2.ext.autoescape']
|
||||
)
|
||||
thread_locals.template_env.filters['parse_se_tags'] = parse_se_tags
|
||||
|
||||
if not fn:
|
||||
init_templates()
|
||||
|
@ -201,7 +201,7 @@ class PostContentHandler(xml.sax.ContentHandler):
|
||||
AnswerCount="3" CommentCount="1" FavoriteCount="3" />
|
||||
|
||||
"""
|
||||
TAGS_RE = re.compile(u'<([\w\d\-]+)>')
|
||||
TAGS_RE = re.compile(u'<([^>]+)>')
|
||||
|
||||
def __init__(self, site):
|
||||
self.site = site
|
||||
@ -385,7 +385,7 @@ class PostContentHandler(xml.sax.ContentHandler):
|
||||
doc['text'] = search_text
|
||||
|
||||
# serialise answers to JSON
|
||||
doc['answer-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
|
||||
doc['answers-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
|
||||
|
||||
# map other fields to search index doc
|
||||
doc['id'] = str(q['id'])
|
||||
@ -427,7 +427,7 @@ class PostContentHandler(xml.sax.ContentHandler):
|
||||
question_obj['closedDate'] = q['closedDate']
|
||||
question_obj['title'] = q['title']
|
||||
if 'tags' in q:
|
||||
question_obj['tags'] = q['tags']
|
||||
question_obj['tags'] = PostContentHandler.TAGS_RE.findall(q['tags'])
|
||||
question_obj['favoriteCount'] = q['favoriteCount']
|
||||
question_obj['comments'] = q['comments']
|
||||
|
||||
|
@ -29,8 +29,8 @@
|
||||
<p>vote{% if r.question.score != 1 %}s{% endif %}</p>
|
||||
</div>
|
||||
<div class="post-stat">
|
||||
<p class="post-stat-value">{{ r.answer|length }}</p>
|
||||
<p>answer{% if r.answer|length != 1 %}s{% endif %}</p>
|
||||
<p class="post-stat-value">{{ r.answers|length }}</p>
|
||||
<p>answer{% if r.answers|length != 1 %}s{% endif %}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div class="post-summary">
|
||||
@ -41,7 +41,7 @@
|
||||
<strong>{{ r.question.creationDate }}</strong>.
|
||||
</p>
|
||||
<div class="post-tags">
|
||||
{% for t in r.question.tags|parse_se_tags %}
|
||||
{% for t in r.question.tags %}
|
||||
<span class="label">{{ t }}</span>
|
||||
{% endfor %}
|
||||
</div>
|
||||
|
Loading…
Reference in New Issue
Block a user