1
0
mirror of https://github.com/djohnlewis/stackdump synced 2024-12-04 06:57:36 +00:00

Fixed #7. Turns out post IDs are not unique across sites.

Unfortunately, this change will require re-indexing all sites. On the upside, there will be more questions to browse!
This commit is contained in:
Samuel Lai 2014-02-27 17:57:34 +11:00
parent cdb93e6f68
commit 01f9b10c27
2 changed files with 8 additions and 1 deletions

View File

@ -110,6 +110,10 @@
<!-- we'll get the values out of the JSON, so most fields are not stored -->
<!-- fields are listed here so searches can be performed against them -->
<!-- this is used by Lucene to uniquely identify a post across all sites.
It is of the form "siteKey-id" and is necessary because post IDs are
reused across sites. -->
<field name="documentId" type="string" indexed="true" stored="true" required="true" />
<!-- the ID field needs to be a string for the QueryElevationComponent -->
<field name="id" type="string" indexed="true" stored="true" required="true" />
<field name="siteKey" type="string" indexed="true" stored="true" required="true" />
@ -196,7 +200,7 @@
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>id</uniqueKey>
<uniqueKey>documentId</uniqueKey>
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
parsing a query string that isn't explicit about the field. Machine (non-user)

View File

@ -459,6 +459,9 @@ class PostContentHandler(xml.sax.ContentHandler):
doc['answers-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
# map other fields to search index doc
# this is the ID for Solr to uniquely identify this question across all
# sites
doc['documentId'] = self.site.key + '-' + str(q['id'])
doc['id'] = str(q['id'])
doc['siteKey'] = self.site.key
doc['creationDate'] = q['creationDate']