mirror of
https://github.com/djohnlewis/stackdump
synced 2024-12-04 23:17:37 +00:00
Fixed #7. Turns out post IDs are not unique across sites.
Unfortunately, this change requires re-indexing all sites. On the upside, there are more questions to browse!
This commit is contained in:
parent
cdb93e6f68
commit
01f9b10c27
@ -110,6 +110,10 @@
|
|||||||
|
|
||||||
<!-- we'll get the values out of the JSON, so most fields are not stored -->
|
<!-- we'll get the values out of the JSON, so most fields are not stored -->
|
||||||
<!-- fields are listed here so searches can be performed against them -->
|
<!-- fields are listed here so searches can be performed against them -->
|
||||||
|
<!-- this is used by Lucene to uniquely identify a post across all sites.
|
||||||
|
It is of the form "siteKey-id" and is necessary because post IDs are
|
||||||
|
reused across sites. -->
|
||||||
|
<field name="documentId" type="string" indexed="true" stored="true" required="true" />
|
||||||
<!-- the ID field needs to be a string for the QueryElevationComponent -->
|
<!-- the ID field needs to be a string for the QueryElevationComponent -->
|
||||||
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
<field name="id" type="string" indexed="true" stored="true" required="true" />
|
||||||
<field name="siteKey" type="string" indexed="true" stored="true" required="true" />
|
<field name="siteKey" type="string" indexed="true" stored="true" required="true" />
|
||||||
@ -196,7 +200,7 @@
|
|||||||
<!-- Field to use to determine and enforce document uniqueness.
|
<!-- Field to use to determine and enforce document uniqueness.
|
||||||
Unless this field is marked with required="false", it will be a required field
|
Unless this field is marked with required="false", it will be a required field
|
||||||
-->
|
-->
|
||||||
<uniqueKey>id</uniqueKey>
|
<uniqueKey>documentId</uniqueKey>
|
||||||
|
|
||||||
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
|
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
|
||||||
parsing a query string that isn't explicit about the field. Machine (non-user)
|
parsing a query string that isn't explicit about the field. Machine (non-user)
|
||||||
|
@ -459,6 +459,9 @@ class PostContentHandler(xml.sax.ContentHandler):
|
|||||||
doc['answers-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
|
doc['answers-json'] = [ json.dumps(a, default=self.json_default_handler) for a in q['answers'] ]
|
||||||
|
|
||||||
# map other fields to search index doc
|
# map other fields to search index doc
|
||||||
|
# this is the ID for Solr to uniquely identify this question across all
|
||||||
|
# sites
|
||||||
|
doc['documentId'] = self.site.key + '-' + str(q['id'])
|
||||||
doc['id'] = str(q['id'])
|
doc['id'] = str(q['id'])
|
||||||
doc['siteKey'] = self.site.key
|
doc['siteKey'] = self.site.key
|
||||||
doc['creationDate'] = q['creationDate']
|
doc['creationDate'] = q['creationDate']
|
||||||
|
Loading…
Reference in New Issue
Block a user