Skip to content

Commit 9958642

Browse files
wolfogre and GiteaBot
authored
Fix issues indexer document mapping (#25619)
Fix regression of #5363 (so long ago).

The old code defined a document mapping for `issueIndexerDocType`, and assigned it to `BleveIndexerData` as its type. (`BleveIndexerData` has been renamed to `IndexerData` in #25174, but nothing more.) But the old code never used `BleveIndexerData`; it wrote the index with an anonymous struct type. Nonetheless, bleve would use the default auto-mapping for structs it didn't know, so the indexer still worked. This means the custom document mapping was always dead code.

The custom document mapping is not useless: it can reduce index storage. This PR brings it back and disables the default mapping to prevent this from happening again. Since `IndexerData` (`BleveIndexerData`) has JSON tags, and bleve uses them first, we should use `repo_id` as the field name instead of `RepoID`.

I did a test to compare the storage size before and after this, with about 3k real comments that were migrated from some public repos.

Before:

```text
[ 160] .
├── [ 42] index_meta.json
├── [ 13] rupture_meta.json
└── [ 128] store
    ├── [6.9M] 00000000005d.zap
    └── [256K] root.bolt
```

After:

```text
[ 160] .
├── [ 42] index_meta.json
├── [ 13] rupture_meta.json
└── [ 128] store
    ├── [3.5M] 000000000065.zap
    └── [256K] root.bolt
```

It saves about half the storage space.

---------

Co-authored-by: Giteabot <[email protected]>
1 parent dae022a commit 9958642

File tree

1 file changed

+12
-20
lines changed

1 file changed

+12
-20
lines changed

modules/indexer/issues/bleve/bleve.go

Lines changed: 12 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ import (
2323
const (
2424
issueIndexerAnalyzer = "issueIndexer"
2525
issueIndexerDocType = "issueIndexerDocType"
26-
issueIndexerLatestVersion = 2
26+
issueIndexerLatestVersion = 3
2727
)
2828

2929
// numericEqualityQuery a numeric equality query for the given value and field
@@ -67,15 +67,16 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
6767
docMapping := bleve.NewDocumentMapping()
6868

6969
numericFieldMapping := bleve.NewNumericFieldMapping()
70+
numericFieldMapping.Store = false
7071
numericFieldMapping.IncludeInAll = false
71-
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
72+
docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)
7273

7374
textFieldMapping := bleve.NewTextFieldMapping()
7475
textFieldMapping.Store = false
7576
textFieldMapping.IncludeInAll = false
76-
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
77-
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
78-
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
77+
docMapping.AddFieldMappingsAt("title", textFieldMapping)
78+
docMapping.AddFieldMappingsAt("content", textFieldMapping)
79+
docMapping.AddFieldMappingsAt("comments", textFieldMapping)
7980

8081
if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
8182
return nil, err
@@ -91,6 +92,7 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
9192
mapping.DefaultAnalyzer = issueIndexerAnalyzer
9293
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
9394
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
95+
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs
9496

9597
return mapping, nil
9698
}
@@ -116,17 +118,7 @@ func NewIndexer(indexDir string) *Indexer {
116118
func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
117119
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
118120
for _, issue := range issues {
119-
if err := batch.Index(indexer_internal.Base36(issue.ID), struct {
120-
RepoID int64
121-
Title string
122-
Content string
123-
Comments []string
124-
}{
125-
RepoID: issue.RepoID,
126-
Title: issue.Title,
127-
Content: issue.Content,
128-
Comments: issue.Comments,
129-
}); err != nil {
121+
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
130122
return err
131123
}
132124
}
@@ -149,7 +141,7 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
149141
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
150142
var repoQueriesP []*query.NumericRangeQuery
151143
for _, repoID := range repoIDs {
152-
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
144+
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "repo_id"))
153145
}
154146
repoQueries := make([]query.Query, len(repoQueriesP))
155147
for i, v := range repoQueriesP {
@@ -159,9 +151,9 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
159151
indexerQuery := bleve.NewConjunctionQuery(
160152
bleve.NewDisjunctionQuery(repoQueries...),
161153
bleve.NewDisjunctionQuery(
162-
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
163-
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
164-
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
154+
newMatchPhraseQuery(keyword, "title", issueIndexerAnalyzer),
155+
newMatchPhraseQuery(keyword, "content", issueIndexerAnalyzer),
156+
newMatchPhraseQuery(keyword, "comments", issueIndexerAnalyzer),
165157
))
166158
search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
167159
search.SortBy([]string{"-_score"})

0 commit comments

Comments
 (0)