Skip to content

Commit 2e71aad

Browse files
authored
ranking: parametrize contribution of document ranks (#479)
We tune the contribution of document ranks to the final ranking with a damping factor. A damping factor of 0 gives scores and ranks an equal contribution, while a damping factor of 1 effectively eliminates the influence of document ranks. The default is 0, i.e., equal contribution.
1 parent 8645eff commit 2e71aad

File tree

3 files changed

+44
-14
lines changed

3 files changed

+44
-14
lines changed

api.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,11 @@ type SearchOptions struct {
803803
// sorting matches.
804804
UseDocumentRanks bool
805805

806+
// RanksDampingFactor determines the contribution of document ranks to the
807+
// final ranking based on RRF. A value in (0,1] reduces the contribution,
808+
// while a value in (-inf,0) increases it.
809+
RanksDampingFactor float64
810+
806811
// Trace turns on opentracing for this request if true and if the Jaeger address was provided as
807812
// a command-line flag
808813
Trace bool

contentprovider.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -900,7 +900,7 @@ func SortFiles(ms []FileMatch, opts *SearchOptions) {
900900
sort.Stable(fileMatchesByRank{fileMatches: ms, rrfScore: rrfScore})
901901

902902
for i := range rrfScore {
903-
rrfScore[i] += 1 / (k + float64(i))
903+
rrfScore[i] += (1 - opts.RanksDampingFactor) / (k + float64(i))
904904
if opts.DebugScore {
905905
ms[i].Debug += fmt.Sprintf("%d), ", i)
906906
}

contentprovider_test.go

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package zoekt
33
import (
44
"bytes"
55
"fmt"
6+
"math"
67
"testing"
78

89
"github.com/google/go-cmp/cmp"
@@ -335,22 +336,46 @@ func TestSortFiles(t *testing.T) {
335336
{FileName: "d4", Score: 1, Ranks: []float64{0.5}},
336337
}
337338

338-
// Document RRF(Score) RRF(Ranks) SUM Rank
339-
// d3 1/(60+1) 1/(60+0) 0,0330601092896175 0
340-
// d2 1/(60+0) 1/(60+3) 0,0325396825396826 1
341-
// d1 1/(60+2) 1/(60+1) 0,0325224748810153 2
342-
// d4 1/(60+3) 1/(60+2) 0,0320020481310804 3
339+
cases := []struct {
340+
name string
341+
dampingFactor float64
342+
wantOrder []string
343+
}{
344+
// Document RRF(Score) RRF(Ranks) SUM Rank
345+
// d3 1/(60+1) 1/(60+0) 0,0330601092896175 0
346+
// d2 1/(60+0) 1/(60+3) 0,0325396825396826 1
347+
// d1 1/(60+2) 1/(60+1) 0,0325224748810153 2
348+
// d4 1/(60+3) 1/(60+2) 0,0320020481310804 3
349+
{
350+
"equal",
351+
0,
352+
[]string{"d3", "d2", "d1", "d4"},
353+
},
354+
{
355+
"scores only",
356+
1,
357+
[]string{"d2", "d3", "d1", "d4"},
358+
},
359+
{
360+
"ranks only",
361+
math.Inf(-1),
362+
[]string{"d3", "d1", "d4", "d2"},
363+
},
364+
}
343365

344-
SortFiles(in, &SearchOptions{UseDocumentRanks: true, DebugScore: true})
366+
for _, tt := range cases {
367+
t.Run("", func(t *testing.T) {
345368

346-
wantOrder := []string{"d3", "d2", "d1", "d4"}
369+
SortFiles(in, &SearchOptions{UseDocumentRanks: true, DebugScore: true, RanksDampingFactor: tt.dampingFactor})
347370

348-
var haveOrder = []string{}
349-
for _, f := range in {
350-
haveOrder = append(haveOrder, f.FileName)
351-
}
371+
var haveOrder = []string{}
372+
for _, f := range in {
373+
haveOrder = append(haveOrder, f.FileName)
374+
}
352375

353-
if d := cmp.Diff(wantOrder, haveOrder); d != "" {
354-
t.Fatalf("-want, +got\n%s\n", d)
376+
if d := cmp.Diff(tt.wantOrder, haveOrder); d != "" {
377+
t.Fatalf("-want, +got\n%s\n", d)
378+
}
379+
})
355380
}
356381
}

0 commit comments

Comments
 (0)