// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
2014-09-02 16:54:50 +02:00
|
|
|
|
2016-10-02 16:29:39 +02:00
|
|
|
package scorer
|
2014-04-17 22:55:53 +02:00
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"math"
|
|
|
|
|
2014-08-28 21:38:57 +02:00
|
|
|
"github.com/blevesearch/bleve/index"
|
2014-09-01 17:15:38 +02:00
|
|
|
"github.com/blevesearch/bleve/search"
|
2014-04-17 22:55:53 +02:00
|
|
|
)
|
|
|
|
|
|
|
|
type TermQueryScorer struct {
|
2014-07-30 18:30:38 +02:00
|
|
|
queryTerm string
|
|
|
|
queryField string
|
|
|
|
queryBoost float64
|
2014-04-17 22:55:53 +02:00
|
|
|
docTerm uint64
|
|
|
|
docTotal uint64
|
|
|
|
idf float64
|
2017-01-06 02:49:45 +01:00
|
|
|
options search.SearcherOptions
|
2014-09-01 17:15:38 +02:00
|
|
|
idfExplanation *search.Explanation
|
2014-04-17 22:55:53 +02:00
|
|
|
queryNorm float64
|
|
|
|
queryWeight float64
|
2014-09-01 17:15:38 +02:00
|
|
|
queryWeightExplanation *search.Explanation
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2017-01-06 02:49:45 +01:00
|
|
|
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
|
2014-04-17 22:55:53 +02:00
|
|
|
rv := TermQueryScorer{
|
2014-07-30 18:30:38 +02:00
|
|
|
queryTerm: queryTerm,
|
|
|
|
queryField: queryField,
|
|
|
|
queryBoost: queryBoost,
|
2014-07-11 20:45:32 +02:00
|
|
|
docTerm: docTerm,
|
|
|
|
docTotal: docTotal,
|
|
|
|
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
|
2017-01-06 02:49:45 +01:00
|
|
|
options: options,
|
2014-07-11 20:45:32 +02:00
|
|
|
queryWeight: 1.0,
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2017-01-06 02:49:45 +01:00
|
|
|
if options.Explain {
|
2014-09-01 17:15:38 +02:00
|
|
|
rv.idfExplanation = &search.Explanation{
|
2014-04-17 22:55:53 +02:00
|
|
|
Value: rv.idf,
|
|
|
|
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return &rv
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *TermQueryScorer) Weight() float64 {
|
2014-07-30 18:30:38 +02:00
|
|
|
sum := s.queryBoost * s.idf
|
2014-04-17 22:55:53 +02:00
|
|
|
return sum * sum
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
|
|
|
|
s.queryNorm = qnorm
|
|
|
|
|
|
|
|
// update the query weight
|
2014-07-30 18:30:38 +02:00
|
|
|
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2017-01-06 02:49:45 +01:00
|
|
|
if s.options.Explain {
|
2014-09-01 17:15:38 +02:00
|
|
|
childrenExplanations := make([]*search.Explanation, 3)
|
|
|
|
childrenExplanations[0] = &search.Explanation{
|
2014-07-30 18:30:38 +02:00
|
|
|
Value: s.queryBoost,
|
2014-04-17 22:55:53 +02:00
|
|
|
Message: "boost",
|
|
|
|
}
|
|
|
|
childrenExplanations[1] = s.idfExplanation
|
2014-09-01 17:15:38 +02:00
|
|
|
childrenExplanations[2] = &search.Explanation{
|
2014-04-17 22:55:53 +02:00
|
|
|
Value: s.queryNorm,
|
|
|
|
Message: "queryNorm",
|
|
|
|
}
|
2014-09-01 17:15:38 +02:00
|
|
|
s.queryWeightExplanation = &search.Explanation{
|
2014-04-17 22:55:53 +02:00
|
|
|
Value: s.queryWeight,
|
2014-07-30 18:30:38 +02:00
|
|
|
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost),
|
2014-04-17 22:55:53 +02:00
|
|
|
Children: childrenExplanations,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-08-09 04:21:47 +02:00
|
|
|
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
|
2014-09-01 17:15:38 +02:00
|
|
|
var scoreExplanation *search.Explanation
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2014-07-11 20:45:32 +02:00
|
|
|
// need to compute score
|
|
|
|
var tf float64
|
2014-09-03 23:48:40 +02:00
|
|
|
if termMatch.Freq < MaxSqrtCache {
|
|
|
|
tf = SqrtCache[int(termMatch.Freq)]
|
2014-07-11 20:45:32 +02:00
|
|
|
} else {
|
|
|
|
tf = math.Sqrt(float64(termMatch.Freq))
|
|
|
|
}
|
|
|
|
score := tf * termMatch.Norm * s.idf
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2017-01-06 02:49:45 +01:00
|
|
|
if s.options.Explain {
|
2014-09-01 17:15:38 +02:00
|
|
|
childrenExplanations := make([]*search.Explanation, 3)
|
|
|
|
childrenExplanations[0] = &search.Explanation{
|
2014-07-11 20:45:32 +02:00
|
|
|
Value: tf,
|
2014-07-30 18:30:38 +02:00
|
|
|
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-09-01 17:15:38 +02:00
|
|
|
childrenExplanations[1] = &search.Explanation{
|
2014-07-11 20:45:32 +02:00
|
|
|
Value: termMatch.Norm,
|
2014-07-30 18:30:38 +02:00
|
|
|
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-07-11 20:45:32 +02:00
|
|
|
childrenExplanations[2] = s.idfExplanation
|
2014-09-01 17:15:38 +02:00
|
|
|
scoreExplanation = &search.Explanation{
|
2014-07-11 20:45:32 +02:00
|
|
|
Value: score,
|
2014-07-30 18:30:38 +02:00
|
|
|
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
|
2014-07-11 20:45:32 +02:00
|
|
|
Children: childrenExplanations,
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-07-11 20:45:32 +02:00
|
|
|
// if the query weight isn't 1, multiply
|
|
|
|
if s.queryWeight != 1.0 {
|
|
|
|
score = score * s.queryWeight
|
2017-01-06 02:49:45 +01:00
|
|
|
if s.options.Explain {
|
2014-09-01 17:15:38 +02:00
|
|
|
childExplanations := make([]*search.Explanation, 2)
|
2014-07-11 20:45:32 +02:00
|
|
|
childExplanations[0] = s.queryWeightExplanation
|
|
|
|
childExplanations[1] = scoreExplanation
|
2014-09-01 17:15:38 +02:00
|
|
|
scoreExplanation = &search.Explanation{
|
2014-07-11 20:45:32 +02:00
|
|
|
Value: score,
|
2014-07-30 18:30:38 +02:00
|
|
|
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
|
2014-07-11 20:45:32 +02:00
|
|
|
Children: childExplanations,
|
|
|
|
}
|
|
|
|
}
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2016-08-09 04:21:47 +02:00
|
|
|
rv := ctx.DocumentMatchPool.Get()
|
2016-08-03 23:01:27 +02:00
|
|
|
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
|
2016-07-21 01:29:20 +02:00
|
|
|
rv.Score = score
|
2017-01-06 02:49:45 +01:00
|
|
|
if s.options.Explain {
|
2014-04-17 22:55:53 +02:00
|
|
|
rv.Expl = scoreExplanation
|
|
|
|
}
|
|
|
|
|
|
|
|
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
|
2017-01-07 21:34:06 +01:00
|
|
|
locs := make([]search.Location, len(termMatch.Vectors))
|
|
|
|
locsUsed := 0
|
|
|
|
|
|
|
|
totalPositions := 0
|
|
|
|
for _, v := range termMatch.Vectors {
|
|
|
|
totalPositions += len(v.ArrayPositions)
|
|
|
|
}
|
|
|
|
positions := make([]float64, totalPositions)
|
|
|
|
positionsUsed := 0
|
2014-07-21 23:05:55 +02:00
|
|
|
|
2014-09-01 17:15:38 +02:00
|
|
|
rv.Locations = make(search.FieldTermLocationMap)
|
2014-07-21 23:05:55 +02:00
|
|
|
for _, v := range termMatch.Vectors {
|
|
|
|
tlm := rv.Locations[v.Field]
|
|
|
|
if tlm == nil {
|
2014-09-01 17:15:38 +02:00
|
|
|
tlm = make(search.TermLocationMap)
|
2017-01-07 21:34:06 +01:00
|
|
|
rv.Locations[v.Field] = tlm
|
2014-07-21 23:05:55 +02:00
|
|
|
}
|
|
|
|
|
2017-01-07 21:34:06 +01:00
|
|
|
loc := &locs[locsUsed]
|
|
|
|
locsUsed++
|
|
|
|
|
|
|
|
loc.Pos = float64(v.Pos)
|
|
|
|
loc.Start = float64(v.Start)
|
|
|
|
loc.End = float64(v.End)
|
2014-07-21 23:05:55 +02:00
|
|
|
|
2015-07-31 17:16:11 +02:00
|
|
|
if len(v.ArrayPositions) > 0 {
|
2017-01-08 07:14:22 +01:00
|
|
|
loc.ArrayPositions = positions[positionsUsed : positionsUsed+len(v.ArrayPositions)]
|
2015-07-31 17:16:11 +02:00
|
|
|
for i, ap := range v.ArrayPositions {
|
|
|
|
loc.ArrayPositions[i] = float64(ap)
|
|
|
|
}
|
2017-01-07 21:34:06 +01:00
|
|
|
positionsUsed += len(v.ArrayPositions)
|
2015-07-31 17:16:11 +02:00
|
|
|
}
|
|
|
|
|
2017-01-07 21:34:06 +01:00
|
|
|
tlm[s.queryTerm] = append(tlm[s.queryTerm], loc)
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-21 01:29:20 +02:00
|
|
|
return rv
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|