0
0
Fork 0
bleve/search/scorer_term.go

173 lines
4.9 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"fmt"
"math"
"github.com/couchbaselabs/bleve/index"
)
// MAX_SCORE_CACHE is the size hint NewTermQueryScorer passes to make when it
// allocates the score and score-explanation maps of each scorer.
const MAX_SCORE_CACHE = 64
// TermQueryScorer holds the state used to score matches of one TermQuery:
// the index statistics it was built from, the inverse document frequency
// derived from them, the query weight, and, when explain is set, the
// Explanation nodes describing those values.
type TermQueryScorer struct {
// query is the term query being scored; its Boost, Field and Term are read.
query *TermQuery
// docTerm is the docTerm argument given to NewTermQueryScorer; it is
// reported as docFreq in the idf explanation.
docTerm uint64
// docTotal is the docTotal argument given to NewTermQueryScorer; it is
// reported as maxDocs in the idf explanation.
docTotal uint64
// idf is 1 + ln(docTotal / (docTerm + 1)), computed once at construction.
idf float64
// explain turns on construction of Explanation values alongside scores.
explain bool
// idfExplanation describes idf; it is only populated when explain is true.
idfExplanation *Explanation
// scoreCache is an int-keyed map of scores allocated by NewTermQueryScorer.
scoreCache map[int]float64
// scoreExplanationCache is an int-keyed map of explanations allocated by
// NewTermQueryScorer.
scoreExplanationCache map[int]*Explanation
// queryNorm is the value most recently passed to SetQueryNorm.
queryNorm float64
// queryWeight starts at 1.0 and is set to boost * idf * queryNorm by
// SetQueryNorm.
queryWeight float64
// queryWeightExplanation describes queryWeight; SetQueryNorm builds it when
// explain is true.
queryWeightExplanation *Explanation
}
// NewTermQueryScorer creates a scorer for query from the given index
// statistics. docTotal and docTerm are stored as given and used to derive the
// inverse document frequency, idf = 1 + ln(docTotal / (docTerm + 1)). The
// query weight starts at 1.0 until SetQueryNorm is called. When explain is
// true an Explanation for the idf value is prepared up front.
func NewTermQueryScorer(query *TermQuery, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
	// The +1 is applied to the integer document frequency before conversion,
	// which keeps the ratio finite when docTerm is 0.
	ratio := float64(docTotal) / float64(docTerm+1)

	scorer := &TermQueryScorer{
		query:                 query,
		docTerm:               docTerm,
		docTotal:              docTotal,
		idf:                   1.0 + math.Log(ratio),
		explain:               explain,
		scoreCache:            make(map[int]float64, MAX_SCORE_CACHE),
		scoreExplanationCache: make(map[int]*Explanation, MAX_SCORE_CACHE),
		queryWeight:           1.0,
	}
	if !explain {
		return scorer
	}

	scorer.idfExplanation = &Explanation{
		Value:   scorer.idf,
		Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
	}
	return scorer
}
// Weight returns the square of (query boost * idf) for this scorer.
func (s *TermQueryScorer) Weight() float64 {
	boosted := s.query.Boost() * s.idf
	return boosted * boosted
}
// SetQueryNorm records the query normalization factor and recomputes the
// query weight as boost * idf * queryNorm. When explain is enabled it also
// rebuilds the Explanation describing that product.
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm
	s.queryWeight = s.query.Boost() * s.idf * s.queryNorm

	if !s.explain {
		return
	}

	// One child per factor of the product, in multiplication order.
	factors := []*Explanation{
		{Value: s.query.Boost(), Message: "boost"},
		s.idfExplanation,
		{Value: s.queryNorm, Message: "queryNorm"},
	}
	s.queryWeightExplanation = &Explanation{
		Value:    s.queryWeight,
		Message:  fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.query.Field, string(s.query.Term), s.query.Boost()),
		Children: factors,
	}
}
// Score builds the DocumentMatch for one posting of the query term.
//
// The raw score is tf * fieldNorm * idf, where tf is the square root of the
// term frequency. When the query weight is not exactly 1.0 the raw score is
// additionally multiplied by it. With explain enabled the match also carries
// an Explanation tree mirroring that computation. When the posting has term
// vectors they are converted into Locations filed under the query's field
// and term.
//
// The score is computed afresh on every call. It depends on termMatch.Norm,
// which belongs to the individual posting, and on queryWeight, which
// SetQueryNorm can change between calls; the explanation messages embed
// termMatch.ID. A memo keyed only by term frequency would therefore hand back
// another document's score and explanation, so the scoreCache and
// scoreExplanationCache fields are deliberately not consulted here.
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch {
	// tf = sqrt(freq). Frequencies below MAX_SQRT_CACHE are looked up in
	// SQRT_CACHE (presumably a table of precomputed square roots declared
	// elsewhere in the package); larger ones are computed directly.
	var tf float64
	if termMatch.Freq < MAX_SQRT_CACHE {
		tf = SQRT_CACHE[int(termMatch.Freq)]
	} else {
		tf = math.Sqrt(float64(termMatch.Freq))
	}

	score := tf * termMatch.Norm * s.idf

	var scoreExplanation *Explanation
	if s.explain {
		scoreExplanation = &Explanation{
			Value:   score,
			Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.query.Field, string(s.query.Term), termMatch.ID),
			Children: []*Explanation{
				{
					Value:   tf,
					Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.query.Field, string(s.query.Term), termMatch.Freq),
				},
				{
					Value:   termMatch.Norm,
					Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.query.Field, termMatch.ID),
				},
				s.idfExplanation,
			},
		}
	}

	// A query weight of exactly 1.0 contributes nothing, so both the
	// multiplication and the extra explanation level are skipped.
	if s.queryWeight != 1.0 {
		score = score * s.queryWeight
		if s.explain {
			scoreExplanation = &Explanation{
				Value:    score,
				Message:  fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.query.Field, string(s.query.Term), s.query.Boost(), termMatch.ID),
				Children: []*Explanation{s.queryWeightExplanation, scoreExplanation},
			}
		}
	}

	rv := DocumentMatch{
		ID:    termMatch.ID,
		Score: score,
	}
	if s.explain {
		rv.Expl = scoreExplanation
	}

	// len of a nil slice is 0, so a separate nil check is unnecessary.
	if len(termMatch.Vectors) > 0 {
		locations := make(Locations, len(termMatch.Vectors))
		for i, v := range termMatch.Vectors {
			locations[i] = &Location{
				Pos:   float64(v.Pos),
				Start: float64(v.Start),
				End:   float64(v.End),
			}
		}
		tlm := make(TermLocationMap)
		tlm[s.query.Term] = locations
		rv.Locations = make(FieldTermLocationMap)
		rv.Locations[s.query.Field] = tlm
	}

	return &rv
}