0
0
Fork 0
bleve/search/scorers/scorer_term.go

172 lines
4.7 KiB
Go
Raw Normal View History

2014-04-17 22:55:53 +02:00
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package scorers
2014-04-17 22:55:53 +02:00
import (
"fmt"
"math"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
2014-04-17 22:55:53 +02:00
)
// MAX_SCORE_CACHE is the constant 64.
// NOTE(review): presumably an upper bound for a score cache, but nothing in
// the visible part of this file reads it (Score consults MAX_SQRT_CACHE and
// SQRT_CACHE instead) - confirm where, or whether, it is still used.
const MAX_SCORE_CACHE = 64
type TermQueryScorer struct {
queryTerm string
queryField string
queryBoost float64
2014-04-17 22:55:53 +02:00
docTerm uint64
docTotal uint64
idf float64
explain bool
idfExplanation *search.Explanation
2014-04-17 22:55:53 +02:00
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
2014-04-17 22:55:53 +02:00
}
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, explain bool) *TermQueryScorer {
2014-04-17 22:55:53 +02:00
rv := TermQueryScorer{
queryTerm: queryTerm,
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
explain: explain,
queryWeight: 1.0,
2014-04-17 22:55:53 +02:00
}
if explain {
rv.idfExplanation = &search.Explanation{
2014-04-17 22:55:53 +02:00
Value: rv.idf,
Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
}
}
return &rv
}
func (s *TermQueryScorer) Weight() float64 {
sum := s.queryBoost * s.idf
2014-04-17 22:55:53 +02:00
return sum * sum
}
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
s.queryNorm = qnorm
// update the query weight
s.queryWeight = s.queryBoost * s.idf * s.queryNorm
2014-04-17 22:55:53 +02:00
if s.explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: s.queryBoost,
2014-04-17 22:55:53 +02:00
Message: "boost",
}
childrenExplanations[1] = s.idfExplanation
childrenExplanations[2] = &search.Explanation{
2014-04-17 22:55:53 +02:00
Value: s.queryNorm,
Message: "queryNorm",
}
s.queryWeightExplanation = &search.Explanation{
2014-04-17 22:55:53 +02:00
Value: s.queryWeight,
Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, string(s.queryTerm), s.queryBoost),
2014-04-17 22:55:53 +02:00
Children: childrenExplanations,
}
}
}
func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *search.DocumentMatch {
var scoreExplanation *search.Explanation
2014-04-17 22:55:53 +02:00
// need to compute score
var tf float64
if termMatch.Freq < MAX_SQRT_CACHE {
tf = SQRT_CACHE[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score := tf * termMatch.Norm * s.idf
2014-04-17 22:55:53 +02:00
if s.explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, string(s.queryTerm), termMatch.Freq),
2014-04-17 22:55:53 +02:00
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
2014-04-17 22:55:53 +02:00
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, string(s.queryTerm), termMatch.ID),
Children: childrenExplanations,
2014-04-17 22:55:53 +02:00
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, string(s.queryTerm), s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
2014-04-17 22:55:53 +02:00
}
rv := search.DocumentMatch{
2014-04-17 22:55:53 +02:00
ID: termMatch.ID,
Score: score,
}
if s.explain {
rv.Expl = scoreExplanation
}
if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
2014-07-21 23:05:55 +02:00
rv.Locations = make(search.FieldTermLocationMap)
2014-07-21 23:05:55 +02:00
for _, v := range termMatch.Vectors {
tlm := rv.Locations[v.Field]
if tlm == nil {
tlm = make(search.TermLocationMap)
2014-07-21 23:05:55 +02:00
}
loc := search.Location{
2014-04-17 22:55:53 +02:00
Pos: float64(v.Pos),
Start: float64(v.Start),
End: float64(v.End),
}
2014-07-21 23:05:55 +02:00
locations := tlm[s.queryTerm]
2014-07-21 23:05:55 +02:00
if locations == nil {
locations = make(search.Locations, 1)
2014-07-21 23:05:55 +02:00
locations[0] = &loc
} else {
locations = append(locations, &loc)
}
tlm[s.queryTerm] = locations
2014-07-21 23:05:55 +02:00
rv.Locations[v.Field] = tlm
2014-04-17 22:55:53 +02:00
}
2014-07-21 23:05:55 +02:00
2014-04-17 22:55:53 +02:00
}
return &rv
}