
refactor searchers

- TermSearcher has an alternate constructor that accepts the term as a
  []byte, which can avoid copying in some cases.  TermQueryScorer was
  updated to accept a []byte term, and a few unused struct fields were
  removed.

- A new MultiTermSearcher searches for documents containing any of a list
  of terms.  The current implementation simply delegates to a
  DisjunctionSearcher.

- Several other searcher constructors now simply build a list of terms
  and then delegate to the MultiTermSearcher (a minimal sketch of this
  pattern follows at the end of this message):
  - NewTermPrefixSearcher
  - NewRegexpSearcher
  - NewFuzzySearcher
  - NewNumericRangeSearcher

- NewGeoBoundingBoxSearcher and NewGeoPointDistanceSearcher use the
  MultiTermSearcher internally and follow the same pattern of returning
  a search.Searcher interface value rather than their own wrapper struct.

- The callback filter functions used in NewGeoBoundingBoxSearcher and
  NewGeoPointDistanceSearcher have been extracted into separate functions
  (buildRectFilter and buildDistFilter), which makes the code much easier
  to read.
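
For illustration only (not part of this commit's diff), a minimal sketch
of the shared build-terms-then-delegate pattern described above; the
constructor name is hypothetical:

package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// newTermListSearcher is a hypothetical name illustrating the shared
// pattern: compute the candidate terms, then delegate to
// NewMultiTermSearcher, which wraps them in a DisjunctionSearcher.
func newTermListSearcher(indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	// the real constructors derive terms from a prefix scan, regexp or
	// fuzzy candidate enumeration, or numeric range splitting
	return NewMultiTermSearcher(indexReader, terms, field, boost, options)
}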
Marty Schoch 2017-03-31 16:58:06 -04:00
parent 0d41e80b66
commit f8fdfebb6c
10 changed files with 281 additions and 502 deletions

View File

@@ -23,7 +23,7 @@ import (
)
type TermQueryScorer struct {
queryTerm string
queryTerm []byte
queryField string
queryBoost float64
docTerm uint64
@@ -36,7 +36,7 @@ type TermQueryScorer struct {
queryWeightExplanation *search.Explanation
}
func NewTermQueryScorer(queryTerm string, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: queryTerm,
queryField: queryField,
@@ -174,7 +174,7 @@ func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.Term
positionsUsed += len(v.ArrayPositions)
}
tlm[s.queryTerm] = append(tlm[s.queryTerm], loc)
tlm[string(s.queryTerm)] = append(tlm[string(s.queryTerm)], loc)
}
}

View File

@@ -27,7 +27,7 @@ func TestTermScorer(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = "beer"
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 1.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, search.SearcherOptions{Explain: true})
@@ -168,7 +168,7 @@ func TestTermScorerWithQueryNorm(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = "beer"
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 3.0
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, search.SearcherOptions{Explain: true})

View File

@@ -19,17 +19,9 @@ import (
"github.com/blevesearch/bleve/search"
)
type FuzzySearcher struct {
indexReader index.IndexReader
term string
prefix int
fuzziness int
field string
options search.SearcherOptions
searcher *DisjunctionSearcher
}
func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzziness int, field string, boost float64, options search.SearcherOptions) (*FuzzySearcher, error) {
func NewFuzzySearcher(indexReader index.IndexReader, term string,
prefix, fuzziness int, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// Note: we don't byte slice the term for a prefix because of runes.
prefixTerm := ""
for i, r := range term {
@@ -40,46 +32,18 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
}
}
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness, field, prefixTerm)
candidateTerms, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
field, prefixTerm)
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return &FuzzySearcher{
indexReader: indexReader,
term: term,
prefix: prefix,
fuzziness: fuzziness,
field: field,
options: options,
searcher: searcher,
}, nil
return NewMultiTermSearcher(indexReader, candidateTerms, field,
boost, options)
}
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
fuzziness int, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
@@ -108,36 +72,3 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string, fuzzine
return rv, err
}
func (s *FuzzySearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *FuzzySearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *FuzzySearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *FuzzySearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *FuzzySearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *FuzzySearcher) Close() error {
return s.searcher.Close()
}
func (s *FuzzySearcher) Min() int {
return 0
}
func (s *FuzzySearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}

View File

@@ -22,155 +22,76 @@ import (
"github.com/blevesearch/bleve/search"
)
type GeoBoundingBoxSearcher struct {
indexReader index.IndexReader
field string
minLon float64
minLat float64
maxLon float64
maxLat float64
options search.SearcherOptions
rangeBounds []*geoRange
searcher *DisjunctionSearcher
}
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
*GeoBoundingBoxSearcher, error) {
search.Searcher, error) {
// track list of opened searchers, for cleanup on early exit
var openedSearchers []search.Searcher
cleanupOpenedSearchers := func() {
for _, s := range openedSearchers {
_ = s.Close()
}
}
rv := &GeoBoundingBoxSearcher{
indexReader: indexReader,
minLon: minLon,
minLat: minLat,
maxLon: maxLon,
maxLat: maxLat,
field: field,
options: options,
}
rv.computeRange(0, (geo.GeoBits<<1)-1)
var termsOnBoundary []search.Searcher
var termsNotOnBoundary []search.Searcher
for _, r := range rv.rangeBounds {
ts, err := NewTermSearcher(indexReader, string(r.cell), field, 1.0, options)
// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := computeRange(0, (geo.GeoBits<<1)-1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
var onBoundarySearcher search.Searcher
if len(onBoundaryTerms) > 0 {
rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(indexReader,
onBoundaryTerms, field, boost, options)
if err != nil {
return nil, err
}
// add filter to check points near the boundary
onBoundarySearcher = NewFilteringSearcher(rawOnBoundarySearcher,
buildRectFilter(indexReader, field, minLon, minLat, maxLon, maxLat))
openedSearchers = append(openedSearchers, onBoundarySearcher)
}
var notOnBoundarySearcher search.Searcher
if len(notOnBoundaryTerms) > 0 {
var err error
notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader,
notOnBoundaryTerms, field, boost, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
if r.boundary && checkBoundaries {
termsOnBoundary = append(termsOnBoundary, ts)
} else {
termsNotOnBoundary = append(termsNotOnBoundary, ts)
}
openedSearchers = append(openedSearchers)
openedSearchers = append(openedSearchers, notOnBoundarySearcher)
}
var filterOnBoundarySearcher search.Searcher
if len(termsOnBoundary) > 0 {
onBoundarySearcher, err := NewDisjunctionSearcher(indexReader,
termsOnBoundary, 0, options)
if onBoundarySearcher != nil && notOnBoundarySearcher != nil {
rv, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{
onBoundarySearcher,
notOnBoundarySearcher,
},
0, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
filterOnBoundarySearcher = NewFilteringSearcher(onBoundarySearcher,
func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err = indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
var shift uint
shift, err = prefixCoded.Shift()
if err == nil && shift == 0 {
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
}
return false
})
openedSearchers = append(openedSearchers, filterOnBoundarySearcher)
}
notOnBoundarySearcher, err := NewDisjunctionSearcher(indexReader,
termsNotOnBoundary, 0, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
openedSearchers = append(openedSearchers, notOnBoundarySearcher)
// if there is no filterOnBoundary searcher,
// just return the notOnBoundarySearcher
if filterOnBoundarySearcher == nil {
rv.searcher = notOnBoundarySearcher
return rv, nil
} else if onBoundarySearcher != nil {
return onBoundarySearcher, nil
} else if notOnBoundarySearcher != nil {
return notOnBoundarySearcher, nil
}
rv.searcher, err = NewDisjunctionSearcher(indexReader,
[]search.Searcher{filterOnBoundarySearcher, notOnBoundarySearcher},
0, options)
if err != nil {
cleanupOpenedSearchers()
return nil, err
}
return rv, nil
}
func (s *GeoBoundingBoxSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *GeoBoundingBoxSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *GeoBoundingBoxSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *GeoBoundingBoxSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *GeoBoundingBoxSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *GeoBoundingBoxSearcher) Close() error {
return s.searcher.Close()
}
func (s *GeoBoundingBoxSearcher) Min() int {
return 0
}
func (s *GeoBoundingBoxSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
return NewMatchNoneSearcher(indexReader)
}
var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func (s *GeoBoundingBoxSearcher) computeRange(term uint64, shift uint) {
func computeRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
@@ -179,11 +100,19 @@ func (s *GeoBoundingBoxSearcher) computeRange(term uint64, shift uint) {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
s.relateAndRecurse(term, lowerMax, shift)
s.relateAndRecurse(split, upperMax, shift)
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
func (s *GeoBoundingBoxSearcher) relateAndRecurse(start, end uint64, res uint) {
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
@@ -193,29 +122,52 @@ func (s *GeoBoundingBoxSearcher) relateAndRecurse(start, end uint64, res uint) {
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
s.minLon, s.minLat, s.maxLon, s.maxLat)
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
s.minLon, s.minLat, s.maxLon, s.maxLat)) {
s.rangeBounds = append(s.rangeBounds,
newGeoRange(start, res, level, !within))
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
s.minLon, s.minLat, s.maxLon, s.maxLat) {
s.computeRange(start, res-1)
sminLon, sminLat, smaxLon, smaxLat) {
return computeRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
}
return nil, nil
}
type geoRange struct {
cell []byte
level uint
boundary bool
}
func newGeoRange(lower uint64, res uint, level uint, boundary bool) *geoRange {
return &geoRange{
level: level,
boundary: boundary,
cell: numeric.MustNewPrefixCodedInt64(int64(lower), res),
func buildRectFilter(indexReader index.IndexReader, field string,
minLon, minLat, maxLon, maxLat float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
var i64 int64
i64, err = prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
if err == nil && found {
return geo.BoundingBoxContains(lon, lat,
minLon, minLat, maxLon, maxLat)
}
return false
}
}
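
Because the constructor now returns a plain search.Searcher, callers work
with it purely through the interface. A minimal usage sketch (not part of
the diff), with the coordinates, field name, and options assumed for
illustration:

package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// searchRect is a hypothetical caller; the bounding box, the "geo" field
// name, and the zero-value options are assumptions for illustration.
func searchRect(indexReader index.IndexReader) (search.Searcher, error) {
	return NewGeoBoundingBoxSearcher(indexReader,
		-2.24, 48.21, -2.17, 48.26, // minLon, minLat, maxLon, maxLat
		"geo", 1.0, search.SearcherOptions{}, true)
}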

View File

@@ -21,36 +21,34 @@ import (
"github.com/blevesearch/bleve/search"
)
type GeoPointDistanceSearcher struct {
indexReader index.IndexReader
field string
centerLon float64
centerLat float64
dist float64
options search.SearcherOptions
searcher *FilteringSearcher
}
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
centerLat, dist float64, field string, boost float64,
options search.SearcherOptions) (*GeoPointDistanceSearcher, error) {
rv := &GeoPointDistanceSearcher{
indexReader: indexReader,
centerLon: centerLon,
centerLat: centerLat,
dist: dist,
field: field,
options: options,
}
options search.SearcherOptions) (search.Searcher, error) {
// compute bounding box containing the circle
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat :=
geo.ComputeBoundingBox(centerLon, centerLat, dist)
var boxSearcher search.Searcher
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options)
if err != nil {
return nil, err
}
// wrap it in a filtering searcher which checks the actual distance
return NewFilteringSearcher(boxSearcher,
buildDistFilter(indexReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box
// if the desired box crosses the dateline, it is automatically split into
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
@@ -67,86 +65,51 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
return nil, err
}
boxSearcher, err = NewDisjunctionSearcher(indexReader,
boxSearcher, err := NewDisjunctionSearcher(indexReader,
[]search.Searcher{leftSearcher, rightSearcher}, 0, options)
if err != nil {
_ = leftSearcher.Close()
_ = rightSearcher.Close()
return nil, err
}
} else {
// build geoboundinggox searcher for that bounding box
var err error
boxSearcher, err = NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
if err != nil {
return nil, err
}
return boxSearcher, nil
}
// wrap it in a filtering searcher which checks the actual distance
rv.searcher = NewFilteringSearcher(boxSearcher,
func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
// build geoboundinggox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
if err != nil {
return nil, err
}
return boxSearcher, nil
}
func buildDistFilter(indexReader index.IndexReader, field string,
centerLon, centerLat, maxDist float64) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := indexReader.DocumentVisitFieldTerms(d.IndexInternalID,
[]string{field}, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
})
if err == nil && found {
dist := geo.Haversin(lon, lat, rv.centerLon, rv.centerLat)
if dist <= rv.dist/1000 {
return true
}
})
if err == nil && found {
dist := geo.Haversin(lon, lat, centerLon, centerLat)
if dist <= maxDist/1000 {
return true
}
return false
})
return rv, nil
}
func (s *GeoPointDistanceSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *GeoPointDistanceSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *GeoPointDistanceSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *GeoPointDistanceSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *GeoPointDistanceSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *GeoPointDistanceSearcher) Close() error {
return s.searcher.Close()
}
func (s *GeoPointDistanceSearcher) Min() int {
return 0
}
func (s *GeoPointDistanceSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}
return false
}
}

View File

@@ -0,0 +1,78 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return searcher, nil
}
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcherBytes(indexReader, term, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return searcher, nil
}
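
A minimal usage sketch (not part of the diff) of the []byte variant, which
the numeric range and geo searchers rely on; the boost and zero-value
options are assumptions:

package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// anyOfByteTerms is a hypothetical helper showing NewMultiTermSearcherBytes
// with terms that are already []byte, e.g. prefix-coded numeric terms.
func anyOfByteTerms(indexReader index.IndexReader, terms [][]byte,
	field string) (search.Searcher, error) {
	return NewMultiTermSearcherBytes(indexReader, terms, field, 1.0,
		search.SearcherOptions{})
}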

View File

@@ -23,16 +23,9 @@ import (
"github.com/blevesearch/bleve/search"
)
type NumericRangeSearcher struct {
indexReader index.IndexReader
min *float64
max *float64
field string
options search.SearcherOptions
searcher *DisjunctionSearcher
}
func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string, boost float64, options search.SearcherOptions) (*NumericRangeSearcher, error) {
func NewNumericRangeSearcher(indexReader index.IndexReader,
min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
boost float64, options search.SearcherOptions) (search.Searcher, error) {
// account for unbounded edges
if min == nil {
negInf := math.Inf(-1)
@@ -65,61 +58,8 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr()
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
if searcher != nil {
_ = searcher.Close()
}
}
}
for i, term := range terms {
var err error
qsearchers[i], err = NewTermSearcher(indexReader, string(term), field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return &NumericRangeSearcher{
indexReader: indexReader,
min: min,
max: max,
field: field,
options: options,
searcher: searcher,
}, nil
}
func (s *NumericRangeSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *NumericRangeSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *NumericRangeSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *NumericRangeSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *NumericRangeSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *NumericRangeSearcher) Close() error {
return s.searcher.Close()
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options)
}
type termRange struct {
@@ -190,7 +130,8 @@ func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound || lowerWrapped || upperWrapped {
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
@@ -225,11 +166,3 @@ func newRangeBytes(minBytes, maxBytes []byte) *termRange {
endTerm: maxBytes,
}
}
func (s *NumericRangeSearcher) Min() int {
return 0
}
func (s *NumericRangeSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
}

View File

@@ -26,7 +26,9 @@ import (
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can intefere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
prefixTerm, complete := pattern.LiteralPrefix()
var candidateTerms []string
@@ -35,39 +37,19 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi
candidateTerms = []string{prefixTerm}
} else {
var err error
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm)
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field,
prefixTerm)
if err != nil {
return nil, err
}
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
for _, cterm := range candidateTerms {
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, options)
if err != nil {
qsearchersClose()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return searcher, err
return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
options)
}
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
func findRegexpCandidateTerms(indexReader index.IndexReader,
pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
rv = make([]string, 0)
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {

View File

@@ -22,12 +22,9 @@ import (
type TermSearcher struct {
indexReader index.IndexReader
term string
field string
reader index.TermFieldReader
scorer *scorer.TermQueryScorer
tfd index.TermFieldDoc
options search.SearcherOptions
}
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
@@ -40,12 +37,27 @@ func NewTermSearcher(indexReader index.IndexReader, term string, field string, b
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer([]byte(term), field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
reader: reader,
scorer: scorer,
}, nil
}
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
reader, err := indexReader.TermFieldReader(term, field, true, true, options.IncludeTermVectors)
if err != nil {
return nil, err
}
count, err := indexReader.DocCount()
if err != nil {
_ = reader.Close()
return nil, err
}
scorer := scorer.NewTermQueryScorer(term, field, boost, count, reader.Count(), options)
return &TermSearcher{
indexReader: indexReader,
term: term,
field: field,
options: options,
reader: reader,
scorer: scorer,
}, nil
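
The []byte constructor lets callers that already hold byte-slice terms
(for example, terms read from a field dictionary or prefix-coded numeric
terms) avoid a string conversion. A minimal sketch (not part of the diff),
with the "desc" field and zero-value options assumed:

package searcher

import (
	"github.com/blevesearch/bleve/index"
	"github.com/blevesearch/bleve/search"
)

// byteTermSearcher is a hypothetical caller of the new constructor; the
// field name, boost, and options are illustrative only.
func byteTermSearcher(indexReader index.IndexReader, term []byte) (
	search.Searcher, error) {
	return NewTermSearcherBytes(indexReader, term, "desc", 1.0,
		search.SearcherOptions{})
}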

View File

@@ -19,93 +19,21 @@ import (
"github.com/blevesearch/bleve/search"
)
type TermPrefixSearcher struct {
indexReader index.IndexReader
prefix string
field string
options search.SearcherOptions
searcher *DisjunctionSearcher
}
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, options search.SearcherOptions) (*TermPrefixSearcher, error) {
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
// find the terms with this prefix
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
if err != nil {
return nil, err
}
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
qsearchersClose := func() {
for _, searcher := range qsearchers {
_ = searcher.Close()
}
}
var terms []string
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
var qsearcher *TermSearcher
qsearcher, err = NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, options)
if err != nil {
qsearchersClose()
_ = fieldDict.Close()
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
terms = append(terms, tfd.Term)
tfd, err = fieldDict.Next()
}
err = fieldDict.Close()
if err != nil {
qsearchersClose()
return nil, err
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, options)
if err != nil {
qsearchersClose()
return nil, err
}
return &TermPrefixSearcher{
indexReader: indexReader,
prefix: prefix,
field: field,
options: options,
searcher: searcher,
}, nil
}
func (s *TermPrefixSearcher) Count() uint64 {
return s.searcher.Count()
}
func (s *TermPrefixSearcher) Weight() float64 {
return s.searcher.Weight()
}
func (s *TermPrefixSearcher) SetQueryNorm(qnorm float64) {
s.searcher.SetQueryNorm(qnorm)
}
func (s *TermPrefixSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return s.searcher.Next(ctx)
}
func (s *TermPrefixSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return s.searcher.Advance(ctx, ID)
}
func (s *TermPrefixSearcher) Close() error {
return s.searcher.Close()
}
func (s *TermPrefixSearcher) Min() int {
return 0
}
func (s *TermPrefixSearcher) DocumentMatchPoolSize() int {
return s.searcher.DocumentMatchPoolSize()
return NewMultiTermSearcher(indexReader, terms, field, boost, options)
}