0
0
Fork 0
bleve/search/search.go

144 lines
4.4 KiB
Go
Raw Normal View History

2014-04-17 22:55:53 +02:00
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
2014-04-17 22:55:53 +02:00
package search
import (
"fmt"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
)
major refactor of index/search API index id's are now opaque (until finally returned to top-level user) - the TermFieldDoc's returned by TermFieldReader no longer contain doc id - instead they return an opaque IndexInternalID - items returned are still in the "natural index order" - but that is no longer guaranteed to be "doc id order" - correct behavior requires that they all follow the same order - but not any particular order - new API FinalizeDocID which converts index internal ID's to public string ID - APIs used internally which previously took doc id now take IndexInternalID - that is DocumentFieldTerms() and DocumentFieldTermsForFields() - however, APIs that are used externally do not reflect this change - that is Document() - DocumentIDReader follows the same changes, but this is less obvious - behavior clarified, used to iterate doc ids, BUT NOT in doc id order - method STILL available to iterate doc ids in range - but again, you won't get them in any meaningful order - new method to iterate actual doc ids from list of possible ids - this was introduced to make the DocIDSearcher continue working searchers now work with the new opaque index internal doc ids - they return new DocumentMatchInternal (which does not have string ID) scorerers also work with these opaque index internal doc ids - they return DocumentMatchInternal (which does not have string ID) collectors now also perform a final step of converting the final result - they STILL return traditional DocumentMatch (with string ID) - but they now also require an IndexReader (so that they can do the conversion)
2016-07-31 19:46:18 +02:00
2014-04-17 22:55:53 +02:00
type Location struct {
Pos float64 `json:"pos"`
Start float64 `json:"start"`
End float64 `json:"end"`
ArrayPositions []float64 `json:"array_positions"`
2014-04-17 22:55:53 +02:00
}
// SameArrayElement returns true if two locations are point to
// the same array element
func (l *Location) SameArrayElement(other *Location) bool {
if len(l.ArrayPositions) != len(other.ArrayPositions) {
return false
}
for i, elem := range l.ArrayPositions {
if other.ArrayPositions[i] != elem {
return false
}
}
return true
}
2014-04-17 22:55:53 +02:00
type Locations []*Location
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
existingLocations, exists := t[term]
if exists {
existingLocations = append(existingLocations, location)
t[term] = existingLocations
} else {
locations := make(Locations, 1)
locations[0] = location
t[term] = locations
}
}
2014-04-17 22:55:53 +02:00
type FieldTermLocationMap map[string]TermLocationMap
type FieldFragmentMap map[string][]string
2014-04-17 22:55:53 +02:00
type DocumentMatch struct {
Index string `json:"index,omitempty"`
ID string `json:"id"`
IndexInternalID index.IndexInternalID `json:"-"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// as we learn field terms, we can cache important ones for later use
// for example, sorting and building facets need these values
CachedFieldTerms index.FieldTerms `json:"-"`
// if we load the document for this hit, remember it so we dont load again
Document *document.Document `json:"-"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
if dm.Fields == nil {
dm.Fields = make(map[string]interface{})
}
existingVal, ok := dm.Fields[name]
if !ok {
dm.Fields[name] = value
return
}
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
2014-04-17 22:55:53 +02:00
}
// Reset allows an already allocated DocumentMatch to be reused
func (dm *DocumentMatch) Reset() *DocumentMatch {
// remember the []byte used for the IndexInternalID
indexInternalId := dm.IndexInternalID
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalId[:0]
return dm
}
func (dm *DocumentMatch) String() string {
return fmt.Sprintf("[%s-%f]", string(dm.IndexInternalID), dm.Score)
}
2014-04-17 22:55:53 +02:00
type DocumentMatchCollection []*DocumentMatch
func (c DocumentMatchCollection) Len() int { return len(c) }
func (c DocumentMatchCollection) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (c DocumentMatchCollection) Less(i, j int) bool { return c[i].Score > c[j].Score }
2014-04-17 22:55:53 +02:00
type Searcher interface {
Next(ctx *SearchContext) (*DocumentMatch, error)
Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
Close() error
2014-04-17 22:55:53 +02:00
Weight() float64
SetQueryNorm(float64)
Count() uint64
Min() int
DocumentMatchPoolSize() int
}
// SearchContext represents the context around a single search
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
2014-04-17 22:55:53 +02:00
}