0eba2a3f0c
Previously we parsed/returned large sections of the document's back index row in order to compute facet information. This would require parsing the protobuf of the entire back index row. Unfortunately this creates considerable garbage. This new version introduces a visitor/callback approach to working with data inside the back index row. The benefit of this approach is that we can let the higher-level code see values prior to any copies of data being made or intermediate garbage being created. Implementations of the callback must copy any value which they would like to retain beyond the callback. NOTE: this approach duplicates code from the automatically generated protobuf code. NOTE: this approach assumes that the "field" field is serialized before the "terms" field. This is guaranteed by our currently generated protobuf encoder, and is recommended by the protobuf spec. But decoders SHOULD support them occurring in any order, which we do not.
524 lines
13 KiB
Go
524 lines
13 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package search
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/blevesearch/bleve/numeric"
|
|
)
|
|
|
|
// Sentinel terms used as the sort value of a document that has no usable
// value for a sort field (see SortField.filterTermsByMode).
var (
	// HighTerm is ten 0xff bytes.
	HighTerm = strings.Repeat("\xff", 10)
	// LowTerm is a single 0x00 byte.
	LowTerm = "\x00"
)
|
|
|
|
type SearchSort interface {
|
|
UpdateVisitor(field string, term []byte)
|
|
Value(a *DocumentMatch) string
|
|
Descending() bool
|
|
|
|
RequiresDocID() bool
|
|
RequiresScoring() bool
|
|
RequiresFields() []string
|
|
}
|
|
|
|
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
|
|
descending, ok := input["desc"].(bool)
|
|
by, ok := input["by"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("search sort must specify by")
|
|
}
|
|
switch by {
|
|
case "id":
|
|
return &SortDocID{
|
|
Desc: descending,
|
|
}, nil
|
|
case "score":
|
|
return &SortScore{
|
|
Desc: descending,
|
|
}, nil
|
|
case "field":
|
|
field, ok := input["field"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("search sort mode field must specify field")
|
|
}
|
|
rv := &SortField{
|
|
Field: field,
|
|
Desc: descending,
|
|
}
|
|
typ, ok := input["type"].(string)
|
|
if ok {
|
|
switch typ {
|
|
case "auto":
|
|
rv.Type = SortFieldAuto
|
|
case "string":
|
|
rv.Type = SortFieldAsString
|
|
case "number":
|
|
rv.Type = SortFieldAsNumber
|
|
case "date":
|
|
rv.Type = SortFieldAsDate
|
|
default:
|
|
return nil, fmt.Errorf("unknown sort field type: %s", typ)
|
|
}
|
|
}
|
|
mode, ok := input["mode"].(string)
|
|
if ok {
|
|
switch mode {
|
|
case "default":
|
|
rv.Mode = SortFieldDefault
|
|
case "min":
|
|
rv.Mode = SortFieldMin
|
|
case "max":
|
|
rv.Mode = SortFieldMax
|
|
default:
|
|
return nil, fmt.Errorf("unknown sort field mode: %s", mode)
|
|
}
|
|
}
|
|
missing, ok := input["missing"].(string)
|
|
if ok {
|
|
switch missing {
|
|
case "first":
|
|
rv.Missing = SortFieldMissingFirst
|
|
case "last":
|
|
rv.Missing = SortFieldMissingLast
|
|
default:
|
|
return nil, fmt.Errorf("unknown sort field missing: %s", missing)
|
|
}
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unknown search sort by: %s", by)
|
|
}
|
|
|
|
func ParseSearchSortString(input string) SearchSort {
|
|
descending := false
|
|
if strings.HasPrefix(input, "-") {
|
|
descending = true
|
|
input = input[1:]
|
|
} else if strings.HasPrefix(input, "+") {
|
|
input = input[1:]
|
|
}
|
|
if input == "_id" {
|
|
return &SortDocID{
|
|
Desc: descending,
|
|
}
|
|
} else if input == "_score" {
|
|
return &SortScore{
|
|
Desc: descending,
|
|
}
|
|
}
|
|
return &SortField{
|
|
Field: input,
|
|
Desc: descending,
|
|
}
|
|
}
|
|
|
|
func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
|
|
// first try to parse it as string
|
|
var sortString string
|
|
err := json.Unmarshal(input, &sortString)
|
|
if err != nil {
|
|
var sortObj map[string]interface{}
|
|
err = json.Unmarshal(input, &sortObj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ParseSearchSortObj(sortObj)
|
|
}
|
|
return ParseSearchSortString(sortString), nil
|
|
}
|
|
|
|
func ParseSortOrderStrings(in []string) SortOrder {
|
|
rv := make(SortOrder, 0, len(in))
|
|
for _, i := range in {
|
|
ss := ParseSearchSortString(i)
|
|
rv = append(rv, ss)
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
|
|
rv := make(SortOrder, 0, len(in))
|
|
for _, i := range in {
|
|
ss, err := ParseSearchSortJSON(i)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rv = append(rv, ss)
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
// SortOrder is an ordered list of sort criteria; Compare consults them in
// sequence until one of them distinguishes the two documents.
type SortOrder []SearchSort
|
|
|
|
func (so SortOrder) Value(doc *DocumentMatch) {
|
|
for _, soi := range so {
|
|
doc.Sort = append(doc.Sort, soi.Value(doc))
|
|
}
|
|
}
|
|
|
|
func (so SortOrder) UpdateVisitor(field string, term []byte) {
|
|
for _, soi := range so {
|
|
soi.UpdateVisitor(field, term)
|
|
}
|
|
}
|
|
|
|
// Compare will compare two document matches using the specified sort order
|
|
// if both are numbers, we avoid converting back to term
|
|
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
|
|
// compare the documents on all search sorts until a differences is found
|
|
for x := range so {
|
|
c := 0
|
|
if cachedScoring[x] {
|
|
if i.Score < j.Score {
|
|
c = -1
|
|
} else if i.Score > j.Score {
|
|
c = 1
|
|
}
|
|
} else {
|
|
iVal := i.Sort[x]
|
|
jVal := j.Sort[x]
|
|
c = strings.Compare(iVal, jVal)
|
|
}
|
|
|
|
if c == 0 {
|
|
continue
|
|
}
|
|
if cachedDesc[x] {
|
|
c = -c
|
|
}
|
|
return c
|
|
}
|
|
// if they are the same at this point, impose order based on index natural sort order
|
|
if i.HitNumber == j.HitNumber {
|
|
return 0
|
|
} else if i.HitNumber > j.HitNumber {
|
|
return 1
|
|
}
|
|
return -1
|
|
}
|
|
|
|
func (so SortOrder) RequiresScore() bool {
|
|
rv := false
|
|
for _, soi := range so {
|
|
if soi.RequiresScoring() {
|
|
rv = true
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) RequiresDocID() bool {
|
|
rv := false
|
|
for _, soi := range so {
|
|
if soi.RequiresDocID() {
|
|
rv = true
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) RequiredFields() []string {
|
|
var rv []string
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.RequiresFields()...)
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) CacheIsScore() []bool {
|
|
var rv []bool
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.RequiresScoring())
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) CacheDescending() []bool {
|
|
var rv []bool
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.Descending())
|
|
}
|
|
return rv
|
|
}
|
|
|
|
// SortFieldType selects how the terms of a sort field are interpreted.
// The zero value, SortFieldAuto, is normally the right choice.
type SortFieldType int

const (
	// SortFieldAuto applies a heuristic: when every term of the field is a
	// valid prefix coded term, only the terms with a shift of 0 are kept;
	// otherwise all terms are used unchanged.
	SortFieldAuto SortFieldType = iota
	// SortFieldAsString forces sorting as string (no terms are removed).
	SortFieldAsString
	// SortFieldAsNumber forces sorting as number (only valid prefix coded
	// terms with a shift of 0 are kept).
	SortFieldAsNumber
	// SortFieldAsDate forces sorting as date (only valid prefix coded
	// terms with a shift of 0 are kept).
	SortFieldAsDate
)
|
|
|
|
// SortFieldMode selects which value is used for sorting when a field has
// more than one value.
type SortFieldMode int

const (
	// SortFieldDefault uses the first (or only) value; it is the zero value.
	SortFieldDefault SortFieldMode = iota // FIXME name is confusing
	// SortFieldMin uses the smallest value.
	SortFieldMin
	// SortFieldMax uses the largest value.
	SortFieldMax
)
|
|
|
|
// SortFieldMissing selects where documents without a value for the sort
// field are placed in the results.
type SortFieldMissing int

const (
	// SortFieldMissingLast places such documents at the end; it is the
	// zero value.
	SortFieldMissingLast SortFieldMissing = iota

	// SortFieldMissingFirst places such documents at the beginning.
	SortFieldMissingFirst
)
|
|
|
|
// SortField will sort results by the value of a stored field
|
|
// Field is the name of the field
|
|
// Descending reverse the sort order (default false)
|
|
// Type allows forcing of string/number/date behavior (default auto)
|
|
// Mode controls behavior for multi-values fields (default first)
|
|
// Missing controls behavior of missing values (default last)
|
|
type SortField struct {
|
|
Field string
|
|
Desc bool
|
|
Type SortFieldType
|
|
Mode SortFieldMode
|
|
Missing SortFieldMissing
|
|
values []string
|
|
}
|
|
|
|
// UpdateVisitor notifies this sort field that in this document
|
|
// this field has the specified term
|
|
func (s *SortField) UpdateVisitor(field string, term []byte) {
|
|
if field == s.Field {
|
|
s.values = append(s.values, string(term))
|
|
}
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
// it also resets the state of this SortField for
|
|
// processing the next document
|
|
func (s *SortField) Value(i *DocumentMatch) string {
|
|
iTerms := s.filterTermsByType(s.values)
|
|
iTerm := s.filterTermsByMode(iTerms)
|
|
s.values = nil
|
|
return iTerm
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortField) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
func (s *SortField) filterTermsByMode(terms []string) string {
|
|
if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) {
|
|
return terms[0]
|
|
} else if len(terms) > 1 {
|
|
switch s.Mode {
|
|
case SortFieldMin:
|
|
sort.Strings(terms)
|
|
return terms[0]
|
|
case SortFieldMax:
|
|
sort.Strings(terms)
|
|
return terms[len(terms)-1]
|
|
}
|
|
}
|
|
|
|
// handle missing terms
|
|
if s.Missing == SortFieldMissingLast {
|
|
if s.Desc {
|
|
return LowTerm
|
|
}
|
|
return HighTerm
|
|
}
|
|
if s.Desc {
|
|
return HighTerm
|
|
}
|
|
return LowTerm
|
|
}
|
|
|
|
// filterTermsByType attempts to make one pass on the terms
|
|
// if we are in auto-mode AND all the terms look like prefix-coded numbers
|
|
// return only the terms which had shift of 0
|
|
// if we are in explicit number or date mode, return only valid
|
|
// prefix coded numbers with shift of 0
|
|
func (s *SortField) filterTermsByType(terms []string) []string {
|
|
stype := s.Type
|
|
if stype == SortFieldAuto {
|
|
allTermsPrefixCoded := true
|
|
var termsWithShiftZero []string
|
|
for _, term := range terms {
|
|
valid, shift := numeric.ValidPrefixCodedTerm(term)
|
|
if valid && shift == 0 {
|
|
termsWithShiftZero = append(termsWithShiftZero, term)
|
|
} else if !valid {
|
|
allTermsPrefixCoded = false
|
|
}
|
|
}
|
|
if allTermsPrefixCoded {
|
|
terms = termsWithShiftZero
|
|
}
|
|
} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
|
|
var termsWithShiftZero []string
|
|
for _, term := range terms {
|
|
valid, shift := numeric.ValidPrefixCodedTerm(term)
|
|
if valid && shift == 0 {
|
|
termsWithShiftZero = append(termsWithShiftZero)
|
|
}
|
|
}
|
|
terms = termsWithShiftZero
|
|
}
|
|
return terms
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
|
func (s *SortField) RequiresDocID() bool { return false }
|
|
|
|
// RequiresScoring says this SearchStore does not require scoring
|
|
func (s *SortField) RequiresScoring() bool { return false }
|
|
|
|
// RequiresFields says this SearchStore requires the specified stored field
|
|
func (s *SortField) RequiresFields() []string { return []string{s.Field} }
|
|
|
|
func (s *SortField) MarshalJSON() ([]byte, error) {
|
|
// see if simple format can be used
|
|
if s.Missing == SortFieldMissingLast &&
|
|
s.Mode == SortFieldDefault &&
|
|
s.Type == SortFieldAuto {
|
|
if s.Desc {
|
|
return json.Marshal("-" + s.Field)
|
|
}
|
|
return json.Marshal(s.Field)
|
|
}
|
|
sfm := map[string]interface{}{
|
|
"by": "field",
|
|
"field": s.Field,
|
|
}
|
|
if s.Desc {
|
|
sfm["desc"] = true
|
|
}
|
|
if s.Missing > SortFieldMissingLast {
|
|
switch s.Missing {
|
|
case SortFieldMissingFirst:
|
|
sfm["missing"] = "first"
|
|
}
|
|
}
|
|
if s.Mode > SortFieldDefault {
|
|
switch s.Mode {
|
|
case SortFieldMin:
|
|
sfm["mode"] = "min"
|
|
case SortFieldMax:
|
|
sfm["mode"] = "max"
|
|
}
|
|
}
|
|
if s.Type > SortFieldAuto {
|
|
switch s.Type {
|
|
case SortFieldAsString:
|
|
sfm["type"] = "string"
|
|
case SortFieldAsNumber:
|
|
sfm["type"] = "number"
|
|
case SortFieldAsDate:
|
|
sfm["type"] = "date"
|
|
}
|
|
}
|
|
|
|
return json.Marshal(sfm)
|
|
}
|
|
|
|
// SortDocID sorts results by document identifier.
type SortDocID struct {
	// Desc reverses the sort order.
	Desc bool
}
|
|
|
|
// UpdateVisitor is a no-op for SortDocID as it's value
|
|
// is not dependent on any field terms
|
|
func (s *SortDocID) UpdateVisitor(field string, term []byte) {
|
|
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
func (s *SortDocID) Value(i *DocumentMatch) string {
|
|
return i.ID
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortDocID) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does require the DocID be loaded
|
|
func (s *SortDocID) RequiresDocID() bool { return true }
|
|
|
|
// RequiresScoring says this SearchStore does not require scoring
|
|
func (s *SortDocID) RequiresScoring() bool { return false }
|
|
|
|
// RequiresFields says this SearchStore does not require any stored fields
|
|
func (s *SortDocID) RequiresFields() []string { return nil }
|
|
|
|
func (s *SortDocID) MarshalJSON() ([]byte, error) {
|
|
if s.Desc {
|
|
return json.Marshal("-_id")
|
|
}
|
|
return json.Marshal("_id")
|
|
}
|
|
|
|
// SortScore sorts results by document match score.
type SortScore struct {
	// Desc reverses the sort order.
	Desc bool
}
|
|
|
|
// UpdateVisitor is a no-op for SortScore as it's value
|
|
// is not dependent on any field terms
|
|
func (s *SortScore) UpdateVisitor(field string, term []byte) {
|
|
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
func (s *SortScore) Value(i *DocumentMatch) string {
|
|
return "_score"
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortScore) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
|
func (s *SortScore) RequiresDocID() bool { return false }
|
|
|
|
// RequiresScoring says this SearchStore does require scoring
|
|
func (s *SortScore) RequiresScoring() bool { return true }
|
|
|
|
// RequiresFields says this SearchStore does not require any store fields
|
|
func (s *SortScore) RequiresFields() []string { return nil }
|
|
|
|
func (s *SortScore) MarshalJSON() ([]byte, error) {
|
|
if s.Desc {
|
|
return json.Marshal("-_score")
|
|
}
|
|
return json.Marshal("_score")
|
|
}
|