489 lines
12 KiB
Go
489 lines
12 KiB
Go
// Copyright (c) 2014 Couchbase, Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
|
|
package search
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"sort"
|
|
"strings"
|
|
|
|
"github.com/blevesearch/bleve/numeric_util"
|
|
)
|
|
|
|
// Sentinel sort values. SortField.filterTermsByMode returns one of these when
// a document has no usable term for the sort field, so that such documents
// land at one end of the result ordering.
var (
	// HighTerm is a string made of ten 0xff bytes.
	HighTerm = strings.Repeat("\xff", 10)

	// LowTerm is a string made of a single 0x00 byte.
	LowTerm = "\x00"
)
|
|
|
|
type SearchSort interface {
|
|
Value(a *DocumentMatch) string
|
|
Descending() bool
|
|
|
|
RequiresDocID() bool
|
|
RequiresScoring() bool
|
|
RequiresFields() []string
|
|
}
|
|
|
|
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
|
|
descending, ok := input["desc"].(bool)
|
|
by, ok := input["by"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("search sort must specify by")
|
|
}
|
|
switch by {
|
|
case "id":
|
|
return &SortDocID{
|
|
Desc: descending,
|
|
}, nil
|
|
case "score":
|
|
return &SortScore{
|
|
Desc: descending,
|
|
}, nil
|
|
case "field":
|
|
field, ok := input["field"].(string)
|
|
if !ok {
|
|
return nil, fmt.Errorf("search sort mode field must specify field")
|
|
}
|
|
rv := &SortField{
|
|
Field: field,
|
|
Desc: descending,
|
|
}
|
|
typ, ok := input["type"].(string)
|
|
if ok {
|
|
switch typ {
|
|
case "auto":
|
|
rv.Type = SortFieldAuto
|
|
case "string":
|
|
rv.Type = SortFieldAsString
|
|
case "number":
|
|
rv.Type = SortFieldAsNumber
|
|
case "date":
|
|
rv.Type = SortFieldAsDate
|
|
default:
|
|
return nil, fmt.Errorf("unkown sort field type: %s", typ)
|
|
}
|
|
}
|
|
mode, ok := input["mode"].(string)
|
|
if ok {
|
|
switch mode {
|
|
case "default":
|
|
rv.Mode = SortFieldDefault
|
|
case "min":
|
|
rv.Mode = SortFieldMin
|
|
case "max":
|
|
rv.Mode = SortFieldMax
|
|
default:
|
|
return nil, fmt.Errorf("unknown sort field mode: %s", mode)
|
|
}
|
|
}
|
|
missing, ok := input["missing"].(string)
|
|
if ok {
|
|
switch missing {
|
|
case "first":
|
|
rv.Missing = SortFieldMissingFirst
|
|
case "last":
|
|
rv.Missing = SortFieldMissingLast
|
|
default:
|
|
return nil, fmt.Errorf("unknown sort field missing: %s", missing)
|
|
}
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
return nil, fmt.Errorf("unknown search sort by: %s", by)
|
|
}
|
|
|
|
func ParseSearchSortString(input string) SearchSort {
|
|
descending := false
|
|
if strings.HasPrefix(input, "-") {
|
|
descending = true
|
|
input = input[1:]
|
|
} else if strings.HasPrefix(input, "+") {
|
|
input = input[1:]
|
|
}
|
|
if input == "_id" {
|
|
return &SortDocID{
|
|
Desc: descending,
|
|
}
|
|
} else if input == "_score" {
|
|
return &SortScore{
|
|
Desc: descending,
|
|
}
|
|
}
|
|
return &SortField{
|
|
Field: input,
|
|
Desc: descending,
|
|
}
|
|
}
|
|
|
|
func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
|
|
// first try to parse it as string
|
|
var sortString string
|
|
err := json.Unmarshal(input, &sortString)
|
|
if err != nil {
|
|
var sortObj map[string]interface{}
|
|
err = json.Unmarshal(input, &sortObj)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return ParseSearchSortObj(sortObj)
|
|
}
|
|
return ParseSearchSortString(sortString), nil
|
|
}
|
|
|
|
func ParseSortOrderStrings(in []string) SortOrder {
|
|
rv := make(SortOrder, 0, len(in))
|
|
for _, i := range in {
|
|
ss := ParseSearchSortString(i)
|
|
rv = append(rv, ss)
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
|
|
rv := make(SortOrder, 0, len(in))
|
|
for _, i := range in {
|
|
ss, err := ParseSearchSortJSON(i)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rv = append(rv, ss)
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
// SortOrder is an ordered list of sort criteria. Compare consults the
// criteria in slice order and stops at the first one that distinguishes
// the two documents.
type SortOrder []SearchSort
|
|
|
|
func (so SortOrder) Value(doc *DocumentMatch) {
|
|
for _, soi := range so {
|
|
doc.Sort = append(doc.Sort, soi.Value(doc))
|
|
}
|
|
}
|
|
|
|
// Compare will compare two document matches using the specified sort order
|
|
// if both are numbers, we avoid converting back to term
|
|
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
|
|
// compare the documents on all search sorts until a differences is found
|
|
for x := range so {
|
|
c := 0
|
|
if cachedScoring[x] {
|
|
if i.Score < j.Score {
|
|
c = -1
|
|
} else if i.Score > j.Score {
|
|
c = 1
|
|
}
|
|
} else {
|
|
iVal := i.Sort[x]
|
|
jVal := j.Sort[x]
|
|
c = strings.Compare(iVal, jVal)
|
|
}
|
|
|
|
if c == 0 {
|
|
continue
|
|
}
|
|
if cachedDesc[x] {
|
|
c = -c
|
|
}
|
|
return c
|
|
}
|
|
// if they are the same at this point, impose order based on index natural sort order
|
|
if i.HitNumber == j.HitNumber {
|
|
return 0
|
|
} else if i.HitNumber > j.HitNumber {
|
|
return 1
|
|
}
|
|
return -1
|
|
}
|
|
|
|
func (so SortOrder) RequiresScore() bool {
|
|
rv := false
|
|
for _, soi := range so {
|
|
if soi.RequiresScoring() {
|
|
rv = true
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) RequiresDocID() bool {
|
|
rv := false
|
|
for _, soi := range so {
|
|
if soi.RequiresDocID() {
|
|
rv = true
|
|
}
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) RequiredFields() []string {
|
|
var rv []string
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.RequiresFields()...)
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) CacheIsScore() []bool {
|
|
var rv []bool
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.RequiresScoring())
|
|
}
|
|
return rv
|
|
}
|
|
|
|
func (so SortOrder) CacheDescending() []bool {
|
|
var rv []bool
|
|
for _, soi := range so {
|
|
rv = append(rv, soi.Descending())
|
|
}
|
|
return rv
|
|
}
|
|
|
|
// SortFieldType selects how the terms of a sort field are interpreted.
// The zero value, SortFieldAuto, is normally the right choice.
type SortFieldType int

const (
	// SortFieldAuto inspects the terms: when all of them are valid prefix
	// coded numbers, only those with a shift of 0 are kept; otherwise the
	// terms are used unchanged.
	SortFieldAuto SortFieldType = iota
	// SortFieldAsString forces sorting as string (no prefix coded terms
	// are removed).
	SortFieldAsString
	// SortFieldAsNumber forces sorting as number (only valid prefix coded
	// terms with a shift of 0 are kept).
	SortFieldAsNumber
	// SortFieldAsDate forces sorting as date (only valid prefix coded
	// terms with a shift of 0 are kept).
	SortFieldAsDate
)
|
|
|
|
// SortFieldMode selects which value is used for sorting when the field
// has more than one value.
type SortFieldMode int

const (
	// SortFieldDefault picks the first (or only) value; it is the zero value.
	SortFieldDefault SortFieldMode = iota // FIXME name is confusing
	// SortFieldMin picks the minimum value.
	SortFieldMin
	// SortFieldMax picks the maximum value.
	SortFieldMax
)
|
|
|
|
// SortFieldMissing selects where documents that have no value for the
// sort field are placed in the results.
type SortFieldMissing int

const (
	// SortFieldMissingLast places documents missing the field at the end;
	// it is the zero value.
	SortFieldMissingLast SortFieldMissing = iota

	// SortFieldMissingFirst places documents missing the field at the
	// beginning.
	SortFieldMissingFirst
)
|
|
|
|
// SortField will sort results by the value of a stored field
|
|
// Field is the name of the field
|
|
// Descending reverse the sort order (default false)
|
|
// Type allows forcing of string/number/date behavior (default auto)
|
|
// Mode controls behavior for multi-values fields (default first)
|
|
// Missing controls behavior of missing values (default last)
|
|
type SortField struct {
|
|
Field string
|
|
Desc bool
|
|
Type SortFieldType
|
|
Mode SortFieldMode
|
|
Missing SortFieldMissing
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
func (s *SortField) Value(i *DocumentMatch) string {
|
|
iTerms := i.CachedFieldTerms[s.Field]
|
|
iTerms = s.filterTermsByType(iTerms)
|
|
iTerm := s.filterTermsByMode(iTerms)
|
|
return iTerm
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortField) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
func (s *SortField) filterTermsByMode(terms []string) string {
|
|
if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) {
|
|
return terms[0]
|
|
} else if len(terms) > 1 {
|
|
switch s.Mode {
|
|
case SortFieldMin:
|
|
sort.Strings(terms)
|
|
return terms[0]
|
|
case SortFieldMax:
|
|
sort.Strings(terms)
|
|
return terms[len(terms)-1]
|
|
}
|
|
}
|
|
|
|
// handle missing terms
|
|
if s.Missing == SortFieldMissingLast {
|
|
if s.Desc {
|
|
return LowTerm
|
|
}
|
|
return HighTerm
|
|
}
|
|
if s.Desc {
|
|
return HighTerm
|
|
}
|
|
return LowTerm
|
|
}
|
|
|
|
// filterTermsByType attempts to make one pass on the terms
|
|
// if we are in auto-mode AND all the terms look like prefix-coded numbers
|
|
// return only the terms which had shift of 0
|
|
// if we are in explicit number or date mode, return only valid
|
|
// prefix coded numbers with shift of 0
|
|
func (s *SortField) filterTermsByType(terms []string) []string {
|
|
stype := s.Type
|
|
if stype == SortFieldAuto {
|
|
allTermsPrefixCoded := true
|
|
var termsWithShiftZero []string
|
|
for _, term := range terms {
|
|
valid, shift := numeric_util.ValidPrefixCodedTerm(term)
|
|
if valid && shift == 0 {
|
|
termsWithShiftZero = append(termsWithShiftZero, term)
|
|
} else if !valid {
|
|
allTermsPrefixCoded = false
|
|
}
|
|
}
|
|
if allTermsPrefixCoded {
|
|
terms = termsWithShiftZero
|
|
}
|
|
} else if stype == SortFieldAsNumber || stype == SortFieldAsDate {
|
|
var termsWithShiftZero []string
|
|
for _, term := range terms {
|
|
valid, shift := numeric_util.ValidPrefixCodedTerm(term)
|
|
if valid && shift == 0 {
|
|
termsWithShiftZero = append(termsWithShiftZero)
|
|
}
|
|
}
|
|
terms = termsWithShiftZero
|
|
}
|
|
return terms
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
|
func (s *SortField) RequiresDocID() bool { return false }
|
|
|
|
// RequiresScoring says this SearchStore does not require scoring
|
|
func (s *SortField) RequiresScoring() bool { return false }
|
|
|
|
// RequiresFields says this SearchStore requires the specified stored field
|
|
func (s *SortField) RequiresFields() []string { return []string{s.Field} }
|
|
|
|
func (s *SortField) MarshalJSON() ([]byte, error) {
|
|
// see if simple format can be used
|
|
if s.Missing == SortFieldMissingLast &&
|
|
s.Mode == SortFieldDefault &&
|
|
s.Type == SortFieldAuto {
|
|
if s.Desc {
|
|
return json.Marshal("-" + s.Field)
|
|
}
|
|
return json.Marshal(s.Field)
|
|
}
|
|
sfm := map[string]interface{}{
|
|
"by": "field",
|
|
"field": s.Field,
|
|
}
|
|
if s.Desc {
|
|
sfm["desc"] = true
|
|
}
|
|
if s.Missing > SortFieldMissingLast {
|
|
switch s.Missing {
|
|
case SortFieldMissingFirst:
|
|
sfm["missing"] = "first"
|
|
}
|
|
}
|
|
if s.Mode > SortFieldDefault {
|
|
switch s.Mode {
|
|
case SortFieldMin:
|
|
sfm["mode"] = "min"
|
|
case SortFieldMax:
|
|
sfm["mode"] = "max"
|
|
}
|
|
}
|
|
if s.Type > SortFieldAuto {
|
|
switch s.Type {
|
|
case SortFieldAsString:
|
|
sfm["type"] = "string"
|
|
case SortFieldAsNumber:
|
|
sfm["type"] = "number"
|
|
case SortFieldAsDate:
|
|
sfm["type"] = "date"
|
|
}
|
|
}
|
|
|
|
return json.Marshal(sfm)
|
|
}
|
|
|
|
// SortDocID will sort results by the document identifier
|
|
type SortDocID struct {
|
|
Desc bool
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
func (s *SortDocID) Value(i *DocumentMatch) string {
|
|
return i.ID
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortDocID) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does require the DocID be loaded
|
|
func (s *SortDocID) RequiresDocID() bool { return true }
|
|
|
|
// RequiresScoring says this SearchStore does not require scoring
|
|
func (s *SortDocID) RequiresScoring() bool { return false }
|
|
|
|
// RequiresFields says this SearchStore does not require any stored fields
|
|
func (s *SortDocID) RequiresFields() []string { return nil }
|
|
|
|
func (s *SortDocID) MarshalJSON() ([]byte, error) {
|
|
if s.Desc {
|
|
return json.Marshal("-_id")
|
|
}
|
|
return json.Marshal("_id")
|
|
}
|
|
|
|
// SortScore will sort results by the document match score
|
|
type SortScore struct {
|
|
Desc bool
|
|
}
|
|
|
|
// Value returns the sort value of the DocumentMatch
|
|
func (s *SortScore) Value(i *DocumentMatch) string {
|
|
return "_score"
|
|
}
|
|
|
|
// Descending determines the order of the sort
|
|
func (s *SortScore) Descending() bool {
|
|
return s.Desc
|
|
}
|
|
|
|
// RequiresDocID says this SearchSort does not require the DocID be loaded
|
|
func (s *SortScore) RequiresDocID() bool { return false }
|
|
|
|
// RequiresScoring says this SearchStore does require scoring
|
|
func (s *SortScore) RequiresScoring() bool { return true }
|
|
|
|
// RequiresFields says this SearchStore does not require any store fields
|
|
func (s *SortScore) RequiresFields() []string { return nil }
|
|
|
|
func (s *SortScore) MarshalJSON() ([]byte, error) {
|
|
if s.Desc {
|
|
return json.Marshal("-_score")
|
|
}
|
|
return json.Marshal("_score")
|
|
}
|