0
0
Fork 0

adds support for more complex field sorts specified as a JSON object (not just a string)

previously from JSON we would just deserialize strings like
"-abv" or "city" or "_id" or "_score" as simple sorts
on fields, ids or scores respectively

while this is simple and compact, it can be ambiguous (for
example if you have a field starting with - or if you have a field
named "_id" already).  also, this simple syntax doesn't allow us
to specify more complex options to deal with type/mode/missing

we keep support for the simple string syntax, but now also
recognize a more expressive syntax like:

{
  "by": "field",
  "field": "abv",
  "desc": true,
  "type": "string",
  "mode": "min",
  "missing": "first"
}

type, mode and missing are optional and default to
"auto", "default", and "last" respectively
This commit is contained in:
Marty Schoch 2016-08-17 14:33:51 -07:00
parent 750e0ac16c
commit 27ba6187bc
4 changed files with 175 additions and 41 deletions

View File

@ -10,8 +10,6 @@
package upside_down package upside_down
import ( import (
"fmt"
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store"
@ -107,10 +105,9 @@ func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []stri
fieldsMap := make(map[uint16]string, len(fields)) fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields { for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false) id, ok := i.index.fieldCache.FieldNamed(f, false)
if !ok { if ok {
return nil, fmt.Errorf("Field %s was not found in cache", f) fieldsMap[id] = f
} }
fieldsMap[id] = f
} }
for _, entry := range back.termEntries { for _, entry := range back.termEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok { if field, ok := fieldsMap[uint16(*entry.Field)]; ok {

View File

@ -254,7 +254,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
if temp.Sort == nil { if temp.Sort == nil {
r.Sort = search.SortOrder{&search.SortScore{Descending: true}} r.Sort = search.SortOrder{&search.SortScore{Descending: true}}
} else { } else {
r.Sort, err = search.ParseSortOrder(temp.Sort) r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
if err != nil { if err != nil {
return err return err
} }

View File

@ -42,12 +42,23 @@ func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
for _, facetBuilder := range fb.facets { for _, facetBuilder := range fb.facets {
fields = append(fields, facetBuilder.Field()) fields = append(fields, facetBuilder.Field())
} }
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fields)
if err != nil { if len(fields) > 0 {
return err // find out which fields haven't been loaded yet
fieldsToLoad := docMatch.CachedFieldTerms.FieldsNotYetCached(fields)
// look them up
fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.IndexInternalID, fieldsToLoad)
if err != nil {
return err
}
// cache these as well
if docMatch.CachedFieldTerms == nil {
docMatch.CachedFieldTerms = make(map[string][]string)
}
docMatch.CachedFieldTerms.Merge(fieldTerms)
} }
for _, facetBuilder := range fb.facets { for _, facetBuilder := range fb.facets {
facetBuilder.Update(fieldTerms) facetBuilder.Update(docMatch.CachedFieldTerms)
} }
return nil return nil
} }

View File

@ -11,6 +11,7 @@ package search
import ( import (
"encoding/json" "encoding/json"
"fmt"
"sort" "sort"
"strings" "strings"
@ -28,36 +29,117 @@ type SearchSort interface {
RequiresFields() []string RequiresFields() []string
} }
func ParseSearchSort(input json.RawMessage) (SearchSort, error) { func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
var tmp string descending, ok := input["desc"].(bool)
err := json.Unmarshal(input, &tmp) by, ok := input["by"].(string)
if err != nil { if !ok {
return nil, err return nil, fmt.Errorf("search sort must specify by")
} }
descending := false switch by {
if strings.HasPrefix(tmp, "-") { case "id":
descending = true
tmp = tmp[1:]
}
if tmp == "_id" {
return &SortDocID{ return &SortDocID{
Descending: descending, Descending: descending,
}, nil }, nil
} else if tmp == "_score" { case "score":
return &SortScore{
Descending: descending,
}, nil
case "field":
field, ok := input["field"].(string)
if !ok {
return nil, fmt.Errorf("search sort mode field must specify field")
}
rv := &SortField{
Field: field,
Descending: descending,
}
typ, ok := input["type"].(string)
if ok {
switch typ {
case "auto":
rv.Type = SortFieldAuto
case "string":
rv.Type = SortFieldAsString
case "number":
rv.Type = SortFieldAsNumber
case "date":
rv.Type = SortFieldAsDate
default:
return nil, fmt.Errorf("unkown sort field type: %s", typ)
}
}
mode, ok := input["mode"].(string)
if ok {
switch mode {
case "default":
rv.Mode = SortFieldDefault
case "min":
rv.Mode = SortFieldMin
case "max":
rv.Mode = SortFieldMax
default:
return nil, fmt.Errorf("unknown sort field mode: %s", mode)
}
}
missing, ok := input["missing"].(string)
if ok {
switch missing {
case "first":
rv.Missing = SortFieldMissingFirst
case "last":
rv.Missing = SortFieldMissingLast
default:
return nil, fmt.Errorf("unknown sort field missing: %s", missing)
}
}
return rv, nil
}
return nil, fmt.Errorf("unknown search sort by: %s", by)
}
func ParseSearchSortString(input string) (SearchSort, error) {
descending := false
if strings.HasPrefix(input, "-") {
descending = true
input = input[1:]
} else if strings.HasPrefix(input, "+") {
input = input[1:]
}
if input == "_id" {
return &SortDocID{
Descending: descending,
}, nil
} else if input == "_score" {
return &SortScore{ return &SortScore{
Descending: descending, Descending: descending,
}, nil }, nil
} }
return &SortField{ return &SortField{
Field: tmp, Field: input,
Descending: descending, Descending: descending,
}, nil }, nil
} }
func ParseSortOrder(in []json.RawMessage) (SortOrder, error) { func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
// first try to parse it as string
var sortString string
err := json.Unmarshal(input, &sortString)
if err != nil {
var sortObj map[string]interface{}
err = json.Unmarshal(input, &sortObj)
if err != nil {
return nil, err
}
return ParseSearchSortObj(sortObj)
}
return ParseSearchSortString(sortString)
}
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
rv := make(SortOrder, 0, len(in)) rv := make(SortOrder, 0, len(in))
for _, i := range in { for _, i := range in {
ss, err := ParseSearchSort(i) ss, err := ParseSearchSortJSON(i)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -133,16 +215,24 @@ const (
type SortFieldMode int type SortFieldMode int
const ( const (
// SortFieldFirst uses the first (or only) value, this is the default zero-value // SortFieldDefault uses the first (or only) value, this is the default zero-value
SortFieldFirst SortFieldMode = iota // FIXME name is confusing SortFieldDefault SortFieldMode = iota // FIXME name is confusing
// SortFieldMin uses the minimum value // SortFieldMin uses the minimum value
SortFieldMin SortFieldMin
// SortFieldMax uses the maximum value // SortFieldMax uses the maximum value
SortFieldMax SortFieldMax
) )
const SortFieldMissingLast = "_last" // SortFieldMissing controls where documents missing a field value should be sorted
const SortFieldMissingFirst = "_first" type SortFieldMissing int
const (
// SortFieldMissingLast sorts documents missing a field at the end
SortFieldMissingLast SortFieldMissing = iota
// SortFieldMissingFirst sorts documents missing a field at the beginning
SortFieldMissingFirst
)
// SortField will sort results by the value of a stored field // SortField will sort results by the value of a stored field
// Field is the name of the field // Field is the name of the field
@ -155,7 +245,7 @@ type SortField struct {
Descending bool Descending bool
Type SortFieldType Type SortFieldType
Mode SortFieldMode Mode SortFieldMode
Missing string Missing SortFieldMissing
} }
// Compare orders DocumentMatch instances by stored field values // Compare orders DocumentMatch instances by stored field values
@ -174,7 +264,7 @@ func (s *SortField) Compare(i, j *DocumentMatch) int {
} }
func (s *SortField) filterTermsByMode(terms []string) string { func (s *SortField) filterTermsByMode(terms []string) string {
if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldFirst) { if len(terms) == 1 || (len(terms) > 1 && s.Mode == SortFieldDefault) {
return terms[0] return terms[0]
} else if len(terms) > 1 { } else if len(terms) > 1 {
switch s.Mode { switch s.Mode {
@ -188,18 +278,16 @@ func (s *SortField) filterTermsByMode(terms []string) string {
} }
// handle missing terms // handle missing terms
if s.Missing == "" || s.Missing == SortFieldMissingLast { if s.Missing == SortFieldMissingLast {
if s.Descending { if s.Descending {
return LowTerm return LowTerm
} }
return HighTerm return HighTerm
} else if s.Missing == SortFieldMissingFirst {
if s.Descending {
return HighTerm
}
return LowTerm
} }
return s.Missing if s.Descending {
return HighTerm
}
return LowTerm
} }
// filterTermsByType attempts to make one pass on the terms // filterTermsByType attempts to make one pass on the terms
@ -246,10 +334,48 @@ func (s *SortField) RequiresScoring() bool { return false }
func (s *SortField) RequiresFields() []string { return []string{s.Field} } func (s *SortField) RequiresFields() []string { return []string{s.Field} }
func (s *SortField) MarshalJSON() ([]byte, error) { func (s *SortField) MarshalJSON() ([]byte, error) {
if s.Descending { // see if simple format can be used
return json.Marshal("-" + s.Field) if s.Missing == SortFieldMissingLast &&
s.Mode == SortFieldDefault &&
s.Type == SortFieldAuto {
if s.Descending {
return json.Marshal("-" + s.Field)
}
return json.Marshal(s.Field)
} }
return json.Marshal(s.Field) sfm := map[string]interface{}{
"by": "field",
"field": s.Field,
}
if s.Descending {
sfm["desc"] = true
}
if s.Missing > SortFieldMissingLast {
switch s.Missing {
case SortFieldMissingFirst:
sfm["missing"] = "first"
}
}
if s.Mode > SortFieldDefault {
switch s.Mode {
case SortFieldMin:
sfm["mode"] = "min"
case SortFieldMax:
sfm["mode"] = "max"
}
}
if s.Type > SortFieldAuto {
switch s.Type {
case SortFieldAsString:
sfm["type"] = "string"
case SortFieldAsNumber:
sfm["type"] = "number"
case SortFieldAsDate:
sfm["type"] = "date"
}
}
return json.Marshal(sfm)
} }
// SortDocID will sort results by the document identifier // SortDocID will sort results by the document identifier