0
0
Fork 0

added support for returning facet results with requests

supports terms, numeric ranges, and date ranges
closes #14
This commit is contained in:
Marty Schoch 2014-08-11 11:03:29 -04:00
parent e21b7f4436
commit 7bbaa8ecd5
10 changed files with 628 additions and 6 deletions

View File

@ -27,6 +27,7 @@ type Index interface {
DocCount() uint64
Document(id string) (*document.Document, error)
DocumentFieldTerms(id string) (FieldTerms, error)
Fields() ([]string, error)
@ -35,6 +36,8 @@ type Index interface {
DumpFields()
}
// FieldTerms maps a field name to the terms recorded for that field in a
// single document.
type FieldTerms map[string][]string
type TermFieldVector struct {
Field string
Pos uint64

View File

@ -641,6 +641,24 @@ func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
return rv, nil
}
// DocumentFieldTerms returns the terms stored in the back index row of the
// document with the given id, grouped by field name. An error from the back
// index lookup is returned unchanged together with a nil result.
//
// NOTE(review): this assumes backIndexRowForDoc never returns a nil row with
// a nil error - confirm for ids that are not in the index.
func (udc *UpsideDownCouch) DocumentFieldTerms(id string) (index.FieldTerms, error) {
	back, err := udc.backIndexRowForDoc(id)
	if err != nil {
		return nil, err
	}

	fieldTerms := make(index.FieldTerms, len(back.entries))
	for _, e := range back.entries {
		name := udc.fieldIndexToName(e.field)
		// appending to the nil slice of an absent key allocates it on demand
		fieldTerms[name] = append(fieldTerms[name], string(e.term))
	}
	return fieldTerms, nil
}
func decodeFieldType(typ byte, name string, value []byte) document.Field {
switch typ {
case 't':

View File

@ -116,6 +116,33 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
if err != nil {
return nil, err
}
if req.Facets != nil {
facetsBuilder := search.NewFacetsBuilder(i.i)
for facetName, facetRequest := range req.Facets {
if facetRequest.NumericRanges != nil {
// build numeric range facet
facetBuilder := search.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
for _, nr := range facetRequest.NumericRanges {
facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
}
facetsBuilder.Add(facetName, facetBuilder)
} else if facetRequest.DateTimeRanges != nil {
// build date range facet
facetBuilder := search.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
for _, dr := range facetRequest.DateTimeRanges {
facetBuilder.AddRange(dr.Name, dr.Start, dr.End)
}
facetsBuilder.Add(facetName, facetBuilder)
} else {
// build terms facet
facetBuilder := search.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
facetsBuilder.Add(facetName, facetBuilder)
}
}
collector.SetFacetsBuilder(facetsBuilder)
}
err = collector.Collect(searcher)
if err != nil {
return nil, err
@ -192,6 +219,7 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
Total: collector.Total(),
MaxScore: collector.MaxScore(),
Took: collector.Took(),
Facets: collector.FacetResults(),
}, nil
}

View File

@ -16,6 +16,80 @@ import (
"github.com/couchbaselabs/bleve/search"
)
// NumericRange describes one named bucket of a numeric range facet request.
// Min and Max are pointers so that either bound can be left unset.
type NumericRange struct {
// Name identifies the bucket.
Name string `json:"name,omitempty"`
// Min is the optional lower bound.
Min *float64 `json:"min,omitempty"`
// Max is the optional upper bound.
Max *float64 `json:"max,omitempty"`
}
// DateTimeRange describes one named bucket of a date range facet request.
//
// NOTE(review): encoding/json's omitempty never omits a struct value such as
// time.Time, so zero Start/End values are still written when marshaling.
type DateTimeRange struct {
// Name identifies the bucket.
Name string `json:"name,omitempty"`
// Start is the lower bound of the bucket.
Start time.Time `json:"start,omitempty"`
// End is the upper bound of the bucket.
End time.Time `json:"end,omitempty"`
}
// UnmarshalJSON decodes a date range whose optional "start" and "end" values
// are strings, converting them with the date time parser registered in
// Config.Analysis.DateTimeParsers under Config.DefaultDateTimeFormat.
//
// A bound that is absent from the input keeps the value dr already had.
//
// It returns an error when the input cannot be decoded into the expected
// shape or when a bound that is present cannot be parsed. A bound that fails
// to parse is no longer silently dropped, because that left the bound at its
// zero value without telling the caller. On error dr is left unmodified.
func (dr *DateTimeRange) UnmarshalJSON(input []byte) error {
	var temp struct {
		Name  string  `json:"name,omitempty"`
		Start *string `json:"start,omitempty"`
		End   *string `json:"end,omitempty"`
	}

	if err := json.Unmarshal(input, &temp); err != nil {
		return err
	}

	// FIXME allow alternate date parsers
	dateTimeParser := Config.Analysis.DateTimeParsers[*Config.DefaultDateTimeFormat]

	// parse into locals first so that a failure does not leave dr half updated
	start, end := dr.Start, dr.End
	if temp.Start != nil {
		parsed, err := dateTimeParser.ParseDateTime(*temp.Start)
		if err != nil {
			return err
		}
		start = parsed
	}
	if temp.End != nil {
		parsed, err := dateTimeParser.ParseDateTime(*temp.End)
		if err != nil {
			return err
		}
		end = parsed
	}

	dr.Name = temp.Name
	dr.Start = start
	dr.End = end
	return nil
}
// FacetRequest describes a single facet to compute: the field to facet on,
// a size, and optional numeric or date ranges.
//
// NOTE(review): Size and Field carry no json tags, so encoding/json writes
// them under their Go field names ("Size", "Field") - confirm this is the
// intended wire format.
type FacetRequest struct {
// Size is the size requested for the facet.
Size int
// Field is the name of the field to facet on.
Field string
// NumericRanges holds the buckets of a numeric range facet.
NumericRanges []*NumericRange `json:"numeric_ranges,omitempty"`
// DateTimeRanges holds the buckets of a date range facet.
DateTimeRanges []*DateTimeRange `json:"date_ranges,omitempty"`
}
// NewFacetRequest returns a FacetRequest for the given field and size with no
// ranges set.
func NewFacetRequest(field string, size int) *FacetRequest {
	fr := new(FacetRequest)
	fr.Field = field
	fr.Size = size
	return fr
}
// AddDateTimeRange appends a date range bucket with the given name and
// bounds to the request.
func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
	dr := &DateTimeRange{
		Name:  name,
		Start: start,
		End:   end,
	}
	// append allocates the slice when it is still nil
	fr.DateTimeRanges = append(fr.DateTimeRanges, dr)
}
// AddNumericRange appends a numeric range bucket with the given name and
// bounds to the request. min and max are stored as given, nil included.
func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
	nr := &NumericRange{
		Name: name,
		Min:  min,
		Max:  max,
	}
	// append allocates the slice when it is still nil
	fr.NumericRanges = append(fr.NumericRanges, nr)
}
// FacetsRequest maps a caller chosen facet name to the request describing
// that facet.
type FacetsRequest map[string]*FacetRequest
type HighlightRequest struct {
Style *string `json:"style"`
Fields []string `json:"fields"`
@ -37,9 +111,17 @@ type SearchRequest struct {
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
}
// AddFacet registers the facet request f under facetName, creating the
// Facets map on first use and replacing any request already stored under
// that name.
func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
	if r.Facets == nil {
		r.Facets = FacetsRequest{facetName: f}
		return
	}
	r.Facets[facetName] = f
}
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
var temp struct {
Q json.RawMessage `json:"query"`
@ -47,6 +129,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
}
@ -60,6 +143,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Explain = temp.Explain
r.Highlight = temp.Highlight
r.Fields = temp.Fields
r.Facets = temp.Facets
r.Query, err = ParseQuery(temp.Q)
if err != nil {
return err
@ -95,6 +179,7 @@ type SearchResult struct {
Total uint64 `json:"total_hits"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
}
func (sr *SearchResult) String() string {

View File

@ -18,4 +18,6 @@ type Collector interface {
Total() uint64
MaxScore() float64
Took() time.Duration
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
FacetResults() FacetResults
}

View File

@ -14,12 +14,13 @@ import (
)
type TopScoreCollector struct {
k int
skip int
results *list.List
took time.Duration
maxScore float64
total uint64
k int
skip int
results *list.List
took time.Duration
maxScore float64
total uint64
facetsBuilder *FacetsBuilder
}
func NewTopScorerCollector(k int) *TopScoreCollector {
@ -55,6 +56,9 @@ func (tksc *TopScoreCollector) Collect(searcher Searcher) error {
next, err := searcher.Next()
for err == nil && next != nil {
tksc.collectSingle(next)
if tksc.facetsBuilder != nil {
tksc.facetsBuilder.Update(next)
}
next, err = searcher.Next()
}
// compute search duration
@ -112,3 +116,15 @@ func (tksc *TopScoreCollector) Results() DocumentMatchCollection {
}
return DocumentMatchCollection{}
}
// SetFacetsBuilder stores the facets builder on the collector. Passing nil
// clears it.
func (tksc *TopScoreCollector) SetFacetsBuilder(facetsBuilder *FacetsBuilder) {
tksc.facetsBuilder = facetsBuilder
}
// FacetResults returns the results of the configured facets builder, or an
// empty FacetResults when no facets builder has been set.
func (tksc *TopScoreCollector) FacetResults() FacetResults {
	if tksc.facetsBuilder == nil {
		return FacetResults{}
	}
	return tksc.facetsBuilder.Results()
}

View File

@ -0,0 +1,145 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"container/list"
"time"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/numeric_util"
)
// dateTimeRange holds the bounds of one date range bucket.
type dateTimeRange struct {
start time.Time
end time.Time
}
// DateTimeFacetBuilder accumulates counts for a set of named date ranges
// over a single field.
type DateTimeFacetBuilder struct {
// size is the requested facet size
size int
// field is the name of the field being faceted
field string
// termsCount is a counter per range name
termsCount map[string]int
// total and missing are running counters
total int
missing int
// ranges holds the registered ranges by name
ranges map[string]*dateTimeRange
}
// NewDateTimeFacetBuilder returns a builder for a date range facet on field
// with the given size. It starts with no ranges and empty counters.
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
	fb := &DateTimeFacetBuilder{field: field, size: size}
	fb.termsCount = map[string]int{}
	fb.ranges = map[string]*dateTimeRange{}
	return fb
}
// AddRange registers the range [start, end) under name, replacing any range
// already registered with that name. A zero start or end leaves that side of
// the range unbounded when documents are matched.
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	fb.ranges[name] = &dateTimeRange{start: start, end: end}
}
// Update folds the field terms of one document into the facet counters.
//
// When the document has no entry for the builder's field the missing counter
// is incremented. Otherwise every term that decodes as a prefix coded value
// with shift 0 is converted to a time (nanoseconds since the Unix epoch) and
// counted once for each registered range that contains it; terms that fail
// to decode or use another shift are ignored.
func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) {
	terms, found := ft[fb.field]
	if !found {
		fb.missing++
		return
	}

	for _, term := range terms {
		encoded := numeric_util.PrefixCoded(term)
		// only consider the values which are shifted 0
		if shift, err := encoded.Shift(); err != nil || shift != 0 {
			continue
		}
		nanos, err := encoded.Int64()
		if err != nil {
			continue
		}
		t := time.Unix(0, nanos)

		// look at each of the ranges for a match
		for name, r := range fb.ranges {
			beforeStart := !r.start.IsZero() && t.Before(r.start)
			atOrAfterEnd := !r.end.IsZero() && !t.Before(r.end)
			if beforeStart || atOrAfterEnd {
				continue
			}
			fb.termsCount[name]++
			fb.total++
		}
	}
}
// Result reports the counts accumulated so far.
//
// At most fb.size ranges are returned in DateRanges, ordered by descending
// count. Ranges with equal counts are ordered by ascending name, so both the
// order and the choice of which ranges are cut off by the size limit are
// deterministic rather than depending on map iteration order. Ranges that
// matched nothing are not reported. Start and End are formatted as
// RFC3339Nano and left nil for an unbounded side. Other is the part of the
// total that is not covered by the returned ranges; with a size of zero or
// less no ranges are returned and Other equals the total.
func (fb *DateTimeFacetBuilder) Result() FacetResult {
	rv := FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	// FIXME better implementation needed here this is quick and dirty
	// topN is kept ordered from the lowest ranked facet (front) to the
	// highest ranked one (back), so trimming always drops the front
	topN := list.New()
	for name, count := range fb.termsCount {
		r := fb.ranges[name]
		tf := &DateRangeFacet{
			Name:  name,
			Count: count,
		}
		if !r.start.IsZero() {
			start := r.start.Format(time.RFC3339Nano)
			tf.Start = &start
		}
		if !r.end.IsZero() {
			end := r.end.Format(time.RFC3339Nano)
			tf.End = &end
		}

		// find the first entry that ranks above tf; names are unique map
		// keys, so count plus name gives a strict total order
		var above *list.Element
		for e := topN.Front(); e != nil; e = e.Next() {
			curr := e.Value.(*DateRangeFacet)
			if tf.Count < curr.Count || (tf.Count == curr.Count && tf.Name > curr.Name) {
				above = e
				break
			}
		}
		if above != nil {
			topN.InsertBefore(tf, above)
		} else {
			// nothing ranks above it, so it becomes the new best entry
			topN.PushBack(tf)
		}
		// if we just made the list too long, drop the lowest ranked entry
		if topN.Len() > fb.size {
			topN.Remove(topN.Front())
		}
	}

	// we now have the list of the top N facets, emit them best first
	rv.DateRanges = make([]*DateRangeFacet, 0, topN.Len())
	notOther := 0
	for e := topN.Back(); e != nil; e = e.Prev() {
		facet := e.Value.(*DateRangeFacet)
		rv.DateRanges = append(rv.DateRanges, facet)
		notOther += facet.Count
	}
	rv.Other = fb.total - notOther

	return rv
}

View File

@ -0,0 +1,138 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"container/list"
"github.com/couchbaselabs/bleve/index"
"github.com/couchbaselabs/bleve/numeric_util"
)
// numericRange holds the optional bounds of one numeric range bucket.
type numericRange struct {
min *float64
max *float64
}
// NumericFacetBuilder accumulates counts for a set of named numeric ranges
// over a single field.
type NumericFacetBuilder struct {
// size is the requested facet size
size int
// field is the name of the field being faceted
field string
// termsCount is a counter per range name
termsCount map[string]int
// total and missing are running counters
total int
missing int
// ranges holds the registered ranges by name
ranges map[string]*numericRange
}
// NewNumericFacetBuilder returns a builder for a numeric range facet on field
// with the given size. It starts with no ranges and empty counters.
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
	fb := &NumericFacetBuilder{field: field, size: size}
	fb.termsCount = map[string]int{}
	fb.ranges = map[string]*numericRange{}
	return fb
}
// AddRange registers the range [min, max) under name, replacing any range
// already registered with that name. A nil min or max leaves that side of the
// range unbounded when documents are matched.
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
	fb.ranges[name] = &numericRange{min: min, max: max}
}
// Update folds the field terms of one document into the facet counters.
//
// When the document has no entry for the builder's field the missing counter
// is incremented. Otherwise every term that decodes as a prefix coded value
// with shift 0 is converted to a float64 and counted once for each registered
// range that contains it; terms that fail to decode or use another shift are
// ignored.
func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) {
	terms, found := ft[fb.field]
	if !found {
		fb.missing++
		return
	}

	for _, term := range terms {
		encoded := numeric_util.PrefixCoded(term)
		// only consider the values which are shifted 0
		if shift, err := encoded.Shift(); err != nil || shift != 0 {
			continue
		}
		i64, err := encoded.Int64()
		if err != nil {
			continue
		}
		value := numeric_util.Int64ToFloat64(i64)

		// look at each of the ranges for a match
		for name, r := range fb.ranges {
			minOK := r.min == nil || value >= *r.min
			maxOK := r.max == nil || value < *r.max
			if minOK && maxOK {
				fb.termsCount[name]++
				fb.total++
			}
		}
	}
}
// Result reports the counts accumulated so far.
//
// At most fb.size ranges are returned in NumericRanges, ordered by descending
// count. Ranges with equal counts are ordered by ascending name, so both the
// order and the choice of which ranges are cut off by the size limit are
// deterministic rather than depending on map iteration order. Ranges that
// matched nothing are not reported. Other is the part of the total that is
// not covered by the returned ranges; with a size of zero or less no ranges
// are returned and Other equals the total.
func (fb *NumericFacetBuilder) Result() FacetResult {
	rv := FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	// FIXME better implementation needed here this is quick and dirty
	// topN is kept ordered from the lowest ranked facet (front) to the
	// highest ranked one (back), so trimming always drops the front
	topN := list.New()
	for name, count := range fb.termsCount {
		r := fb.ranges[name]
		tf := &NumericRangeFacet{
			Name:  name,
			Count: count,
			Min:   r.min,
			Max:   r.max,
		}

		// find the first entry that ranks above tf; names are unique map
		// keys, so count plus name gives a strict total order
		var above *list.Element
		for e := topN.Front(); e != nil; e = e.Next() {
			curr := e.Value.(*NumericRangeFacet)
			if tf.Count < curr.Count || (tf.Count == curr.Count && tf.Name > curr.Name) {
				above = e
				break
			}
		}
		if above != nil {
			topN.InsertBefore(tf, above)
		} else {
			// nothing ranks above it, so it becomes the new best entry
			topN.PushBack(tf)
		}
		// if we just made the list too long, drop the lowest ranked entry
		if topN.Len() > fb.size {
			topN.Remove(topN.Front())
		}
	}

	// we now have the list of the top N facets, emit them best first
	rv.NumericRanges = make([]*NumericRangeFacet, 0, topN.Len())
	notOther := 0
	for e := topN.Back(); e != nil; e = e.Prev() {
		facet := e.Value.(*NumericRangeFacet)
		rv.NumericRanges = append(rv.NumericRanges, facet)
		notOther += facet.Count
	}
	rv.Other = fb.total - notOther

	return rv
}

View File

@ -0,0 +1,102 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"container/list"
"github.com/couchbaselabs/bleve/index"
)
// TermsFacetBuilder accumulates per term counts for a single field.
type TermsFacetBuilder struct {
// size is the requested facet size
size int
// field is the name of the field being faceted
field string
// termsCount is a counter per term
termsCount map[string]int
// total and missing are running counters
total int
missing int
}
// NewTermsFacetBuilder returns a builder for a terms facet on field with the
// given size. It starts with empty counters.
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
	fb := &TermsFacetBuilder{field: field, size: size}
	fb.termsCount = map[string]int{}
	return fb
}
// Update folds the field terms of one document into the facet counters.
//
// When the document has no entry for the builder's field the missing counter
// is incremented; otherwise each of its terms increments both that term's
// count and the total.
func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) {
	terms, found := ft[fb.field]
	if !found {
		fb.missing++
		return
	}
	for _, term := range terms {
		// a term not seen before starts from the zero value
		fb.termsCount[term]++
		fb.total++
	}
}
// Result reports the counts accumulated so far.
//
// At most fb.size terms are returned in Terms, ordered by descending count.
// Terms with equal counts are ordered by ascending term, so both the order
// and the choice of which terms are cut off by the size limit are
// deterministic rather than depending on map iteration order. Other is the
// part of the total that is not covered by the returned terms; with a size of
// zero or less no terms are returned and Other equals the total.
func (fb *TermsFacetBuilder) Result() FacetResult {
	rv := FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	// FIXME better implementation needed here this is quick and dirty
	// topN is kept ordered from the lowest ranked facet (front) to the
	// highest ranked one (back), so trimming always drops the front
	topN := list.New()
	for term, count := range fb.termsCount {
		tf := &TermFacet{
			Term:  term,
			Count: count,
		}

		// find the first entry that ranks above tf; terms are unique map
		// keys, so count plus term gives a strict total order
		var above *list.Element
		for e := topN.Front(); e != nil; e = e.Next() {
			curr := e.Value.(*TermFacet)
			if tf.Count < curr.Count || (tf.Count == curr.Count && tf.Term > curr.Term) {
				above = e
				break
			}
		}
		if above != nil {
			topN.InsertBefore(tf, above)
		} else {
			// nothing ranks above it, so it becomes the new best entry
			topN.PushBack(tf)
		}
		// if we just made the list too long, drop the lowest ranked entry
		if topN.Len() > fb.size {
			topN.Remove(topN.Front())
		}
	}

	// we now have the list of the top N facets, emit them best first
	rv.Terms = make([]*TermFacet, 0, topN.Len())
	notOther := 0
	for e := topN.Back(); e != nil; e = e.Prev() {
		facet := e.Value.(*TermFacet)
		rv.Terms = append(rv.Terms, facet)
		notOther += facet.Count
	}
	rv.Other = fb.total - notOther

	return rv
}

85
search/facets_builder.go Normal file
View File

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package search
import (
"github.com/couchbaselabs/bleve/index"
)
// FacetBuilder is implemented by types that accumulate a single facet.
type FacetBuilder interface {
// Update is passed the field terms of one document.
Update(index.FieldTerms)
// Result returns the facet result.
Result() FacetResult
}
// FacetsBuilder holds a set of named facet builders together with the index
// they are built against.
type FacetsBuilder struct {
// index is the index the builder was created with
index index.Index
// facets holds the facet builders by name
facets map[string]FacetBuilder
}
// NewFacetsBuilder returns a FacetsBuilder bound to the given index with no
// facet builders registered.
func NewFacetsBuilder(index index.Index) *FacetsBuilder {
	fb := new(FacetsBuilder)
	fb.index = index
	fb.facets = map[string]FacetBuilder{}
	return fb
}
// Add registers facetBuilder under name, replacing any builder previously
// registered with the same name.
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facets[name] = facetBuilder
}
// Update looks up the field terms of the matched document in the index and
// passes them to every registered facet builder. If the lookup fails the
// error is returned and no facet builder is updated.
func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error {
	fieldTerms, err := fb.index.DocumentFieldTerms(docMatch.ID)
	if err == nil {
		for name := range fb.facets {
			fb.facets[name].Update(fieldTerms)
		}
	}
	return err
}
// TermFacet is one entry of a terms facet result: a term and its count.
type TermFacet struct {
Term string `json:"term"`
Count int `json:"count"`
}
// NumericRangeFacet is one entry of a numeric range facet result: the name
// of the range, its optional bounds, and its count.
type NumericRangeFacet struct {
Name string `json:"name"`
// Min and Max are omitted from the JSON output when nil.
Min *float64 `json:"min,omitempty"`
Max *float64 `json:"max,omitempty"`
Count int `json:"count"`
}
// DateRangeFacet is one entry of a date range facet result: the name of the
// range, its optional bounds as strings, and its count.
type DateRangeFacet struct {
Name string `json:"name"`
// Start and End are omitted from the JSON output when nil.
Start *string `json:"start,omitempty"`
End *string `json:"end,omitempty"`
Count int `json:"count"`
}
// FacetResult is the result of a single facet. Terms, NumericRanges and
// DateRanges hold the entries for the respective facet kind and are omitted
// from the JSON output when empty.
type FacetResult struct {
// Field is the name of the faceted field.
Field string `json:"field"`
// Total, Missing and Other are the counters reported by the facet builder.
Total int `json:"total"`
Missing int `json:"missing"`
Other int `json:"other"`
Terms []*TermFacet `json:"terms,omitempty"`
NumericRanges []*NumericRangeFacet `json:"numeric_ranges,omitempty"`
DateRanges []*DateRangeFacet `json:"date_ranges,omitempty"`
}
// FacetResults maps a facet name to the result computed for that facet.
type FacetResults map[string]FacetResult
// Results collects the result of every registered facet builder, keyed by
// the name the builder was registered under. The returned map is never nil.
func (fb *FacetsBuilder) Results() FacetResults {
	results := make(FacetResults, len(fb.facets))
	for name, builder := range fb.facets {
		results[name] = builder.Result()
	}
	return results
}