From 9a9b630a6d3e69637e853bd519e84ba3de69eff4 Mon Sep 17 00:00:00 2001 From: slavikm Date: Sun, 17 Jul 2016 15:31:35 -0700 Subject: [PATCH 1/3] Make facets much faster --- index/index.go | 2 ++ index/upside_down/index_reader.go | 33 +++++++++++++++++++++++++ search.go | 16 +++++++++++- search/facets/facet_builder_datetime.go | 4 +++ search/facets/facet_builder_numeric.go | 4 +++ search/facets/facet_builder_terms.go | 4 +++ search/facets_builder.go | 11 ++++++++- 7 files changed, 72 insertions(+), 2 deletions(-) diff --git a/index/index.go b/index/index.go index 1515f9a4..8684bcc7 100644 --- a/index/index.go +++ b/index/index.go @@ -77,8 +77,10 @@ type IndexReader interface { Document(id string) (*document.Document, error) DocumentFieldTerms(id string) (FieldTerms, error) + DocumentFieldTermsForFields(id string, fieldIDs []uint16, fields []string) (FieldTerms, error) Fields() ([]string, error) + FieldIDs(fields []string) ([]uint16, error) GetInternal(key []byte) ([]byte, error) diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index fb43a86e..a0706331 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -13,6 +13,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" + "fmt" ) type IndexReader struct { @@ -110,6 +111,27 @@ func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { return rv, nil } +func (i *IndexReader) DocumentFieldTermsForFields(id string, fieldIDs []uint16, fields []string) (index.FieldTerms, error) { + back, err := i.index.backIndexRowForDoc(i.kvreader, id) + if err != nil { + return nil, err + } + rv := make(index.FieldTerms, len(fieldIDs)) + for _, entry := range back.termEntries { + for id, field := range fieldIDs { + if field == uint16(*entry.Field) { + terms, ok := rv[fields[id]] + if !ok { + terms = make([]string, 0) + } + terms = append(terms, *entry.Term) + rv[fields[id]] = terms + } + } + } + return rv, nil +} + func (i *IndexReader) Fields() (fields []string, err error) { fields = make([]string, 0) it := i.kvreader.PrefixIterator([]byte{'f'}) @@ -139,6 +161,17 @@ func (i *IndexReader) Fields() (fields []string, err error) { return } +func (i *IndexReader) FieldIDs(fields []string) (ids []uint16, err error) { + for _, f := range fields { + id, found := i.index.fieldCache.FieldNamed(f, false) + if !found { + return nil, fmt.Errorf("Field %s was not found in cache", f) + } + ids = append(ids, id) + } + return +} + func (i *IndexReader) GetInternal(key []byte) ([]byte, error) { internalRow := NewInternalRow(key, nil) return i.kvreader.Get(internalRow.Key()) diff --git a/search.go b/search.go index e9ca34be..06048718 100644 --- a/search.go +++ b/search.go @@ -191,6 +191,8 @@ func (h *HighlightRequest) AddField(field string) { // Facets describe the set of facets to be computed. // Explain triggers inclusion of additional search // result score explanations. +// Sort specifies the sorting for the returned results +// results will be sorted by score if this is empty // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -201,6 +203,7 @@ type SearchRequest struct { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool `json:"explain"` + Sort map[string]bool `json:"sort"` } func (sr *SearchRequest) Validate() error { @@ -220,6 +223,14 @@ func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) { r.Facets[facetName] = f } +// AddSort field in ascending or descending direction +func (r *SearchRequest) AddSort(field string, ascending bool) { + if r.Sort == nil { + r.Sort = make(map[string]bool) + } + r.Sort[field] = ascending +} + // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -231,6 +242,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool `json:"explain"` + Sort map[string]bool `json:"sort"` } err := json.Unmarshal(input, &temp) @@ -248,6 +260,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Highlight = temp.Highlight r.Fields = temp.Fields r.Facets = temp.Facets + r.Sort = temp.Sort r.Query, err = ParseQuery(temp.Q) if err != nil { return err @@ -280,6 +293,7 @@ func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchReque Size: size, From: from, Explain: explain, + Sort: make(map[string]bool), } } @@ -307,7 +321,7 @@ func (iem IndexErrMap) UnmarshalJSON(data []byte) error { return nil } -// SearchStatus is a secion in the SearchResult reporting how many +// SearchStatus is a section in the SearchResult reporting how many // underlying indexes were queried, how many were successful/failed // and a map of any errors that were encountered type SearchStatus struct { diff --git a/search/facets/facet_builder_datetime.go b/search/facets/facet_builder_datetime.go index 38981464..7e94d523 100644 --- a/search/facets/facet_builder_datetime.go +++ b/search/facets/facet_builder_datetime.go @@ -49,6 +49,10 @@ func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) { fb.ranges[name] = &r } +func (fb *DateTimeFacetBuilder) Field() string { + return fb.field +} + func (fb *DateTimeFacetBuilder) Update(ft index.FieldTerms) { terms, ok := ft[fb.field] if ok { diff --git a/search/facets/facet_builder_numeric.go b/search/facets/facet_builder_numeric.go index f5acfb0f..a1ac3112 100644 --- a/search/facets/facet_builder_numeric.go +++ b/search/facets/facet_builder_numeric.go @@ -48,6 +48,10 @@ func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) { fb.ranges[name] = &r } +func (fb *NumericFacetBuilder) Field() string { + return fb.field +} + func (fb *NumericFacetBuilder) Update(ft index.FieldTerms) { terms, ok := ft[fb.field] if ok { diff --git a/search/facets/facet_builder_terms.go b/search/facets/facet_builder_terms.go index 35c56f22..44881390 100644 --- a/search/facets/facet_builder_terms.go +++ b/search/facets/facet_builder_terms.go @@ -32,6 +32,10 @@ func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder { } } +func (fb *TermsFacetBuilder) Field() string { + return fb.field +} + func (fb *TermsFacetBuilder) Update(ft index.FieldTerms) { terms, ok := ft[fb.field] if ok { diff --git a/search/facets_builder.go b/search/facets_builder.go index f41be294..032b4a7c 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -18,6 +18,7 @@ import ( type FacetBuilder interface { Update(index.FieldTerms) Result() *FacetResult + Field() string } type FacetsBuilder struct { @@ -37,7 +38,15 @@ func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { } func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { - fieldTerms, err := fb.indexReader.DocumentFieldTerms(docMatch.ID) + var fields []string + for _, facetBuilder := range fb.facets { + fields = append(fields, facetBuilder.Field()) + } + fieldIds, err := fb.indexReader.FieldIDs(fields) + if err != nil { + return err + } + fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.ID, fieldIds, fields) if err != nil { return err } From ce64c17be1a8c4b1412a5141de666653b35d48d1 Mon Sep 17 00:00:00 2001 From: slavikm Date: Sun, 17 Jul 2016 16:29:17 -0700 Subject: [PATCH 2/3] Do field cache only once per search --- index/index.go | 2 +- index/upside_down/index_reader.go | 15 ++++++++------- search.go | 16 +--------------- search/facets_builder.go | 23 ++++++++++++++++------- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/index/index.go b/index/index.go index 8684bcc7..e1456e63 100644 --- a/index/index.go +++ b/index/index.go @@ -77,7 +77,7 @@ type IndexReader interface { Document(id string) (*document.Document, error) DocumentFieldTerms(id string) (FieldTerms, error) - DocumentFieldTermsForFields(id string, fieldIDs []uint16, fields []string) (FieldTerms, error) + DocumentFieldTermsForFields(id string, fields map[string]uint16) (FieldTerms, error) Fields() ([]string, error) FieldIDs(fields []string) ([]uint16, error) diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index a0706331..f1edef9a 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -10,10 +10,11 @@ package upside_down import ( + "fmt" + "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" - "fmt" ) type IndexReader struct { @@ -111,21 +112,21 @@ func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { return rv, nil } -func (i *IndexReader) DocumentFieldTermsForFields(id string, fieldIDs []uint16, fields []string) (index.FieldTerms, error) { +func (i *IndexReader) DocumentFieldTermsForFields(id string, fields map[string]uint16) (index.FieldTerms, error) { back, err := i.index.backIndexRowForDoc(i.kvreader, id) if err != nil { return nil, err } - rv := make(index.FieldTerms, len(fieldIDs)) + rv := make(index.FieldTerms, len(fields)) for _, entry := range back.termEntries { - for id, field := range fieldIDs { - if field == uint16(*entry.Field) { - terms, ok := rv[fields[id]] + for field, id := range fields { + if id == uint16(*entry.Field) { + terms, ok := rv[field] if !ok { terms = make([]string, 0) } terms = append(terms, *entry.Term) - rv[fields[id]] = terms + rv[field] = terms } } } diff --git a/search.go b/search.go index 06048718..e9ca34be 100644 --- a/search.go +++ b/search.go @@ -191,8 +191,6 @@ func (h *HighlightRequest) AddField(field string) { // Facets describe the set of facets to be computed. // Explain triggers inclusion of additional search // result score explanations. -// Sort specifies the sorting for the returned results -// results will be sorted by score if this is empty // // A special field named "*" can be used to return all fields. type SearchRequest struct { @@ -203,7 +201,6 @@ type SearchRequest struct { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool `json:"explain"` - Sort map[string]bool `json:"sort"` } func (sr *SearchRequest) Validate() error { @@ -223,14 +220,6 @@ func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) { r.Facets[facetName] = f } -// AddSort field in ascending or descending direction -func (r *SearchRequest) AddSort(field string, ascending bool) { - if r.Sort == nil { - r.Sort = make(map[string]bool) - } - r.Sort[field] = ascending -} - // UnmarshalJSON deserializes a JSON representation of // a SearchRequest func (r *SearchRequest) UnmarshalJSON(input []byte) error { @@ -242,7 +231,6 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { Fields []string `json:"fields"` Facets FacetsRequest `json:"facets"` Explain bool `json:"explain"` - Sort map[string]bool `json:"sort"` } err := json.Unmarshal(input, &temp) @@ -260,7 +248,6 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error { r.Highlight = temp.Highlight r.Fields = temp.Fields r.Facets = temp.Facets - r.Sort = temp.Sort r.Query, err = ParseQuery(temp.Q) if err != nil { return err @@ -293,7 +280,6 @@ func NewSearchRequestOptions(q Query, size, from int, explain bool) *SearchReque Size: size, From: from, Explain: explain, - Sort: make(map[string]bool), } } @@ -321,7 +307,7 @@ func (iem IndexErrMap) UnmarshalJSON(data []byte) error { return nil } -// SearchStatus is a section in the SearchResult reporting how many +// SearchStatus is a secion in the SearchResult reporting how many // underlying indexes were queried, how many were successful/failed // and a map of any errors that were encountered type SearchStatus struct { diff --git a/search/facets_builder.go b/search/facets_builder.go index 032b4a7c..a9d3b854 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -24,29 +24,38 @@ type FacetBuilder interface { type FacetsBuilder struct { indexReader index.IndexReader facets map[string]FacetBuilder + fieldIDs map[string]uint16 // Not thread safe } func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { return &FacetsBuilder{ indexReader: indexReader, facets: make(map[string]FacetBuilder, 0), + fieldIDs: make(map[string]uint16, 0), } } func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { fb.facets[name] = facetBuilder + fieldIDs, err := fb.indexReader.FieldIDs([]string{facetBuilder.Field()}) + if err == nil { + fb.fieldIDs[facetBuilder.Field()] = fieldIDs[0] + } } func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { - var fields []string for _, facetBuilder := range fb.facets { - fields = append(fields, facetBuilder.Field()) + field := facetBuilder.Field() + // Just in-case we added a field since creating the facets builder + if _, ok := fb.fieldIDs[field]; !ok { + fieldIDs, err := fb.indexReader.FieldIDs([]string{field}) + if err != nil { + return err + } + fb.fieldIDs[field] = fieldIDs[0] + } } - fieldIds, err := fb.indexReader.FieldIDs(fields) - if err != nil { - return err - } - fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.ID, fieldIds, fields) + fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.ID, fb.fieldIDs) if err != nil { return err } From fc990bc2d1f615257f10ce07c72bdf05238cbe4e Mon Sep 17 00:00:00 2001 From: slavikm Date: Tue, 19 Jul 2016 20:42:45 -0700 Subject: [PATCH 3/3] Remove the field IDs from outside of the index --- index/index.go | 3 +-- index/upside_down/index_reader.go | 35 +++++++++++++------------------ search/facets_builder.go | 19 +++-------------- 3 files changed, 19 insertions(+), 38 deletions(-) diff --git a/index/index.go b/index/index.go index e1456e63..dcef48e9 100644 --- a/index/index.go +++ b/index/index.go @@ -77,10 +77,9 @@ type IndexReader interface { Document(id string) (*document.Document, error) DocumentFieldTerms(id string) (FieldTerms, error) - DocumentFieldTermsForFields(id string, fields map[string]uint16) (FieldTerms, error) + DocumentFieldTermsForFields(id string, fields []string) (FieldTerms, error) Fields() ([]string, error) - FieldIDs(fields []string) ([]uint16, error) GetInternal(key []byte) ([]byte, error) diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index f1edef9a..49655adf 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -112,22 +112,28 @@ func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { return rv, nil } -func (i *IndexReader) DocumentFieldTermsForFields(id string, fields map[string]uint16) (index.FieldTerms, error) { +func (i *IndexReader) DocumentFieldTermsForFields(id string, fields []string) (index.FieldTerms, error) { back, err := i.index.backIndexRowForDoc(i.kvreader, id) if err != nil { return nil, err } rv := make(index.FieldTerms, len(fields)) + fieldsMap := make(map[uint16]string, len(fields)) + for _, f := range fields { + id, ok := i.index.fieldCache.FieldNamed(f, false) + if !ok { + return nil, fmt.Errorf("Field %s was not found in cache", f) + } + fieldsMap[id] = f + } for _, entry := range back.termEntries { - for field, id := range fields { - if id == uint16(*entry.Field) { - terms, ok := rv[field] - if !ok { - terms = make([]string, 0) - } - terms = append(terms, *entry.Term) - rv[field] = terms + if field, ok := fieldsMap[uint16(*entry.Field)]; ok { + terms, ok := rv[field] + if !ok { + terms = make([]string, 0) } + terms = append(terms, *entry.Term) + rv[field] = terms } } return rv, nil @@ -162,17 +168,6 @@ func (i *IndexReader) Fields() (fields []string, err error) { return } -func (i *IndexReader) FieldIDs(fields []string) (ids []uint16, err error) { - for _, f := range fields { - id, found := i.index.fieldCache.FieldNamed(f, false) - if !found { - return nil, fmt.Errorf("Field %s was not found in cache", f) - } - ids = append(ids, id) - } - return -} - func (i *IndexReader) GetInternal(key []byte) ([]byte, error) { internalRow := NewInternalRow(key, nil) return i.kvreader.Get(internalRow.Key()) diff --git a/search/facets_builder.go b/search/facets_builder.go index a9d3b854..4d52ec2b 100644 --- a/search/facets_builder.go +++ b/search/facets_builder.go @@ -24,38 +24,25 @@ type FacetBuilder interface { type FacetsBuilder struct { indexReader index.IndexReader facets map[string]FacetBuilder - fieldIDs map[string]uint16 // Not thread safe } func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder { return &FacetsBuilder{ indexReader: indexReader, facets: make(map[string]FacetBuilder, 0), - fieldIDs: make(map[string]uint16, 0), } } func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) { fb.facets[name] = facetBuilder - fieldIDs, err := fb.indexReader.FieldIDs([]string{facetBuilder.Field()}) - if err == nil { - fb.fieldIDs[facetBuilder.Field()] = fieldIDs[0] - } } func (fb *FacetsBuilder) Update(docMatch *DocumentMatch) error { + var fields []string for _, facetBuilder := range fb.facets { - field := facetBuilder.Field() - // Just in-case we added a field since creating the facets builder - if _, ok := fb.fieldIDs[field]; !ok { - fieldIDs, err := fb.indexReader.FieldIDs([]string{field}) - if err != nil { - return err - } - fb.fieldIDs[field] = fieldIDs[0] - } + fields = append(fields, facetBuilder.Field()) } - fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.ID, fb.fieldIDs) + fieldTerms, err := fb.indexReader.DocumentFieldTermsForFields(docMatch.ID, fields) if err != nil { return err }