0
0
bleve/index_alias_impl.go
Marty Schoch 522f9d5cc7 significant change to index format, support dictionary rows
this introduces disk format v4
now the summary rows for a term are stored in their own
"dictionary row" format, previously the same information
was stored in special term frequency rows
this now allows us to easily iterate all the terms for a field
in sorted order (useful for many other fuzzy data structures)

at the top-level of bleve you can now browse terms within a field
using the following api on the Index interface:

  FieldDict(field string) (index.FieldDict, error)
  FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
  FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)

fixes #127
2015-03-10 16:22:19 -04:00

577 lines
10 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"sort"
"sync"
"time"
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/search"
)
type indexAliasImpl struct {
indexes []Index
mutex sync.RWMutex
open bool
}
// NewIndexAlias creates a new IndexAlias over the provided
// Index objects.
func NewIndexAlias(indexes ...Index) *indexAliasImpl {
return &indexAliasImpl{
indexes: indexes,
open: true,
}
}
func (i *indexAliasImpl) isAliasToSingleIndex() error {
if len(i.indexes) < 1 {
return ErrorAliasEmpty
} else if len(i.indexes) > 1 {
return ErrorAliasMulti
}
return nil
}
func (i *indexAliasImpl) Index(id string, data interface{}) error {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return err
}
return i.indexes[0].Index(id, data)
}
func (i *indexAliasImpl) Delete(id string) error {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return err
}
return i.indexes[0].Delete(id)
}
func (i *indexAliasImpl) Batch(b *Batch) error {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return err
}
return i.indexes[0].Batch(b)
}
func (i *indexAliasImpl) Document(id string) (*document.Document, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil, err
}
return i.indexes[0].Document(id)
}
func (i *indexAliasImpl) DocCount() (uint64, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
rv := uint64(0)
if !i.open {
return 0, ErrorIndexClosed
}
for _, index := range i.indexes {
otherCount, err := index.DocCount()
if err != nil {
return 0, err
}
rv += otherCount
}
return rv, nil
}
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil, ErrorIndexClosed
}
if len(i.indexes) < 1 {
return nil, ErrorAliasEmpty
}
// short circuit the simple case
if len(i.indexes) == 1 {
return i.indexes[0].Search(req)
}
return MultiSearch(req, i.indexes...)
}
func (i *indexAliasImpl) Fields() ([]string, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil, err
}
return i.indexes[0].Fields()
}
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDict(field)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) DumpAll() chan interface{} {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].DumpAll()
}
func (i *indexAliasImpl) DumpDoc(id string) chan interface{} {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].DumpDoc(id)
}
func (i *indexAliasImpl) DumpFields() chan interface{} {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].DumpFields()
}
func (i *indexAliasImpl) Close() error {
i.mutex.Lock()
defer i.mutex.Unlock()
i.open = false
return nil
}
func (i *indexAliasImpl) Mapping() *IndexMapping {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].Mapping()
}
func (i *indexAliasImpl) Stats() *IndexStat {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].Stats()
}
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil, err
}
return i.indexes[0].GetInternal(key)
}
func (i *indexAliasImpl) SetInternal(key, val []byte) error {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return err
}
return i.indexes[0].SetInternal(key, val)
}
func (i *indexAliasImpl) DeleteInternal(key []byte) error {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return err
}
return i.indexes[0].DeleteInternal(key)
}
func (i *indexAliasImpl) Advanced() (index.Index, store.KVStore, error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil, nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil, nil, err
}
return i.indexes[0].Advanced()
}
func (i *indexAliasImpl) Add(indexes ...Index) {
i.mutex.Lock()
defer i.mutex.Unlock()
i.indexes = append(i.indexes, indexes...)
}
func (i *indexAliasImpl) removeSingle(index Index) {
for pos, in := range i.indexes {
if in == index {
i.indexes = append(i.indexes[:pos], i.indexes[pos+1:]...)
break
}
}
}
func (i *indexAliasImpl) Remove(indexes ...Index) {
i.mutex.Lock()
defer i.mutex.Unlock()
for _, in := range indexes {
i.removeSingle(in)
}
}
func (i *indexAliasImpl) Swap(in, out []Index) {
i.mutex.Lock()
defer i.mutex.Unlock()
// add
i.indexes = append(i.indexes, in...)
// delete
for _, ind := range out {
i.removeSingle(ind)
}
}
// createChildSearchRequest creates a separate
// request from the original
// For now, avoid data race on req structure.
// TODO disable highligh/field load on child
// requests, and add code to do this only on
// the actual final results.
// Perhaps that part needs to be optional,
// could be slower in remote usages.
func createChildSearchRequest(req *SearchRequest) *SearchRequest {
rv := SearchRequest{
Query: req.Query,
Size: req.Size + req.From,
From: 0,
Highlight: req.Highlight,
Fields: req.Fields,
Facets: req.Facets,
Explain: req.Explain,
}
return &rv
}
// MultiSearch executes a SearchRequest across multiple
// Index objects, then merges the results.
func MultiSearch(req *SearchRequest, indexes ...Index) (*SearchResult, error) {
searchStart := time.Now()
results := make(chan *SearchResult)
errs := make(chan error)
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
var searchChildIndex = func(waitGroup *sync.WaitGroup, in Index, results chan *SearchResult, errs chan error) {
go func() {
defer waitGroup.Done()
childReq := createChildSearchRequest(req)
searchResult, err := in.Search(childReq)
if err != nil {
errs <- err
} else {
results <- searchResult
}
}()
}
for _, in := range indexes {
waitGroup.Add(1)
searchChildIndex(&waitGroup, in, results, errs)
}
// on another go routine, close after finished
go func() {
waitGroup.Wait()
close(results)
close(errs)
}()
var sr *SearchResult
var err error
var result *SearchResult
ok := true
for ok {
select {
case result, ok = <-results:
if ok {
if sr == nil {
// first result
sr = result
} else {
// merge with previous
sr.Merge(result)
}
}
case err, ok = <-errs:
// for now stop on any error
// FIXME offer other behaviors
if err != nil {
return nil, err
}
}
}
// merge just concatenated all the hits
// now lets clean it up
// first sort it by score
sort.Sort(sr.Hits)
// now skip over the correct From
if req.From > 0 && len(sr.Hits) > req.From {
sr.Hits = sr.Hits[req.From:]
} else if req.From > 0 {
sr.Hits = search.DocumentMatchCollection{}
}
// now trim to the correct size
if req.Size > 0 && len(sr.Hits) > req.Size {
sr.Hits = sr.Hits[0:req.Size]
}
// fix up facets
for name, fr := range req.Facets {
sr.Facets.Fixup(name, fr.Size)
}
// fix up original request
sr.Request = req
searchDuration := time.Since(searchStart)
sr.Took = searchDuration
return sr, nil
}
func (i *indexAliasImpl) NewBatch() *Batch {
i.mutex.RLock()
defer i.mutex.RUnlock()
if !i.open {
return nil
}
err := i.isAliasToSingleIndex()
if err != nil {
return nil
}
return i.indexes[0].NewBatch()
}
type indexAliasImplFieldDict struct {
index *indexAliasImpl
fieldDict index.FieldDict
}
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
return f.fieldDict.Next()
}
func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}