![Steve Yen](/assets/img/avatar_default.png)
This commit improves handling of an incoming batch that contains only internal-data updates and no doc updates. In this case, a nil segment, rather than an empty segment instance, is used in the segmentIntroduction; that is, a segmentIntroduction might now hold internal-data updates only. To handle synchronous persistence, a new field, a slice of persisted-notification channels, is added to the IndexSnapshot struct; the persister goroutine closes these channels as each IndexSnapshot is persisted. Also, as part of this change, instead of checking the unsafeBatch flag in several places, we check whether these persisted channels are non-nil.
423 lines
10 KiB
Go
423 lines
10 KiB
Go
// Copyright (c) 2017 Couchbase, Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package scorch
|
|
|
|
import (
|
|
"bytes"
|
|
"container/heap"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"sort"
|
|
"sync"
|
|
|
|
"github.com/RoaringBitmap/roaring"
|
|
"github.com/blevesearch/bleve/document"
|
|
"github.com/blevesearch/bleve/index"
|
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
|
)
|
|
|
|
type asynchSegmentResult struct {
|
|
dictItr segment.DictionaryIterator
|
|
|
|
index int
|
|
docs *roaring.Bitmap
|
|
|
|
postings segment.PostingsList
|
|
|
|
err error
|
|
}
|
|
|
|
type IndexSnapshot struct {
|
|
parent *Scorch
|
|
segment []*SegmentSnapshot
|
|
offsets []uint64
|
|
internal map[string][]byte
|
|
epoch uint64
|
|
|
|
m sync.Mutex // Protects the fields that follow.
|
|
refs int64
|
|
|
|
persisted []chan error
|
|
}
|
|
|
|
func (i *IndexSnapshot) AddRef() {
|
|
i.m.Lock()
|
|
i.refs++
|
|
i.m.Unlock()
|
|
}
|
|
|
|
func (i *IndexSnapshot) DecRef() (err error) {
|
|
i.m.Lock()
|
|
i.refs--
|
|
if i.refs == 0 {
|
|
for _, s := range i.segment {
|
|
if s != nil {
|
|
err2 := s.segment.DecRef()
|
|
if err == nil {
|
|
err = err2
|
|
}
|
|
}
|
|
}
|
|
if i.parent != nil {
|
|
go i.parent.AddEligibleForRemoval(i.epoch)
|
|
}
|
|
}
|
|
i.m.Unlock()
|
|
return err
|
|
}
|
|
|
|
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
|
|
|
results := make(chan *asynchSegmentResult)
|
|
for index, segment := range i.segment {
|
|
go func(index int, segment *SegmentSnapshot) {
|
|
dict, err := segment.Dictionary(field)
|
|
if err != nil {
|
|
results <- &asynchSegmentResult{err: err}
|
|
} else {
|
|
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
|
}
|
|
}(index, segment)
|
|
}
|
|
|
|
var err error
|
|
rv := &IndexSnapshotFieldDict{
|
|
snapshot: i,
|
|
cursors: make([]*segmentDictCursor, 0, len(i.segment)),
|
|
}
|
|
for count := 0; count < len(i.segment); count++ {
|
|
asr := <-results
|
|
if asr.err != nil && err == nil {
|
|
err = asr.err
|
|
} else {
|
|
next, err2 := asr.dictItr.Next()
|
|
if err2 != nil && err == nil {
|
|
err = err2
|
|
}
|
|
if next != nil {
|
|
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
|
itr: asr.dictItr,
|
|
curr: next,
|
|
})
|
|
}
|
|
}
|
|
}
|
|
// after ensuring we've read all items on channel
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
// prepare heap
|
|
heap.Init(rv)
|
|
|
|
return rv, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
|
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
|
return i.Iterator()
|
|
})
|
|
}
|
|
|
|
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
|
endTerm []byte) (index.FieldDict, error) {
|
|
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
|
return i.RangeIterator(string(startTerm), string(endTerm))
|
|
})
|
|
}
|
|
|
|
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
|
termPrefix []byte) (index.FieldDict, error) {
|
|
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
|
return i.PrefixIterator(string(termPrefix))
|
|
})
|
|
}
|
|
|
|
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
|
results := make(chan *asynchSegmentResult)
|
|
for index, segment := range i.segment {
|
|
go func(index int, segment *SegmentSnapshot) {
|
|
results <- &asynchSegmentResult{
|
|
index: index,
|
|
docs: segment.DocNumbersLive(),
|
|
}
|
|
}(index, segment)
|
|
}
|
|
|
|
return i.newDocIDReader(results)
|
|
}
|
|
|
|
func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
|
|
results := make(chan *asynchSegmentResult)
|
|
for index, segment := range i.segment {
|
|
go func(index int, segment *SegmentSnapshot) {
|
|
docs, err := segment.DocNumbers(ids)
|
|
if err != nil {
|
|
results <- &asynchSegmentResult{err: err}
|
|
} else {
|
|
results <- &asynchSegmentResult{
|
|
index: index,
|
|
docs: docs,
|
|
}
|
|
}
|
|
}(index, segment)
|
|
}
|
|
|
|
return i.newDocIDReader(results)
|
|
}
|
|
|
|
func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
|
|
rv := &IndexSnapshotDocIDReader{
|
|
snapshot: i,
|
|
iterators: make([]roaring.IntIterable, len(i.segment)),
|
|
}
|
|
var err error
|
|
for count := 0; count < len(i.segment); count++ {
|
|
asr := <-results
|
|
if asr.err != nil && err != nil {
|
|
err = asr.err
|
|
} else {
|
|
rv.iterators[asr.index] = asr.docs.Iterator()
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return rv, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) Fields() ([]string, error) {
|
|
// FIXME not making this concurrent for now as it's not used in hot path
|
|
// of any searches at the moment (just a debug aid)
|
|
fieldsMap := map[string]struct{}{}
|
|
for _, segment := range i.segment {
|
|
fields := segment.Fields()
|
|
for _, field := range fields {
|
|
fieldsMap[field] = struct{}{}
|
|
}
|
|
}
|
|
rv := make([]string, 0, len(fieldsMap))
|
|
for k := range fieldsMap {
|
|
rv = append(rv, k)
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
|
|
return i.internal[string(key)], nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) DocCount() (uint64, error) {
|
|
var rv uint64
|
|
for _, segment := range i.segment {
|
|
rv += segment.Count()
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) Document(id string) (rv *document.Document, err error) {
|
|
// FIXME could be done more efficiently directly, but reusing for simplicity
|
|
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if cerr := tfr.Close(); err == nil && cerr != nil {
|
|
err = cerr
|
|
}
|
|
}()
|
|
|
|
next, err := tfr.Next(nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if next == nil {
|
|
// no such doc exists
|
|
return nil, nil
|
|
}
|
|
|
|
docNum, err := docInternalToNumber(next.ID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
|
|
|
rv = document.NewDocument(id)
|
|
err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, value []byte, pos []uint64) bool {
|
|
if name == "_id" {
|
|
return true
|
|
}
|
|
switch typ {
|
|
case 't':
|
|
rv.AddField(document.NewTextField(name, pos, value))
|
|
case 'n':
|
|
rv.AddField(document.NewNumericFieldFromBytes(name, pos, value))
|
|
case 'd':
|
|
rv.AddField(document.NewDateTimeFieldFromBytes(name, pos, value))
|
|
case 'b':
|
|
rv.AddField(document.NewBooleanFieldFromBytes(name, pos, value))
|
|
case 'g':
|
|
rv.AddField(document.NewGeoPointFieldFromBytes(name, pos, value))
|
|
}
|
|
|
|
return true
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return rv, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
|
|
segmentIndex := sort.Search(len(i.offsets),
|
|
func(x int) bool {
|
|
return i.offsets[x] > docNum
|
|
}) - 1
|
|
|
|
localDocNum := docNum - i.offsets[segmentIndex]
|
|
return int(segmentIndex), localDocNum
|
|
}
|
|
|
|
func (i *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
|
|
docNum, err := docInternalToNumber(id)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
|
|
|
var found bool
|
|
var rv string
|
|
err = i.segment[segmentIndex].VisitDocument(localDocNum, func(field string, typ byte, value []byte, pos []uint64) bool {
|
|
if field == "_id" {
|
|
found = true
|
|
rv = string(value)
|
|
return false
|
|
}
|
|
return true
|
|
})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
if found {
|
|
return rv, nil
|
|
}
|
|
return "", fmt.Errorf("document number %d not found", docNum)
|
|
}
|
|
|
|
func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
|
|
// FIXME could be done more efficiently directly, but reusing for simplicity
|
|
tfr, err := i.TermFieldReader([]byte(id), "_id", false, false, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer func() {
|
|
if cerr := tfr.Close(); err == nil && cerr != nil {
|
|
err = cerr
|
|
}
|
|
}()
|
|
|
|
next, err := tfr.Next(nil)
|
|
if err != nil || next == nil {
|
|
return nil, err
|
|
}
|
|
|
|
return next.ID, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
|
|
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
|
|
|
|
rv := &IndexSnapshotTermFieldReader{
|
|
term: term,
|
|
snapshot: i,
|
|
postings: make([]segment.PostingsList, len(i.segment)),
|
|
iterators: make([]segment.PostingsIterator, len(i.segment)),
|
|
includeFreq: includeFreq,
|
|
includeNorm: includeNorm,
|
|
includeTermVectors: includeTermVectors,
|
|
}
|
|
for i, segment := range i.segment {
|
|
dict, err := segment.Dictionary(field)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
pl, err := dict.PostingsList(string(term), nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rv.postings[i] = pl
|
|
rv.iterators[i] = pl.Iterator()
|
|
}
|
|
return rv, nil
|
|
}
|
|
|
|
// docNumberToBytes encodes in as 8 big-endian bytes. When buf is exactly 8
// bytes long it is written in place and returned; otherwise a new 8-byte
// slice is allocated and returned.
func docNumberToBytes(buf []byte, in uint64) []byte {
	const width = 8

	out := buf
	if len(out) != width {
		out = make([]byte, width)
	}
	binary.BigEndian.PutUint64(out, in)
	return out
}
|
|
|
|
func docInternalToNumber(in index.IndexInternalID) (uint64, error) {
|
|
var res uint64
|
|
err := binary.Read(bytes.NewReader(in), binary.BigEndian, &res)
|
|
if err != nil {
|
|
return res, err
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
|
fields []string, visitor index.DocumentFieldTermVisitor) error {
|
|
|
|
docNum, err := docInternalToNumber(id)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum)
|
|
if segmentIndex >= len(i.segment) {
|
|
return nil
|
|
}
|
|
|
|
ss := i.segment[segmentIndex]
|
|
|
|
err = ss.cachedDocs.prepareFields(fields, ss)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
for _, field := range fields {
|
|
if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
|
|
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
|
for {
|
|
i := bytes.Index(tlist, TermSeparatorSplitSlice)
|
|
if i < 0 {
|
|
break
|
|
}
|
|
visitor(field, tlist[0:i])
|
|
tlist = tlist[i+1:]
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|