0
0
Fork 0

update segment API to return error in key places

This commit is contained in:
Marty Schoch 2017-12-04 18:06:06 -05:00
parent b74cf4b081
commit 22ffc8940e
10 changed files with 212 additions and 96 deletions

View File

@ -53,7 +53,11 @@ func (s *Scorch) mainLoop() {
// see if optimistic work included this segment
delta, ok := next.obsoletes[s.root.segment[i].id]
if !ok {
delta = s.root.segment[i].segment.DocNumbers(next.ids)
var err error
delta, err = s.root.segment[i].segment.DocNumbers(next.ids)
if err != nil {
panic(err)
}
}
newSnapshot.segment[i] = &SegmentSnapshot{
id: s.root.segment[i].id,

View File

@ -154,7 +154,10 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
// get read lock, to optimistically prepare obsoleted info
s.rootLock.RLock()
for i := range s.root.segment {
delta := s.root.segment[i].segment.DocNumbers(ids)
delta, err := s.root.segment[i].segment.DocNumbers(ids)
if err != nil {
	// release the read lock acquired before this loop; returning while
	// still holding it would leave rootLock read-locked and stall any
	// later attempt to take the write lock
	s.rootLock.RUnlock()
	return err
}
introduction.obsoletes[s.root.segment[i].id] = delta
}
s.rootLock.RUnlock()

View File

@ -31,13 +31,14 @@ type Dictionary struct {
}
// PostingsList returns the postings list for the specified term.
// The except bitmap is recorded on the returned list, and the postings ID is
// looked up in the segment's per-field dictionary. The error result is always
// nil in this implementation.
func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList {
func (d *Dictionary) PostingsList(term string,
except *roaring.Bitmap) (segment.PostingsList, error) {
return &PostingsList{
dictionary: d,
term: term,
postingsID: d.segment.Dicts[d.fieldID][term],
except: except,
}
}, nil
}
// Iterator returns an iterator for this dictionary

View File

@ -74,9 +74,9 @@ type PostingsIterator struct {
}
// Next returns the next posting on the postings list, or nil at the end
func (i *PostingsIterator) Next() segment.Posting {
func (i *PostingsIterator) Next() (segment.Posting, error) {
if i.actual == nil || !i.actual.HasNext() {
return nil
return nil, nil
}
n := i.actual.Next()
allN := i.all.Next()
@ -99,7 +99,7 @@ func (i *PostingsIterator) Next() segment.Posting {
i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
i.offset++
return rv
return rv, nil
}
// Posting is a single entry in a postings list

View File

@ -119,12 +119,12 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi
}
// Dictionary returns the term dictionary for the specified field.
// The field name is resolved to an ID via getOrDefineField(field, false).
// The error result is always nil in this implementation.
func (s *Segment) Dictionary(field string) segment.TermDictionary {
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
return &Dictionary{
segment: s,
field: field,
fieldID: uint16(s.getOrDefineField(field, false)),
}
}, nil
}
// Count returns the number of documents in this segment
@ -135,7 +135,7 @@ func (s *Segment) Count() uint64 {
// DocNumbers returns a bitset corresponding to the doc numbers of all the
// provided _id strings
func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap {
func (s *Segment) DocNumbers(ids []string) (*roaring.Bitmap, error) {
rv := roaring.New()
// guard against empty segment
@ -149,5 +149,5 @@ func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap {
}
}
}
return rv
return rv, nil
}

View File

@ -31,12 +31,18 @@ func TestEmpty(t *testing.T) {
t.Errorf("expected count 0, got %d", emptySegment.Count())
}
dict := emptySegment.Dictionary("name")
dict, err := emptySegment.Dictionary("name")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList := dict.PostingsList("marty", nil)
postingsList, err := dict.PostingsList("marty", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
@ -47,10 +53,13 @@ func TestEmpty(t *testing.T) {
}
count := 0
nextPosting := postingsItr.Next()
for nextPosting != nil {
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
nextPosting = postingsItr.Next()
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 0 {
@ -58,7 +67,7 @@ func TestEmpty(t *testing.T) {
}
// now try and visit a document
err := emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
err = emptySegment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
t.Errorf("document visitor called, not expected")
return true
})
@ -164,12 +173,18 @@ func TestSingle(t *testing.T) {
}
// check the _id field
dict := segment.Dictionary("_id")
dict, err := segment.Dictionary("_id")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList := dict.PostingsList("a", nil)
postingsList, err := dict.PostingsList("a", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
@ -180,8 +195,8 @@ func TestSingle(t *testing.T) {
}
count := 0
nextPosting := postingsItr.Next()
for nextPosting != nil {
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
@ -193,7 +208,10 @@ func TestSingle(t *testing.T) {
t.Errorf("expected norm 1.0, got %f", nextPosting.Norm())
}
nextPosting = postingsItr.Next()
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
@ -201,12 +219,18 @@ func TestSingle(t *testing.T) {
}
// check the name field
dict = segment.Dictionary("name")
dict, err = segment.Dictionary("name")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList = dict.PostingsList("wow", nil)
postingsList, err = dict.PostingsList("wow", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
@ -217,8 +241,8 @@ func TestSingle(t *testing.T) {
}
count = 0
nextPosting = postingsItr.Next()
for nextPosting != nil {
nextPosting, err = postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
@ -244,7 +268,10 @@ func TestSingle(t *testing.T) {
}
}
nextPosting = postingsItr.Next()
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
@ -252,12 +279,18 @@ func TestSingle(t *testing.T) {
}
// check the _all field (composite)
dict = segment.Dictionary("_all")
dict, err = segment.Dictionary("_all")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList = dict.PostingsList("wow", nil)
postingsList, err = dict.PostingsList("wow", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
@ -268,8 +301,8 @@ func TestSingle(t *testing.T) {
}
count = 0
nextPosting = postingsItr.Next()
for nextPosting != nil {
nextPosting, err = postingsItr.Next()
for nextPosting != nil && err == nil {
count++
if nextPosting.Frequency() != 1 {
t.Errorf("expected frequency 1, got %d", nextPosting.Frequency())
@ -296,7 +329,10 @@ func TestSingle(t *testing.T) {
}
}
nextPosting = postingsItr.Next()
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {
@ -305,7 +341,7 @@ func TestSingle(t *testing.T) {
// now try and visit a document
var fieldValuesSeen int
err := segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
err = segment.VisitDocument(0, func(field string, typ byte, value []byte, pos []uint64) bool {
fieldValuesSeen++
return true
})
@ -487,12 +523,18 @@ func TestMultiple(t *testing.T) {
}
// check the desc field
dict := segment.Dictionary("desc")
dict, err := segment.Dictionary("desc")
if err != nil {
t.Fatal(err)
}
if dict == nil {
t.Fatal("got nil dict, expected non-nil")
}
postingsList := dict.PostingsList("thing", nil)
postingsList, err := dict.PostingsList("thing", nil)
if err != nil {
t.Fatal(err)
}
if postingsList == nil {
t.Fatal("got nil postings list, expected non-nil")
}
@ -503,10 +545,13 @@ func TestMultiple(t *testing.T) {
}
count := 0
nextPosting := postingsItr.Next()
for nextPosting != nil {
nextPosting, err := postingsItr.Next()
for nextPosting != nil && err == nil {
count++
nextPosting = postingsItr.Next()
nextPosting, err = postingsItr.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 2 {
@ -514,10 +559,16 @@ func TestMultiple(t *testing.T) {
}
// get docnum of a
exclude := segment.DocNumbers([]string{"a"})
exclude, err := segment.DocNumbers([]string{"a"})
if err != nil {
t.Fatal(err)
}
// look for term 'thing' excluding doc 'a'
postingsListExcluding := dict.PostingsList("thing", exclude)
postingsListExcluding, err := dict.PostingsList("thing", exclude)
if err != nil {
t.Fatal(err)
}
// verify the list that was just built with the exclusion bitmap, not the
// earlier unfiltered one
if postingsListExcluding == nil {
	t.Fatal("got nil postings list, expected non-nil")
}
@ -528,10 +579,13 @@ func TestMultiple(t *testing.T) {
}
count = 0
nextPosting = postingsItrExcluding.Next()
for nextPosting != nil {
nextPosting, err = postingsItrExcluding.Next()
for nextPosting != nil && err == nil {
count++
nextPosting = postingsItrExcluding.Next()
nextPosting, err = postingsItrExcluding.Next()
}
if err != nil {
t.Fatal(err)
}
if count != 1 {

View File

@ -25,18 +25,18 @@ import (
type DocumentFieldValueVisitor func(field string, typ byte, value []byte, pos []uint64) bool
type Segment interface {
Dictionary(field string) TermDictionary
Dictionary(field string) (TermDictionary, error)
VisitDocument(num uint64, visitor DocumentFieldValueVisitor) error
Count() uint64
DocNumbers([]string) *roaring.Bitmap
DocNumbers([]string) (*roaring.Bitmap, error)
Fields() []string
}
type TermDictionary interface {
PostingsList(term string, except *roaring.Bitmap) PostingsList
PostingsList(term string, except *roaring.Bitmap) (PostingsList, error)
Iterator() DictionaryIterator
PrefixIterator(prefix string) DictionaryIterator
@ -59,7 +59,7 @@ type PostingsList interface {
}
type PostingsIterator interface {
Next() Posting
Next() (Posting, error)
}
type Posting interface {

View File

@ -27,6 +27,17 @@ import (
"github.com/blevesearch/bleve/index/scorch/segment"
)
// asynchSegmentResult carries the outcome of one per-segment goroutine back
// to the loop that collects results from the channel. Each producer fills in
// only the fields relevant to its operation.
type asynchSegmentResult struct {
// dictItr is the dictionary iterator produced for a field dictionary lookup
dictItr segment.DictionaryIterator
// index is the segment's position, used by consumers to slot the result
// into per-segment slices
index int
// docs holds the doc numbers produced by the doc ID reader lookups
docs *roaring.Bitmap
// postings holds the postings list produced by a term field lookup
postings segment.PostingsList
// err, when non-nil, reports that the segment's work failed; producers
// sending an error leave every other field at its zero value
err error
}
type IndexSnapshot struct {
segment []*SegmentSnapshot
offsets []uint64
@ -35,31 +46,44 @@ type IndexSnapshot struct {
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
results := make(chan segment.DictionaryIterator)
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
dict := segment.Dictionary(field)
results <- makeItr(dict)
dict, err := segment.Dictionary(field)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
}
}(index, segment)
}
var err error
rv := &IndexSnapshotFieldDict{
snapshot: i,
cursors: make([]*segmentDictCursor, 0, len(i.segment)),
}
for count := 0; count < len(i.segment); count++ {
di := <-results
next, err := di.Next()
if err != nil {
return nil, err
}
if next != nil {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: di,
curr: next,
})
asr := <-results
if asr.err != nil {
	// remember only the first error, but never fall through to the
	// iterator path: a failed result carries a nil dictItr
	if err == nil {
		err = asr.err
	}
} else {
	next, err2 := asr.dictItr.Next()
	if err2 != nil && err == nil {
		err = err2
	}
	if next != nil {
		rv.cursors = append(rv.cursors, &segmentDictCursor{
			itr:  asr.dictItr,
			curr: next,
		})
	}
}
}
// after ensuring we've read all items on channel
if err != nil {
return nil, err
}
// prepare heap
heap.Init(rv)
@ -87,10 +111,10 @@ func (i *IndexSnapshot) FieldDictPrefix(field string,
}
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
results := make(chan *segmentDocNumsResult)
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
results <- &segmentDocNumsResult{
results <- &asynchSegmentResult{
index: index,
docs: segment.DocNumbersLive(),
}
@ -101,12 +125,17 @@ func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
}
func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
results := make(chan *segmentDocNumsResult)
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
results <- &segmentDocNumsResult{
index: index,
docs: segment.DocNumbers(ids),
docs, err := segment.DocNumbers(ids)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{
index: index,
docs: docs,
}
}
}(index, segment)
}
@ -114,19 +143,23 @@ func (i *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error)
return i.newDocIDReader(results)
}
type segmentDocNumsResult struct {
index int
docs *roaring.Bitmap
}
func (i *IndexSnapshot) newDocIDReader(results chan *segmentDocNumsResult) (index.DocIDReader, error) {
func (i *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
rv := &IndexSnapshotDocIDReader{
snapshot: i,
iterators: make([]roaring.IntIterable, len(i.segment)),
}
var err error
for count := 0; count < len(i.segment); count++ {
sdnr := <-results
rv.iterators[sdnr.index] = sdnr.docs.Iterator()
asr := <-results
if asr.err != nil {
	// record the first error only; keep receiving so that every producer
	// goroutine can complete its send on the unbuffered channel
	if err == nil {
		err = asr.err
	}
} else {
	// only successful results carry a docs bitmap
	rv.iterators[asr.index] = asr.docs.Iterator()
}
}
if err != nil {
return nil, err
}
return rv, nil
@ -262,23 +295,27 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err
func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
type segmentPostingResult struct {
index int
postings segment.PostingsList
}
results := make(chan *segmentPostingResult)
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
go func(index int, segment *SegmentSnapshot) {
dict := segment.Dictionary(field)
pl := dict.PostingsList(string(term), nil)
results <- &segmentPostingResult{
index: index,
postings: pl,
dict, err := segment.Dictionary(field)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
pl, err := dict.PostingsList(string(term), nil)
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{
index: index,
postings: pl,
}
}
}
}(index, segment)
}
var err error
rv := &IndexSnapshotTermFieldReader{
snapshot: i,
postings: make([]segment.PostingsList, len(i.segment)),
@ -288,9 +325,16 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
includeTermVectors: includeTermVectors,
}
for count := 0; count < len(i.segment); count++ {
spr := <-results
rv.postings[spr.index] = spr.postings
rv.iterators[spr.index] = spr.postings.Iterator()
asr := <-results
if asr.err != nil {
	// remember only the first error, but never fall through to the
	// postings path: a failed result carries a nil postings list
	if err == nil {
		err = asr.err
	}
} else {
	rv.postings[asr.index] = asr.postings
	rv.iterators[asr.index] = asr.postings.Iterator()
}
}
if err != nil {
return nil, err
}
return rv, nil

View File

@ -38,7 +38,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in
}
// find the next hit
for i.segmentOffset < len(i.postings) {
next := i.iterators[i.segmentOffset].Next()
next, err := i.iterators[i.segmentOffset].Next()
if err != nil {
return nil, err
}
if next != nil {
// make segment number into global number by adding offset
globalOffset := i.snapshot.offsets[i.segmentOffset]

View File

@ -24,7 +24,7 @@ type SegmentDictionarySnapshot struct {
d segment.TermDictionary
}
// PostingsList delegates to the wrapped term dictionary.
// NOTE(review): the caller-supplied except bitmap is not forwarded; the
// snapshot's deleted bitmap (s.s.deleted) is passed in its place — confirm
// that ignoring except is intended.
func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) segment.PostingsList {
func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
return s.d.PostingsList(term, s.s.deleted)
}
@ -58,19 +58,26 @@ func (s *SegmentSnapshot) Count() uint64 {
return rv
}
// Dictionary returns the underlying segment's term dictionary for the
// specified field, wrapped in a SegmentDictionarySnapshot that refers back to
// this snapshot. An error from the underlying segment is returned unchanged
// with a nil dictionary.
func (s *SegmentSnapshot) Dictionary(field string) segment.TermDictionary {
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
d, err := s.segment.Dictionary(field)
if err != nil {
return nil, err
}
return &SegmentDictionarySnapshot{
s: s,
d: s.segment.Dictionary(field),
}
d: d,
}, nil
}
// DocNumbers returns the doc numbers the underlying segment reports for the
// given doc IDs, minus any that appear in this snapshot's deleted bitmap.
// An error from the underlying segment is returned unchanged with a nil
// bitmap.
func (s *SegmentSnapshot) DocNumbers(docIDs []string) *roaring.Bitmap {
rv := s.segment.DocNumbers(docIDs)
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
rv, err := s.segment.DocNumbers(docIDs)
if err != nil {
return nil, err
}
if s.deleted != nil {
// remove deleted docs from the result in place
rv.AndNot(s.deleted)
}
return rv
return rv, nil
}
// DocNumbersLive returns a bitset containing doc numbers for all live docs