2017-12-01 21:42:50 +01:00
|
|
|
// Copyright (c) 2017 Couchbase, Inc.
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
package scorch
|
|
|
|
|
|
|
|
import (
|
2017-12-14 08:08:29 +01:00
|
|
|
"sync"
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
"github.com/RoaringBitmap/roaring"
|
2017-12-10 14:55:59 +01:00
|
|
|
"github.com/blevesearch/bleve/index"
|
2017-09-29 18:42:37 +02:00
|
|
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
|
|
|
)
|
|
|
|
|
2017-12-14 08:08:29 +01:00
|
|
|
// TermSeparator is the byte (0xff) appended after each term when
// cachedFieldDocs.prepareFields builds the per-document term list.
var TermSeparator byte = 0xff
|
|
|
|
|
|
|
|
// TermSeparatorSplitSlice is a one-element byte slice holding TermSeparator.
// NOTE(review): presumably intended as the separator argument when splitting
// a cached term list back into individual terms — confirm against callers.
var TermSeparatorSplitSlice = []byte{TermSeparator}
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
type SegmentDictionarySnapshot struct {
|
|
|
|
s *SegmentSnapshot
|
|
|
|
d segment.TermDictionary
|
|
|
|
}
|
|
|
|
|
2017-12-05 00:06:06 +01:00
|
|
|
func (s *SegmentDictionarySnapshot) PostingsList(term string, except *roaring.Bitmap) (segment.PostingsList, error) {
|
2017-12-20 02:44:25 +01:00
|
|
|
// TODO: if except is non-nil, perhaps need to OR it with s.s.deleted?
|
2017-09-29 18:42:37 +02:00
|
|
|
return s.d.PostingsList(term, s.s.deleted)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SegmentDictionarySnapshot) Iterator() segment.DictionaryIterator {
|
|
|
|
return s.d.Iterator()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SegmentDictionarySnapshot) PrefixIterator(prefix string) segment.DictionaryIterator {
|
|
|
|
return s.d.PrefixIterator(prefix)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SegmentDictionarySnapshot) RangeIterator(start, end string) segment.DictionaryIterator {
|
|
|
|
return s.d.RangeIterator(start, end)
|
|
|
|
}
|
|
|
|
|
|
|
|
type SegmentSnapshot struct {
|
|
|
|
id uint64
|
|
|
|
segment segment.Segment
|
|
|
|
deleted *roaring.Bitmap
|
2017-12-07 00:33:47 +01:00
|
|
|
|
2017-12-14 08:08:29 +01:00
|
|
|
cachedDocs *cachedDocs
|
2017-12-07 00:33:47 +01:00
|
|
|
}
|
|
|
|
|
2017-12-13 19:41:03 +01:00
|
|
|
func (s *SegmentSnapshot) Id() uint64 {
|
|
|
|
return s.id
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SegmentSnapshot) FullSize() int64 {
|
|
|
|
return int64(s.segment.Count())
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s SegmentSnapshot) LiveSize() int64 {
|
|
|
|
return int64(s.Count())
|
|
|
|
}
|
|
|
|
|
2017-12-07 00:33:47 +01:00
|
|
|
func (s *SegmentSnapshot) Close() error {
|
|
|
|
return s.segment.Close()
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
|
|
|
|
return s.segment.VisitDocument(num, visitor)
|
|
|
|
}
|
|
|
|
|
2017-12-10 14:55:59 +01:00
|
|
|
func (s *SegmentSnapshot) DocumentVisitFieldTerms(num uint64, fields []string,
|
|
|
|
visitor index.DocumentFieldTermVisitor) error {
|
|
|
|
collection := make(map[string][][]byte)
|
|
|
|
// collect field indexed values
|
|
|
|
for _, field := range fields {
|
|
|
|
dict, err := s.Dictionary(field)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dictItr := dict.Iterator()
|
|
|
|
var next *index.DictEntry
|
|
|
|
next, err = dictItr.Next()
|
|
|
|
for next != nil && err == nil {
|
|
|
|
postings, err2 := dict.PostingsList(next.Term, nil)
|
|
|
|
if err2 != nil {
|
|
|
|
return err2
|
|
|
|
}
|
|
|
|
postingsItr := postings.Iterator()
|
|
|
|
nextPosting, err2 := postingsItr.Next()
|
|
|
|
for err2 == nil && nextPosting != nil && nextPosting.Number() <= num {
|
|
|
|
if nextPosting.Number() == num {
|
|
|
|
// got what we're looking for
|
|
|
|
collection[field] = append(collection[field], []byte(next.Term))
|
|
|
|
}
|
|
|
|
nextPosting, err = postingsItr.Next()
|
|
|
|
}
|
|
|
|
if err2 != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
next, err = dictItr.Next()
|
|
|
|
}
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// invoke callback
|
|
|
|
for field, values := range collection {
|
|
|
|
for _, value := range values {
|
|
|
|
visitor(field, value)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
func (s *SegmentSnapshot) Count() uint64 {
|
2017-12-07 00:33:47 +01:00
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
rv := s.segment.Count()
|
|
|
|
if s.deleted != nil {
|
|
|
|
rv -= s.deleted.GetCardinality()
|
|
|
|
}
|
|
|
|
return rv
|
|
|
|
}
|
|
|
|
|
2017-12-05 00:06:06 +01:00
|
|
|
func (s *SegmentSnapshot) Dictionary(field string) (segment.TermDictionary, error) {
|
|
|
|
d, err := s.segment.Dictionary(field)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-09-29 18:42:37 +02:00
|
|
|
return &SegmentDictionarySnapshot{
|
|
|
|
s: s,
|
2017-12-05 00:06:06 +01:00
|
|
|
d: d,
|
|
|
|
}, nil
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
2017-12-05 00:06:06 +01:00
|
|
|
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
|
|
|
|
rv, err := s.segment.DocNumbers(docIDs)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
2017-09-29 18:42:37 +02:00
|
|
|
if s.deleted != nil {
|
|
|
|
rv.AndNot(s.deleted)
|
|
|
|
}
|
2017-12-05 00:06:06 +01:00
|
|
|
return rv, nil
|
2017-09-29 18:42:37 +02:00
|
|
|
}
|
|
|
|
|
2017-12-01 14:54:39 +01:00
|
|
|
// DocNumbersLive returns bitsit containing doc numbers for all live docs
|
|
|
|
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
|
|
|
|
rv := roaring.NewBitmap()
|
|
|
|
rv.AddRange(0, s.segment.Count())
|
|
|
|
if s.deleted != nil {
|
|
|
|
rv.AndNot(s.deleted)
|
|
|
|
}
|
|
|
|
return rv
|
|
|
|
}
|
|
|
|
|
2017-09-29 18:42:37 +02:00
|
|
|
func (s *SegmentSnapshot) Fields() []string {
|
|
|
|
return s.segment.Fields()
|
|
|
|
}
|
2017-12-14 08:08:29 +01:00
|
|
|
|
|
|
|
// cachedFieldDocs holds, for a single field, the terms of every document,
// plus the state needed to tell when that data has finished being built.
type cachedFieldDocs struct {
	// readyCh is closed once docs (or err) is ready to be used.
	readyCh chan struct{}
	// err is non-nil if preparing this cachedFieldDocs failed.
	err error
	// docs is keyed by localDocNum; each value is a list of terms
	// delimited by 0xFF.
	docs map[uint64][]byte
}
|
|
|
|
|
2017-12-14 22:16:06 +01:00
|
|
|
func (cfd *cachedFieldDocs) prepareFields(field string, ss *SegmentSnapshot) {
|
2017-12-14 08:08:29 +01:00
|
|
|
defer close(cfd.readyCh)
|
|
|
|
|
|
|
|
dict, err := ss.segment.Dictionary(field)
|
|
|
|
if err != nil {
|
|
|
|
cfd.err = err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
dictItr := dict.Iterator()
|
|
|
|
next, err := dictItr.Next()
|
2017-12-14 22:16:06 +01:00
|
|
|
for err == nil && next != nil {
|
2017-12-14 08:08:29 +01:00
|
|
|
postings, err1 := dict.PostingsList(next.Term, nil)
|
|
|
|
if err1 != nil {
|
|
|
|
cfd.err = err1
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
postingsItr := postings.Iterator()
|
|
|
|
nextPosting, err2 := postingsItr.Next()
|
2017-12-14 22:16:06 +01:00
|
|
|
for err2 == nil && nextPosting != nil {
|
|
|
|
docNum := nextPosting.Number()
|
|
|
|
cfd.docs[docNum] = append(cfd.docs[docNum], []byte(next.Term)...)
|
|
|
|
cfd.docs[docNum] = append(cfd.docs[docNum], TermSeparator)
|
2017-12-14 08:08:29 +01:00
|
|
|
nextPosting, err2 = postingsItr.Next()
|
|
|
|
}
|
|
|
|
|
|
|
|
if err2 != nil {
|
|
|
|
cfd.err = err2
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
|
|
|
next, err = dictItr.Next()
|
|
|
|
}
|
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
cfd.err = err
|
|
|
|
return
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
type cachedDocs struct {
|
|
|
|
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
|
|
|
cache map[string]*cachedFieldDocs // Keyed by field
|
|
|
|
}
|
|
|
|
|
2017-12-14 22:16:06 +01:00
|
|
|
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
2017-12-14 08:08:29 +01:00
|
|
|
c.m.Lock()
|
|
|
|
if c.cache == nil {
|
|
|
|
c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, field := range wantedFields {
|
|
|
|
_, exists := c.cache[field]
|
|
|
|
if !exists {
|
|
|
|
c.cache[field] = &cachedFieldDocs{
|
|
|
|
readyCh: make(chan struct{}),
|
|
|
|
docs: make(map[uint64][]byte),
|
|
|
|
}
|
|
|
|
|
2017-12-14 22:16:06 +01:00
|
|
|
go c.cache[field].prepareFields(field, ss)
|
2017-12-14 08:08:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, field := range wantedFields {
|
|
|
|
cachedFieldDocs := c.cache[field]
|
|
|
|
c.m.Unlock()
|
|
|
|
<-cachedFieldDocs.readyCh
|
|
|
|
|
|
|
|
if cachedFieldDocs.err != nil {
|
|
|
|
return cachedFieldDocs.err
|
|
|
|
}
|
|
|
|
c.m.Lock()
|
|
|
|
}
|
|
|
|
|
|
|
|
c.m.Unlock()
|
|
|
|
return nil
|
|
|
|
}
|
2017-12-29 02:48:38 +01:00
|
|
|
|
|
|
|
func (c *cachedDocs) sizeInBytes() uint64 {
|
|
|
|
sizeInBytes := 0
|
|
|
|
c.m.Lock()
|
|
|
|
for k, v := range c.cache { // cachedFieldDocs
|
|
|
|
sizeInBytes += len(k)
|
|
|
|
if v != nil {
|
|
|
|
for _, entry := range v.docs { // docs
|
|
|
|
sizeInBytes += 8 /* size of uint64 */ + len(entry)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
c.m.Unlock()
|
|
|
|
return uint64(sizeInBytes)
|
|
|
|
}
|