0
0
Fork 0

initial version of scorch which persists index to disk

This commit is contained in:
Marty Schoch 2017-12-06 18:33:47 -05:00
parent b1346b4c8a
commit adac4f41db
19 changed files with 839 additions and 92 deletions

View File

@ -23,20 +23,26 @@ import (
)
func TestIndexFieldDict(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
cerr := idx.Close()
if cerr != nil {
t.Fatal(cerr)
}
}()
@ -96,7 +102,7 @@ func TestIndexFieldDict(t *testing.T) {
dict2, err := indexReader.FieldDict("desc")
if err != nil {
t.Errorf("error creating reader: %v", err)
t.Fatalf("error creating reader: %v", err)
}
defer func() {
err := dict2.Close()

View File

@ -15,6 +15,8 @@
package scorch
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
)
@ -26,17 +28,21 @@ type segmentIntroduction struct {
ids []string
internal map[string][]byte
applied chan struct{}
applied chan error
persisted chan error
}
func (s *Scorch) mainLoop() {
var notify notificationChan
OUTER:
for {
select {
case <-s.closeCh:
return
break OUTER
case notify = <-s.introducerNotifier:
case next := <-s.introductions:
// acquire lock
s.rootLock.Lock()
@ -45,7 +51,9 @@ func (s *Scorch) mainLoop() {
segment: make([]*SegmentSnapshot, len(s.root.segment)+1),
offsets: make([]uint64, len(s.root.segment)+1),
internal: make(map[string][]byte, len(s.root.segment)),
epoch: s.nextSnapshotEpoch,
}
s.nextSnapshotEpoch++
// iterate through current segments
var running uint64
@ -56,12 +64,15 @@ func (s *Scorch) mainLoop() {
var err error
delta, err = s.root.segment[i].segment.DocNumbers(next.ids)
if err != nil {
panic(err)
next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
close(next.applied)
continue OUTER
}
}
newSnapshot.segment[i] = &SegmentSnapshot{
id: s.root.segment[i].id,
segment: s.root.segment[i].segment,
notify: s.root.segment[i].notify,
}
// apply new obsoletions
if s.root.segment[i].deleted == nil {
@ -80,6 +91,12 @@ func (s *Scorch) mainLoop() {
segment: next.data,
}
newSnapshot.offsets[len(s.root.segment)] = running
if !s.unsafeBatch {
newSnapshot.segment[len(s.root.segment)].notify = append(
newSnapshot.segment[len(s.root.segment)].notify,
next.persisted,
)
}
// copy old values
for key, oldVal := range s.root.internal {
newSnapshot.internal[key] = oldVal
@ -97,6 +114,13 @@ func (s *Scorch) mainLoop() {
// release lock
s.rootLock.Unlock()
close(next.applied)
if notify != nil {
close(notify)
notify = nil
}
}
}
s.asyncTasks.Done()
}

329
index/scorch/persister.go Normal file
View File

@ -0,0 +1,329 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"bytes"
"fmt"
"log"
"os"
"strings"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
scorchBolt "github.com/blevesearch/bleve/index/scorch/segment/bolt"
"github.com/blevesearch/bleve/index/scorch/segment/mem"
"github.com/boltdb/bolt"
)
// notificationChan is a one-shot signal channel: the persister hands one to
// the introducer, which closes it when a new snapshot has been introduced.
type notificationChan chan struct{}
// persisterLoop runs as a goroutine (registered in s.asyncTasks) and
// repeatedly persists the current root snapshot whenever its epoch advances
// past the last epoch we persisted. The ordering below is deliberate:
// persist, register for notification, re-check, then wait — so that a
// snapshot introduced between the persist and the wait is never missed.
func (s *Scorch) persisterLoop() {
	var lastPersistedEpoch uint64
OUTER:
	for {
		select {
		case <-s.closeCh:
			break OUTER
		default:
			// check to see if there is a new snapshot to persist
			s.rootLock.RLock()
			ourSnapshot := s.root
			s.rootLock.RUnlock()

			//for ourSnapshot.epoch != lastPersistedEpoch {
			if ourSnapshot.epoch != lastPersistedEpoch {
				// lets get started
				err := s.persistSnapshot(ourSnapshot)
				if err != nil {
					// persist failed; loop around and retry from the top
					log.Printf("got err persisting snapshot: %v", err)
					continue OUTER
				}
				lastPersistedEpoch = ourSnapshot.epoch
			}

			// tell the introducer we're waiting for changes
			// first make a notification chan
			notifyUs := make(notificationChan)

			// give it to the introducer
			select {
			case <-s.closeCh:
				break OUTER
			case s.introducerNotifier <- notifyUs:
			}

			// check again — a new snapshot may have been introduced between
			// the persist above and registering the notifier; if we skipped
			// this we could block below with unpersisted data outstanding
			s.rootLock.RLock()
			ourSnapshot = s.root
			s.rootLock.RUnlock()
			if ourSnapshot.epoch != lastPersistedEpoch {
				// lets get started
				err := s.persistSnapshot(ourSnapshot)
				if err != nil {
					log.Printf("got err persisting snapshot: %v", err)
					continue OUTER
				}
				lastPersistedEpoch = ourSnapshot.epoch
			}

			// now wait for it (but also detect close)
			select {
			case <-s.closeCh:
				break OUTER
			case <-notifyUs:
				// woken up, next loop should pick up work
			}
		}
	}
	s.asyncTasks.Done()
}
// persistSnapshot writes the given snapshot's metadata into the root bolt
// database, persisting any still-in-memory segments to their own bolt files
// first, then swaps the disk-backed segment versions into the live root
// snapshot and notifies any waiters registered on the replaced segments.
//
// The result is a named return so the deferred Commit/Rollback below can both
// observe failures from the function body (rollback instead of commit) and
// propagate a Commit error to the caller.
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot) (err error) {
	// start a write transaction
	tx, err := s.rootBolt.Begin(true)
	if err != nil {
		return err
	}
	// commit only if everything below succeeded; otherwise roll back
	defer func() {
		if err == nil {
			err = tx.Commit()
		} else {
			_ = tx.Rollback()
		}
	}()

	snapshotsBucket, err := tx.CreateBucketIfNotExists(boltSnapshotsBucket)
	if err != nil {
		return err
	}
	newSnapshotKey := segment.EncodeUvarintAscending(nil, snapshot.epoch)
	snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey)
	if err != nil {
		return err
	}

	// persist internal values
	internalBucket, err := snapshotBucket.CreateBucketIfNotExists(boltInternalKey)
	if err != nil {
		return err
	}
	// TODO optimize writing these in order?
	for k, v := range snapshot.internal {
		err = internalBucket.Put([]byte(k), v)
		if err != nil {
			return err
		}
	}

	newSegmentPaths := make(map[uint64]string)

	// first ensure that each segment in this snapshot has been persisted
	for i, segmentSnapshot := range snapshot.segment {
		snapshotSegmentKey := segment.EncodeUvarintAscending(nil, uint64(i))
		snapshotSegmentBucket, err2 := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
		if err2 != nil {
			return err2
		}
		switch seg := segmentSnapshot.segment.(type) {
		case *mem.Segment:
			// in-memory segment: persist it to disk in its own bolt file
			filename := fmt.Sprintf("%x.bolt", segmentSnapshot.id)
			path := s.path + string(os.PathSeparator) + filename
			err2 = scorchBolt.PersistSegment(seg, path, 1024)
			if err2 != nil {
				return fmt.Errorf("error persisting segment: %v", err2)
			}
			newSegmentPaths[segmentSnapshot.id] = path
			err2 = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
			if err2 != nil {
				return err2
			}
		case *scorchBolt.Segment:
			// already disk-backed: record its path relative to the index dir
			path := seg.Path()
			filename := strings.TrimPrefix(path, s.path+string(os.PathSeparator))
			err2 = snapshotSegmentBucket.Put(boltPathKey, []byte(filename))
			if err2 != nil {
				return err2
			}
		default:
			return fmt.Errorf("unknown segment type: %T", seg)
		}
		// store current deleted bits
		var roaringBuf bytes.Buffer
		if segmentSnapshot.deleted != nil {
			_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
			if err != nil {
				return fmt.Errorf("error persisting roaring bytes: %v", err)
			}
			err = snapshotSegmentBucket.Put(boltDeletedKey, roaringBuf.Bytes())
			if err != nil {
				return err
			}
		}
	}

	// now try to open all the new snapshots
	// NOTE(review): if one Open fails, segments opened earlier in this loop
	// are not closed before returning — consider cleanup on the error path.
	newSegments := make(map[uint64]segment.Segment)
	for segmentID, path := range newSegmentPaths {
		newSegments[segmentID], err = scorchBolt.Open(path)
		if err != nil {
			return fmt.Errorf("error opening new segment at %s, %v", path, err)
		}
	}

	// get write lock and update the current snapshot with disk-based versions
	var notifications []chan error
	s.rootLock.Lock()
	newIndexSnapshot := &IndexSnapshot{
		epoch:    s.root.epoch,
		segment:  make([]*SegmentSnapshot, len(s.root.segment)),
		offsets:  make([]uint64, len(s.root.offsets)),
		internal: make(map[string][]byte, len(s.root.internal)),
	}
	for i, segmentSnapshot := range s.root.segment {
		// see if this segment has been replaced
		if replacement, ok := newSegments[segmentSnapshot.id]; ok {
			newSegmentSnapshot := &SegmentSnapshot{
				segment: replacement,
				deleted: segmentSnapshot.deleted,
				id:      segmentSnapshot.id,
			}
			newIndexSnapshot.segment[i] = newSegmentSnapshot
			// add the old segment snapshots notifications to the list
			for _, notification := range segmentSnapshot.notify {
				notifications = append(notifications, notification)
			}
		} else {
			newIndexSnapshot.segment[i] = s.root.segment[i]
		}
		newIndexSnapshot.offsets[i] = s.root.offsets[i]
	}
	for k, v := range s.root.internal {
		newIndexSnapshot.internal[k] = v
	}
	s.root = newIndexSnapshot
	s.rootLock.Unlock()

	// now that we've given up the lock, notify everyone that we've safely
	// persisted their data
	for _, notification := range notifications {
		close(notification)
	}

	return nil
}
// bolt snapshot code

// Keys/bucket names used within the root bolt database.
var (
	boltSnapshotsBucket = []byte{'s'} // top-level bucket: one sub-bucket per snapshot epoch
	boltPathKey         = []byte{'p'} // per-segment: filename of the segment's bolt file
	boltDeletedKey      = []byte{'d'} // per-segment: serialized roaring bitmap of deleted docs
	boltInternalKey     = []byte{'i'} // per-snapshot: bucket of internal key/value pairs
)
// loadFromBolt restores the most recently persisted snapshot, if any, from
// the root bolt database. It walks snapshot epochs newest-first and stops at
// the first one that loads cleanly, advancing nextSegmentID and
// nextSnapshotEpoch past everything seen on disk. Unreadable entries are
// logged and skipped rather than failing the open.
func (s *Scorch) loadFromBolt() error {
	return s.rootBolt.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(boltSnapshotsBucket)
		if snapshots == nil {
			// no previously persisted state
			return nil
		}
		c := snapshots.Cursor()
		// iterate from the newest epoch backwards
		for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
			_, snapshotEpoch, err := segment.DecodeUvarintAscending(k)
			if err != nil {
				log.Printf("unable to parse segment epoch % x, continuing", k)
				continue
			}
			snapshot := snapshots.Bucket(k)
			if snapshot == nil {
				log.Printf("snapshot key, but bucket missing % x, continuing", k)
				continue
			}
			indexSnapshot, err := s.loadSnapshot(snapshot)
			if err != nil {
				log.Printf("unable to load snapshot, %v, continuing", err)
				continue
			}
			indexSnapshot.epoch = snapshotEpoch
			// set nextSegmentID one past the largest id seen
			// (local renamed from "segment" to avoid shadowing the package)
			for _, segSnapshot := range indexSnapshot.segment {
				if segSnapshot.id > s.nextSegmentID {
					s.nextSegmentID = segSnapshot.id
				}
			}
			s.nextSegmentID++
			s.nextSnapshotEpoch = snapshotEpoch + 1
			s.root = indexSnapshot
			break
		}
		return nil
	})
}
// loadSnapshot reconstructs an IndexSnapshot from one snapshot bucket:
// the internal key/value sub-bucket plus one sub-bucket per segment, in
// key order, rebuilding the running doc-number offsets as it goes.
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
	rv := &IndexSnapshot{
		internal: make(map[string][]byte),
	}
	var running uint64
	c := snapshot.Cursor()
	for k, _ := c.First(); k != nil; k, _ = c.Next() {
		if k[0] == boltInternalKey[0] {
			internalBucket := snapshot.Bucket(k)
			// previously the ForEach error was silently discarded
			err := internalBucket.ForEach(func(key []byte, val []byte) error {
				// copy the value; bolt-owned bytes are only valid inside the tx
				copiedVal := append([]byte(nil), val...)
				rv.internal[string(key)] = copiedVal
				return nil
			})
			if err != nil {
				return nil, err
			}
		} else {
			segmentBucket := snapshot.Bucket(k)
			if segmentBucket == nil {
				return nil, fmt.Errorf("segment key, but bucket missing % x", k)
			}
			segmentSnapshot, err := s.loadSegment(segmentBucket)
			if err != nil {
				return nil, fmt.Errorf("failed to load segment: %v", err)
			}
			_, segmentSnapshot.id, err = segment.DecodeUvarintAscending(k)
			if err != nil {
				return nil, fmt.Errorf("failed to decode segment id: %v", err)
			}
			rv.segment = append(rv.segment, segmentSnapshot)
			rv.offsets = append(rv.offsets, running)
			running += segmentSnapshot.segment.Count()
		}
	}
	return rv, nil
}
// loadSegment opens one disk-backed segment described by a segment bucket:
// it resolves the stored relative path against the index path, opens the
// bolt segment, and restores the deleted-docs bitmap if one was persisted.
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
	pathBytes := segmentBucket.Get(boltPathKey)
	if pathBytes == nil {
		return nil, fmt.Errorf("segment path missing")
	}
	segmentPath := s.path + string(os.PathSeparator) + string(pathBytes)
	// local renamed from "segment" to avoid shadowing the segment package
	seg, err := scorchBolt.Open(segmentPath)
	if err != nil {
		return nil, fmt.Errorf("error opening bolt segment: %v", err)
	}
	rv := &SegmentSnapshot{
		segment: seg,
	}
	deletedBytes := segmentBucket.Get(boltDeletedKey)
	if deletedBytes != nil {
		deletedBitmap := roaring.NewBitmap()
		r := bytes.NewReader(deletedBytes)
		_, err := deletedBitmap.ReadFrom(r)
		if err != nil {
			return nil, fmt.Errorf("error reading deleted bytes: %v", err)
		}
		rv.deleted = deletedBitmap
	}
	return rv, nil
}

View File

@ -23,14 +23,21 @@ import (
)
func TestIndexReader(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -205,14 +212,21 @@ func TestIndexReader(t *testing.T) {
}
func TestIndexDocIdReader(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -309,14 +323,21 @@ func TestIndexDocIdReader(t *testing.T) {
}
func TestIndexDocIdOnlyReader(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()

View File

@ -16,6 +16,8 @@ package scorch
import (
"encoding/json"
"fmt"
"os"
"sync"
"sync/atomic"
"time"
@ -28,6 +30,7 @@ import (
"github.com/blevesearch/bleve/index/scorch/segment/mem"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/registry"
"github.com/boltdb/bolt"
)
const Name = "scorch"
@ -35,40 +38,95 @@ const Name = "scorch"
const Version uint8 = 1
type Scorch struct {
version uint8
storeConfig map[string]interface{}
analysisQueue *index.AnalysisQueue
stats *Stats
nextSegmentID uint64
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
stats *Stats
nextSegmentID uint64
nextSnapshotEpoch uint64
path string
unsafeBatch bool
rootLock sync.RWMutex
root *IndexSnapshot
closeCh chan struct{}
introductions chan *segmentIntroduction
closeCh chan struct{}
introductions chan *segmentIntroduction
introducerNotifier chan notificationChan
rootBolt *bolt.DB
asyncTasks sync.WaitGroup
}
func NewScorch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
func NewScorch(storeName string, config map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
rv := &Scorch{
version: Version,
storeConfig: storeConfig,
analysisQueue: analysisQueue,
stats: &Stats{},
root: &IndexSnapshot{},
version: Version,
config: config,
analysisQueue: analysisQueue,
stats: &Stats{},
root: &IndexSnapshot{},
nextSnapshotEpoch: 1,
}
return rv, nil
}
func (s *Scorch) Open() error {
var ok bool
s.path, ok = s.config["path"].(string)
if !ok {
return fmt.Errorf("must specify path")
}
if s.path == "" {
return os.ErrInvalid
}
err := os.MkdirAll(s.path, 0700)
if err != nil {
return err
}
rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
s.rootBolt, err = bolt.Open(rootBoltPath, 0600, nil)
if err != nil {
return err
}
// now see if there is any existing state to load
err = s.loadFromBolt()
if err != nil {
return err
}
s.closeCh = make(chan struct{})
s.introductions = make(chan *segmentIntroduction)
s.introducerNotifier = make(chan notificationChan)
s.asyncTasks.Add(1)
go s.mainLoop()
s.asyncTasks.Add(1)
go s.persisterLoop()
return nil
}
func (s *Scorch) Close() error {
func (s *Scorch) Close() (err error) {
// signal to async tasks we want to close
close(s.closeCh)
return nil
// wait for them to close
s.asyncTasks.Wait()
// now close the root bolt
err = s.rootBolt.Close()
s.rootLock.Lock()
for _, segment := range s.root.segment {
cerr := segment.Close()
if err == nil {
err = cerr
}
}
return
}
func (s *Scorch) Update(doc *document.Document) error {
@ -85,7 +143,6 @@ func (s *Scorch) Delete(id string) error {
// Batch applies a batch of changes to the index atomically
func (s *Scorch) Batch(batch *index.Batch) error {
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -148,7 +205,11 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
ids: ids,
obsoletes: make(map[uint64]*roaring.Bitmap),
internal: internalOps,
applied: make(chan struct{}),
applied: make(chan error),
}
if !s.unsafeBatch {
introduction.persisted = make(chan error)
}
// get read lock, to optimistically prepare obsoleted info
@ -165,9 +226,16 @@ func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
s.introductions <- introduction
// block until this segment is applied
<-introduction.applied
err := <-introduction.applied
if err != nil {
return err
}
return nil
if !s.unsafeBatch {
err = <-introduction.persisted
}
return err
}
func (s *Scorch) SetInternal(key, val []byte) error {

View File

@ -16,6 +16,7 @@ package scorch
import (
"log"
"os"
"reflect"
"regexp"
"strconv"
@ -29,13 +30,28 @@ import (
"github.com/blevesearch/bleve/index"
)
func DestroyTest() error {
return os.RemoveAll("/tmp/bleve-scorch-test")
}
var testConfig = map[string]interface{}{
"path": "/tmp/bleve-scorch-test",
}
var testAnalyzer = &analysis.Analyzer{
Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
func TestIndexInsert(t *testing.T) {
func TestIndexOpenReopen(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
@ -43,6 +59,105 @@ func TestIndexInsert(t *testing.T) {
if err != nil {
t.Errorf("error opening index: %v", err)
}
var expectedCount uint64
reader, err := idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err := reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
// insert a doc
doc := document.NewDocument("1")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
expectedCount++
reader, err = idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err = reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
// now close it
err = idx.Close()
if err != nil {
t.Fatal(err)
}
idx, err = NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
}
// check the doc count again after reopening it
reader, err = idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err = reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
// now close it
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}
func TestIndexInsert(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
@ -93,14 +208,21 @@ func TestIndexInsert(t *testing.T) {
}
func TestIndexInsertThenDelete(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -204,8 +326,15 @@ func TestIndexInsertThenDelete(t *testing.T) {
}
func TestIndexInsertThenUpdate(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
@ -213,7 +342,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
var expectedCount uint64
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -264,15 +393,28 @@ func TestIndexInsertThenUpdate(t *testing.T) {
}
func TestIndexInsertMultiple(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
var expectedCount uint64
@ -318,19 +460,26 @@ func TestIndexInsertMultiple(t *testing.T) {
}
func TestIndexInsertWithStore(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
cerr := idx.Close()
if err != nil {
t.Fatal(err)
t.Fatal(cerr)
}
}()
@ -416,14 +565,21 @@ func TestIndexInsertWithStore(t *testing.T) {
}
func TestIndexInternalCRUD(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -503,14 +659,21 @@ func TestIndexInternalCRUD(t *testing.T) {
}
func TestIndexBatch(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -609,14 +772,21 @@ func TestIndexBatch(t *testing.T) {
}
func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -817,14 +987,21 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
}
func TestIndexInsertFields(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -879,14 +1056,21 @@ func TestIndexInsertFields(t *testing.T) {
}
func TestIndexUpdateComposites(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -947,14 +1131,21 @@ func TestIndexUpdateComposites(t *testing.T) {
}
func TestIndexTermReaderCompositeFields(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -1005,14 +1196,21 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
}
func TestConcurrentUpdate(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
@ -1054,14 +1252,21 @@ func TestConcurrentUpdate(t *testing.T) {
}
func TestLargeField(t *testing.T) {
defer func() {
err := DestroyTest()
if err != nil {
t.Fatal(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, nil, analysisQueue)
idx, err := NewScorch(Name, testConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Errorf("error opening index: %v", err)
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()

View File

@ -21,6 +21,7 @@ import (
"github.com/RoaringBitmap/roaring"
"github.com/Smerity/govarint"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/blevesearch/bleve/index/scorch/segment/mem"
"github.com/boltdb/bolt"
"github.com/couchbaselabs/vellum"
@ -47,8 +48,7 @@ var versionKey = []byte{'v'}
var version = 0
func persistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) {
func PersistSegment(memSegment *mem.Segment, path string, chunkFactor uint32) (err error) {
db, err := bolt.Open(path, 0777, nil)
if err != nil {
return err
@ -133,13 +133,13 @@ func persistFields(memSegment *mem.Segment, tx *bolt.Tx) error {
}
// we use special varint which is still guaranteed to sort correctly
fieldBuf := make([]byte, 0, maxVarintSize)
fieldBuf := make([]byte, 0, segment.MaxVarintSize)
for fieldID, fieldName := range memSegment.FieldsInv {
if fieldID != 0 {
// reset buffer if necessary
fieldBuf = fieldBuf[:0]
}
fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID))
fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID))
err = bucket.Put(fieldBuf, []byte(fieldName))
if err != nil {
return err
@ -160,7 +160,7 @@ func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error {
// the (presumably) heavier lifting involved in building the FST could
// be done concurrently.
fieldBuf := make([]byte, 0, maxVarintSize)
fieldBuf := make([]byte, 0, segment.MaxVarintSize)
for fieldID, fieldTerms := range memSegment.DictKeys {
if fieldID != 0 {
// reset buffers if necessary
@ -188,7 +188,7 @@ func persistDictionary(memSegment *mem.Segment, tx *bolt.Tx) error {
// put this FST into bolt
// we use special varint which is still guaranteed to sort correctly
fieldBuf = EncodeUvarintAscending(fieldBuf, uint64(fieldID))
fieldBuf = segment.EncodeUvarintAscending(fieldBuf, uint64(fieldID))
err = bucket.Put(fieldBuf, buffer.Bytes())
if err != nil {
return err
@ -205,13 +205,13 @@ func persistPostings(memSegment *mem.Segment, tx *bolt.Tx) error {
}
bucket.FillPercent = 1.0
postingIDBuf := make([]byte, 0, maxVarintSize)
postingIDBuf := make([]byte, 0, segment.MaxVarintSize)
for postingID := range memSegment.Postings {
if postingID != 0 {
// reset buffers if necessary
postingIDBuf = postingIDBuf[:0]
}
postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID))
postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID))
var postingsBuf bytes.Buffer
_, err := memSegment.Postings[postingID].WriteTo(&postingsBuf)
if err != nil {
@ -234,13 +234,13 @@ func persistPostingsDetails(memSegment *mem.Segment, tx *bolt.Tx,
}
bucket.FillPercent = 1.0
postingIDBuf := make([]byte, 0, maxVarintSize)
postingIDBuf := make([]byte, 0, segment.MaxVarintSize)
for postingID := range memSegment.Postings {
if postingID != 0 {
// reset buffers if necessary
postingIDBuf = postingIDBuf[:0]
}
postingIDBuf = EncodeUvarintAscending(postingIDBuf, uint64(postingID))
postingIDBuf = segment.EncodeUvarintAscending(postingIDBuf, uint64(postingID))
// make bucket for posting details
postingBucket, err := bucket.CreateBucket(postingIDBuf)
@ -264,7 +264,7 @@ func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket,
var err error
var chunkBucket *bolt.Bucket
var currChunk uint32
chunkIDBuf := make([]byte, 0, maxVarintSize)
chunkIDBuf := make([]byte, 0, segment.MaxVarintSize)
postingsListItr := memSegment.Postings[postingID].Iterator()
var encoder *govarint.Base128Encoder
var locEncoder *govarint.Base128Encoder
@ -303,7 +303,7 @@ func persistPostingDetails(memSegment *mem.Segment, postingBucket *bolt.Bucket,
}
// prepare next chunk
chunkIDBuf = EncodeUvarintAscending(chunkIDBuf, uint64(chunk))
chunkIDBuf = segment.EncodeUvarintAscending(chunkIDBuf, uint64(chunk))
chunkBucket, err = postingBucket.CreateBucket(chunkIDBuf)
if err != nil {
return err
@ -410,7 +410,7 @@ func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error {
var curr int
// we use special varint which is still guaranteed to sort correctly
docNumBuf := make([]byte, 0, maxVarintSize)
docNumBuf := make([]byte, 0, segment.MaxVarintSize)
for docNum, storedValues := range memSegment.Stored {
var metaBuf bytes.Buffer
var data, compressed []byte
@ -420,7 +420,7 @@ func persistStored(memSegment *mem.Segment, tx *bolt.Tx) error {
curr = 0
}
// create doc sub-bucket
docNumBuf = EncodeUvarintAscending(docNumBuf, uint64(docNum))
docNumBuf = segment.EncodeUvarintAscending(docNumBuf, uint64(docNum))
docBucket, err := bucket.CreateBucket(docNumBuf)
if err != nil {
return err

View File

@ -28,7 +28,7 @@ func TestBuild(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.bolt")
memSegment := buildMemSegment()
err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024)
err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024)
if err != nil {
t.Fatal(err)
}

View File

@ -51,7 +51,7 @@ func (d *Dictionary) postingsList(term string, except *roaring.Bitmap) (*Posting
}
if exists {
rv.postingsID = postingsID
postingsIDKey := EncodeUvarintAscending(nil, postingsID)
postingsIDKey := segment.EncodeUvarintAscending(nil, postingsID)
bucket := d.segment.tx.Bucket(postingsBucket)
if bucket == nil {
return nil, fmt.Errorf("postings bucket missing")

View File

@ -109,7 +109,7 @@ func TestDictionary(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.bolt")
memSegment := buildMemSegmentForDict()
err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024)
err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}

View File

@ -91,7 +91,7 @@ type PostingsIterator struct {
func (i *PostingsIterator) loadChunk(chunk int) error {
// load correct chunk bytes
chunkID := EncodeUvarintAscending(nil, uint64(chunk))
chunkID := segment.EncodeUvarintAscending(nil, uint64(chunk))
chunkBucket := i.detailBucket.Bucket(chunkID)
if chunkBucket == nil {
return fmt.Errorf("chunk %d missing", chunkID)

View File

@ -123,7 +123,7 @@ func (s *Segment) loadFields() (err error) {
}
} else {
_, fieldID, err2 := DecodeUvarintAscending(k)
_, fieldID, err2 := segment.DecodeUvarintAscending(k)
if err2 != nil {
return err2
}
@ -164,7 +164,11 @@ func (s *Segment) Count() uint64 {
// Dictionary returns the term dictionary for the specified field.
// When the underlying store has no dictionary for the field, a
// segment.EmptyDictionary is returned in place of nil so callers can
// iterate without a nil check.
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
	// NOTE(review): the rendered original carried an unconditional
	// `return s.dictionary(field)` ahead of this body, which made the
	// lines below unreachable; it appears to be leftover pre-change
	// diff text and is removed here.
	dict, err := s.dictionary(field)
	if err == nil && dict == nil {
		return &segment.EmptyDictionary{}, nil
	}
	return dict, err
}
func (s *Segment) dictionary(field string) (*Dictionary, error) {
@ -177,7 +181,7 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) {
rv.fieldID = s.fieldsMap[field]
if rv.fieldID > 0 {
rv.fieldID = rv.fieldID - 1
fieldIDKey := EncodeUvarintAscending(nil, uint64(rv.fieldID))
fieldIDKey := segment.EncodeUvarintAscending(nil, uint64(rv.fieldID))
bucket := s.tx.Bucket(dictBucket)
if bucket == nil {
return nil, fmt.Errorf("dictionary bucket missing")
@ -196,6 +200,8 @@ func (s *Segment) dictionary(field string) (*Dictionary, error) {
}
}
} else {
return nil, nil
}
return rv, nil
@ -208,7 +214,7 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi
if storedBuucket == nil {
return fmt.Errorf("stored bucket missing")
}
docNumKey := EncodeUvarintAscending(nil, num)
docNumKey := segment.EncodeUvarintAscending(nil, num)
docBucket := storedBuucket.Bucket(docNumKey)
if docBucket == nil {
return fmt.Errorf("segment has no doc number %d", num)
@ -307,3 +313,7 @@ func (s *Segment) Close() error {
}
return s.db.Close()
}
// Path returns the filesystem path of the database file backing this
// segment (as reported by the underlying store).
func (s *Segment) Path() string {
	return s.db.Path()
}

View File

@ -25,7 +25,7 @@ func TestOpen(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.bolt")
memSegment := buildMemSegment()
err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024)
err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
@ -325,7 +325,7 @@ func TestOpenMulti(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.bolt")
memSegment := buildMemSegmentMulti()
err := persistSegment(memSegment, "/tmp/scorch.bolt", 1024)
err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1024)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}
@ -425,7 +425,7 @@ func TestOpenMultiWithTwoChunks(t *testing.T) {
_ = os.RemoveAll("/tmp/scorch.bolt")
memSegment := buildMemSegmentMulti()
err := persistSegment(memSegment, "/tmp/scorch.bolt", 1)
err := PersistSegment(memSegment, "/tmp/scorch.bolt", 1)
if err != nil {
t.Fatalf("error persisting segment: %v", err)
}

View File

@ -0,0 +1,61 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package segment
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
)
// EmptyDictionary is a TermDictionary containing no terms; it is
// returned in place of nil when a segment has no dictionary for a
// requested field, so callers can iterate without nil checks.
type EmptyDictionary struct{}

// PostingsList returns an empty postings list for any term.
func (e *EmptyDictionary) PostingsList(term string,
	except *roaring.Bitmap) (PostingsList, error) {
	return &EmptyPostingsList{}, nil
}

// Iterator returns an iterator that yields no entries.
func (e *EmptyDictionary) Iterator() DictionaryIterator {
	return &EmptyDictionaryIterator{}
}

// PrefixIterator returns an iterator that yields no entries,
// regardless of the prefix.
func (e *EmptyDictionary) PrefixIterator(prefix string) DictionaryIterator {
	return &EmptyDictionaryIterator{}
}

// RangeIterator returns an iterator that yields no entries,
// regardless of the start/end range.
func (e *EmptyDictionary) RangeIterator(start, end string) DictionaryIterator {
	return &EmptyDictionaryIterator{}
}
// EmptyDictionaryIterator is a DictionaryIterator over zero terms.
type EmptyDictionaryIterator struct{}

// Next always returns nil, nil, signaling the end of iteration.
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
	return nil, nil
}
// EmptyPostingsList is a PostingsList containing no postings.
type EmptyPostingsList struct{}

// Iterator returns an iterator that yields no postings.
func (e *EmptyPostingsList) Iterator() PostingsIterator {
	return &EmptyPostingsIterator{}
}

// Count returns 0; the list is always empty.
func (e *EmptyPostingsList) Count() uint64 {
	return 0
}
// EmptyPostingsIterator is a PostingsIterator over zero postings.
type EmptyPostingsIterator struct{}

// Next always returns nil, nil, signaling the end of iteration.
func (e *EmptyPostingsIterator) Next() (Posting, error) {
	return nil, nil
}

View File

@ -17,12 +17,12 @@
// Modified to not use pkg/errors
package bolt
package segment
import "fmt"
const (
maxVarintSize = 9
MaxVarintSize = 9
// IntMin is chosen such that the range of int tags does not overlap the
// ascii character set that is frequently used in testing.

View File

@ -17,7 +17,7 @@
// Modified to only test the parts we borrowed
package bolt
package segment
import (
"bytes"

View File

@ -15,6 +15,8 @@
package mem
import (
"fmt"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
)
@ -117,12 +119,25 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi
return nil
}
// getField maps a field name to its zero-based field index.
// FieldsMap stores indexes offset by one (so the zero value means
// "absent"), hence the decrement on the way out. An unknown name
// yields an error.
func (s *Segment) getField(name string) (int, error) {
	if fieldID, ok := s.FieldsMap[name]; ok {
		return int(fieldID - 1), nil
	}
	return 0, fmt.Errorf("no field named %s", name)
}
// Dictionary returns the term dictionary for the specified field.
// An unknown field yields a segment.EmptyDictionary rather than an
// error, so callers may iterate unconditionally.
func (s *Segment) Dictionary(field string) (segment.TermDictionary, error) {
	fieldID, err := s.getField(field)
	if err != nil {
		// no such field, return empty dictionary
		return &segment.EmptyDictionary{}, nil
	}
	return &Dictionary{
		segment: s,
		field:   field,
		// NOTE(review): the rendered original also carried the stale
		// pre-change line `fieldID: uint16(s.getOrDefineField(field, false))`,
		// a duplicate key in this composite literal (a compile error);
		// it is removed here, keeping only the getField-derived value.
		fieldID: uint16(fieldID),
	}, nil
}

View File

@ -42,6 +42,7 @@ type IndexSnapshot struct {
segment []*SegmentSnapshot
offsets []uint64
internal map[string][]byte
epoch uint64
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {

View File

@ -44,6 +44,12 @@ type SegmentSnapshot struct {
id uint64
segment segment.Segment
deleted *roaring.Bitmap
notify []chan error
}
// Close releases the resources held by the underlying segment.
func (s *SegmentSnapshot) Close() error {
	return s.segment.Close()
}
func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
@ -51,6 +57,7 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel
}
func (s *SegmentSnapshot) Count() uint64 {
rv := s.segment.Count()
if s.deleted != nil {
rv -= s.deleted.GetCardinality()