diff --git a/config/config.go b/config/config.go index bdb3036e..e033dad8 100644 --- a/config/config.go +++ b/config/config.go @@ -86,7 +86,6 @@ import ( _ "github.com/blevesearch/bleve/index/store/boltdb" _ "github.com/blevesearch/bleve/index/store/goleveldb" _ "github.com/blevesearch/bleve/index/store/gtreap" - _ "github.com/blevesearch/bleve/index/store/inmem" // index types _ "github.com/blevesearch/bleve/index/upside_down" diff --git a/index.go b/index.go index 73c49d8b..f8169443 100644 --- a/index.go +++ b/index.go @@ -160,7 +160,10 @@ type Index interface { NewBatch() *Batch Batch(b *Batch) error + // Document returns specified document or nil if the document is not + // indexed or stored. Document(id string) (*document.Document, error) + // DocCount returns the number of documents in the index. DocCount() (uint64, error) Search(req *SearchRequest) (*SearchResult, error) @@ -185,6 +188,8 @@ type Index interface { SetInternal(key, val []byte) error DeleteInternal(key []byte) error + // Advanced returns the indexer and data store, exposing lower level + // methods to enumerate records and access data. 
Advanced() (index.Index, store.KVStore, error) } diff --git a/index/analysis.go b/index/analysis.go index 96b70e77..38db2169 100644 --- a/index/analysis.go +++ b/index/analysis.go @@ -12,7 +12,12 @@ package index import "github.com/blevesearch/bleve/document" type IndexRow interface { + KeySize() int + KeyTo([]byte) (int, error) Key() []byte + + ValueSize() int + ValueTo([]byte) (int, error) Value() []byte } diff --git a/index/index.go b/index/index.go index 4ad16679..b763ead7 100644 --- a/index/index.go +++ b/index/index.go @@ -16,6 +16,8 @@ import ( "github.com/blevesearch/bleve/document" ) +var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") + type Index interface { Open() error Close() error @@ -33,6 +35,8 @@ type Index interface { DumpDoc(id string) chan interface{} DumpFields() chan interface{} + // Reader returns a low-level accessor on the index data. Close it to + // release associated resources. Reader() (IndexReader, error) Stats() json.Marshaler @@ -42,9 +46,15 @@ type Index interface { type IndexReader interface { TermFieldReader(term []byte, field string) (TermFieldReader, error) + + // DocIDReader returns an iterator over documents which identifiers are + // greater than or equal to start and smaller than end. The caller must + // close returned instance to release associated resources. DocIDReader(start, end string) (DocIDReader, error) FieldDict(field string) (FieldDict, error) + + // FieldDictRange is currently defined to include the start and end terms FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error) FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) @@ -95,8 +105,17 @@ type FieldDict interface { Close() error } +// DocIDReader is the interface exposing enumeration of documents identifiers. +// Close the reader to release associated resources. 
type DocIDReader interface { + // Next returns the next document identifier in ascending lexicographic + // byte order, or io.EOF when the end of the sequence is reached. Next() (string, error) + + // Advance resets the iteration to the first identifier greater than or + // equal to ID. If ID is smaller than the start of the range, the iteration + // will start there instead. If ID is greater than or equal to the end of + // the range, Next() call will return io.EOF. Advance(ID string) (string, error) Close() error } diff --git a/index/store/batch.go b/index/store/batch.go index 4a3c76bc..07a22ddd 100644 --- a/index/store/batch.go +++ b/index/store/batch.go @@ -15,55 +15,41 @@ type op struct { } type EmulatedBatch struct { - w KVWriter - ops []*op - merge *EmulatedMerge + Ops []*op + Merger *EmulatedMerge } -func NewEmulatedBatch(w KVWriter, mo MergeOperator) *EmulatedBatch { +func NewEmulatedBatch(mo MergeOperator) *EmulatedBatch { return &EmulatedBatch{ - w: w, - ops: make([]*op, 0, 1000), - merge: NewEmulatedMerge(mo), + Ops: make([]*op, 0, 1000), + Merger: NewEmulatedMerge(mo), } } func (b *EmulatedBatch) Set(key, val []byte) { - b.ops = append(b.ops, &op{key, val}) + ck := make([]byte, len(key)) + copy(ck, key) + cv := make([]byte, len(val)) + copy(cv, val) + b.Ops = append(b.Ops, &op{ck, cv}) } func (b *EmulatedBatch) Delete(key []byte) { - b.ops = append(b.ops, &op{key, nil}) + ck := make([]byte, len(key)) + copy(ck, key) + b.Ops = append(b.Ops, &op{ck, nil}) } +// Merge queues a merge of val into key. Both slices are copied first so the +// queued operation never aliases buffers the caller may go on to reuse. func (b *EmulatedBatch) Merge(key, val []byte) { - b.merge.Merge(key, val) + ck := make([]byte, len(key)) + copy(ck, key) + cv := make([]byte, len(val)) + copy(cv, val) + b.Merger.Merge(ck, cv) } -func (b *EmulatedBatch) Execute() error { - // first process merges - err := b.merge.Execute(b.w) - if err != nil { - return err - } - - // now apply all the ops - for _, op := range b.ops { - if op.V != nil { - err := b.w.Set(op.K, op.V) - if err != nil { - return err - } - } else { - err := 
b.w.Delete(op.K) - if err != nil { - return err - } - } - } - return nil -} - -func (b *EmulatedBatch) Close() error { - return nil +func (b *EmulatedBatch) Reset() { + b.Ops = b.Ops[:0] } diff --git a/index/store/boltdb/iterator.go b/index/store/boltdb/iterator.go index 0cf87049..63bde9f5 100644 --- a/index/store/boltdb/iterator.go +++ b/index/store/boltdb/iterator.go @@ -10,6 +10,8 @@ package boltdb import ( + "bytes" + "github.com/boltdb/bolt" ) @@ -17,24 +19,36 @@ type Iterator struct { store *Store tx *bolt.Tx cursor *bolt.Cursor + prefix []byte + start []byte + end []byte valid bool key []byte val []byte } -func (i *Iterator) SeekFirst() { - i.key, i.val = i.cursor.First() +func (i *Iterator) updateValid() { i.valid = (i.key != nil) + if i.valid { + if i.prefix != nil { + i.valid = bytes.HasPrefix(i.key, i.prefix) + } else if i.end != nil { + i.valid = bytes.Compare(i.key, i.end) < 0 + } + } } func (i *Iterator) Seek(k []byte) { + if bytes.Compare(k, i.start) < 0 { + k = i.start + } i.key, i.val = i.cursor.Seek(k) - i.valid = (i.key != nil) + i.updateValid() } func (i *Iterator) Next() { i.key, i.val = i.cursor.Next() - i.valid = (i.key != nil) + i.updateValid() } func (i *Iterator) Current() ([]byte, []byte, bool) { diff --git a/index/store/boltdb/reader.go b/index/store/boltdb/reader.go index 2179ebb5..71815a72 100644 --- a/index/store/boltdb/reader.go +++ b/index/store/boltdb/reader.go @@ -19,16 +19,17 @@ type Reader struct { tx *bolt.Tx } -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - func (r *Reader) Get(key []byte) ([]byte, error) { - rv := r.tx.Bucket([]byte(r.store.bucket)).Get(key) + var rv []byte + v := r.tx.Bucket([]byte(r.store.bucket)).Get(key) + if v != nil { + rv = make([]byte, len(v)) + copy(rv, v) + } return rv, nil } -func (r *Reader) Iterator(key []byte) store.KVIterator { +func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { b := r.tx.Bucket([]byte(r.store.bucket)) cursor := b.Cursor() @@ -36,9 +37,26 @@ 
func (r *Reader) Iterator(key []byte) store.KVIterator { store: r.store, tx: r.tx, cursor: cursor, + prefix: prefix, } - rv.Seek(key) + rv.Seek(prefix) + return rv +} + +func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { + b := r.tx.Bucket([]byte(r.store.bucket)) + cursor := b.Cursor() + + rv := &Iterator{ + store: r.store, + tx: r.tx, + cursor: cursor, + start: start, + end: end, + } + + rv.Seek(start) return rv } diff --git a/index/store/boltdb/store.go b/index/store/boltdb/store.go index 01cfda2f..93600094 100644 --- a/index/store/boltdb/store.go +++ b/index/store/boltdb/store.go @@ -19,7 +19,6 @@ package boltdb import ( "fmt" - "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" @@ -33,41 +32,45 @@ type Store struct { bucket string db *bolt.DB noSync bool - writer sync.Mutex mo store.MergeOperator } -func New(path string, bucket string) *Store { - rv := Store{ - path: path, - bucket: bucket, +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + path, ok := config["path"].(string) + if !ok { + return nil, fmt.Errorf("must specify path") } - return &rv -} -func (bs *Store) Open() error { + bucket, ok := config["bucket"].(string) + if !ok { + bucket = "bleve" + } - var err error - bs.db, err = bolt.Open(bs.path, 0600, nil) + noSync, _ := config["nosync"].(bool) + + db, err := bolt.Open(path, 0600, nil) if err != nil { - return err + return nil, err } - bs.db.NoSync = bs.noSync + db.NoSync = noSync - err = bs.db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucketIfNotExists([]byte(bs.bucket)) + err = db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists([]byte(bucket)) return err }) if err != nil { - return err + return nil, err } - return nil -} - -func (bs *Store) SetMergeOperator(mo store.MergeOperator) { - bs.mo = mo + rv := Store{ + path: path, + bucket: bucket, + db: db, + mo: mo, + noSync: noSync, + } + return &rv, nil } func (bs 
*Store) Close() error { @@ -86,41 +89,11 @@ func (bs *Store) Reader() (store.KVReader, error) { } func (bs *Store) Writer() (store.KVWriter, error) { - bs.writer.Lock() - tx, err := bs.db.Begin(true) - if err != nil { - bs.writer.Unlock() - return nil, err - } - reader := &Reader{ - store: bs, - tx: tx, - } return &Writer{ - store: bs, - tx: tx, - reader: reader, + store: bs, }, nil } -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - - bucket, ok := config["bucket"].(string) - if !ok { - bucket = "bleve" - } - - noSync, _ := config["nosync"].(bool) - - store := New(path, bucket) - store.noSync = noSync - return store, nil -} - func init() { - registry.RegisterKVStore(Name, StoreConstructor) + registry.RegisterKVStore(Name, New) } diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index d188de28..4abde11b 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -10,289 +10,76 @@ package boltdb import ( - "fmt" "os" - "reflect" "testing" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" ) -func TestStore(t *testing.T) { - s := New("test", "bleve") - err := s.Open() +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, map[string]interface{}{"path": "test"}) if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) + return rv } -func TestReaderIsolation(t *testing.T) { - s := New("test", "bleve") - err := s.Open() +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := 
s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() + err = os.RemoveAll("test") if err != nil { t.Fatal(err) } } -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != 
nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - - // ************************************************** - // this is a hack to try to pre-emptively overflow - // boltdb writes *MAY* block a long reader - // in particular, if the write requires additional - // allocation, it must acquire the same lock as - // the reader, thus cannot continue until that - // reader is closed. - // in general this is not a problem for bleve - // (though it may affect performance in some cases) - // but it is a problem for this test which attemps - // to easily verify that readers are isolated - // this hack writes enough initial data such that - // the subsequent writes do not require additional - // space - hackSize := 1000 - for i := 0; i < hackSize; i++ { - k := fmt.Sprintf("x%d", i) - err = writer.Set([]byte(k), []byte("filler")) - if err != nil { - t.Fatal(err) - } - } - // ************************************************** - - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != hackSize+1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - 
t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != hackSize+2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != hackSize+1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - +func TestBoltDBKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestKVCrud(t, s) +} + +func TestBoltDBReaderIsolation(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderIsolation(t, s) +} + +func TestBoltDBReaderOwnsGetBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestBoltDBWriterOwnsBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestBoltDBPrefixIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestPrefixIterator(t, s) +} + +func TestBoltDBRangeIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIterator(t, s) +} 
+ +func TestBoltDBRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + +func TestBoltDBMerge(t *testing.T) { + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) + test.CommonTestMerge(t, s) } diff --git a/index/store/boltdb/writer.go b/index/store/boltdb/writer.go index 0a70bfff..3972ab88 100644 --- a/index/store/boltdb/writer.go +++ b/index/store/boltdb/writer.go @@ -10,41 +10,70 @@ package boltdb import ( + "fmt" + "github.com/blevesearch/bleve/index/store" - "github.com/boltdb/bolt" ) type Writer struct { - store *Store - tx *bolt.Tx - reader *Reader -} - -func (w *Writer) Set(key, val []byte) error { - return w.tx.Bucket([]byte(w.store.bucket)).Put(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.tx.Bucket([]byte(w.store.bucket)).Delete(key) + store *Store } func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.store.mo) + return store.NewEmulatedBatch(w.store.mo) +} + +// ExecuteBatch applies batch in one read-write bolt transaction: merges are +// resolved through the store's merge operator first, then sets and deletes are +// applied in the order they were queued. It returns an error if batch is not a +// *store.EmulatedBatch; on any failure the transaction is rolled back. +func (w *Writer) ExecuteBatch(batch store.KVBatch) error { + + emulatedBatch, ok := batch.(*store.EmulatedBatch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + tx, err := w.store.db.Begin(true) + if err != nil { + return err + } + // Release the write transaction on every early return; once Commit has + // closed the transaction this Rollback only returns an ignorable error. + defer func() { + _ = tx.Rollback() + }() + + for k, mergeOps := range emulatedBatch.Merger.Merges { + kb := []byte(k) + existingVal := tx.Bucket([]byte(w.store.bucket)).Get(kb) + mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + err = tx.Bucket([]byte(w.store.bucket)).Put(kb, mergedVal) + if err != nil { + return err + } + } + + for _, op := range emulatedBatch.Ops { + if op.V != nil { + err := tx.Bucket([]byte(w.store.bucket)).Put(op.K, op.V) + if err != nil { + return err + } + } else { + err := tx.Bucket([]byte(w.store.bucket)).Delete(op.K) + if err != nil { + return err + } + } + } + + return tx.Commit() } func (w *Writer) Close() error { - defer 
w.store.writer.Unlock() - return w.tx.Commit() -} - -func (w *Writer) BytesSafeAfterClose() bool { - return w.reader.BytesSafeAfterClose() -} - -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.reader.Get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.reader.Iterator(key) + return nil } diff --git a/index/store/goleveldb/batch.go b/index/store/goleveldb/batch.go index b56a3bb7..91b2598e 100644 --- a/index/store/goleveldb/batch.go +++ b/index/store/goleveldb/batch.go @@ -15,7 +15,7 @@ import ( ) type Batch struct { - w *Writer + store *Store merge *store.EmulatedMerge batch *leveldb.Batch } @@ -32,22 +32,7 @@ func (b *Batch) Merge(key, val []byte) { b.merge.Merge(key, val) } -func (b *Batch) Execute() error { - - // first process merges - ops, err := b.merge.ExecuteDeferred(b.w) - if err != nil { - return err - } - for _, op := range ops { - b.batch.Put(op.K, op.V) - } - - wopts := defaultWriteOptions() - err = b.w.store.db.Write(b.batch, wopts) - return err -} - -func (b *Batch) Close() error { - return nil +func (b *Batch) Reset() { + b.batch.Reset() + b.merge = store.NewEmulatedMerge(b.store.mo) } diff --git a/index/store/goleveldb/config.go b/index/store/goleveldb/config.go new file mode 100644 index 00000000..102debb7 --- /dev/null +++ b/index/store/goleveldb/config.go @@ -0,0 +1,52 @@ +package goleveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) { + + ro, ok := config["read_only"].(bool) + if ok { + o.ReadOnly = ro + } + + cim, ok := config["create_if_missing"].(bool) + if ok { + o.ErrorIfMissing = !cim + } + + eie, ok := config["error_if_exists"].(bool) + if ok { + o.ErrorIfExist = eie + } + + wbs, ok := config["write_buffer_size"].(float64) + if ok { + o.WriteBuffer = int(wbs) + } + + bs, ok := config["block_size"].(float64) + if ok { + o.BlockSize = int(bs) + } + 
+ bri, ok := config["block_restart_interval"].(float64) + if ok { + o.BlockRestartInterval = int(bri) + } + + lcc, ok := config["lru_cache_capacity"].(float64) + if ok { + o.BlockCacheCapacity = int(lcc) + } + + bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) + if ok { + bf := filter.NewBloomFilter(int(bfbpk)) + o.Filter = bf + } + + return o, nil +} diff --git a/index/store/goleveldb/iterator.go b/index/store/goleveldb/iterator.go index 5c8a955e..7ed4ffe3 100644 --- a/index/store/goleveldb/iterator.go +++ b/index/store/goleveldb/iterator.go @@ -9,53 +9,18 @@ package goleveldb -import ( - "github.com/syndtr/goleveldb/leveldb" - "github.com/syndtr/goleveldb/leveldb/iterator" -) +import "github.com/syndtr/goleveldb/leveldb/iterator" type Iterator struct { store *Store iterator iterator.Iterator - copyk []byte - copyv []byte -} - -func newIterator(store *Store) *Iterator { - ropts := defaultReadOptions() - iter := store.db.NewIterator(nil, ropts) - rv := Iterator{ - store: store, - iterator: iter, - } - return &rv -} - -func newIteratorWithSnapshot(store *Store, snapshot *leveldb.Snapshot) *Iterator { - options := defaultReadOptions() - iter := snapshot.NewIterator(nil, options) - rv := Iterator{ - store: store, - iterator: iter, - } - return &rv -} - -func (ldi *Iterator) SeekFirst() { - ldi.copyk = nil - ldi.copyv = nil - ldi.iterator.First() } func (ldi *Iterator) Seek(key []byte) { - ldi.copyk = nil - ldi.copyv = nil ldi.iterator.Seek(key) } func (ldi *Iterator) Next() { - ldi.copyk = nil - ldi.copyv = nil ldi.iterator.Next() } @@ -67,21 +32,11 @@ func (ldi *Iterator) Current() ([]byte, []byte, bool) { } func (ldi *Iterator) Key() []byte { - k := ldi.iterator.Key() - if ldi.copyk == nil { - ldi.copyk = make([]byte, len(k)) - copy(ldi.copyk, k) - } - return ldi.copyk + return ldi.iterator.Key() } func (ldi *Iterator) Value() []byte { - v := ldi.iterator.Value() - if ldi.copyv == nil { - ldi.copyv = make([]byte, len(v)) - copy(ldi.copyv, v) - } - return 
ldi.copyv + return ldi.iterator.Value() } func (ldi *Iterator) Valid() bool { @@ -89,7 +44,6 @@ func (ldi *Iterator) Valid() bool { } func (ldi *Iterator) Close() error { - ldi.copyk = nil - ldi.copyv = nil + ldi.iterator.Release() return nil } diff --git a/index/store/goleveldb/reader.go b/index/store/goleveldb/reader.go index 23316a22..7807d571 100644 --- a/index/store/goleveldb/reader.go +++ b/index/store/goleveldb/reader.go @@ -12,6 +12,7 @@ package goleveldb import ( "github.com/blevesearch/bleve/index/store" "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/util" ) type Reader struct { @@ -19,26 +20,37 @@ type Reader struct { snapshot *leveldb.Snapshot } -func newReader(store *Store) (*Reader, error) { - snapshot, _ := store.db.GetSnapshot() - return &Reader{ - store: store, - snapshot: snapshot, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.getWithSnapshot(key, r.snapshot) + b, err := r.snapshot.Get(key, r.store.defaultReadOptions) + if err == leveldb.ErrNotFound { + return nil, nil + } + return b, err } -func (r *Reader) Iterator(key []byte) store.KVIterator { - rv := newIteratorWithSnapshot(r.store, r.snapshot) - rv.Seek(key) - return rv +func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { + byteRange := util.BytesPrefix(prefix) + iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions) + iter.First() + rv := Iterator{ + store: r.store, + iterator: iter, + } + return &rv +} + +func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { + byteRange := &util.Range{ + Start: start, + Limit: end, + } + iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions) + iter.First() + rv := Iterator{ + store: r.store, + iterator: iter, + } + return &rv } func (r *Reader) Close() error { diff --git a/index/store/goleveldb/store.go b/index/store/goleveldb/store.go index 3ab874fd..26d9a3a0 100644 
--- a/index/store/goleveldb/store.go +++ b/index/store/goleveldb/store.go @@ -11,167 +11,72 @@ package goleveldb import ( "fmt" - "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" "github.com/syndtr/goleveldb/leveldb" - "github.com/syndtr/goleveldb/leveldb/filter" "github.com/syndtr/goleveldb/leveldb/opt" ) const Name = "goleveldb" type Store struct { - path string - opts *opt.Options - db *leveldb.DB - writer sync.Mutex - mo store.MergeOperator + path string + opts *opt.Options + db *leveldb.DB + mo store.MergeOperator + + defaultWriteOptions *opt.WriteOptions + defaultReadOptions *opt.ReadOptions } -func New(path string, config map[string]interface{}) (*Store, error) { - rv := Store{ - path: path, - opts: &opt.Options{}, +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + + path, ok := config["path"].(string) + if !ok { + return nil, fmt.Errorf("must specify path") } - _, err := applyConfig(rv.opts, config) + opts, err := applyConfig(&opt.Options{}, config) if err != nil { return nil, err } - return &rv, nil -} - -func (ldbs *Store) Open() error { - var err error - ldbs.db, err = leveldb.OpenFile(ldbs.path, ldbs.opts) + db, err := leveldb.OpenFile(path, opts) if err != nil { - return err + return nil, err } - return nil -} -func (ldbs *Store) SetMergeOperator(mo store.MergeOperator) { - ldbs.mo = mo -} - -func (ldbs *Store) get(key []byte) ([]byte, error) { - options := defaultReadOptions() - b, err := ldbs.db.Get(key, options) - if err == leveldb.ErrNotFound { - return nil, nil + rv := Store{ + path: path, + opts: opts, + db: db, + mo: mo, + defaultReadOptions: &opt.ReadOptions{}, + defaultWriteOptions: &opt.WriteOptions{}, } - return b, err -} - -func (ldbs *Store) getWithSnapshot(key []byte, snapshot *leveldb.Snapshot) ([]byte, error) { - options := defaultReadOptions() - b, err := snapshot.Get(key, options) - if err == leveldb.ErrNotFound { - return nil, nil - } - return b, err -} 
- -func (ldbs *Store) set(key, val []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.setlocked(key, val) -} - -func (ldbs *Store) setlocked(key, val []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Put(key, val, options) - return err -} - -func (ldbs *Store) delete(key []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.deletelocked(key) -} - -func (ldbs *Store) deletelocked(key []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Delete(key, options) - return err + rv.defaultWriteOptions.Sync = true + return &rv, nil } func (ldbs *Store) Close() error { return ldbs.db.Close() } -func (ldbs *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(ldbs) - rv.Seek(key) - return rv -} - func (ldbs *Store) Reader() (store.KVReader, error) { - return newReader(ldbs) + snapshot, _ := ldbs.db.GetSnapshot() + return &Reader{ + store: ldbs, + snapshot: snapshot, + }, nil } func (ldbs *Store) Writer() (store.KVWriter, error) { - return newWriter(ldbs) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - return New(path, config) + return &Writer{ + store: ldbs, + }, nil } func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func applyConfig(o *opt.Options, config map[string]interface{}) ( - *opt.Options, error) { - - ro, ok := config["read_only"].(bool) - if ok { - o.ReadOnly = ro - } - - cim, ok := config["create_if_missing"].(bool) - if ok { - o.ErrorIfMissing = !cim - } - - eie, ok := config["error_if_exists"].(bool) - if ok { - o.ErrorIfExist = eie - } - - wbs, ok := config["write_buffer_size"].(float64) - if ok { - o.WriteBuffer = int(wbs) - } - - bs, ok := config["block_size"].(float64) - if ok { - o.BlockSize = int(bs) - } - - bri, ok := config["block_restart_interval"].(float64) - if ok { - o.BlockRestartInterval = 
int(bri) - } - - lcc, ok := config["lru_cache_capacity"].(float64) - if ok { - o.BlockCacheCapacity = int(lcc) - } - - bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) - if ok { - bf := filter.NewBloomFilter(int(bfbpk)) - o.Filter = bf - } - - return o, nil + registry.RegisterKVStore(Name, New) } diff --git a/index/store/goleveldb/store_test.go b/index/store/goleveldb/store_test.go index 65db6481..6fceee78 100644 --- a/index/store/goleveldb/store_test.go +++ b/index/store/goleveldb/store_test.go @@ -11,378 +11,78 @@ package goleveldb import ( "os" - "reflect" "testing" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" ) -var leveldbTestOptions = map[string]interface{}{ - "create_if_missing": true, +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, map[string]interface{}{ + "path": "test", + "create_if_missing": true, + }) + if err != nil { + t.Fatal(err) + } + return rv } -func TestLevelDBStore(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", leveldbTestOptions) +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() if err != nil { t.Fatal(err) } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func TestLevelDBStoreIterator(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStoreIterator(t, s) -} - -func TestReaderIsolation(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - - s, err := New("test", leveldbTestOptions) - if err != nil 
{ - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != 
"val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() + err = os.RemoveAll("test") if err != nil { t.Fatal(err) } } -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it = newReader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 2 { - 
t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it = reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - +func TestGoLevelDBKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestKVCrud(t, s) } -func CommonTestKVStoreIterator(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - - data := []struct { - k []byte - v []byte - }{ - {[]byte("t\x09\x00paint\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00party\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00personal\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00plan\xff/sponsor/gold/thumbtack/"), []byte("a")}, - } - - batch := writer.NewBatch() - for _, d := range data { - batch.Set(d.k, d.v) - } - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("a")) - keys := make([][]byte, 0, len(data)) - key, _, valid := it.Current() - for valid { - keys = append(keys, key) - it.Next() - key, _, valid = it.Current() - } - - if len(keys) != len(data) { - t.Errorf("expected same number of keys, got %d != %d", len(keys), len(data)) - } - for i, dk := range data { - if !reflect.DeepEqual(dk.k, keys[i]) { - t.Errorf("expected key %s got %s", dk.k, keys[i]) - } - - } - - err = it.Close() - if err != nil { - 
t.Fatal(err) - } +func TestGoLevelDBReaderIsolation(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderIsolation(t, s) +} + +func TestGoLevelDBReaderOwnsGetBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestGoLevelDBWriterOwnsBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestGoLevelDBPrefixIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestPrefixIterator(t, s) +} + +func TestGoLevelDBRangeIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIterator(t, s) +} + +func TestGoLevelDBRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + +func TestGoLevelDBMerge(t *testing.T) { + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) + test.CommonTestMerge(t, s) } diff --git a/index/store/goleveldb/util.go b/index/store/goleveldb/util.go deleted file mode 100644 index 4e22808b..00000000 --- a/index/store/goleveldb/util.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package goleveldb - -import ( - "github.com/syndtr/goleveldb/leveldb/opt" -) - -func defaultWriteOptions() *opt.WriteOptions { - wo := &opt.WriteOptions{} - // request fsync on write for safety - wo.Sync = true - return wo -} - -func defaultReadOptions() *opt.ReadOptions { - ro := &opt.ReadOptions{} - return ro -} diff --git a/index/store/goleveldb/writer.go b/index/store/goleveldb/writer.go index ce1d03c9..c5229005 100644 --- a/index/store/goleveldb/writer.go +++ b/index/store/goleveldb/writer.go @@ -10,6 +10,8 @@ package goleveldb import ( + "fmt" + "github.com/blevesearch/bleve/index/store" "github.com/syndtr/goleveldb/leveldb" ) @@ -18,46 +20,40 @@ type Writer struct { store *Store } -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) -} - func (w *Writer) NewBatch() store.KVBatch { rv := Batch{ - w: w, + store: w.store, merge: store.NewEmulatedMerge(w.store.mo), batch: new(leveldb.Batch), } return &rv } +func (w *Writer) ExecuteBatch(b store.KVBatch) error { + batch, ok := b.(*Batch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + // first process merges + for k, mergeOps := range batch.merge.Merges { + kb := []byte(k) + existingVal, err := w.store.db.Get(kb, w.store.defaultReadOptions) + if err != nil && err != leveldb.ErrNotFound { + return err + } + mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + // add the final merge to this batch + batch.batch.Put(kb, mergedVal) + } + + // now execute the batch + return w.store.db.Write(batch.batch, w.store.defaultWriteOptions) +} + func (w *Writer) Close() error { - w.store.writer.Unlock() return nil 
} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/gtreap/gtreap_test.go b/index/store/gtreap/gtreap_test.go deleted file mode 100644 index 19b568d1..00000000 --- a/index/store/gtreap/gtreap_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. 
- -package gtreap - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestGTreapStore(t *testing.T) { - s, err := StoreConstructor(nil) - if err != nil { - t.Fatal(err) - } - - CommonTestKVStore(t, s) -} - -func TestReaderIsolation(t *testing.T) { - s, err := StoreConstructor(nil) - if err != nil { - t.Fatal(err) - } - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - 
it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - valB := []byte("val-b") - err = writer.Set([]byte("b"), valB) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got %s", val) - } - - // ensure that the director 
iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } -} diff --git a/index/store/gtreap/iterator.go b/index/store/gtreap/iterator.go index 3bed1a2b..bc56eb34 100644 --- a/index/store/gtreap/iterator.go +++ b/index/store/gtreap/iterator.go @@ -15,6 +15,7 @@ package gtreap import ( + "bytes" "sync" "github.com/steveyen/gtreap" @@ -28,22 +29,16 @@ type Iterator struct { nextCh chan *Item curr *Item currOk bool -} -func newIterator(t *gtreap.Treap) *Iterator { - return &Iterator{t: t} -} - -func (w *Iterator) SeekFirst() { - min := w.t.Min() - if min != nil { - w.restart(min.(*Item)) - } else { - w.restart(nil) - } + prefix []byte + start []byte + end []byte } func (w *Iterator) Seek(k []byte) { + if bytes.Compare(k, w.start) < 0 { + k = w.start + } w.restart(&Item{k: k}) } @@ -93,6 +88,11 @@ func (w *Iterator) Current() ([]byte, []byte, bool) { if !w.currOk || w.curr == nil { return nil, nil, false } + if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) { + return nil, nil, false + } else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 { + return nil, nil, false + } return w.curr.k, w.curr.v, w.currOk } diff --git a/index/store/gtreap/reader.go b/index/store/gtreap/reader.go index 4a18a452..6f92a751 100644 --- 
a/index/store/gtreap/reader.go +++ b/index/store/gtreap/reader.go @@ -24,20 +24,34 @@ type Reader struct { t *gtreap.Treap } -func (w *Reader) BytesSafeAfterClose() bool { - return false -} - func (w *Reader) Get(k []byte) (v []byte, err error) { + var rv []byte itm := w.t.Get(&Item{k: k}) if itm != nil { - return itm.(*Item).v, nil + rv = make([]byte, len(itm.(*Item).v)) + copy(rv, itm.(*Item).v) + return rv, nil } return nil, nil } -func (w *Reader) Iterator(k []byte) store.KVIterator { - return newIterator(w.t).restart(&Item{k: k}) +func (w *Reader) PrefixIterator(k []byte) store.KVIterator { + rv := Iterator{ + t: w.t, + prefix: k, + } + rv.restart(&Item{k: k}) + return &rv +} + +func (w *Reader) RangeIterator(start, end []byte) store.KVIterator { + rv := Iterator{ + t: w.t, + start: start, + end: end, + } + rv.restart(&Item{k: start}) + return &rv } func (w *Reader) Close() error { diff --git a/index/store/gtreap/gtreap.go b/index/store/gtreap/store.go similarity index 65% rename from index/store/gtreap/gtreap.go rename to index/store/gtreap/store.go index e6f1eb65..7b0048f2 100644 --- a/index/store/gtreap/gtreap.go +++ b/index/store/gtreap/store.go @@ -12,36 +12,24 @@ // Package gtreap provides an in-memory implementation of the // KVStore interfaces using the gtreap balanced-binary treap, // copy-on-write data structure. 
+ package gtreap import ( "bytes" - "fmt" "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - "github.com/steveyen/gtreap" ) const Name = "gtreap" -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -const MAX_CONCURRENT_WRITERS = 1 - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - s := &Store{ - availableWriters: make(chan bool, MAX_CONCURRENT_WRITERS), - t: gtreap.NewTreap(itemCompare), - } - for i := 0; i < MAX_CONCURRENT_WRITERS; i++ { - s.availableWriters <- true - } - return s, nil +type Store struct { + m sync.Mutex + t *gtreap.Treap + mo store.MergeOperator } type Item struct { @@ -53,29 +41,15 @@ func itemCompare(a, b interface{}) int { return bytes.Compare(a.(*Item).k, b.(*Item).k) } -type Store struct { - availableWriters chan bool - - m sync.Mutex - t *gtreap.Treap - - mo store.MergeOperator -} - -type Writer struct { - s *Store -} - -func (s *Store) Open() error { - return nil -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.mo = mo +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + rv := Store{ + t: gtreap.NewTreap(itemCompare), + mo: mo, + } + return &rv, nil } func (s *Store) Close() error { - close(s.availableWriters) return nil } @@ -87,10 +61,9 @@ func (s *Store) Reader() (store.KVReader, error) { } func (s *Store) Writer() (store.KVWriter, error) { - available, ok := <-s.availableWriters - if !ok || !available { - return nil, fmt.Errorf("no available writers") - } - return &Writer{s: s}, nil } + +func init() { + registry.RegisterKVStore(Name, New) +} diff --git a/index/store/gtreap/store_test.go b/index/store/gtreap/store_test.go new file mode 100644 index 00000000..b7686ee6 --- /dev/null +++ b/index/store/gtreap/store_test.go @@ -0,0 +1,83 @@ +// Copyright (c) 2014 Couchbase, Inc. 
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +package gtreap + +import ( + "testing" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" +) + +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, nil) + if err != nil { + t.Fatal(err) + } + return rv +} + +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestGTreapKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestKVCrud(t, s) +} + +func TestGTreapReaderIsolation(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderIsolation(t, s) +} + +func TestGTreapReaderOwnsGetBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestGTreapWriterOwnsBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestGTreapPrefixIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestPrefixIterator(t, s) +} + +func TestGTreapRangeIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIterator(t, s) +} + +func TestGTreapRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + +func TestGTreapMerge(t *testing.T) { + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) + 
test.CommonTestMerge(t, s) +} diff --git a/index/store/gtreap/writer.go b/index/store/gtreap/writer.go index aa95256c..4490b158 100644 --- a/index/store/gtreap/writer.go +++ b/index/store/gtreap/writer.go @@ -15,58 +15,55 @@ package gtreap import ( + "fmt" "math/rand" "github.com/blevesearch/bleve/index/store" ) -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Get(k []byte) (v []byte, err error) { - w.s.m.Lock() - t := w.s.t - w.s.m.Unlock() - - itm := t.Get(&Item{k: k}) - if itm != nil { - return itm.(*Item).v, nil - } - return nil, nil -} - -func (w *Writer) Iterator(k []byte) store.KVIterator { - w.s.m.Lock() - t := w.s.t - w.s.m.Unlock() - - return newIterator(t).restart(&Item{k: k}) -} - -func (w *Writer) Close() error { - w.s.availableWriters <- true - w.s = nil - - return nil -} - -func (w *Writer) Set(k, v []byte) (err error) { - w.s.m.Lock() - w.s.t = w.s.t.Upsert(&Item{k: k, v: v}, rand.Int()) - w.s.m.Unlock() - - return nil -} - -func (w *Writer) Delete(k []byte) (err error) { - w.s.m.Lock() - w.s.t = w.s.t.Delete(&Item{k: k}) - w.s.m.Unlock() - - return nil +type Writer struct { + s *Store } func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.s.mo) + return store.NewEmulatedBatch(w.s.mo) +} + +func (w *Writer) ExecuteBatch(batch store.KVBatch) error { + + emulatedBatch, ok := batch.(*store.EmulatedBatch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + w.s.m.Lock() + for k, mergeOps := range emulatedBatch.Merger.Merges { + kb := []byte(k) + var existingVal []byte + existingItem := w.s.t.Get(&Item{k: kb}) + if existingItem != nil { + existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v + } + mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int()) + } + + for _, op := range emulatedBatch.Ops { + if op.V != nil { + w.s.t = 
w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int()) + } else { + w.s.t = w.s.t.Delete(&Item{k: op.K}) + } + } + w.s.m.Unlock() + + return nil +} + +func (w *Writer) Close() error { + w.s = nil + return nil } diff --git a/index/store/inmem/iterator.go b/index/store/inmem/iterator.go deleted file mode 100644 index 042bd890..00000000 --- a/index/store/inmem/iterator.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "github.com/ryszard/goskiplist/skiplist" -) - -type Iterator struct { - store *Store - iterator skiplist.Iterator - valid bool -} - -func newIterator(store *Store) *Iterator { - rv := Iterator{ - store: store, - iterator: store.list.Iterator(), - } - return &rv -} - -func (i *Iterator) SeekFirst() { - i.Seek([]byte{0}) -} - -func (i *Iterator) Seek(k []byte) { - i.valid = i.iterator.Seek(string(k)) -} - -func (i *Iterator) Next() { - i.valid = i.iterator.Next() -} - -func (i *Iterator) Current() ([]byte, []byte, bool) { - if i.valid { - return []byte(i.Key()), []byte(i.Value()), true - } - return nil, nil, false -} - -func (i *Iterator) Key() []byte { - if i.valid { - return []byte(i.iterator.Key().(string)) - } - return nil -} - -func (i *Iterator) Value() []byte { - if i.valid { - return []byte(i.iterator.Value().(string)) - } - return nil -} - -func (i *Iterator) Valid() bool { - return i.valid -} - -func (i *Iterator) Close() error { - i.iterator.Close() - return nil 
-} diff --git a/index/store/inmem/reader.go b/index/store/inmem/reader.go deleted file mode 100644 index d9966277..00000000 --- a/index/store/inmem/reader.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Reader struct { - store *Store -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.get(key) -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - return r.store.iterator(key) -} - -func (r *Reader) Close() error { - return nil -} diff --git a/index/store/inmem/store.go b/index/store/inmem/store.go deleted file mode 100644 index d121de59..00000000 --- a/index/store/inmem/store.go +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. 
See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - "github.com/ryszard/goskiplist/skiplist" -) - -const Name = "mem" - -type Store struct { - list *skiplist.SkipList - writer sync.Mutex - mo store.MergeOperator -} - -func New() (*Store, error) { - rv := Store{ - list: skiplist.NewStringMap(), - } - - return &rv, nil -} - -func MustOpen() *Store { - rv := Store{ - list: skiplist.NewStringMap(), - } - - return &rv -} - -func (i *Store) Open() error { - return nil -} - -func (i *Store) SetMergeOperator(mo store.MergeOperator) { - i.mo = mo -} - -func (i *Store) get(key []byte) ([]byte, error) { - val, ok := i.list.Get(string(key)) - if ok { - return []byte(val.(string)), nil - } - return nil, nil -} - -func (i *Store) set(key, val []byte) error { - i.writer.Lock() - defer i.writer.Unlock() - return i.setlocked(key, val) -} - -func (i *Store) setlocked(key, val []byte) error { - i.list.Set(string(key), string(val)) - return nil -} - -func (i *Store) delete(key []byte) error { - i.writer.Lock() - defer i.writer.Unlock() - return i.deletelocked(key) -} - -func (i *Store) deletelocked(key []byte) error { - i.list.Delete(string(key)) - return nil -} - -func (i *Store) Close() error { - return nil -} - -func (i *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(i) - rv.Seek(key) - return rv -} - -func (i *Store) Reader() (store.KVReader, error) { - return newReader(i) -} - -func (i *Store) Writer() (store.KVWriter, error) { - return newWriter(i) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - return New() -} - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} diff --git a/index/store/inmem/store_test.go b/index/store/inmem/store_test.go deleted file mode 100644 index 39079459..00000000 --- a/index/store/inmem/store_test.go 
+++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestStore(t *testing.T) { - s, err := New() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := 
reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = 
writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it = newReader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it = reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - -} diff --git a/index/store/inmem/writer.go b/index/store/inmem/writer.go deleted file mode 100644 index f88c4e01..00000000 --- a/index/store/inmem/writer.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. 
See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) -} - -func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.store.mo) -} - -func (w *Writer) Close() error { - w.store.writer.Unlock() - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/kvstore.go b/index/store/kvstore.go index b96fce8b..e1e04d73 100644 --- a/index/store/kvstore.go +++ b/index/store/kvstore.go @@ -9,50 +9,113 @@ package store -type KVBatch interface { - Set(key, val []byte) - Delete(key []byte) - Merge(key, val []byte) - Execute() error +// KVStore is an abstraction for working with KV stores +type KVStore interface { + + // Writer returns a KVWriter which can be used to + // make changes to the KVStore. If a writer cannot + // be obtained a non-nil error is returned. + Writer() (KVWriter, error) + + // Reader returns a KVReader which can be used to + // read data from the KVStore. If a reader cannot + // be obtained a non-nil error is returned. 
+ Reader() (KVReader, error) + + // Close closes the KVStore Close() error } +// KVReader is an abstraction of an **ISOLATED** reader +// In this context isolated is defined to mean that +// writes/deletes made after the KVReader is opened +// are not observed. +// Because there is usually a cost associated with +// keeping isolated readers active, users should +// close them as soon as they are no longer needed. +type KVReader interface { + + // Get returns the value associated with the key + // If the key does not exist, nil is returned. + // The caller owns the bytes returned. + Get(key []byte) ([]byte, error) + + // PrefixIterator returns a KVIterator that will + // visit all K/V pairs with the provided prefix + PrefixIterator(prefix []byte) KVIterator + + // RangeIterator returns a KVIterator that will + // visit all K/V pairs >= start AND < end + RangeIterator(start, end []byte) KVIterator + + // Close closes the iterator + Close() error +} + +// KVIterator is an abstraction around key iteration type KVIterator interface { - SeekFirst() - Seek([]byte) + + // Seek will advance the iterator to the specified key + Seek(key []byte) + + // Next will advance the iterator to the next key Next() - Current() ([]byte, []byte, bool) + // Key returns the key pointed to by the iterator + // The bytes returned are **ONLY** valid until the next call to Seek/Next/Close + // Continued use after that requires that they be copied. Key() []byte + + // Value returns the value pointed to by the iterator + // The bytes returned are **ONLY** valid until the next call to Seek/Next/Close + // Continued use after that requires that they be copied. 
Value() []byte + + // Valid returns whether or not the iterator is in a valid state Valid() bool + // Current returns Key(),Value(),Valid() in a single operation + Current() ([]byte, []byte, bool) + + // Close closes the iterator Close() error } -type KVStore interface { - Open() error - SetMergeOperator(MergeOperator) - Writer() (KVWriter, error) - Reader() (KVReader, error) - Close() error -} - +// KVWriter is an abstraction for mutating the KVStore +// KVWriter does **NOT** enforce restrictions of a single writer +// if the underlying KVStore allows concurrent writes, the +// KVWriter interface should also do so, it is up to the caller +// to do this in a way that is safe and makes sense type KVWriter interface { - KVReader - Set(key, val []byte) error - Delete(key []byte) error - NewBatch() KVBatch -} -type KVReader interface { - BytesSafeAfterClose() bool - Get(key []byte) ([]byte, error) - Iterator(key []byte) KVIterator + // NewBatch returns a KVBatch for performaing batch operations on this kvstore + NewBatch() KVBatch + + // ExecuteBatch will execute the KVBatch, the provided KVBatch **MUST** have + // been created by the same KVStore (though not necessarily the same KVWriter) + // Batch execution is atomic, either all the operations or none will be performed + ExecuteBatch(batch KVBatch) error + + // Close closes the writer Close() error } -type RangeIterable interface { - // iterates keys >= start and < end - RangeIterator(start, end []byte) KVIterator +// KVBatch is an abstraction for making multiple KV mutations at once +type KVBatch interface { + + // Set updates the key with the specified value + // both key and value []byte may be reused as soon as this call returns + Set(key, val []byte) + + // Delete removes the specified key + // the key []byte may be reused as soon as this call returns + Delete(key []byte) + + // Merge merges old value with the new value at the specified key + // as prescribed by the KVStores merge operator + // both key and 
value []byte may be reused as soon as this call returns + Merge(key, val []byte) + + // Reset frees resources for this batch and allows reuse + Reset() } diff --git a/index/store/merge.go b/index/store/merge.go index 390727bf..6e1a7da7 100644 --- a/index/store/merge.go +++ b/index/store/merge.go @@ -9,10 +9,6 @@ package store -import ( - "fmt" -) - // At the moment this happens to be the same interface as described by // RocksDB, but this may not always be the case. @@ -32,41 +28,20 @@ type MergeOperator interface { Name() string } -// EmulatedMergeSingle removes some duplicated code across -// KV stores which do not support merge operations -// on their own. It is up to the caller to ensure -// that an appropriate lock has been acquired in -// order for this behavior to be valid -func EmulatedMergeSingle(writer KVWriter, mo MergeOperator, key []byte, operand []byte) error { - existingValue, err := writer.Get(key) - if err != nil { - return err - } - newValue, ok := mo.FullMerge(key, existingValue, [][]byte{operand}) - if !ok { - return fmt.Errorf("merge operator returned failure") - } - err = writer.Set(key, newValue) - if err != nil { - return err - } - return nil -} - type EmulatedMerge struct { - merges map[string][][]byte + Merges map[string][][]byte mo MergeOperator } func NewEmulatedMerge(mo MergeOperator) *EmulatedMerge { return &EmulatedMerge{ - merges: make(map[string][][]byte), + Merges: make(map[string][][]byte), mo: mo, } } func (m *EmulatedMerge) Merge(key, val []byte) { - ops, ok := m.merges[string(key)] + ops, ok := m.Merges[string(key)] if ok && len(ops) > 0 { last := ops[len(ops)-1] mergedVal, partialMergeOk := m.mo.PartialMerge(key, last, val) @@ -80,41 +55,5 @@ func (m *EmulatedMerge) Merge(key, val []byte) { } else { ops = [][]byte{val} } - m.merges[string(key)] = ops -} - -func (m *EmulatedMerge) Execute(w KVWriter) error { - for k, mergeOps := range m.merges { - kb := []byte(k) - existingVal, err := w.Get(kb) - if err != nil { - return err - 
} - mergedVal, fullMergeOk := m.mo.FullMerge(kb, existingVal, mergeOps) - if !fullMergeOk { - return fmt.Errorf("merge operator returned failure") - } - err = w.Set(kb, mergedVal) - if err != nil { - return err - } - } - return nil -} - -func (m *EmulatedMerge) ExecuteDeferred(w KVWriter) ([]*op, error) { - rv := make([]*op, 0, 1000) - for k, mergeOps := range m.merges { - kb := []byte(k) - existingVal, err := w.Get(kb) - if err != nil { - return nil, err - } - mergedVal, fullMergeOk := m.mo.FullMerge(kb, existingVal, mergeOps) - if !fullMergeOk { - return nil, fmt.Errorf("merge operator returned failure") - } - rv = append(rv, &op{kb, mergedVal}) - } - return rv, nil + m.Merges[string(key)] = ops } diff --git a/index/store/metrics/batch.go b/index/store/metrics/batch.go new file mode 100644 index 00000000..8c30448f --- /dev/null +++ b/index/store/metrics/batch.go @@ -0,0 +1,26 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Batch struct { + s *Store + o store.KVBatch +} + +func (b *Batch) Set(key, val []byte) { + b.o.Set(key, val) +} + +func (b *Batch) Delete(key []byte) { + b.o.Delete(key) +} + +func (b *Batch) Merge(key, val []byte) { + b.s.TimerBatchMerge.Time(func() { + b.o.Merge(key, val) + }) +} + +func (b *Batch) Reset() { + b.o.Reset() +} diff --git a/index/store/metrics/iterator.go b/index/store/metrics/iterator.go new file mode 100644 index 00000000..ff44eb78 --- /dev/null +++ b/index/store/metrics/iterator.go @@ -0,0 +1,44 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Iterator struct { + s *Store + o store.KVIterator +} + +func (i *Iterator) Seek(x []byte) { + i.s.TimerIteratorSeek.Time(func() { + i.o.Seek(x) + }) +} + +func (i *Iterator) Next() { + i.s.TimerIteratorNext.Time(func() { + i.o.Next() + }) +} + +func (i *Iterator) Current() ([]byte, []byte, bool) { + return i.o.Current() +} + +func (i *Iterator) Key() []byte { + return i.o.Key() +} + +func (i *Iterator) Value() []byte { + 
return i.o.Value() +} + +func (i *Iterator) Valid() bool { + return i.o.Valid() +} + +func (i *Iterator) Close() error { + err := i.o.Close() + if err != nil { + i.s.AddError("Iterator.Close", err, nil) + } + return err +} diff --git a/index/store/metrics/metrics.go b/index/store/metrics/metrics.go deleted file mode 100644 index 7651a848..00000000 --- a/index/store/metrics/metrics.go +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// Package metrics provides a bleve.store.KVStore implementation that -// wraps another, real KVStore implementation, and uses go-metrics to -// track runtime performance metrics. 
-package metrics - -import ( - "container/list" - "encoding/json" - "fmt" - "io" - "sync" - "time" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - - "github.com/rcrowley/go-metrics" -) - -const Name = "metrics" -const MaxErrors = 100 - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - name, ok := config["kvStoreName_actual"].(string) - if !ok || name == "" { - return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+ - " config: %#v", config) - } - - if name == Name { - return nil, fmt.Errorf("metrics: circular kvStoreName_actual") - } - - ctr := registry.KVStoreConstructorByName(name) - if ctr == nil { - return nil, fmt.Errorf("metrics: no kv store constructor,"+ - " kvStoreName_actual: %s", name) - } - - kvs, err := ctr(config) - if err != nil { - return nil, err - } - - return NewBleveMetricsStore(kvs), nil -} - -func NewBleveMetricsStore(o store.KVStore) *Store { - return &Store{ - o: o, - - TimerReaderGet: metrics.NewTimer(), - TimerReaderIterator: metrics.NewTimer(), - TimerWriterGet: metrics.NewTimer(), - TimerWriterIterator: metrics.NewTimer(), - TimerWriterSet: metrics.NewTimer(), - TimerWriterDelete: metrics.NewTimer(), - TimerIteratorSeekFirst: metrics.NewTimer(), - TimerIteratorSeek: metrics.NewTimer(), - TimerIteratorNext: metrics.NewTimer(), - TimerBatchMerge: metrics.NewTimer(), - TimerBatchExecute: metrics.NewTimer(), - - errors: list.New(), - } -} - -// The following structs are wrappers around "real" bleve kvstore -// implementations. 
- -type Store struct { - o store.KVStore - - TimerReaderGet metrics.Timer - TimerReaderIterator metrics.Timer - TimerWriterGet metrics.Timer - TimerWriterIterator metrics.Timer - TimerWriterSet metrics.Timer - TimerWriterDelete metrics.Timer - TimerIteratorSeekFirst metrics.Timer - TimerIteratorSeek metrics.Timer - TimerIteratorNext metrics.Timer - TimerBatchMerge metrics.Timer - TimerBatchExecute metrics.Timer - - m sync.Mutex // Protects the fields that follow. - errors *list.List // Capped list of StoreError's. -} - -type StoreError struct { - Time string - Op string - Err string - Key string -} - -type Reader struct { - s *Store - o store.KVReader -} - -type Writer struct { - s *Store - o store.KVWriter -} - -type Iterator struct { - s *Store - o store.KVIterator -} - -type Batch struct { - s *Store - o store.KVBatch -} - -func (s *Store) Open() error { - return s.o.Open() -} - -func (s *Store) Close() error { - return s.o.Close() -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.o.SetMergeOperator(mo) -} - -func (s *Store) Reader() (store.KVReader, error) { - o, err := s.o.Reader() - if err != nil { - s.AddError("Reader", err, nil) - return nil, err - } - return &Reader{s: s, o: o}, nil -} - -func (s *Store) Writer() (store.KVWriter, error) { - o, err := s.o.Writer() - if err != nil { - s.AddError("Writer", err, nil) - return nil, err - } - return &Writer{s: s, o: o}, nil -} - -func (s *Store) Actual() store.KVStore { - return s.o -} - -func (w *Reader) BytesSafeAfterClose() bool { - return w.o.BytesSafeAfterClose() -} - -func (w *Reader) Get(key []byte) (v []byte, err error) { - w.s.TimerReaderGet.Time(func() { - v, err = w.o.Get(key) - if err != nil { - w.s.AddError("Reader.Get", err, key) - } - }) - return -} - -func (w *Reader) Iterator(key []byte) (i store.KVIterator) { - w.s.TimerReaderIterator.Time(func() { - i = &Iterator{s: w.s, o: w.o.Iterator(key)} - }) - return -} - -func (w *Reader) Close() error { - err := w.o.Close() - if err 
!= nil { - w.s.AddError("Reader.Close", err, nil) - } - return err -} - -func (w *Writer) BytesSafeAfterClose() bool { - return w.o.BytesSafeAfterClose() -} - -func (w *Writer) Get(key []byte) (v []byte, err error) { - w.s.TimerWriterGet.Time(func() { - v, err = w.o.Get(key) - if err != nil { - w.s.AddError("Writer.Get", err, key) - } - }) - return -} - -func (w *Writer) Iterator(key []byte) (i store.KVIterator) { - w.s.TimerWriterIterator.Time(func() { - i = &Iterator{s: w.s, o: w.o.Iterator(key)} - }) - return -} - -func (w *Writer) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Writer.Close", err, nil) - } - return err -} - -func (w *Writer) Set(key, val []byte) (err error) { - w.s.TimerWriterSet.Time(func() { - err = w.o.Set(key, val) - if err != nil { - w.s.AddError("Writer.Set", err, key) - } - }) - return -} - -func (w *Writer) Delete(key []byte) (err error) { - w.s.TimerWriterDelete.Time(func() { - err = w.o.Delete(key) - if err != nil { - w.s.AddError("Writer.Delete", err, key) - } - }) - return -} - -func (w *Writer) NewBatch() store.KVBatch { - return &Batch{s: w.s, o: w.o.NewBatch()} -} - -func (w *Iterator) SeekFirst() { - w.s.TimerIteratorSeekFirst.Time(func() { - w.o.SeekFirst() - }) -} - -func (w *Iterator) Seek(x []byte) { - w.s.TimerIteratorSeek.Time(func() { - w.o.Seek(x) - }) -} - -func (w *Iterator) Next() { - w.s.TimerIteratorNext.Time(func() { - w.o.Next() - }) -} - -func (w *Iterator) Current() ([]byte, []byte, bool) { - return w.o.Current() -} - -func (w *Iterator) Key() []byte { - return w.o.Key() -} - -func (w *Iterator) Value() []byte { - return w.o.Value() -} - -func (w *Iterator) Valid() bool { - return w.o.Valid() -} - -func (w *Iterator) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Iterator.Close", err, nil) - } - return err -} - -func (w *Batch) Set(key, val []byte) { - w.o.Set(key, val) -} - -func (w *Batch) Delete(key []byte) { - w.o.Delete(key) -} - -func (w *Batch) Merge(key, val 
[]byte) { - w.s.TimerBatchMerge.Time(func() { - w.o.Merge(key, val) - }) -} - -func (w *Batch) Execute() (err error) { - w.s.TimerBatchExecute.Time(func() { - err = w.o.Execute() - if err != nil { - w.s.AddError("Batch.Execute", err, nil) - } - }) - return -} - -func (w *Batch) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Batch.Close", err, nil) - } - return err -} - -// -------------------------------------------------------- - -func (s *Store) AddError(op string, err error, key []byte) { - e := &StoreError{ - Time: time.Now().Format(time.RFC3339Nano), - Op: op, - Err: fmt.Sprintf("%v", err), - Key: string(key), - } - - s.m.Lock() - for s.errors.Len() >= MaxErrors { - s.errors.Remove(s.errors.Front()) - } - s.errors.PushBack(e) - s.m.Unlock() -} - -// -------------------------------------------------------- - -func (s *Store) WriteJSON(w io.Writer) { - w.Write([]byte(`{"TimerReaderGet":`)) - WriteTimerJSON(w, s.TimerReaderGet) - w.Write([]byte(`,"TimerReaderIterator":`)) - WriteTimerJSON(w, s.TimerReaderIterator) - w.Write([]byte(`,"TimerWriterGet":`)) - WriteTimerJSON(w, s.TimerWriterGet) - w.Write([]byte(`,"TimerWriterIterator":`)) - WriteTimerJSON(w, s.TimerWriterIterator) - w.Write([]byte(`,"TimerWriterSet":`)) - WriteTimerJSON(w, s.TimerWriterSet) - w.Write([]byte(`,"TimerWriterDelete":`)) - WriteTimerJSON(w, s.TimerWriterDelete) - w.Write([]byte(`,"TimerIteratorSeekFirst":`)) - WriteTimerJSON(w, s.TimerIteratorSeekFirst) - w.Write([]byte(`,"TimerIteratorSeek":`)) - WriteTimerJSON(w, s.TimerIteratorSeek) - w.Write([]byte(`,"TimerIteratorNext":`)) - WriteTimerJSON(w, s.TimerIteratorNext) - w.Write([]byte(`,"TimerBatchMerge":`)) - WriteTimerJSON(w, s.TimerBatchMerge) - w.Write([]byte(`,"TimerBatchExecute":`)) - WriteTimerJSON(w, s.TimerBatchExecute) - - w.Write([]byte(`,"Errors":[`)) - s.m.Lock() - e := s.errors.Front() - i := 0 - for e != nil { - se, ok := e.Value.(*StoreError) - if ok && se != nil { - if i > 0 { - 
w.Write([]byte(",")) - } - buf, err := json.Marshal(se) - if err == nil { - w.Write(buf) - } - } - e = e.Next() - i = i + 1 - } - s.m.Unlock() - w.Write([]byte(`]`)) - - w.Write([]byte(`}`)) -} - -func (s *Store) WriteCSVHeader(w io.Writer) { - WriteTimerCSVHeader(w, "TimerReaderGet") - WriteTimerCSVHeader(w, "TimerReaderIterator") - WriteTimerCSVHeader(w, "TimerWriterGet") - WriteTimerCSVHeader(w, "TimerWriterIterator") - WriteTimerCSVHeader(w, "TimerWriterSet") - WriteTimerCSVHeader(w, "TimerWriterDelete") - WriteTimerCSVHeader(w, "TimerIteratorSeekFirst") - WriteTimerCSVHeader(w, "TimerIteratorSeek") - WriteTimerCSVHeader(w, "TimerIteratorNext") - WriteTimerCSVHeader(w, "TimerBatchMerge") - WriteTimerCSVHeader(w, "TimerBatchExecute") -} - -func (s *Store) WriteCSV(w io.Writer) { - WriteTimerCSV(w, s.TimerReaderGet) - WriteTimerCSV(w, s.TimerReaderIterator) - WriteTimerCSV(w, s.TimerWriterGet) - WriteTimerCSV(w, s.TimerWriterIterator) - WriteTimerCSV(w, s.TimerWriterSet) - WriteTimerCSV(w, s.TimerWriterDelete) - WriteTimerCSV(w, s.TimerIteratorSeekFirst) - WriteTimerCSV(w, s.TimerIteratorSeek) - WriteTimerCSV(w, s.TimerIteratorNext) - WriteTimerCSV(w, s.TimerBatchMerge) - WriteTimerCSV(w, s.TimerBatchExecute) -} - -// -------------------------------------------------------- - -// NOTE: This is copy & pasted from cbft as otherwise there -// would be an import cycle. 
- -var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999} - -func WriteTimerJSON(w io.Writer, timer metrics.Timer) { - t := timer.Snapshot() - p := t.Percentiles(timerPercentiles) - - fmt.Fprintf(w, `{"count":%9d,`, t.Count()) - fmt.Fprintf(w, `"min":%9d,`, t.Min()) - fmt.Fprintf(w, `"max":%9d,`, t.Max()) - fmt.Fprintf(w, `"mean":%12.2f,`, t.Mean()) - fmt.Fprintf(w, `"stddev":%12.2f,`, t.StdDev()) - fmt.Fprintf(w, `"percentiles":{`) - fmt.Fprintf(w, `"median":%12.2f,`, p[0]) - fmt.Fprintf(w, `"75%%":%12.2f,`, p[1]) - fmt.Fprintf(w, `"95%%":%12.2f,`, p[2]) - fmt.Fprintf(w, `"99%%":%12.2f,`, p[3]) - fmt.Fprintf(w, `"99.9%%":%12.2f},`, p[4]) - fmt.Fprintf(w, `"rates":{`) - fmt.Fprintf(w, `"1-min":%12.2f,`, t.Rate1()) - fmt.Fprintf(w, `"5-min":%12.2f,`, t.Rate5()) - fmt.Fprintf(w, `"15-min":%12.2f,`, t.Rate15()) - fmt.Fprintf(w, `"mean":%12.2f}}`, t.RateMean()) -} - -func WriteTimerCSVHeader(w io.Writer, prefix string) { - fmt.Fprintf(w, "%s-count,", prefix) - fmt.Fprintf(w, "%s-min,", prefix) - fmt.Fprintf(w, "%s-max,", prefix) - fmt.Fprintf(w, "%s-mean,", prefix) - fmt.Fprintf(w, "%s-stddev,", prefix) - fmt.Fprintf(w, "%s-percentile-50%%,", prefix) - fmt.Fprintf(w, "%s-percentile-75%%,", prefix) - fmt.Fprintf(w, "%s-percentile-95%%,", prefix) - fmt.Fprintf(w, "%s-percentile-99%%,", prefix) - fmt.Fprintf(w, "%s-percentile-99.9%%,", prefix) - fmt.Fprintf(w, "%s-rate-1-min,", prefix) - fmt.Fprintf(w, "%s-rate-5-min,", prefix) - fmt.Fprintf(w, "%s-rate-15-min,", prefix) - fmt.Fprintf(w, "%s-rate-mean", prefix) -} - -func WriteTimerCSV(w io.Writer, timer metrics.Timer) { - t := timer.Snapshot() - p := t.Percentiles(timerPercentiles) - - fmt.Fprintf(w, `%d,`, t.Count()) - fmt.Fprintf(w, `%d,`, t.Min()) - fmt.Fprintf(w, `%d,`, t.Max()) - fmt.Fprintf(w, `%f,`, t.Mean()) - fmt.Fprintf(w, `%f,`, t.StdDev()) - fmt.Fprintf(w, `%f,`, p[0]) - fmt.Fprintf(w, `%f,`, p[1]) - fmt.Fprintf(w, `%f,`, p[2]) - fmt.Fprintf(w, `%f,`, p[3]) - fmt.Fprintf(w, `%f,`, p[4]) - 
fmt.Fprintf(w, `%f,`, t.Rate1()) - fmt.Fprintf(w, `%f,`, t.Rate5()) - fmt.Fprintf(w, `%f,`, t.Rate15()) - fmt.Fprintf(w, `%f`, t.RateMean()) -} diff --git a/index/store/metrics/metrics_test.go b/index/store/metrics/metrics_test.go index 4c94fbfe..5b8e4735 100644 --- a/index/store/metrics/metrics_test.go +++ b/index/store/metrics/metrics_test.go @@ -16,35 +16,31 @@ import ( "bytes" "encoding/json" "fmt" - "reflect" "testing" - "github.com/blevesearch/bleve/index/store" - _ "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/store/gtreap" ) func TestMetricsStore(t *testing.T) { - s, err := StoreConstructor(map[string]interface{}{}) + s, err := New(nil, map[string]interface{}{}) if err == nil { t.Errorf("expected err when bad config") } - s, err = StoreConstructor(map[string]interface{}{ + s, err = New(nil, map[string]interface{}{ "kvStoreName_actual": "some-invalid-kvstore-name", }) if err == nil { t.Errorf("expected err when unknown kvStoreName_actual") } - s, err = StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", + s, err = New(nil, map[string]interface{}{ + "kvStoreName_actual": gtreap.Name, }) if err != nil { t.Fatal(err) } - CommonTestKVStore(t, s) - b := bytes.NewBuffer(nil) s.(*Store).WriteJSON(b) if b.Len() <= 0 { @@ -72,240 +68,9 @@ func TestMetricsStore(t *testing.T) { } } -func TestReaderIsolation(t *testing.T) { - s, err := StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", - }) - if err != nil { - t.Fatal(err) - } - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), 
[]byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify 
that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } -} - func TestErrors(t *testing.T) { - s, err := 
StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", + s, err := New(nil, map[string]interface{}{ + "kvStoreName_actual": gtreap.Name, }) if err != nil { t.Fatal(err) diff --git a/index/store/metrics/reader.go b/index/store/metrics/reader.go new file mode 100644 index 00000000..c555c736 --- /dev/null +++ b/index/store/metrics/reader.go @@ -0,0 +1,40 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Reader struct { + s *Store + o store.KVReader +} + +func (r *Reader) Get(key []byte) (v []byte, err error) { + r.s.TimerReaderGet.Time(func() { + v, err = r.o.Get(key) + if err != nil { + r.s.AddError("Reader.Get", err, key) + } + }) + return +} + +func (r *Reader) PrefixIterator(prefix []byte) (i store.KVIterator) { + r.s.TimerReaderPrefixIterator.Time(func() { + i = &Iterator{s: r.s, o: r.o.PrefixIterator(prefix)} + }) + return +} + +func (r *Reader) RangeIterator(start, end []byte) (i store.KVIterator) { + r.s.TimerReaderRangeIterator.Time(func() { + i = &Iterator{s: r.s, o: r.o.RangeIterator(start, end)} + }) + return +} + +func (r *Reader) Close() error { + err := r.o.Close() + if err != nil { + r.s.AddError("Reader.Close", err, nil) + } + return err +} diff --git a/index/store/metrics/store.go b/index/store/metrics/store.go new file mode 100644 index 00000000..fab47afd --- /dev/null +++ b/index/store/metrics/store.go @@ -0,0 +1,196 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. 
See the License for the specific language +// governing permissions and limitations under the License. + +// Package metrics provides a bleve.store.KVStore implementation that +// wraps another, real KVStore implementation, and uses go-metrics to +// track runtime performance metrics. +package metrics + +import ( + "container/list" + "encoding/json" + "fmt" + "io" + "sync" + "time" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/registry" + "github.com/rcrowley/go-metrics" +) + +const Name = "metrics" + +type Store struct { + o store.KVStore + + TimerReaderGet metrics.Timer + TimerReaderPrefixIterator metrics.Timer + TimerReaderRangeIterator metrics.Timer + TimerWriterExecuteBatch metrics.Timer + TimerIteratorSeek metrics.Timer + TimerIteratorNext metrics.Timer + TimerBatchMerge metrics.Timer + + m sync.Mutex // Protects the fields that follow. + errors *list.List // Capped list of StoreError's. +} + +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + + name, ok := config["kvStoreName_actual"].(string) + if !ok || name == "" { + return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+ + " config: %#v", config) + } + + if name == Name { + return nil, fmt.Errorf("metrics: circular kvStoreName_actual") + } + + ctr := registry.KVStoreConstructorByName(name) + if ctr == nil { + return nil, fmt.Errorf("metrics: no kv store constructor,"+ + " kvStoreName_actual: %s", name) + } + + kvs, err := ctr(mo, config) + if err != nil { + return nil, err + } + + return &Store{ + o: kvs, + + TimerReaderGet: metrics.NewTimer(), + TimerReaderPrefixIterator: metrics.NewTimer(), + TimerReaderRangeIterator: metrics.NewTimer(), + TimerWriterExecuteBatch: metrics.NewTimer(), + TimerIteratorSeek: metrics.NewTimer(), + TimerIteratorNext: metrics.NewTimer(), + TimerBatchMerge: metrics.NewTimer(), + + errors: list.New(), + }, nil +} + +func init() { + registry.RegisterKVStore(Name, New) +} + +func (s *Store) Close() 
error { + return s.o.Close() +} + +func (s *Store) Reader() (store.KVReader, error) { + o, err := s.o.Reader() + if err != nil { + s.AddError("Reader", err, nil) + return nil, err + } + return &Reader{s: s, o: o}, nil +} + +func (s *Store) Writer() (store.KVWriter, error) { + o, err := s.o.Writer() + if err != nil { + s.AddError("Writer", err, nil) + return nil, err + } + return &Writer{s: s, o: o}, nil +} + +// Metric specific code below: + +const MaxErrors = 100 + +type StoreError struct { + Time string + Op string + Err string + Key string +} + +func (s *Store) AddError(op string, err error, key []byte) { + e := &StoreError{ + Time: time.Now().Format(time.RFC3339Nano), + Op: op, + Err: fmt.Sprintf("%v", err), + Key: string(key), + } + + s.m.Lock() + for s.errors.Len() >= MaxErrors { + s.errors.Remove(s.errors.Front()) + } + s.errors.PushBack(e) + s.m.Unlock() +} + +func (s *Store) WriteJSON(w io.Writer) { + w.Write([]byte(`{"TimerReaderGet":`)) + WriteTimerJSON(w, s.TimerReaderGet) + w.Write([]byte(`,"TimerReaderPrefixIterator":`)) + WriteTimerJSON(w, s.TimerReaderPrefixIterator) + w.Write([]byte(`,"TimerReaderRangeIterator":`)) + WriteTimerJSON(w, s.TimerReaderRangeIterator) + w.Write([]byte(`,"TimerWriterExecuteBatch":`)) + WriteTimerJSON(w, s.TimerWriterExecuteBatch) + w.Write([]byte(`,"TimerIteratorSeek":`)) + WriteTimerJSON(w, s.TimerIteratorSeek) + w.Write([]byte(`,"TimerIteratorNext":`)) + WriteTimerJSON(w, s.TimerIteratorNext) + w.Write([]byte(`,"TimerBatchMerge":`)) + WriteTimerJSON(w, s.TimerBatchMerge) + + w.Write([]byte(`,"Errors":[`)) + s.m.Lock() + e := s.errors.Front() + i := 0 + for e != nil { + se, ok := e.Value.(*StoreError) + if ok && se != nil { + if i > 0 { + w.Write([]byte(",")) + } + buf, err := json.Marshal(se) + if err == nil { + w.Write(buf) + } + } + e = e.Next() + i = i + 1 + } + s.m.Unlock() + w.Write([]byte(`]`)) + + w.Write([]byte(`}`)) +} + +func (s *Store) WriteCSVHeader(w io.Writer) { + WriteTimerCSVHeader(w, "TimerReaderGet") + 
WriteTimerCSVHeader(w, "TimerReaderPrefixIterator") + WriteTimerCSVHeader(w, "TimerReaderRangeIterator") + WriteTimerCSVHeader(w, "TimerWriterExecuteBatch") + WriteTimerCSVHeader(w, "TimerIteratorSeek") + WriteTimerCSVHeader(w, "TimerIteratorNext") + WriteTimerCSVHeader(w, "TimerBatchMerge") +} + +func (s *Store) WriteCSV(w io.Writer) { + WriteTimerCSV(w, s.TimerReaderGet) + WriteTimerCSV(w, s.TimerReaderPrefixIterator) + WriteTimerCSV(w, s.TimerReaderRangeIterator) + WriteTimerCSV(w, s.TimerWriterExecuteBatch) + WriteTimerCSV(w, s.TimerIteratorSeek) + WriteTimerCSV(w, s.TimerIteratorNext) + WriteTimerCSV(w, s.TimerBatchMerge) +} diff --git a/index/store/metrics/store_test.go b/index/store/metrics/store_test.go new file mode 100644 index 00000000..f51d6d61 --- /dev/null +++ b/index/store/metrics/store_test.go @@ -0,0 +1,72 @@ +package metrics + +import ( + "testing" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/store/test" +) + +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, map[string]interface{}{"kvStoreName_actual": gtreap.Name}) + if err != nil { + t.Fatal(err) + } + return rv +} + +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestMetricsKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestKVCrud(t, s) +} + +func TestMetricsReaderIsolation(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderIsolation(t, s) +} + +func TestMetricsReaderOwnsGetBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestMetricsWriterOwnsBytes(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestMetricsPrefixIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestPrefixIterator(t, s) +} +
+func TestMetricsRangeIterator(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIterator(t, s) +} + +func TestMetricsRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + +func TestMetricsMerge(t *testing.T) { + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) + test.CommonTestMerge(t, s) +} diff --git a/index/store/metrics/util.go b/index/store/metrics/util.go new file mode 100644 index 00000000..053e38ee --- /dev/null +++ b/index/store/metrics/util.go @@ -0,0 +1,72 @@ +package metrics + +import ( + "fmt" + "io" + + "github.com/rcrowley/go-metrics" +) + +// NOTE: This is copy & pasted from cbft as otherwise there +// would be an import cycle. + +var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999} + +func WriteTimerJSON(w io.Writer, timer metrics.Timer) { + t := timer.Snapshot() + p := t.Percentiles(timerPercentiles) + + fmt.Fprintf(w, `{"count":%9d,`, t.Count()) + fmt.Fprintf(w, `"min":%9d,`, t.Min()) + fmt.Fprintf(w, `"max":%9d,`, t.Max()) + fmt.Fprintf(w, `"mean":%12.2f,`, t.Mean()) + fmt.Fprintf(w, `"stddev":%12.2f,`, t.StdDev()) + fmt.Fprintf(w, `"percentiles":{`) + fmt.Fprintf(w, `"median":%12.2f,`, p[0]) + fmt.Fprintf(w, `"75%%":%12.2f,`, p[1]) + fmt.Fprintf(w, `"95%%":%12.2f,`, p[2]) + fmt.Fprintf(w, `"99%%":%12.2f,`, p[3]) + fmt.Fprintf(w, `"99.9%%":%12.2f},`, p[4]) + fmt.Fprintf(w, `"rates":{`) + fmt.Fprintf(w, `"1-min":%12.2f,`, t.Rate1()) + fmt.Fprintf(w, `"5-min":%12.2f,`, t.Rate5()) + fmt.Fprintf(w, `"15-min":%12.2f,`, t.Rate15()) + fmt.Fprintf(w, `"mean":%12.2f}}`, t.RateMean()) +} + +func WriteTimerCSVHeader(w io.Writer, prefix string) { + fmt.Fprintf(w, "%s-count,", prefix) + fmt.Fprintf(w, "%s-min,", prefix) + fmt.Fprintf(w, "%s-max,", prefix) + fmt.Fprintf(w, "%s-mean,", prefix) + fmt.Fprintf(w, "%s-stddev,", prefix) + fmt.Fprintf(w, "%s-percentile-50%%,", prefix) + fmt.Fprintf(w, "%s-percentile-75%%,", prefix) + 
fmt.Fprintf(w, "%s-percentile-95%%,", prefix) + fmt.Fprintf(w, "%s-percentile-99%%,", prefix) + fmt.Fprintf(w, "%s-percentile-99.9%%,", prefix) + fmt.Fprintf(w, "%s-rate-1-min,", prefix) + fmt.Fprintf(w, "%s-rate-5-min,", prefix) + fmt.Fprintf(w, "%s-rate-15-min,", prefix) + fmt.Fprintf(w, "%s-rate-mean", prefix) +} + +func WriteTimerCSV(w io.Writer, timer metrics.Timer) { + t := timer.Snapshot() + p := t.Percentiles(timerPercentiles) + + fmt.Fprintf(w, `%d,`, t.Count()) + fmt.Fprintf(w, `%d,`, t.Min()) + fmt.Fprintf(w, `%d,`, t.Max()) + fmt.Fprintf(w, `%f,`, t.Mean()) + fmt.Fprintf(w, `%f,`, t.StdDev()) + fmt.Fprintf(w, `%f,`, p[0]) + fmt.Fprintf(w, `%f,`, p[1]) + fmt.Fprintf(w, `%f,`, p[2]) + fmt.Fprintf(w, `%f,`, p[3]) + fmt.Fprintf(w, `%f,`, p[4]) + fmt.Fprintf(w, `%f,`, t.Rate1()) + fmt.Fprintf(w, `%f,`, t.Rate5()) + fmt.Fprintf(w, `%f,`, t.Rate15()) + fmt.Fprintf(w, `%f`, t.RateMean()) +} diff --git a/index/store/metrics/writer.go b/index/store/metrics/writer.go new file mode 100644 index 00000000..0f23bfdf --- /dev/null +++ b/index/store/metrics/writer.go @@ -0,0 +1,38 @@ +package metrics + +import ( + "fmt" + + "github.com/blevesearch/bleve/index/store" +) + +type Writer struct { + s *Store + o store.KVWriter +} + +func (w *Writer) Close() error { + err := w.o.Close() + if err != nil { + w.s.AddError("Writer.Close", err, nil) + } + return err +} + +func (w *Writer) NewBatch() store.KVBatch { + return &Batch{s: w.s, o: w.o.NewBatch()} +} + +func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) { + batch, ok := b.(*Batch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + w.s.TimerWriterExecuteBatch.Time(func() { + err = w.o.ExecuteBatch(batch.o) + if err != nil { + w.s.AddError("Writer.ExecuteBatch", err, nil) + } + }) + return +} diff --git a/index/store/null/null.go b/index/store/null/null.go index f5d9d450..53ebbca8 100644 --- a/index/store/null/null.go +++ b/index/store/null/null.go @@ -18,167 +18,87 @@ const Name = "null" type Store 
struct{} -func New() (*Store, error) { - rv := Store{} - return &rv, nil -} - -func (i *Store) Open() error { - return nil -} - -func (i *Store) SetMergeOperator(mo store.MergeOperator) { - +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + return &Store{}, nil } func (i *Store) Close() error { return nil } -func (i *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(i) - return rv -} - func (i *Store) Reader() (store.KVReader, error) { - return newReader(i) + return &reader{}, nil } func (i *Store) Writer() (store.KVWriter, error) { - return newWriter(i) + return &writer{}, nil } -func (i *Store) newBatch() store.KVBatch { - return newBatch(i) -} +type reader struct{} -type Reader struct { - store *Store -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return true -} - -func (r *Reader) Get(key []byte) ([]byte, error) { +func (r *reader) Get(key []byte) ([]byte, error) { return nil, nil } -func (r *Reader) Iterator(key []byte) store.KVIterator { - return r.store.iterator(key) +func (r *reader) PrefixIterator(prefix []byte) store.KVIterator { + return &iterator{} } -func (r *Reader) Close() error { +func (r *reader) RangeIterator(start, end []byte) store.KVIterator { + return &iterator{} +} + +func (r *reader) Close() error { return nil } -type Iterator struct{} +type iterator struct{} -func newIterator(store *Store) *Iterator { - return &Iterator{} -} +func (i *iterator) SeekFirst() {} +func (i *iterator) Seek(k []byte) {} +func (i *iterator) Next() {} -func (i *Iterator) SeekFirst() {} - -func (i *Iterator) Seek(k []byte) {} - -func (i *Iterator) Next() {} - -func (i *Iterator) Current() ([]byte, []byte, bool) { +func (i *iterator) Current() ([]byte, []byte, bool) { return nil, nil, false } -func (i *Iterator) Key() []byte { +func (i *iterator) Key() []byte { return nil } -func (i *Iterator) Value() []byte 
{ +func (i *iterator) Value() []byte { return nil } -func (i *Iterator) Valid() bool { +func (i *iterator) Valid() bool { return false } -func (i *Iterator) Close() error { +func (i *iterator) Close() error { return nil } -type Batch struct{} +type batch struct{} -func newBatch(s *Store) *Batch { - rv := Batch{} - return &rv +func (i *batch) Set(key, val []byte) {} +func (i *batch) Delete(key []byte) {} +func (i *batch) Merge(key, val []byte) {} +func (i *batch) Reset() {} + +type writer struct{} + +func (w *writer) NewBatch() store.KVBatch { + return &batch{} } -func (i *Batch) Set(key, val []byte) { -} - -func (i *Batch) Delete(key []byte) { -} - -func (i *Batch) Merge(key, val []byte) { -} - -func (i *Batch) Execute() error { +func (w *writer) ExecuteBatch(store.KVBatch) error { return nil } -func (i *Batch) Close() error { +func (w *writer) Close() error { return nil } -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return true -} - -func (w *Writer) Set(key, val []byte) error { - return nil -} - -func (w *Writer) Delete(key []byte) error { - return nil -} - -func (w *Writer) NewBatch() store.KVBatch { - return newBatch(w.store) -} - -func (w *Writer) Close() error { - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return nil, nil -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - return New() -} - func init() { - registry.RegisterKVStore(Name, StoreConstructor) + registry.RegisterKVStore(Name, New) } diff --git a/index/store/null/null_test.go b/index/store/null/null_test.go index 720d482a..5e921801 100644 --- 
a/index/store/null/null_test.go +++ b/index/store/null/null_test.go @@ -7,32 +7,22 @@ import ( ) func TestStore(t *testing.T) { - s, err := New() + s, err := New(nil, nil) if err != nil { t.Fatal(err) } - CommonTestKVStore(t, s) + NullTestKVStore(t, s) } -func CommonTestKVStore(t *testing.T, s store.KVStore) { +// NullTestKVStore has very different expectations +// compared to CommonTestKVStore +func NullTestKVStore(t *testing.T, s store.KVStore) { writer, err := s.Writer() if err != nil { t.Error(err) } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } batch := writer.NewBatch() batch.Set([]byte("b"), []byte("val-b")) @@ -45,7 +35,7 @@ func CommonTestKVStore(t *testing.T, s store.KVStore) { batch.Set([]byte("i"), []byte("val-i")) batch.Set([]byte("j"), []byte("val-j")) - err = batch.Execute() + err = writer.ExecuteBatch(batch) if err != nil { t.Fatal(err) } @@ -64,7 +54,7 @@ func CommonTestKVStore(t *testing.T, s store.KVStore) { t.Fatal(err) } }() - it := reader.Iterator([]byte("b")) + it := reader.RangeIterator([]byte("b"), nil) key, val, valid := it.Current() if valid { t.Fatalf("valid true, expected false") diff --git a/index/store/test/README.md b/index/store/test/README.md new file mode 100644 index 00000000..392df281 --- /dev/null +++ b/index/store/test/README.md @@ -0,0 +1,11 @@ +# Generic KVStore implementation tests + +These are a set of common tests that should pass on any correct KVStore implementation. + +Each test function in this package has the form: + + func CommonTest(t *testing.T, s store.KVStore) {...} + +A KVStore implementation test should use the same name, including its own KVStore name in the test function. It should instantiate an instance of the store, and pass the testing.T and store to the common function. 
+ +The common test functions should *NOT* close the KVStore. The KVStore test implementation should close the store and cleanup any state. \ No newline at end of file diff --git a/index/store/test/bytes.go b/index/store/test/bytes.go new file mode 100644 index 00000000..d09a2720 --- /dev/null +++ b/index/store/test/bytes.go @@ -0,0 +1,262 @@ +package test + +import ( + "bytes" + "reflect" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests which focus on the byte ownership + +// CommonTestReaderOwnsGetBytes attempts to mutate the returned bytes +// first, while the reader is still open, second after that reader is +// closed, then the original key is read again, to ensure these +// modifications did not cause panic, or mutate the stored value +func CommonTestReaderOwnsGetBytes(t *testing.T, s store.KVStore) { + + originalKey := []byte("key") + originalVal := []byte("val") + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write key/val + batch := writer.NewBatch() + batch.Set(originalKey, originalVal) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // read key + returnedVal, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal, originalVal) { + t.Fatalf("expected value: %v for '%s', got %v", originalVal, originalKey, returnedVal) + } + + // mutate the returned value with reader still open + for i := range returnedVal { + returnedVal[i] = '1' + } + + // read the key again + returnedVal2, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal2, originalVal) { + t.Fatalf("expected value: %v for '%s', got %v", originalVal, 
originalKey, returnedVal2) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // mutate the original returned value again + for i := range returnedVal { + returnedVal[i] = '2' + } + + // open another reader + reader, err = s.Reader() + if err != nil { + t.Fatal(err) + } + + // read the key again + returnedVal3, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal3, originalVal) { + t.Fatalf("expected value: %v for '%s', got %v", originalVal, originalKey, returnedVal3) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // finally check that the value we mutated still has what we set it to + for i := range returnedVal { + if returnedVal[i] != '2' { + t.Errorf("expected byte to be '2', got %v", returnedVal[i]) + } + } +} + +func CommonTestWriterOwnsBytes(t *testing.T, s store.KVStore) { + + keyBuffer := make([]byte, 5) + valBuffer := make([]byte, 5) + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write key/val pairs reusing same buffer + batch := writer.NewBatch() + for i := 0; i < 10; i++ { + keyBuffer[0] = 'k' + keyBuffer[1] = 'e' + keyBuffer[2] = 'y' + keyBuffer[3] = '-' + keyBuffer[4] = byte('0' + i) + valBuffer[0] = 'v' + valBuffer[1] = 'a' + valBuffer[2] = 'l' + valBuffer[3] = '-' + valBuffer[4] = byte('0' + i) + batch.Set(keyBuffer, valBuffer) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // check that we can read back what we expect + allks := make([][]byte, 0) + allvs := make([][]byte, 0) + iter := reader.RangeIterator(nil, nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := 
make([]byte, len(k)) + copy(copyk, k) + allks = append(allks, copyk) + v := iter.Value() + copyv := make([]byte, len(v)) + copy(copyv, v) + allvs = append(allvs, copyv) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + if len(allks) != 10 { + t.Fatalf("expected 10 k/v pairs, got %d", len(allks)) + } + for i, key := range allks { + val := allvs[i] + if !bytes.HasSuffix(key, []byte{byte('0' + i)}) { + t.Errorf("expected key %v to end in %d", key, []byte{byte('0' + i)}) + } + if !bytes.HasSuffix(val, []byte{byte('0' + i)}) { + t.Errorf("expected val %v to end in %d", val, []byte{byte('0' + i)}) + } + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // open a writer + writer, err = s.Writer() + if err != nil { + t.Fatal(err) + } + + // now delete using same approach + batch = writer.NewBatch() + for i := 0; i < 10; i++ { + keyBuffer[0] = 'k' + keyBuffer[1] = 'e' + keyBuffer[2] = 'y' + keyBuffer[3] = '-' + keyBuffer[4] = byte('0' + i) + batch.Delete(keyBuffer) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err = s.Reader() + if err != nil { + t.Fatal(err) + } + + // check that we can read back what we expect + allks = make([][]byte, 0) + iter = reader.RangeIterator(nil, nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + allks = append(allks, copyk) + v := iter.Value() + copyv := make([]byte, len(v)) + copy(copyv, v) + allvs = append(allvs, copyv) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + if len(allks) != 0 { + t.Fatalf("expected 0 k/v pairs remaining, got %d", len(allks)) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/index/store/test/crud.go b/index/store/test/crud.go
new file mode 100644 index 00000000..ce4c37e6 --- /dev/null +++ b/index/store/test/crud.go @@ -0,0 +1,98 @@ +package test + +import ( + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// basic crud tests + +func CommonTestKVCrud(t *testing.T, s store.KVStore) { + + writer, err := s.Writer() + if err != nil { + t.Error(err) + } + + batch := writer.NewBatch() + batch.Set([]byte("a"), []byte("val-a")) + batch.Set([]byte("z"), []byte("val-z")) + batch.Delete([]byte("z")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + batch.Reset() + + batch.Set([]byte("b"), []byte("val-b")) + batch.Set([]byte("c"), []byte("val-c")) + batch.Set([]byte("d"), []byte("val-d")) + batch.Set([]byte("e"), []byte("val-e")) + batch.Set([]byte("f"), []byte("val-f")) + batch.Set([]byte("g"), []byte("val-g")) + batch.Set([]byte("h"), []byte("val-h")) + batch.Set([]byte("i"), []byte("val-i")) + batch.Set([]byte("j"), []byte("val-j")) + + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + reader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + it := reader.RangeIterator([]byte("b"), nil) + key, val, valid := it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "b" { + t.Fatalf("expected key b, got %s", key) + } + if string(val) != "val-b" { + t.Fatalf("expected value val-b, got %s", val) + } + + it.Next() + key, val, valid = it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "c" { + t.Fatalf("expected key c, got %s", key) + } + if string(val) != "val-c" { + t.Fatalf("expected value val-c, got %s", val) + } + + it.Seek([]byte("i")) + key, val, valid = it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "i" { + t.Fatalf("expected key i, got %s", key) + } + if 
string(val) != "val-i" { + t.Fatalf("expected value val-i, got %s", val) + } + + err = it.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/index/store/test/isolation.go b/index/store/test/isolation.go new file mode 100644 index 00000000..2bceca7c --- /dev/null +++ b/index/store/test/isolation.go @@ -0,0 +1,177 @@ +package test + +import ( + "fmt" + "reflect" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests focused on verifying that readers are isolated from writers + +func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { + // insert a kv pair + writer, err := s.Writer() + if err != nil { + t.Error(err) + } + + // ************************************************** + // this is a hack only required for BoltDB + // however it's harmless so to keep the tests + // the same everywhere, we include it here + // + // this is a hack to try to pre-emptively overflow + // boltdb writes *MAY* block a long reader + // in particular, if the write requires additional + // allocation, it must acquire the same lock as + // the reader, thus cannot continue until that + // reader is closed.
+ // in general this is not a problem for bleve + // (though it may affect performance in some cases) + // but it is a problem for this test which attempts + // to easily verify that readers are isolated + // this hack writes enough initial data such that + // the subsequent writes do not require additional + // space + hackSize := 1000 + batch := writer.NewBatch() + for i := 0; i < hackSize; i++ { + k := fmt.Sprintf("x%d", i) + batch.Set([]byte(k), []byte("filler")) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + // ************************************************** + + batch = writer.NewBatch() + batch.Set([]byte("a"), []byte("val-a")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // create an isolated reader + reader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + + // verify that we see the value already inserted + val, err := reader.Get([]byte("a")) + if err != nil { + t.Error(err) + } + if !reflect.DeepEqual(val, []byte("val-a")) { + t.Errorf("expected val-a, got nil") + } + + // verify that an iterator sees it + count := 0 + it := reader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it.Close() + if err != nil { + t.Fatal(err) + } + }() + for it.Valid() { + it.Next() + count++ + } + if count != 1 { + t.Errorf("expected iterator to see 1, saw %d", count) + } + + // add something after the reader was created + writer, err = s.Writer() + if err != nil { + t.Error(err) + } + batch = writer.NewBatch() + batch.Set([]byte("b"), []byte("val-b")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // ensure that a newer reader sees it + newReader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := newReader.Close() + if err !=
nil { + t.Fatal(err) + } + }() + val, err = newReader.Get([]byte("b")) + if err != nil { + t.Error(err) + } + if !reflect.DeepEqual(val, []byte("val-b")) { + t.Errorf("expected val-b, got nil") + } + + // ensure that the direct iterator sees it + count = 0 + it2 := newReader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it2.Close() + if err != nil { + t.Fatal(err) + } + }() + for it2.Valid() { + it2.Next() + count++ + } + if count != 2 { + t.Errorf("expected iterator to see 2, saw %d", count) + } + + // but that the isolated reader does not + val, err = reader.Get([]byte("b")) + if err != nil { + t.Error(err) + } + if val != nil { + t.Errorf("expected nil, got %v", val) + } + + // and ensure that the iterator on the isolated reader also does not + count = 0 + it3 := reader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it3.Close() + if err != nil { + t.Fatal(err) + } + }() + for it3.Valid() { + it3.Next() + count++ + } + if count != 1 { + t.Errorf("expected iterator to see 1, saw %d", count) + } + +} diff --git a/index/store/test/iterator.go b/index/store/test/iterator.go new file mode 100644 index 00000000..0d7d8992 --- /dev/null +++ b/index/store/test/iterator.go @@ -0,0 +1,344 @@ +package test + +import ( + "bytes" + "reflect" + "strings" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests around the correct behavior of iterators + +type testRow struct { + key []byte + val []byte +} + +func batchWriteRows(s store.KVStore, rows []testRow) error { + // open a writer + writer, err := s.Writer() + if err != nil { + return err + } + + // write the data + batch := writer.NewBatch() + for _, row := range rows { + batch.Set(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + return err + } + + // close the writer + err = writer.Close() + if err != nil { + return err + } + return nil +} + +func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { + + data := []testRow{ + 
{[]byte("apple"), []byte("val")}, + {[]byte("cat1"), []byte("val")}, + {[]byte("cat2"), []byte("val")}, + {[]byte("cat3"), []byte("val")}, + {[]byte("dog1"), []byte("val")}, + {[]byte("dog2"), []byte("val")}, + {[]byte("dog4"), []byte("val")}, + {[]byte("elephant"), []byte("val")}, + } + + expectedCats := [][]byte{ + []byte("cat1"), + []byte("cat2"), + []byte("cat3"), + } + + expectedDogs := [][]byte{ + []byte("dog1"), + // we seek to "dog3" and ensure it skips over "dog2" + // but still finds "dog4" even though there was no "dog3" + []byte("dog4"), + } + + err := batchWriteRows(s, data) + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get a prefix reader + cats := make([][]byte, 0) + iter := reader.PrefixIterator([]byte("cat")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cats = append(cats, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found all the cats + if !reflect.DeepEqual(cats, expectedCats) { + t.Fatalf("expected cats %v, got %v", expectedCats, cats) + } + + // get a prefix reader + dogs := make([][]byte, 0) + iter = reader.PrefixIterator([]byte("dog")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + dogs = append(dogs, copyk) + if len(dogs) < 2 { + iter.Seek([]byte("dog3")) + } else { + iter.Next() + } + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found the expected dogs + if !reflect.DeepEqual(dogs, expectedDogs) { + t.Fatalf("expected dogs %v, got %v", expectedDogs, dogs) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func CommonTestRangeIterator(t *testing.T, s store.KVStore) { + + data := []testRow{ + {[]byte("a1"), []byte("val")}, + 
{[]byte("b1"), []byte("val")}, + {[]byte("b2"), []byte("val")}, + {[]byte("b3"), []byte("val")}, + {[]byte("c1"), []byte("val")}, + {[]byte("c2"), []byte("val")}, + {[]byte("c4"), []byte("val")}, + {[]byte("d1"), []byte("val")}, + } + + expectedAll := make([][]byte, 0) + expectedBToC := make([][]byte, 0) + expectedCToDSeek3 := make([][]byte, 0) + expectedCToEnd := make([][]byte, 0) + for _, row := range data { + expectedAll = append(expectedAll, row.key) + if bytes.HasPrefix(row.key, []byte("b")) { + expectedBToC = append(expectedBToC, row.key) + } + if bytes.HasPrefix(row.key, []byte("c")) && !bytes.HasSuffix(row.key, []byte("2")) { + expectedCToDSeek3 = append(expectedCToDSeek3, row.key) + } + if bytes.Compare(row.key, []byte("c")) > 0 { + expectedCToEnd = append(expectedCToEnd, row.key) + } + } + + err := batchWriteRows(s, data) + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get a range iterator (all) + all := make([][]byte, 0) + iter := reader.RangeIterator(nil, nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + all = append(all, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found all + if !reflect.DeepEqual(all, expectedAll) { + t.Fatalf("expected all %v, got %v", expectedAll, all) + } + + // get range iterator from b - c + bToC := make([][]byte, 0) + iter = reader.RangeIterator([]byte("b"), []byte("c")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + bToC = append(bToC, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found b-c + if !reflect.DeepEqual(bToC, expectedBToC) { + t.Fatalf("expected b-c %v, got %v", expectedBToC, bToC) + } + + // get range iterator from c - 
d, but seek to 'c3' + cToDSeek3 := make([][]byte, 0) + iter = reader.RangeIterator([]byte("c"), []byte("d")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cToDSeek3 = append(cToDSeek3, copyk) + if len(cToDSeek3) < 2 { + iter.Seek([]byte("c3")) + } else { + iter.Next() + } + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found c-d with seek to c3 + if !reflect.DeepEqual(cToDSeek3, expectedCToDSeek3) { + t.Fatalf("expected c-d with seek to c3 %v, got %v", expectedCToDSeek3, cToDSeek3) + } + + // get range iterator from c to the end + cToEnd := make([][]byte, 0) + iter = reader.RangeIterator([]byte("c"), nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cToEnd = append(cToEnd, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found c to end + if !reflect.DeepEqual(cToEnd, expectedCToEnd) { + t.Fatalf("expected c to end %v, got %v", expectedCToEnd, cToEnd) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} + +func CommonTestRangeIteratorSeek(t *testing.T, s store.KVStore) { + + data := []testRow{ + {[]byte("a1"), []byte("val")}, + {[]byte("b1"), []byte("val")}, + {[]byte("c1"), []byte("val")}, + {[]byte("d1"), []byte("val")}, + {[]byte("e1"), []byte("val")}, + } + + err := batchWriteRows(s, data) + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get an iterator on a central subset of the data + start := []byte("b1") + end := []byte("d1") + iter := reader.RangeIterator(start, end) + + // seek before, at and after every possible key + targets := [][]byte{} + for _, row := range data { + prefix := string(row.key[:1]) + targets = append(targets, []byte(prefix+"0")) + targets =
append(targets, []byte(prefix+"1")) + targets = append(targets, []byte(prefix+"2")) + } + for _, target := range targets { + found := []string{} + for iter.Seek(target); iter.Valid(); iter.Next() { + found = append(found, string(iter.Key())) + if len(found) > len(data) { + t.Fatalf("enumerated more than data keys after seeking to %s", + string(target)) + } + } + wanted := []string{} + for _, row := range data { + if bytes.Compare(row.key, start) < 0 || + bytes.Compare(row.key, target) < 0 || + bytes.Compare(row.key, end) >= 0 { + continue + } + wanted = append(wanted, string(row.key)) + } + fs := strings.Join(found, ", ") + ws := strings.Join(wanted, ", ") + if fs != ws { + t.Fatalf("iterating from %s returned [%s] instead of [%s]", + string(target), fs, ws) + } + } + + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + if err != nil { + t.Fatal(err) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/index/store/test/merge.go b/index/store/test/merge.go new file mode 100644 index 00000000..0cd661f8 --- /dev/null +++ b/index/store/test/merge.go @@ -0,0 +1,108 @@ +package test + +import ( + "encoding/binary" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// test merge behavior + +func encodeUint64(in uint64) []byte { + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, in) + return rv +} + +func CommonTestMerge(t *testing.T, s store.KVStore) { + + testKey := []byte("k1") + + data := []struct { + key []byte + val []byte + }{ + {testKey, encodeUint64(1)}, + {testKey, encodeUint64(1)}, + } + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write the data + batch := writer.NewBatch() + for _, row := range data { + batch.Merge(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := 
s.Reader() + if err != nil { + t.Fatal(err) + } + + // read key + returnedVal, err := reader.Get(testKey) + if err != nil { + t.Fatal(err) + } + + // check the value + mergedval := binary.LittleEndian.Uint64(returnedVal) + if mergedval != 2 { + t.Errorf("expected 2, got %d", mergedval) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + +} + +// a test merge operator which is just an incrementing counter of uint64 +type TestMergeCounter struct{} + +func (mc *TestMergeCounter) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) { + var newval uint64 + if len(existingValue) > 0 { + newval = binary.LittleEndian.Uint64(existingValue) + } + + // now process operands + for _, operand := range operands { + next := binary.LittleEndian.Uint64(operand) + newval += next + } + + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, newval) + return rv, true +} + +func (mc *TestMergeCounter) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) { + left := binary.LittleEndian.Uint64(leftOperand) + right := binary.LittleEndian.Uint64(rightOperand) + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, left+right) + return rv, true +} + +func (mc *TestMergeCounter) Name() string { + return "test_merge_counter" +} diff --git a/index/upside_down/analysis_test.go b/index/upside_down/analysis_test.go index 5c5cd78f..6b46d2d3 100644 --- a/index/upside_down/analysis_test.go +++ b/index/upside_down/analysis_test.go @@ -18,12 +18,11 @@ func BenchmarkAnalyze(b *testing.B) { b.Fatal(err) } - s, err := null.New() + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) d := document.NewDocument("1") f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer) diff --git a/index/upside_down/benchmark_boltdb_test.go 
b/index/upside_down/benchmark_boltdb_test.go index 549f8d7b..f6ba4f73 100644 --- a/index/upside_down/benchmark_boltdb_test.go +++ b/index/upside_down/benchmark_boltdb_test.go @@ -10,68 +10,61 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/boltdb" ) -func CreateBoltDB() (store.KVStore, error) { - s := boltdb.New("test", "bleve") - return s, nil -} - -func DestroyBoltDB() error { - return os.RemoveAll("test") +var boltTestConfig = map[string]interface{}{ + "path": "test", } func BenchmarkBoltDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 1) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 1) } func BenchmarkBoltDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 2) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 2) } func BenchmarkBoltDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 4) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 4) } // batches func BenchmarkBoltDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 10) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 10) } func BenchmarkBoltDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 10) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 10) } func BenchmarkBoltDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 10) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 10) } func BenchmarkBoltDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 100) } func 
BenchmarkBoltDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 100) } func BenchmarkBoltDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 100) } func BenchmarkBoltBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 1000) } func BenchmarkBoltBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 1000) } func BenchmarkBoltBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_common_test.go b/index/upside_down/benchmark_common_test.go index d04095dd..363ff443 100644 --- a/index/upside_down/benchmark_common_test.go +++ b/index/upside_down/benchmark_common_test.go @@ -10,13 +10,13 @@ package upside_down import ( + "os" "strconv" "testing" _ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" ) @@ -33,10 +33,13 @@ var benchmarkDocBodies = []string{ "The expansion ratio of a liquefied and cryogenic substance is the volume of a given amount of that substance in liquid form compared to the volume of the same amount of substance in gaseous form, at room temperature and normal atmospheric pressure.", } -type KVStoreCreate func() (store.KVStore, error) type KVStoreDestroy func() error -func 
CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDestroy, analysisWorkers int) { +func DestroyTest() error { + return os.RemoveAll("test") +} + +func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) { cache := registry.NewCache() analyzer, err := cache.AnalyzerNamed("standard") @@ -50,12 +53,11 @@ func CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDes b.ResetTimer() b.StopTimer() for i := 0; i < b.N; i++ { - s, err := create() + analysisQueue := index.NewAnalysisQueue(analysisWorkers) + idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(analysisWorkers) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { @@ -81,7 +83,7 @@ func CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDes } } -func CommonBenchmarkIndexBatch(b *testing.B, create KVStoreCreate, destroy KVStoreDestroy, analysisWorkers, batchSize int) { +func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) { cache := registry.NewCache() analyzer, err := cache.AnalyzerNamed("standard") @@ -93,12 +95,11 @@ func CommonBenchmarkIndexBatch(b *testing.B, create KVStoreCreate, destroy KVSto b.StopTimer() for i := 0; i < b.N; i++ { - s, err := create() + analysisQueue := index.NewAnalysisQueue(analysisWorkers) + idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(analysisWorkers) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { diff --git a/index/upside_down/benchmark_forestdb_test.go b/index/upside_down/benchmark_forestdb_test.go index 89f5cffe..5ece4334 100644 --- a/index/upside_down/benchmark_forestdb_test.go +++ 
b/index/upside_down/benchmark_forestdb_test.go @@ -15,72 +15,194 @@ import ( "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/forestdb" ) -func CreateForestDB() (store.KVStore, error) { - err := os.MkdirAll("testdir", 0700) - if err != nil { - return nil, err - } - s, err := forestdb.New("testdir/test", true, nil) - if err != nil { - return nil, err - } - return s, nil +var forestDBTestOption = map[string]interface{}{ + "path": "testdir/test", + "create_if_missing": true, } +// internally used to reset, so we also +// re-make the testdir func DestroyForestDB() error { - return os.RemoveAll("testdir") + err := os.RemoveAll("testdir") + if err != nil { + return err + } + err = os.MkdirAll("testdir", 0700) + if err != nil { + return err + } + return nil } func BenchmarkForestDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 1) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1) } func BenchmarkForestDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 2) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2) } func BenchmarkForestDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 4) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4) } // batches func BenchmarkForestDBIndexing1Workers10Batch(b *testing.B) { - 
CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 10) } func BenchmarkForestDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 10) } func BenchmarkForestDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 10) } func BenchmarkForestDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 100) } func BenchmarkForestDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 100) } func BenchmarkForestDBIndexing4Workers100Batch(b *testing.B) { - 
CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 100) } func BenchmarkForestDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 1000) } func BenchmarkForestDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 1000) } func BenchmarkForestDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 1000) } diff --git a/index/upside_down/benchmark_goleveldb_test.go b/index/upside_down/benchmark_goleveldb_test.go index ba003367..2c0fa2cd 100644 --- a/index/upside_down/benchmark_goleveldb_test.go +++ b/index/upside_down/benchmark_goleveldb_test.go @@ -10,71 +10,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/goleveldb" ) var goLevelDBTestOptions = map[string]interface{}{ 
"create_if_missing": true, -} - -func CreateGoLevelDB() (store.KVStore, error) { - return goleveldb.New("test", goLevelDBTestOptions) -} - -func DestroyGoLevelDB() error { - return os.RemoveAll("test") + "path": "test", } func BenchmarkGoLevelDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 1) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1) } func BenchmarkGoLevelDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 2) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2) } func BenchmarkGoLevelDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 4) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4) } // batches func BenchmarkGoLevelDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 10) } func BenchmarkGoLevelDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 2, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 10) } func BenchmarkGoLevelDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 10) } func BenchmarkGoLevelDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 100) } func BenchmarkGoLevelDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 2, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 100) } func 
BenchmarkGoLevelDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 100) } func BenchmarkGoLevelDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 1000) } func BenchmarkGoLevelDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 2, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 1000) } func BenchmarkGoLevelDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_gorocksdb_test.go b/index/upside_down/benchmark_gorocksdb_test.go index fb931c9b..8c2848e4 100644 --- a/index/upside_down/benchmark_gorocksdb_test.go +++ b/index/upside_down/benchmark_gorocksdb_test.go @@ -12,71 +12,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/rocksdb" ) var rocksdbTestOptions = map[string]interface{}{ + "path": "test", "create_if_missing": true, } -func CreateGoRocksDB() (store.KVStore, error) { - return rocksdb.New("test", rocksdbTestOptions) -} - -func DestroyGoRocksDB() error { - return os.RemoveAll("test") -} - func BenchmarkRocksDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 1) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1) } func BenchmarkRocksDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 2) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2) } func 
BenchmarkRocksDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 4) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4) } // batches func BenchmarkRocksDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 10) } func BenchmarkRocksDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 10) } func BenchmarkRocksDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 10) } func BenchmarkRocksDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 100) } func BenchmarkRocksDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 100) } func BenchmarkRocksDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 100) } func BenchmarkRocksDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 1000) } func BenchmarkRocksDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 1000) } func 
BenchmarkRocksDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_gtreap_test.go b/index/upside_down/benchmark_gtreap_test.go index 673d830f..2963da62 100644 --- a/index/upside_down/benchmark_gtreap_test.go +++ b/index/upside_down/benchmark_gtreap_test.go @@ -12,64 +12,55 @@ package upside_down import ( "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/gtreap" ) -func CreateGTreap() (store.KVStore, error) { - return gtreap.StoreConstructor(nil) -} - -func DestroyGTreap() error { - return nil -} - func BenchmarkGTreapIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 1) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 1) } func BenchmarkGTreapIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 2) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 2) } func BenchmarkGTreapIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 4) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 4) } // batches func BenchmarkGTreapIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 10) } func BenchmarkGTreapIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 10) } func BenchmarkGTreapIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 10) } func BenchmarkGTreapIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 100) 
+ CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 100) } func BenchmarkGTreapIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 100) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 100) } func BenchmarkGTreapIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 100) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 100) } func BenchmarkGTreapIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 1000) } func BenchmarkGTreapIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 1000) } func BenchmarkGTreapIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_inmem_test.go b/index/upside_down/benchmark_inmem_test.go deleted file mode 100644 index ee06bd5b..00000000 --- a/index/upside_down/benchmark_inmem_test.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package upside_down - -import ( - "testing" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/inmem" -) - -func CreateInMem() (store.KVStore, error) { - return inmem.New() -} - -func DestroyInMem() error { - return nil -} - -func BenchmarkInMemIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 1) -} - -func BenchmarkInMemIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 2) -} - -func BenchmarkInMemIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 4) -} - -// batches - -func BenchmarkInMemIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 10) -} - -func BenchmarkInMemIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 10) -} - -func BenchmarkInMemIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 10) -} - -func BenchmarkInMemIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 100) -} - -func BenchmarkInMemIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 100) -} - -func BenchmarkInMemIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 100) -} - -func BenchmarkInMemIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 1000) -} - -func BenchmarkInMemIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 1000) -} - -func BenchmarkInMemIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 1000) -} diff --git a/index/upside_down/benchmark_leveldb_test.go b/index/upside_down/benchmark_leveldb_test.go index 9c84fdad..dea8bc93 100644 --- a/index/upside_down/benchmark_leveldb_test.go +++ 
b/index/upside_down/benchmark_leveldb_test.go @@ -12,71 +12,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/leveldb" ) var leveldbTestOptions = map[string]interface{}{ + "path": "test", "create_if_missing": true, } -func CreateLevelDB() (store.KVStore, error) { - return leveldb.New("test", leveldbTestOptions) -} - -func DestroyLevelDB() error { - return os.RemoveAll("test") -} - func BenchmarkLevelDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 1) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1) } func BenchmarkLevelDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 2) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2) } func BenchmarkLevelDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 4) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4) } // batches func BenchmarkLevelDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1, 10) } func BenchmarkLevelDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 10) } func BenchmarkLevelDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 10) } func BenchmarkLevelDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1, 100) } func BenchmarkLevelDBIndexing2Workers100Batch(b *testing.B) { - 
CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 100) } func BenchmarkLevelDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 100) } func BenchmarkLevelDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1, 1000) } func BenchmarkLevelDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 1000) } func BenchmarkLevelDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_null_test.go b/index/upside_down/benchmark_null_test.go index a02813a3..ee0f7c77 100644 --- a/index/upside_down/benchmark_null_test.go +++ b/index/upside_down/benchmark_null_test.go @@ -12,64 +12,55 @@ package upside_down import ( "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/null" ) -func CreateNull() (store.KVStore, error) { - return null.New() -} - -func DestroyNull() error { - return nil -} - func BenchmarkNullIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 1) + CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 1) } func BenchmarkNullIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 2) + CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 2) } func BenchmarkNullIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 4) + CommonBenchmarkIndex(b, 
null.Name, nil, DestroyTest, 4) } // batches func BenchmarkNullIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 10) } func BenchmarkNullIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 10) } func BenchmarkNullIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 10) } func BenchmarkNullIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 100) } func BenchmarkNullIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 100) } func BenchmarkNullIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 100) } func BenchmarkNullIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 1000) } func BenchmarkNullIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 1000) } func BenchmarkNullIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 1000) } diff --git a/index/upside_down/dump.go b/index/upside_down/dump.go index 05333918..c773c8aa 100644 --- a/index/upside_down/dump.go +++ b/index/upside_down/dump.go @@ -26,7 +26,7 @@ func (udc *UpsideDownCouch) 
dumpPrefix(kvreader store.KVReader, rv chan interfac if start == nil { start = []byte{0} } - it := kvreader.Iterator(start) + it := kvreader.PrefixIterator(start) defer func() { cerr := it.Close() if cerr != nil { @@ -36,9 +36,28 @@ func (udc *UpsideDownCouch) dumpPrefix(kvreader store.KVReader, rv chan interfac key, val, valid := it.Current() for valid { - if prefix != nil && !bytes.HasPrefix(key, prefix) { - break + row, err := ParseFromKeyValue(key, val) + if err != nil { + rv <- err + return } + rv <- row + + it.Next() + key, val, valid = it.Current() + } +} + +func (udc *UpsideDownCouch) dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { + it := kvreader.RangeIterator(start, end) + defer func() { + cerr := it.Close() + if cerr != nil { + rv <- cerr + } + }() + key, val, valid := it.Current() + for valid { row, err := ParseFromKeyValue(key, val) if err != nil { @@ -70,7 +89,7 @@ func (udc *UpsideDownCouch) DumpAll() chan interface{} { } }() - udc.dumpPrefix(kvreader, rv, nil) + udc.dumpRange(kvreader, rv, nil, nil) }() return rv } @@ -149,7 +168,7 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} { // now walk term keys in order and add them as well if len(keys) > 0 { - it := kvreader.Iterator(keys[0]) + it := kvreader.RangeIterator(keys[0], nil) defer func() { cerr := it.Close() if cerr != nil { diff --git a/index/upside_down/dump_test.go b/index/upside_down/dump_test.go index 044ea1c3..5547366e 100644 --- a/index/upside_down/dump_test.go +++ b/index/upside_down/dump_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "testing" "time" @@ -22,17 +21,18 @@ import ( func TestDump(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := 
NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } diff --git a/index/upside_down/field_dict.go b/index/upside_down/field_dict.go index f5feb120..62b56c1c 100644 --- a/index/upside_down/field_dict.go +++ b/index/upside_down/field_dict.go @@ -10,7 +10,6 @@ package upside_down import ( - "bytes" "fmt" "github.com/blevesearch/bleve/index" @@ -20,7 +19,6 @@ import ( type UpsideDownCouchFieldDict struct { indexReader *IndexReader iterator store.KVIterator - endKey []byte field uint16 } @@ -29,16 +27,17 @@ func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTe startKey := NewDictionaryRow(startTerm, field, 0).Key() if endTerm == nil { endTerm = []byte{ByteSeparator} + } else { + endTerm = incrementBytes(endTerm) } endKey := NewDictionaryRow(endTerm, field, 0).Key() - it := indexReader.kvreader.Iterator(startKey) + it := indexReader.kvreader.RangeIterator(startKey, endKey) return &UpsideDownCouchFieldDict{ indexReader: indexReader, iterator: it, field: field, - endKey: endKey, }, nil } @@ -49,11 +48,6 @@ func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { return nil, nil } - // past end term - if bytes.Compare(key, r.endKey) > 0 { - return nil, nil - } - currRow, err := NewDictionaryRowKV(key, val) if err != nil { return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err) @@ -71,16 +65,3 @@ func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { func (r *UpsideDownCouchFieldDict) Close() error { return r.iterator.Close() } - -func incrementBytes(in []byte) []byte { - rv := make([]byte, len(in)) - copy(rv, in) - for i := len(rv) - 1; i >= 0; i-- { - rv[i] = rv[i] + 1 - if rv[i] != 0 { - // didn't overflow, so stop - break - } - } - return rv -} diff --git a/index/upside_down/field_dict_test.go b/index/upside_down/field_dict_test.go index 5ad91f32..82f56fb4 100644 --- 
a/index/upside_down/field_dict_test.go +++ b/index/upside_down/field_dict_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "reflect" "testing" @@ -21,17 +20,18 @@ import ( func TestIndexFieldDict(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index c151033e..4df490e4 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -10,8 +10,6 @@ package upside_down import ( - "bytes" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" @@ -64,7 +62,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { doc = document.NewDocument(id) storedRow := NewStoredRow(id, 0, []uint64{}, 'x', nil) storedRowScanPrefix := storedRow.ScanPrefixForDoc() - it := i.kvreader.Iterator(storedRowScanPrefix) + it := i.kvreader.PrefixIterator(storedRowScanPrefix) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -72,14 +70,8 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { }() key, val, valid := it.Current() for valid { - if !bytes.HasPrefix(key, storedRowScanPrefix) { - break - } - safeVal := val - if !i.kvreader.BytesSafeAfterClose() { - safeVal = make([]byte, len(val)) - copy(safeVal, val) - } + safeVal := make([]byte, len(val)) + copy(safeVal, val) var row *StoredRow row, err = NewStoredRowKV(key, safeVal) if err != nil { @@ -120,7 +112,7 @@ func (i *IndexReader) 
DocumentFieldTerms(id string) (index.FieldTerms, error) { func (i *IndexReader) Fields() (fields []string, err error) { fields = make([]string, 0) - it := i.kvreader.Iterator([]byte{'f'}) + it := i.kvreader.PrefixIterator([]byte{'f'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -128,9 +120,6 @@ func (i *IndexReader) Fields() (fields []string, err error) { }() key, val, valid := it.Current() for valid { - if !bytes.HasPrefix(key, []byte{'f'}) { - break - } var row UpsideDownCouchRow row, err = ParseFromKeyValue(key, val) if err != nil { @@ -162,3 +151,16 @@ func (i *IndexReader) DocCount() uint64 { func (i *IndexReader) Close() error { return i.kvreader.Close() } + +func incrementBytes(in []byte) []byte { + rv := make([]byte, len(in)) + copy(rv, in) + for i := len(rv) - 1; i >= 0; i-- { + rv[i] = rv[i] + 1 + if rv[i] != 0 { + // didn't overflow, so stop + break + } + } + return rv +} diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index cf511186..12879c04 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -10,19 +10,16 @@ package upside_down import ( - "bytes" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" ) type UpsideDownCouchTermFieldReader struct { - indexReader *IndexReader - iterator store.KVIterator - count uint64 - term []byte - field uint16 - readerPrefix []byte + indexReader *IndexReader + iterator store.KVIterator + count uint64 + term []byte + field uint16 } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { @@ -45,25 +42,14 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi } tfr := NewTermFrequencyRow(term, field, "", 0, 0) - readerPrefix := tfr.Key() - - var it store.KVIterator - switch kvreader := indexReader.kvreader.(type) { - case store.RangeIterable: - etfr := NewTermFrequencyRow(term[:len(term)-1], field, "", 0, 
0) - nextTermPrefix := etfr.Key() - it = kvreader.RangeIterator(readerPrefix, nextTermPrefix) - default: - it = kvreader.Iterator(readerPrefix) - } + it := indexReader.kvreader.PrefixIterator(tfr.Key()) return &UpsideDownCouchTermFieldReader{ - indexReader: indexReader, - iterator: it, - count: dictionaryRow.count, - term: term, - field: field, - readerPrefix: readerPrefix, + indexReader: indexReader, + iterator: it, + count: dictionaryRow.count, + term: term, + field: field, }, nil } @@ -75,10 +61,6 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - if !bytes.HasPrefix(key, r.readerPrefix) { - // end of the line - return nil, nil - } tfr, err := NewTermFrequencyRowKV(key, val) if err != nil { return nil, err @@ -101,10 +83,6 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermField r.iterator.Seek(tfr.Key()) key, val, valid := r.iterator.Current() if valid { - if !bytes.HasPrefix(key, r.readerPrefix) { - // end of the line - return nil, nil - } tfr, err := NewTermFrequencyRowKV(key, val) if err != nil { return nil, err @@ -131,8 +109,6 @@ func (r *UpsideDownCouchTermFieldReader) Close() error { type UpsideDownCouchDocIDReader struct { indexReader *IndexReader iterator store.KVIterator - start string - end string } func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) { @@ -143,24 +119,18 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) end = string([]byte{0xff}) } bisr := NewBackIndexRow(start, nil, nil) - it := indexReader.kvreader.Iterator(bisr.Key()) + bier := NewBackIndexRow(end, nil, nil) + it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) return &UpsideDownCouchDocIDReader{ indexReader: indexReader, iterator: it, - start: start, - end: end, }, nil } func (r *UpsideDownCouchDocIDReader) Next() (string, error) { key, 
val, valid := r.iterator.Current() if valid { - bier := NewBackIndexRow(r.end, nil, nil) - if bytes.Compare(key, bier.Key()) > 0 { - // end of the line - return "", nil - } br, err := NewBackIndexRowKV(key, val) if err != nil { return "", err @@ -176,11 +146,6 @@ func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) { r.iterator.Seek(bir.Key()) key, val, valid := r.iterator.Current() if valid { - bier := NewBackIndexRow(r.end, nil, nil) - if bytes.Compare(key, bier.Key()) > 0 { - // end of the line - return "", nil - } br, err := NewBackIndexRowKV(key, val) if err != nil { return "", err diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 9f37e106..72c7ccc9 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "reflect" "testing" @@ -21,17 +20,18 @@ import ( func TestIndexReader(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -195,17 +195,18 @@ func TestIndexReader(t *testing.T) { func TestIndexDocIdReader(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } 
diff --git a/index/upside_down/row.go b/index/upside_down/row.go index a0e6dcfc..442459ef 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -24,8 +24,12 @@ const ByteSeparator byte = 0xff type UpsideDownCouchRowStream chan UpsideDownCouchRow type UpsideDownCouchRow interface { + KeySize() int + KeyTo([]byte) (int, error) Key() []byte Value() []byte + ValueSize() int + ValueTo([]byte) (int, error) } func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { @@ -61,10 +65,28 @@ func (v *VersionRow) Key() []byte { return []byte{'v'} } +func (v *VersionRow) KeySize() int { + return 1 +} + +func (v *VersionRow) KeyTo(buf []byte) (int, error) { + buf[0] = 'v' + return 1, nil +} + func (v *VersionRow) Value() []byte { return []byte{byte(v.version)} } +func (v *VersionRow) ValueSize() int { + return 1 +} + +func (v *VersionRow) ValueTo(buf []byte) (int, error) { + buf[0] = v.version + return 1, nil +} + func (v *VersionRow) String() string { return fmt.Sprintf("Version: %d", v.version) } @@ -93,16 +115,34 @@ type InternalRow struct { } func (i *InternalRow) Key() []byte { - buf := make([]byte, len(i.key)+1) + buf := make([]byte, i.KeySize()) + size, _ := i.KeyTo(buf) + return buf[:size] +} + +func (i *InternalRow) KeySize() int { + return len(i.key) + 1 +} + +func (i *InternalRow) KeyTo(buf []byte) (int, error) { buf[0] = 'i' - copy(buf[1:], i.key) - return buf + actual := copy(buf[1:], i.key) + return 1 + actual, nil } func (i *InternalRow) Value() []byte { return i.val } +func (i *InternalRow) ValueSize() int { + return len(i.val) +} + +func (i *InternalRow) ValueTo(buf []byte) (int, error) { + actual := copy(buf, i.val) + return actual, nil +} + func (i *InternalRow) String() string { return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val) } @@ -129,16 +169,35 @@ type FieldRow struct { } func (f *FieldRow) Key() []byte { - buf := make([]byte, 3) + buf := make([]byte, f.KeySize()) + size, _ := 
f.KeyTo(buf) + return buf[:size] +} + +func (f *FieldRow) KeySize() int { + return 3 +} + +func (f *FieldRow) KeyTo(buf []byte) (int, error) { buf[0] = 'f' binary.LittleEndian.PutUint16(buf[1:3], f.index) - return buf + return 3, nil } func (f *FieldRow) Value() []byte { return append([]byte(f.name), ByteSeparator) } +func (f *FieldRow) ValueSize() int { + return len(f.name) + 1 +} + +func (f *FieldRow) ValueTo(buf []byte) (int, error) { + size := copy(buf, f.name) + buf[size] = ByteSeparator + return size + 1, nil +} + func (f *FieldRow) String() string { return fmt.Sprintf("Field: %d Name: %s", f.index, f.name) } @@ -182,18 +241,35 @@ type DictionaryRow struct { } func (dr *DictionaryRow) Key() []byte { - buf := make([]byte, 3+len(dr.term)) + buf := make([]byte, dr.KeySize()) + size, _ := dr.KeyTo(buf) + return buf[:size] +} + +func (dr *DictionaryRow) KeySize() int { + return len(dr.term) + 3 +} + +func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { buf[0] = 'd' binary.LittleEndian.PutUint16(buf[1:3], dr.field) - copy(buf[3:], dr.term) - return buf + size := copy(buf[3:], dr.term) + return size + 3, nil } func (dr *DictionaryRow) Value() []byte { - used := 0 - buf := make([]byte, binary.MaxVarintLen64) - used += binary.PutUvarint(buf, dr.count) - return buf[0:used] + buf := make([]byte, dr.ValueSize()) + size, _ := dr.ValueTo(buf) + return buf[:size] +} + +func (dr *DictionaryRow) ValueSize() int { + return binary.MaxVarintLen64 +} + +func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { + used := binary.PutUvarint(buf, dr.count) + return used, nil } func (dr *DictionaryRow) String() string { @@ -304,13 +380,22 @@ func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte { } func (tfr *TermFrequencyRow) Key() []byte { - buf := make([]byte, 3+len(tfr.term)+1+len(tfr.doc)) + buf := make([]byte, tfr.KeySize()) + size, _ := tfr.KeyTo(buf) + return buf[:size] +} + +func (tfr *TermFrequencyRow) KeySize() int { + return 3 + len(tfr.term) + 1 + 
len(tfr.doc) +} + +func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { buf[0] = 't' binary.LittleEndian.PutUint16(buf[1:3], tfr.field) termLen := copy(buf[3:], tfr.term) buf[3+termLen] = ByteSeparator - copy(buf[3+termLen+1:], tfr.doc) - return buf + docLen := copy(buf[3+termLen+1:], tfr.doc) + return 3 + termLen + 1 + docLen, nil } func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { @@ -318,15 +403,32 @@ func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { return dr.Key() } +func (tfr *TermFrequencyRow) DictionaryRowKeySize() int { + dr := NewDictionaryRow(tfr.term, tfr.field, 0) + return dr.KeySize() +} + +func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) { + dr := NewDictionaryRow(tfr.term, tfr.field, 0) + return dr.KeyTo(buf) +} + func (tfr *TermFrequencyRow) Value() []byte { - used := 0 + buf := make([]byte, tfr.ValueSize()) + size, _ := tfr.ValueTo(buf) + return buf[:size] +} + +func (tfr *TermFrequencyRow) ValueSize() int { bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64 for _, vector := range tfr.vectors { bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64 } - buf := make([]byte, bufLen) + return bufLen +} - used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], tfr.freq) +func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) { + used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq) normuint32 := math.Float32bits(tfr.norm) newbuf := buf[used : used+binary.MaxVarintLen64] @@ -342,7 +444,7 @@ func (tfr *TermFrequencyRow) Value() []byte { used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition) } } - return buf[0:used] + return used, nil } func (tfr *TermFrequencyRow) String() string { @@ -514,19 +616,41 @@ func (br *BackIndexRow) AllStoredKeys() [][]byte { } func (br *BackIndexRow) Key() []byte { - buf := make([]byte, len(br.doc)+1) + buf := make([]byte, br.KeySize()) + size, _ := br.KeyTo(buf) + return buf[:size] +} + 
+func (br *BackIndexRow) KeySize() int { + return len(br.doc) + 1 +} + +func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { buf[0] = 'b' - copy(buf[1:], br.doc) - return buf + used := copy(buf[1:], br.doc) + return used + 1, nil } func (br *BackIndexRow) Value() []byte { + buf := make([]byte, br.ValueSize()) + size, _ := br.ValueTo(buf) + return buf[:size] +} + +func (br *BackIndexRow) ValueSize() int { birv := &BackIndexRowValue{ TermEntries: br.termEntries, StoredEntries: br.storedEntries, } - bytes, _ := proto.Marshal(birv) - return bytes + return birv.Size() +} + +func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { + birv := &BackIndexRowValue{ + TermEntries: br.termEntries, + StoredEntries: br.storedEntries, + } + return birv.MarshalTo(buf) } func (br *BackIndexRow) String() string { @@ -552,7 +676,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { rv.doc, err = buf.ReadBytes(ByteSeparator) if err == io.EOF && len(rv.doc) < 1 { - err = fmt.Errorf("invalid doc length 0") + err = fmt.Errorf("invalid doc length 0 - % x", key) } if err != nil && err != io.EOF { return nil, err @@ -582,8 +706,17 @@ type StoredRow struct { } func (s *StoredRow) Key() []byte { + buf := make([]byte, s.KeySize()) + size, _ := s.KeyTo(buf) + return buf[0:size] +} + +func (s *StoredRow) KeySize() int { + return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions)) +} + +func (s *StoredRow) KeyTo(buf []byte) (int, error) { docLen := len(s.doc) - buf := make([]byte, 1+docLen+1+2+(binary.MaxVarintLen64*len(s.arrayPositions))) buf[0] = 's' copy(buf[1:], s.doc) buf[1+docLen] = ByteSeparator @@ -593,14 +726,23 @@ func (s *StoredRow) Key() []byte { varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition) bytesUsed += varbytes } - return buf[0:bytesUsed] + return bytesUsed, nil } func (s *StoredRow) Value() []byte { - rv := make([]byte, len(s.value)+1) - rv[0] = s.typ - copy(rv[1:], s.value) - return rv + buf := make([]byte, s.ValueSize()) 
+ size, _ := s.ValueTo(buf) + return buf[:size] +} + +func (s *StoredRow) ValueSize() int { + return len(s.value) + 1 +} + +func (s *StoredRow) ValueTo(buf []byte) (int, error) { + buf[0] = s.typ + used := copy(buf[1:], s.value) + return used + 1, nil } func (s *StoredRow) String() string { diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 8fd95e31..e0fd7d6c 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -10,7 +10,6 @@ package upside_down import ( - "bytes" "encoding/json" "fmt" "math" @@ -29,6 +28,13 @@ import ( const Name = "upside_down" +// RowBufferSize should ideally this is sized to be the smallest +// size that can cotain an index row key and its corresponding +// value. It is not a limit, if need be a larger buffer is +// allocated, but performance will be more optimal if *most* +// rows fit this size. +const RowBufferSize = 4 * 1024 + var VersionKey = []byte{'v'} var UnsafeBatchUseDetected = fmt.Errorf("bleve.Batch is NOT thread-safe, modification after execution detected") @@ -40,6 +46,8 @@ var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Ve type UpsideDownCouch struct { version uint8 path string + storeName string + storeConfig map[string]interface{} store store.KVStore fieldCache *index.FieldCache analysisQueue *index.AnalysisQueue @@ -48,16 +56,19 @@ type UpsideDownCouch struct { m sync.RWMutex // fields protected by m docCount uint64 + + writeMutex sync.Mutex } -func NewUpsideDownCouch(s store.KVStore, analysisQueue *index.AnalysisQueue) *UpsideDownCouch { +func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { return &UpsideDownCouch{ version: Version, fieldCache: index.NewFieldCache(), - store: s, + storeName: storeName, + storeConfig: storeConfig, analysisQueue: analysisQueue, stats: &indexStat{}, - } + }, nil } func (udc *UpsideDownCouch) init(kvwriter 
store.KVWriter) (err error) { @@ -67,27 +78,21 @@ func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) { // version marker rows = append(rows, NewVersionRow(udc.version)) - return udc.batchRows(kvwriter, nil, rows, nil) + err = udc.batchRows(kvwriter, nil, rows, nil) + return } func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { - keyPrefix := []byte{'f'} - it := kvreader.Iterator(keyPrefix) + it := kvreader.PrefixIterator([]byte{'f'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() - it.Seek(keyPrefix) key, val, valid := it.Current() for valid { - - // stop when - if !bytes.HasPrefix(key, keyPrefix) { - break - } var fieldRow *FieldRow fieldRow, err = NewFieldRowKV(key, val) if err != nil { @@ -99,13 +104,12 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { key, val, valid = it.Current() } - keyPrefix = []byte{'v'} - val, err = kvreader.Get(keyPrefix) + val, err = kvreader.Get([]byte{'v'}) if err != nil { return } var vr *VersionRow - vr, err = NewVersionRowKV(keyPrefix, val) + vr, err = NewVersionRowKV([]byte{'v'}, val) if err != nil { return } @@ -117,25 +121,66 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { return } +var rowBufferPool sync.Pool + +func GetRowBuffer() []byte { + if rb, ok := rowBufferPool.Get().([]byte); ok { + return rb + } else { + return make([]byte, RowBufferSize) + } +} + +func PutRowBuffer(buf []byte) { + rowBufferPool.Put(buf) +} + func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) { // prepare batch wb := writer.NewBatch() + // buffer to work with + rowBuf := GetRowBuffer() + // add for _, row := range addRows { tfr, ok := row.(*TermFrequencyRow) if ok { - // need to increment counter - dictionaryKey := tfr.DictionaryRowKey() - wb.Merge(dictionaryKey, dictionaryTermIncr) + if 
tfr.DictionaryRowKeySize() > len(rowBuf) { + rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + } + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) + if err != nil { + return err + } + wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr) } - wb.Set(row.Key(), row.Value()) + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } + keySize, err := row.KeyTo(rowBuf) + if err != nil { + return err + } + valSize, err := row.ValueTo(rowBuf[keySize:]) + wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize]) } // update for _, row := range updateRows { - wb.Set(row.Key(), row.Value()) + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } + keySize, err := row.KeyTo(rowBuf) + if err != nil { + return err + } + valSize, err := row.ValueTo(rowBuf[keySize:]) + if err != nil { + return err + } + wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize]) } // delete @@ -143,14 +188,29 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter - dictionaryKey := tfr.DictionaryRowKey() - wb.Merge(dictionaryKey, dictionaryTermDecr) + if tfr.DictionaryRowKeySize() > len(rowBuf) { + rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + } + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) + if err != nil { + return err + } + wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr) } - wb.Delete(row.Key()) + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } + keySize, err := row.KeyTo(rowBuf) + if err != nil { + return err + } + wb.Delete(rowBuf[:keySize]) } + PutRowBuffer(rowBuf) + // write out the batch - return wb.Execute() + return writer.ExecuteBatch(wb) } func (udc *UpsideDownCouch) DocCount() (uint64, error) { @@ -160,68 +220,88 @@ func (udc *UpsideDownCouch) DocCount() (uint64, error) { } func (udc *UpsideDownCouch) 
Open() (err error) { - // install the merge operator - udc.store.SetMergeOperator(&mergeOperator) + //acquire the write mutex for the duratin of Open() + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() - // now open the kv store - err = udc.store.Open() + // open the kv store + storeConstructor := registry.KVStoreConstructorByName(udc.storeName) + if storeConstructor == nil { + err = index.ErrorUnknownStorageType + return + } + + // now open the store + udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig) if err != nil { return } - // start a writer for the open process - var kvwriter store.KVWriter - kvwriter, err = udc.store.Writer() + // start a reader to look at the index + var kvreader store.KVReader + kvreader, err = udc.store.Reader() if err != nil { return } - defer func() { - if cerr := kvwriter.Close(); err == nil && cerr != nil { - err = cerr - } - }() var value []byte - value, err = kvwriter.Get(VersionKey) + value, err = kvreader.Get(VersionKey) if err != nil { + _ = kvreader.Close() return } - // init new index OR load schema - if value == nil { - err = udc.init(kvwriter) + if value != nil { + err = udc.loadSchema(kvreader) if err != nil { + _ = kvreader.Close() return } + + // set doc count + udc.m.Lock() + udc.docCount, err = udc.countDocs(kvreader) + udc.m.Unlock() + + err = kvreader.Close() } else { - err = udc.loadSchema(kvwriter) + // new index, close the reader and open writer to init + err = kvreader.Close() if err != nil { return } + + var kvwriter store.KVWriter + kvwriter, err = udc.store.Writer() + if err != nil { + return + } + defer func() { + if cerr := kvwriter.Close(); err == nil && cerr != nil { + err = cerr + } + }() + + // init the index + err = udc.init(kvwriter) } - // set doc count - udc.m.Lock() - udc.docCount, err = udc.countDocs(kvwriter) - udc.m.Unlock() + return } func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) (count uint64, err error) { - it := kvreader.Iterator([]byte{'b'}) + it := 
kvreader.PrefixIterator([]byte{'b'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() - key, _, valid := it.Current() + _, _, valid := it.Current() for valid { - if !bytes.HasPrefix(key, []byte{'b'}) { - break - } count++ it.Next() - key, _, valid = it.Current() + _, _, valid = it.Current() } return @@ -238,7 +318,7 @@ func (udc *UpsideDownCouch) rowCount() (count uint64, err error) { err = cerr } }() - it := kvreader.Iterator([]byte{0}) + it := kvreader.RangeIterator(nil, nil) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -272,6 +352,31 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() + if err != nil { + return + } + + // first we lookup the backindex row for the doc id if it exists + // lookup the back index row + var backIndexRow *BackIndexRow + backIndexRow, err = udc.backIndexRowForDoc(kvreader, doc.ID) + if err != nil { + _ = kvreader.Close() + atomic.AddUint64(&udc.stats.errors, 1) + return + } + + err = kvreader.Close() + if err != nil { + return + } + // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter @@ -285,15 +390,6 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { } }() - // first we lookup the backindex row for the doc id if it exists - // lookup the back index row - var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvwriter, doc.ID) - if err != nil { - atomic.AddUint64(&udc.stats.errors, 1) - return - } - // prepare a list of rows addRows := make([]UpsideDownCouchRow, 0) updateRows := make([]UpsideDownCouchRow, 0) @@ -327,29 +423,30 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in 
existingStoredKeys[string(key)] = true } + keyBuf := GetRowBuffer() for _, row := range rows { switch row := row.(type) { case *TermFrequencyRow: - rowKey := string(row.Key()) - if _, ok := existingTermKeys[rowKey]; ok { + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingTermKeys, rowKey) + delete(existingTermKeys, string(keyBuf[:keySize])) } else { addRows = append(addRows, row) } case *StoredRow: - rowKey := string(row.Key()) - if _, ok := existingStoredKeys[rowKey]; ok { + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingStoredKeys, rowKey) + delete(existingStoredKeys, string(keyBuf[:keySize])) } else { addRows = append(addRows, row) } default: updateRows = append(updateRows, row) } - } + PutRowBuffer(keyBuf) // any of the existing rows that weren't updated need to be deleted for existingTermKey := range existingTermKeys { @@ -427,6 +524,37 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field func (udc *UpsideDownCouch) Delete(id string) (err error) { indexStart := time.Now() + + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() + if err != nil { + return + } + + // first we lookup the backindex row for the doc id if it exists + // lookup the back index row + var backIndexRow *BackIndexRow + backIndexRow, err = udc.backIndexRowForDoc(kvreader, id) + if err != nil { + _ = kvreader.Close() + atomic.AddUint64(&udc.stats.errors, 1) + return + } + + err = kvreader.Close() + if err != nil { + return + } + + if backIndexRow == nil { + atomic.AddUint64(&udc.stats.deletes, 1) + return + } + // start a writer for this delete var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() @@ -439,18 +567,6 @@ func (udc 
*UpsideDownCouch) Delete(id string) (err error) { } }() - // lookup the back index row - var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvwriter, id) - if err != nil { - atomic.AddUint64(&udc.stats.errors, 1) - return - } - if backIndexRow == nil { - atomic.AddUint64(&udc.stats.deletes, 1) - return - } - deleteRows := make([]UpsideDownCouchRow, 0) deleteRows = udc.deleteSingle(id, backIndexRow, deleteRows) @@ -490,15 +606,25 @@ func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID st tempRow := &BackIndexRow{ doc: []byte(docID), } - key := tempRow.Key() - value, err := kvreader.Get(key) + + keyBuf := GetRowBuffer() + if tempRow.KeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*tempRow.KeySize()) + } + defer PutRowBuffer(keyBuf) + keySize, err := tempRow.KeyTo(keyBuf) + if err != nil { + return nil, err + } + + value, err := kvreader.Get(keyBuf[:keySize]) if err != nil { return nil, err } if value == nil { return nil, nil } - backIndexRow, err := NewBackIndexRowKV(key, value) + backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value) if err != nil { return nil, err } @@ -630,18 +756,34 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) indexStart := time.Now() - // start a writer for this batch - var kvwriter store.KVWriter - kvwriter, err = udc.store.Writer() + + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() if err != nil { return } // first lookup all the back index rows var backIndexRows map[string]*BackIndexRow - backIndexRows, err = udc.backIndexRowsForBatch(kvwriter, batch) + backIndexRows, err = udc.backIndexRowsForBatch(kvreader, batch) + if err != nil { + _ = kvreader.Close() + return + } + + err = kvreader.Close() + if err != nil { + return + } + + // start a writer for this batch + 
var kvwriter store.KVWriter + kvwriter, err = udc.store.Writer() if err != nil { - _ = kvwriter.Close() return } @@ -704,6 +846,8 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) { internalRow := NewInternalRow(key, val) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() var writer store.KVWriter writer, err = udc.store.Writer() if err != nil { @@ -714,11 +858,17 @@ func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) { err = cerr } }() - return writer.Set(internalRow.Key(), internalRow.Value()) + + batch := writer.NewBatch() + batch.Set(internalRow.Key(), internalRow.Value()) + + return writer.ExecuteBatch(batch) } func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) { internalRow := NewInternalRow(key, nil) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() var writer store.KVWriter writer, err = udc.store.Writer() if err != nil { @@ -729,7 +879,10 @@ func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) { err = cerr } }() - return writer.Delete(internalRow.Key()) + + batch := writer.NewBatch() + batch.Delete(internalRow.Key()) + return writer.ExecuteBatch(batch) } func (udc *UpsideDownCouch) Reader() (index.IndexReader, error) { @@ -758,10 +911,6 @@ func (udc *UpsideDownCouch) fieldIndexOrNewRow(name string) (uint16, *FieldRow) return index, nil } -func IndexTypeConstructor(store store.KVStore, analysisQueue *index.AnalysisQueue) (index.Index, error) { - return NewUpsideDownCouch(store, analysisQueue), nil -} - func init() { - registry.RegisterIndexType(Name, IndexTypeConstructor) + registry.RegisterIndexType(Name, NewUpsideDownCouch) } diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index a7d64a1e..d4a3efd1 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -10,10 +10,11 @@ package upside_down import ( - "os" + "log" "reflect" 
"regexp" "strconv" + "sync" "testing" "time" @@ -33,17 +34,18 @@ var testAnalyzer = &analysis.Analyzer{ func TestIndexOpenReopen(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -59,7 +61,7 @@ func TestIndexOpenReopen(t *testing.T) { // opening the database should have inserted a version expectedLength := uint64(1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -73,9 +75,10 @@ func TestIndexOpenReopen(t *testing.T) { t.Fatal(err) } - store = boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) - idx = NewUpsideDownCouch(store, analysisQueue) + idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) @@ -90,17 +93,18 @@ func TestIndexOpenReopen(t *testing.T) { func TestIndexInsert(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -138,7 +142,7 @@ func TestIndexInsert(t *testing.T) { // should have 4 rows (1 for version, 1 for schema field, and 1 for 
single term, and 1 for the term count, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -149,17 +153,18 @@ func TestIndexInsert(t *testing.T) { func TestIndexInsertThenDelete(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -233,7 +238,7 @@ func TestIndexInsertThenDelete(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, 1 for dictionary row garbage) expectedLength := uint64(1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -244,17 +249,18 @@ func TestIndexInsertThenDelete(t *testing.T) { func TestIndexInsertThenUpdate(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -282,7 +288,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, and 2 for the two term, and 2 for the term counts, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 2 + 2 + 1) - 
rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -300,7 +306,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, and 1 for the remaining term, and 2 for the term diciontary, and 1 for the back index entry) expectedLength = uint64(1 + 1 + 1 + 2 + 1) - rowCount, err = idx.rowCount() + rowCount, err = idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -311,17 +317,18 @@ func TestIndexInsertThenUpdate(t *testing.T) { func TestIndexInsertMultiple(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -346,7 +353,7 @@ func TestIndexInsertMultiple(t *testing.T) { // should have 4 rows (1 for version, 1 for schema field, and 2 for single term, and 1 for the term count, and 2 for the back index entries) expectedLength := uint64(1 + 1 + 2 + 1 + 2) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -360,12 +367,13 @@ func TestIndexInsertMultiple(t *testing.T) { t.Fatal(err) } - store = boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) - idx = NewUpsideDownCouch(store, analysisQueue) + idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() @@ -393,17 +401,18 @@ func TestIndexInsertMultiple(t 
*testing.T) { func TestIndexInsertWithStore(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -441,7 +450,7 @@ func TestIndexInsertWithStore(t *testing.T) { // should have 6 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the stored field and 1 for the term count, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 1 + 1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -479,17 +488,18 @@ func TestIndexInsertWithStore(t *testing.T) { func TestIndexInternalCRUD(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -572,17 +582,18 @@ func TestIndexInternalCRUD(t *testing.T) { func TestIndexBatch(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, 
boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -669,17 +680,18 @@ func TestIndexBatch(t *testing.T) { func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -733,7 +745,7 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { // 16 for date term counts // 1 for the back index entry expectedLength := uint64(1 + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -861,17 +873,18 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { func TestIndexInsertFields(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -920,17 +933,18 @@ func TestIndexInsertFields(t *testing.T) { func TestIndexUpdateComposites(t *testing.T) { defer func() { - err := 
os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -958,7 +972,7 @@ func TestIndexUpdateComposites(t *testing.T) { // 4 for the text term count // 1 for the back index entry expectedLength := uint64(1 + 3 + 4 + 2 + 4 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -1005,7 +1019,7 @@ func TestIndexUpdateComposites(t *testing.T) { // should have the same row count as before, plus 4 term dictionary garbage rows expectedLength += 4 - rowCount, err = idx.rowCount() + rowCount, err = idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -1016,17 +1030,18 @@ func TestIndexUpdateComposites(t *testing.T) { func TestIndexFieldsMisc(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1045,15 +1060,15 @@ func TestIndexFieldsMisc(t *testing.T) { t.Errorf("Error updating index: %v", err) } - fieldName1 := idx.fieldCache.FieldIndexed(0) + fieldName1 := idx.(*UpsideDownCouch).fieldCache.FieldIndexed(0) if fieldName1 != "name" { t.Errorf("expected field named 'name', got '%s'", fieldName1) } - fieldName2 := 
idx.fieldCache.FieldIndexed(1) + fieldName2 := idx.(*UpsideDownCouch).fieldCache.FieldIndexed(1) if fieldName2 != "title" { t.Errorf("expected field named 'title', got '%s'", fieldName2) } - fieldName3 := idx.fieldCache.FieldIndexed(2) + fieldName3 := idx.(*UpsideDownCouch).fieldCache.FieldIndexed(2) if fieldName3 != "" { t.Errorf("expected field named '', got '%s'", fieldName3) } @@ -1062,17 +1077,18 @@ func TestIndexFieldsMisc(t *testing.T) { func TestIndexTermReaderCompositeFields(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1122,17 +1138,18 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { func TestIndexDocumentFieldTerms(t *testing.T) { defer func() { - err := os.RemoveAll("test") + err := DestroyTest() if err != nil { t.Fatal(err) } }() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1183,12 +1200,11 @@ func BenchmarkBatch(b *testing.B) { b.Fatal(err) } - s, err := null.New() + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { b.Fatal(err) @@ -1211,3 
+1227,59 @@ func BenchmarkBatch(b *testing.B) { } } } + +func TestConcurrentUpdate(t *testing.T) { + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // do some concurrent updates + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions(strconv.Itoa(i), []uint64{}, []byte(strconv.Itoa(i)), document.StoreField)) + err := idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + wg.Done() + }(i) + } + wg.Wait() + + // now load the name field and see what we get + r, err := idx.Reader() + if err != nil { + log.Fatal(err) + } + + doc, err := r.Document("1") + if err != nil { + log.Fatal(err) + } + + if len(doc.Fields) > 1 { + t.Errorf("expected single field, found %d", len(doc.Fields)) + } +} diff --git a/index_impl.go b/index_impl.go index 6138fad7..93cd33c2 100644 --- a/index_impl.go +++ b/index_impl.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upside_down" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" @@ -51,28 +51,18 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) { rv := indexImpl{ path: "", m: mapping, - meta: newIndexMeta(indexType, inmem.Name, nil), + meta: newIndexMeta(indexType, gtreap.Name, nil), stats: &IndexStat{}, } - storeConstructor := 
registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } - // now open the store - var err error - rv.s, err = storeConstructor(nil) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + var err error + rv.i, err = indexTypeConstructor(rv.meta.Storage, nil, Config.analysisQueue) if err != nil { return nil, err } @@ -120,10 +110,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore meta: newIndexMeta(indexType, kvstore, kvconfig), stats: &IndexStat{}, } - storeConstructor := registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } // at this point there is hope that we can be successful, so save index meta err = rv.meta.Save(path) if err != nil { @@ -133,24 +119,21 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore kvconfig["error_if_exists"] = true kvconfig["path"] = indexStorePath(path) - // now create the store - rv.s, err = storeConstructor(kvconfig) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue) if err != nil { return nil, err } err = rv.i.Open() if err != nil { + if err == index.ErrorUnknownStorageType { + return nil, ErrorUnknownStorageType + } return nil, err } rv.stats.indexStat = rv.i.Stats() @@ -173,7 +156,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore } func openIndexUsing(path string, runtimeConfig 
map[string]interface{}) (rv *indexImpl, err error) { - rv = &indexImpl{ path: path, stats: &IndexStat{}, @@ -189,11 +171,6 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde rv.meta.IndexType = upside_down.Name } - storeConstructor := registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } - storeConfig := rv.meta.Config if storeConfig == nil { storeConfig = map[string]interface{}{} @@ -206,24 +183,21 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde storeConfig[rck] = rcv } - // now open the store - rv.s, err = storeConstructor(storeConfig) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue) if err != nil { return nil, err } err = rv.i.Open() if err != nil { + if err == index.ErrorUnknownStorageType { + return nil, ErrorUnknownStorageType + } return nil, err } rv.stats.indexStat = rv.i.Stats() diff --git a/index_meta.go b/index_meta.go index ad0d223e..a6599529 100644 --- a/index_meta.go +++ b/index_meta.go @@ -13,6 +13,8 @@ import ( "encoding/json" "io/ioutil" "os" + + "github.com/blevesearch/bleve/index/upside_down" ) const metaFilename = "index_meta.json" @@ -45,6 +47,9 @@ func openIndexMeta(path string) (*indexMeta, error) { if err != nil { return nil, ErrorIndexMetaCorrupt } + if im.IndexType == "" { + im.IndexType = upside_down.Name + } return &im, nil } diff --git a/registry/index_type.go b/registry/index_type.go index 503c760f..69d1e5de 100644 --- a/registry/index_type.go +++ b/registry/index_type.go @@ -13,7 +13,6 @@ import ( "fmt" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store" ) func 
RegisterIndexType(name string, constructor IndexTypeConstructor) { @@ -24,7 +23,7 @@ func RegisterIndexType(name string, constructor IndexTypeConstructor) { index_types[name] = constructor } -type IndexTypeConstructor func(store.KVStore, *index.AnalysisQueue) (index.Index, error) +type IndexTypeConstructor func(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) type IndexTypeRegistry map[string]IndexTypeConstructor func IndexTypeConstructorByName(name string) IndexTypeConstructor { @@ -32,16 +31,10 @@ func IndexTypeConstructorByName(name string) IndexTypeConstructor { } func IndexTypesAndInstances() ([]string, []string) { - emptyConfig := map[string]interface{}{} types := make([]string, 0) instances := make([]string, 0) - for name, cons := range stores { - _, err := cons(emptyConfig) - if err == nil { - instances = append(instances, name) - } else { - types = append(types, name) - } + for name, _ := range stores { + types = append(types, name) } return types, instances } diff --git a/registry/store.go b/registry/store.go index cff7c88e..229f534a 100644 --- a/registry/store.go +++ b/registry/store.go @@ -23,7 +23,7 @@ func RegisterKVStore(name string, constructor KVStoreConstructor) { stores[name] = constructor } -type KVStoreConstructor func(config map[string]interface{}) (store.KVStore, error) +type KVStoreConstructor func(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) type KVStoreRegistry map[string]KVStoreConstructor func KVStoreConstructorByName(name string) KVStoreConstructor { @@ -31,16 +31,10 @@ func KVStoreConstructorByName(name string) KVStoreConstructor { } func KVStoreTypesAndInstances() ([]string, []string) { - emptyConfig := map[string]interface{}{} types := make([]string, 0) instances := make([]string, 0) - for name, cons := range stores { - _, err := cons(emptyConfig) - if err == nil { - instances = append(instances, name) - } else { - types = append(types, 
name) - } + for name, _ := range stores { + types = append(types, name) } return types, instances } diff --git a/search/searchers/base_test.go b/search/searchers/base_test.go index d4ce26e9..54363ee9 100644 --- a/search/searchers/base_test.go +++ b/search/searchers/base_test.go @@ -17,17 +17,20 @@ import ( "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upside_down" ) var twoDocIndex index.Index //= upside_down.NewUpsideDownCouch(inmem.MustOpen()) func init() { - inMemStore, _ := inmem.New() analysisQueue := index.NewAnalysisQueue(1) - twoDocIndex = upside_down.NewUpsideDownCouch(inMemStore, analysisQueue) - err := twoDocIndex.Open() + var err error + twoDocIndex, err = upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue) + if err != nil { + panic(err) + } + err = twoDocIndex.Open() if err != nil { panic(err) } diff --git a/search/searchers/search_boolean_test.go b/search/searchers/search_boolean_test.go index 875424b9..d7926b33 100644 --- a/search/searchers/search_boolean_test.go +++ b/search/searchers/search_boolean_test.go @@ -17,6 +17,9 @@ import ( func TestBooleanSearch(t *testing.T) { + if twoDocIndex == nil { + t.Fatal("its null") + } twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index f3072be7..cf6f04b6 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -15,7 +15,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upside_down" ) @@ -26,10 +26,12 @@ func TestTermSearcher(t 
*testing.T) { var queryBoost = 3.0 var queryExplain = true - inMemStore, _ := inmem.New() analysisQueue := index.NewAnalysisQueue(1) - i := upside_down.NewUpsideDownCouch(inMemStore, analysisQueue) - err := i.Open() + i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() if err != nil { t.Fatal(err) }