From 900f1b4a678ec557771e64c43ab3b50f99c0db91 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 23 Sep 2015 11:25:47 -0700 Subject: [PATCH 01/17] major kvstore interface and impl overhaul clarified the interface contract --- config/config.go | 1 - index/index.go | 4 + index/store/batch.go | 56 +- index/store/boltdb/iterator.go | 17 +- index/store/boltdb/reader.go | 32 +- index/store/boltdb/store.go | 70 +-- index/store/boltdb/store_test.go | 299 +++-------- index/store/boltdb/writer.go | 74 ++- index/store/goleveldb/batch.go | 35 +- index/store/goleveldb/config.go | 52 ++ index/store/goleveldb/iterator.go | 54 +- index/store/goleveldb/reader.go | 46 +- index/store/goleveldb/store.go | 160 ++---- index/store/goleveldb/store_test.go | 352 ++----------- index/store/goleveldb/util.go | 26 - index/store/goleveldb/writer.go | 56 +- index/store/gtreap/gtreap_test.go | 259 ---------- index/store/gtreap/iterator.go | 20 +- index/store/gtreap/reader.go | 27 +- index/store/gtreap/{gtreap.go => store.go} | 57 +-- index/store/gtreap/store_test.go | 130 +++++ index/store/gtreap/writer.go | 89 ++-- index/store/inmem/iterator.go | 70 --- index/store/inmem/reader.go | 40 -- index/store/inmem/store.go | 106 ---- index/store/inmem/store_test.go | 254 --------- index/store/inmem/writer.go | 57 --- index/store/kvstore.go | 119 ++++- index/store/merge.go | 69 +-- index/store/metrics/batch.go | 26 + index/store/metrics/iterator.go | 44 ++ index/store/metrics/metrics.go | 482 ------------------ index/store/metrics/metrics_test.go | 249 +-------- index/store/metrics/reader.go | 40 ++ index/store/metrics/store.go | 196 +++++++ index/store/metrics/store_test.go | 119 +++++ index/store/metrics/util.go | 72 +++ index/store/metrics/writer.go | 38 ++ index/store/null/null.go | 150 ++---- index/store/null/null_test.go | 18 +- index/store/test/bytes.go | 274 ++++++++++ index/store/test/crud.go | 98 ++++ index/store/test/isolation.go | 177 +++++++ index/store/test/iterator.go | 289 +++++++++++ index/store/test/merge.go | 108 ++++ index/upside_down/analysis_test.go | 5 +- index/upside_down/benchmark_boltdb_test.go | 35 +- index/upside_down/benchmark_common_test.go | 21 +- index/upside_down/benchmark_goleveldb_test.go | 35 +- index/upside_down/benchmark_gtreap_test.go | 33 +- index/upside_down/benchmark_inmem_test.go | 75 --- index/upside_down/benchmark_null_test.go | 33 +- index/upside_down/dump.go | 29 +- index/upside_down/dump_test.go | 17 +- index/upside_down/field_dict.go | 25 +- index/upside_down/field_dict_test.go | 17 +- index/upside_down/index_reader.go | 32 +- index/upside_down/reader.go | 61 +-- index/upside_down/reader_test.go | 33 +- index/upside_down/upside_down.go | 212 +++++--- index/upside_down/upside_down_test.go | 270 ++++------ index_impl.go | 50 +- index_meta.go | 5 + registry/index_type.go | 13 +- registry/store.go | 12 +- search/searchers/base_test.go | 11 +- search/searchers/search_boolean_test.go | 3 + search/searchers/search_term_test.go | 10 +- 68 files changed, 2670 insertions(+), 3378 deletions(-) create mode 100644 index/store/goleveldb/config.go delete mode 100644 index/store/goleveldb/util.go delete mode 100644 index/store/gtreap/gtreap_test.go rename index/store/gtreap/{gtreap.go => store.go} (65%) create mode 100644 index/store/gtreap/store_test.go delete mode 100644 index/store/inmem/iterator.go delete mode 100644 index/store/inmem/reader.go delete mode 100644 index/store/inmem/store.go delete mode 100644 index/store/inmem/store_test.go delete mode 100644 index/store/inmem/writer.go 
create mode 100644 index/store/metrics/batch.go create mode 100644 index/store/metrics/iterator.go delete mode 100644 index/store/metrics/metrics.go create mode 100644 index/store/metrics/reader.go create mode 100644 index/store/metrics/store.go create mode 100644 index/store/metrics/store_test.go create mode 100644 index/store/metrics/util.go create mode 100644 index/store/metrics/writer.go create mode 100644 index/store/test/bytes.go create mode 100644 index/store/test/crud.go create mode 100644 index/store/test/isolation.go create mode 100644 index/store/test/iterator.go create mode 100644 index/store/test/merge.go delete mode 100644 index/upside_down/benchmark_inmem_test.go diff --git a/config/config.go b/config/config.go index bdb3036e..e033dad8 100644 --- a/config/config.go +++ b/config/config.go @@ -86,7 +86,6 @@ import ( _ "github.com/blevesearch/bleve/index/store/boltdb" _ "github.com/blevesearch/bleve/index/store/goleveldb" _ "github.com/blevesearch/bleve/index/store/gtreap" - _ "github.com/blevesearch/bleve/index/store/inmem" // index types _ "github.com/blevesearch/bleve/index/upside_down" diff --git a/index/index.go b/index/index.go index 4ad16679..aa7b9a77 100644 --- a/index/index.go +++ b/index/index.go @@ -16,6 +16,8 @@ import ( "github.com/blevesearch/bleve/document" ) +var ErrorUnknownStorageType = fmt.Errorf("unknown storage type") + type Index interface { Open() error Close() error @@ -45,6 +47,8 @@ type IndexReader interface { DocIDReader(start, end string) (DocIDReader, error) FieldDict(field string) (FieldDict, error) + + // FieldDictRange is currently defined to include the start and end terms FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error) FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) diff --git a/index/store/batch.go b/index/store/batch.go index 4a3c76bc..07a22ddd 100644 --- a/index/store/batch.go +++ b/index/store/batch.go @@ -15,55 +15,39 @@ type op struct { } type EmulatedBatch struct { - w KVWriter - ops []*op - merge *EmulatedMerge + Ops []*op + Merger *EmulatedMerge } -func NewEmulatedBatch(w KVWriter, mo MergeOperator) *EmulatedBatch { +func NewEmulatedBatch(mo MergeOperator) *EmulatedBatch { return &EmulatedBatch{ - w: w, - ops: make([]*op, 0, 1000), - merge: NewEmulatedMerge(mo), + Ops: make([]*op, 0, 1000), + Merger: NewEmulatedMerge(mo), } } func (b *EmulatedBatch) Set(key, val []byte) { - b.ops = append(b.ops, &op{key, val}) + ck := make([]byte, len(key)) + copy(ck, key) + cv := make([]byte, len(val)) + copy(cv, val) + b.Ops = append(b.Ops, &op{ck, cv}) } func (b *EmulatedBatch) Delete(key []byte) { - b.ops = append(b.ops, &op{key, nil}) + ck := make([]byte, len(key)) + copy(ck, key) + b.Ops = append(b.Ops, &op{ck, nil}) } func (b *EmulatedBatch) Merge(key, val []byte) { - b.merge.Merge(key, val) + ck := make([]byte, len(key)) + copy(ck, key) + cv := make([]byte, len(val)) + copy(cv, val) + b.Merger.Merge(key, val) } -func (b *EmulatedBatch) Execute() error { - // first process merges - err := b.merge.Execute(b.w) - if err != nil { - return err - } - - // now apply all the ops - for _, op := range b.ops { - if op.V != nil { - err := b.w.Set(op.K, op.V) - if err != nil { - return err - } - } else { - err := b.w.Delete(op.K) - if err != nil { - return err - } - } - } - return nil -} - -func (b *EmulatedBatch) Close() error { - return nil +func (b *EmulatedBatch) Reset() { + b.Ops = b.Ops[:0] } diff --git a/index/store/boltdb/iterator.go b/index/store/boltdb/iterator.go index 0cf87049..bccfeaee 
100644 --- a/index/store/boltdb/iterator.go +++ b/index/store/boltdb/iterator.go @@ -10,6 +10,8 @@ package boltdb import ( + "bytes" + "github.com/boltdb/bolt" ) @@ -17,24 +19,31 @@ type Iterator struct { store *Store tx *bolt.Tx cursor *bolt.Cursor + prefix []byte + start []byte + end []byte valid bool key []byte val []byte } -func (i *Iterator) SeekFirst() { - i.key, i.val = i.cursor.First() +func (i *Iterator) updateValid() { i.valid = (i.key != nil) + if i.valid && i.prefix != nil { + i.valid = bytes.HasPrefix(i.key, i.prefix) + } else if i.end != nil { + i.valid = bytes.Compare(i.key, i.end) < 0 + } } func (i *Iterator) Seek(k []byte) { i.key, i.val = i.cursor.Seek(k) - i.valid = (i.key != nil) + i.updateValid() } func (i *Iterator) Next() { i.key, i.val = i.cursor.Next() - i.valid = (i.key != nil) + i.updateValid() } func (i *Iterator) Current() ([]byte, []byte, bool) { diff --git a/index/store/boltdb/reader.go b/index/store/boltdb/reader.go index 2179ebb5..71815a72 100644 --- a/index/store/boltdb/reader.go +++ b/index/store/boltdb/reader.go @@ -19,16 +19,17 @@ type Reader struct { tx *bolt.Tx } -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - func (r *Reader) Get(key []byte) ([]byte, error) { - rv := r.tx.Bucket([]byte(r.store.bucket)).Get(key) + var rv []byte + v := r.tx.Bucket([]byte(r.store.bucket)).Get(key) + if v != nil { + rv = make([]byte, len(v)) + copy(rv, v) + } return rv, nil } -func (r *Reader) Iterator(key []byte) store.KVIterator { +func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { b := r.tx.Bucket([]byte(r.store.bucket)) cursor := b.Cursor() @@ -36,9 +37,26 @@ func (r *Reader) Iterator(key []byte) store.KVIterator { store: r.store, tx: r.tx, cursor: cursor, + prefix: prefix, } - rv.Seek(key) + rv.Seek(prefix) + return rv +} + +func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { + b := r.tx.Bucket([]byte(r.store.bucket)) + cursor := b.Cursor() + + rv := &Iterator{ + store: r.store, + tx: r.tx, + cursor: cursor, + start: start, + end: end, + } + + rv.Seek(start) return rv } diff --git a/index/store/boltdb/store.go b/index/store/boltdb/store.go index cd03cd16..ad21b403 100644 --- a/index/store/boltdb/store.go +++ b/index/store/boltdb/store.go @@ -11,7 +11,6 @@ package boltdb import ( "fmt" - "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" @@ -24,40 +23,41 @@ type Store struct { path string bucket string db *bolt.DB - writer sync.Mutex mo store.MergeOperator } -func New(path string, bucket string) *Store { - rv := Store{ - path: path, - bucket: bucket, +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + path, ok := config["path"].(string) + if !ok { + return nil, fmt.Errorf("must specify path") } - return &rv -} -func (bs *Store) Open() error { + bucket, ok := config["bucket"].(string) + if !ok { + bucket = "bleve" + } - var err error - bs.db, err = bolt.Open(bs.path, 0600, nil) + db, err := bolt.Open(path, 0600, nil) if err != nil { - return err + return nil, err } - err = bs.db.Update(func(tx *bolt.Tx) error { - _, err := tx.CreateBucketIfNotExists([]byte(bs.bucket)) + err = db.Update(func(tx *bolt.Tx) error { + _, err := tx.CreateBucketIfNotExists([]byte(bucket)) return err }) if err != nil { - return err + return nil, err } - return nil -} - -func (bs *Store) SetMergeOperator(mo store.MergeOperator) { - bs.mo = mo + rv := Store{ + path: path, + bucket: bucket, + db: db, + mo: mo, + } + return &rv, nil } func (bs *Store) Close() error { 
@@ -76,37 +76,11 @@ func (bs *Store) Reader() (store.KVReader, error) { } func (bs *Store) Writer() (store.KVWriter, error) { - bs.writer.Lock() - tx, err := bs.db.Begin(true) - if err != nil { - bs.writer.Unlock() - return nil, err - } - reader := &Reader{ - store: bs, - tx: tx, - } return &Writer{ - store: bs, - tx: tx, - reader: reader, + store: bs, }, nil } -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - - bucket, ok := config["bucket"].(string) - if !ok { - bucket = "bleve" - } - - return New(path, bucket), nil -} - func init() { - registry.RegisterKVStore(Name, StoreConstructor) + registry.RegisterKVStore(Name, New) } diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index d188de28..d5684983 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -10,17 +10,19 @@ package boltdb import ( - "fmt" "os" - "reflect" "testing" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" ) -func TestStore(t *testing.T) { - s := New("test", "bleve") - err := s.Open() +func open(mo store.MergeOperator) (store.KVStore, error) { + return New(mo, map[string]interface{}{"path": "test"}) +} + +func TestBoltDBKVCrud(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } @@ -31,12 +33,11 @@ func TestStore(t *testing.T) { } }() - CommonTestKVStore(t, s) + test.CommonTestKVCrud(t, s) } -func TestReaderIsolation(t *testing.T) { - s := New("test", "bleve") - err := s.Open() +func TestBoltDBReaderIsolation(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } @@ -47,252 +48,80 @@ func TestReaderIsolation(t *testing.T) { } }() - CommonTestReaderIsolation(t, s) + test.CommonTestReaderIsolation(t, s) } -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) +func TestBoltDBReaderOwnsGetBytes(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } defer func() { - err := reader.Close() + err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - 
it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } + test.CommonTestReaderOwnsGetBytes(t, s) } -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) +func TestBoltDBWriterOwnsBytes(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } - - // ************************************************** - // this is a hack to try to pre-emptively overflow - // boltdb writes *MAY* block a long reader - // in particular, if the write requires additional - // allocation, it must acquire the same lock as - // the reader, thus cannot continue until that - // reader is closed. - // in general this is not a problem for bleve - // (though it may affect performance in some cases) - // but it is a problem for this test which attemps - // to easily verify that readers are isolated - // this hack writes enough initial data such that - // the subsequent writes do not require additional - // space - hackSize := 1000 - for i := 0; i < hackSize; i++ { - k := fmt.Sprintf("x%d", i) - err = writer.Set([]byte(k), []byte("filler")) - if err != nil { - t.Fatal(err) - } - } - // ************************************************** - - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } defer func() { - err := reader.Close() + err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != hackSize+1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != hackSize+2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := 
reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != hackSize+1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestBoltDBPrefixIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestPrefixIterator(t, s) +} + +func TestBoltDBRangeIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestRangeIterator(t, s) +} + +func TestBoltDBMerge(t *testing.T) { + s, err := open(&test.TestMergeCounter{}) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestMerge(t, s) } diff --git a/index/store/boltdb/writer.go b/index/store/boltdb/writer.go index 0a70bfff..3972ab88 100644 --- a/index/store/boltdb/writer.go +++ b/index/store/boltdb/writer.go @@ -10,41 +10,61 @@ package boltdb import ( + "fmt" + "github.com/blevesearch/bleve/index/store" - "github.com/boltdb/bolt" ) type Writer struct { - store *Store - tx *bolt.Tx - reader *Reader -} - -func (w *Writer) Set(key, val []byte) error { - return w.tx.Bucket([]byte(w.store.bucket)).Put(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.tx.Bucket([]byte(w.store.bucket)).Delete(key) + store *Store } func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.store.mo) + return store.NewEmulatedBatch(w.store.mo) +} + +func (w *Writer) ExecuteBatch(batch store.KVBatch) error { + + emulatedBatch, ok := batch.(*store.EmulatedBatch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + tx, err := w.store.db.Begin(true) + if err != nil { + return err + } + + for k, mergeOps := range emulatedBatch.Merger.Merges { + kb := []byte(k) + existingVal := tx.Bucket([]byte(w.store.bucket)).Get(kb) + mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + err = tx.Bucket([]byte(w.store.bucket)).Put(kb, mergedVal) + if err != nil { + return err + } + } + + for _, op := range emulatedBatch.Ops { + if op.V != nil { + err := tx.Bucket([]byte(w.store.bucket)).Put(op.K, op.V) + if err != nil { + return err + } + } else { + err := tx.Bucket([]byte(w.store.bucket)).Delete(op.K) + if err != nil { + return err + } + } + } + + return tx.Commit() } func (w *Writer) Close() error { - defer w.store.writer.Unlock() - return w.tx.Commit() -} - -func (w *Writer) BytesSafeAfterClose() bool { - return w.reader.BytesSafeAfterClose() -} - -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.reader.Get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.reader.Iterator(key) + return nil } diff --git a/index/store/goleveldb/batch.go b/index/store/goleveldb/batch.go index b56a3bb7..43d86b0b 100644 --- a/index/store/goleveldb/batch.go +++ b/index/store/goleveldb/batch.go @@ -15,7 +15,7 @@ import ( ) type Batch struct { - w *Writer + store *Store merge *store.EmulatedMerge batch *leveldb.Batch } @@ -32,22 +32,23 @@ func (b *Batch) Merge(key, val []byte) { b.merge.Merge(key, val) } -func (b *Batch) Execute() error { +// func (b *Batch) Execute() error { - // first process merges - ops, 
err := b.merge.ExecuteDeferred(b.w) - if err != nil { - return err - } - for _, op := range ops { - b.batch.Put(op.K, op.V) - } +// // first process merges +// ops, err := b.merge.ExecuteDeferred(b.w) +// if err != nil { +// return err +// } +// for _, op := range ops { +// b.batch.Put(op.K, op.V) +// } - wopts := defaultWriteOptions() - err = b.w.store.db.Write(b.batch, wopts) - return err -} - -func (b *Batch) Close() error { - return nil +// wopts := defaultWriteOptions() +// err = b.w.store.db.Write(b.batch, wopts) +// return err +// } + +func (b *Batch) Reset() { + b.batch.Reset() + b.merge = store.NewEmulatedMerge(b.store.mo) } diff --git a/index/store/goleveldb/config.go b/index/store/goleveldb/config.go new file mode 100644 index 00000000..102debb7 --- /dev/null +++ b/index/store/goleveldb/config.go @@ -0,0 +1,52 @@ +package goleveldb + +import ( + "github.com/syndtr/goleveldb/leveldb/filter" + "github.com/syndtr/goleveldb/leveldb/opt" +) + +func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) { + + ro, ok := config["read_only"].(bool) + if ok { + o.ReadOnly = ro + } + + cim, ok := config["create_if_missing"].(bool) + if ok { + o.ErrorIfMissing = !cim + } + + eie, ok := config["error_if_exists"].(bool) + if ok { + o.ErrorIfExist = eie + } + + wbs, ok := config["write_buffer_size"].(float64) + if ok { + o.WriteBuffer = int(wbs) + } + + bs, ok := config["block_size"].(float64) + if ok { + o.BlockSize = int(bs) + } + + bri, ok := config["block_restart_interval"].(float64) + if ok { + o.BlockRestartInterval = int(bri) + } + + lcc, ok := config["lru_cache_capacity"].(float64) + if ok { + o.BlockCacheCapacity = int(lcc) + } + + bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) + if ok { + bf := filter.NewBloomFilter(int(bfbpk)) + o.Filter = bf + } + + return o, nil +} diff --git a/index/store/goleveldb/iterator.go b/index/store/goleveldb/iterator.go index 5c8a955e..7ed4ffe3 100644 --- a/index/store/goleveldb/iterator.go +++ b/index/store/goleveldb/iterator.go @@ -9,53 +9,18 @@ package goleveldb -import ( - "github.com/syndtr/goleveldb/leveldb" - "github.com/syndtr/goleveldb/leveldb/iterator" -) +import "github.com/syndtr/goleveldb/leveldb/iterator" type Iterator struct { store *Store iterator iterator.Iterator - copyk []byte - copyv []byte -} - -func newIterator(store *Store) *Iterator { - ropts := defaultReadOptions() - iter := store.db.NewIterator(nil, ropts) - rv := Iterator{ - store: store, - iterator: iter, - } - return &rv -} - -func newIteratorWithSnapshot(store *Store, snapshot *leveldb.Snapshot) *Iterator { - options := defaultReadOptions() - iter := snapshot.NewIterator(nil, options) - rv := Iterator{ - store: store, - iterator: iter, - } - return &rv -} - -func (ldi *Iterator) SeekFirst() { - ldi.copyk = nil - ldi.copyv = nil - ldi.iterator.First() } func (ldi *Iterator) Seek(key []byte) { - ldi.copyk = nil - ldi.copyv = nil ldi.iterator.Seek(key) } func (ldi *Iterator) Next() { - ldi.copyk = nil - ldi.copyv = nil ldi.iterator.Next() } @@ -67,21 +32,11 @@ func (ldi *Iterator) Current() ([]byte, []byte, bool) { } func (ldi *Iterator) Key() []byte { - k := ldi.iterator.Key() - if ldi.copyk == nil { - ldi.copyk = make([]byte, len(k)) - copy(ldi.copyk, k) - } - return ldi.copyk + return ldi.iterator.Key() } func (ldi *Iterator) Value() []byte { - v := ldi.iterator.Value() - if ldi.copyv == nil { - ldi.copyv = make([]byte, len(v)) - copy(ldi.copyv, v) - } - return ldi.copyv + return ldi.iterator.Value() } func (ldi *Iterator) Valid() bool { 
@@ -89,7 +44,6 @@ func (ldi *Iterator) Valid() bool { } func (ldi *Iterator) Close() error { - ldi.copyk = nil - ldi.copyv = nil + ldi.iterator.Release() return nil } diff --git a/index/store/goleveldb/reader.go b/index/store/goleveldb/reader.go index 23316a22..7807d571 100644 --- a/index/store/goleveldb/reader.go +++ b/index/store/goleveldb/reader.go @@ -12,6 +12,7 @@ package goleveldb import ( "github.com/blevesearch/bleve/index/store" "github.com/syndtr/goleveldb/leveldb" + "github.com/syndtr/goleveldb/leveldb/util" ) type Reader struct { @@ -19,26 +20,37 @@ type Reader struct { snapshot *leveldb.Snapshot } -func newReader(store *Store) (*Reader, error) { - snapshot, _ := store.db.GetSnapshot() - return &Reader{ - store: store, - snapshot: snapshot, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.getWithSnapshot(key, r.snapshot) + b, err := r.snapshot.Get(key, r.store.defaultReadOptions) + if err == leveldb.ErrNotFound { + return nil, nil + } + return b, err } -func (r *Reader) Iterator(key []byte) store.KVIterator { - rv := newIteratorWithSnapshot(r.store, r.snapshot) - rv.Seek(key) - return rv +func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator { + byteRange := util.BytesPrefix(prefix) + iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions) + iter.First() + rv := Iterator{ + store: r.store, + iterator: iter, + } + return &rv +} + +func (r *Reader) RangeIterator(start, end []byte) store.KVIterator { + byteRange := &util.Range{ + Start: start, + Limit: end, + } + iter := r.snapshot.NewIterator(byteRange, r.store.defaultReadOptions) + iter.First() + rv := Iterator{ + store: r.store, + iterator: iter, + } + return &rv } func (r *Reader) Close() error { diff --git a/index/store/goleveldb/store.go b/index/store/goleveldb/store.go index 3ab874fd..8fffc451 100644 --- a/index/store/goleveldb/store.go +++ b/index/store/goleveldb/store.go @@ -11,167 +11,73 @@ package goleveldb import ( "fmt" - "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" "github.com/syndtr/goleveldb/leveldb" - "github.com/syndtr/goleveldb/leveldb/filter" "github.com/syndtr/goleveldb/leveldb/opt" ) const Name = "goleveldb" type Store struct { - path string - opts *opt.Options - db *leveldb.DB - writer sync.Mutex - mo store.MergeOperator + path string + opts *opt.Options + db *leveldb.DB + mo store.MergeOperator + + defaultWriteOptions *opt.WriteOptions + defaultReadOptions *opt.ReadOptions } -func New(path string, config map[string]interface{}) (*Store, error) { - rv := Store{ - path: path, - opts: &opt.Options{}, +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + + path, ok := config["path"].(string) + if !ok { + return nil, fmt.Errorf("must specify path") } - _, err := applyConfig(rv.opts, config) + opts, err := applyConfig(&opt.Options{}, config) if err != nil { return nil, err } - return &rv, nil -} - -func (ldbs *Store) Open() error { - var err error - ldbs.db, err = leveldb.OpenFile(ldbs.path, ldbs.opts) + db, err := leveldb.OpenFile(path, opts) if err != nil { - return err + return nil, err } - return nil -} -func (ldbs *Store) SetMergeOperator(mo store.MergeOperator) { - ldbs.mo = mo -} - -func (ldbs *Store) get(key []byte) ([]byte, error) { - options := defaultReadOptions() - b, err := ldbs.db.Get(key, options) - if err == leveldb.ErrNotFound { - return nil, nil + rv := Store{ + path: path, + opts: opts, + 
db: db, + mo: mo, + defaultReadOptions: &opt.ReadOptions{}, + defaultWriteOptions: &opt.WriteOptions{}, } - return b, err -} + rv.defaultWriteOptions.Sync = true -func (ldbs *Store) getWithSnapshot(key []byte, snapshot *leveldb.Snapshot) ([]byte, error) { - options := defaultReadOptions() - b, err := snapshot.Get(key, options) - if err == leveldb.ErrNotFound { - return nil, nil - } - return b, err -} - -func (ldbs *Store) set(key, val []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.setlocked(key, val) -} - -func (ldbs *Store) setlocked(key, val []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Put(key, val, options) - return err -} - -func (ldbs *Store) delete(key []byte) error { - ldbs.writer.Lock() - defer ldbs.writer.Unlock() - return ldbs.deletelocked(key) -} - -func (ldbs *Store) deletelocked(key []byte) error { - options := defaultWriteOptions() - err := ldbs.db.Delete(key, options) - return err + return &rv, nil } func (ldbs *Store) Close() error { return ldbs.db.Close() } -func (ldbs *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(ldbs) - rv.Seek(key) - return rv -} - func (ldbs *Store) Reader() (store.KVReader, error) { - return newReader(ldbs) + snapshot, _ := ldbs.db.GetSnapshot() + return &Reader{ + store: ldbs, + snapshot: snapshot, + }, nil } func (ldbs *Store) Writer() (store.KVWriter, error) { - return newWriter(ldbs) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - path, ok := config["path"].(string) - if !ok { - return nil, fmt.Errorf("must specify path") - } - return New(path, config) + return &Writer{ + store: ldbs, + }, nil } func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func applyConfig(o *opt.Options, config map[string]interface{}) ( - *opt.Options, error) { - - ro, ok := config["read_only"].(bool) - if ok { - o.ReadOnly = ro - } - - cim, ok := config["create_if_missing"].(bool) - if ok { - o.ErrorIfMissing = !cim - } - - eie, ok := config["error_if_exists"].(bool) - if ok { - o.ErrorIfExist = eie - } - - wbs, ok := config["write_buffer_size"].(float64) - if ok { - o.WriteBuffer = int(wbs) - } - - bs, ok := config["block_size"].(float64) - if ok { - o.BlockSize = int(bs) - } - - bri, ok := config["block_restart_interval"].(float64) - if ok { - o.BlockRestartInterval = int(bri) - } - - lcc, ok := config["lru_cache_capacity"].(float64) - if ok { - o.BlockCacheCapacity = int(lcc) - } - - bfbpk, ok := config["bloom_filter_bits_per_key"].(float64) - if ok { - bf := filter.NewBloomFilter(int(bfbpk)) - o.Filter = bf - } - - return o, nil + registry.RegisterKVStore(Name, New) } diff --git a/index/store/goleveldb/store_test.go b/index/store/goleveldb/store_test.go index 65db6481..1a8368af 100644 --- a/index/store/goleveldb/store_test.go +++ b/index/store/goleveldb/store_test.go @@ -11,17 +11,24 @@ package goleveldb import ( "os" - "reflect" "testing" "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" ) -var leveldbTestOptions = map[string]interface{}{ - "create_if_missing": true, +func open(mo store.MergeOperator) (store.KVStore, error) { + return New(mo, map[string]interface{}{ + "path": "test", + "create_if_missing": true, + }) } -func TestLevelDBStore(t *testing.T) { +func TestGoLevelDBKVCrud(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } defer func() { err := os.RemoveAll("test") if err != nil { @@ -29,25 +36,14 @@ func TestLevelDBStore(t *testing.T) { } }() - s, err := 
New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) + test.CommonTestKVCrud(t, s) } -func TestLevelDBStoreIterator(t *testing.T) { +func TestGoLevelDBReaderIsolation(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } defer func() { err := os.RemoveAll("test") if err != nil { @@ -55,25 +51,14 @@ func TestLevelDBStoreIterator(t *testing.T) { } }() - s, err := New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStoreIterator(t, s) + test.CommonTestReaderIsolation(t, s) } -func TestReaderIsolation(t *testing.T) { +func TestGoLevelDBReaderOwnsGetBytes(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } defer func() { err := os.RemoveAll("test") if err != nil { @@ -81,308 +66,65 @@ func TestReaderIsolation(t *testing.T) { } }() - s, err := New("test", leveldbTestOptions) - if err != nil { - t.Fatal(err) - } - err = s.Open() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestReaderIsolation(t, s) + test.CommonTestReaderOwnsGetBytes(t, s) } -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) +func TestGoLevelDBWriterOwnsBytes(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } defer func() { - err := reader.Close() + err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } + test.CommonTestWriterOwnsBytes(t, s) } -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair 
- writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) +func TestGoLevelDBPrefixIterator(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } defer func() { - err := reader.Close() + err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it = newReader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it = reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - + test.CommonTestPrefixIterator(t, s) } -func CommonTestKVStoreIterator(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - - data := []struct { - k []byte - v []byte - }{ - {[]byte("t\x09\x00paint\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00party\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00personal\xff/sponsor/gold/thumbtack/"), []byte("a")}, - {[]byte("t\x09\x00plan\xff/sponsor/gold/thumbtack/"), []byte("a")}, - } - - batch := writer.NewBatch() - for _, d := range data { - batch.Set(d.k, d.v) - } - - err = batch.Execute() +func TestGoLevelDBRangeIterator(t *testing.T) { + s, err := open(nil) if err != nil { t.Fatal(err) } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } defer func() { - err := reader.Close() + err := os.RemoveAll("test") if err != nil { t.Fatal(err) } }() - it := reader.Iterator([]byte("a")) - keys := make([][]byte, 0, len(data)) - key, _, valid := it.Current() - for valid { - keys = append(keys, key) - it.Next() - key, _, valid = 
it.Current() - } - if len(keys) != len(data) { - t.Errorf("expected same number of keys, got %d != %d", len(keys), len(data)) - } - for i, dk := range data { - if !reflect.DeepEqual(dk.k, keys[i]) { - t.Errorf("expected key %s got %s", dk.k, keys[i]) - } + test.CommonTestRangeIterator(t, s) +} - } - - err = it.Close() +func TestGoLevelDBMerge(t *testing.T) { + s, err := open(&test.TestMergeCounter{}) if err != nil { t.Fatal(err) } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestMerge(t, s) } diff --git a/index/store/goleveldb/util.go b/index/store/goleveldb/util.go deleted file mode 100644 index 4e22808b..00000000 --- a/index/store/goleveldb/util.go +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package goleveldb - -import ( - "github.com/syndtr/goleveldb/leveldb/opt" -) - -func defaultWriteOptions() *opt.WriteOptions { - wo := &opt.WriteOptions{} - // request fsync on write for safety - wo.Sync = true - return wo -} - -func defaultReadOptions() *opt.ReadOptions { - ro := &opt.ReadOptions{} - return ro -} diff --git a/index/store/goleveldb/writer.go b/index/store/goleveldb/writer.go index ce1d03c9..9beb4a27 100644 --- a/index/store/goleveldb/writer.go +++ b/index/store/goleveldb/writer.go @@ -10,6 +10,8 @@ package goleveldb import ( + "fmt" + "github.com/blevesearch/bleve/index/store" "github.com/syndtr/goleveldb/leveldb" ) @@ -18,46 +20,48 @@ type Writer struct { store *Store } -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) + return w.store.db.Put(key, val, w.store.defaultWriteOptions) } func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) + return w.store.db.Delete(key, w.store.defaultWriteOptions) } func (w *Writer) NewBatch() store.KVBatch { rv := Batch{ - w: w, + store: w.store, merge: store.NewEmulatedMerge(w.store.mo), batch: new(leveldb.Batch), } return &rv } +func (w *Writer) ExecuteBatch(b store.KVBatch) error { + batch, ok := b.(*Batch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + // first process merges + for k, mergeOps := range batch.merge.Merges { + kb := []byte(k) + existingVal, err := w.store.db.Get(kb, w.store.defaultReadOptions) + if err != nil && err != leveldb.ErrNotFound { + return err + } + mergedVal, fullMergeOk := w.store.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + // add the final merge to this batch + batch.batch.Put(kb, mergedVal) + } + + // now execute the batch + return w.store.db.Write(batch.batch, w.store.defaultWriteOptions) +} + func (w *Writer) Close() error { - w.store.writer.Unlock() return nil } - -// these two methods can safely read using the regular -// methods without a read transaction, 
because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/gtreap/gtreap_test.go b/index/store/gtreap/gtreap_test.go deleted file mode 100644 index 19b568d1..00000000 --- a/index/store/gtreap/gtreap_test.go +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -package gtreap - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestGTreapStore(t *testing.T) { - s, err := StoreConstructor(nil) - if err != nil { - t.Fatal(err) - } - - CommonTestKVStore(t, s) -} - -func TestReaderIsolation(t *testing.T) { - s, err := StoreConstructor(nil) - if err != nil { - t.Fatal(err) - } - - CommonTestReaderIsolation(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), 
[]byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - valB := []byte("val-b") - err = writer.Set([]byte("b"), valB) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got %s", val) - } - - // ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } -} diff --git a/index/store/gtreap/iterator.go b/index/store/gtreap/iterator.go index 3bed1a2b..d64dce55 100644 --- a/index/store/gtreap/iterator.go +++ b/index/store/gtreap/iterator.go @@ -15,6 +15,7 @@ package gtreap import ( + "bytes" "sync" "github.com/steveyen/gtreap" @@ -28,19 +29,9 @@ type Iterator struct { nextCh chan *Item curr *Item currOk bool -} -func newIterator(t *gtreap.Treap) *Iterator { - return &Iterator{t: t} -} - -func (w *Iterator) SeekFirst() { - min := w.t.Min() - if min != nil { - w.restart(min.(*Item)) - } else { - w.restart(nil) - } + prefix []byte + end []byte } func (w *Iterator) Seek(k []byte) { @@ -93,6 +84,11 @@ func (w *Iterator) Current() ([]byte, []byte, bool) { if !w.currOk || w.curr == nil { return nil, nil, false } + if w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix) { + return nil, nil, false + } else if w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0 { + return nil, nil, false + } return w.curr.k, w.curr.v, w.currOk } diff --git a/index/store/gtreap/reader.go b/index/store/gtreap/reader.go index 4a18a452..a67671e9 100644 --- a/index/store/gtreap/reader.go +++ b/index/store/gtreap/reader.go @@ -24,20 +24,33 @@ type Reader struct { t *gtreap.Treap } -func (w *Reader) BytesSafeAfterClose() bool { - return false -} - func (w 
*Reader) Get(k []byte) (v []byte, err error) { + var rv []byte itm := w.t.Get(&Item{k: k}) if itm != nil { - return itm.(*Item).v, nil + rv = make([]byte, len(itm.(*Item).v)) + copy(rv, itm.(*Item).v) + return rv, nil } return nil, nil } -func (w *Reader) Iterator(k []byte) store.KVIterator { - return newIterator(w.t).restart(&Item{k: k}) +func (w *Reader) PrefixIterator(k []byte) store.KVIterator { + rv := Iterator{ + t: w.t, + prefix: k, + } + rv.restart(&Item{k: k}) + return &rv +} + +func (w *Reader) RangeIterator(start, end []byte) store.KVIterator { + rv := Iterator{ + t: w.t, + end: end, + } + rv.restart(&Item{k: start}) + return &rv } func (w *Reader) Close() error { diff --git a/index/store/gtreap/gtreap.go b/index/store/gtreap/store.go similarity index 65% rename from index/store/gtreap/gtreap.go rename to index/store/gtreap/store.go index e6f1eb65..7b0048f2 100644 --- a/index/store/gtreap/gtreap.go +++ b/index/store/gtreap/store.go @@ -12,36 +12,24 @@ // Package gtreap provides an in-memory implementation of the // KVStore interfaces using the gtreap balanced-binary treap, // copy-on-write data structure. + package gtreap import ( "bytes" - "fmt" "sync" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" - "github.com/steveyen/gtreap" ) const Name = "gtreap" -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -const MAX_CONCURRENT_WRITERS = 1 - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - s := &Store{ - availableWriters: make(chan bool, MAX_CONCURRENT_WRITERS), - t: gtreap.NewTreap(itemCompare), - } - for i := 0; i < MAX_CONCURRENT_WRITERS; i++ { - s.availableWriters <- true - } - return s, nil +type Store struct { + m sync.Mutex + t *gtreap.Treap + mo store.MergeOperator } type Item struct { @@ -53,29 +41,15 @@ func itemCompare(a, b interface{}) int { return bytes.Compare(a.(*Item).k, b.(*Item).k) } -type Store struct { - availableWriters chan bool - - m sync.Mutex - t *gtreap.Treap - - mo store.MergeOperator -} - -type Writer struct { - s *Store -} - -func (s *Store) Open() error { - return nil -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.mo = mo +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + rv := Store{ + t: gtreap.NewTreap(itemCompare), + mo: mo, + } + return &rv, nil } func (s *Store) Close() error { - close(s.availableWriters) return nil } @@ -87,10 +61,9 @@ func (s *Store) Reader() (store.KVReader, error) { } func (s *Store) Writer() (store.KVWriter, error) { - available, ok := <-s.availableWriters - if !ok || !available { - return nil, fmt.Errorf("no available writers") - } - return &Writer{s: s}, nil } + +func init() { + registry.RegisterKVStore(Name, New) +} diff --git a/index/store/gtreap/store_test.go b/index/store/gtreap/store_test.go new file mode 100644 index 00000000..92c17e67 --- /dev/null +++ b/index/store/gtreap/store_test.go @@ -0,0 +1,130 @@ +// Copyright (c) 2014 Couchbase, Inc. + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. 
See the License for the specific language +// governing permissions and limitations under the License. + +package gtreap + +import ( + "os" + "testing" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/test" +) + +func open(mo store.MergeOperator) (store.KVStore, error) { + return New(mo, nil) +} + +func TestGTreapKVCrud(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestKVCrud(t, s) +} + +func TestGTreapReaderIsolation(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestReaderIsolation(t, s) +} + +func TestGTreapReaderOwnsGetBytes(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestGTreapWriterOwnsBytes(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestGTreapPrefixIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestPrefixIterator(t, s) +} + +func TestGTreapRangeIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestRangeIterator(t, s) +} + +func TestGTreapMerge(t *testing.T) { + s, err := open(&test.TestMergeCounter{}) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestMerge(t, s) +} diff --git a/index/store/gtreap/writer.go b/index/store/gtreap/writer.go index aa95256c..4490b158 100644 --- a/index/store/gtreap/writer.go +++ b/index/store/gtreap/writer.go @@ -15,58 +15,55 @@ package gtreap import ( + "fmt" "math/rand" "github.com/blevesearch/bleve/index/store" ) -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Get(k []byte) (v []byte, err error) { - w.s.m.Lock() - t := w.s.t - w.s.m.Unlock() - - itm := t.Get(&Item{k: k}) - if itm != nil { - return itm.(*Item).v, nil - } - return nil, nil -} - -func (w *Writer) Iterator(k []byte) store.KVIterator { - w.s.m.Lock() - t := w.s.t - w.s.m.Unlock() - - return newIterator(t).restart(&Item{k: k}) -} - -func (w *Writer) Close() error { - w.s.availableWriters <- true - w.s = nil - - return nil -} - -func (w *Writer) Set(k, v []byte) (err error) { - w.s.m.Lock() - w.s.t = w.s.t.Upsert(&Item{k: k, v: v}, rand.Int()) - w.s.m.Unlock() - - return nil -} - -func (w *Writer) Delete(k []byte) (err error) { - w.s.m.Lock() - w.s.t = w.s.t.Delete(&Item{k: k}) - w.s.m.Unlock() - - return nil +type Writer struct { + s *Store } func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.s.mo) + return store.NewEmulatedBatch(w.s.mo) +} + +func (w *Writer) ExecuteBatch(batch store.KVBatch) error { + + emulatedBatch, ok := batch.(*store.EmulatedBatch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + + w.s.m.Lock() + for k, mergeOps := range emulatedBatch.Merger.Merges { + kb := []byte(k) + var 
existingVal []byte + existingItem := w.s.t.Get(&Item{k: kb}) + if existingItem != nil { + existingVal = w.s.t.Get(&Item{k: kb}).(*Item).v + } + mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps) + if !fullMergeOk { + return fmt.Errorf("merge operator returned failure") + } + w.s.t = w.s.t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int()) + } + + for _, op := range emulatedBatch.Ops { + if op.V != nil { + w.s.t = w.s.t.Upsert(&Item{k: op.K, v: op.V}, rand.Int()) + } else { + w.s.t = w.s.t.Delete(&Item{k: op.K}) + } + } + w.s.m.Unlock() + + return nil +} + +func (w *Writer) Close() error { + w.s = nil + return nil } diff --git a/index/store/inmem/iterator.go b/index/store/inmem/iterator.go deleted file mode 100644 index 042bd890..00000000 --- a/index/store/inmem/iterator.go +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "github.com/ryszard/goskiplist/skiplist" -) - -type Iterator struct { - store *Store - iterator skiplist.Iterator - valid bool -} - -func newIterator(store *Store) *Iterator { - rv := Iterator{ - store: store, - iterator: store.list.Iterator(), - } - return &rv -} - -func (i *Iterator) SeekFirst() { - i.Seek([]byte{0}) -} - -func (i *Iterator) Seek(k []byte) { - i.valid = i.iterator.Seek(string(k)) -} - -func (i *Iterator) Next() { - i.valid = i.iterator.Next() -} - -func (i *Iterator) Current() ([]byte, []byte, bool) { - if i.valid { - return []byte(i.Key()), []byte(i.Value()), true - } - return nil, nil, false -} - -func (i *Iterator) Key() []byte { - if i.valid { - return []byte(i.iterator.Key().(string)) - } - return nil -} - -func (i *Iterator) Value() []byte { - if i.valid { - return []byte(i.iterator.Value().(string)) - } - return nil -} - -func (i *Iterator) Valid() bool { - return i.valid -} - -func (i *Iterator) Close() error { - i.iterator.Close() - return nil -} diff --git a/index/store/inmem/reader.go b/index/store/inmem/reader.go deleted file mode 100644 index d9966277..00000000 --- a/index/store/inmem/reader.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package inmem - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Reader struct { - store *Store -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return false -} - -func (r *Reader) Get(key []byte) ([]byte, error) { - return r.store.get(key) -} - -func (r *Reader) Iterator(key []byte) store.KVIterator { - return r.store.iterator(key) -} - -func (r *Reader) Close() error { - return nil -} diff --git a/index/store/inmem/store.go b/index/store/inmem/store.go deleted file mode 100644 index d121de59..00000000 --- a/index/store/inmem/store.go +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "sync" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - "github.com/ryszard/goskiplist/skiplist" -) - -const Name = "mem" - -type Store struct { - list *skiplist.SkipList - writer sync.Mutex - mo store.MergeOperator -} - -func New() (*Store, error) { - rv := Store{ - list: skiplist.NewStringMap(), - } - - return &rv, nil -} - -func MustOpen() *Store { - rv := Store{ - list: skiplist.NewStringMap(), - } - - return &rv -} - -func (i *Store) Open() error { - return nil -} - -func (i *Store) SetMergeOperator(mo store.MergeOperator) { - i.mo = mo -} - -func (i *Store) get(key []byte) ([]byte, error) { - val, ok := i.list.Get(string(key)) - if ok { - return []byte(val.(string)), nil - } - return nil, nil -} - -func (i *Store) set(key, val []byte) error { - i.writer.Lock() - defer i.writer.Unlock() - return i.setlocked(key, val) -} - -func (i *Store) setlocked(key, val []byte) error { - i.list.Set(string(key), string(val)) - return nil -} - -func (i *Store) delete(key []byte) error { - i.writer.Lock() - defer i.writer.Unlock() - return i.deletelocked(key) -} - -func (i *Store) deletelocked(key []byte) error { - i.list.Delete(string(key)) - return nil -} - -func (i *Store) Close() error { - return nil -} - -func (i *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(i) - rv.Seek(key) - return rv -} - -func (i *Store) Reader() (store.KVReader, error) { - return newReader(i) -} - -func (i *Store) Writer() (store.KVWriter, error) { - return newWriter(i) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - return New() -} - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} diff --git a/index/store/inmem/store_test.go b/index/store/inmem/store_test.go deleted file mode 100644 index 39079459..00000000 --- a/index/store/inmem/store_test.go +++ /dev/null @@ -1,254 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. 
You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package inmem - -import ( - "reflect" - "testing" - - "github.com/blevesearch/bleve/index/store" -) - -func TestStore(t *testing.T) { - s, err := New() - if err != nil { - t.Fatal(err) - } - defer func() { - err := s.Close() - if err != nil { - t.Fatal(err) - } - }() - - CommonTestKVStore(t, s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - 
if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // ensure that the director iterator sees it - count = 0 - it = newReader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it = reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - -} diff --git a/index/store/inmem/writer.go b/index/store/inmem/writer.go deleted file mode 100644 index f88c4e01..00000000 --- a/index/store/inmem/writer.go +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. 
- -package inmem - -import ( - "github.com/blevesearch/bleve/index/store" -) - -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - store.writer.Lock() - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return false -} - -func (w *Writer) Set(key, val []byte) error { - return w.store.setlocked(key, val) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.deletelocked(key) -} - -func (w *Writer) NewBatch() store.KVBatch { - return store.NewEmulatedBatch(w, w.store.mo) -} - -func (w *Writer) Close() error { - w.store.writer.Unlock() - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return w.store.get(key) -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} diff --git a/index/store/kvstore.go b/index/store/kvstore.go index b96fce8b..e1e04d73 100644 --- a/index/store/kvstore.go +++ b/index/store/kvstore.go @@ -9,50 +9,113 @@ package store -type KVBatch interface { - Set(key, val []byte) - Delete(key []byte) - Merge(key, val []byte) - Execute() error +// KVStore is an abstraction for working with KV stores +type KVStore interface { + + // Writer returns a KVWriter which can be used to + // make changes to the KVStore. If a writer cannot + // be obtained a non-nil error is returned. + Writer() (KVWriter, error) + + // Reader returns a KVReader which can be used to + // read data from the KVStore. If a reader cannot + // be obtained a non-nil error is returned. + Reader() (KVReader, error) + + // Close closes the KVStore Close() error } +// KVReader is an abstraction of an **ISOLATED** reader +// In this context isolated is defined to mean that +// writes/deletes made after the KVReader is opened +// are not observed. +// Because there is usually a cost associated with +// keeping isolated readers active, users should +// close them as soon as they are no longer needed. +type KVReader interface { + + // Get returns the value associated with the key + // If the key does not exist, nil is returned. + // The caller owns the bytes returned. + Get(key []byte) ([]byte, error) + + // PrefixIterator returns a KVIterator that will + // visit all K/V pairs with the provided prefix + PrefixIterator(prefix []byte) KVIterator + + // RangeIterator returns a KVIterator that will + // visit all K/V pairs >= start AND < end + RangeIterator(start, end []byte) KVIterator + + // Close closes the iterator + Close() error +} + +// KVIterator is an abstraction around key iteration type KVIterator interface { - SeekFirst() - Seek([]byte) + + // Seek will advance the iterator to the specified key + Seek(key []byte) + + // Next will advance the iterator to the next key Next() - Current() ([]byte, []byte, bool) + // Key returns the key pointed to by the iterator + // The bytes returned are **ONLY** valid until the next call to Seek/Next/Close + // Continued use after that requires that they be copied. Key() []byte + + // Value returns the value pointed to by the iterator + // The bytes returned are **ONLY** valid until the next call to Seek/Next/Close + // Continued use after that requires that they be copied. 
Value() []byte + + // Valid returns whether or not the iterator is in a valid state Valid() bool + // Current returns Key(),Value(),Valid() in a single operation + Current() ([]byte, []byte, bool) + + // Close closes the iterator Close() error } -type KVStore interface { - Open() error - SetMergeOperator(MergeOperator) - Writer() (KVWriter, error) - Reader() (KVReader, error) - Close() error -} - +// KVWriter is an abstraction for mutating the KVStore +// KVWriter does **NOT** enforce restrictions of a single writer +// if the underlying KVStore allows concurrent writes, the +// KVWriter interface should also do so, it is up to the caller +// to do this in a way that is safe and makes sense type KVWriter interface { - KVReader - Set(key, val []byte) error - Delete(key []byte) error - NewBatch() KVBatch -} -type KVReader interface { - BytesSafeAfterClose() bool - Get(key []byte) ([]byte, error) - Iterator(key []byte) KVIterator + // NewBatch returns a KVBatch for performaing batch operations on this kvstore + NewBatch() KVBatch + + // ExecuteBatch will execute the KVBatch, the provided KVBatch **MUST** have + // been created by the same KVStore (though not necessarily the same KVWriter) + // Batch execution is atomic, either all the operations or none will be performed + ExecuteBatch(batch KVBatch) error + + // Close closes the writer Close() error } -type RangeIterable interface { - // iterates keys >= start and < end - RangeIterator(start, end []byte) KVIterator +// KVBatch is an abstraction for making multiple KV mutations at once +type KVBatch interface { + + // Set updates the key with the specified value + // both key and value []byte may be reused as soon as this call returns + Set(key, val []byte) + + // Delete removes the specified key + // the key []byte may be reused as soon as this call returns + Delete(key []byte) + + // Merge merges old value with the new value at the specified key + // as prescribed by the KVStores merge operator + // both key and value []byte may be reused as soon as this call returns + Merge(key, val []byte) + + // Reset frees resources for this batch and allows reuse + Reset() } diff --git a/index/store/merge.go b/index/store/merge.go index 390727bf..6e1a7da7 100644 --- a/index/store/merge.go +++ b/index/store/merge.go @@ -9,10 +9,6 @@ package store -import ( - "fmt" -) - // At the moment this happens to be the same interface as described by // RocksDB, but this may not always be the case. @@ -32,41 +28,20 @@ type MergeOperator interface { Name() string } -// EmulatedMergeSingle removes some duplicated code across -// KV stores which do not support merge operations -// on their own. 
It is up to the caller to ensure -// that an appropriate lock has been acquired in -// order for this behavior to be valid -func EmulatedMergeSingle(writer KVWriter, mo MergeOperator, key []byte, operand []byte) error { - existingValue, err := writer.Get(key) - if err != nil { - return err - } - newValue, ok := mo.FullMerge(key, existingValue, [][]byte{operand}) - if !ok { - return fmt.Errorf("merge operator returned failure") - } - err = writer.Set(key, newValue) - if err != nil { - return err - } - return nil -} - type EmulatedMerge struct { - merges map[string][][]byte + Merges map[string][][]byte mo MergeOperator } func NewEmulatedMerge(mo MergeOperator) *EmulatedMerge { return &EmulatedMerge{ - merges: make(map[string][][]byte), + Merges: make(map[string][][]byte), mo: mo, } } func (m *EmulatedMerge) Merge(key, val []byte) { - ops, ok := m.merges[string(key)] + ops, ok := m.Merges[string(key)] if ok && len(ops) > 0 { last := ops[len(ops)-1] mergedVal, partialMergeOk := m.mo.PartialMerge(key, last, val) @@ -80,41 +55,5 @@ func (m *EmulatedMerge) Merge(key, val []byte) { } else { ops = [][]byte{val} } - m.merges[string(key)] = ops -} - -func (m *EmulatedMerge) Execute(w KVWriter) error { - for k, mergeOps := range m.merges { - kb := []byte(k) - existingVal, err := w.Get(kb) - if err != nil { - return err - } - mergedVal, fullMergeOk := m.mo.FullMerge(kb, existingVal, mergeOps) - if !fullMergeOk { - return fmt.Errorf("merge operator returned failure") - } - err = w.Set(kb, mergedVal) - if err != nil { - return err - } - } - return nil -} - -func (m *EmulatedMerge) ExecuteDeferred(w KVWriter) ([]*op, error) { - rv := make([]*op, 0, 1000) - for k, mergeOps := range m.merges { - kb := []byte(k) - existingVal, err := w.Get(kb) - if err != nil { - return nil, err - } - mergedVal, fullMergeOk := m.mo.FullMerge(kb, existingVal, mergeOps) - if !fullMergeOk { - return nil, fmt.Errorf("merge operator returned failure") - } - rv = append(rv, &op{kb, mergedVal}) - } - return rv, nil + m.Merges[string(key)] = ops } diff --git a/index/store/metrics/batch.go b/index/store/metrics/batch.go new file mode 100644 index 00000000..8c30448f --- /dev/null +++ b/index/store/metrics/batch.go @@ -0,0 +1,26 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Batch struct { + s *Store + o store.KVBatch +} + +func (b *Batch) Set(key, val []byte) { + b.o.Set(key, val) +} + +func (b *Batch) Delete(key []byte) { + b.o.Delete(key) +} + +func (b *Batch) Merge(key, val []byte) { + b.s.TimerBatchMerge.Time(func() { + b.o.Merge(key, val) + }) +} + +func (b *Batch) Reset() { + b.o.Reset() +} diff --git a/index/store/metrics/iterator.go b/index/store/metrics/iterator.go new file mode 100644 index 00000000..ff44eb78 --- /dev/null +++ b/index/store/metrics/iterator.go @@ -0,0 +1,44 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Iterator struct { + s *Store + o store.KVIterator +} + +func (i *Iterator) Seek(x []byte) { + i.s.TimerIteratorSeek.Time(func() { + i.o.Seek(x) + }) +} + +func (i *Iterator) Next() { + i.s.TimerIteratorNext.Time(func() { + i.o.Next() + }) +} + +func (i *Iterator) Current() ([]byte, []byte, bool) { + return i.o.Current() +} + +func (i *Iterator) Key() []byte { + return i.o.Key() +} + +func (i *Iterator) Value() []byte { + return i.o.Value() +} + +func (i *Iterator) Valid() bool { + return i.o.Valid() +} + +func (i *Iterator) Close() error { + err := i.o.Close() + if err != nil { + i.s.AddError("Iterator.Close", err, nil) + } + return err +} 
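
The following standalone sketch is not part of the patch; it illustrates how the revised contract in index/store/kvstore.go above composes end to end: all mutations flow through a KVBatch applied atomically by ExecuteBatch, and readers are isolated snapshots. It assumes the gtreap store registered earlier in this patch, using the New(mo, config) constructor exercised by the gtreap tests above; the main package, nil config, and printed output are illustrative only.

package main

import (
	"fmt"

	"github.com/blevesearch/bleve/index/store/gtreap"
)

func main() {
	// open the gtreap store (nil MergeOperator, nil config), mirroring
	// the open() helper in the gtreap tests earlier in this patch
	s, err := gtreap.New(nil, nil)
	if err != nil {
		panic(err)
	}
	defer s.Close()

	// all mutations go through a batch, applied atomically by ExecuteBatch
	w, err := s.Writer()
	if err != nil {
		panic(err)
	}
	b := w.NewBatch()
	b.Set([]byte("a"), []byte("val-a")) // key/val may be reused once Set returns
	b.Set([]byte("b"), []byte("val-b"))
	b.Delete([]byte("b"))
	if err := w.ExecuteBatch(b); err != nil {
		panic(err)
	}
	b.Reset() // the batch may now be reused
	if err := w.Close(); err != nil {
		panic(err)
	}

	// readers are isolated snapshots; writes made after this point are not observed
	r, err := s.Reader()
	if err != nil {
		panic(err)
	}
	defer r.Close()

	v, _ := r.Get([]byte("a")) // the caller owns the returned bytes
	fmt.Printf("a = %s\n", v)

	// RangeIterator visits keys >= start and < end (nil end means unbounded)
	it := r.RangeIterator([]byte("a"), nil)
	defer it.Close()
	for k, val, ok := it.Current(); ok; k, val, ok = it.Current() {
		// Key/Value bytes are only valid until the next Seek/Next/Close
		fmt.Printf("%s = %s\n", k, val)
		it.Next()
	}
}
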
diff --git a/index/store/metrics/metrics.go b/index/store/metrics/metrics.go deleted file mode 100644 index 7651a848..00000000 --- a/index/store/metrics/metrics.go +++ /dev/null @@ -1,482 +0,0 @@ -// Copyright (c) 2015 Couchbase, Inc. -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the -// License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an "AS -// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either -// express or implied. See the License for the specific language -// governing permissions and limitations under the License. - -// Package metrics provides a bleve.store.KVStore implementation that -// wraps another, real KVStore implementation, and uses go-metrics to -// track runtime performance metrics. -package metrics - -import ( - "container/list" - "encoding/json" - "fmt" - "io" - "sync" - "time" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/registry" - - "github.com/rcrowley/go-metrics" -) - -const Name = "metrics" -const MaxErrors = 100 - -func init() { - registry.RegisterKVStore(Name, StoreConstructor) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - name, ok := config["kvStoreName_actual"].(string) - if !ok || name == "" { - return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+ - " config: %#v", config) - } - - if name == Name { - return nil, fmt.Errorf("metrics: circular kvStoreName_actual") - } - - ctr := registry.KVStoreConstructorByName(name) - if ctr == nil { - return nil, fmt.Errorf("metrics: no kv store constructor,"+ - " kvStoreName_actual: %s", name) - } - - kvs, err := ctr(config) - if err != nil { - return nil, err - } - - return NewBleveMetricsStore(kvs), nil -} - -func NewBleveMetricsStore(o store.KVStore) *Store { - return &Store{ - o: o, - - TimerReaderGet: metrics.NewTimer(), - TimerReaderIterator: metrics.NewTimer(), - TimerWriterGet: metrics.NewTimer(), - TimerWriterIterator: metrics.NewTimer(), - TimerWriterSet: metrics.NewTimer(), - TimerWriterDelete: metrics.NewTimer(), - TimerIteratorSeekFirst: metrics.NewTimer(), - TimerIteratorSeek: metrics.NewTimer(), - TimerIteratorNext: metrics.NewTimer(), - TimerBatchMerge: metrics.NewTimer(), - TimerBatchExecute: metrics.NewTimer(), - - errors: list.New(), - } -} - -// The following structs are wrappers around "real" bleve kvstore -// implementations. - -type Store struct { - o store.KVStore - - TimerReaderGet metrics.Timer - TimerReaderIterator metrics.Timer - TimerWriterGet metrics.Timer - TimerWriterIterator metrics.Timer - TimerWriterSet metrics.Timer - TimerWriterDelete metrics.Timer - TimerIteratorSeekFirst metrics.Timer - TimerIteratorSeek metrics.Timer - TimerIteratorNext metrics.Timer - TimerBatchMerge metrics.Timer - TimerBatchExecute metrics.Timer - - m sync.Mutex // Protects the fields that follow. - errors *list.List // Capped list of StoreError's. 
-} - -type StoreError struct { - Time string - Op string - Err string - Key string -} - -type Reader struct { - s *Store - o store.KVReader -} - -type Writer struct { - s *Store - o store.KVWriter -} - -type Iterator struct { - s *Store - o store.KVIterator -} - -type Batch struct { - s *Store - o store.KVBatch -} - -func (s *Store) Open() error { - return s.o.Open() -} - -func (s *Store) Close() error { - return s.o.Close() -} - -func (s *Store) SetMergeOperator(mo store.MergeOperator) { - s.o.SetMergeOperator(mo) -} - -func (s *Store) Reader() (store.KVReader, error) { - o, err := s.o.Reader() - if err != nil { - s.AddError("Reader", err, nil) - return nil, err - } - return &Reader{s: s, o: o}, nil -} - -func (s *Store) Writer() (store.KVWriter, error) { - o, err := s.o.Writer() - if err != nil { - s.AddError("Writer", err, nil) - return nil, err - } - return &Writer{s: s, o: o}, nil -} - -func (s *Store) Actual() store.KVStore { - return s.o -} - -func (w *Reader) BytesSafeAfterClose() bool { - return w.o.BytesSafeAfterClose() -} - -func (w *Reader) Get(key []byte) (v []byte, err error) { - w.s.TimerReaderGet.Time(func() { - v, err = w.o.Get(key) - if err != nil { - w.s.AddError("Reader.Get", err, key) - } - }) - return -} - -func (w *Reader) Iterator(key []byte) (i store.KVIterator) { - w.s.TimerReaderIterator.Time(func() { - i = &Iterator{s: w.s, o: w.o.Iterator(key)} - }) - return -} - -func (w *Reader) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Reader.Close", err, nil) - } - return err -} - -func (w *Writer) BytesSafeAfterClose() bool { - return w.o.BytesSafeAfterClose() -} - -func (w *Writer) Get(key []byte) (v []byte, err error) { - w.s.TimerWriterGet.Time(func() { - v, err = w.o.Get(key) - if err != nil { - w.s.AddError("Writer.Get", err, key) - } - }) - return -} - -func (w *Writer) Iterator(key []byte) (i store.KVIterator) { - w.s.TimerWriterIterator.Time(func() { - i = &Iterator{s: w.s, o: w.o.Iterator(key)} - }) - return -} - -func (w *Writer) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Writer.Close", err, nil) - } - return err -} - -func (w *Writer) Set(key, val []byte) (err error) { - w.s.TimerWriterSet.Time(func() { - err = w.o.Set(key, val) - if err != nil { - w.s.AddError("Writer.Set", err, key) - } - }) - return -} - -func (w *Writer) Delete(key []byte) (err error) { - w.s.TimerWriterDelete.Time(func() { - err = w.o.Delete(key) - if err != nil { - w.s.AddError("Writer.Delete", err, key) - } - }) - return -} - -func (w *Writer) NewBatch() store.KVBatch { - return &Batch{s: w.s, o: w.o.NewBatch()} -} - -func (w *Iterator) SeekFirst() { - w.s.TimerIteratorSeekFirst.Time(func() { - w.o.SeekFirst() - }) -} - -func (w *Iterator) Seek(x []byte) { - w.s.TimerIteratorSeek.Time(func() { - w.o.Seek(x) - }) -} - -func (w *Iterator) Next() { - w.s.TimerIteratorNext.Time(func() { - w.o.Next() - }) -} - -func (w *Iterator) Current() ([]byte, []byte, bool) { - return w.o.Current() -} - -func (w *Iterator) Key() []byte { - return w.o.Key() -} - -func (w *Iterator) Value() []byte { - return w.o.Value() -} - -func (w *Iterator) Valid() bool { - return w.o.Valid() -} - -func (w *Iterator) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Iterator.Close", err, nil) - } - return err -} - -func (w *Batch) Set(key, val []byte) { - w.o.Set(key, val) -} - -func (w *Batch) Delete(key []byte) { - w.o.Delete(key) -} - -func (w *Batch) Merge(key, val []byte) { - w.s.TimerBatchMerge.Time(func() { - w.o.Merge(key, val) - }) -} 
- -func (w *Batch) Execute() (err error) { - w.s.TimerBatchExecute.Time(func() { - err = w.o.Execute() - if err != nil { - w.s.AddError("Batch.Execute", err, nil) - } - }) - return -} - -func (w *Batch) Close() error { - err := w.o.Close() - if err != nil { - w.s.AddError("Batch.Close", err, nil) - } - return err -} - -// -------------------------------------------------------- - -func (s *Store) AddError(op string, err error, key []byte) { - e := &StoreError{ - Time: time.Now().Format(time.RFC3339Nano), - Op: op, - Err: fmt.Sprintf("%v", err), - Key: string(key), - } - - s.m.Lock() - for s.errors.Len() >= MaxErrors { - s.errors.Remove(s.errors.Front()) - } - s.errors.PushBack(e) - s.m.Unlock() -} - -// -------------------------------------------------------- - -func (s *Store) WriteJSON(w io.Writer) { - w.Write([]byte(`{"TimerReaderGet":`)) - WriteTimerJSON(w, s.TimerReaderGet) - w.Write([]byte(`,"TimerReaderIterator":`)) - WriteTimerJSON(w, s.TimerReaderIterator) - w.Write([]byte(`,"TimerWriterGet":`)) - WriteTimerJSON(w, s.TimerWriterGet) - w.Write([]byte(`,"TimerWriterIterator":`)) - WriteTimerJSON(w, s.TimerWriterIterator) - w.Write([]byte(`,"TimerWriterSet":`)) - WriteTimerJSON(w, s.TimerWriterSet) - w.Write([]byte(`,"TimerWriterDelete":`)) - WriteTimerJSON(w, s.TimerWriterDelete) - w.Write([]byte(`,"TimerIteratorSeekFirst":`)) - WriteTimerJSON(w, s.TimerIteratorSeekFirst) - w.Write([]byte(`,"TimerIteratorSeek":`)) - WriteTimerJSON(w, s.TimerIteratorSeek) - w.Write([]byte(`,"TimerIteratorNext":`)) - WriteTimerJSON(w, s.TimerIteratorNext) - w.Write([]byte(`,"TimerBatchMerge":`)) - WriteTimerJSON(w, s.TimerBatchMerge) - w.Write([]byte(`,"TimerBatchExecute":`)) - WriteTimerJSON(w, s.TimerBatchExecute) - - w.Write([]byte(`,"Errors":[`)) - s.m.Lock() - e := s.errors.Front() - i := 0 - for e != nil { - se, ok := e.Value.(*StoreError) - if ok && se != nil { - if i > 0 { - w.Write([]byte(",")) - } - buf, err := json.Marshal(se) - if err == nil { - w.Write(buf) - } - } - e = e.Next() - i = i + 1 - } - s.m.Unlock() - w.Write([]byte(`]`)) - - w.Write([]byte(`}`)) -} - -func (s *Store) WriteCSVHeader(w io.Writer) { - WriteTimerCSVHeader(w, "TimerReaderGet") - WriteTimerCSVHeader(w, "TimerReaderIterator") - WriteTimerCSVHeader(w, "TimerWriterGet") - WriteTimerCSVHeader(w, "TimerWriterIterator") - WriteTimerCSVHeader(w, "TimerWriterSet") - WriteTimerCSVHeader(w, "TimerWriterDelete") - WriteTimerCSVHeader(w, "TimerIteratorSeekFirst") - WriteTimerCSVHeader(w, "TimerIteratorSeek") - WriteTimerCSVHeader(w, "TimerIteratorNext") - WriteTimerCSVHeader(w, "TimerBatchMerge") - WriteTimerCSVHeader(w, "TimerBatchExecute") -} - -func (s *Store) WriteCSV(w io.Writer) { - WriteTimerCSV(w, s.TimerReaderGet) - WriteTimerCSV(w, s.TimerReaderIterator) - WriteTimerCSV(w, s.TimerWriterGet) - WriteTimerCSV(w, s.TimerWriterIterator) - WriteTimerCSV(w, s.TimerWriterSet) - WriteTimerCSV(w, s.TimerWriterDelete) - WriteTimerCSV(w, s.TimerIteratorSeekFirst) - WriteTimerCSV(w, s.TimerIteratorSeek) - WriteTimerCSV(w, s.TimerIteratorNext) - WriteTimerCSV(w, s.TimerBatchMerge) - WriteTimerCSV(w, s.TimerBatchExecute) -} - -// -------------------------------------------------------- - -// NOTE: This is copy & pasted from cbft as otherwise there -// would be an import cycle. 
- -var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999} - -func WriteTimerJSON(w io.Writer, timer metrics.Timer) { - t := timer.Snapshot() - p := t.Percentiles(timerPercentiles) - - fmt.Fprintf(w, `{"count":%9d,`, t.Count()) - fmt.Fprintf(w, `"min":%9d,`, t.Min()) - fmt.Fprintf(w, `"max":%9d,`, t.Max()) - fmt.Fprintf(w, `"mean":%12.2f,`, t.Mean()) - fmt.Fprintf(w, `"stddev":%12.2f,`, t.StdDev()) - fmt.Fprintf(w, `"percentiles":{`) - fmt.Fprintf(w, `"median":%12.2f,`, p[0]) - fmt.Fprintf(w, `"75%%":%12.2f,`, p[1]) - fmt.Fprintf(w, `"95%%":%12.2f,`, p[2]) - fmt.Fprintf(w, `"99%%":%12.2f,`, p[3]) - fmt.Fprintf(w, `"99.9%%":%12.2f},`, p[4]) - fmt.Fprintf(w, `"rates":{`) - fmt.Fprintf(w, `"1-min":%12.2f,`, t.Rate1()) - fmt.Fprintf(w, `"5-min":%12.2f,`, t.Rate5()) - fmt.Fprintf(w, `"15-min":%12.2f,`, t.Rate15()) - fmt.Fprintf(w, `"mean":%12.2f}}`, t.RateMean()) -} - -func WriteTimerCSVHeader(w io.Writer, prefix string) { - fmt.Fprintf(w, "%s-count,", prefix) - fmt.Fprintf(w, "%s-min,", prefix) - fmt.Fprintf(w, "%s-max,", prefix) - fmt.Fprintf(w, "%s-mean,", prefix) - fmt.Fprintf(w, "%s-stddev,", prefix) - fmt.Fprintf(w, "%s-percentile-50%%,", prefix) - fmt.Fprintf(w, "%s-percentile-75%%,", prefix) - fmt.Fprintf(w, "%s-percentile-95%%,", prefix) - fmt.Fprintf(w, "%s-percentile-99%%,", prefix) - fmt.Fprintf(w, "%s-percentile-99.9%%,", prefix) - fmt.Fprintf(w, "%s-rate-1-min,", prefix) - fmt.Fprintf(w, "%s-rate-5-min,", prefix) - fmt.Fprintf(w, "%s-rate-15-min,", prefix) - fmt.Fprintf(w, "%s-rate-mean", prefix) -} - -func WriteTimerCSV(w io.Writer, timer metrics.Timer) { - t := timer.Snapshot() - p := t.Percentiles(timerPercentiles) - - fmt.Fprintf(w, `%d,`, t.Count()) - fmt.Fprintf(w, `%d,`, t.Min()) - fmt.Fprintf(w, `%d,`, t.Max()) - fmt.Fprintf(w, `%f,`, t.Mean()) - fmt.Fprintf(w, `%f,`, t.StdDev()) - fmt.Fprintf(w, `%f,`, p[0]) - fmt.Fprintf(w, `%f,`, p[1]) - fmt.Fprintf(w, `%f,`, p[2]) - fmt.Fprintf(w, `%f,`, p[3]) - fmt.Fprintf(w, `%f,`, p[4]) - fmt.Fprintf(w, `%f,`, t.Rate1()) - fmt.Fprintf(w, `%f,`, t.Rate5()) - fmt.Fprintf(w, `%f,`, t.Rate15()) - fmt.Fprintf(w, `%f`, t.RateMean()) -} diff --git a/index/store/metrics/metrics_test.go b/index/store/metrics/metrics_test.go index 4c94fbfe..5b8e4735 100644 --- a/index/store/metrics/metrics_test.go +++ b/index/store/metrics/metrics_test.go @@ -16,35 +16,31 @@ import ( "bytes" "encoding/json" "fmt" - "reflect" "testing" - "github.com/blevesearch/bleve/index/store" - _ "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/store/gtreap" ) func TestMetricsStore(t *testing.T) { - s, err := StoreConstructor(map[string]interface{}{}) + s, err := New(nil, map[string]interface{}{}) if err == nil { t.Errorf("expected err when bad config") } - s, err = StoreConstructor(map[string]interface{}{ + s, err = New(nil, map[string]interface{}{ "kvStoreName_actual": "some-invalid-kvstore-name", }) if err == nil { t.Errorf("expected err when unknown kvStoreName_actual") } - s, err = StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", + s, err = New(nil, map[string]interface{}{ + "kvStoreName_actual": gtreap.Name, }) if err != nil { t.Fatal(err) } - CommonTestKVStore(t, s) - b := bytes.NewBuffer(nil) s.(*Store).WriteJSON(b) if b.Len() <= 0 { @@ -72,240 +68,9 @@ func TestMetricsStore(t *testing.T) { } } -func TestReaderIsolation(t *testing.T) { - s, err := StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", - }) - if err != nil { - t.Fatal(err) - } - - CommonTestReaderIsolation(t, 
s) -} - -func CommonTestKVStore(t *testing.T, s store.KVStore) { - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } - - batch := writer.NewBatch() - batch.Set([]byte("b"), []byte("val-b")) - batch.Set([]byte("c"), []byte("val-c")) - batch.Set([]byte("d"), []byte("val-d")) - batch.Set([]byte("e"), []byte("val-e")) - batch.Set([]byte("f"), []byte("val-f")) - batch.Set([]byte("g"), []byte("val-g")) - batch.Set([]byte("h"), []byte("val-h")) - batch.Set([]byte("i"), []byte("val-i")) - batch.Set([]byte("j"), []byte("val-j")) - - err = batch.Execute() - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - it := reader.Iterator([]byte("b")) - key, val, valid := it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "b" { - t.Fatalf("expected key b, got %s", key) - } - if string(val) != "val-b" { - t.Fatalf("expected value val-b, got %s", val) - } - - it.Next() - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "c" { - t.Fatalf("expected key c, got %s", key) - } - if string(val) != "val-c" { - t.Fatalf("expected value val-c, got %s", val) - } - - it.Seek([]byte("i")) - key, val, valid = it.Current() - if !valid { - t.Fatalf("valid false, expected true") - } - if string(key) != "i" { - t.Fatalf("expected key i, got %s", key) - } - if string(val) != "val-i" { - t.Fatalf("expected value val-i, got %s", val) - } - - err = it.Close() - if err != nil { - t.Fatal(err) - } -} - -func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { - // insert a kv pair - writer, err := s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // create an isolated reader - reader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := reader.Close() - if err != nil { - t.Fatal(err) - } - }() - - // verify that we see the value already inserted - val, err := reader.Get([]byte("a")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-a")) { - t.Errorf("expected val-a, got nil") - } - - // verify that an iterator sees it - count := 0 - it := reader.Iterator([]byte{0}) - defer func() { - err := it.Close() - if err != nil { - t.Fatal(err) - } - }() - for it.Valid() { - it.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } - - // add something after the reader was created - writer, err = s.Writer() - if err != nil { - t.Error(err) - } - err = writer.Set([]byte("b"), []byte("val-b")) - if err != nil { - t.Fatal(err) - } - err = writer.Close() - if err != nil { - t.Fatal(err) - } - - // ensure that a newer reader sees it - newReader, err := s.Reader() - if err != nil { - t.Error(err) - } - defer func() { - err := newReader.Close() - if err != nil { - t.Fatal(err) - } - }() - val, err = newReader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if !reflect.DeepEqual(val, []byte("val-b")) { - t.Errorf("expected val-b, got nil") - } - - // 
ensure that the director iterator sees it - count = 0 - it2 := newReader.Iterator([]byte{0}) - defer func() { - err := it2.Close() - if err != nil { - t.Fatal(err) - } - }() - for it2.Valid() { - it2.Next() - count++ - } - if count != 2 { - t.Errorf("expected iterator to see 2, saw %d", count) - } - - // but that the isolated reader does not - val, err = reader.Get([]byte("b")) - if err != nil { - t.Error(err) - } - if val != nil { - t.Errorf("expected nil, got %v", val) - } - - // and ensure that the iterator on the isolated reader also does not - count = 0 - it3 := reader.Iterator([]byte{0}) - defer func() { - err := it3.Close() - if err != nil { - t.Fatal(err) - } - }() - for it3.Valid() { - it3.Next() - count++ - } - if count != 1 { - t.Errorf("expected iterator to see 1, saw %d", count) - } -} - func TestErrors(t *testing.T) { - s, err := StoreConstructor(map[string]interface{}{ - "kvStoreName_actual": "gtreap", + s, err := New(nil, map[string]interface{}{ + "kvStoreName_actual": gtreap.Name, }) if err != nil { t.Fatal(err) diff --git a/index/store/metrics/reader.go b/index/store/metrics/reader.go new file mode 100644 index 00000000..c555c736 --- /dev/null +++ b/index/store/metrics/reader.go @@ -0,0 +1,40 @@ +package metrics + +import "github.com/blevesearch/bleve/index/store" + +type Reader struct { + s *Store + o store.KVReader +} + +func (r *Reader) Get(key []byte) (v []byte, err error) { + r.s.TimerReaderGet.Time(func() { + v, err = r.o.Get(key) + if err != nil { + r.s.AddError("Reader.Get", err, key) + } + }) + return +} + +func (r *Reader) PrefixIterator(prefix []byte) (i store.KVIterator) { + r.s.TimerReaderPrefixIterator.Time(func() { + i = &Iterator{s: r.s, o: r.o.PrefixIterator(prefix)} + }) + return +} + +func (r *Reader) RangeIterator(start, end []byte) (i store.KVIterator) { + r.s.TimerReaderRangeIterator.Time(func() { + i = &Iterator{s: r.s, o: r.o.RangeIterator(start, end)} + }) + return +} + +func (r *Reader) Close() error { + err := r.o.Close() + if err != nil { + r.s.AddError("Reader.Close", err, nil) + } + return err +} diff --git a/index/store/metrics/store.go b/index/store/metrics/store.go new file mode 100644 index 00000000..fab47afd --- /dev/null +++ b/index/store/metrics/store.go @@ -0,0 +1,196 @@ +// Copyright (c) 2015 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an "AS +// IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either +// express or implied. See the License for the specific language +// governing permissions and limitations under the License. + +// Package metrics provides a bleve.store.KVStore implementation that +// wraps another, real KVStore implementation, and uses go-metrics to +// track runtime performance metrics. 
+package metrics + +import ( + "container/list" + "encoding/json" + "fmt" + "io" + "sync" + "time" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/registry" + "github.com/rcrowley/go-metrics" +) + +const Name = "metrics" + +type Store struct { + o store.KVStore + + TimerReaderGet metrics.Timer + TimerReaderPrefixIterator metrics.Timer + TimerReaderRangeIterator metrics.Timer + TimerWriterExecuteBatch metrics.Timer + TimerIteratorSeek metrics.Timer + TimerIteratorNext metrics.Timer + TimerBatchMerge metrics.Timer + + m sync.Mutex // Protects the fields that follow. + errors *list.List // Capped list of StoreError's. +} + +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + + name, ok := config["kvStoreName_actual"].(string) + if !ok || name == "" { + return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+ + " config: %#v", config) + } + + if name == Name { + return nil, fmt.Errorf("metrics: circular kvStoreName_actual") + } + + ctr := registry.KVStoreConstructorByName(name) + if ctr == nil { + return nil, fmt.Errorf("metrics: no kv store constructor,"+ + " kvStoreName_actual: %s", name) + } + + kvs, err := ctr(mo, config) + if err != nil { + return nil, err + } + + return &Store{ + o: kvs, + + TimerReaderGet: metrics.NewTimer(), + TimerReaderPrefixIterator: metrics.NewTimer(), + TimerReaderRangeIterator: metrics.NewTimer(), + TimerWriterExecuteBatch: metrics.NewTimer(), + TimerIteratorSeek: metrics.NewTimer(), + TimerIteratorNext: metrics.NewTimer(), + TimerBatchMerge: metrics.NewTimer(), + + errors: list.New(), + }, nil +} + +func init() { + registry.RegisterKVStore(Name, New) +} + +func (s *Store) Close() error { + return s.o.Close() +} + +func (s *Store) Reader() (store.KVReader, error) { + o, err := s.o.Reader() + if err != nil { + s.AddError("Reader", err, nil) + return nil, err + } + return &Reader{s: s, o: o}, nil +} + +func (s *Store) Writer() (store.KVWriter, error) { + o, err := s.o.Writer() + if err != nil { + s.AddError("Writer", err, nil) + return nil, err + } + return &Writer{s: s, o: o}, nil +} + +// Metric specific code below: + +const MaxErrors = 100 + +type StoreError struct { + Time string + Op string + Err string + Key string +} + +func (s *Store) AddError(op string, err error, key []byte) { + e := &StoreError{ + Time: time.Now().Format(time.RFC3339Nano), + Op: op, + Err: fmt.Sprintf("%v", err), + Key: string(key), + } + + s.m.Lock() + for s.errors.Len() >= MaxErrors { + s.errors.Remove(s.errors.Front()) + } + s.errors.PushBack(e) + s.m.Unlock() +} + +func (s *Store) WriteJSON(w io.Writer) { + w.Write([]byte(`{"TimerReaderGet":`)) + WriteTimerJSON(w, s.TimerReaderGet) + w.Write([]byte(`,"TimerReaderPrefixIterator":`)) + WriteTimerJSON(w, s.TimerReaderPrefixIterator) + w.Write([]byte(`,"TimerReaderRangeIterator":`)) + WriteTimerJSON(w, s.TimerReaderRangeIterator) + w.Write([]byte(`,"TimerWriterExecuteBatch":`)) + WriteTimerJSON(w, s.TimerWriterExecuteBatch) + w.Write([]byte(`,"TimerIteratorSeek":`)) + WriteTimerJSON(w, s.TimerIteratorSeek) + w.Write([]byte(`,"TimerIteratorNext":`)) + WriteTimerJSON(w, s.TimerIteratorNext) + w.Write([]byte(`,"TimerBatchMerge":`)) + WriteTimerJSON(w, s.TimerBatchMerge) + + w.Write([]byte(`,"Errors":[`)) + s.m.Lock() + e := s.errors.Front() + i := 0 + for e != nil { + se, ok := e.Value.(*StoreError) + if ok && se != nil { + if i > 0 { + w.Write([]byte(",")) + } + buf, err := json.Marshal(se) + if err == nil { + w.Write(buf) + } + } + e = e.Next() + i = i + 1 + } + 
s.m.Unlock() + w.Write([]byte(`]`)) + + w.Write([]byte(`}`)) +} + +func (s *Store) WriteCSVHeader(w io.Writer) { + WriteTimerCSVHeader(w, "TimerReaderGet") + WriteTimerCSVHeader(w, "TimerReaderPrefixIterator") + WriteTimerCSVHeader(w, "TimerReaderRangeIterator") + WriteTimerCSVHeader(w, "TimerWtierExecuteBatch") + WriteTimerCSVHeader(w, "TimerIteratorSeek") + WriteTimerCSVHeader(w, "TimerIteratorNext") + WriteTimerCSVHeader(w, "TimerBatchMerge") +} + +func (s *Store) WriteCSV(w io.Writer) { + WriteTimerCSV(w, s.TimerReaderGet) + WriteTimerCSV(w, s.TimerReaderPrefixIterator) + WriteTimerCSV(w, s.TimerReaderRangeIterator) + WriteTimerCSV(w, s.TimerWriterExecuteBatch) + WriteTimerCSV(w, s.TimerIteratorSeek) + WriteTimerCSV(w, s.TimerIteratorNext) + WriteTimerCSV(w, s.TimerBatchMerge) +} diff --git a/index/store/metrics/store_test.go b/index/store/metrics/store_test.go new file mode 100644 index 00000000..0c1c4b91 --- /dev/null +++ b/index/store/metrics/store_test.go @@ -0,0 +1,119 @@ +package metrics + +import ( + "os" + "testing" + + "github.com/blevesearch/bleve/index/store" + "github.com/blevesearch/bleve/index/store/gtreap" + "github.com/blevesearch/bleve/index/store/test" +) + +func open(mo store.MergeOperator) (store.KVStore, error) { + return New(mo, map[string]interface{}{"kvStoreName_actual": gtreap.Name}) +} + +func TestMetricsKVCrud(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestKVCrud(t, s) +} + +func TestMetricsReaderIsolation(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestReaderIsolation(t, s) +} + +func TestMetricsReaderOwnsGetBytes(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestReaderOwnsGetBytes(t, s) +} + +func TestMetricsWriterOwnsBytes(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestWriterOwnsBytes(t, s) +} + +func TestMetricsPrefixIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestPrefixIterator(t, s) +} + +func TestMetricsRangeIterator(t *testing.T) { + s, err := open(nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestRangeIterator(t, s) +} + +func TestMetricsMerge(t *testing.T) { + s, err := open(&test.TestMergeCounter{}) + if err != nil { + t.Fatal(err) + } + defer func() { + err := os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } + }() + + test.CommonTestMerge(t, s) +} diff --git a/index/store/metrics/util.go b/index/store/metrics/util.go new file mode 100644 index 00000000..053e38ee --- /dev/null +++ b/index/store/metrics/util.go @@ -0,0 +1,72 @@ +package metrics + +import ( + "fmt" + "io" + + "github.com/rcrowley/go-metrics" +) + +// NOTE: This is copy & pasted from cbft as otherwise there +// would be an import cycle. 
+ +var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999} + +func WriteTimerJSON(w io.Writer, timer metrics.Timer) { + t := timer.Snapshot() + p := t.Percentiles(timerPercentiles) + + fmt.Fprintf(w, `{"count":%9d,`, t.Count()) + fmt.Fprintf(w, `"min":%9d,`, t.Min()) + fmt.Fprintf(w, `"max":%9d,`, t.Max()) + fmt.Fprintf(w, `"mean":%12.2f,`, t.Mean()) + fmt.Fprintf(w, `"stddev":%12.2f,`, t.StdDev()) + fmt.Fprintf(w, `"percentiles":{`) + fmt.Fprintf(w, `"median":%12.2f,`, p[0]) + fmt.Fprintf(w, `"75%%":%12.2f,`, p[1]) + fmt.Fprintf(w, `"95%%":%12.2f,`, p[2]) + fmt.Fprintf(w, `"99%%":%12.2f,`, p[3]) + fmt.Fprintf(w, `"99.9%%":%12.2f},`, p[4]) + fmt.Fprintf(w, `"rates":{`) + fmt.Fprintf(w, `"1-min":%12.2f,`, t.Rate1()) + fmt.Fprintf(w, `"5-min":%12.2f,`, t.Rate5()) + fmt.Fprintf(w, `"15-min":%12.2f,`, t.Rate15()) + fmt.Fprintf(w, `"mean":%12.2f}}`, t.RateMean()) +} + +func WriteTimerCSVHeader(w io.Writer, prefix string) { + fmt.Fprintf(w, "%s-count,", prefix) + fmt.Fprintf(w, "%s-min,", prefix) + fmt.Fprintf(w, "%s-max,", prefix) + fmt.Fprintf(w, "%s-mean,", prefix) + fmt.Fprintf(w, "%s-stddev,", prefix) + fmt.Fprintf(w, "%s-percentile-50%%,", prefix) + fmt.Fprintf(w, "%s-percentile-75%%,", prefix) + fmt.Fprintf(w, "%s-percentile-95%%,", prefix) + fmt.Fprintf(w, "%s-percentile-99%%,", prefix) + fmt.Fprintf(w, "%s-percentile-99.9%%,", prefix) + fmt.Fprintf(w, "%s-rate-1-min,", prefix) + fmt.Fprintf(w, "%s-rate-5-min,", prefix) + fmt.Fprintf(w, "%s-rate-15-min,", prefix) + fmt.Fprintf(w, "%s-rate-mean", prefix) +} + +func WriteTimerCSV(w io.Writer, timer metrics.Timer) { + t := timer.Snapshot() + p := t.Percentiles(timerPercentiles) + + fmt.Fprintf(w, `%d,`, t.Count()) + fmt.Fprintf(w, `%d,`, t.Min()) + fmt.Fprintf(w, `%d,`, t.Max()) + fmt.Fprintf(w, `%f,`, t.Mean()) + fmt.Fprintf(w, `%f,`, t.StdDev()) + fmt.Fprintf(w, `%f,`, p[0]) + fmt.Fprintf(w, `%f,`, p[1]) + fmt.Fprintf(w, `%f,`, p[2]) + fmt.Fprintf(w, `%f,`, p[3]) + fmt.Fprintf(w, `%f,`, p[4]) + fmt.Fprintf(w, `%f,`, t.Rate1()) + fmt.Fprintf(w, `%f,`, t.Rate5()) + fmt.Fprintf(w, `%f,`, t.Rate15()) + fmt.Fprintf(w, `%f`, t.RateMean()) +} diff --git a/index/store/metrics/writer.go b/index/store/metrics/writer.go new file mode 100644 index 00000000..0f23bfdf --- /dev/null +++ b/index/store/metrics/writer.go @@ -0,0 +1,38 @@ +package metrics + +import ( + "fmt" + + "github.com/blevesearch/bleve/index/store" +) + +type Writer struct { + s *Store + o store.KVWriter +} + +func (w *Writer) Close() error { + err := w.o.Close() + if err != nil { + w.s.AddError("Writer.Close", err, nil) + } + return err +} + +func (w *Writer) NewBatch() store.KVBatch { + return &Batch{s: w.s, o: w.o.NewBatch()} +} + +func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) { + batch, ok := b.(*Batch) + if !ok { + return fmt.Errorf("wrong type of batch") + } + w.s.TimerWriterExecuteBatch.Time(func() { + err = w.o.ExecuteBatch(batch.o) + if err != nil { + w.s.AddError("Writer.ExecuteBatch", err, nil) + } + }) + return +} diff --git a/index/store/null/null.go b/index/store/null/null.go index f5d9d450..53ebbca8 100644 --- a/index/store/null/null.go +++ b/index/store/null/null.go @@ -18,167 +18,87 @@ const Name = "null" type Store struct{} -func New() (*Store, error) { - rv := Store{} - return &rv, nil -} - -func (i *Store) Open() error { - return nil -} - -func (i *Store) SetMergeOperator(mo store.MergeOperator) { - +func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) { + return &Store{}, nil } func (i *Store) Close() error { 
return nil } -func (i *Store) iterator(key []byte) store.KVIterator { - rv := newIterator(i) - return rv -} - func (i *Store) Reader() (store.KVReader, error) { - return newReader(i) + return &reader{}, nil } func (i *Store) Writer() (store.KVWriter, error) { - return newWriter(i) + return &writer{}, nil } -func (i *Store) newBatch() store.KVBatch { - return newBatch(i) -} +type reader struct{} -type Reader struct { - store *Store -} - -func newReader(store *Store) (*Reader, error) { - return &Reader{ - store: store, - }, nil -} - -func (r *Reader) BytesSafeAfterClose() bool { - return true -} - -func (r *Reader) Get(key []byte) ([]byte, error) { +func (r *reader) Get(key []byte) ([]byte, error) { return nil, nil } -func (r *Reader) Iterator(key []byte) store.KVIterator { - return r.store.iterator(key) +func (r *reader) PrefixIterator(prefix []byte) store.KVIterator { + return &iterator{} } -func (r *Reader) Close() error { +func (r *reader) RangeIterator(start, end []byte) store.KVIterator { + return &iterator{} +} + +func (r *reader) Close() error { return nil } -type Iterator struct{} +type iterator struct{} -func newIterator(store *Store) *Iterator { - return &Iterator{} -} +func (i *iterator) SeekFirst() {} +func (i *iterator) Seek(k []byte) {} +func (i *iterator) Next() {} -func (i *Iterator) SeekFirst() {} - -func (i *Iterator) Seek(k []byte) {} - -func (i *Iterator) Next() {} - -func (i *Iterator) Current() ([]byte, []byte, bool) { +func (i *iterator) Current() ([]byte, []byte, bool) { return nil, nil, false } -func (i *Iterator) Key() []byte { +func (i *iterator) Key() []byte { return nil } -func (i *Iterator) Value() []byte { +func (i *iterator) Value() []byte { return nil } -func (i *Iterator) Valid() bool { +func (i *iterator) Valid() bool { return false } -func (i *Iterator) Close() error { +func (i *iterator) Close() error { return nil } -type Batch struct{} +type batch struct{} -func newBatch(s *Store) *Batch { - rv := Batch{} - return &rv +func (i *batch) Set(key, val []byte) {} +func (i *batch) Delete(key []byte) {} +func (i *batch) Merge(key, val []byte) {} +func (i *batch) Reset() {} + +type writer struct{} + +func (w *writer) NewBatch() store.KVBatch { + return &batch{} } -func (i *Batch) Set(key, val []byte) { -} - -func (i *Batch) Delete(key []byte) { -} - -func (i *Batch) Merge(key, val []byte) { -} - -func (i *Batch) Execute() error { +func (w *writer) ExecuteBatch(store.KVBatch) error { return nil } -func (i *Batch) Close() error { +func (w *writer) Close() error { return nil } -type Writer struct { - store *Store -} - -func newWriter(store *Store) (*Writer, error) { - return &Writer{ - store: store, - }, nil -} - -func (w *Writer) BytesSafeAfterClose() bool { - return true -} - -func (w *Writer) Set(key, val []byte) error { - return nil -} - -func (w *Writer) Delete(key []byte) error { - return nil -} - -func (w *Writer) NewBatch() store.KVBatch { - return newBatch(w.store) -} - -func (w *Writer) Close() error { - return nil -} - -// these two methods can safely read using the regular -// methods without a read transaction, because we know -// that no one else is writing but us -func (w *Writer) Get(key []byte) ([]byte, error) { - return nil, nil -} - -func (w *Writer) Iterator(key []byte) store.KVIterator { - return w.store.iterator(key) -} - -func StoreConstructor(config map[string]interface{}) (store.KVStore, error) { - return New() -} - func init() { - registry.RegisterKVStore(Name, StoreConstructor) + registry.RegisterKVStore(Name, New) } diff --git 
a/index/store/null/null_test.go b/index/store/null/null_test.go index 720d482a..b0b50be1 100644 --- a/index/store/null/null_test.go +++ b/index/store/null/null_test.go @@ -7,7 +7,7 @@ import ( ) func TestStore(t *testing.T) { - s, err := New() + s, err := New(nil, nil) if err != nil { t.Fatal(err) } @@ -21,18 +21,6 @@ func CommonTestKVStore(t *testing.T, s store.KVStore) { if err != nil { t.Error(err) } - err = writer.Set([]byte("a"), []byte("val-a")) - if err != nil { - t.Fatal(err) - } - err = writer.Set([]byte("z"), []byte("val-z")) - if err != nil { - t.Fatal(err) - } - err = writer.Delete([]byte("z")) - if err != nil { - t.Fatal(err) - } batch := writer.NewBatch() batch.Set([]byte("b"), []byte("val-b")) @@ -45,7 +33,7 @@ func CommonTestKVStore(t *testing.T, s store.KVStore) { batch.Set([]byte("i"), []byte("val-i")) batch.Set([]byte("j"), []byte("val-j")) - err = batch.Execute() + err = writer.ExecuteBatch(batch) if err != nil { t.Fatal(err) } @@ -64,7 +52,7 @@ func CommonTestKVStore(t *testing.T, s store.KVStore) { t.Fatal(err) } }() - it := reader.Iterator([]byte("b")) + it := reader.RangeIterator([]byte("b"), nil) key, val, valid := it.Current() if valid { t.Fatalf("valid true, expected false") diff --git a/index/store/test/bytes.go b/index/store/test/bytes.go new file mode 100644 index 00000000..6a124abf --- /dev/null +++ b/index/store/test/bytes.go @@ -0,0 +1,274 @@ +package test + +import ( + "bytes" + "reflect" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests which focus on the byte ownership + +// CommonTestReaderOwnsGetBytes attempts to mutate the returned bytes +// first, while the reader is still open, second after that reader is +// closed, then the original key is read again, to ensure these +// modifications did not cause panic, or mutate the stored value +func CommonTestReaderOwnsGetBytes(t *testing.T, s store.KVStore) { + + originalKey := []byte("key") + originalVal := []byte("val") + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write key/val + batch := writer.NewBatch() + batch.Set(originalKey, originalVal) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // read key + returnedVal, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal, originalVal) { + t.Fatalf("expected value: %v for '%s', got %v", originalVal, originalKey, returnedVal) + } + + // mutate the returned value with reader still open + for i := range returnedVal { + returnedVal[i] = '1' + } + + // read the key again + returnedVal2, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal2, originalVal) { + t.Fatalf("expected value: %v for '%s', got %v", originalVal, originalKey, returnedVal2) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // mutate the original returned value again + for i := range returnedVal { + returnedVal[i] = '2' + } + + // open another reader + reader, err = s.Reader() + if err != nil { + t.Fatal(err) + } + + // read the key again + returnedVal3, err := reader.Get(originalKey) + if err != nil { + t.Fatal(err) + } + + // check that it is the expected value + if !reflect.DeepEqual(returnedVal3, 
originalVal) {
+		t.Fatalf("expected value: %v for '%s', got %v", originalVal, originalKey, returnedVal3)
+	}
+
+	// close the reader
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// close the store
+	err = s.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// finally check that the value we mutated still has what we set it to
+	for i := range returnedVal {
+		if returnedVal[i] != '2' {
+			t.Errorf("expected byte to be '2', got %v", returnedVal[i])
+		}
+	}
+}
+
+func CommonTestWriterOwnsBytes(t *testing.T, s store.KVStore) {
+
+	keyBuffer := make([]byte, 5)
+	valBuffer := make([]byte, 5)
+
+	// open a writer
+	writer, err := s.Writer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// write key/val pairs reusing same buffer
+	batch := writer.NewBatch()
+	for i := 0; i < 10; i++ {
+		keyBuffer[0] = 'k'
+		keyBuffer[1] = 'e'
+		keyBuffer[2] = 'y'
+		keyBuffer[3] = '-'
+		keyBuffer[4] = byte('0' + i)
+		valBuffer[0] = 'v'
+		valBuffer[1] = 'a'
+		valBuffer[2] = 'l'
+		valBuffer[3] = '-'
+		valBuffer[4] = byte('0' + i)
+		batch.Set(keyBuffer, valBuffer)
+	}
+	err = writer.ExecuteBatch(batch)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// close the writer
+	err = writer.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// open a reader
+	reader, err := s.Reader()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// check that we can read back what we expect
+	allks := make([][]byte, 0)
+	allvs := make([][]byte, 0)
+	iter := reader.RangeIterator(nil, nil)
+	for iter.Valid() {
+		// if we want to keep bytes from iteration we must copy
+		k := iter.Key()
+		copyk := make([]byte, len(k))
+		copy(copyk, k)
+		allks = append(allks, copyk)
+		v := iter.Value()
+		copyv := make([]byte, len(v))
+		copy(copyv, v)
+		allvs = append(allvs, copyv)
+		iter.Next()
+	}
+	err = iter.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(allks) != 10 {
+		t.Fatalf("expected 10 k/v pairs, got %d", len(allks))
+	}
+	for i, key := range allks {
+		val := allvs[i]
+		if !bytes.HasSuffix(key, []byte{byte('0' + i)}) {
+			t.Errorf("expected key %v to end in %d", key, []byte{byte('0' + i)})
+		}
+		if !bytes.HasSuffix(val, []byte{byte('0' + i)}) {
+			t.Errorf("expected val %v to end in %d", val, []byte{byte('0' + i)})
+		}
+	}
+
+	// close the reader
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// open a writer
+	writer, err = s.Writer()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// now delete using same approach
+	batch = writer.NewBatch()
+	for i := 0; i < 10; i++ {
+		keyBuffer[0] = 'k'
+		keyBuffer[1] = 'e'
+		keyBuffer[2] = 'y'
+		keyBuffer[3] = '-'
+		keyBuffer[4] = byte('0' + i)
+		batch.Delete(keyBuffer)
+	}
+	err = writer.ExecuteBatch(batch)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// close the writer
+	err = writer.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// open a reader
+	reader, err = s.Reader()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// check that we can read back what we expect
+	allks = make([][]byte, 0)
+	iter = reader.RangeIterator(nil, nil)
+	for iter.Valid() {
+		// if we want to keep bytes from iteration we must copy
+		k := iter.Key()
+		copyk := make([]byte, len(k))
+		copy(copyk, k)
+		allks = append(allks, copyk)
+		v := iter.Value()
+		copyv := make([]byte, len(v))
+		copy(copyv, v)
+		allvs = append(allvs, copyv)
+		iter.Next()
+	}
+	err = iter.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	if len(allks) != 0 {
+		t.Fatalf("expected 0 k/v pairs remaining, got %d", len(allks))
+	}
+
+	// close the reader
+	err = reader.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// close the store
+	err = s.Close()
+	if err != nil 
{ + t.Fatal(err) + } +} diff --git a/index/store/test/crud.go b/index/store/test/crud.go new file mode 100644 index 00000000..ce4c37e6 --- /dev/null +++ b/index/store/test/crud.go @@ -0,0 +1,98 @@ +package test + +import ( + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// basic crud tests + +func CommonTestKVCrud(t *testing.T, s store.KVStore) { + + writer, err := s.Writer() + if err != nil { + t.Error(err) + } + + batch := writer.NewBatch() + batch.Set([]byte("a"), []byte("val-a")) + batch.Set([]byte("z"), []byte("val-z")) + batch.Delete([]byte("z")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + batch.Reset() + + batch.Set([]byte("b"), []byte("val-b")) + batch.Set([]byte("c"), []byte("val-c")) + batch.Set([]byte("d"), []byte("val-d")) + batch.Set([]byte("e"), []byte("val-e")) + batch.Set([]byte("f"), []byte("val-f")) + batch.Set([]byte("g"), []byte("val-g")) + batch.Set([]byte("h"), []byte("val-h")) + batch.Set([]byte("i"), []byte("val-i")) + batch.Set([]byte("j"), []byte("val-j")) + + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + reader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + it := reader.RangeIterator([]byte("b"), nil) + key, val, valid := it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "b" { + t.Fatalf("expected key b, got %s", key) + } + if string(val) != "val-b" { + t.Fatalf("expected value val-b, got %s", val) + } + + it.Next() + key, val, valid = it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "c" { + t.Fatalf("expected key c, got %s", key) + } + if string(val) != "val-c" { + t.Fatalf("expected value val-c, got %s", val) + } + + it.Seek([]byte("i")) + key, val, valid = it.Current() + if !valid { + t.Fatalf("valid false, expected true") + } + if string(key) != "i" { + t.Fatalf("expected key i, got %s", key) + } + if string(val) != "val-i" { + t.Fatalf("expected value val-i, got %s", val) + } + + err = it.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/index/store/test/isolation.go b/index/store/test/isolation.go new file mode 100644 index 00000000..9791bab5 --- /dev/null +++ b/index/store/test/isolation.go @@ -0,0 +1,177 @@ +package test + +import ( + "fmt" + "reflect" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests focused on verifying that readers are isolated from writers + +func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { + // insert a kv pair + writer, err := s.Writer() + if err != nil { + t.Error(err) + } + + // ************************************************** + // this is a hack only required for BoltDB + // however it's harmless, so to keep the tests + // the same everywhere, we include it here + // + // this is a hack to try to pre-emptively overflow + // boltdb writes *MAY* block a long reader + // in particular, if the write requires additional + // allocation, it must acquire the same lock as + // the reader, thus cannot continue until that + // reader is closed.
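
The shared helpers in this new test package (CRUD above, isolation, iterators and merge below) are meant to be driven by each KVStore package's own test file. As a rough sketch only: the open helper, the choice of TestMergeCounter as the merge operator, and the nil config map are illustrative assumptions, and the gtreap store_test.go actually added by this patch may wire things up differently.

package gtreap_test

import (
	"testing"

	"github.com/blevesearch/bleve/index/store"
	"github.com/blevesearch/bleve/index/store/gtreap"
	"github.com/blevesearch/bleve/index/store/test"
	"github.com/blevesearch/bleve/registry"
)

// open builds a fresh store through the registry, the same lookup
// upside_down now performs; importing the gtreap package registers it.
func open(t *testing.T) store.KVStore {
	constructor := registry.KVStoreConstructorByName(gtreap.Name)
	if constructor == nil {
		t.Fatal("gtreap store not registered")
	}
	// TestMergeCounter comes from the shared test package; a nil config
	// is assumed to be acceptable for this in-memory store
	s, err := constructor(&test.TestMergeCounter{}, nil)
	if err != nil {
		t.Fatal(err)
	}
	return s
}

func TestGTreapKVCrud(t *testing.T) {
	s := open(t)
	defer func() {
		if err := s.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	test.CommonTestKVCrud(t, s)
}
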
+ // in general this is not a problem for bleve + // (though it may affect performance in some cases) + // but it is a problem for this test which attempts + // to easily verify that readers are isolated + // this hack writes enough initial data such that + // the subsequent writes do not require additional + // space + hackSize := 1000 + batch := writer.NewBatch() + for i := 0; i < hackSize; i++ { + k := fmt.Sprintf("x%d", i) + batch.Set([]byte(k), []byte("filler")) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + // ************************************************** + + batch = writer.NewBatch() + batch.Set([]byte("a"), []byte("val-a")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // create an isolated reader + reader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := reader.Close() + if err != nil { + t.Fatal(err) + } + }() + + // verify that we see the value already inserted + val, err := reader.Get([]byte("a")) + if err != nil { + t.Error(err) + } + if !reflect.DeepEqual(val, []byte("val-a")) { + t.Errorf("expected val-a, got %s", val) + } + + // verify that an iterator sees it + count := 0 + it := reader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it.Close() + if err != nil { + t.Fatal(err) + } + }() + for it.Valid() { + it.Next() + count++ + } + if count != 1 { + t.Errorf("expected iterator to see 1, saw %d", count) + } + + // add something after the reader was created + writer, err = s.Writer() + if err != nil { + t.Error(err) + } + batch = writer.NewBatch() + batch.Set([]byte("b"), []byte("val-b")) + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // ensure that a newer reader sees it + newReader, err := s.Reader() + if err != nil { + t.Error(err) + } + defer func() { + err := newReader.Close() + if err != nil { + t.Fatal(err) + } + }() + val, err = newReader.Get([]byte("b")) + if err != nil { + t.Error(err) + } + if !reflect.DeepEqual(val, []byte("val-b")) { + t.Errorf("expected val-b, got %s", val) + } + + // ensure that the direct iterator sees it + count = 0 + it = newReader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it.Close() + if err != nil { + t.Fatal(err) + } + }() + for it.Valid() { + it.Next() + count++ + } + if count != 2 { + t.Errorf("expected iterator to see 2, saw %d", count) + } + + // but that the isolated reader does not + val, err = reader.Get([]byte("b")) + if err != nil { + t.Error(err) + } + if val != nil { + t.Errorf("expected nil, got %v", val) + } + + // and ensure that the iterator on the isolated reader also does not + count = 0 + it = reader.RangeIterator([]byte{0}, []byte{'x'}) + defer func() { + err := it.Close() + if err != nil { + t.Fatal(err) + } + }() + for it.Valid() { + it.Next() + count++ + } + if count != 1 { + t.Errorf("expected iterator to see 1, saw %d", count) + } + +} diff --git a/index/store/test/iterator.go b/index/store/test/iterator.go new file mode 100644 index 00000000..59217450 --- /dev/null +++ b/index/store/test/iterator.go @@ -0,0 +1,289 @@ +package test + +import ( + "bytes" + "reflect" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// tests around the correct behavior of iterators + +func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { + + data := []struct { + key []byte + val []byte + }{ + {[]byte("apple"), []byte("val")}, +
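
For calling code, the guarantee exercised by the isolation test above can be summed up as: once s.Reader() returns, every Get and every iterator created from that reader observes one point-in-time view of the store, regardless of what writers commit in the meantime. A minimal sketch of code relying on that; the package and function names are illustrative, not part of this change.

package example

import "github.com/blevesearch/bleve/index/store"

// snapshotKeyCount counts keys as of the moment the reader was opened;
// batches executed after that moment are not reflected in the count.
func snapshotKeyCount(s store.KVStore) (count uint64, err error) {
	reader, err := s.Reader()
	if err != nil {
		return 0, err
	}
	defer func() {
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	it := reader.RangeIterator(nil, nil)
	defer func() {
		if cerr := it.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()
	for it.Valid() {
		count++
		it.Next()
	}
	return count, nil
}
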
{[]byte("cat1"), []byte("val")}, + {[]byte("cat2"), []byte("val")}, + {[]byte("cat3"), []byte("val")}, + {[]byte("dog1"), []byte("val")}, + {[]byte("dog2"), []byte("val")}, + {[]byte("dog4"), []byte("val")}, + {[]byte("elephant"), []byte("val")}, + } + + expectedCats := [][]byte{ + []byte("cat1"), + []byte("cat2"), + []byte("cat3"), + } + + expectedDogs := [][]byte{ + []byte("dog1"), + // we seek to "dog3" and ensure it skips over "dog2" + // but still finds "dog4" even though there was no "dog3" + []byte("dog4"), + } + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write the data + batch := writer.NewBatch() + for _, row := range data { + batch.Set(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get a prefix reader + cats := make([][]byte, 0) + iter := reader.PrefixIterator([]byte("cat")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cats = append(cats, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found all the cats + if !reflect.DeepEqual(cats, expectedCats) { + t.Fatalf("expected cats %v, got %v", expectedCats, cats) + } + + // get a prefix reader + dogs := make([][]byte, 0) + iter = reader.PrefixIterator([]byte("dog")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + dogs = append(dogs, copyk) + if len(dogs) < 2 { + iter.Seek([]byte("dog3")) + } else { + iter.Next() + } + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found the expected dogs + if !reflect.DeepEqual(dogs, expectedDogs) { + t.Fatalf("expected dogs %v, got %v", expectedDogs, dogs) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // close the store + err = s.Close() + if err != nil { + t.Fatal(err) + } +} + +func CommonTestRangeIterator(t *testing.T, s store.KVStore) { + + data := []struct { + key []byte + val []byte + }{ + {[]byte("a1"), []byte("val")}, + {[]byte("b1"), []byte("val")}, + {[]byte("b2"), []byte("val")}, + {[]byte("b3"), []byte("val")}, + {[]byte("c1"), []byte("val")}, + {[]byte("c2"), []byte("val")}, + {[]byte("c4"), []byte("val")}, + {[]byte("d1"), []byte("val")}, + } + + expectedAll := make([][]byte, 0) + expectedBToC := make([][]byte, 0) + expectedCToDSeek3 := make([][]byte, 0) + expectedCToEnd := make([][]byte, 0) + for _, row := range data { + expectedAll = append(expectedAll, row.key) + if bytes.HasPrefix(row.key, []byte("b")) { + expectedBToC = append(expectedBToC, row.key) + } + if bytes.HasPrefix(row.key, []byte("c")) && !bytes.HasSuffix(row.key, []byte("2")) { + expectedCToDSeek3 = append(expectedCToDSeek3, row.key) + } + if bytes.Compare(row.key, []byte("c")) > 0 { + expectedCToEnd = append(expectedCToEnd, row.key) + } + } + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write the data + batch := writer.NewBatch() + for _, row := range data { + batch.Set(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, 
err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get a range iterator (all) + all := make([][]byte, 0) + iter := reader.RangeIterator(nil, nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + all = append(all, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found all + if !reflect.DeepEqual(all, expectedAll) { + t.Fatalf("expected all %v, got %v", expectedAll, all) + } + + // get range iterator from b - c + bToC := make([][]byte, 0) + iter = reader.RangeIterator([]byte("b"), []byte("c")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + bToC = append(bToC, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found b-c + if !reflect.DeepEqual(bToC, expectedBToC) { + t.Fatalf("expected b-c %v, got %v", expectedBToC, bToC) + } + + // get range iterator from c - d, but seek to 'c3' + cToDSeek3 := make([][]byte, 0) + iter = reader.RangeIterator([]byte("c"), []byte("d")) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cToDSeek3 = append(cToDSeek3, copyk) + if len(cToDSeek3) < 2 { + iter.Seek([]byte("c3")) + } else { + iter.Next() + } + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found c-d with seek to c3 + if !reflect.DeepEqual(cToDSeek3, expectedCToDSeek3) { + t.Fatalf("expected b-c %v, got %v", expectedCToDSeek3, cToDSeek3) + } + + // get range iterator from c to the end + cToEnd := make([][]byte, 0) + iter = reader.RangeIterator([]byte("c"), nil) + for iter.Valid() { + // if we want to keep bytes from iteration we must copy + k := iter.Key() + copyk := make([]byte, len(k)) + copy(copyk, k) + cToEnd = append(cToEnd, copyk) + iter.Next() + } + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + // check that we found c to end + if !reflect.DeepEqual(cToEnd, expectedCToEnd) { + t.Fatalf("expected b-c %v, got %v", expectedCToEnd, cToEnd) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + + // close the store + err = s.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/index/store/test/merge.go b/index/store/test/merge.go new file mode 100644 index 00000000..0cd661f8 --- /dev/null +++ b/index/store/test/merge.go @@ -0,0 +1,108 @@ +package test + +import ( + "encoding/binary" + "testing" + + "github.com/blevesearch/bleve/index/store" +) + +// test merge behavior + +func encodeUint64(in uint64) []byte { + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, in) + return rv +} + +func CommonTestMerge(t *testing.T, s store.KVStore) { + + testKey := []byte("k1") + + data := []struct { + key []byte + val []byte + }{ + {testKey, encodeUint64(1)}, + {testKey, encodeUint64(1)}, + } + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write the data + batch := writer.NewBatch() + for _, row := range data { + batch.Merge(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // read key + returnedVal, err := reader.Get(testKey) + if err != nil { + 
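
The prefix and range tests above rely on the new iterator contract: PrefixIterator visits only keys that begin with the given prefix, and RangeIterator stays within [start, end), so callers no longer iterate from a start key and break out by hand with bytes.HasPrefix. A small sketch under that contract; the helper name is illustrative, not part of this change.

package example

import "github.com/blevesearch/bleve/index/store"

// keysWithPrefix collects every key under prefix from a reader snapshot.
func keysWithPrefix(r store.KVReader, prefix []byte) ([][]byte, error) {
	it := r.PrefixIterator(prefix)
	rv := make([][]byte, 0)
	for it.Valid() {
		// bytes returned by the iterator are only valid until the next
		// call on it, so keep a copy
		k := it.Key()
		ck := make([]byte, len(k))
		copy(ck, k)
		rv = append(rv, ck)
		it.Next()
	}
	if err := it.Close(); err != nil {
		return nil, err
	}
	return rv, nil
}
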
t.Fatal(err) + } + + // check the value + mergedval := binary.LittleEndian.Uint64(returnedVal) + if mergedval != 2 { + t.Errorf("expected 2, got %d", mergedval) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } + +} + +// a test merge operator which is just an incrementing counter of uint64 +type TestMergeCounter struct{} + +func (mc *TestMergeCounter) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) { + var newval uint64 + if len(existingValue) > 0 { + newval = binary.LittleEndian.Uint64(existingValue) + } + + // now process operands + for _, operand := range operands { + next := binary.LittleEndian.Uint64(operand) + newval += next + } + + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, newval) + return rv, true +} + +func (mc *TestMergeCounter) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) { + left := binary.LittleEndian.Uint64(leftOperand) + right := binary.LittleEndian.Uint64(rightOperand) + rv := make([]byte, 8) + binary.LittleEndian.PutUint64(rv, left+right) + return rv, true +} + +func (mc *TestMergeCounter) Name() string { + return "test_merge_counter" +} diff --git a/index/upside_down/analysis_test.go b/index/upside_down/analysis_test.go index 5c5cd78f..6b46d2d3 100644 --- a/index/upside_down/analysis_test.go +++ b/index/upside_down/analysis_test.go @@ -18,12 +18,11 @@ func BenchmarkAnalyze(b *testing.B) { b.Fatal(err) } - s, err := null.New() + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) d := document.NewDocument("1") f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer) diff --git a/index/upside_down/benchmark_boltdb_test.go b/index/upside_down/benchmark_boltdb_test.go index 549f8d7b..f6ba4f73 100644 --- a/index/upside_down/benchmark_boltdb_test.go +++ b/index/upside_down/benchmark_boltdb_test.go @@ -10,68 +10,61 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/boltdb" ) -func CreateBoltDB() (store.KVStore, error) { - s := boltdb.New("test", "bleve") - return s, nil -} - -func DestroyBoltDB() error { - return os.RemoveAll("test") +var boltTestConfig = map[string]interface{}{ + "path": "test", } func BenchmarkBoltDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 1) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 1) } func BenchmarkBoltDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 2) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 2) } func BenchmarkBoltDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateBoltDB, DestroyBoltDB, 4) + CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 4) } // batches func BenchmarkBoltDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 10) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 10) } func BenchmarkBoltDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 10) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 10) } func BenchmarkBoltDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 10) + 
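
To make the arithmetic in CommonTestMerge above concrete: the two merge operands of 1 written to "k1" collapse to a stored value of 2, whether the store resolves them in a single FullMerge against a missing existing value or combines them pairwise with PartialMerge first. The example-style function below is an illustration only, not code added by this patch.

package test

import (
	"encoding/binary"
	"fmt"
)

func ExampleTestMergeCounter() {
	mc := &TestMergeCounter{}

	// full merge: no existing value, two operands of 1
	full, _ := mc.FullMerge([]byte("k1"), nil, [][]byte{encodeUint64(1), encodeUint64(1)})
	fmt.Println(binary.LittleEndian.Uint64(full))

	// partial merge: the same two operands combined ahead of time
	partial, _ := mc.PartialMerge([]byte("k1"), encodeUint64(1), encodeUint64(1))
	fmt.Println(binary.LittleEndian.Uint64(partial))

	// Output:
	// 2
	// 2
}
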
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 10) } func BenchmarkBoltDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 100) } func BenchmarkBoltDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 100) } func BenchmarkBoltDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 100) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 100) } func BenchmarkBoltBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 1, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 1000) } func BenchmarkBoltBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 2, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 1000) } func BenchmarkBoltBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateBoltDB, DestroyBoltDB, 4, 1000) + CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_common_test.go b/index/upside_down/benchmark_common_test.go index d04095dd..363ff443 100644 --- a/index/upside_down/benchmark_common_test.go +++ b/index/upside_down/benchmark_common_test.go @@ -10,13 +10,13 @@ package upside_down import ( + "os" "strconv" "testing" _ "github.com/blevesearch/bleve/analysis/analyzers/standard_analyzer" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/registry" ) @@ -33,10 +33,13 @@ var benchmarkDocBodies = []string{ "The expansion ratio of a liquefied and cryogenic substance is the volume of a given amount of that substance in liquid form compared to the volume of the same amount of substance in gaseous form, at room temperature and normal atmospheric pressure.", } -type KVStoreCreate func() (store.KVStore, error) type KVStoreDestroy func() error -func CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDestroy, analysisWorkers int) { +func DestroyTest() error { + return os.RemoveAll("test") +} + +func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) { cache := registry.NewCache() analyzer, err := cache.AnalyzerNamed("standard") @@ -50,12 +53,11 @@ func CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDes b.ResetTimer() b.StopTimer() for i := 0; i < b.N; i++ { - s, err := create() + analysisQueue := index.NewAnalysisQueue(analysisWorkers) + idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(analysisWorkers) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { @@ -81,7 +83,7 @@ func CommonBenchmarkIndex(b *testing.B, create KVStoreCreate, destroy KVStoreDes } } -func CommonBenchmarkIndexBatch(b *testing.B, create KVStoreCreate, destroy KVStoreDestroy, analysisWorkers, batchSize int) { +func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) { cache 
:= registry.NewCache() analyzer, err := cache.AnalyzerNamed("standard") @@ -93,12 +95,11 @@ func CommonBenchmarkIndexBatch(b *testing.B, create KVStoreCreate, destroy KVSto b.StopTimer() for i := 0; i < b.N; i++ { - s, err := create() + analysisQueue := index.NewAnalysisQueue(analysisWorkers) + idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(analysisWorkers) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { diff --git a/index/upside_down/benchmark_goleveldb_test.go b/index/upside_down/benchmark_goleveldb_test.go index ba003367..2c0fa2cd 100644 --- a/index/upside_down/benchmark_goleveldb_test.go +++ b/index/upside_down/benchmark_goleveldb_test.go @@ -10,71 +10,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/goleveldb" ) var goLevelDBTestOptions = map[string]interface{}{ "create_if_missing": true, -} - -func CreateGoLevelDB() (store.KVStore, error) { - return goleveldb.New("test", goLevelDBTestOptions) -} - -func DestroyGoLevelDB() error { - return os.RemoveAll("test") + "path": "test", } func BenchmarkGoLevelDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 1) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1) } func BenchmarkGoLevelDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 2) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2) } func BenchmarkGoLevelDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoLevelDB, DestroyGoLevelDB, 4) + CommonBenchmarkIndex(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4) } // batches func BenchmarkGoLevelDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 10) } func BenchmarkGoLevelDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 2, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 10) } func BenchmarkGoLevelDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 10) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 10) } func BenchmarkGoLevelDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 100) } func BenchmarkGoLevelDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 2, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 100) } func BenchmarkGoLevelDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 100) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 100) } func BenchmarkGoLevelDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 1, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 1, 1000) } func BenchmarkGoLevelDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, 
DestroyGoLevelDB, 2, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 2, 1000) } func BenchmarkGoLevelDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoLevelDB, DestroyGoLevelDB, 4, 1000) + CommonBenchmarkIndexBatch(b, goleveldb.Name, goLevelDBTestOptions, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_gtreap_test.go b/index/upside_down/benchmark_gtreap_test.go index 673d830f..2963da62 100644 --- a/index/upside_down/benchmark_gtreap_test.go +++ b/index/upside_down/benchmark_gtreap_test.go @@ -12,64 +12,55 @@ package upside_down import ( "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/gtreap" ) -func CreateGTreap() (store.KVStore, error) { - return gtreap.StoreConstructor(nil) -} - -func DestroyGTreap() error { - return nil -} - func BenchmarkGTreapIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 1) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 1) } func BenchmarkGTreapIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 2) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 2) } func BenchmarkGTreapIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGTreap, DestroyGTreap, 4) + CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 4) } // batches func BenchmarkGTreapIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 10) } func BenchmarkGTreapIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 10) } func BenchmarkGTreapIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 10) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 10) } func BenchmarkGTreapIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 100) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 100) } func BenchmarkGTreapIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 100) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 100) } func BenchmarkGTreapIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 100) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 100) } func BenchmarkGTreapIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 1, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 1000) } func BenchmarkGTreapIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 2, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 1000) } func BenchmarkGTreapIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGTreap, DestroyGTreap, 4, 1000) + CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_inmem_test.go b/index/upside_down/benchmark_inmem_test.go deleted file mode 100644 index ee06bd5b..00000000 --- a/index/upside_down/benchmark_inmem_test.go +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright (c) 2014 Couchbase, Inc. 
-// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file -// except in compliance with the License. You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// Unless required by applicable law or agreed to in writing, software distributed under the -// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, -// either express or implied. See the License for the specific language governing permissions -// and limitations under the License. - -package upside_down - -import ( - "testing" - - "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/inmem" -) - -func CreateInMem() (store.KVStore, error) { - return inmem.New() -} - -func DestroyInMem() error { - return nil -} - -func BenchmarkInMemIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 1) -} - -func BenchmarkInMemIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 2) -} - -func BenchmarkInMemIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateInMem, DestroyInMem, 4) -} - -// batches - -func BenchmarkInMemIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 10) -} - -func BenchmarkInMemIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 10) -} - -func BenchmarkInMemIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 10) -} - -func BenchmarkInMemIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 100) -} - -func BenchmarkInMemIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 100) -} - -func BenchmarkInMemIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 100) -} - -func BenchmarkInMemIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 1, 1000) -} - -func BenchmarkInMemIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 2, 1000) -} - -func BenchmarkInMemIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateInMem, DestroyInMem, 4, 1000) -} diff --git a/index/upside_down/benchmark_null_test.go b/index/upside_down/benchmark_null_test.go index a02813a3..ee0f7c77 100644 --- a/index/upside_down/benchmark_null_test.go +++ b/index/upside_down/benchmark_null_test.go @@ -12,64 +12,55 @@ package upside_down import ( "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/null" ) -func CreateNull() (store.KVStore, error) { - return null.New() -} - -func DestroyNull() error { - return nil -} - func BenchmarkNullIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 1) + CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 1) } func BenchmarkNullIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 2) + CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 2) } func BenchmarkNullIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateNull, DestroyNull, 4) + CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 4) } // batches func BenchmarkNullIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 10) } func 
BenchmarkNullIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 10) } func BenchmarkNullIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 10) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 10) } func BenchmarkNullIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 100) } func BenchmarkNullIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 100) } func BenchmarkNullIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 100) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 100) } func BenchmarkNullIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 1, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 1000) } func BenchmarkNullIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 2, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 1000) } func BenchmarkNullIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateNull, DestroyNull, 4, 1000) + CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 1000) } diff --git a/index/upside_down/dump.go b/index/upside_down/dump.go index 05333918..c773c8aa 100644 --- a/index/upside_down/dump.go +++ b/index/upside_down/dump.go @@ -26,7 +26,7 @@ func (udc *UpsideDownCouch) dumpPrefix(kvreader store.KVReader, rv chan interfac if start == nil { start = []byte{0} } - it := kvreader.Iterator(start) + it := kvreader.PrefixIterator(start) defer func() { cerr := it.Close() if cerr != nil { @@ -36,9 +36,28 @@ func (udc *UpsideDownCouch) dumpPrefix(kvreader store.KVReader, rv chan interfac key, val, valid := it.Current() for valid { - if prefix != nil && !bytes.HasPrefix(key, prefix) { - break + row, err := ParseFromKeyValue(key, val) + if err != nil { + rv <- err + return } + rv <- row + + it.Next() + key, val, valid = it.Current() + } +} + +func (udc *UpsideDownCouch) dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) { + it := kvreader.RangeIterator(start, end) + defer func() { + cerr := it.Close() + if cerr != nil { + rv <- cerr + } + }() + key, val, valid := it.Current() + for valid { row, err := ParseFromKeyValue(key, val) if err != nil { @@ -70,7 +89,7 @@ func (udc *UpsideDownCouch) DumpAll() chan interface{} { } }() - udc.dumpPrefix(kvreader, rv, nil) + udc.dumpRange(kvreader, rv, nil, nil) }() return rv } @@ -149,7 +168,7 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} { // now walk term keys in order and add them as well if len(keys) > 0 { - it := kvreader.Iterator(keys[0]) + it := kvreader.RangeIterator(keys[0], nil) defer func() { cerr := it.Close() if cerr != nil { diff --git a/index/upside_down/dump_test.go b/index/upside_down/dump_test.go index 044ea1c3..277bce51 100644 --- a/index/upside_down/dump_test.go +++ b/index/upside_down/dump_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "testing" "time" @@ -21,18 +20,14 @@ import ( ) func TestDump(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - s := 
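
The benchmarks above no longer construct stores themselves; they hand a registered store name plus a config map to the index, which resolves the constructor through the registry. A sketch of that lookup in isolation; the function name and error text are illustrative, and passing a nil merge operator is an assumption that only holds for stores used without Merge().

package example

import (
	"fmt"

	"github.com/blevesearch/bleve/index/store"
	"github.com/blevesearch/bleve/registry"
)

// openNamedStore resolves a KVStore constructor by its registered name and
// builds it from a config map.
func openNamedStore(name string, config map[string]interface{}) (store.KVStore, error) {
	constructor := registry.KVStoreConstructorByName(name)
	if constructor == nil {
		return nil, fmt.Errorf("no KVStore registered with name %q", name)
	}
	return constructor(nil, config)
}

With this shape, a call like openNamedStore(boltdb.Name, map[string]interface{}{"path": "test"}) would mirror the boltTestConfig used by the benchmarks above.
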
boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } diff --git a/index/upside_down/field_dict.go b/index/upside_down/field_dict.go index f5feb120..62b56c1c 100644 --- a/index/upside_down/field_dict.go +++ b/index/upside_down/field_dict.go @@ -10,7 +10,6 @@ package upside_down import ( - "bytes" "fmt" "github.com/blevesearch/bleve/index" @@ -20,7 +19,6 @@ import ( type UpsideDownCouchFieldDict struct { indexReader *IndexReader iterator store.KVIterator - endKey []byte field uint16 } @@ -29,16 +27,17 @@ func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTe startKey := NewDictionaryRow(startTerm, field, 0).Key() if endTerm == nil { endTerm = []byte{ByteSeparator} + } else { + endTerm = incrementBytes(endTerm) } endKey := NewDictionaryRow(endTerm, field, 0).Key() - it := indexReader.kvreader.Iterator(startKey) + it := indexReader.kvreader.RangeIterator(startKey, endKey) return &UpsideDownCouchFieldDict{ indexReader: indexReader, iterator: it, field: field, - endKey: endKey, }, nil } @@ -49,11 +48,6 @@ func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { return nil, nil } - // past end term - if bytes.Compare(key, r.endKey) > 0 { - return nil, nil - } - currRow, err := NewDictionaryRowKV(key, val) if err != nil { return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err) @@ -71,16 +65,3 @@ func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { func (r *UpsideDownCouchFieldDict) Close() error { return r.iterator.Close() } - -func incrementBytes(in []byte) []byte { - rv := make([]byte, len(in)) - copy(rv, in) - for i := len(rv) - 1; i >= 0; i-- { - rv[i] = rv[i] + 1 - if rv[i] != 0 { - // didn't overflow, so stop - break - } - } - return rv -} diff --git a/index/upside_down/field_dict_test.go b/index/upside_down/field_dict_test.go index 5ad91f32..1a0864b5 100644 --- a/index/upside_down/field_dict_test.go +++ b/index/upside_down/field_dict_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "reflect" "testing" @@ -20,18 +19,14 @@ import ( ) func TestIndexFieldDict(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index c151033e..4df490e4 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -10,8 +10,6 @@ package upside_down import ( - "bytes" - "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" @@ -64,7 +62,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { doc = document.NewDocument(id) storedRow := NewStoredRow(id, 0, []uint64{}, 'x', nil) storedRowScanPrefix := storedRow.ScanPrefixForDoc() - it := i.kvreader.Iterator(storedRowScanPrefix) + 
it := i.kvreader.PrefixIterator(storedRowScanPrefix) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -72,14 +70,8 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) { }() key, val, valid := it.Current() for valid { - if !bytes.HasPrefix(key, storedRowScanPrefix) { - break - } - safeVal := val - if !i.kvreader.BytesSafeAfterClose() { - safeVal = make([]byte, len(val)) - copy(safeVal, val) - } + safeVal := make([]byte, len(val)) + copy(safeVal, val) var row *StoredRow row, err = NewStoredRowKV(key, safeVal) if err != nil { @@ -120,7 +112,7 @@ func (i *IndexReader) DocumentFieldTerms(id string) (index.FieldTerms, error) { func (i *IndexReader) Fields() (fields []string, err error) { fields = make([]string, 0) - it := i.kvreader.Iterator([]byte{'f'}) + it := i.kvreader.PrefixIterator([]byte{'f'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -128,9 +120,6 @@ func (i *IndexReader) Fields() (fields []string, err error) { }() key, val, valid := it.Current() for valid { - if !bytes.HasPrefix(key, []byte{'f'}) { - break - } var row UpsideDownCouchRow row, err = ParseFromKeyValue(key, val) if err != nil { @@ -162,3 +151,16 @@ func (i *IndexReader) DocCount() uint64 { func (i *IndexReader) Close() error { return i.kvreader.Close() } + +func incrementBytes(in []byte) []byte { + rv := make([]byte, len(in)) + copy(rv, in) + for i := len(rv) - 1; i >= 0; i-- { + rv[i] = rv[i] + 1 + if rv[i] != 0 { + // didn't overflow, so stop + break + } + } + return rv +} diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index cf511186..12879c04 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -10,19 +10,16 @@ package upside_down import ( - "bytes" - "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" ) type UpsideDownCouchTermFieldReader struct { - indexReader *IndexReader - iterator store.KVIterator - count uint64 - term []byte - field uint16 - readerPrefix []byte + indexReader *IndexReader + iterator store.KVIterator + count uint64 + term []byte + field uint16 } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { @@ -45,25 +42,14 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi } tfr := NewTermFrequencyRow(term, field, "", 0, 0) - readerPrefix := tfr.Key() - - var it store.KVIterator - switch kvreader := indexReader.kvreader.(type) { - case store.RangeIterable: - etfr := NewTermFrequencyRow(term[:len(term)-1], field, "", 0, 0) - nextTermPrefix := etfr.Key() - it = kvreader.RangeIterator(readerPrefix, nextTermPrefix) - default: - it = kvreader.Iterator(readerPrefix) - } + it := indexReader.kvreader.PrefixIterator(tfr.Key()) return &UpsideDownCouchTermFieldReader{ - indexReader: indexReader, - iterator: it, - count: dictionaryRow.count, - term: term, - field: field, - readerPrefix: readerPrefix, + indexReader: indexReader, + iterator: it, + count: dictionaryRow.count, + term: term, + field: field, }, nil } @@ -75,10 +61,6 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - if !bytes.HasPrefix(key, r.readerPrefix) { - // end of the line - return nil, nil - } tfr, err := NewTermFrequencyRowKV(key, val) if err != nil { return nil, err @@ -101,10 +83,6 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string) 
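
index_reader.go now owns incrementBytes, which field_dict.go uses above to turn an inclusive end term into an exclusive RangeIterator bound: the final byte is bumped, carrying into the preceding byte on overflow. A small illustration with an assumed test name, not part of the patch.

package upside_down

import (
	"bytes"
	"testing"
)

func TestIncrementBytesSketch(t *testing.T) {
	// the last byte is simply bumped when it does not overflow
	if got := incrementBytes([]byte("cat")); string(got) != "cau" {
		t.Fatalf("expected cau, got %s", got)
	}
	// 0xff wraps to 0x00 and the carry bumps the preceding byte
	if got := incrementBytes([]byte{0x01, 0xff}); !bytes.Equal(got, []byte{0x02, 0x00}) {
		t.Fatalf("expected [2 0], got %v", got)
	}
}
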
(*index.TermField r.iterator.Seek(tfr.Key()) key, val, valid := r.iterator.Current() if valid { - if !bytes.HasPrefix(key, r.readerPrefix) { - // end of the line - return nil, nil - } tfr, err := NewTermFrequencyRowKV(key, val) if err != nil { return nil, err @@ -131,8 +109,6 @@ func (r *UpsideDownCouchTermFieldReader) Close() error { type UpsideDownCouchDocIDReader struct { indexReader *IndexReader iterator store.KVIterator - start string - end string } func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) { @@ -143,24 +119,18 @@ func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) end = string([]byte{0xff}) } bisr := NewBackIndexRow(start, nil, nil) - it := indexReader.kvreader.Iterator(bisr.Key()) + bier := NewBackIndexRow(end, nil, nil) + it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key()) return &UpsideDownCouchDocIDReader{ indexReader: indexReader, iterator: it, - start: start, - end: end, }, nil } func (r *UpsideDownCouchDocIDReader) Next() (string, error) { key, val, valid := r.iterator.Current() if valid { - bier := NewBackIndexRow(r.end, nil, nil) - if bytes.Compare(key, bier.Key()) > 0 { - // end of the line - return "", nil - } br, err := NewBackIndexRowKV(key, val) if err != nil { return "", err @@ -176,11 +146,6 @@ func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) { r.iterator.Seek(bir.Key()) key, val, valid := r.iterator.Current() if valid { - bier := NewBackIndexRow(r.end, nil, nil) - if bytes.Compare(key, bier.Key()) > 0 { - // end of the line - return "", nil - } br, err := NewBackIndexRowKV(key, val) if err != nil { return "", err diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 9f37e106..456e2039 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "reflect" "testing" @@ -20,18 +19,14 @@ import ( ) func TestIndexReader(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -194,18 +189,14 @@ func TestIndexReader(t *testing.T) { } func TestIndexDocIdReader(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - s := boltdb.New("test", "bleve") - s.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index d2ad347b..674d2c58 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -10,7 +10,6 @@ package upside_down import ( - "bytes" "encoding/json" "fmt" "math" @@ -40,6 +39,8 @@ var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Ve type UpsideDownCouch struct { version uint8 path string + storeName 
string + storeConfig map[string]interface{} store store.KVStore fieldCache *index.FieldCache analysisQueue *index.AnalysisQueue @@ -50,14 +51,15 @@ type UpsideDownCouch struct { docCount uint64 } -func NewUpsideDownCouch(s store.KVStore, analysisQueue *index.AnalysisQueue) *UpsideDownCouch { +func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { return &UpsideDownCouch{ version: Version, fieldCache: index.NewFieldCache(), - store: s, + storeName: storeName, + storeConfig: storeConfig, analysisQueue: analysisQueue, stats: &indexStat{}, - } + }, nil } func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) { @@ -67,27 +69,21 @@ func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) { // version marker rows = append(rows, NewVersionRow(udc.version)) - return udc.batchRows(kvwriter, nil, rows, nil) + err = udc.batchRows(kvwriter, nil, rows, nil) + return } func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { - keyPrefix := []byte{'f'} - it := kvreader.Iterator(keyPrefix) + it := kvreader.PrefixIterator([]byte{'f'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() - it.Seek(keyPrefix) key, val, valid := it.Current() for valid { - - // stop when - if !bytes.HasPrefix(key, keyPrefix) { - break - } var fieldRow *FieldRow fieldRow, err = NewFieldRowKV(key, val) if err != nil { @@ -99,13 +95,12 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { key, val, valid = it.Current() } - keyPrefix = []byte{'v'} - val, err = kvreader.Get(keyPrefix) + val, err = kvreader.Get([]byte{'v'}) if err != nil { return } var vr *VersionRow - vr, err = NewVersionRowKV(keyPrefix, val) + vr, err = NewVersionRowKV([]byte{'v'}, val) if err != nil { return } @@ -150,7 +145,7 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow } // write out the batch - err = wb.Execute() + err = writer.ExecuteBatch(wb) if err != nil { return } @@ -164,68 +159,84 @@ func (udc *UpsideDownCouch) DocCount() (uint64, error) { } func (udc *UpsideDownCouch) Open() (err error) { - // install the merge operator - udc.store.SetMergeOperator(&mergeOperator) + // open the kv store + storeConstructor := registry.KVStoreConstructorByName(udc.storeName) + if storeConstructor == nil { + err = index.ErrorUnknownStorageType + return + } - // now open the kv store - err = udc.store.Open() + // now open the store + udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig) if err != nil { return } - // start a writer for the open process - var kvwriter store.KVWriter - kvwriter, err = udc.store.Writer() + // start a reader to look at the index + var kvreader store.KVReader + kvreader, err = udc.store.Reader() if err != nil { return } - defer func() { - if cerr := kvwriter.Close(); err == nil && cerr != nil { - err = cerr - } - }() var value []byte - value, err = kvwriter.Get(VersionKey) + value, err = kvreader.Get(VersionKey) if err != nil { + _ = kvreader.Close() return } - // init new index OR load schema - if value == nil { - err = udc.init(kvwriter) + if value != nil { + err = udc.loadSchema(kvreader) if err != nil { + _ = kvreader.Close() return } + + // set doc count + udc.m.Lock() + udc.docCount, err = udc.countDocs(kvreader) + udc.m.Unlock() + + err = kvreader.Close() } else { - err = udc.loadSchema(kvwriter) + // new index, close the reader and open writer to init + err = kvreader.Close() if err != nil { return } + 
+ + var kvwriter store.KVWriter + kvwriter, err = udc.store.Writer() + if err != nil { + return + } + defer func() { + if cerr := kvwriter.Close(); err == nil && cerr != nil { + err = cerr + } + }() + + // init the index + err = udc.init(kvwriter) } - // set doc count - udc.m.Lock() - udc.docCount, err = udc.countDocs(kvwriter) - udc.m.Unlock() + return } func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) (count uint64, err error) { - it := kvreader.Iterator([]byte{'b'}) + it := kvreader.PrefixIterator([]byte{'b'}) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr } }() - key, _, valid := it.Current() + _, _, valid := it.Current() for valid { - if !bytes.HasPrefix(key, []byte{'b'}) { - break - } count++ it.Next() - key, _, valid = it.Current() + _, _, valid = it.Current() } return @@ -242,7 +253,7 @@ func (udc *UpsideDownCouch) rowCount() (count uint64, err error) { err = cerr } }() - it := kvreader.Iterator([]byte{0}) + it := kvreader.RangeIterator(nil, nil) defer func() { if cerr := it.Close(); err == nil && cerr != nil { err = cerr @@ -278,6 +289,28 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() + if err != nil { + return + } + + // first we lookup the backindex row for the doc id if it exists + // lookup the back index row + var backIndexRow *BackIndexRow + backIndexRow, err = udc.backIndexRowForDoc(kvreader, doc.ID) + if err != nil { + _ = kvreader.Close() + atomic.AddUint64(&udc.stats.errors, 1) + return + } + + err = kvreader.Close() + if err != nil { + return + } + // start a writer for this update indexStart := time.Now() var kvwriter store.KVWriter @@ -291,15 +324,6 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { } }() - // first we lookup the backindex row for the doc id if it exists - // lookup the back index row - var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvwriter, doc.ID) - if err != nil { - atomic.AddUint64(&udc.stats.errors, 1) - return - } - // prepare a list of rows addRows := make([]UpsideDownCouchRow, 0) updateRows := make([]UpsideDownCouchRow, 0) @@ -433,6 +457,34 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field func (udc *UpsideDownCouch) Delete(id string) (err error) { indexStart := time.Now() + + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() + if err != nil { + return + } + + // first we lookup the backindex row for the doc id if it exists + // lookup the back index row + var backIndexRow *BackIndexRow + backIndexRow, err = udc.backIndexRowForDoc(kvreader, id) + if err != nil { + _ = kvreader.Close() + atomic.AddUint64(&udc.stats.errors, 1) + return + } + + err = kvreader.Close() + if err != nil { + return + } + + if backIndexRow == nil { + atomic.AddUint64(&udc.stats.deletes, 1) + return + } + + // start a writer for this delete var kvwriter store.KVWriter kvwriter, err = udc.store.Writer() @@ -445,18 +497,6 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) { } }() - // lookup the back index row - var backIndexRow *BackIndexRow - backIndexRow, err = udc.backIndexRowForDoc(kvwriter, id) - if err != nil { - atomic.AddUint64(&udc.stats.errors, 1) - return - } - if backIndexRow == nil { - atomic.AddUint64(&udc.stats.deletes, 1) - return - } -
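
Update and Delete above (and Batch below) now follow the same shape: do all backindex lookups on a short-lived KVReader, close it, then open a KVWriter and apply every change through a single ExecuteBatch call. A generic sketch of that pattern; the function name and the set-or-delete toggle are illustrative only.

package example

import "github.com/blevesearch/bleve/index/store"

func readThenWrite(s store.KVStore, key, val []byte) error {
	// read phase: a snapshot reader, closed before any writing starts
	reader, err := s.Reader()
	if err != nil {
		return err
	}
	existing, err := reader.Get(key)
	if cerr := reader.Close(); cerr != nil && err == nil {
		err = cerr
	}
	if err != nil {
		return err
	}

	// write phase: changes are collected in a batch and applied atomically
	writer, err := s.Writer()
	if err != nil {
		return err
	}
	defer func() {
		_ = writer.Close() // close error dropped for brevity in this sketch
	}()

	batch := writer.NewBatch()
	if existing == nil {
		batch.Set(key, val)
	} else {
		batch.Delete(key)
	}
	return writer.ExecuteBatch(batch)
}
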
deleteRows := make([]UpsideDownCouchRow, 0) deleteRows = udc.deleteSingle(id, backIndexRow, deleteRows) @@ -636,18 +676,31 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) indexStart := time.Now() - // start a writer for this batch - var kvwriter store.KVWriter - kvwriter, err = udc.store.Writer() + + // open a reader for backindex lookup + var kvreader store.KVReader + kvreader, err = udc.store.Reader() if err != nil { return } // first lookup all the back index rows var backIndexRows map[string]*BackIndexRow - backIndexRows, err = udc.backIndexRowsForBatch(kvwriter, batch) + backIndexRows, err = udc.backIndexRowsForBatch(kvreader, batch) + if err != nil { + _ = kvreader.Close() + return + } + + err = kvreader.Close() + if err != nil { + return + } + + // start a writer for this batch + var kvwriter store.KVWriter + kvwriter, err = udc.store.Writer() if err != nil { - _ = kvwriter.Close() return } @@ -720,7 +773,11 @@ func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) { err = cerr } }() - return writer.Set(internalRow.Key(), internalRow.Value()) + + batch := writer.NewBatch() + batch.Set(internalRow.Key(), internalRow.Value()) + + return writer.ExecuteBatch(batch) } func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) { @@ -735,7 +792,10 @@ func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) { err = cerr } }() - return writer.Delete(internalRow.Key()) + + batch := writer.NewBatch() + batch.Delete(internalRow.Key()) + return writer.ExecuteBatch(batch) } func (udc *UpsideDownCouch) Reader() (index.IndexReader, error) { @@ -764,10 +824,6 @@ func (udc *UpsideDownCouch) fieldIndexOrNewRow(name string) (uint16, *FieldRow) return index, nil } -func IndexTypeConstructor(store store.KVStore, analysisQueue *index.AnalysisQueue) (index.Index, error) { - return NewUpsideDownCouch(store, analysisQueue), nil -} - func init() { - registry.RegisterIndexType(Name, IndexTypeConstructor) + registry.RegisterIndexType(Name, NewUpsideDownCouch) } diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index a7d64a1e..38d7f496 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -10,7 +10,6 @@ package upside_down import ( - "os" "reflect" "regexp" "strconv" @@ -32,18 +31,14 @@ var testAnalyzer = &analysis.Analyzer{ } func TestIndexOpenReopen(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -59,7 +54,7 @@ func TestIndexOpenReopen(t *testing.T) { // opening the database should have inserted a version expectedLength := uint64(1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -73,9 +68,10 @@ func TestIndexOpenReopen(t *testing.T) { t.Fatal(err) } - store = boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) - idx = NewUpsideDownCouch(store, analysisQueue) + idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil 
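
Because NewUpsideDownCouch now returns the index.Index interface, callers construct the index from a store name plus config, and code that needs the concrete type (as the tests below do for rowCount) goes through a type assertion. A sketch with an assumed helper name and a single hard-coded analysis worker.

package upside_down

import "github.com/blevesearch/bleve/index"

func openAndCount(storeName string, storeConfig map[string]interface{}) (uint64, error) {
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
	if err != nil {
		return 0, err
	}
	if err = idx.Open(); err != nil {
		return 0, err
	}
	// reach the concrete type when internals beyond index.Index are needed
	udc := idx.(*UpsideDownCouch)
	defer func() {
		_ = udc.Close() // close error dropped for brevity in this sketch
	}()
	return udc.DocCount()
}
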
{ + t.Fatal(err) + } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) @@ -89,18 +85,14 @@ func TestIndexOpenReopen(t *testing.T) { } func TestIndexInsert(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -138,7 +130,7 @@ func TestIndexInsert(t *testing.T) { // should have 4 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the term count, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -148,18 +140,14 @@ func TestIndexInsert(t *testing.T) { } func TestIndexInsertThenDelete(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -233,7 +221,7 @@ func TestIndexInsertThenDelete(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, 1 for dictionary row garbage) expectedLength := uint64(1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -243,18 +231,14 @@ func TestIndexInsertThenDelete(t *testing.T) { } func TestIndexInsertThenUpdate(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -282,7 +266,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, and 2 for the two term, and 2 for the term counts, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 2 + 2 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -300,7 +284,7 @@ func TestIndexInsertThenUpdate(t *testing.T) { // should have 2 rows (1 for version, 1 for schema field, and 1 for the remaining term, and 2 for the term diciontary, and 1 for the back index entry) expectedLength = uint64(1 + 1 + 1 + 2 + 1) - rowCount, err = idx.rowCount() + rowCount, err = idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -310,18 +294,14 @@ func TestIndexInsertThenUpdate(t *testing.T) { } func TestIndexInsertMultiple(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() 
- store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -346,7 +326,7 @@ func TestIndexInsertMultiple(t *testing.T) { // should have 4 rows (1 for version, 1 for schema field, and 2 for single term, and 1 for the term count, and 2 for the back index entries) expectedLength := uint64(1 + 1 + 2 + 1 + 2) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -360,9 +340,10 @@ func TestIndexInsertMultiple(t *testing.T) { t.Fatal(err) } - store = boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) - idx = NewUpsideDownCouch(store, analysisQueue) + idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) @@ -392,18 +373,14 @@ func TestIndexInsertMultiple(t *testing.T) { } func TestIndexInsertWithStore(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -441,7 +418,7 @@ func TestIndexInsertWithStore(t *testing.T) { // should have 6 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the stored field and 1 for the term count, and 1 for the back index entry) expectedLength := uint64(1 + 1 + 1 + 1 + 1 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -478,18 +455,14 @@ func TestIndexInsertWithStore(t *testing.T) { } func TestIndexInternalCRUD(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -571,18 +544,14 @@ func TestIndexInternalCRUD(t *testing.T) { } func TestIndexBatch(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -668,18 +637,14 @@ func TestIndexBatch(t *testing.T) { } func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if 
err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -733,7 +698,7 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { // 16 for date term counts // 1 for the back index entry expectedLength := uint64(1 + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -860,18 +825,14 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { } func TestIndexInsertFields(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -919,18 +880,14 @@ func TestIndexInsertFields(t *testing.T) { } func TestIndexUpdateComposites(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -958,7 +915,7 @@ func TestIndexUpdateComposites(t *testing.T) { // 4 for the text term count // 1 for the back index entry expectedLength := uint64(1 + 3 + 4 + 2 + 4 + 1) - rowCount, err := idx.rowCount() + rowCount, err := idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -1005,7 +962,7 @@ func TestIndexUpdateComposites(t *testing.T) { // should have the same row count as before, plus 4 term dictionary garbage rows expectedLength += 4 - rowCount, err = idx.rowCount() + rowCount, err = idx.(*UpsideDownCouch).rowCount() if err != nil { t.Error(err) } @@ -1015,18 +972,14 @@ func TestIndexUpdateComposites(t *testing.T) { } func TestIndexFieldsMisc(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1045,15 +998,15 @@ func TestIndexFieldsMisc(t *testing.T) { t.Errorf("Error updating index: %v", err) } - fieldName1 := idx.fieldCache.FieldIndexed(0) + fieldName1 := 
idx.(*UpsideDownCouch).fieldCache.FieldIndexed(0) if fieldName1 != "name" { t.Errorf("expected field named 'name', got '%s'", fieldName1) } - fieldName2 := idx.fieldCache.FieldIndexed(1) + fieldName2 := idx.(*UpsideDownCouch).fieldCache.FieldIndexed(1) if fieldName2 != "title" { t.Errorf("expected field named 'title', got '%s'", fieldName2) } - fieldName3 := idx.fieldCache.FieldIndexed(2) + fieldName3 := idx.(*UpsideDownCouch).fieldCache.FieldIndexed(2) if fieldName3 != "" { t.Errorf("expected field named '', got '%s'", fieldName3) } @@ -1061,18 +1014,14 @@ func TestIndexFieldsMisc(t *testing.T) { } func TestIndexTermReaderCompositeFields(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1121,18 +1070,14 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } func TestIndexDocumentFieldTerms(t *testing.T) { - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + defer DestroyTest() - store := boltdb.New("test", "bleve") - store.SetMergeOperator(&mergeOperator) analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(store, analysisQueue) - err := idx.Open() + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() if err != nil { t.Errorf("error opening index: %v", err) } @@ -1183,12 +1128,11 @@ func BenchmarkBatch(b *testing.B) { b.Fatal(err) } - s, err := null.New() + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue) if err != nil { b.Fatal(err) } - analysisQueue := index.NewAnalysisQueue(1) - idx := NewUpsideDownCouch(s, analysisQueue) err = idx.Open() if err != nil { b.Fatal(err) diff --git a/index_impl.go b/index_impl.go index 256a13f6..90a0fb6d 100644 --- a/index_impl.go +++ b/index_impl.go @@ -20,7 +20,7 @@ import ( "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" "github.com/blevesearch/bleve/index/store" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/registry" "github.com/blevesearch/bleve/search" "github.com/blevesearch/bleve/search/collectors" @@ -50,28 +50,18 @@ func newMemIndex(indexType string, mapping *IndexMapping) (*indexImpl, error) { rv := indexImpl{ path: "", m: mapping, - meta: newIndexMeta(indexType, inmem.Name, nil), + meta: newIndexMeta(indexType, gtreap.Name, nil), stats: &IndexStat{}, } - storeConstructor := registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } - // now open the store - var err error - rv.s, err = storeConstructor(nil) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + var err error + rv.i, err = indexTypeConstructor(rv.meta.Storage, nil, Config.analysisQueue) if err != nil { return nil, err } @@ -119,10 
+109,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore meta: newIndexMeta(indexType, kvstore, kvconfig), stats: &IndexStat{}, } - storeConstructor := registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } // at this point there is hope that we can be successful, so save index meta err = rv.meta.Save(path) if err != nil { @@ -132,24 +118,21 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore kvconfig["error_if_exists"] = true kvconfig["path"] = indexStorePath(path) - // now create the store - rv.s, err = storeConstructor(kvconfig) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + rv.i, err = indexTypeConstructor(rv.meta.Storage, kvconfig, Config.analysisQueue) if err != nil { return nil, err } err = rv.i.Open() if err != nil { + if err == index.ErrorUnknownStorageType { + return nil, ErrorUnknownStorageType + } return nil, err } rv.stats.indexStat = rv.i.Stats() @@ -172,7 +155,6 @@ func newIndexUsing(path string, mapping *IndexMapping, indexType string, kvstore } func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) { - rv = &indexImpl{ path: path, stats: &IndexStat{}, @@ -183,11 +165,6 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde return nil, err } - storeConstructor := registry.KVStoreConstructorByName(rv.meta.Storage) - if storeConstructor == nil { - return nil, ErrorUnknownStorageType - } - storeConfig := rv.meta.Config if storeConfig == nil { storeConfig = map[string]interface{}{} @@ -200,24 +177,21 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde storeConfig[rck] = rcv } - // now open the store - rv.s, err = storeConstructor(storeConfig) - if err != nil { - return nil, err - } - // open the index indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType) if indexTypeConstructor == nil { return nil, ErrorUnknownIndexType } - rv.i, err = indexTypeConstructor(rv.s, Config.analysisQueue) + rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue) if err != nil { return nil, err } err = rv.i.Open() if err != nil { + if err == index.ErrorUnknownStorageType { + return nil, ErrorUnknownStorageType + } return nil, err } rv.stats.indexStat = rv.i.Stats() diff --git a/index_meta.go b/index_meta.go index ad0d223e..a6599529 100644 --- a/index_meta.go +++ b/index_meta.go @@ -13,6 +13,8 @@ import ( "encoding/json" "io/ioutil" "os" + + "github.com/blevesearch/bleve/index/upside_down" ) const metaFilename = "index_meta.json" @@ -45,6 +47,9 @@ func openIndexMeta(path string) (*indexMeta, error) { if err != nil { return nil, ErrorIndexMetaCorrupt } + if im.IndexType == "" { + im.IndexType = upside_down.Name + } return &im, nil } diff --git a/registry/index_type.go b/registry/index_type.go index 503c760f..69d1e5de 100644 --- a/registry/index_type.go +++ b/registry/index_type.go @@ -13,7 +13,6 @@ import ( "fmt" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store" ) func RegisterIndexType(name string, constructor IndexTypeConstructor) { @@ -24,7 +23,7 @@ func RegisterIndexType(name string, constructor IndexTypeConstructor) { index_types[name] = constructor } -type 
IndexTypeConstructor func(store.KVStore, *index.AnalysisQueue) (index.Index, error) +type IndexTypeConstructor func(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) type IndexTypeRegistry map[string]IndexTypeConstructor func IndexTypeConstructorByName(name string) IndexTypeConstructor { @@ -32,16 +31,10 @@ func IndexTypeConstructorByName(name string) IndexTypeConstructor { } func IndexTypesAndInstances() ([]string, []string) { - emptyConfig := map[string]interface{}{} types := make([]string, 0) instances := make([]string, 0) - for name, cons := range stores { - _, err := cons(emptyConfig) - if err == nil { - instances = append(instances, name) - } else { - types = append(types, name) - } + for name, _ := range stores { + types = append(types, name) } return types, instances } diff --git a/registry/store.go b/registry/store.go index cff7c88e..229f534a 100644 --- a/registry/store.go +++ b/registry/store.go @@ -23,7 +23,7 @@ func RegisterKVStore(name string, constructor KVStoreConstructor) { stores[name] = constructor } -type KVStoreConstructor func(config map[string]interface{}) (store.KVStore, error) +type KVStoreConstructor func(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) type KVStoreRegistry map[string]KVStoreConstructor func KVStoreConstructorByName(name string) KVStoreConstructor { @@ -31,16 +31,10 @@ func KVStoreConstructorByName(name string) KVStoreConstructor { } func KVStoreTypesAndInstances() ([]string, []string) { - emptyConfig := map[string]interface{}{} types := make([]string, 0) instances := make([]string, 0) - for name, cons := range stores { - _, err := cons(emptyConfig) - if err == nil { - instances = append(instances, name) - } else { - types = append(types, name) - } + for name, _ := range stores { + types = append(types, name) } return types, instances } diff --git a/search/searchers/base_test.go b/search/searchers/base_test.go index d4ce26e9..54363ee9 100644 --- a/search/searchers/base_test.go +++ b/search/searchers/base_test.go @@ -17,17 +17,20 @@ import ( "github.com/blevesearch/bleve/analysis/tokenizers/regexp_tokenizer" "github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upside_down" ) var twoDocIndex index.Index //= upside_down.NewUpsideDownCouch(inmem.MustOpen()) func init() { - inMemStore, _ := inmem.New() analysisQueue := index.NewAnalysisQueue(1) - twoDocIndex = upside_down.NewUpsideDownCouch(inMemStore, analysisQueue) - err := twoDocIndex.Open() + var err error + twoDocIndex, err = upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue) + if err != nil { + panic(err) + } + err = twoDocIndex.Open() if err != nil { panic(err) } diff --git a/search/searchers/search_boolean_test.go b/search/searchers/search_boolean_test.go index 875424b9..d7926b33 100644 --- a/search/searchers/search_boolean_test.go +++ b/search/searchers/search_boolean_test.go @@ -17,6 +17,9 @@ import ( func TestBooleanSearch(t *testing.T) { + if twoDocIndex == nil { + t.Fatal("its null") + } twoDocIndexReader, err := twoDocIndex.Reader() if err != nil { t.Error(err) diff --git a/search/searchers/search_term_test.go b/search/searchers/search_term_test.go index f3072be7..cf6f04b6 100644 --- a/search/searchers/search_term_test.go +++ b/search/searchers/search_term_test.go @@ -15,7 +15,7 @@ import ( 
"github.com/blevesearch/bleve/document" "github.com/blevesearch/bleve/index" - "github.com/blevesearch/bleve/index/store/inmem" + "github.com/blevesearch/bleve/index/store/gtreap" "github.com/blevesearch/bleve/index/upside_down" ) @@ -26,10 +26,12 @@ func TestTermSearcher(t *testing.T) { var queryBoost = 3.0 var queryExplain = true - inMemStore, _ := inmem.New() analysisQueue := index.NewAnalysisQueue(1) - i := upside_down.NewUpsideDownCouch(inMemStore, analysisQueue) - err := i.Open() + i, err := upside_down.NewUpsideDownCouch(gtreap.Name, nil, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = i.Open() if err != nil { t.Fatal(err) } From d06b526cbf918f3c15dead4d4c8318b298688ca1 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 28 Sep 2015 16:50:27 -0400 Subject: [PATCH 02/17] more refactoring --- index/store/boltdb/store_test.go | 108 +++++++------------------- index/store/goleveldb/store.go | 1 - index/store/goleveldb/store_test.go | 108 +++++++------------------- index/store/gtreap/store_test.go | 105 +++++++------------------ index/store/metrics/store_test.go | 105 +++++++------------------ index/store/null/null_test.go | 6 +- index/store/test/README.md | 11 +++ index/store/test/bytes.go | 12 --- index/store/test/isolation.go | 16 ++-- index/store/test/iterator.go | 12 --- index/upside_down/row.go | 2 +- index/upside_down/upside_down.go | 21 ++++- index/upside_down/upside_down_test.go | 53 +++++++++++++ 13 files changed, 209 insertions(+), 351 deletions(-) create mode 100644 index/store/test/README.md diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index d5684983..0380a92c 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -17,111 +17,63 @@ import ( "github.com/blevesearch/bleve/index/store/test" ) -func open(mo store.MergeOperator) (store.KVStore, error) { - return New(mo, map[string]interface{}{"path": "test"}) -} - -func TestBoltDBKVCrud(t *testing.T) { - s, err := open(nil) +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, map[string]interface{}{"path": "test"}) if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + return rv +} +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } + err = os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } +} + +func TestBoltDBKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) test.CommonTestKVCrud(t, s) } func TestBoltDBReaderIsolation(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderIsolation(t, s) } func TestBoltDBReaderOwnsGetBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderOwnsGetBytes(t, s) } func TestBoltDBWriterOwnsBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestWriterOwnsBytes(t, s) } func TestBoltDBPrefixIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != 
nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestPrefixIterator(t, s) } func TestBoltDBRangeIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestRangeIterator(t, s) } func TestBoltDBMerge(t *testing.T) { - s, err := open(&test.TestMergeCounter{}) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) test.CommonTestMerge(t, s) } diff --git a/index/store/goleveldb/store.go b/index/store/goleveldb/store.go index 8fffc451..26d9a3a0 100644 --- a/index/store/goleveldb/store.go +++ b/index/store/goleveldb/store.go @@ -56,7 +56,6 @@ func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, defaultWriteOptions: &opt.WriteOptions{}, } rv.defaultWriteOptions.Sync = true - return &rv, nil } diff --git a/index/store/goleveldb/store_test.go b/index/store/goleveldb/store_test.go index 1a8368af..e643fd4c 100644 --- a/index/store/goleveldb/store_test.go +++ b/index/store/goleveldb/store_test.go @@ -17,114 +17,66 @@ import ( "github.com/blevesearch/bleve/index/store/test" ) -func open(mo store.MergeOperator) (store.KVStore, error) { - return New(mo, map[string]interface{}{ +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, map[string]interface{}{ "path": "test", "create_if_missing": true, }) -} - -func TestGoLevelDBKVCrud(t *testing.T) { - s, err := open(nil) if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + return rv +} +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } + err = os.RemoveAll("test") + if err != nil { + t.Fatal(err) + } +} + +func TestGoLevelDBKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) test.CommonTestKVCrud(t, s) } func TestGoLevelDBReaderIsolation(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderIsolation(t, s) } func TestGoLevelDBReaderOwnsGetBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderOwnsGetBytes(t, s) } func TestGoLevelDBWriterOwnsBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestWriterOwnsBytes(t, s) } func TestGoLevelDBPrefixIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestPrefixIterator(t, s) } func TestGoLevelDBRangeIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestRangeIterator(t, s) } func TestGoLevelDBMerge(t *testing.T) { - 
s, err := open(&test.TestMergeCounter{}) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) test.CommonTestMerge(t, s) } diff --git a/index/store/gtreap/store_test.go b/index/store/gtreap/store_test.go index 92c17e67..82e12cf6 100644 --- a/index/store/gtreap/store_test.go +++ b/index/store/gtreap/store_test.go @@ -13,118 +13,65 @@ package gtreap import ( - "os" "testing" "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/bleve/index/store/test" ) -func open(mo store.MergeOperator) (store.KVStore, error) { - return New(mo, nil) -} - -func TestGTreapKVCrud(t *testing.T) { - s, err := open(nil) +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := New(mo, nil) if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + return rv +} +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestGTreapKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) test.CommonTestKVCrud(t, s) } func TestGTreapReaderIsolation(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderIsolation(t, s) } func TestGTreapReaderOwnsGetBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderOwnsGetBytes(t, s) } func TestGTreapWriterOwnsBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestWriterOwnsBytes(t, s) } func TestGTreapPrefixIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestPrefixIterator(t, s) } func TestGTreapRangeIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestRangeIterator(t, s) } func TestGTreapMerge(t *testing.T) { - s, err := open(&test.TestMergeCounter{}) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) test.CommonTestMerge(t, s) } diff --git a/index/store/metrics/store_test.go b/index/store/metrics/store_test.go index 0c1c4b91..d3c65f79 100644 --- a/index/store/metrics/store_test.go +++ b/index/store/metrics/store_test.go @@ -1,7 +1,6 @@ package metrics import ( - "os" "testing" "github.com/blevesearch/bleve/index/store" @@ -9,111 +8,59 @@ import ( "github.com/blevesearch/bleve/index/store/test" ) -func open(mo store.MergeOperator) (store.KVStore, error) { - return New(mo, map[string]interface{}{"kvStoreName_actual": gtreap.Name}) -} - -func TestMetricsKVCrud(t *testing.T) { - s, err := open(nil) +func open(t *testing.T, mo store.MergeOperator) store.KVStore { + rv, err := 
New(mo, map[string]interface{}{"kvStoreName_actual": gtreap.Name}) if err != nil { t.Fatal(err) } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() + return rv +} +func cleanup(t *testing.T, s store.KVStore) { + err := s.Close() + if err != nil { + t.Fatal(err) + } +} + +func TestMetricsKVCrud(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) test.CommonTestKVCrud(t, s) } func TestMetricsReaderIsolation(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderIsolation(t, s) } func TestMetricsReaderOwnsGetBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestReaderOwnsGetBytes(t, s) } func TestMetricsWriterOwnsBytes(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestWriterOwnsBytes(t, s) } func TestMetricsPrefixIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestPrefixIterator(t, s) } func TestMetricsRangeIterator(t *testing.T) { - s, err := open(nil) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, nil) + defer cleanup(t, s) test.CommonTestRangeIterator(t, s) } func TestMetricsMerge(t *testing.T) { - s, err := open(&test.TestMergeCounter{}) - if err != nil { - t.Fatal(err) - } - defer func() { - err := os.RemoveAll("test") - if err != nil { - t.Fatal(err) - } - }() - + s := open(t, &test.TestMergeCounter{}) + defer cleanup(t, s) test.CommonTestMerge(t, s) } diff --git a/index/store/null/null_test.go b/index/store/null/null_test.go index b0b50be1..5e921801 100644 --- a/index/store/null/null_test.go +++ b/index/store/null/null_test.go @@ -12,10 +12,12 @@ func TestStore(t *testing.T) { t.Fatal(err) } - CommonTestKVStore(t, s) + NullTestKVStore(t, s) } -func CommonTestKVStore(t *testing.T, s store.KVStore) { +// NullTestKVStore has very different expectations +// compared to CommonTestKVStore +func NullTestKVStore(t *testing.T, s store.KVStore) { writer, err := s.Writer() if err != nil { diff --git a/index/store/test/README.md b/index/store/test/README.md new file mode 100644 index 00000000..392df281 --- /dev/null +++ b/index/store/test/README.md @@ -0,0 +1,11 @@ +# Generic KVStore implementation tests + +These are a set of common tests that should pass on any correct KVStore implementation. + +Each test function in this package has the form: + + func CommonTest(t *testing.T, s store.KVStore) {...} + +A KVStore implementation test should use the same name, including its own KVStore name in the test function. It should instantiate an instance of the store, and pass the testing.T and store to the common function. + +The common test functions should *NOT* close the KVStore. The KVStore test implementation should close the store and cleanup any state. 
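
A minimal sketch (not part of the patch) of how a KVStore implementation is expected to hook into this shared suite, following the open/cleanup helper shape this series gives the boltdb, goleveldb, gtreap and metrics tests; the package name and the New constructor below are placeholders for a real implementation:

    package mystore // placeholder package for a hypothetical KVStore implementation

    import (
        "testing"

        "github.com/blevesearch/bleve/index/store"
        "github.com/blevesearch/bleve/index/store/test"
    )

    // open builds the store under test; the common test functions never
    // construct stores themselves
    func open(t *testing.T, mo store.MergeOperator) store.KVStore {
        rv, err := New(mo, nil) // hypothetical constructor for this store
        if err != nil {
            t.Fatal(err)
        }
        return rv
    }

    // cleanup closes the store and removes any state, because the common
    // test functions deliberately leave the store open
    func cleanup(t *testing.T, s store.KVStore) {
        if err := s.Close(); err != nil {
            t.Fatal(err)
        }
    }

    func TestMyStoreKVCrud(t *testing.T) {
        s := open(t, nil)
        defer cleanup(t, s)
        test.CommonTestKVCrud(t, s)
    }

    func TestMyStoreMerge(t *testing.T) {
        s := open(t, &test.TestMergeCounter{})
        defer cleanup(t, s)
        test.CommonTestMerge(t, s)
    }
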
\ No newline at end of file diff --git a/index/store/test/bytes.go b/index/store/test/bytes.go index 6a124abf..d09a2720 100644 --- a/index/store/test/bytes.go +++ b/index/store/test/bytes.go @@ -106,12 +106,6 @@ func CommonTestReaderOwnsGetBytes(t *testing.T, s store.KVStore) { t.Fatal(err) } - // close the store - err = s.Close() - if err != nil { - t.Fatal(err) - } - // finally check that the value we mutated still has what we set it to for i := range returnedVal { if returnedVal[i] != '2' { @@ -265,10 +259,4 @@ func CommonTestWriterOwnsBytes(t *testing.T, s store.KVStore) { if err != nil { t.Fatal(err) } - - // close the store - err = s.Close() - if err != nil { - t.Fatal(err) - } } diff --git a/index/store/test/isolation.go b/index/store/test/isolation.go index 9791bab5..2bceca7c 100644 --- a/index/store/test/isolation.go +++ b/index/store/test/isolation.go @@ -133,15 +133,15 @@ func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { // ensure that the direct iterator sees it count = 0 - it = newReader.RangeIterator([]byte{0}, []byte{'x'}) + it2 := newReader.RangeIterator([]byte{0}, []byte{'x'}) defer func() { - err := it.Close() + err := it2.Close() if err != nil { t.Fatal(err) } }() - for it.Valid() { - it.Next() + for it2.Valid() { + it2.Next() count++ } if count != 2 { @@ -159,15 +159,15 @@ func CommonTestReaderIsolation(t *testing.T, s store.KVStore) { // and ensure that the iterator on the isolated reader also does not count = 0 - it = reader.RangeIterator([]byte{0}, []byte{'x'}) + it3 := reader.RangeIterator([]byte{0}, []byte{'x'}) defer func() { - err := it.Close() + err := it3.Close() if err != nil { t.Fatal(err) } }() - for it.Valid() { - it.Next() + for it3.Valid() { + it3.Next() count++ } if count != 1 { diff --git a/index/store/test/iterator.go b/index/store/test/iterator.go index 59217450..ff5be9e7 100644 --- a/index/store/test/iterator.go +++ b/index/store/test/iterator.go @@ -118,12 +118,6 @@ func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { if err != nil { t.Fatal(err) } - - // close the store - err = s.Close() - if err != nil { - t.Fatal(err) - } } func CommonTestRangeIterator(t *testing.T, s store.KVStore) { @@ -280,10 +274,4 @@ func CommonTestRangeIterator(t *testing.T, s store.KVStore) { if err != nil { t.Fatal(err) } - - // close the store - err = s.Close() - if err != nil { - t.Fatal(err) - } } diff --git a/index/upside_down/row.go b/index/upside_down/row.go index a0e6dcfc..26be5302 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -552,7 +552,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) { rv.doc, err = buf.ReadBytes(ByteSeparator) if err == io.EOF && len(rv.doc) < 1 { - err = fmt.Errorf("invalid doc length 0") + err = fmt.Errorf("invalid doc length 0 - % x", key) } if err != nil && err != io.EOF { return nil, err diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 674d2c58..db327b33 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -49,6 +49,8 @@ type UpsideDownCouch struct { m sync.RWMutex // fields protected by m docCount uint64 + + writeMutex sync.Mutex } func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) { @@ -159,6 +161,10 @@ func (udc *UpsideDownCouch) DocCount() (uint64, error) { } func (udc *UpsideDownCouch) Open() (err error) { + //acquire the write mutex for the duratin of Open() + udc.writeMutex.Lock() + defer 
udc.writeMutex.Unlock() + // open the kv store storeConstructor := registry.KVStoreConstructorByName(udc.storeName) if storeConstructor == nil { @@ -217,7 +223,7 @@ func (udc *UpsideDownCouch) Open() (err error) { } }() - // init th eindex + // init the index err = udc.init(kvwriter) } @@ -289,6 +295,9 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) { close(resultChan) atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart))) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() @@ -458,6 +467,9 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field func (udc *UpsideDownCouch) Delete(id string) (err error) { indexStart := time.Now() + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() @@ -677,6 +689,9 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { indexStart := time.Now() + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() + // open a reader for backindex lookup var kvreader store.KVReader kvreader, err = udc.store.Reader() @@ -763,6 +778,8 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) { func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) { internalRow := NewInternalRow(key, val) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() var writer store.KVWriter writer, err = udc.store.Writer() if err != nil { @@ -782,6 +799,8 @@ func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) { func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) { internalRow := NewInternalRow(key, nil) + udc.writeMutex.Lock() + defer udc.writeMutex.Unlock() var writer store.KVWriter writer, err = udc.store.Writer() if err != nil { diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 38d7f496..263ab1ee 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -10,9 +10,11 @@ package upside_down import ( + "log" "reflect" "regexp" "strconv" + "sync" "testing" "time" @@ -1155,3 +1157,54 @@ func BenchmarkBatch(b *testing.B) { } } } + +func TestConcurrentUpdate(t *testing.T) { + defer DestroyTest() + + analysisQueue := index.NewAnalysisQueue(1) + idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) + if err != nil { + t.Fatal(err) + } + err = idx.Open() + if err != nil { + t.Errorf("error opening index: %v", err) + } + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + // do some concurrent updates + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func(i int) { + doc := document.NewDocument("1") + doc.AddField(document.NewTextFieldWithIndexingOptions(strconv.Itoa(i), []uint64{}, []byte(strconv.Itoa(i)), document.StoreField)) + err := idx.Update(doc) + if err != nil { + t.Errorf("Error updating index: %v", err) + } + wg.Done() + }(i) + } + wg.Wait() + + // now load the name field and see what we get + r, err := idx.Reader() + if err != nil { + log.Fatal(err) + } + + doc, err := r.Document("1") + if err != nil { + log.Fatal(err) + } + + if len(doc.Fields) > 1 { + t.Errorf("expected single field, found %d", len(doc.Fields)) + } +} From 71cbb13e073870cbc1f40b9be16105aefbc1d9a5 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 5 Oct 2015 17:49:50 -0400 Subject: [PATCH 03/17] modify code to 
reuse buffer for kv generation --- index/analysis.go | 5 + index/upside_down/row.go | 202 ++++++++++++++++++++++---- index/upside_down/upside_down.go | 89 +++++++++++- index/upside_down/upside_down_test.go | 2 +- 4 files changed, 259 insertions(+), 39 deletions(-) diff --git a/index/analysis.go b/index/analysis.go index 96b70e77..38db2169 100644 --- a/index/analysis.go +++ b/index/analysis.go @@ -12,7 +12,12 @@ package index import "github.com/blevesearch/bleve/document" type IndexRow interface { + KeySize() int + KeyTo([]byte) (int, error) Key() []byte + + ValueSize() int + ValueTo([]byte) (int, error) Value() []byte } diff --git a/index/upside_down/row.go b/index/upside_down/row.go index 26be5302..442459ef 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -24,8 +24,12 @@ const ByteSeparator byte = 0xff type UpsideDownCouchRowStream chan UpsideDownCouchRow type UpsideDownCouchRow interface { + KeySize() int + KeyTo([]byte) (int, error) Key() []byte Value() []byte + ValueSize() int + ValueTo([]byte) (int, error) } func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { @@ -61,10 +65,28 @@ func (v *VersionRow) Key() []byte { return []byte{'v'} } +func (v *VersionRow) KeySize() int { + return 1 +} + +func (v *VersionRow) KeyTo(buf []byte) (int, error) { + buf[0] = 'v' + return 1, nil +} + func (v *VersionRow) Value() []byte { return []byte{byte(v.version)} } +func (v *VersionRow) ValueSize() int { + return 1 +} + +func (v *VersionRow) ValueTo(buf []byte) (int, error) { + buf[0] = v.version + return 1, nil +} + func (v *VersionRow) String() string { return fmt.Sprintf("Version: %d", v.version) } @@ -93,16 +115,34 @@ type InternalRow struct { } func (i *InternalRow) Key() []byte { - buf := make([]byte, len(i.key)+1) + buf := make([]byte, i.KeySize()) + size, _ := i.KeyTo(buf) + return buf[:size] +} + +func (i *InternalRow) KeySize() int { + return len(i.key) + 1 +} + +func (i *InternalRow) KeyTo(buf []byte) (int, error) { buf[0] = 'i' - copy(buf[1:], i.key) - return buf + actual := copy(buf[1:], i.key) + return 1 + actual, nil } func (i *InternalRow) Value() []byte { return i.val } +func (i *InternalRow) ValueSize() int { + return len(i.val) +} + +func (i *InternalRow) ValueTo(buf []byte) (int, error) { + actual := copy(buf, i.val) + return actual, nil +} + func (i *InternalRow) String() string { return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val) } @@ -129,16 +169,35 @@ type FieldRow struct { } func (f *FieldRow) Key() []byte { - buf := make([]byte, 3) + buf := make([]byte, f.KeySize()) + size, _ := f.KeyTo(buf) + return buf[:size] +} + +func (f *FieldRow) KeySize() int { + return 3 +} + +func (f *FieldRow) KeyTo(buf []byte) (int, error) { buf[0] = 'f' binary.LittleEndian.PutUint16(buf[1:3], f.index) - return buf + return 3, nil } func (f *FieldRow) Value() []byte { return append([]byte(f.name), ByteSeparator) } +func (f *FieldRow) ValueSize() int { + return len(f.name) + 1 +} + +func (f *FieldRow) ValueTo(buf []byte) (int, error) { + size := copy(buf, f.name) + buf[size] = ByteSeparator + return size + 1, nil +} + func (f *FieldRow) String() string { return fmt.Sprintf("Field: %d Name: %s", f.index, f.name) } @@ -182,18 +241,35 @@ type DictionaryRow struct { } func (dr *DictionaryRow) Key() []byte { - buf := make([]byte, 3+len(dr.term)) + buf := make([]byte, dr.KeySize()) + size, _ := dr.KeyTo(buf) + return buf[:size] +} + +func (dr *DictionaryRow) KeySize() int { + return len(dr.term) + 3 +} + +func (dr 
*DictionaryRow) KeyTo(buf []byte) (int, error) { buf[0] = 'd' binary.LittleEndian.PutUint16(buf[1:3], dr.field) - copy(buf[3:], dr.term) - return buf + size := copy(buf[3:], dr.term) + return size + 3, nil } func (dr *DictionaryRow) Value() []byte { - used := 0 - buf := make([]byte, binary.MaxVarintLen64) - used += binary.PutUvarint(buf, dr.count) - return buf[0:used] + buf := make([]byte, dr.ValueSize()) + size, _ := dr.ValueTo(buf) + return buf[:size] +} + +func (dr *DictionaryRow) ValueSize() int { + return binary.MaxVarintLen64 +} + +func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) { + used := binary.PutUvarint(buf, dr.count) + return used, nil } func (dr *DictionaryRow) String() string { @@ -304,13 +380,22 @@ func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte { } func (tfr *TermFrequencyRow) Key() []byte { - buf := make([]byte, 3+len(tfr.term)+1+len(tfr.doc)) + buf := make([]byte, tfr.KeySize()) + size, _ := tfr.KeyTo(buf) + return buf[:size] +} + +func (tfr *TermFrequencyRow) KeySize() int { + return 3 + len(tfr.term) + 1 + len(tfr.doc) +} + +func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { buf[0] = 't' binary.LittleEndian.PutUint16(buf[1:3], tfr.field) termLen := copy(buf[3:], tfr.term) buf[3+termLen] = ByteSeparator - copy(buf[3+termLen+1:], tfr.doc) - return buf + docLen := copy(buf[3+termLen+1:], tfr.doc) + return 3 + termLen + 1 + docLen, nil } func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { @@ -318,15 +403,32 @@ func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { return dr.Key() } +func (tfr *TermFrequencyRow) DictionaryRowKeySize() int { + dr := NewDictionaryRow(tfr.term, tfr.field, 0) + return dr.KeySize() +} + +func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) { + dr := NewDictionaryRow(tfr.term, tfr.field, 0) + return dr.KeyTo(buf) +} + func (tfr *TermFrequencyRow) Value() []byte { - used := 0 + buf := make([]byte, tfr.ValueSize()) + size, _ := tfr.ValueTo(buf) + return buf[:size] +} + +func (tfr *TermFrequencyRow) ValueSize() int { bufLen := binary.MaxVarintLen64 + binary.MaxVarintLen64 for _, vector := range tfr.vectors { bufLen += (binary.MaxVarintLen64 * 4) + (1+len(vector.arrayPositions))*binary.MaxVarintLen64 } - buf := make([]byte, bufLen) + return bufLen +} - used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], tfr.freq) +func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) { + used := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq) normuint32 := math.Float32bits(tfr.norm) newbuf := buf[used : used+binary.MaxVarintLen64] @@ -342,7 +444,7 @@ func (tfr *TermFrequencyRow) Value() []byte { used += binary.PutUvarint(buf[used:used+binary.MaxVarintLen64], arrayPosition) } } - return buf[0:used] + return used, nil } func (tfr *TermFrequencyRow) String() string { @@ -514,19 +616,41 @@ func (br *BackIndexRow) AllStoredKeys() [][]byte { } func (br *BackIndexRow) Key() []byte { - buf := make([]byte, len(br.doc)+1) + buf := make([]byte, br.KeySize()) + size, _ := br.KeyTo(buf) + return buf[:size] +} + +func (br *BackIndexRow) KeySize() int { + return len(br.doc) + 1 +} + +func (br *BackIndexRow) KeyTo(buf []byte) (int, error) { buf[0] = 'b' - copy(buf[1:], br.doc) - return buf + used := copy(buf[1:], br.doc) + return used + 1, nil } func (br *BackIndexRow) Value() []byte { + buf := make([]byte, br.ValueSize()) + size, _ := br.ValueTo(buf) + return buf[:size] +} + +func (br *BackIndexRow) ValueSize() int { birv := &BackIndexRowValue{ TermEntries: br.termEntries, StoredEntries: 
br.storedEntries, } - bytes, _ := proto.Marshal(birv) - return bytes + return birv.Size() +} + +func (br *BackIndexRow) ValueTo(buf []byte) (int, error) { + birv := &BackIndexRowValue{ + TermEntries: br.termEntries, + StoredEntries: br.storedEntries, + } + return birv.MarshalTo(buf) } func (br *BackIndexRow) String() string { @@ -582,8 +706,17 @@ type StoredRow struct { } func (s *StoredRow) Key() []byte { + buf := make([]byte, s.KeySize()) + size, _ := s.KeyTo(buf) + return buf[0:size] +} + +func (s *StoredRow) KeySize() int { + return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions)) +} + +func (s *StoredRow) KeyTo(buf []byte) (int, error) { docLen := len(s.doc) - buf := make([]byte, 1+docLen+1+2+(binary.MaxVarintLen64*len(s.arrayPositions))) buf[0] = 's' copy(buf[1:], s.doc) buf[1+docLen] = ByteSeparator @@ -593,14 +726,23 @@ func (s *StoredRow) Key() []byte { varbytes := binary.PutUvarint(buf[bytesUsed:], arrayPosition) bytesUsed += varbytes } - return buf[0:bytesUsed] + return bytesUsed, nil } func (s *StoredRow) Value() []byte { - rv := make([]byte, len(s.value)+1) - rv[0] = s.typ - copy(rv[1:], s.value) - return rv + buf := make([]byte, s.ValueSize()) + size, _ := s.ValueTo(buf) + return buf[:size] +} + +func (s *StoredRow) ValueSize() int { + return len(s.value) + 1 +} + +func (s *StoredRow) ValueTo(buf []byte) (int, error) { + buf[0] = s.typ + used := copy(buf[1:], s.value) + return used + 1, nil } func (s *StoredRow) String() string { diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index db327b33..be5de326 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -114,6 +114,20 @@ func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) { return } +var rowBufferPool sync.Pool + +func GetRowBuffer() []byte { + if rb, ok := rowBufferPool.Get().([]byte); ok { + return rb + } else { + return make([]byte, 2048) + } +} + +func PutRowBuffer(buf []byte) { + rowBufferPool.Put(buf) +} + func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) { // prepare batch @@ -121,29 +135,88 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow // add for _, row := range addRows { + keyBuf := GetRowBuffer() + valBuf := GetRowBuffer() tfr, ok := row.(*TermFrequencyRow) if ok { - // need to increment counter - dictionaryKey := tfr.DictionaryRowKey() - wb.Merge(dictionaryKey, dictionaryTermIncr) + // need to increment term dictinoary counter + if tfr.DictionaryRowKeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*tfr.DictionaryRowKeySize()) + } + dictKeySize, err := tfr.DictionaryRowKeyTo(keyBuf) + if err != nil { + return err + } + wb.Merge(keyBuf[:dictKeySize], dictionaryTermIncr) } - wb.Set(row.Key(), row.Value()) + if row.KeySize() > len(keyBuf) { + // grow buffer + keyBuf = make([]byte, 2*row.KeySize()) + } + keySize, err := row.KeyTo(keyBuf) + if err != nil { + return err + } + if row.ValueSize() > len(valBuf) { + // grow buffer + valBuf = make([]byte, 2*row.ValueSize()) + } + valSize, err := row.ValueTo(valBuf) + wb.Set(keyBuf[:keySize], valBuf[:valSize]) + + PutRowBuffer(keyBuf) + PutRowBuffer(valBuf) } // update for _, row := range updateRows { - wb.Set(row.Key(), row.Value()) + keyBuf := GetRowBuffer() + valBuf := GetRowBuffer() + if row.KeySize() > len(keyBuf) { + // grow buffer + keyBuf = make([]byte, 2*row.KeySize()) + } + keySize, err := 
row.KeyTo(keyBuf) + if err != nil { + return err + } + if row.ValueSize() > len(valBuf) { + // grow buffer + valBuf = make([]byte, 2*row.ValueSize()) + } + valSize, err := row.ValueTo(valBuf) + wb.Set(keyBuf[:keySize], valBuf[:valSize]) + + PutRowBuffer(keyBuf) + PutRowBuffer(valBuf) } // delete for _, row := range deleteRows { + keyBuf := GetRowBuffer() tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter - dictionaryKey := tfr.DictionaryRowKey() - wb.Merge(dictionaryKey, dictionaryTermDecr) + if tfr.DictionaryRowKeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*tfr.DictionaryRowKeySize()) + } + dictKeySize, err := tfr.DictionaryRowKeyTo(keyBuf) + if err != nil { + return err + } + wb.Merge(keyBuf[:dictKeySize], dictionaryTermDecr) } - wb.Delete(row.Key()) + if row.KeySize() > len(keyBuf) { + // grow buffer + keyBuf = make([]byte, 2*row.KeySize()) + } + keySize, err := row.KeyTo(keyBuf) + if err != nil { + return err + } + wb.Delete(keyBuf[:keySize]) + + PutRowBuffer(keyBuf) } // write out the batch diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 263ab1ee..6a9853d4 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -348,7 +348,7 @@ func TestIndexInsertMultiple(t *testing.T) { } err = idx.Open() if err != nil { - t.Errorf("error opening index: %v", err) + t.Fatalf("error opening index: %v", err) } defer func() { err := idx.Close() From e28eb749d77b95a384d3e7dfb4de060c39fe6a62 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 6 Oct 2015 16:45:38 -0400 Subject: [PATCH 04/17] bump up buffer size --- index/upside_down/upside_down.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index be5de326..6a441b49 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -120,7 +120,7 @@ func GetRowBuffer() []byte { if rb, ok := rowBufferPool.Get().([]byte); ok { return rb } else { - return make([]byte, 2048) + return make([]byte, 4096) } } From 95e06538f3182fa10fec832403ef192c620c8b70 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Fri, 9 Oct 2015 11:09:42 -0400 Subject: [PATCH 05/17] fix benchmarks for the x kvstores --- index/upside_down/benchmark_forestdb_test.go | 170 +++++++++++++++--- index/upside_down/benchmark_gorocksdb_test.go | 35 ++-- index/upside_down/benchmark_leveldb_test.go | 35 ++-- 3 files changed, 172 insertions(+), 68 deletions(-) diff --git a/index/upside_down/benchmark_forestdb_test.go b/index/upside_down/benchmark_forestdb_test.go index 89f5cffe..5ece4334 100644 --- a/index/upside_down/benchmark_forestdb_test.go +++ b/index/upside_down/benchmark_forestdb_test.go @@ -15,72 +15,194 @@ import ( "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/forestdb" ) -func CreateForestDB() (store.KVStore, error) { - err := os.MkdirAll("testdir", 0700) - if err != nil { - return nil, err - } - s, err := forestdb.New("testdir/test", true, nil) - if err != nil { - return nil, err - } - return s, nil +var forestDBTestOption = map[string]interface{}{ + "path": "testdir/test", + "create_if_missing": true, } +// internally used to reset, so we also +// re-make the testdir func DestroyForestDB() error { - return os.RemoveAll("testdir") + err := os.RemoveAll("testdir") + if err != nil { + return err + } + err = os.MkdirAll("testdir", 0700) + if err != nil { + return err + } + return nil } func BenchmarkForestDBIndexing1Workers(b 
*testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 1) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1) } func BenchmarkForestDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 2) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2) } func BenchmarkForestDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateForestDB, DestroyForestDB, 4) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndex(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4) } // batches func BenchmarkForestDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 10) } func BenchmarkForestDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 10) } func BenchmarkForestDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 10) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 10) } func BenchmarkForestDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 1, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 100) } func BenchmarkForestDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 100) } func BenchmarkForestDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 100) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 100) } func BenchmarkForestDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, 
DestroyForestDB, 1, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 1, 1000) } func BenchmarkForestDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 2, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 2, 1000) } func BenchmarkForestDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateForestDB, DestroyForestDB, 4, 1000) + err := os.MkdirAll("testdir", 0700) + if err != nil { + b.Fatal(err) + } + defer func() { + err := os.RemoveAll("testdir") + if err != nil { + b.Fatal(err) + } + }() + CommonBenchmarkIndexBatch(b, forestdb.Name, forestDBTestOption, DestroyForestDB, 4, 1000) } diff --git a/index/upside_down/benchmark_gorocksdb_test.go b/index/upside_down/benchmark_gorocksdb_test.go index fb931c9b..8c2848e4 100644 --- a/index/upside_down/benchmark_gorocksdb_test.go +++ b/index/upside_down/benchmark_gorocksdb_test.go @@ -12,71 +12,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/rocksdb" ) var rocksdbTestOptions = map[string]interface{}{ + "path": "test", "create_if_missing": true, } -func CreateGoRocksDB() (store.KVStore, error) { - return rocksdb.New("test", rocksdbTestOptions) -} - -func DestroyGoRocksDB() error { - return os.RemoveAll("test") -} - func BenchmarkRocksDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 1) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1) } func BenchmarkRocksDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 2) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2) } func BenchmarkRocksDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateGoRocksDB, DestroyGoRocksDB, 4) + CommonBenchmarkIndex(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4) } // batches func BenchmarkRocksDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 10) } func BenchmarkRocksDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 10) } func BenchmarkRocksDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 10) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 10) } func BenchmarkRocksDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 100) } func BenchmarkRocksDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 100) } func BenchmarkRocksDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, 
CreateGoRocksDB, DestroyGoRocksDB, 4, 100) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 100) } func BenchmarkRocksDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 1, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 1, 1000) } func BenchmarkRocksDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 2, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 2, 1000) } func BenchmarkRocksDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateGoRocksDB, DestroyGoRocksDB, 4, 1000) + CommonBenchmarkIndexBatch(b, rocksdb.Name, rocksdbTestOptions, DestroyTest, 4, 1000) } diff --git a/index/upside_down/benchmark_leveldb_test.go b/index/upside_down/benchmark_leveldb_test.go index 9c84fdad..dea8bc93 100644 --- a/index/upside_down/benchmark_leveldb_test.go +++ b/index/upside_down/benchmark_leveldb_test.go @@ -12,71 +12,62 @@ package upside_down import ( - "os" "testing" - "github.com/blevesearch/bleve/index/store" "github.com/blevesearch/blevex/leveldb" ) var leveldbTestOptions = map[string]interface{}{ + "path": "test", "create_if_missing": true, } -func CreateLevelDB() (store.KVStore, error) { - return leveldb.New("test", leveldbTestOptions) -} - -func DestroyLevelDB() error { - return os.RemoveAll("test") -} - func BenchmarkLevelDBIndexing1Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 1) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1) } func BenchmarkLevelDBIndexing2Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 2) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2) } func BenchmarkLevelDBIndexing4Workers(b *testing.B) { - CommonBenchmarkIndex(b, CreateLevelDB, DestroyLevelDB, 4) + CommonBenchmarkIndex(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4) } // batches func BenchmarkLevelDBIndexing1Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1, 10) } func BenchmarkLevelDBIndexing2Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 10) } func BenchmarkLevelDBIndexing4Workers10Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 10) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 10) } func BenchmarkLevelDBIndexing1Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 1, 100) } func BenchmarkLevelDBIndexing2Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 100) } func BenchmarkLevelDBIndexing4Workers100Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 100) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 100) } func BenchmarkLevelDBIndexing1Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 1, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, 
leveldbTestOptions, DestroyTest, 1, 1000) } func BenchmarkLevelDBIndexing2Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 2, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 2, 1000) } func BenchmarkLevelDBIndexing4Workers1000Batch(b *testing.B) { - CommonBenchmarkIndexBatch(b, CreateLevelDB, DestroyLevelDB, 4, 1000) + CommonBenchmarkIndexBatch(b, leveldb.Name, leveldbTestOptions, DestroyTest, 4, 1000) } From 8de860bf12f70962e6d742f88cbb9a5f3bc97a50 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 13 Oct 2015 12:35:08 -0700 Subject: [PATCH 06/17] 2 more places that used old Key() --- index/upside_down/upside_down.go | 37 ++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 9be2a6a8..00886a31 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -438,18 +438,27 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in for _, row := range rows { switch row := row.(type) { case *TermFrequencyRow: - rowKey := string(row.Key()) - if _, ok := existingTermKeys[rowKey]; ok { + keyBuf := GetRowBuffer() + if row.KeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*row.KeySize()) + } + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingTermKeys, rowKey) + delete(existingTermKeys, string(keyBuf[:keySize])) } else { addRows = append(addRows, row) } + PutRowBuffer(keyBuf) case *StoredRow: - rowKey := string(row.Key()) - if _, ok := existingStoredKeys[rowKey]; ok { + keyBuf := GetRowBuffer() + if row.KeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*row.KeySize()) + } + keySize, _ := row.KeyTo(keyBuf) + if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) - delete(existingStoredKeys, rowKey) + delete(existingStoredKeys, string(keyBuf[:keySize])) } else { addRows = append(addRows, row) } @@ -617,15 +626,25 @@ func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID st tempRow := &BackIndexRow{ doc: []byte(docID), } - key := tempRow.Key() - value, err := kvreader.Get(key) + + keyBuf := GetRowBuffer() + if tempRow.KeySize() > len(keyBuf) { + keyBuf = make([]byte, 2*tempRow.KeySize()) + } + defer PutRowBuffer(keyBuf) + keySize, err := tempRow.KeyTo(keyBuf) + if err != nil { + return nil, err + } + + value, err := kvreader.Get(keyBuf[:keySize]) if err != nil { return nil, err } if value == nil { return nil, nil } - backIndexRow, err := NewBackIndexRowKV(key, value) + backIndexRow, err := NewBackIndexRowKV(keyBuf[:keySize], value) if err != nil { return nil, err } From 4c6bc23043239102b304a7d4e176cf43cd9e66a0 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Tue, 13 Oct 2015 14:04:56 -0700 Subject: [PATCH 07/17] rewrite to keep using same buffer when possible --- index/upside_down/upside_down.go | 84 +++++++++----------------------- 1 file changed, 22 insertions(+), 62 deletions(-) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 00886a31..c4d10b40 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -120,7 +120,7 @@ func GetRowBuffer() []byte { if rb, ok := rowBufferPool.Get().([]byte); ok { return rb } else { - return make([]byte, 4096) + return make([]byte, 4*1024) } } @@ -133,92 +133,60 @@ func (udc 
*UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow // prepare batch wb := writer.NewBatch() + // buffer to work with + rowBuf := GetRowBuffer() + // add for _, row := range addRows { - keyBuf := GetRowBuffer() - valBuf := GetRowBuffer() tfr, ok := row.(*TermFrequencyRow) if ok { - // need to increment term dictinoary counter - if tfr.DictionaryRowKeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*tfr.DictionaryRowKeySize()) - } - dictKeySize, err := tfr.DictionaryRowKeyTo(keyBuf) + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) if err != nil { return err } - wb.Merge(keyBuf[:dictKeySize], dictionaryTermIncr) + wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr) } - if row.KeySize() > len(keyBuf) { - // grow buffer - keyBuf = make([]byte, 2*row.KeySize()) - } - keySize, err := row.KeyTo(keyBuf) + keySize, err := row.KeyTo(rowBuf) if err != nil { return err } - if row.ValueSize() > len(valBuf) { - // grow buffer - valBuf = make([]byte, 2*row.ValueSize()) - } - valSize, err := row.ValueTo(valBuf) - wb.Set(keyBuf[:keySize], valBuf[:valSize]) - - PutRowBuffer(keyBuf) - PutRowBuffer(valBuf) + valSize, err := row.ValueTo(rowBuf[keySize:]) + wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize]) } // update for _, row := range updateRows { - keyBuf := GetRowBuffer() - valBuf := GetRowBuffer() - if row.KeySize() > len(keyBuf) { - // grow buffer - keyBuf = make([]byte, 2*row.KeySize()) - } - keySize, err := row.KeyTo(keyBuf) + keySize, err := row.KeyTo(rowBuf) if err != nil { return err } - if row.ValueSize() > len(valBuf) { - // grow buffer - valBuf = make([]byte, 2*row.ValueSize()) + valSize, err := row.ValueTo(rowBuf[keySize:]) + if err != nil { + return err } - valSize, err := row.ValueTo(valBuf) - wb.Set(keyBuf[:keySize], valBuf[:valSize]) - - PutRowBuffer(keyBuf) - PutRowBuffer(valBuf) + wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize]) } // delete for _, row := range deleteRows { - keyBuf := GetRowBuffer() tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter - if tfr.DictionaryRowKeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*tfr.DictionaryRowKeySize()) - } - dictKeySize, err := tfr.DictionaryRowKeyTo(keyBuf) + dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) if err != nil { return err } - wb.Merge(keyBuf[:dictKeySize], dictionaryTermDecr) + wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr) } - if row.KeySize() > len(keyBuf) { - // grow buffer - keyBuf = make([]byte, 2*row.KeySize()) - } - keySize, err := row.KeyTo(keyBuf) + keySize, err := row.KeyTo(rowBuf) if err != nil { return err } - wb.Delete(keyBuf[:keySize]) - - PutRowBuffer(keyBuf) + wb.Delete(rowBuf[:keySize]) } + PutRowBuffer(rowBuf) + // write out the batch return writer.ExecuteBatch(wb) } @@ -435,13 +403,10 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in existingStoredKeys[string(key)] = true } + keyBuf := GetRowBuffer() for _, row := range rows { switch row := row.(type) { case *TermFrequencyRow: - keyBuf := GetRowBuffer() - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*row.KeySize()) - } keySize, _ := row.KeyTo(keyBuf) if _, ok := existingTermKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) @@ -449,12 +414,7 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in } else { addRows = append(addRows, row) } - PutRowBuffer(keyBuf) case *StoredRow: - keyBuf := GetRowBuffer() - if row.KeySize() > len(keyBuf) { - keyBuf = make([]byte, 2*row.KeySize()) - } keySize, _ := 
row.KeyTo(keyBuf) if _, ok := existingStoredKeys[string(keyBuf[:keySize])]; ok { updateRows = append(updateRows, row) @@ -465,8 +425,8 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in default: updateRows = append(updateRows, row) } - } + PutRowBuffer(keyBuf) // any of the existing rows that weren't updated need to be deleted for existingTermKey := range existingTermKeys { From f0ee9a3c6609b60ed677600c63c21e55f5512c8b Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 19 Oct 2015 11:13:03 -0400 Subject: [PATCH 08/17] removed commented code and unused functions --- index/store/goleveldb/batch.go | 16 ---------------- index/store/goleveldb/writer.go | 8 -------- 2 files changed, 24 deletions(-) diff --git a/index/store/goleveldb/batch.go b/index/store/goleveldb/batch.go index 43d86b0b..91b2598e 100644 --- a/index/store/goleveldb/batch.go +++ b/index/store/goleveldb/batch.go @@ -32,22 +32,6 @@ func (b *Batch) Merge(key, val []byte) { b.merge.Merge(key, val) } -// func (b *Batch) Execute() error { - -// // first process merges -// ops, err := b.merge.ExecuteDeferred(b.w) -// if err != nil { -// return err -// } -// for _, op := range ops { -// b.batch.Put(op.K, op.V) -// } - -// wopts := defaultWriteOptions() -// err = b.w.store.db.Write(b.batch, wopts) -// return err -// } - func (b *Batch) Reset() { b.batch.Reset() b.merge = store.NewEmulatedMerge(b.store.mo) } diff --git a/index/store/goleveldb/writer.go b/index/store/goleveldb/writer.go index 9beb4a27..c5229005 100644 --- a/index/store/goleveldb/writer.go +++ b/index/store/goleveldb/writer.go @@ -20,14 +20,6 @@ type Writer struct { store *Store } -func (w *Writer) Set(key, val []byte) error { - return w.store.db.Put(key, val, w.store.defaultWriteOptions) -} - -func (w *Writer) Delete(key []byte) error { - return w.store.db.Delete(key, w.store.defaultWriteOptions) -} - func (w *Writer) NewBatch() store.KVBatch { rv := Batch{ store: w.store, From faceecf87b5bd8f8af06c45a77bd21594ef00dd6 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 19 Oct 2015 12:03:38 -0400 Subject: [PATCH 09/17] make row buffer size constant/configurable also handle the case where it is insufficiently sized --- index/upside_down/upside_down.go | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index c4d10b40..bbe7b443 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -28,6 +28,13 @@ import ( const Name = "upside_down" +// RowBufferSize should ideally be the smallest +// size that can contain an index row key and its corresponding +// value. It is not a limit; if need be, a larger buffer is +// allocated, but performance will be better if *most* +// rows fit this size.
+const RowBufferSize = 4 * 1024 + var VersionKey = []byte{'v'} var UnsafeBatchUseDetected = fmt.Errorf("bleve.Batch is NOT thread-safe, modification after execution detected") @@ -120,7 +127,7 @@ func GetRowBuffer() []byte { if rb, ok := rowBufferPool.Get().([]byte); ok { return rb } else { - return make([]byte, 4*1024) + return make([]byte, RowBufferSize) } } @@ -140,12 +147,18 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow for _, row := range addRows { tfr, ok := row.(*TermFrequencyRow) if ok { + if tfr.DictionaryRowKeySize() > len(rowBuf) { + rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + } dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) if err != nil { return err } wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr) } + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } keySize, err := row.KeyTo(rowBuf) if err != nil { return err @@ -156,6 +169,9 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow // update for _, row := range updateRows { + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } keySize, err := row.KeyTo(rowBuf) if err != nil { return err @@ -172,12 +188,18 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter + if tfr.DictionaryRowKeySize() > len(rowBuf) { + rowBuf = make([]byte, tfr.DictionaryRowKeySize()) + } dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf) if err != nil { return err } wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr) } + if row.KeySize()+row.ValueSize() > len(rowBuf) { + rowBuf = make([]byte, row.KeySize()+row.ValueSize()) + } keySize, err := row.KeyTo(rowBuf) if err != nil { return err From 6cc21346dcbbcb58140680ba1494678ff57f0b86 Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Mon, 19 Oct 2015 14:27:03 -0400 Subject: [PATCH 10/17] fix errcheck issues --- index/upside_down/dump_test.go | 7 +- index/upside_down/field_dict_test.go | 7 +- index/upside_down/reader_test.go | 14 +++- index/upside_down/upside_down_test.go | 105 ++++++++++++++++++++++---- 4 files changed, 114 insertions(+), 19 deletions(-) diff --git a/index/upside_down/dump_test.go b/index/upside_down/dump_test.go index 277bce51..5547366e 100644 --- a/index/upside_down/dump_test.go +++ b/index/upside_down/dump_test.go @@ -20,7 +20,12 @@ import ( ) func TestDump(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) diff --git a/index/upside_down/field_dict_test.go b/index/upside_down/field_dict_test.go index 1a0864b5..82f56fb4 100644 --- a/index/upside_down/field_dict_test.go +++ b/index/upside_down/field_dict_test.go @@ -19,7 +19,12 @@ import ( ) func TestIndexFieldDict(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) diff --git a/index/upside_down/reader_test.go b/index/upside_down/reader_test.go index 456e2039..72c7ccc9 100644 --- a/index/upside_down/reader_test.go +++ b/index/upside_down/reader_test.go @@ -19,7 +19,12 @@ import ( ) func TestIndexReader(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() 
+ if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -189,7 +194,12 @@ func TestIndexReader(t *testing.T) { } func TestIndexDocIdReader(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) diff --git a/index/upside_down/upside_down_test.go b/index/upside_down/upside_down_test.go index 6a9853d4..d4a3efd1 100644 --- a/index/upside_down/upside_down_test.go +++ b/index/upside_down/upside_down_test.go @@ -33,7 +33,12 @@ var testAnalyzer = &analysis.Analyzer{ } func TestIndexOpenReopen(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -87,7 +92,12 @@ func TestIndexOpenReopen(t *testing.T) { } func TestIndexInsert(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -142,7 +152,12 @@ func TestIndexInsert(t *testing.T) { } func TestIndexInsertThenDelete(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -233,7 +248,12 @@ func TestIndexInsertThenDelete(t *testing.T) { } func TestIndexInsertThenUpdate(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -296,7 +316,12 @@ func TestIndexInsertThenUpdate(t *testing.T) { } func TestIndexInsertMultiple(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -375,7 +400,12 @@ func TestIndexInsertMultiple(t *testing.T) { } func TestIndexInsertWithStore(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -457,7 +487,12 @@ func TestIndexInsertWithStore(t *testing.T) { } func TestIndexInternalCRUD(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -546,7 +581,12 @@ func TestIndexInternalCRUD(t *testing.T) { } func TestIndexBatch(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -639,7 +679,12 @@ func TestIndexBatch(t *testing.T) { } func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != 
nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -827,7 +872,12 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) { } func TestIndexInsertFields(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -882,7 +932,12 @@ func TestIndexInsertFields(t *testing.T) { } func TestIndexUpdateComposites(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -974,7 +1029,12 @@ func TestIndexUpdateComposites(t *testing.T) { } func TestIndexFieldsMisc(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -1016,7 +1076,12 @@ func TestIndexFieldsMisc(t *testing.T) { } func TestIndexTermReaderCompositeFields(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -1072,7 +1137,12 @@ func TestIndexTermReaderCompositeFields(t *testing.T) { } func TestIndexDocumentFieldTerms(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) @@ -1159,7 +1229,12 @@ func BenchmarkBatch(b *testing.B) { } func TestConcurrentUpdate(t *testing.T) { - defer DestroyTest() + defer func() { + err := DestroyTest() + if err != nil { + t.Fatal(err) + } + }() analysisQueue := index.NewAnalysisQueue(1) idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue) From aada2e7333600a95695050b9a71725e5cdaa4505 Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 20 Oct 2015 19:05:53 +0200 Subject: [PATCH 11/17] store_test: test RangeIterator.Seek on goleveldb --- index/store/goleveldb/store_test.go | 6 ++ index/store/test/iterator.go | 97 +++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) diff --git a/index/store/goleveldb/store_test.go b/index/store/goleveldb/store_test.go index e643fd4c..6fceee78 100644 --- a/index/store/goleveldb/store_test.go +++ b/index/store/goleveldb/store_test.go @@ -75,6 +75,12 @@ func TestGoLevelDBRangeIterator(t *testing.T) { test.CommonTestRangeIterator(t, s) } +func TestGoLevelDBRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + func TestGoLevelDBMerge(t *testing.T) { s := open(t, &test.TestMergeCounter{}) defer cleanup(t, s) diff --git a/index/store/test/iterator.go b/index/store/test/iterator.go index ff5be9e7..aaf6149f 100644 --- a/index/store/test/iterator.go +++ b/index/store/test/iterator.go @@ -3,6 +3,7 @@ package test import ( "bytes" "reflect" + "strings" "testing" "github.com/blevesearch/bleve/index/store" @@ -275,3 +276,99 @@ func CommonTestRangeIterator(t *testing.T, s store.KVStore) { t.Fatal(err) } } + +func CommonTestRangeIteratorSeek(t *testing.T, s 
store.KVStore) { + + data := []struct { + key []byte + val []byte + }{ + {[]byte("a1"), []byte("val")}, + {[]byte("b1"), []byte("val")}, + {[]byte("c1"), []byte("val")}, + {[]byte("d1"), []byte("val")}, + {[]byte("e1"), []byte("val")}, + } + + // open a writer + writer, err := s.Writer() + if err != nil { + t.Fatal(err) + } + + // write the data + batch := writer.NewBatch() + for _, row := range data { + batch.Set(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + t.Fatal(err) + } + + // close the writer + err = writer.Close() + if err != nil { + t.Fatal(err) + } + + // open a reader + reader, err := s.Reader() + if err != nil { + t.Fatal(err) + } + + // get an iterator on a central subset of the data + start := []byte("b1") + end := []byte("d1") + iter := reader.RangeIterator(start, end) + + // seek before, at and after every possible key + targets := [][]byte{} + for _, row := range data { + prefix := string(row.key[:1]) + targets = append(targets, []byte(prefix+"0")) + targets = append(targets, []byte(prefix+"1")) + targets = append(targets, []byte(prefix+"2")) + } + for _, target := range targets { + found := []string{} + for iter.Seek(target); iter.Valid(); iter.Next() { + found = append(found, string(iter.Key())) + if len(found) > len(data) { + t.Fatalf("enumerated more than data keys after seeking to %s", + string(target)) + } + } + wanted := []string{} + for _, row := range data { + if bytes.Compare(row.key, start) < 0 || + bytes.Compare(row.key, target) < 0 || + bytes.Compare(row.key, end) >= 0 { + continue + } + wanted = append(wanted, string(row.key)) + } + fs := strings.Join(found, ", ") + ws := strings.Join(wanted, ", ") + if fs != ws { + t.Fatalf("iterating from %s returned [%s] instead of [%s]", + string(target), fs, ws) + } + } + + err = iter.Close() + if err != nil { + t.Fatal(err) + } + + if err != nil { + t.Fatal(err) + } + + // close the reader + err = reader.Close() + if err != nil { + t.Fatal(err) + } +} From 5d7628ba3b94d5d99d3315090ea4a41080c76ea8 Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 20 Oct 2015 19:01:29 +0200 Subject: [PATCH 12/17] boltdb: fix RangeIterator outside of range seeks Two issues: - Seeking before i.start and iterating returned keys before i.start - Seeking after the store last key did not invalidate the iterator and could cause infinite loops. 
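
A minimal sketch of the intended Seek contract after this fix (illustrative only; seekWithinRange and the example package are hypothetical, and the KVReader/KVIterator methods used are the ones exercised by the store tests in this series):

	package example // sketch, not part of the patch

	import "github.com/blevesearch/bleve/index/store"

	// seekWithinRange collects the keys visited after seeking to target
	// inside a [start, end) range iterator.
	func seekWithinRange(reader store.KVReader, start, end, target []byte) []string {
		it := reader.RangeIterator(start, end)
		defer func() { _ = it.Close() }()
		var keys []string
		// Seek clamps to start, so a target below start begins at start;
		// a target at or beyond end leaves the iterator invalid immediately,
		// which is what prevents the infinite loops described above.
		for it.Seek(target); it.Valid(); it.Next() {
			keys = append(keys, string(it.Key()))
		}
		return keys
	}
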
--- index/store/boltdb/iterator.go | 13 +++++++++---- index/store/boltdb/store_test.go | 6 ++++++ 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/index/store/boltdb/iterator.go b/index/store/boltdb/iterator.go index bccfeaee..63bde9f5 100644 --- a/index/store/boltdb/iterator.go +++ b/index/store/boltdb/iterator.go @@ -29,14 +29,19 @@ type Iterator struct { func (i *Iterator) updateValid() { i.valid = (i.key != nil) - if i.valid && i.prefix != nil { - i.valid = bytes.HasPrefix(i.key, i.prefix) - } else if i.end != nil { - i.valid = bytes.Compare(i.key, i.end) < 0 + if i.valid { + if i.prefix != nil { + i.valid = bytes.HasPrefix(i.key, i.prefix) + } else if i.end != nil { + i.valid = bytes.Compare(i.key, i.end) < 0 + } } } func (i *Iterator) Seek(k []byte) { + if bytes.Compare(k, i.start) < 0 { + k = i.start + } i.key, i.val = i.cursor.Seek(k) i.updateValid() } diff --git a/index/store/boltdb/store_test.go b/index/store/boltdb/store_test.go index 0380a92c..4abde11b 100644 --- a/index/store/boltdb/store_test.go +++ b/index/store/boltdb/store_test.go @@ -72,6 +72,12 @@ func TestBoltDBRangeIterator(t *testing.T) { test.CommonTestRangeIterator(t, s) } +func TestBoltDBRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + func TestBoltDBMerge(t *testing.T) { s := open(t, &test.TestMergeCounter{}) defer cleanup(t, s) From 873f4838041c2899c178703e92c50f438f7c9130 Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 20 Oct 2015 18:44:00 +0200 Subject: [PATCH 13/17] gtreap: RangeIterator.Seek should not move before start --- index/store/gtreap/iterator.go | 4 ++++ index/store/gtreap/reader.go | 5 +++-- index/store/gtreap/store_test.go | 6 ++++++ index/store/metrics/store_test.go | 6 ++++++ 4 files changed, 19 insertions(+), 2 deletions(-) diff --git a/index/store/gtreap/iterator.go b/index/store/gtreap/iterator.go index d64dce55..bc56eb34 100644 --- a/index/store/gtreap/iterator.go +++ b/index/store/gtreap/iterator.go @@ -31,10 +31,14 @@ type Iterator struct { currOk bool prefix []byte + start []byte end []byte } func (w *Iterator) Seek(k []byte) { + if bytes.Compare(k, w.start) < 0 { + k = w.start + } w.restart(&Item{k: k}) } diff --git a/index/store/gtreap/reader.go b/index/store/gtreap/reader.go index a67671e9..6f92a751 100644 --- a/index/store/gtreap/reader.go +++ b/index/store/gtreap/reader.go @@ -46,8 +46,9 @@ func (w *Reader) PrefixIterator(k []byte) store.KVIterator { func (w *Reader) RangeIterator(start, end []byte) store.KVIterator { rv := Iterator{ - t: w.t, - end: end, + t: w.t, + start: start, + end: end, } rv.restart(&Item{k: start}) return &rv diff --git a/index/store/gtreap/store_test.go b/index/store/gtreap/store_test.go index 82e12cf6..b7686ee6 100644 --- a/index/store/gtreap/store_test.go +++ b/index/store/gtreap/store_test.go @@ -70,6 +70,12 @@ func TestGTreapRangeIterator(t *testing.T) { test.CommonTestRangeIterator(t, s) } +func TestGTreapRangeIteratorSeek(t *testing.T) { + s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + func TestGTreapMerge(t *testing.T) { s := open(t, &test.TestMergeCounter{}) defer cleanup(t, s) diff --git a/index/store/metrics/store_test.go b/index/store/metrics/store_test.go index d3c65f79..f51d6d61 100644 --- a/index/store/metrics/store_test.go +++ b/index/store/metrics/store_test.go @@ -59,6 +59,12 @@ func TestMetricsRangeIterator(t *testing.T) { test.CommonTestRangeIterator(t, s) } +func TestMetricsRangeIteratorSeek(t *testing.T) { 
+ s := open(t, nil) + defer cleanup(t, s) + test.CommonTestRangeIteratorSeek(t, s) +} + func TestMetricsMerge(t *testing.T) { s := open(t, &test.TestMergeCounter{}) defer cleanup(t, s) From da72d0c2b9ac20bdd5e3784fe184d745edf40d3e Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 20 Oct 2015 19:21:01 +0200 Subject: [PATCH 14/17] store_test: deduplicate store initialization --- index/store/test/iterator.go | 102 +++++++++++++---------------------- 1 file changed, 36 insertions(+), 66 deletions(-) diff --git a/index/store/test/iterator.go b/index/store/test/iterator.go index aaf6149f..0d7d8992 100644 --- a/index/store/test/iterator.go +++ b/index/store/test/iterator.go @@ -11,12 +11,39 @@ import ( // tests around the correct behavior of iterators +type testRow struct { + key []byte + val []byte +} + +func batchWriteRows(s store.KVStore, rows []testRow) error { + // open a writer + writer, err := s.Writer() + if err != nil { + return err + } + + // write the data + batch := writer.NewBatch() + for _, row := range rows { + batch.Set(row.key, row.val) + } + err = writer.ExecuteBatch(batch) + if err != nil { + return err + } + + // close the writer + err = writer.Close() + if err != nil { + return err + } + return nil +} + func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { - data := []struct { - key []byte - val []byte - }{ + data := []testRow{ {[]byte("apple"), []byte("val")}, {[]byte("cat1"), []byte("val")}, {[]byte("cat2"), []byte("val")}, @@ -40,24 +67,7 @@ func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { []byte("dog4"), } - // open a writer - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // write the data - batch := writer.NewBatch() - for _, row := range data { - batch.Set(row.key, row.val) - } - err = writer.ExecuteBatch(batch) - if err != nil { - t.Fatal(err) - } - - // close the writer - err = writer.Close() + err := batchWriteRows(s, data) if err != nil { t.Fatal(err) } @@ -123,10 +133,7 @@ func CommonTestPrefixIterator(t *testing.T, s store.KVStore) { func CommonTestRangeIterator(t *testing.T, s store.KVStore) { - data := []struct { - key []byte - val []byte - }{ + data := []testRow{ {[]byte("a1"), []byte("val")}, {[]byte("b1"), []byte("val")}, {[]byte("b2"), []byte("val")}, @@ -154,24 +161,7 @@ func CommonTestRangeIterator(t *testing.T, s store.KVStore) { } } - // open a writer - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // write the data - batch := writer.NewBatch() - for _, row := range data { - batch.Set(row.key, row.val) - } - err = writer.ExecuteBatch(batch) - if err != nil { - t.Fatal(err) - } - - // close the writer - err = writer.Close() + err := batchWriteRows(s, data) if err != nil { t.Fatal(err) } @@ -279,10 +269,7 @@ func CommonTestRangeIterator(t *testing.T, s store.KVStore) { func CommonTestRangeIteratorSeek(t *testing.T, s store.KVStore) { - data := []struct { - key []byte - val []byte - }{ + data := []testRow{ {[]byte("a1"), []byte("val")}, {[]byte("b1"), []byte("val")}, {[]byte("c1"), []byte("val")}, @@ -290,24 +277,7 @@ func CommonTestRangeIteratorSeek(t *testing.T, s store.KVStore) { {[]byte("e1"), []byte("val")}, } - // open a writer - writer, err := s.Writer() - if err != nil { - t.Fatal(err) - } - - // write the data - batch := writer.NewBatch() - for _, row := range data { - batch.Set(row.key, row.val) - } - err = writer.ExecuteBatch(batch) - if err != nil { - t.Fatal(err) - } - - // close the writer - err = writer.Close() + err := batchWriteRows(s, data) if err != nil { t.Fatal(err) 
} From b174c137fd621082573cb854375712a6e1e865be Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Sat, 17 Oct 2015 18:40:26 +0200 Subject: [PATCH 15/17] doc: document DocIDReader, and some Index bits --- index.go | 5 +++++ index/index.go | 15 +++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/index.go b/index.go index 73c49d8b..fe68a44c 100644 --- a/index.go +++ b/index.go @@ -160,7 +160,10 @@ type Index interface { NewBatch() *Batch Batch(b *Batch) error + // Document returns specified document or nil if the document is not + // indexed or stored. Document(id string) (*document.Document, error) + // DocCount returns the number of indexed or stored documents. DocCount() (uint64, error) Search(req *SearchRequest) (*SearchResult, error) @@ -185,6 +188,8 @@ type Index interface { SetInternal(key, val []byte) error DeleteInternal(key []byte) error + // Advanced returns the indexer and data store, exposing lower level + // methods to enumerate records and access data. Advanced() (index.Index, store.KVStore, error) } diff --git a/index/index.go b/index/index.go index aa7b9a77..84469776 100644 --- a/index/index.go +++ b/index/index.go @@ -35,6 +35,8 @@ type Index interface { DumpDoc(id string) chan interface{} DumpFields() chan interface{} + // Reader returns a low-level accessor on the index data. Close it to + // release associated resources. Reader() (IndexReader, error) Stats() json.Marshaler @@ -44,6 +46,10 @@ type Index interface { type IndexReader interface { TermFieldReader(term []byte, field string) (TermFieldReader, error) + + // DocIDReader returns an iterator over indexed or stored documents which + // identifiers are greater than or equal to start and smaller than end. The + // caller must close returned instance to release associated resources. DocIDReader(start, end string) (DocIDReader, error) FieldDict(field string) (FieldDict, error) @@ -99,8 +105,17 @@ type FieldDict interface { Close() error } +// DocIDReader is the interface exposing enumeration of indexed or stored +// documents identifiers. Close the reader to release associated resources. type DocIDReader interface { + // Next returns the next document identifier in ascending lexicographic + // byte order, or io.EOF when the end of the sequence is reached. Next() (string, error) + + // Advance resets the iteration to the first identifier greater than or + // equal to ID. If ID is greater than or equal to the end of the range, + // Next() call will return io.EOF. If ID is smaller than the start of the + // range, the behaviour depends on the store implementation. Advance(ID string) (string, error) Close() error } From 2fa334fc27ab9bc6bdf6d03625f80d229336712b Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Sun, 18 Oct 2015 10:56:20 +0200 Subject: [PATCH 16/17] doc: talk about "documents" not "indexed or stored documents" --- index.go | 2 +- index/index.go | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/index.go b/index.go index fe68a44c..f8169443 100644 --- a/index.go +++ b/index.go @@ -163,7 +163,7 @@ type Index interface { // Document returns specified document or nil if the document is not // indexed or stored. Document(id string) (*document.Document, error) - // DocCount returns the number of indexed or stored documents. + // DocCount returns the number of documents in the index. 
DocCount() (uint64, error) Search(req *SearchRequest) (*SearchResult, error) diff --git a/index/index.go b/index/index.go index 84469776..81c0a190 100644 --- a/index/index.go +++ b/index/index.go @@ -47,9 +47,9 @@ type Index interface { type IndexReader interface { TermFieldReader(term []byte, field string) (TermFieldReader, error) - // DocIDReader returns an iterator over indexed or stored documents which - // identifiers are greater than or equal to start and smaller than end. The - // caller must close returned instance to release associated resources. + // DocIDReader returns an iterator over documents which identifiers are + // greater than or equal to start and smaller than end. The caller must + // close returned instance to release associated resources. DocIDReader(start, end string) (DocIDReader, error) FieldDict(field string) (FieldDict, error) @@ -105,8 +105,8 @@ type FieldDict interface { Close() error } -// DocIDReader is the interface exposing enumeration of indexed or stored -// documents identifiers. Close the reader to release associated resources. +// DocIDReader is the interface exposing enumeration of documents identifiers. +// Close the reader to release associated resources. type DocIDReader interface { // Next returns the next document identifier in ascending lexicographic // byte order, or io.EOF when the end of the sequence is reached. From 5100e00f20efd41b1f8afe61a7b6ad211a6625a1 Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 20 Oct 2015 20:27:31 +0200 Subject: [PATCH 17/17] doc: DocIDReader.Advance() is no longer implementation dependent --- index/index.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/index/index.go b/index/index.go index 81c0a190..b763ead7 100644 --- a/index/index.go +++ b/index/index.go @@ -113,9 +113,9 @@ type DocIDReader interface { Next() (string, error) // Advance resets the iteration to the first identifier greater than or - // equal to ID. If ID is greater than or equal to the end of the range, - // Next() call will return io.EOF. If ID is smaller than the start of the - // range, the behaviour depends on the store implementation. + // equal to ID. If ID is smaller than the start of the range, the iteration + // will start there instead. If ID is greater than or equal to the end of + // the range, Next() call will return io.EOF. Advance(ID string) (string, error) Close() error }
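
A minimal usage sketch of the DocIDReader contract documented above (illustrative only; printDocIDsFrom and the example package are hypothetical, and empty start/end strings are assumed to select the full identifier range):

	package example // sketch, not part of the patch series

	import (
		"fmt"
		"io"

		"github.com/blevesearch/bleve/index"
	)

	func printDocIDsFrom(reader index.IndexReader, from string) error {
		ids, err := reader.DocIDReader("", "")
		if err != nil {
			return err
		}
		defer func() { _ = ids.Close() }()
		// Advance clamps to the start of the range and positions the
		// iteration at the first identifier >= from; Next then walks the
		// remaining identifiers in ascending byte order until io.EOF
		// signals the end of the range.
		id, err := ids.Advance(from)
		for err == nil {
			fmt.Println(id)
			id, err = ids.Next()
		}
		if err == io.EOF {
			return nil
		}
		return err
	}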