0
0
bleve/index/upsidedown/index_reader.go
Marty Schoch 606fd6344b INDEX FORMAT CHANGE: change back index row value
Previously term entries were encoded pairwise (field/term), so
you'd have data like:

F1/T1 F1/T2 F1/T3 F2/T4 F3/T5

As you can see, even though field 1 has 3 terms, we repeat the F1
part in the encoded data.  This is a bit wasteful.

In the new format we encode it as a list of terms for each field:

F1/T1,T2,T3 F2/T4 F3/T5

When fields have multiple terms, this saves space.  In unit
tests there is no additional waste even in the case that a field
has only a single value.

Here are the results of an indexing test case (beer-search):

$ benchcmp indexing-before.txt indexing-after.txt
benchmark               old ns/op       new ns/op       delta
BenchmarkIndexing-4     11275835988     10745514321     -4.70%

benchmark               old allocs     new allocs     delta
BenchmarkIndexing-4     25230685       22480494       -10.90%

benchmark               old bytes      new bytes      delta
BenchmarkIndexing-4     4802816224     4741641856     -1.27%

And here are the results of a MatchAll search building a facet
on the "abv" field:

$ benchcmp facet-before.txt facet-after.txt
benchmark             old ns/op     new ns/op     delta
BenchmarkFacets-4     439762100     228064575     -48.14%

benchmark             old allocs     new allocs     delta
BenchmarkFacets-4     9460208        3723286        -60.64%

benchmark             old bytes     new bytes     delta
BenchmarkFacets-4     260784261     151746483     -41.81%

Although we expect the index to be smaller in many cases, the
beer-search index is about the same in this case.  However,
this may be due to the underlying storage (boltdb) in this case.

Finally, the index version was bumped from 5 to 7, since smolder
also used version 6, which could lead to some confusion.
2017-01-24 15:39:38 -05:00

190 lines
5.1 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"github.com/blevesearch/bleve/document"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
)
type IndexReader struct {
index *UpsideDownCouch
kvreader store.KVReader
docCount uint64
}
func (i *IndexReader) TermFieldReader(term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists {
return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
}
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
}
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, nil, nil)
}
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
fieldIndex, fieldExists := i.index.fieldCache.FieldNamed(fieldName, false)
if fieldExists {
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
}
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
}
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, termPrefix, termPrefix)
}
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
return newUpsideDownCouchDocIDReader(i)
}
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
return newUpsideDownCouchDocIDReaderOnly(i, ids)
}
func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
// first hit the back index to confirm doc exists
var backIndexRow *BackIndexRow
backIndexRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
if err != nil {
return
}
if backIndexRow == nil {
return
}
doc = document.NewDocument(id)
storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
it := i.kvreader.PrefixIterator(storedRowScanPrefix)
defer func() {
if cerr := it.Close(); err == nil && cerr != nil {
err = cerr
}
}()
key, val, valid := it.Current()
for valid {
safeVal := make([]byte, len(val))
copy(safeVal, val)
var row *StoredRow
row, err = NewStoredRowKV(key, safeVal)
if err != nil {
doc = nil
return
}
if row != nil {
fieldName := i.index.fieldCache.FieldIndexed(row.field)
field := decodeFieldType(row.typ, fieldName, row.arrayPositions, row.value)
if field != nil {
doc.AddField(field)
}
}
it.Next()
key, val, valid = it.Current()
}
return
}
func (i *IndexReader) DocumentFieldTerms(id index.IndexInternalID, fields []string) (index.FieldTerms, error) {
back, err := backIndexRowForDoc(i.kvreader, id)
if err != nil {
return nil, err
}
if back == nil {
return nil, nil
}
rv := make(index.FieldTerms, len(fields))
fieldsMap := make(map[uint16]string, len(fields))
for _, f := range fields {
id, ok := i.index.fieldCache.FieldNamed(f, false)
if ok {
fieldsMap[id] = f
}
}
for _, entry := range back.termsEntries {
if field, ok := fieldsMap[uint16(*entry.Field)]; ok {
rv[field] = entry.Terms
}
}
return rv, nil
}
func (i *IndexReader) Fields() (fields []string, err error) {
fields = make([]string, 0)
it := i.kvreader.PrefixIterator([]byte{'f'})
defer func() {
if cerr := it.Close(); err == nil && cerr != nil {
err = cerr
}
}()
key, val, valid := it.Current()
for valid {
var row UpsideDownCouchRow
row, err = ParseFromKeyValue(key, val)
if err != nil {
fields = nil
return
}
if row != nil {
fieldRow, ok := row.(*FieldRow)
if ok {
fields = append(fields, fieldRow.name)
}
}
it.Next()
key, val, valid = it.Current()
}
return
}
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
internalRow := NewInternalRow(key, nil)
return i.kvreader.Get(internalRow.Key())
}
func (i *IndexReader) DocCount() (uint64, error) {
return i.docCount, nil
}
func (i *IndexReader) Close() error {
return i.kvreader.Close()
}
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
return string(id), nil
}
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
return index.IndexInternalID(id), nil
}
func incrementBytes(in []byte) []byte {
rv := make([]byte, len(in))
copy(rv, in)
for i := len(rv) - 1; i >= 0; i-- {
rv[i] = rv[i] + 1
if rv[i] != 0 {
// didn't overflow, so stop
break
}
}
return rv
}