0
0

significant change to index format, support dictionary rows

this introduces disk format v4
now the summary rows for a term are stored in their own
"dictionary row" format, previously the same information
was stored in special term frequency rows
this now allows us to easily iterate all the terms for a field
in sorted order (useful for many other fuzzy data structures)

at the top-level of bleve you can now browse terms within a field
using the following api on the Index interface:

  FieldDict(field string) (index.FieldDict, error)
  FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
  FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)

fixes #127
This commit is contained in:
Marty Schoch 2015-03-10 16:22:19 -04:00
parent 4e14f4e4ef
commit 522f9d5cc7
16 changed files with 541 additions and 124 deletions

View File

@ -78,6 +78,10 @@ type Index interface {
Fields() ([]string, error)
FieldDict(field string) (index.FieldDict, error)
FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)
DumpAll() chan interface{}
DumpDoc(id string) chan interface{}
DumpFields() chan interface{}

View File

@ -41,7 +41,9 @@ type IndexReader interface {
TermFieldReader(term []byte, field string) (TermFieldReader, error)
DocIDReader(start, end string) (DocIDReader, error)
FieldReader(field string, startTerm []byte, endTerm []byte) (FieldReader, error)
FieldDict(field string) (FieldDict, error)
FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
Document(id string) (*document.Document, error)
DocumentFieldTerms(id string) (FieldTerms, error)
@ -79,8 +81,13 @@ type TermFieldReader interface {
Close() error
}
type FieldReader interface {
Next() (*TermFieldDoc, error)
type DictEntry struct {
Term string
Count uint64
}
type FieldDict interface {
Next() (*DictEntry, error)
Close() error
}

View File

@ -17,23 +17,24 @@ import (
"github.com/blevesearch/bleve/index/store"
)
type UpsideDownCouchFieldReader struct {
type UpsideDownCouchFieldDict struct {
indexReader *IndexReader
iterator store.KVIterator
endKey []byte
field uint16
}
func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldReader, error) {
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {
startRow := NewTermFrequencyRow(startTerm, field, "", 0, 0)
startKey := startRow.ScanPrefixForFieldTermPrefix()
endKey := NewTermFrequencyRow(endTerm, field, "", 0, 0).Key()
startKey := NewDictionaryRow(startTerm, field, 0).Key()
if endTerm == nil {
endTerm = []byte{ByteSeparator}
}
endKey := NewDictionaryRow(endTerm, field, 0).Key()
it := indexReader.kvreader.Iterator(startKey)
return &UpsideDownCouchFieldReader{
return &UpsideDownCouchFieldDict{
indexReader: indexReader,
iterator: it,
field: field,
@ -42,7 +43,7 @@ func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, start
}
func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
key, val, valid := r.iterator.Current()
if !valid {
return nil, nil
@ -53,24 +54,21 @@ func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
return nil, nil
}
currRow, err := NewTermFrequencyRowKV(key, val)
currRow, err := NewDictionaryRowKV(key, val)
if err != nil {
return nil, fmt.Errorf("unexpected error parsing term freq row kv: %v", err)
return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err)
}
rv := index.TermFieldDoc{
Term: string(currRow.term),
Freq: currRow.freq,
rv := index.DictEntry{
Term: string(currRow.term),
Count: currRow.count,
}
// advance the iterator to the next term
// by using invalid doc id (higher sorting)
nextTerm := incrementBytes(currRow.term)
nextRow := NewTermFrequencyRow(nextTerm, r.field, "", 0, 0)
r.iterator.Seek(nextRow.ScanPrefixForFieldTermPrefix())
r.iterator.Next()
return &rv, nil
}
func (r *UpsideDownCouchFieldReader) Close() error {
func (r *UpsideDownCouchFieldDict) Close() error {
return r.iterator.Close()
}

View File

@ -18,7 +18,7 @@ import (
"github.com/blevesearch/bleve/index/store/boltdb"
)
func TestIndexFieldReader(t *testing.T) {
func TestIndexFieldDict(t *testing.T) {
defer os.RemoveAll("test")
store, err := boltdb.Open("test", "bleve")
@ -54,38 +54,39 @@ func TestIndexFieldReader(t *testing.T) {
t.Error(err)
}
defer indexReader.Close()
reader, err := indexReader.FieldReader("name", nil, nil)
dict, err := indexReader.FieldDict("name")
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()
defer dict.Close()
termCount := 0
curr, err := reader.Next()
curr, err := dict.Next()
for err == nil && curr != nil {
termCount++
if curr.Term != "test" {
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
}
curr, err = reader.Next()
curr, err = dict.Next()
}
if termCount != 1 {
t.Errorf("expected 1 term for this field, got %d", termCount)
}
reader, err = indexReader.FieldReader("desc", nil, nil)
dict, err = indexReader.FieldDict("desc")
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()
defer dict.Close()
termCount = 0
terms := make([]string, 0)
curr, err = reader.Next()
curr, err = dict.Next()
for err == nil && curr != nil {
termCount++
terms = append(terms, curr.Term)
curr, err = reader.Next()
curr, err = dict.Next()
}
if termCount != 3 {
t.Errorf("expected 3 term for this field, got %d", termCount)
@ -95,25 +96,48 @@ func TestIndexFieldReader(t *testing.T) {
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
}
// test use case for prefix
reader, err = indexReader.FieldReader("prefix", []byte("cat"), []byte("cat"))
// test start and end range
dict, err = indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer reader.Close()
defer dict.Close()
termCount = 0
terms = make([]string, 0)
curr, err = reader.Next()
curr, err = dict.Next()
for err == nil && curr != nil {
termCount++
terms = append(terms, curr.Term)
curr, err = reader.Next()
curr, err = dict.Next()
}
if termCount != 1 {
t.Errorf("expected 1 term for this field, got %d", termCount)
}
expectedTerms = []string{"more"}
if !reflect.DeepEqual(expectedTerms, terms) {
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
}
// test use case for prefix
dict, err = indexReader.FieldDictPrefix("prefix", []byte("cat"))
if err != nil {
t.Errorf("error creating reader: %v", err)
}
defer dict.Close()
termCount = 0
terms = make([]string, 0)
curr, err = dict.Next()
for err == nil && curr != nil {
termCount++
terms = append(terms, curr.Term)
curr, err = dict.Next()
}
if termCount != 3 {
t.Errorf("expected 3 term for this field, got %d", termCount)
}
expectedTerms = []string{"cats", "catting", "cat"}
expectedTerms = []string{"cat", "cats", "catting"}
if !reflect.DeepEqual(expectedTerms, terms) {
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
}

View File

@ -31,12 +31,20 @@ func (i *IndexReader) TermFieldReader(term []byte, fieldName string) (index.Term
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0))
}
func (i *IndexReader) FieldReader(fieldName string, startTerm []byte, endTerm []byte) (index.FieldReader, error) {
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, nil, nil)
}
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
fieldIndex, fieldExists := i.index.fieldIndexCache.FieldExists(fieldName)
if fieldExists {
return newUpsideDownCouchFieldReader(i, uint16(fieldIndex), startTerm, endTerm)
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
}
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0))
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
}
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
return i.FieldDictRange(fieldName, termPrefix, incrementBytes(termPrefix))
}
func (i *IndexReader) DocIDReader(start, end string) (index.DocIDReader, error) {

View File

@ -25,25 +25,31 @@ type UpsideDownCouchTermFieldReader struct {
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0)
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
if err != nil {
return nil, err
}
if val == nil {
return &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
field: field,
}, nil
}
err = dictionaryRow.parseDictionaryV(val)
if err != nil {
return nil, err
}
tfr := NewTermFrequencyRow(term, field, "", 0, 0)
it := indexReader.kvreader.Iterator(tfr.Key())
var count uint64
key, val, valid := it.Current()
if valid {
if bytes.Equal(key, tfr.Key()) {
tfr, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, err
}
count = tfr.freq
}
}
return &UpsideDownCouchTermFieldReader{
indexReader: indexReader,
iterator: it,
count: count,
count: dictionaryRow.count,
term: term,
field: field,
}, nil
@ -54,54 +60,62 @@ func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
}
func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
r.iterator.Next()
key, val, valid := r.iterator.Current()
if valid {
testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
if !bytes.HasPrefix(key, testfr.Key()) {
// end of the line
return nil, nil
if r.iterator != nil {
key, val, valid := r.iterator.Current()
if valid {
testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
if !bytes.HasPrefix(key, testfr.Key()) {
// end of the line
return nil, nil
}
tfr, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, err
}
r.iterator.Next()
return &index.TermFieldDoc{
ID: string(tfr.doc),
Freq: tfr.freq,
Norm: float64(tfr.norm),
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
}, nil
}
tfr, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, err
}
return &index.TermFieldDoc{
ID: string(tfr.doc),
Freq: tfr.freq,
Norm: float64(tfr.norm),
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
}, nil
}
return nil, nil
}
func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
r.iterator.Seek(tfr.Key())
key, val, valid := r.iterator.Current()
if valid {
testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
if !bytes.HasPrefix(key, testfr.Key()) {
// end of the line
return nil, nil
if r.iterator != nil {
tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
r.iterator.Seek(tfr.Key())
key, val, valid := r.iterator.Current()
if valid {
testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
if !bytes.HasPrefix(key, testfr.Key()) {
// end of the line
return nil, nil
}
tfr, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, err
}
r.iterator.Next()
return &index.TermFieldDoc{
ID: string(tfr.doc),
Freq: tfr.freq,
Norm: float64(tfr.norm),
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
}, nil
}
tfr, err := NewTermFrequencyRowKV(key, val)
if err != nil {
return nil, err
}
return &index.TermFieldDoc{
ID: string(tfr.doc),
Freq: tfr.freq,
Norm: float64(tfr.norm),
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
}, nil
}
return nil, nil
}
func (r *UpsideDownCouchTermFieldReader) Close() error {
return r.iterator.Close()
if r.iterator != nil {
return r.iterator.Close()
}
return nil
}
type UpsideDownCouchDocIDReader struct {

View File

@ -35,6 +35,8 @@ func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
return NewVersionRowKV(key, value)
case 'f':
return NewFieldRowKV(key, value)
case 'd':
return NewDictionaryRowKV(key, value)
case 't':
return NewTermFrequencyRowKV(key, value)
case 'b':
@ -171,6 +173,91 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
return &rv, nil
}
// DICTIONARY
type DictionaryRow struct {
field uint16
term []byte
count uint64
}
func (dr *DictionaryRow) Key() []byte {
buf := make([]byte, 3+len(dr.term))
buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
copy(buf[3:], dr.term)
return buf
}
func (dr *DictionaryRow) Value() []byte {
used := 0
buf := make([]byte, 8)
used += binary.PutUvarint(buf[used:used+8], dr.count)
return buf[0:used]
}
func (dr *DictionaryRow) String() string {
return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
return &DictionaryRow{
term: term,
field: field,
count: count,
}
}
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
rv, err := NewDictionaryRowK(key)
if err != nil {
return nil, err
}
err = rv.parseDictionaryV(value)
if err != nil {
return nil, err
}
return rv, nil
}
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
rv := DictionaryRow{}
buf := bytes.NewBuffer(key)
_, err := buf.ReadByte() // type
if err != nil {
return nil, err
}
err = binary.Read(buf, binary.LittleEndian, &rv.field)
if err != nil {
return nil, err
}
rv.term, err = buf.ReadBytes(ByteSeparator)
// there is no separator expected here, should get EOF
if err != io.EOF {
return nil, err
}
return &rv, nil
}
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
buf := bytes.NewBuffer((value))
count, err := binary.ReadUvarint(buf)
if err != nil {
return err
}
dr.count = count
return nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
@ -227,13 +314,9 @@ func (tfr *TermFrequencyRow) Key() []byte {
return buf
}
func (tfr *TermFrequencyRow) SummaryKey() []byte {
buf := make([]byte, 3+len(tfr.term)+1)
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
buf[3+termLen] = ByteSeparator
return buf
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
return dr.Key()
}
func (tfr *TermFrequencyRow) Value() []byte {

View File

@ -9,45 +9,45 @@
package upside_down
type termSummaryIncr struct{}
type dictionaryTermIncr struct{}
func newTermSummaryIncr() *termSummaryIncr {
return &termSummaryIncr{}
func newDictionaryTermIncr() *dictionaryTermIncr {
return &dictionaryTermIncr{}
}
func (t *termSummaryIncr) Merge(key, existing []byte) ([]byte, error) {
func (t *dictionaryTermIncr) Merge(key, existing []byte) ([]byte, error) {
if len(existing) > 0 {
tfr, err := NewTermFrequencyRowKV(key, existing)
dr, err := NewDictionaryRowKV(key, existing)
if err != nil {
return nil, err
}
tfr.freq++
return tfr.Value(), nil
dr.count++
return dr.Value(), nil
} else {
tfr, err := NewTermFrequencyRowK(key)
dr, err := NewDictionaryRowK(key)
if err != nil {
return nil, err
}
tfr.freq = 1
return tfr.Value(), nil
dr.count = 1
return dr.Value(), nil
}
}
type termSummaryDecr struct{}
type dictionaryTermDecr struct{}
func newTermSummaryDecr() *termSummaryDecr {
return &termSummaryDecr{}
func newDictionaryTermDecr() *dictionaryTermDecr {
return &dictionaryTermDecr{}
}
func (t *termSummaryDecr) Merge(key, existing []byte) ([]byte, error) {
func (t *dictionaryTermDecr) Merge(key, existing []byte) ([]byte, error) {
if len(existing) > 0 {
tfr, err := NewTermFrequencyRowKV(key, existing)
dr, err := NewDictionaryRowKV(key, existing)
if err != nil {
return nil, err
}
tfr.freq--
if tfr.freq > 0 {
return tfr.Value(), nil
dr.count--
if dr.count > 0 {
return dr.Value(), nil
}
}
return nil, nil

View File

@ -42,6 +42,11 @@ func TestRows(t *testing.T) {
[]byte{'f', 1, 2},
[]byte{'s', 't', 'y', 'l', 'e', ByteSeparator},
},
{
NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27),
[]byte{'d', 0, 0, 'b', 'e', 'e', 'r'},
[]byte{27},
},
{
NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14),
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator},

View File

@ -27,7 +27,7 @@ import (
var VersionKey = []byte{'v'}
const Version uint8 = 3
const Version uint8 = 4
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
@ -111,8 +111,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow
tfr, ok := row.(*TermFrequencyRow)
if ok {
// need to increment counter
summaryKey := tfr.SummaryKey()
wb.Merge(summaryKey, newTermSummaryIncr())
dictionaryKey := tfr.DictionaryRowKey()
wb.Merge(dictionaryKey, newDictionaryTermIncr())
}
wb.Set(row.Key(), row.Value())
}
@ -127,8 +127,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow
tfr, ok := row.(*TermFrequencyRow)
if ok {
// need to decrement counter
summaryKey := tfr.SummaryKey()
wb.Merge(summaryKey, newTermSummaryDecr())
dictionaryKey := tfr.DictionaryRowKey()
wb.Merge(dictionaryKey, newDictionaryTermDecr())
}
wb.Delete(row.Key())
}

View File

@ -165,6 +165,84 @@ func (i *indexAliasImpl) Fields() ([]string, error) {
return i.indexes[0].Fields()
}
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDict(field)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
err := i.isAliasToSingleIndex()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexAliasImplFieldDict{
index: i,
fieldDict: fieldDict,
}, nil
}
func (i *indexAliasImpl) DumpAll() chan interface{} {
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -482,3 +560,17 @@ func (i *indexAliasImpl) NewBatch() *Batch {
return i.indexes[0].NewBatch()
}
type indexAliasImplFieldDict struct {
index *indexAliasImpl
fieldDict index.FieldDict
}
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
return f.fieldDict.Next()
}
func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}

View File

@ -730,6 +730,18 @@ func (i *stubIndex) Fields() ([]string, error) {
return nil, i.err
}
func (i *stubIndex) FieldDict(field string) (index.FieldDict, error) {
return nil, i.err
}
func (i *stubIndex) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
return nil, i.err
}
func (i *stubIndex) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
return nil, i.err
}
func (i *stubIndex) DumpAll() chan interface{} {
return nil
}

View File

@ -498,6 +498,87 @@ func (i *indexImpl) Fields() ([]string, error) {
return indexReader.Fields()
}
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
indexReader, err := i.i.Reader()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := indexReader.FieldDict(field)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexImplFieldDict{
index: i,
indexReader: indexReader,
fieldDict: fieldDict,
}, nil
}
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
indexReader, err := i.i.Reader()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexImplFieldDict{
index: i,
indexReader: indexReader,
fieldDict: fieldDict,
}, nil
}
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
i.mutex.RLock()
if !i.open {
i.mutex.RUnlock()
return nil, ErrorIndexClosed
}
indexReader, err := i.i.Reader()
if err != nil {
i.mutex.RUnlock()
return nil, err
}
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
if err != nil {
i.mutex.RUnlock()
return nil, err
}
return &indexImplFieldDict{
index: i,
indexReader: indexReader,
fieldDict: fieldDict,
}, nil
}
// DumpAll writes all index rows to a channel.
// INTERNAL: do not rely on this function, it is
// only intended to be used by the debug utilities
@ -586,3 +667,22 @@ func (i *indexImpl) NewBatch() *Batch {
internal: index.NewBatch(),
}
}
type indexImplFieldDict struct {
index *indexImpl
indexReader index.IndexReader
fieldDict index.FieldDict
}
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
return f.fieldDict.Next()
}
func (f *indexImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
err := f.fieldDict.Close()
if err != nil {
return err
}
return f.indexReader.Close()
}

View File

@ -13,6 +13,7 @@ import (
"io/ioutil"
"log"
"os"
"reflect"
"testing"
"time"
)
@ -363,3 +364,66 @@ func TestStoredFieldPreserved(t *testing.T) {
}
}
func TestDict(t *testing.T) {
defer os.RemoveAll("testidx")
index, err := New("testidx", NewIndexMapping())
if err != nil {
t.Fatal(err)
}
doca := map[string]interface{}{
"name": "marty",
"desc": "gophercon india",
}
err = index.Index("a", doca)
if err != nil {
t.Error(err)
}
docy := map[string]interface{}{
"name": "jasper",
"desc": "clojure",
}
err = index.Index("y", docy)
if err != nil {
t.Error(err)
}
docx := map[string]interface{}{
"name": "rose",
"desc": "googler",
}
err = index.Index("x", docx)
if err != nil {
t.Error(err)
}
dict, err := index.FieldDict("name")
if err != nil {
t.Error(err)
}
terms := []string{}
de, err := dict.Next()
for err == nil && de != nil {
terms = append(terms, string(de.Term))
de, err = dict.Next()
}
expectedTerms := []string{"jasper", "marty", "rose"}
if !reflect.DeepEqual(terms, expectedTerms) {
t.Errorf("expected %v, got %v", expectedTerms, terms)
}
err = dict.Close()
if err != nil {
t.Fatal(err)
}
err = index.Close()
if err != nil {
t.Fatal(err)
}
}

View File

@ -33,17 +33,23 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
}
// find the terms with this prefix
fieldReader, err := indexReader.FieldReader(field, []byte(prefixTerm), []byte(prefixTerm))
var fieldDict index.FieldDict
var err error
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field)
}
// enumerate terms and check levenshtein distance
candidateTerms := make([]string, 0)
tfd, err := fieldReader.Next()
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
if !exceeded && ld <= fuzziness {
candidateTerms = append(candidateTerms, tfd.Term)
}
tfd, err = fieldReader.Next()
tfd, err = fieldDict.Next()
}
// enumerate all the terms in the range

View File

@ -24,18 +24,18 @@ type TermPrefixSearcher struct {
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
// find the terms with this prefix
fieldReader, err := indexReader.FieldReader(field, []byte(prefix), []byte(prefix))
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
// enumerate all the terms in the range
qsearchers := make([]search.Searcher, 0, 25)
tfd, err := fieldReader.Next()
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
qsearcher, err := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
if err != nil {
return nil, err
}
qsearchers = append(qsearchers, qsearcher)
tfd, err = fieldReader.Next()
tfd, err = fieldDict.Next()
}
// build disjunction searcher of these ranges
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)