diff --git a/index.go b/index.go index cf2ecd89..4fbd107b 100644 --- a/index.go +++ b/index.go @@ -78,6 +78,10 @@ type Index interface { Fields() ([]string, error) + FieldDict(field string) (index.FieldDict, error) + FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) + FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) + DumpAll() chan interface{} DumpDoc(id string) chan interface{} DumpFields() chan interface{} diff --git a/index/index.go b/index/index.go index 6f2b3f19..6ffd7909 100644 --- a/index/index.go +++ b/index/index.go @@ -41,7 +41,9 @@ type IndexReader interface { TermFieldReader(term []byte, field string) (TermFieldReader, error) DocIDReader(start, end string) (DocIDReader, error) - FieldReader(field string, startTerm []byte, endTerm []byte) (FieldReader, error) + FieldDict(field string) (FieldDict, error) + FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error) + FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error) Document(id string) (*document.Document, error) DocumentFieldTerms(id string) (FieldTerms, error) @@ -79,8 +81,13 @@ type TermFieldReader interface { Close() error } -type FieldReader interface { - Next() (*TermFieldDoc, error) +type DictEntry struct { + Term string + Count uint64 +} + +type FieldDict interface { + Next() (*DictEntry, error) Close() error } diff --git a/index/upside_down/field_reader.go b/index/upside_down/field_dict.go similarity index 60% rename from index/upside_down/field_reader.go rename to index/upside_down/field_dict.go index 27a249aa..f5feb120 100644 --- a/index/upside_down/field_reader.go +++ b/index/upside_down/field_dict.go @@ -17,23 +17,24 @@ import ( "github.com/blevesearch/bleve/index/store" ) -type UpsideDownCouchFieldReader struct { +type UpsideDownCouchFieldDict struct { indexReader *IndexReader iterator store.KVIterator endKey []byte field uint16 } -func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldReader, error) { +func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) { - startRow := NewTermFrequencyRow(startTerm, field, "", 0, 0) - startKey := startRow.ScanPrefixForFieldTermPrefix() - - endKey := NewTermFrequencyRow(endTerm, field, "", 0, 0).Key() + startKey := NewDictionaryRow(startTerm, field, 0).Key() + if endTerm == nil { + endTerm = []byte{ByteSeparator} + } + endKey := NewDictionaryRow(endTerm, field, 0).Key() it := indexReader.kvreader.Iterator(startKey) - return &UpsideDownCouchFieldReader{ + return &UpsideDownCouchFieldDict{ indexReader: indexReader, iterator: it, field: field, @@ -42,7 +43,7 @@ func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, start } -func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) { +func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { key, val, valid := r.iterator.Current() if !valid { return nil, nil @@ -53,24 +54,21 @@ func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) { return nil, nil } - currRow, err := NewTermFrequencyRowKV(key, val) + currRow, err := NewDictionaryRowKV(key, val) if err != nil { - return nil, fmt.Errorf("unexpected error parsing term freq row kv: %v", err) + return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err) } - rv := index.TermFieldDoc{ - Term: string(currRow.term), - Freq: currRow.freq, + rv := index.DictEntry{ + Term: string(currRow.term), + Count: currRow.count, } // advance the iterator to the next term - // by using invalid doc id (higher sorting) - nextTerm := incrementBytes(currRow.term) - nextRow := NewTermFrequencyRow(nextTerm, r.field, "", 0, 0) - r.iterator.Seek(nextRow.ScanPrefixForFieldTermPrefix()) + r.iterator.Next() return &rv, nil } -func (r *UpsideDownCouchFieldReader) Close() error { +func (r *UpsideDownCouchFieldDict) Close() error { return r.iterator.Close() } diff --git a/index/upside_down/field_reader_test.go b/index/upside_down/field_dict_test.go similarity index 74% rename from index/upside_down/field_reader_test.go rename to index/upside_down/field_dict_test.go index e39e78eb..e01a73e3 100644 --- a/index/upside_down/field_reader_test.go +++ b/index/upside_down/field_dict_test.go @@ -18,7 +18,7 @@ import ( "github.com/blevesearch/bleve/index/store/boltdb" ) -func TestIndexFieldReader(t *testing.T) { +func TestIndexFieldDict(t *testing.T) { defer os.RemoveAll("test") store, err := boltdb.Open("test", "bleve") @@ -54,38 +54,39 @@ func TestIndexFieldReader(t *testing.T) { t.Error(err) } defer indexReader.Close() - reader, err := indexReader.FieldReader("name", nil, nil) + + dict, err := indexReader.FieldDict("name") if err != nil { t.Errorf("error creating reader: %v", err) } - defer reader.Close() + defer dict.Close() termCount := 0 - curr, err := reader.Next() + curr, err := dict.Next() for err == nil && curr != nil { termCount++ if curr.Term != "test" { t.Errorf("expected term to be 'test', got '%s'", curr.Term) } - curr, err = reader.Next() + curr, err = dict.Next() } if termCount != 1 { t.Errorf("expected 1 term for this field, got %d", termCount) } - reader, err = indexReader.FieldReader("desc", nil, nil) + dict, err = indexReader.FieldDict("desc") if err != nil { t.Errorf("error creating reader: %v", err) } - defer reader.Close() + defer dict.Close() termCount = 0 terms := make([]string, 0) - curr, err = reader.Next() + curr, err = dict.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) - curr, err = reader.Next() + curr, err = dict.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) @@ -95,25 +96,48 @@ func TestIndexFieldReader(t *testing.T) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } - // test use case for prefix - reader, err = indexReader.FieldReader("prefix", []byte("cat"), []byte("cat")) + // test start and end range + dict, err = indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice")) if err != nil { t.Errorf("error creating reader: %v", err) } - defer reader.Close() + defer dict.Close() termCount = 0 terms = make([]string, 0) - curr, err = reader.Next() + curr, err = dict.Next() for err == nil && curr != nil { termCount++ terms = append(terms, curr.Term) - curr, err = reader.Next() + curr, err = dict.Next() + } + if termCount != 1 { + t.Errorf("expected 1 term for this field, got %d", termCount) + } + expectedTerms = []string{"more"} + if !reflect.DeepEqual(expectedTerms, terms) { + t.Errorf("expected %#v, got %#v", expectedTerms, terms) + } + + // test use case for prefix + dict, err = indexReader.FieldDictPrefix("prefix", []byte("cat")) + if err != nil { + t.Errorf("error creating reader: %v", err) + } + defer dict.Close() + + termCount = 0 + terms = make([]string, 0) + curr, err = dict.Next() + for err == nil && curr != nil { + termCount++ + terms = append(terms, curr.Term) + curr, err = dict.Next() } if termCount != 3 { t.Errorf("expected 3 term for this field, got %d", termCount) } - expectedTerms = []string{"cats", "catting", "cat"} + expectedTerms = []string{"cat", "cats", "catting"} if !reflect.DeepEqual(expectedTerms, terms) { t.Errorf("expected %#v, got %#v", expectedTerms, terms) } diff --git a/index/upside_down/index_reader.go b/index/upside_down/index_reader.go index e9ccc40f..f620cbed 100644 --- a/index/upside_down/index_reader.go +++ b/index/upside_down/index_reader.go @@ -31,12 +31,20 @@ func (i *IndexReader) TermFieldReader(term []byte, fieldName string) (index.Term return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0)) } -func (i *IndexReader) FieldReader(fieldName string, startTerm []byte, endTerm []byte) (index.FieldReader, error) { +func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) { + return i.FieldDictRange(fieldName, nil, nil) +} + +func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { fieldIndex, fieldExists := i.index.fieldIndexCache.FieldExists(fieldName) if fieldExists { - return newUpsideDownCouchFieldReader(i, uint16(fieldIndex), startTerm, endTerm) + return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm) } - return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0)) + return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{}) +} + +func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) { + return i.FieldDictRange(fieldName, termPrefix, incrementBytes(termPrefix)) } func (i *IndexReader) DocIDReader(start, end string) (index.DocIDReader, error) { diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index f56a9890..ab78685d 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -25,25 +25,31 @@ type UpsideDownCouchTermFieldReader struct { } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { + dictionaryRow := NewDictionaryRow(term, field, 0) + val, err := indexReader.kvreader.Get(dictionaryRow.Key()) + if err != nil { + return nil, err + } + if val == nil { + return &UpsideDownCouchTermFieldReader{ + count: 0, + term: term, + field: field, + }, nil + } + + err = dictionaryRow.parseDictionaryV(val) + if err != nil { + return nil, err + } + tfr := NewTermFrequencyRow(term, field, "", 0, 0) it := indexReader.kvreader.Iterator(tfr.Key()) - var count uint64 - key, val, valid := it.Current() - if valid { - if bytes.Equal(key, tfr.Key()) { - tfr, err := NewTermFrequencyRowKV(key, val) - if err != nil { - return nil, err - } - count = tfr.freq - } - } - return &UpsideDownCouchTermFieldReader{ indexReader: indexReader, iterator: it, - count: count, + count: dictionaryRow.count, term: term, field: field, }, nil @@ -54,54 +60,62 @@ func (r *UpsideDownCouchTermFieldReader) Count() uint64 { } func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) { - r.iterator.Next() - key, val, valid := r.iterator.Current() - if valid { - testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0) - if !bytes.HasPrefix(key, testfr.Key()) { - // end of the line - return nil, nil + if r.iterator != nil { + key, val, valid := r.iterator.Current() + if valid { + testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0) + if !bytes.HasPrefix(key, testfr.Key()) { + // end of the line + return nil, nil + } + tfr, err := NewTermFrequencyRowKV(key, val) + if err != nil { + return nil, err + } + r.iterator.Next() + return &index.TermFieldDoc{ + ID: string(tfr.doc), + Freq: tfr.freq, + Norm: float64(tfr.norm), + Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), + }, nil } - tfr, err := NewTermFrequencyRowKV(key, val) - if err != nil { - return nil, err - } - return &index.TermFieldDoc{ - ID: string(tfr.doc), - Freq: tfr.freq, - Norm: float64(tfr.norm), - Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), - }, nil } return nil, nil } func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) { - tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0) - r.iterator.Seek(tfr.Key()) - key, val, valid := r.iterator.Current() - if valid { - testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0) - if !bytes.HasPrefix(key, testfr.Key()) { - // end of the line - return nil, nil + if r.iterator != nil { + tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0) + r.iterator.Seek(tfr.Key()) + key, val, valid := r.iterator.Current() + if valid { + testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0) + if !bytes.HasPrefix(key, testfr.Key()) { + // end of the line + return nil, nil + } + tfr, err := NewTermFrequencyRowKV(key, val) + if err != nil { + return nil, err + } + r.iterator.Next() + return &index.TermFieldDoc{ + ID: string(tfr.doc), + Freq: tfr.freq, + Norm: float64(tfr.norm), + Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), + }, nil } - tfr, err := NewTermFrequencyRowKV(key, val) - if err != nil { - return nil, err - } - return &index.TermFieldDoc{ - ID: string(tfr.doc), - Freq: tfr.freq, - Norm: float64(tfr.norm), - Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors), - }, nil } return nil, nil } func (r *UpsideDownCouchTermFieldReader) Close() error { - return r.iterator.Close() + if r.iterator != nil { + return r.iterator.Close() + } + return nil } type UpsideDownCouchDocIDReader struct { diff --git a/index/upside_down/row.go b/index/upside_down/row.go index 9d318b85..0784e428 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -35,6 +35,8 @@ func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) { return NewVersionRowKV(key, value) case 'f': return NewFieldRowKV(key, value) + case 'd': + return NewDictionaryRowKV(key, value) case 't': return NewTermFrequencyRowKV(key, value) case 'b': @@ -171,6 +173,91 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) { return &rv, nil } +// DICTIONARY + +type DictionaryRow struct { + field uint16 + term []byte + count uint64 +} + +func (dr *DictionaryRow) Key() []byte { + buf := make([]byte, 3+len(dr.term)) + buf[0] = 'd' + binary.LittleEndian.PutUint16(buf[1:3], dr.field) + copy(buf[3:], dr.term) + return buf +} + +func (dr *DictionaryRow) Value() []byte { + used := 0 + buf := make([]byte, 8) + + used += binary.PutUvarint(buf[used:used+8], dr.count) + + return buf[0:used] +} + +func (dr *DictionaryRow) String() string { + return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count) +} + +func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow { + return &DictionaryRow{ + term: term, + field: field, + count: count, + } +} + +func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) { + rv, err := NewDictionaryRowK(key) + if err != nil { + return nil, err + } + + err = rv.parseDictionaryV(value) + if err != nil { + return nil, err + } + return rv, nil + +} + +func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { + rv := DictionaryRow{} + buf := bytes.NewBuffer(key) + _, err := buf.ReadByte() // type + if err != nil { + return nil, err + } + + err = binary.Read(buf, binary.LittleEndian, &rv.field) + if err != nil { + return nil, err + } + + rv.term, err = buf.ReadBytes(ByteSeparator) + // there is no separator expected here, should get EOF + if err != io.EOF { + return nil, err + } + + return &rv, nil +} + +func (dr *DictionaryRow) parseDictionaryV(value []byte) error { + buf := bytes.NewBuffer((value)) + + count, err := binary.ReadUvarint(buf) + if err != nil { + return err + } + dr.count = count + + return nil +} + // TERM FIELD FREQUENCY type TermVector struct { @@ -227,13 +314,9 @@ func (tfr *TermFrequencyRow) Key() []byte { return buf } -func (tfr *TermFrequencyRow) SummaryKey() []byte { - buf := make([]byte, 3+len(tfr.term)+1) - buf[0] = 't' - binary.LittleEndian.PutUint16(buf[1:3], tfr.field) - termLen := copy(buf[3:], tfr.term) - buf[3+termLen] = ByteSeparator - return buf +func (tfr *TermFrequencyRow) DictionaryRowKey() []byte { + dr := NewDictionaryRow(tfr.term, tfr.field, 0) + return dr.Key() } func (tfr *TermFrequencyRow) Value() []byte { diff --git a/index/upside_down/row_merge.go b/index/upside_down/row_merge.go index f9d7ef9d..1721db59 100644 --- a/index/upside_down/row_merge.go +++ b/index/upside_down/row_merge.go @@ -9,45 +9,45 @@ package upside_down -type termSummaryIncr struct{} +type dictionaryTermIncr struct{} -func newTermSummaryIncr() *termSummaryIncr { - return &termSummaryIncr{} +func newDictionaryTermIncr() *dictionaryTermIncr { + return &dictionaryTermIncr{} } -func (t *termSummaryIncr) Merge(key, existing []byte) ([]byte, error) { +func (t *dictionaryTermIncr) Merge(key, existing []byte) ([]byte, error) { if len(existing) > 0 { - tfr, err := NewTermFrequencyRowKV(key, existing) + dr, err := NewDictionaryRowKV(key, existing) if err != nil { return nil, err } - tfr.freq++ - return tfr.Value(), nil + dr.count++ + return dr.Value(), nil } else { - tfr, err := NewTermFrequencyRowK(key) + dr, err := NewDictionaryRowK(key) if err != nil { return nil, err } - tfr.freq = 1 - return tfr.Value(), nil + dr.count = 1 + return dr.Value(), nil } } -type termSummaryDecr struct{} +type dictionaryTermDecr struct{} -func newTermSummaryDecr() *termSummaryDecr { - return &termSummaryDecr{} +func newDictionaryTermDecr() *dictionaryTermDecr { + return &dictionaryTermDecr{} } -func (t *termSummaryDecr) Merge(key, existing []byte) ([]byte, error) { +func (t *dictionaryTermDecr) Merge(key, existing []byte) ([]byte, error) { if len(existing) > 0 { - tfr, err := NewTermFrequencyRowKV(key, existing) + dr, err := NewDictionaryRowKV(key, existing) if err != nil { return nil, err } - tfr.freq-- - if tfr.freq > 0 { - return tfr.Value(), nil + dr.count-- + if dr.count > 0 { + return dr.Value(), nil } } return nil, nil diff --git a/index/upside_down/row_test.go b/index/upside_down/row_test.go index b9cf0663..9c9cf66f 100644 --- a/index/upside_down/row_test.go +++ b/index/upside_down/row_test.go @@ -42,6 +42,11 @@ func TestRows(t *testing.T) { []byte{'f', 1, 2}, []byte{'s', 't', 'y', 'l', 'e', ByteSeparator}, }, + { + NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27), + []byte{'d', 0, 0, 'b', 'e', 'e', 'r'}, + []byte{27}, + }, { NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14), []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator}, diff --git a/index/upside_down/upside_down.go b/index/upside_down/upside_down.go index 21689bff..0a41d9e2 100644 --- a/index/upside_down/upside_down.go +++ b/index/upside_down/upside_down.go @@ -27,7 +27,7 @@ import ( var VersionKey = []byte{'v'} -const Version uint8 = 3 +const Version uint8 = 4 var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version) @@ -111,8 +111,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow tfr, ok := row.(*TermFrequencyRow) if ok { // need to increment counter - summaryKey := tfr.SummaryKey() - wb.Merge(summaryKey, newTermSummaryIncr()) + dictionaryKey := tfr.DictionaryRowKey() + wb.Merge(dictionaryKey, newDictionaryTermIncr()) } wb.Set(row.Key(), row.Value()) } @@ -127,8 +127,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow tfr, ok := row.(*TermFrequencyRow) if ok { // need to decrement counter - summaryKey := tfr.SummaryKey() - wb.Merge(summaryKey, newTermSummaryDecr()) + dictionaryKey := tfr.DictionaryRowKey() + wb.Merge(dictionaryKey, newDictionaryTermDecr()) } wb.Delete(row.Key()) } diff --git a/index_alias_impl.go b/index_alias_impl.go index 617853a7..a11f330b 100644 --- a/index_alias_impl.go +++ b/index_alias_impl.go @@ -165,6 +165,84 @@ func (i *indexAliasImpl) Fields() ([]string, error) { return i.indexes[0].Fields() } +func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + err := i.isAliasToSingleIndex() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := i.indexes[0].FieldDict(field) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexAliasImplFieldDict{ + index: i, + fieldDict: fieldDict, + }, nil +} + +func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + err := i.isAliasToSingleIndex() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexAliasImplFieldDict{ + index: i, + fieldDict: fieldDict, + }, nil +} + +func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + err := i.isAliasToSingleIndex() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexAliasImplFieldDict{ + index: i, + fieldDict: fieldDict, + }, nil +} + func (i *indexAliasImpl) DumpAll() chan interface{} { i.mutex.RLock() defer i.mutex.RUnlock() @@ -482,3 +560,17 @@ func (i *indexAliasImpl) NewBatch() *Batch { return i.indexes[0].NewBatch() } + +type indexAliasImplFieldDict struct { + index *indexAliasImpl + fieldDict index.FieldDict +} + +func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) { + return f.fieldDict.Next() +} + +func (f *indexAliasImplFieldDict) Close() error { + defer f.index.mutex.RUnlock() + return f.fieldDict.Close() +} diff --git a/index_alias_impl_test.go b/index_alias_impl_test.go index 10ae0011..de84e9f9 100644 --- a/index_alias_impl_test.go +++ b/index_alias_impl_test.go @@ -730,6 +730,18 @@ func (i *stubIndex) Fields() ([]string, error) { return nil, i.err } +func (i *stubIndex) FieldDict(field string) (index.FieldDict, error) { + return nil, i.err +} + +func (i *stubIndex) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { + return nil, i.err +} + +func (i *stubIndex) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { + return nil, i.err +} + func (i *stubIndex) DumpAll() chan interface{} { return nil } diff --git a/index_impl.go b/index_impl.go index cfbf2c17..f8e1be9d 100644 --- a/index_impl.go +++ b/index_impl.go @@ -498,6 +498,87 @@ func (i *indexImpl) Fields() ([]string, error) { return indexReader.Fields() } +func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + indexReader, err := i.i.Reader() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := indexReader.FieldDict(field) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexImplFieldDict{ + index: i, + indexReader: indexReader, + fieldDict: fieldDict, + }, nil +} + +func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + indexReader, err := i.i.Reader() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexImplFieldDict{ + index: i, + indexReader: indexReader, + fieldDict: fieldDict, + }, nil +} + +func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { + i.mutex.RLock() + + if !i.open { + i.mutex.RUnlock() + return nil, ErrorIndexClosed + } + + indexReader, err := i.i.Reader() + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix) + if err != nil { + i.mutex.RUnlock() + return nil, err + } + + return &indexImplFieldDict{ + index: i, + indexReader: indexReader, + fieldDict: fieldDict, + }, nil +} + // DumpAll writes all index rows to a channel. // INTERNAL: do not rely on this function, it is // only intended to be used by the debug utilities @@ -586,3 +667,22 @@ func (i *indexImpl) NewBatch() *Batch { internal: index.NewBatch(), } } + +type indexImplFieldDict struct { + index *indexImpl + indexReader index.IndexReader + fieldDict index.FieldDict +} + +func (f *indexImplFieldDict) Next() (*index.DictEntry, error) { + return f.fieldDict.Next() +} + +func (f *indexImplFieldDict) Close() error { + defer f.index.mutex.RUnlock() + err := f.fieldDict.Close() + if err != nil { + return err + } + return f.indexReader.Close() +} diff --git a/index_test.go b/index_test.go index 6cb192d3..232d33b7 100644 --- a/index_test.go +++ b/index_test.go @@ -13,6 +13,7 @@ import ( "io/ioutil" "log" "os" + "reflect" "testing" "time" ) @@ -363,3 +364,66 @@ func TestStoredFieldPreserved(t *testing.T) { } } + +func TestDict(t *testing.T) { + defer os.RemoveAll("testidx") + + index, err := New("testidx", NewIndexMapping()) + if err != nil { + t.Fatal(err) + } + + doca := map[string]interface{}{ + "name": "marty", + "desc": "gophercon india", + } + err = index.Index("a", doca) + if err != nil { + t.Error(err) + } + + docy := map[string]interface{}{ + "name": "jasper", + "desc": "clojure", + } + err = index.Index("y", docy) + if err != nil { + t.Error(err) + } + + docx := map[string]interface{}{ + "name": "rose", + "desc": "googler", + } + err = index.Index("x", docx) + if err != nil { + t.Error(err) + } + + dict, err := index.FieldDict("name") + if err != nil { + t.Error(err) + } + + terms := []string{} + de, err := dict.Next() + for err == nil && de != nil { + terms = append(terms, string(de.Term)) + de, err = dict.Next() + } + + expectedTerms := []string{"jasper", "marty", "rose"} + if !reflect.DeepEqual(terms, expectedTerms) { + t.Errorf("expected %v, got %v", expectedTerms, terms) + } + + err = dict.Close() + if err != nil { + t.Fatal(err) + } + + err = index.Close() + if err != nil { + t.Fatal(err) + } +} diff --git a/search/searchers/search_fuzzy.go b/search/searchers/search_fuzzy.go index 8d233547..111e302f 100644 --- a/search/searchers/search_fuzzy.go +++ b/search/searchers/search_fuzzy.go @@ -33,17 +33,23 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin } // find the terms with this prefix - fieldReader, err := indexReader.FieldReader(field, []byte(prefixTerm), []byte(prefixTerm)) + var fieldDict index.FieldDict + var err error + if len(prefixTerm) > 0 { + fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm)) + } else { + fieldDict, err = indexReader.FieldDict(field) + } // enumerate terms and check levenshtein distance candidateTerms := make([]string, 0) - tfd, err := fieldReader.Next() + tfd, err := fieldDict.Next() for err == nil && tfd != nil { ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness) if !exceeded && ld <= fuzziness { candidateTerms = append(candidateTerms, tfd.Term) } - tfd, err = fieldReader.Next() + tfd, err = fieldDict.Next() } // enumerate all the terms in the range diff --git a/search/searchers/search_term_prefix.go b/search/searchers/search_term_prefix.go index 13d588fc..d2d562b0 100644 --- a/search/searchers/search_term_prefix.go +++ b/search/searchers/search_term_prefix.go @@ -24,18 +24,18 @@ type TermPrefixSearcher struct { func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) { // find the terms with this prefix - fieldReader, err := indexReader.FieldReader(field, []byte(prefix), []byte(prefix)) + fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix)) // enumerate all the terms in the range qsearchers := make([]search.Searcher, 0, 25) - tfd, err := fieldReader.Next() + tfd, err := fieldDict.Next() for err == nil && tfd != nil { qsearcher, err := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain) if err != nil { return nil, err } qsearchers = append(qsearchers, qsearcher) - tfd, err = fieldReader.Next() + tfd, err = fieldDict.Next() } // build disjunction searcher of these ranges searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)