significant change to index format, support dictionary rows
this introduces disk format v4 now the summary rows for a term are stored in their own "dictionary row" format, previously the same information was stored in special term frequency rows this now allows us to easily iterate all the terms for a field in sorted order (useful for many other fuzzy data structures) at the top-level of bleve you can now browse terms within a field using the following api on the Index interface: FieldDict(field string) (index.FieldDict, error) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) fixes #127
This commit is contained in:
parent
4e14f4e4ef
commit
522f9d5cc7
4
index.go
4
index.go
@ -78,6 +78,10 @@ type Index interface {
|
||||
|
||||
Fields() ([]string, error)
|
||||
|
||||
FieldDict(field string) (index.FieldDict, error)
|
||||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
|
||||
FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)
|
||||
|
||||
DumpAll() chan interface{}
|
||||
DumpDoc(id string) chan interface{}
|
||||
DumpFields() chan interface{}
|
||||
|
@ -41,7 +41,9 @@ type IndexReader interface {
|
||||
TermFieldReader(term []byte, field string) (TermFieldReader, error)
|
||||
DocIDReader(start, end string) (DocIDReader, error)
|
||||
|
||||
FieldReader(field string, startTerm []byte, endTerm []byte) (FieldReader, error)
|
||||
FieldDict(field string) (FieldDict, error)
|
||||
FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
|
||||
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
DocumentFieldTerms(id string) (FieldTerms, error)
|
||||
@ -79,8 +81,13 @@ type TermFieldReader interface {
|
||||
Close() error
|
||||
}
|
||||
|
||||
type FieldReader interface {
|
||||
Next() (*TermFieldDoc, error)
|
||||
type DictEntry struct {
|
||||
Term string
|
||||
Count uint64
|
||||
}
|
||||
|
||||
type FieldDict interface {
|
||||
Next() (*DictEntry, error)
|
||||
Close() error
|
||||
}
|
||||
|
||||
|
@ -17,23 +17,24 @@ import (
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
)
|
||||
|
||||
type UpsideDownCouchFieldReader struct {
|
||||
type UpsideDownCouchFieldDict struct {
|
||||
indexReader *IndexReader
|
||||
iterator store.KVIterator
|
||||
endKey []byte
|
||||
field uint16
|
||||
}
|
||||
|
||||
func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldReader, error) {
|
||||
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {
|
||||
|
||||
startRow := NewTermFrequencyRow(startTerm, field, "", 0, 0)
|
||||
startKey := startRow.ScanPrefixForFieldTermPrefix()
|
||||
|
||||
endKey := NewTermFrequencyRow(endTerm, field, "", 0, 0).Key()
|
||||
startKey := NewDictionaryRow(startTerm, field, 0).Key()
|
||||
if endTerm == nil {
|
||||
endTerm = []byte{ByteSeparator}
|
||||
}
|
||||
endKey := NewDictionaryRow(endTerm, field, 0).Key()
|
||||
|
||||
it := indexReader.kvreader.Iterator(startKey)
|
||||
|
||||
return &UpsideDownCouchFieldReader{
|
||||
return &UpsideDownCouchFieldDict{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
field: field,
|
||||
@ -42,7 +43,7 @@ func newUpsideDownCouchFieldReader(indexReader *IndexReader, field uint16, start
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
if !valid {
|
||||
return nil, nil
|
||||
@ -53,24 +54,21 @@ func (r *UpsideDownCouchFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
currRow, err := NewTermFrequencyRowKV(key, val)
|
||||
currRow, err := NewDictionaryRowKV(key, val)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unexpected error parsing term freq row kv: %v", err)
|
||||
return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err)
|
||||
}
|
||||
rv := index.TermFieldDoc{
|
||||
rv := index.DictEntry{
|
||||
Term: string(currRow.term),
|
||||
Freq: currRow.freq,
|
||||
Count: currRow.count,
|
||||
}
|
||||
// advance the iterator to the next term
|
||||
// by using invalid doc id (higher sorting)
|
||||
nextTerm := incrementBytes(currRow.term)
|
||||
nextRow := NewTermFrequencyRow(nextTerm, r.field, "", 0, 0)
|
||||
r.iterator.Seek(nextRow.ScanPrefixForFieldTermPrefix())
|
||||
r.iterator.Next()
|
||||
return &rv, nil
|
||||
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchFieldReader) Close() error {
|
||||
func (r *UpsideDownCouchFieldDict) Close() error {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
|
@ -18,7 +18,7 @@ import (
|
||||
"github.com/blevesearch/bleve/index/store/boltdb"
|
||||
)
|
||||
|
||||
func TestIndexFieldReader(t *testing.T) {
|
||||
func TestIndexFieldDict(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
store, err := boltdb.Open("test", "bleve")
|
||||
@ -54,38 +54,39 @@ func TestIndexFieldReader(t *testing.T) {
|
||||
t.Error(err)
|
||||
}
|
||||
defer indexReader.Close()
|
||||
reader, err := indexReader.FieldReader("name", nil, nil)
|
||||
|
||||
dict, err := indexReader.FieldDict("name")
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
defer dict.Close()
|
||||
|
||||
termCount := 0
|
||||
curr, err := reader.Next()
|
||||
curr, err := dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
if curr.Term != "test" {
|
||||
t.Errorf("expected term to be 'test', got '%s'", curr.Term)
|
||||
}
|
||||
curr, err = reader.Next()
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
|
||||
reader, err = indexReader.FieldReader("desc", nil, nil)
|
||||
dict, err = indexReader.FieldDict("desc")
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
defer dict.Close()
|
||||
|
||||
termCount = 0
|
||||
terms := make([]string, 0)
|
||||
curr, err = reader.Next()
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = reader.Next()
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
@ -95,25 +96,48 @@ func TestIndexFieldReader(t *testing.T) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
// test use case for prefix
|
||||
reader, err = indexReader.FieldReader("prefix", []byte("cat"), []byte("cat"))
|
||||
// test start and end range
|
||||
dict, err = indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
defer dict.Close()
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = reader.Next()
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = reader.Next()
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 1 {
|
||||
t.Errorf("expected 1 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"more"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
// test use case for prefix
|
||||
dict, err = indexReader.FieldDictPrefix("prefix", []byte("cat"))
|
||||
if err != nil {
|
||||
t.Errorf("error creating reader: %v", err)
|
||||
}
|
||||
defer dict.Close()
|
||||
|
||||
termCount = 0
|
||||
terms = make([]string, 0)
|
||||
curr, err = dict.Next()
|
||||
for err == nil && curr != nil {
|
||||
termCount++
|
||||
terms = append(terms, curr.Term)
|
||||
curr, err = dict.Next()
|
||||
}
|
||||
if termCount != 3 {
|
||||
t.Errorf("expected 3 term for this field, got %d", termCount)
|
||||
}
|
||||
expectedTerms = []string{"cats", "catting", "cat"}
|
||||
expectedTerms = []string{"cat", "cats", "catting"}
|
||||
if !reflect.DeepEqual(expectedTerms, terms) {
|
||||
t.Errorf("expected %#v, got %#v", expectedTerms, terms)
|
||||
}
|
@ -31,12 +31,20 @@ func (i *IndexReader) TermFieldReader(term []byte, fieldName string) (index.Term
|
||||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0))
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldReader(fieldName string, startTerm []byte, endTerm []byte) (index.FieldReader, error) {
|
||||
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, nil, nil)
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
fieldIndex, fieldExists := i.index.fieldIndexCache.FieldExists(fieldName)
|
||||
if fieldExists {
|
||||
return newUpsideDownCouchFieldReader(i, uint16(fieldIndex), startTerm, endTerm)
|
||||
return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
|
||||
}
|
||||
return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0))
|
||||
return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
|
||||
}
|
||||
|
||||
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.FieldDictRange(fieldName, termPrefix, incrementBytes(termPrefix))
|
||||
}
|
||||
|
||||
func (i *IndexReader) DocIDReader(start, end string) (index.DocIDReader, error) {
|
||||
|
@ -25,25 +25,31 @@ type UpsideDownCouchTermFieldReader struct {
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) {
|
||||
tfr := NewTermFrequencyRow(term, field, "", 0, 0)
|
||||
it := indexReader.kvreader.Iterator(tfr.Key())
|
||||
|
||||
var count uint64
|
||||
key, val, valid := it.Current()
|
||||
if valid {
|
||||
if bytes.Equal(key, tfr.Key()) {
|
||||
tfr, err := NewTermFrequencyRowKV(key, val)
|
||||
dictionaryRow := NewDictionaryRow(term, field, 0)
|
||||
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
count = tfr.freq
|
||||
if val == nil {
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
count: 0,
|
||||
term: term,
|
||||
field: field,
|
||||
}, nil
|
||||
}
|
||||
|
||||
err = dictionaryRow.parseDictionaryV(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tfr := NewTermFrequencyRow(term, field, "", 0, 0)
|
||||
it := indexReader.kvreader.Iterator(tfr.Key())
|
||||
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
count: count,
|
||||
count: dictionaryRow.count,
|
||||
term: term,
|
||||
field: field,
|
||||
}, nil
|
||||
@ -54,7 +60,7 @@ func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
r.iterator.Next()
|
||||
if r.iterator != nil {
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
testfr := NewTermFrequencyRow(r.term, r.field, "", 0, 0)
|
||||
@ -66,6 +72,7 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.iterator.Next()
|
||||
return &index.TermFieldDoc{
|
||||
ID: string(tfr.doc),
|
||||
Freq: tfr.freq,
|
||||
@ -73,10 +80,12 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
||||
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
|
||||
if r.iterator != nil {
|
||||
tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
|
||||
r.iterator.Seek(tfr.Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
@ -90,6 +99,7 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermField
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.iterator.Next()
|
||||
return &index.TermFieldDoc{
|
||||
ID: string(tfr.doc),
|
||||
Freq: tfr.freq,
|
||||
@ -97,12 +107,16 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermField
|
||||
Vectors: r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors),
|
||||
}, nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchTermFieldReader) Close() error {
|
||||
if r.iterator != nil {
|
||||
return r.iterator.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type UpsideDownCouchDocIDReader struct {
|
||||
indexReader *IndexReader
|
||||
|
@ -35,6 +35,8 @@ func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
|
||||
return NewVersionRowKV(key, value)
|
||||
case 'f':
|
||||
return NewFieldRowKV(key, value)
|
||||
case 'd':
|
||||
return NewDictionaryRowKV(key, value)
|
||||
case 't':
|
||||
return NewTermFrequencyRowKV(key, value)
|
||||
case 'b':
|
||||
@ -171,6 +173,91 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
// DICTIONARY
|
||||
|
||||
type DictionaryRow struct {
|
||||
field uint16
|
||||
term []byte
|
||||
count uint64
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) Key() []byte {
|
||||
buf := make([]byte, 3+len(dr.term))
|
||||
buf[0] = 'd'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
|
||||
copy(buf[3:], dr.term)
|
||||
return buf
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) Value() []byte {
|
||||
used := 0
|
||||
buf := make([]byte, 8)
|
||||
|
||||
used += binary.PutUvarint(buf[used:used+8], dr.count)
|
||||
|
||||
return buf[0:used]
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) String() string {
|
||||
return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
|
||||
}
|
||||
|
||||
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
|
||||
return &DictionaryRow{
|
||||
term: term,
|
||||
field: field,
|
||||
count: count,
|
||||
}
|
||||
}
|
||||
|
||||
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
|
||||
rv, err := NewDictionaryRowK(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = rv.parseDictionaryV(value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return rv, nil
|
||||
|
||||
}
|
||||
|
||||
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
|
||||
rv := DictionaryRow{}
|
||||
buf := bytes.NewBuffer(key)
|
||||
_, err := buf.ReadByte() // type
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = binary.Read(buf, binary.LittleEndian, &rv.field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
rv.term, err = buf.ReadBytes(ByteSeparator)
|
||||
// there is no separator expected here, should get EOF
|
||||
if err != io.EOF {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
|
||||
buf := bytes.NewBuffer((value))
|
||||
|
||||
count, err := binary.ReadUvarint(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dr.count = count
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// TERM FIELD FREQUENCY
|
||||
|
||||
type TermVector struct {
|
||||
@ -227,13 +314,9 @@ func (tfr *TermFrequencyRow) Key() []byte {
|
||||
return buf
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) SummaryKey() []byte {
|
||||
buf := make([]byte, 3+len(tfr.term)+1)
|
||||
buf[0] = 't'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
||||
termLen := copy(buf[3:], tfr.term)
|
||||
buf[3+termLen] = ByteSeparator
|
||||
return buf
|
||||
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
|
||||
dr := NewDictionaryRow(tfr.term, tfr.field, 0)
|
||||
return dr.Key()
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) Value() []byte {
|
||||
|
@ -9,45 +9,45 @@
|
||||
|
||||
package upside_down
|
||||
|
||||
type termSummaryIncr struct{}
|
||||
type dictionaryTermIncr struct{}
|
||||
|
||||
func newTermSummaryIncr() *termSummaryIncr {
|
||||
return &termSummaryIncr{}
|
||||
func newDictionaryTermIncr() *dictionaryTermIncr {
|
||||
return &dictionaryTermIncr{}
|
||||
}
|
||||
|
||||
func (t *termSummaryIncr) Merge(key, existing []byte) ([]byte, error) {
|
||||
func (t *dictionaryTermIncr) Merge(key, existing []byte) ([]byte, error) {
|
||||
if len(existing) > 0 {
|
||||
tfr, err := NewTermFrequencyRowKV(key, existing)
|
||||
dr, err := NewDictionaryRowKV(key, existing)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tfr.freq++
|
||||
return tfr.Value(), nil
|
||||
dr.count++
|
||||
return dr.Value(), nil
|
||||
} else {
|
||||
tfr, err := NewTermFrequencyRowK(key)
|
||||
dr, err := NewDictionaryRowK(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tfr.freq = 1
|
||||
return tfr.Value(), nil
|
||||
dr.count = 1
|
||||
return dr.Value(), nil
|
||||
}
|
||||
}
|
||||
|
||||
type termSummaryDecr struct{}
|
||||
type dictionaryTermDecr struct{}
|
||||
|
||||
func newTermSummaryDecr() *termSummaryDecr {
|
||||
return &termSummaryDecr{}
|
||||
func newDictionaryTermDecr() *dictionaryTermDecr {
|
||||
return &dictionaryTermDecr{}
|
||||
}
|
||||
|
||||
func (t *termSummaryDecr) Merge(key, existing []byte) ([]byte, error) {
|
||||
func (t *dictionaryTermDecr) Merge(key, existing []byte) ([]byte, error) {
|
||||
if len(existing) > 0 {
|
||||
tfr, err := NewTermFrequencyRowKV(key, existing)
|
||||
dr, err := NewDictionaryRowKV(key, existing)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
tfr.freq--
|
||||
if tfr.freq > 0 {
|
||||
return tfr.Value(), nil
|
||||
dr.count--
|
||||
if dr.count > 0 {
|
||||
return dr.Value(), nil
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
|
@ -42,6 +42,11 @@ func TestRows(t *testing.T) {
|
||||
[]byte{'f', 1, 2},
|
||||
[]byte{'s', 't', 'y', 'l', 'e', ByteSeparator},
|
||||
},
|
||||
{
|
||||
NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27),
|
||||
[]byte{'d', 0, 0, 'b', 'e', 'e', 'r'},
|
||||
[]byte{27},
|
||||
},
|
||||
{
|
||||
NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "", 3, 3.14),
|
||||
[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator},
|
||||
|
@ -27,7 +27,7 @@ import (
|
||||
|
||||
var VersionKey = []byte{'v'}
|
||||
|
||||
const Version uint8 = 3
|
||||
const Version uint8 = 4
|
||||
|
||||
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
|
||||
|
||||
@ -111,8 +111,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow
|
||||
tfr, ok := row.(*TermFrequencyRow)
|
||||
if ok {
|
||||
// need to increment counter
|
||||
summaryKey := tfr.SummaryKey()
|
||||
wb.Merge(summaryKey, newTermSummaryIncr())
|
||||
dictionaryKey := tfr.DictionaryRowKey()
|
||||
wb.Merge(dictionaryKey, newDictionaryTermIncr())
|
||||
}
|
||||
wb.Set(row.Key(), row.Value())
|
||||
}
|
||||
@ -127,8 +127,8 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow
|
||||
tfr, ok := row.(*TermFrequencyRow)
|
||||
if ok {
|
||||
// need to decrement counter
|
||||
summaryKey := tfr.SummaryKey()
|
||||
wb.Merge(summaryKey, newTermSummaryDecr())
|
||||
dictionaryKey := tfr.DictionaryRowKey()
|
||||
wb.Merge(dictionaryKey, newDictionaryTermDecr())
|
||||
}
|
||||
wb.Delete(row.Key())
|
||||
}
|
||||
|
@ -165,6 +165,84 @@ func (i *indexAliasImpl) Fields() ([]string, error) {
|
||||
return i.indexes[0].Fields()
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDict(field)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDictRange(field, startTerm, endTerm)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.isAliasToSingleIndex()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := i.indexes[0].FieldDictPrefix(field, termPrefix)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexAliasImplFieldDict{
|
||||
index: i,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexAliasImpl) DumpAll() chan interface{} {
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
@ -482,3 +560,17 @@ func (i *indexAliasImpl) NewBatch() *Batch {
|
||||
|
||||
return i.indexes[0].NewBatch()
|
||||
}
|
||||
|
||||
type indexAliasImplFieldDict struct {
|
||||
index *indexAliasImpl
|
||||
fieldDict index.FieldDict
|
||||
}
|
||||
|
||||
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
|
||||
return f.fieldDict.Next()
|
||||
}
|
||||
|
||||
func (f *indexAliasImplFieldDict) Close() error {
|
||||
defer f.index.mutex.RUnlock()
|
||||
return f.fieldDict.Close()
|
||||
}
|
||||
|
@ -730,6 +730,18 @@ func (i *stubIndex) Fields() ([]string, error) {
|
||||
return nil, i.err
|
||||
}
|
||||
|
||||
func (i *stubIndex) FieldDict(field string) (index.FieldDict, error) {
|
||||
return nil, i.err
|
||||
}
|
||||
|
||||
func (i *stubIndex) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
return nil, i.err
|
||||
}
|
||||
|
||||
func (i *stubIndex) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
return nil, i.err
|
||||
}
|
||||
|
||||
func (i *stubIndex) DumpAll() chan interface{} {
|
||||
return nil
|
||||
}
|
||||
|
100
index_impl.go
100
index_impl.go
@ -498,6 +498,87 @@ func (i *indexImpl) Fields() ([]string, error) {
|
||||
return indexReader.Fields()
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDict(field)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
|
||||
i.mutex.RLock()
|
||||
|
||||
if !i.open {
|
||||
i.mutex.RUnlock()
|
||||
return nil, ErrorIndexClosed
|
||||
}
|
||||
|
||||
indexReader, err := i.i.Reader()
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
|
||||
if err != nil {
|
||||
i.mutex.RUnlock()
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &indexImplFieldDict{
|
||||
index: i,
|
||||
indexReader: indexReader,
|
||||
fieldDict: fieldDict,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// DumpAll writes all index rows to a channel.
|
||||
// INTERNAL: do not rely on this function, it is
|
||||
// only intended to be used by the debug utilities
|
||||
@ -586,3 +667,22 @@ func (i *indexImpl) NewBatch() *Batch {
|
||||
internal: index.NewBatch(),
|
||||
}
|
||||
}
|
||||
|
||||
type indexImplFieldDict struct {
|
||||
index *indexImpl
|
||||
indexReader index.IndexReader
|
||||
fieldDict index.FieldDict
|
||||
}
|
||||
|
||||
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
|
||||
return f.fieldDict.Next()
|
||||
}
|
||||
|
||||
func (f *indexImplFieldDict) Close() error {
|
||||
defer f.index.mutex.RUnlock()
|
||||
err := f.fieldDict.Close()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return f.indexReader.Close()
|
||||
}
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@ -363,3 +364,66 @@ func TestStoredFieldPreserved(t *testing.T) {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDict(t *testing.T) {
|
||||
defer os.RemoveAll("testidx")
|
||||
|
||||
index, err := New("testidx", NewIndexMapping())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
doca := map[string]interface{}{
|
||||
"name": "marty",
|
||||
"desc": "gophercon india",
|
||||
}
|
||||
err = index.Index("a", doca)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
docy := map[string]interface{}{
|
||||
"name": "jasper",
|
||||
"desc": "clojure",
|
||||
}
|
||||
err = index.Index("y", docy)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
docx := map[string]interface{}{
|
||||
"name": "rose",
|
||||
"desc": "googler",
|
||||
}
|
||||
err = index.Index("x", docx)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
dict, err := index.FieldDict("name")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
terms := []string{}
|
||||
de, err := dict.Next()
|
||||
for err == nil && de != nil {
|
||||
terms = append(terms, string(de.Term))
|
||||
de, err = dict.Next()
|
||||
}
|
||||
|
||||
expectedTerms := []string{"jasper", "marty", "rose"}
|
||||
if !reflect.DeepEqual(terms, expectedTerms) {
|
||||
t.Errorf("expected %v, got %v", expectedTerms, terms)
|
||||
}
|
||||
|
||||
err = dict.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
err = index.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
@ -33,17 +33,23 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
|
||||
}
|
||||
|
||||
// find the terms with this prefix
|
||||
fieldReader, err := indexReader.FieldReader(field, []byte(prefixTerm), []byte(prefixTerm))
|
||||
var fieldDict index.FieldDict
|
||||
var err error
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
|
||||
// enumerate terms and check levenshtein distance
|
||||
candidateTerms := make([]string, 0)
|
||||
tfd, err := fieldReader.Next()
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
|
||||
if !exceeded && ld <= fuzziness {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
}
|
||||
tfd, err = fieldReader.Next()
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
|
||||
// enumerate all the terms in the range
|
||||
|
@ -24,18 +24,18 @@ type TermPrefixSearcher struct {
|
||||
|
||||
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string, field string, boost float64, explain bool) (*TermPrefixSearcher, error) {
|
||||
// find the terms with this prefix
|
||||
fieldReader, err := indexReader.FieldReader(field, []byte(prefix), []byte(prefix))
|
||||
fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
|
||||
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]search.Searcher, 0, 25)
|
||||
tfd, err := fieldReader.Next()
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
qsearcher, err := NewTermSearcher(indexReader, string(tfd.Term), field, 1.0, explain)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
qsearchers = append(qsearchers, qsearcher)
|
||||
tfd, err = fieldReader.Next()
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
// build disjunction searcher of these ranges
|
||||
searcher, err := NewDisjunctionSearcher(indexReader, qsearchers, 0, explain)
|
||||
|
Loading…
Reference in New Issue
Block a user