added DocIdReader to Index interface
added more debug capabilities; removed hard-coded limitation on number of fields in doc
This commit is contained in:
parent
bdfa85761a
commit
2c86a731b4
|
@ -20,12 +20,14 @@ type Index interface {
|
|||
Delete(id string) error
|
||||
|
||||
TermFieldReader(term []byte, field string) (TermFieldReader, error)
|
||||
DocIdReader(start, end string) (DocIdReader, error)
|
||||
|
||||
DocCount() uint64
|
||||
|
||||
Document(id string) (*document.Document, error)
|
||||
|
||||
Dump()
|
||||
DumpDoc(id string) ([]interface{}, error)
|
||||
}
|
||||
|
||||
type TermFieldVector struct {
|
||||
|
@ -48,3 +50,9 @@ type TermFieldReader interface {
|
|||
Count() uint64
|
||||
Close()
|
||||
}
|
||||
|
||||
// DocIdReader walks over document identifiers stored in an index.
//
// NOTE(review): the tests in this package treat an empty identifier with a
// nil error as "no more documents"; implementations are presumably expected
// to follow that convention — confirm before relying on it elsewhere.
type DocIdReader interface {
	// Next returns the identifier at the reader's current position, or an
	// error if the underlying row could not be read.
	Next() (string, error)

	// Advance repositions the reader relative to ID and returns the
	// identifier found there, or an error if the row could not be read.
	Advance(ID string) (string, error)

	// Close releases whatever resources the reader holds.
	Close()
}
|
||||
|
|
|
@ -104,3 +104,72 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docId string) (*index.TermField
|
|||
func (r *UpsideDownCouchTermFieldReader) Close() {
|
||||
r.iterator.Close()
|
||||
}
|
||||
|
||||
type UpsideDownCouchDocIdReader struct {
|
||||
index *UpsideDownCouch
|
||||
iterator store.KVIterator
|
||||
start string
|
||||
end string
|
||||
}
|
||||
|
||||
func newUpsideDownCouchDocIdReader(index *UpsideDownCouch, start, end string) (*UpsideDownCouchDocIdReader, error) {
|
||||
if start == "" {
|
||||
start = string([]byte{0x0})
|
||||
}
|
||||
if end == "" {
|
||||
end = string([]byte{0xff})
|
||||
}
|
||||
bisr := NewBackIndexRow(start, nil, nil)
|
||||
it := index.store.Iterator(bisr.Key())
|
||||
|
||||
return &UpsideDownCouchDocIdReader{
|
||||
index: index,
|
||||
iterator: it,
|
||||
start: start,
|
||||
end: end,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIdReader) Next() (string, error) {
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
bier := NewBackIndexRow(r.end, nil, nil)
|
||||
if bytes.Compare(key, bier.Key()) > 0 {
|
||||
// end of the line
|
||||
return "", nil
|
||||
}
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
r.iterator.Next()
|
||||
return string(br.doc), nil
|
||||
} else {
|
||||
return "", nil
|
||||
}
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIdReader) Advance(docId string) (string, error) {
|
||||
bir := NewBackIndexRow(docId, nil, nil)
|
||||
r.iterator.Seek(bir.Key())
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
bier := NewBackIndexRow(r.end, nil, nil)
|
||||
if bytes.Compare(key, bier.Key()) < 0 {
|
||||
// end of the line
|
||||
return "", nil
|
||||
}
|
||||
br, err := NewBackIndexRowKV(key, val)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
r.iterator.Next()
|
||||
return string(br.doc), nil
|
||||
} else {
|
||||
return "", nil
|
||||
}
|
||||
}
|
||||
|
||||
func (r *UpsideDownCouchDocIdReader) Close() {
|
||||
r.iterator.Close()
|
||||
}
|
||||
|
|
|
@ -160,3 +160,49 @@ func TestIndexReader(t *testing.T) {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
func TestIndexDocIdReader(t *testing.T) {
|
||||
defer os.RemoveAll("test")
|
||||
|
||||
store, err := gouchstore.Open("test")
|
||||
idx := NewUpsideDownCouch(store)
|
||||
err = idx.Open()
|
||||
if err != nil {
|
||||
t.Errorf("error opening index: %v", err)
|
||||
}
|
||||
defer idx.Close()
|
||||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
expectedCount += 1
|
||||
|
||||
// first get all doc ids
|
||||
reader, err := idx.DocIdReader("", "")
|
||||
if err != nil {
|
||||
t.Errorf("Error accessing doc id reader: %v", err)
|
||||
}
|
||||
|
||||
id, err := reader.Next()
|
||||
count := uint64(0)
|
||||
for id != "" {
|
||||
count++
|
||||
id, err = reader.Next()
|
||||
}
|
||||
if count != expectedCount {
|
||||
t.Errorf("expected %d, got %d", expectedCount, count)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@ import (
|
|||
"bytes"
|
||||
"fmt"
|
||||
"math"
|
||||
"sort"
|
||||
|
||||
"github.com/couchbaselabs/bleve/analysis"
|
||||
|
||||
|
@ -223,6 +224,9 @@ func (udc *UpsideDownCouch) Close() {
|
|||
udc.store.Close()
|
||||
}
|
||||
|
||||
type (
	// termMap is a set of terms: the key is the term as a string and the
	// value is true when the term is present.
	termMap map[string]bool

	// fieldTermMap holds one termMap per field, keyed by field index.
	fieldTermMap map[int]termMap
)
|
||||
|
||||
func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
||||
// first we lookup the backindex row for the doc id if it exists
|
||||
// lookup the back index row
|
||||
|
@ -233,17 +237,16 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
|
||||
var isAdd = true
|
||||
// a map for each field, map key is term (string) bool true for existence
|
||||
// FIMXE hard-coded to max of 256 fields
|
||||
existingTermFieldMaps := make([]map[string]bool, 256)
|
||||
existingTermFieldMaps := make(fieldTermMap, 0)
|
||||
if backIndexRow != nil {
|
||||
isAdd = false
|
||||
for _, entry := range backIndexRow.entries {
|
||||
existingTermFieldMap := existingTermFieldMaps[entry.field]
|
||||
if existingTermFieldMap == nil {
|
||||
existingTermFieldMap = make(map[string]bool, 0)
|
||||
existingTermFieldMaps[entry.field] = existingTermFieldMap
|
||||
existingTermMap, fieldExists := existingTermFieldMaps[int(entry.field)]
|
||||
if !fieldExists {
|
||||
existingTermMap = make(termMap, 0)
|
||||
existingTermFieldMaps[int(entry.field)] = existingTermMap
|
||||
}
|
||||
existingTermFieldMap[string(entry.term)] = true
|
||||
existingTermMap[string(entry.term)] = true
|
||||
}
|
||||
}
|
||||
existingStoredFieldMap := make(map[uint16]bool)
|
||||
|
@ -273,7 +276,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
udc.lastFieldIndex = int(fieldIndex)
|
||||
}
|
||||
|
||||
existingTermFieldMap := existingTermFieldMaps[fieldIndex]
|
||||
existingTermMap, fieldExistedInDoc := existingTermFieldMaps[int(fieldIndex)]
|
||||
|
||||
if field.Options.IsIndexed() {
|
||||
|
||||
|
@ -296,14 +299,14 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
backIndexEntries = append(backIndexEntries, &backIndexEntry)
|
||||
|
||||
// remove the entry from the map of existing term fields if it exists
|
||||
if existingTermFieldMap != nil {
|
||||
if fieldExistedInDoc {
|
||||
termString := string(tf.Term)
|
||||
_, ok := existingTermFieldMap[termString]
|
||||
_, ok := existingTermMap[termString]
|
||||
if ok {
|
||||
// this is an update
|
||||
updateRows = append(updateRows, termFreqRow)
|
||||
// this term existed last time, delete it from that map
|
||||
delete(existingTermFieldMap, termString)
|
||||
delete(existingTermMap, termString)
|
||||
} else {
|
||||
// this is an add
|
||||
addRows = append(addRows, termFreqRow)
|
||||
|
@ -317,6 +320,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
|
||||
if field.Options.IsStored() {
|
||||
storedRow := NewStoredRow(doc.ID, uint16(fieldIndex), field.Value)
|
||||
backIndexStoredFields = append(backIndexStoredFields, fieldIndex)
|
||||
_, ok := existingStoredFieldMap[uint16(fieldIndex)]
|
||||
if ok {
|
||||
// this is an update
|
||||
|
@ -429,6 +433,48 @@ func (udc *UpsideDownCouch) Dump() {
|
|||
}
|
||||
}
|
||||
|
||||
// keyset is a list of raw keys that can be ordered bytewise with sort.Sort.
type keyset [][]byte

// Len reports the number of keys held.
func (ks keyset) Len() int {
	return len(ks)
}

// Swap exchanges the keys at positions i and j.
func (ks keyset) Swap(i, j int) {
	ks[j], ks[i] = ks[i], ks[j]
}

// Less reports whether the key at i sorts strictly before the key at j in
// bytewise order.
func (ks keyset) Less(i, j int) bool {
	return bytes.Compare(ks[j], ks[i]) > 0
}
|
||||
|
||||
// DumpDoc returns all rows in the index related to this doc id
|
||||
func (udc *UpsideDownCouch) DumpDoc(id string) ([]interface{}, error) {
|
||||
rv := make([]interface{}, 0)
|
||||
back, err := udc.backIndexRowForDoc(id)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
keys := make(keyset, 0)
|
||||
for _, stored := range back.storedFields {
|
||||
sr := NewStoredRow(id, stored, []byte{})
|
||||
key := sr.Key()
|
||||
keys = append(keys, key)
|
||||
}
|
||||
for _, entry := range back.entries {
|
||||
//log.Printf("term: `%s`, field: %d", entry.term, entry.field)
|
||||
tfr := NewTermFrequencyRow(entry.term, entry.field, id, 0, 0)
|
||||
key := tfr.Key()
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
||||
value, err := udc.store.Get(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
row, err := ParseFromKeyValue(key, value)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv = append(rv, row)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (index.TermFieldReader, error) {
|
||||
fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
|
||||
if fieldExists {
|
||||
|
@ -437,6 +483,10 @@ func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (inde
|
|||
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, 0)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, error) {
|
||||
return newUpsideDownCouchDocIdReader(udc, start, end)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
|
||||
rv := document.NewDocument(id)
|
||||
storedRow := NewStoredRow(id, 0, nil)
|
||||
|
|
Loading…
Reference in New Issue