0
0
Fork 0

added DocIdReader to Index interface

added more debug capabilities
removed hard-coded limitation on number of fields in doc
This commit is contained in:
Marty Schoch 2014-07-11 14:24:28 -04:00
parent bdfa85761a
commit 2c86a731b4
4 changed files with 184 additions and 11 deletions

View File

@ -20,12 +20,14 @@ type Index interface {
Delete(id string) error
TermFieldReader(term []byte, field string) (TermFieldReader, error)
DocIdReader(start, end string) (DocIdReader, error)
DocCount() uint64
Document(id string) (*document.Document, error)
Dump()
DumpDoc(id string) ([]interface{}, error)
}
type TermFieldVector struct {
@ -48,3 +50,9 @@ type TermFieldReader interface {
Count() uint64
Close()
}
// DocIdReader iterates over the document IDs stored in an Index.
//
// Next and Advance both yield a document ID and an error. The
// UpsideDownCouchDocIdReader implementation in this change returns an
// empty ID with a nil error once there is nothing further to return, so
// callers loop until the ID is "".
type DocIdReader interface {
	// Next returns the next document ID in the iteration.
	Next() (string, error)
	// Advance repositions the reader using the given document ID and
	// returns the ID found there.
	Advance(ID string) (string, error)
	// Close releases the reader; takes no arguments and reports no error.
	Close()
}

View File

@ -104,3 +104,72 @@ func (r *UpsideDownCouchTermFieldReader) Advance(docId string) (*index.TermField
// Close closes the reader's underlying iterator.
func (r *UpsideDownCouchTermFieldReader) Close() {
	r.iterator.Close()
}
// UpsideDownCouchDocIdReader walks back index rows through a store
// iterator and reports the document ID of each row it visits.
type UpsideDownCouchDocIdReader struct {
	// index is the index this reader was created from.
	index *UpsideDownCouch
	// iterator is the store iterator, positioned by the constructor at the
	// back index key built from start.
	iterator store.KVIterator
	// start is the lower doc ID bound given to the constructor (after the
	// constructor's empty-string substitution).
	start string
	// end is the upper doc ID bound; Next and Advance compare each key
	// against the back index key built from it.
	end string
}
// newUpsideDownCouchDocIdReader builds a doc ID reader over the given index.
//
// An empty start is replaced by the single byte 0x00 and an empty end by the
// single byte 0xff. The store iterator is opened at the back index key
// derived from start. The returned error is always nil.
func newUpsideDownCouchDocIdReader(index *UpsideDownCouch, start, end string) (*UpsideDownCouchDocIdReader, error) {
	if len(start) == 0 {
		// lowest possible single-byte doc ID
		start = "\x00"
	}
	if len(end) == 0 {
		// highest possible single-byte doc ID
		end = "\xff"
	}

	startRow := NewBackIndexRow(start, nil, nil)
	reader := &UpsideDownCouchDocIdReader{
		index:    index,
		iterator: index.store.Iterator(startRow.Key()),
		start:    start,
		end:      end,
	}
	return reader, nil
}
// Next returns the document ID of the row currently under the iterator and
// then moves the iterator forward one row.
//
// It returns "" with a nil error when the iterator has no current row or
// when the current key sorts after the back index key built from r.end.
// A failure to parse the current row is returned as the error.
func (r *UpsideDownCouchDocIdReader) Next() (string, error) {
	key, val, valid := r.iterator.Current()
	if !valid {
		// iterator is exhausted
		return "", nil
	}

	endRow := NewBackIndexRow(r.end, nil, nil)
	if bytes.Compare(key, endRow.Key()) > 0 {
		// end of the line
		return "", nil
	}

	row, err := NewBackIndexRowKV(key, val)
	if err != nil {
		return "", err
	}

	// step past the row we are about to report
	r.iterator.Next()
	return string(row.doc), nil
}
// Advance seeks the iterator to the back index key built from docId, returns
// the document ID of the row found there, and then moves the iterator
// forward one row.
//
// It returns "" with a nil error when the seek leaves the iterator without a
// current row or when the current key sorts after the back index key built
// from r.end. A failure to parse the current row is returned as the error.
func (r *UpsideDownCouchDocIdReader) Advance(docId string) (string, error) {
	seekRow := NewBackIndexRow(docId, nil, nil)
	r.iterator.Seek(seekRow.Key())

	key, val, valid := r.iterator.Current()
	if !valid {
		// nothing at or after the requested position
		return "", nil
	}

	endRow := NewBackIndexRow(r.end, nil, nil)
	// Same upper-bound test as Next: stop only once the key is past the end
	// key. The previous "< 0" comparison was inverted, so it rejected every
	// in-range key and accepted keys beyond the end bound.
	if bytes.Compare(key, endRow.Key()) > 0 {
		// end of the line
		return "", nil
	}

	row, err := NewBackIndexRowKV(key, val)
	if err != nil {
		return "", err
	}

	// step past the row we are about to report
	r.iterator.Next()
	return string(row.doc), nil
}
// Close closes the reader's underlying store iterator.
func (r *UpsideDownCouchDocIdReader) Close() {
	r.iterator.Close()
}

View File

@ -160,3 +160,49 @@ func TestIndexReader(t *testing.T) {
}
}
// TestIndexDocIdReader indexes two documents and verifies that a DocIdReader
// opened with empty start and end bounds yields exactly that many IDs.
func TestIndexDocIdReader(t *testing.T) {
	defer os.RemoveAll("test")

	store, err := gouchstore.Open("test")
	if err != nil {
		// previously this error was overwritten by idx.Open() unchecked
		t.Fatalf("error opening store: %v", err)
	}
	idx := NewUpsideDownCouch(store)
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer idx.Close()

	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []byte("test test test")))
	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	// first get all doc ids
	reader, err := idx.DocIdReader("", "")
	if err != nil {
		// fatal: the reader must not be used after a failed open
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer reader.Close()

	count := uint64(0)
	for {
		id, err := reader.Next()
		if err != nil {
			// fail loudly instead of silently ending the count early
			t.Fatalf("error reading doc id: %v", err)
		}
		if id == "" {
			// an empty id signals the end of the iteration
			break
		}
		count++
	}
	if count != expectedCount {
		t.Errorf("expected %d, got %d", expectedCount, count)
	}
}

View File

@ -12,6 +12,7 @@ import (
"bytes"
"fmt"
"math"
"sort"
"github.com/couchbaselabs/bleve/analysis"
@ -223,6 +224,9 @@ func (udc *UpsideDownCouch) Close() {
udc.store.Close()
}
type (
	// termMap is a set of terms: the key is the term as a string and the
	// value is true when the term is present.
	termMap map[string]bool

	// fieldTermMap holds one termMap per field, keyed by the field's
	// integer index.
	fieldTermMap map[int]termMap
)
func (udc *UpsideDownCouch) Update(doc *document.Document) error {
// first we lookup the backindex row for the doc id if it exists
// lookup the back index row
@ -233,17 +237,16 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
var isAdd = true
// a map for each field, map key is term (string) bool true for existence
// FIXME hard-coded to max of 256 fields
existingTermFieldMaps := make([]map[string]bool, 256)
existingTermFieldMaps := make(fieldTermMap, 0)
if backIndexRow != nil {
isAdd = false
for _, entry := range backIndexRow.entries {
existingTermFieldMap := existingTermFieldMaps[entry.field]
if existingTermFieldMap == nil {
existingTermFieldMap = make(map[string]bool, 0)
existingTermFieldMaps[entry.field] = existingTermFieldMap
existingTermMap, fieldExists := existingTermFieldMaps[int(entry.field)]
if !fieldExists {
existingTermMap = make(termMap, 0)
existingTermFieldMaps[int(entry.field)] = existingTermMap
}
existingTermFieldMap[string(entry.term)] = true
existingTermMap[string(entry.term)] = true
}
}
existingStoredFieldMap := make(map[uint16]bool)
@ -273,7 +276,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
udc.lastFieldIndex = int(fieldIndex)
}
existingTermFieldMap := existingTermFieldMaps[fieldIndex]
existingTermMap, fieldExistedInDoc := existingTermFieldMaps[int(fieldIndex)]
if field.Options.IsIndexed() {
@ -296,14 +299,14 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
backIndexEntries = append(backIndexEntries, &backIndexEntry)
// remove the entry from the map of existing term fields if it exists
if existingTermFieldMap != nil {
if fieldExistedInDoc {
termString := string(tf.Term)
_, ok := existingTermFieldMap[termString]
_, ok := existingTermMap[termString]
if ok {
// this is an update
updateRows = append(updateRows, termFreqRow)
// this term existed last time, delete it from that map
delete(existingTermFieldMap, termString)
delete(existingTermMap, termString)
} else {
// this is an add
addRows = append(addRows, termFreqRow)
@ -317,6 +320,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
if field.Options.IsStored() {
storedRow := NewStoredRow(doc.ID, uint16(fieldIndex), field.Value)
backIndexStoredFields = append(backIndexStoredFields, fieldIndex)
_, ok := existingStoredFieldMap[uint16(fieldIndex)]
if ok {
// this is an update
@ -429,6 +433,48 @@ func (udc *UpsideDownCouch) Dump() {
}
}
// keyset is a list of raw keys that can be ordered with the sort package;
// keys are compared bytewise.
type keyset [][]byte

// Len reports how many keys are in the set.
func (k keyset) Len() int {
	return len(k)
}

// Swap exchanges the keys at positions i and j.
func (k keyset) Swap(i, j int) {
	tmp := k[i]
	k[i] = k[j]
	k[j] = tmp
}

// Less reports whether the key at i sorts strictly before the key at j.
func (k keyset) Less(i, j int) bool {
	order := bytes.Compare(k[i], k[j])
	return order < 0
}
// DumpDoc returns all rows in the index related to this doc id.
//
// The document's back index row is used to rebuild the key of every stored
// field row and term frequency row belonging to the document. The keys are
// sorted bytewise, each one is fetched from the store, and the parsed rows
// are returned in that order.
//
// If there is no back index row for id, an empty slice and a nil error are
// returned. Any error from the back index lookup, the store, or row parsing
// is returned with a nil slice.
func (udc *UpsideDownCouch) DumpDoc(id string) ([]interface{}, error) {
	rv := make([]interface{}, 0)
	back, err := udc.backIndexRowForDoc(id)
	if err != nil {
		return nil, err
	}
	if back == nil {
		// Update treats a nil back index row as "document not indexed";
		// without this guard the field accesses below would panic on an
		// unknown doc id.
		return rv, nil
	}

	keys := make(keyset, 0, len(back.storedFields)+len(back.entries))
	for _, stored := range back.storedFields {
		// only the key matters here, so the value is left empty
		sr := NewStoredRow(id, stored, []byte{})
		keys = append(keys, sr.Key())
	}
	for _, entry := range back.entries {
		// frequency and norm are passed as zero; only the key is used
		tfr := NewTermFrequencyRow(entry.term, entry.field, id, 0, 0)
		keys = append(keys, tfr.Key())
	}
	sort.Sort(keys)

	for _, key := range keys {
		value, err := udc.store.Get(key)
		if err != nil {
			return nil, err
		}
		row, err := ParseFromKeyValue(key, value)
		if err != nil {
			return nil, err
		}
		rv = append(rv, row)
	}

	return rv, nil
}
func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (index.TermFieldReader, error) {
fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
if fieldExists {
@ -437,6 +483,10 @@ func (udc *UpsideDownCouch) TermFieldReader(term []byte, fieldName string) (inde
return newUpsideDownCouchTermFieldReader(udc, []byte{BYTE_SEPARATOR}, 0)
}
// DocIdReader returns a reader over document IDs, built by
// newUpsideDownCouchDocIdReader from this index and the given start and end
// bounds. The constructor's result and error are passed through unchanged.
func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, error) {
	reader, err := newUpsideDownCouchDocIdReader(udc, start, end)
	return reader, err
}
func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
rv := document.NewDocument(id)
storedRow := NewStoredRow(id, 0, nil)