0
0
Fork 0

add crazy slow but functional DocumentVisitFieldTerms

This commit is contained in:
Marty Schoch 2017-12-10 08:55:59 -05:00
parent dc0adc8827
commit 690cd39921
4 changed files with 115 additions and 1 deletions

View File

@ -58,7 +58,7 @@ func (r *Reader) Document(id string) (*document.Document, error) {
}
// DocumentVisitFieldTerms forwards the request to r.root and returns whatever
// error its DocumentVisitFieldTerms reports. The id, fields and visitor
// arguments are passed through unchanged.
func (r *Reader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string,
	visitor index.DocumentFieldTermVisitor) error {
	return r.root.DocumentVisitFieldTerms(id, fields, visitor)
}
func (r *Reader) Fields() ([]string, error) {

View File

@ -1195,6 +1195,66 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
}
}
// TestIndexDocumentVisitFieldTerms indexes one document with two text fields
// and checks that DocumentVisitFieldTerms hands the visitor the expected term
// for each requested field.
func TestIndexDocumentVisitFieldTerms(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, testConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	// Setup failures abort the test: nothing below is meaningful without an
	// open index, an indexed document, and a usable reader.
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.IndexField|document.StoreField|document.IncludeTermVectors))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.IndexField|document.StoreField|document.IncludeTermVectors))

	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Stop here so the deferred Close and the visit below never run
		// against a reader that failed to open.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Gather every (field, term) pair the visitor is called with.
	fieldTerms := make(index.FieldTerms)
	err = indexReader.DocumentVisitFieldTerms(index.IndexInternalID("1"), []string{"name", "title"}, func(field string, term []byte) {
		fieldTerms[field] = append(fieldTerms[field], string(term))
	})
	if err != nil {
		t.Error(err)
	}

	expectedFieldTerms := index.FieldTerms{
		"name":  []string{"test"},
		"title": []string{"mister"},
	}
	if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) {
		t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms)
	}
}
func TestConcurrentUpdate(t *testing.T) {
defer func() {
err := DestroyTest()

View File

@ -341,6 +341,15 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq,
return rv, nil
}
// DocumentVisitFieldTerms resolves id through docInternalToNumber, splits the
// result into a segment index and a segment-local document number with
// segmentIndexAndLocalDocNumFromGlobal, and then delegates to that segment's
// DocumentVisitFieldTerms, returning its error.
func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string,
	visitor index.DocumentFieldTermVisitor) error {
	segIdx, segLocalNum := i.segmentIndexAndLocalDocNumFromGlobal(docInternalToNumber(id))
	return i.segment[segIdx].DocumentVisitFieldTerms(segLocalNum, fields, visitor)
}
func docNumberToBytes(in uint64) []byte {
buf := new(bytes.Buffer)

View File

@ -16,6 +16,7 @@ package scorch
import (
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
)
@ -56,6 +57,50 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel
return s.segment.VisitDocument(num, visitor)
}
// DocumentVisitFieldTerms calls visitor once for every term, in each of the
// requested fields, whose postings list contains document number num.
//
// For each field it walks the whole dictionary and, for each term, iterates
// that term's postings until it passes num, so the cost grows with the size
// of the dictionaries involved.
//
// All matches are collected before visitor is invoked: if any dictionary or
// postings operation fails, that error is returned and visitor is never
// called. Callbacks are issued in the order the fields appear in fields.
func (s *SegmentSnapshot) DocumentVisitFieldTerms(num uint64, fields []string,
	visitor index.DocumentFieldTermVisitor) error {
	collection := make(map[string][][]byte)

	// collect field indexed values
	for _, field := range fields {
		dict, err := s.Dictionary(field)
		if err != nil {
			return err
		}
		dictItr := dict.Iterator()
		next, err := dictItr.Next()
		for err == nil && next != nil {
			postings, perr := dict.PostingsList(next.Term, nil)
			if perr != nil {
				return perr
			}
			postingsItr := postings.Iterator()
			// The scan stops at the first posting numbered above num; this
			// presumably relies on postings being returned in ascending
			// order (TODO confirm against the segment implementation).
			posting, perr := postingsItr.Next()
			for perr == nil && posting != nil && posting.Number() <= num {
				if posting.Number() == num {
					// got what we're looking for
					collection[field] = append(collection[field], []byte(next.Term))
				}
				// Keep the postings error in perr so it is neither lost nor
				// confused with the dictionary iterator's error.
				posting, perr = postingsItr.Next()
			}
			if perr != nil {
				return perr
			}
			next, err = dictItr.Next()
		}
		if err != nil {
			return err
		}
	}

	// invoke callback, following the caller's field order rather than map
	// iteration order so results are reproducible between calls
	for _, field := range fields {
		for _, value := range collection[field] {
			visitor(field, value)
		}
		// A field listed more than once was already scanned once per
		// occurrence above; drop it so its values are emitted only once.
		delete(collection, field)
	}
	return nil
}
func (s *SegmentSnapshot) Count() uint64 {
rv := s.segment.Count()