DocValue Config, new API Changes
- VisitableDocValueFields API for listing the fields with persisted doc values
- doc value configs made overridable at the field level
- on-the-fly (runtime) uninverting of doc values enabled
- a few unit test updates
This commit is contained in:
parent
1788a03803
commit
4c256f5669
|
@ -20,7 +20,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/analysis"
|
"github.com/blevesearch/bleve/analysis"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DefaultBooleanIndexingOptions = StoreField | IndexField
|
const DefaultBooleanIndexingOptions = StoreField | IndexField | DocValues
|
||||||
|
|
||||||
type BooleanField struct {
|
type BooleanField struct {
|
||||||
name string
|
name string
|
||||||
|
|
|
@ -23,7 +23,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/numeric"
|
"github.com/blevesearch/bleve/numeric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DefaultDateTimeIndexingOptions = StoreField | IndexField
|
const DefaultDateTimeIndexingOptions = StoreField | IndexField | DocValues
|
||||||
const DefaultDateTimePrecisionStep uint = 4
|
const DefaultDateTimePrecisionStep uint = 4
|
||||||
|
|
||||||
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
|
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
|
||||||
|
|
|
@ -21,7 +21,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/numeric"
|
"github.com/blevesearch/bleve/numeric"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DefaultNumericIndexingOptions = StoreField | IndexField
|
const DefaultNumericIndexingOptions = StoreField | IndexField | DocValues
|
||||||
|
|
||||||
const DefaultPrecisionStep uint = 4
|
const DefaultPrecisionStep uint = 4
|
||||||
|
|
||||||
|
|
|
@ -20,7 +20,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/analysis"
|
"github.com/blevesearch/bleve/analysis"
|
||||||
)
|
)
|
||||||
|
|
||||||
const DefaultTextIndexingOptions = IndexField
|
const DefaultTextIndexingOptions = IndexField | DocValues
|
||||||
|
|
||||||
type TextField struct {
|
type TextField struct {
|
||||||
name string
|
name string
|
||||||
|
|
|
@ -20,6 +20,7 @@ const (
|
||||||
IndexField IndexingOptions = 1 << iota
|
IndexField IndexingOptions = 1 << iota
|
||||||
StoreField
|
StoreField
|
||||||
IncludeTermVectors
|
IncludeTermVectors
|
||||||
|
DocValues
|
||||||
)
|
)
|
||||||
|
|
||||||
func (o IndexingOptions) IsIndexed() bool {
|
func (o IndexingOptions) IsIndexed() bool {
|
||||||
|
@ -34,6 +35,10 @@ func (o IndexingOptions) IncludeTermVectors() bool {
|
||||||
return o&IncludeTermVectors != 0
|
return o&IncludeTermVectors != 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (o IndexingOptions) IncludeDocValues() bool {
|
||||||
|
return o&DocValues != 0
|
||||||
|
}
|
||||||
|
|
||||||
func (o IndexingOptions) String() string {
|
func (o IndexingOptions) String() string {
|
||||||
rv := ""
|
rv := ""
|
||||||
if o.IsIndexed() {
|
if o.IsIndexed() {
|
||||||
|
@ -51,5 +56,11 @@ func (o IndexingOptions) String() string {
|
||||||
}
|
}
|
||||||
rv += "TV"
|
rv += "TV"
|
||||||
}
|
}
|
||||||
|
if o.IncludeDocValues() {
|
||||||
|
if rv != "" {
|
||||||
|
rv += ", "
|
||||||
|
}
|
||||||
|
rv += "DV"
|
||||||
|
}
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,36 +24,56 @@ func TestIndexingOptions(t *testing.T) {
|
||||||
isIndexed bool
|
isIndexed bool
|
||||||
isStored bool
|
isStored bool
|
||||||
includeTermVectors bool
|
includeTermVectors bool
|
||||||
|
docValues bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
options: IndexField | StoreField | IncludeTermVectors,
|
options: IndexField | StoreField | IncludeTermVectors,
|
||||||
isIndexed: true,
|
isIndexed: true,
|
||||||
isStored: true,
|
isStored: true,
|
||||||
includeTermVectors: true,
|
includeTermVectors: true,
|
||||||
|
docValues: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
options: IndexField | IncludeTermVectors,
|
options: IndexField | IncludeTermVectors,
|
||||||
isIndexed: true,
|
isIndexed: true,
|
||||||
isStored: false,
|
isStored: false,
|
||||||
includeTermVectors: true,
|
includeTermVectors: true,
|
||||||
|
docValues: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
options: StoreField | IncludeTermVectors,
|
options: StoreField | IncludeTermVectors,
|
||||||
isIndexed: false,
|
isIndexed: false,
|
||||||
isStored: true,
|
isStored: true,
|
||||||
includeTermVectors: true,
|
includeTermVectors: true,
|
||||||
|
docValues: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
options: IndexField,
|
options: IndexField,
|
||||||
isIndexed: true,
|
isIndexed: true,
|
||||||
isStored: false,
|
isStored: false,
|
||||||
includeTermVectors: false,
|
includeTermVectors: false,
|
||||||
|
docValues: false,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
options: StoreField,
|
options: StoreField,
|
||||||
isIndexed: false,
|
isIndexed: false,
|
||||||
isStored: true,
|
isStored: true,
|
||||||
includeTermVectors: false,
|
includeTermVectors: false,
|
||||||
|
docValues: false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
options: DocValues,
|
||||||
|
isIndexed: false,
|
||||||
|
isStored: false,
|
||||||
|
includeTermVectors: false,
|
||||||
|
docValues: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
options: IndexField | StoreField | IncludeTermVectors | DocValues,
|
||||||
|
isIndexed: true,
|
||||||
|
isStored: true,
|
||||||
|
includeTermVectors: true,
|
||||||
|
docValues: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,5 +90,9 @@ func TestIndexingOptions(t *testing.T) {
|
||||||
if actuallyIncludeTermVectors != test.includeTermVectors {
|
if actuallyIncludeTermVectors != test.includeTermVectors {
|
||||||
t.Errorf("expected includeTermVectors to be %v, got %v for %d", test.includeTermVectors, actuallyIncludeTermVectors, test.options)
|
t.Errorf("expected includeTermVectors to be %v, got %v for %d", test.includeTermVectors, actuallyIncludeTermVectors, test.options)
|
||||||
}
|
}
|
||||||
|
actuallyDocValues := test.options.IncludeDocValues()
|
||||||
|
if actuallyDocValues != test.docValues {
|
||||||
|
t.Errorf("expected docValue to be %v, got %v for %d", test.docValues, actuallyDocValues, test.options)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,14 +37,6 @@ const Name = "scorch"
|
||||||
|
|
||||||
const Version uint8 = 1
|
const Version uint8 = 1
|
||||||
|
|
||||||
// UnInvertIndex is implemented by various scorch index implementations
|
|
||||||
// to provide the un inverting of the postings or other indexed values.
|
|
||||||
type UnInvertIndex interface {
|
|
||||||
// apparently need better namings here..
|
|
||||||
VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
|
||||||
visitor index.DocumentFieldTermVisitor) error
|
|
||||||
}
|
|
||||||
|
|
||||||
type Scorch struct {
|
type Scorch struct {
|
||||||
readOnly bool
|
readOnly bool
|
||||||
version uint8
|
version uint8
|
||||||
|
|
|
@ -1638,3 +1638,72 @@ func TestIndexDocumentVisitFieldTermsWithMultipleDocs(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions(t *testing.T) {
|
||||||
|
defer func() {
|
||||||
|
err := DestroyTest()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
analysisQueue := index.NewAnalysisQueue(1)
|
||||||
|
idx, err := NewScorch(Name, testConfig, analysisQueue)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
err = idx.Open()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error opening index: %v", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
err := idx.Close()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// mix of field options; this exercises the runtime (on-the-fly) uninverting of
|
||||||
|
// doc values for fields whose custom options omit DocValues, such as designation and dept.
|
||||||
|
options := document.IndexField | document.StoreField | document.IncludeTermVectors
|
||||||
|
doc := document.NewDocument("1")
|
||||||
|
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test"))) // default doc value persisted
|
||||||
|
doc.AddField(document.NewTextField("title", []uint64{}, []byte("mister"))) // default doc value persisted
|
||||||
|
doc.AddField(document.NewTextFieldWithIndexingOptions("designation", []uint64{}, []byte("engineer"), options))
|
||||||
|
doc.AddField(document.NewTextFieldWithIndexingOptions("dept", []uint64{}, []byte("bleve"), options))
|
||||||
|
|
||||||
|
err = idx.Update(doc)
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Error updating index: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
indexReader, err := idx.Reader()
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldTerms := make(index.FieldTerms)
|
||||||
|
docNumber, err := indexReader.InternalID("1")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
err = indexReader.DocumentVisitFieldTerms(docNumber, []string{"name", "designation", "dept"}, func(field string, term []byte) {
|
||||||
|
fieldTerms[field] = append(fieldTerms[field], string(term))
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
expectedFieldTerms := index.FieldTerms{
|
||||||
|
"name": []string{"test"},
|
||||||
|
"designation": []string{"engineer"},
|
||||||
|
"dept": []string{"bleve"},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) {
|
||||||
|
t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms)
|
||||||
|
}
|
||||||
|
err = indexReader.Close()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
|
@ -119,10 +119,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
||||||
if field.Options().IsStored() {
|
if field.Options().IsStored() {
|
||||||
storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
|
storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
|
||||||
}
|
}
|
||||||
// TODO with mapping changes for dv
|
|
||||||
//if field.Options().IncludeDocValues() {
|
if field.Options().IncludeDocValues() {
|
||||||
s.DocValueFields[fieldID] = true
|
s.DocValueFields[fieldID] = true
|
||||||
//}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now that its been rolled up into docMap, walk that
|
// now that its been rolled up into docMap, walk that
|
||||||
|
|
|
@ -91,9 +91,14 @@ type Location interface {
|
||||||
}
|
}
|
||||||
|
|
||||||
// DocumentFieldTermVisitable is implemented by various scorch segment
|
// DocumentFieldTermVisitable is implemented by various scorch segment
|
||||||
// implementations to provide the un inverting of the postings
|
// implementations with persistence for the un inverting of the
|
||||||
// or other indexed values.
|
// postings or other indexed values.
|
||||||
type DocumentFieldTermVisitable interface {
|
type DocumentFieldTermVisitable interface {
|
||||||
VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||||
visitor index.DocumentFieldTermVisitor) error
|
visitor index.DocumentFieldTermVisitor) error
|
||||||
|
|
||||||
|
// VisitableDocValueFields returns the list of fields
|
||||||
|
// whose doc values are persisted and which are
|
||||||
|
// therefore visitable by the above VisitDocumentFieldTerms method.
|
||||||
|
VisitableDocValueFields() ([]string, error)
|
||||||
}
|
}
|
||||||
|
|
|
@ -286,3 +286,90 @@ func buildMemSegmentMulti() *mem.Segment {
|
||||||
|
|
||||||
return segment
|
return segment
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildMemSegmentWithDefaultFieldMapping() (*mem.Segment, []string) {
|
||||||
|
|
||||||
|
doc := &document.Document{
|
||||||
|
ID: "a",
|
||||||
|
Fields: []document.Field{
|
||||||
|
document.NewTextField("_id", nil, []byte("a")),
|
||||||
|
document.NewTextField("name", nil, []byte("wow")),
|
||||||
|
document.NewTextField("desc", nil, []byte("some thing")),
|
||||||
|
document.NewTextField("tag", []uint64{0}, []byte("cold")),
|
||||||
|
},
|
||||||
|
CompositeFields: []*document.CompositeField{
|
||||||
|
document.NewCompositeField("_all", true, nil, []string{"_id"}),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
var fields []string
|
||||||
|
fields = append(fields, "_id")
|
||||||
|
fields = append(fields, "name")
|
||||||
|
fields = append(fields, "desc")
|
||||||
|
fields = append(fields, "tag")
|
||||||
|
|
||||||
|
// forge analyzed docs
|
||||||
|
results := []*index.AnalysisResult{
|
||||||
|
&index.AnalysisResult{
|
||||||
|
Document: doc,
|
||||||
|
Analyzed: []analysis.TokenFrequencies{
|
||||||
|
analysis.TokenFrequency(analysis.TokenStream{
|
||||||
|
&analysis.Token{
|
||||||
|
Start: 0,
|
||||||
|
End: 1,
|
||||||
|
Position: 1,
|
||||||
|
Term: []byte("a"),
|
||||||
|
},
|
||||||
|
}, nil, false),
|
||||||
|
analysis.TokenFrequency(analysis.TokenStream{
|
||||||
|
&analysis.Token{
|
||||||
|
Start: 0,
|
||||||
|
End: 3,
|
||||||
|
Position: 1,
|
||||||
|
Term: []byte("wow"),
|
||||||
|
},
|
||||||
|
}, nil, true),
|
||||||
|
analysis.TokenFrequency(analysis.TokenStream{
|
||||||
|
&analysis.Token{
|
||||||
|
Start: 0,
|
||||||
|
End: 4,
|
||||||
|
Position: 1,
|
||||||
|
Term: []byte("some"),
|
||||||
|
},
|
||||||
|
&analysis.Token{
|
||||||
|
Start: 5,
|
||||||
|
End: 10,
|
||||||
|
Position: 2,
|
||||||
|
Term: []byte("thing"),
|
||||||
|
},
|
||||||
|
}, nil, true),
|
||||||
|
analysis.TokenFrequency(analysis.TokenStream{
|
||||||
|
&analysis.Token{
|
||||||
|
Start: 0,
|
||||||
|
End: 4,
|
||||||
|
Position: 1,
|
||||||
|
Term: []byte("cold"),
|
||||||
|
},
|
||||||
|
}, []uint64{0}, true),
|
||||||
|
},
|
||||||
|
Length: []int{
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
2,
|
||||||
|
1,
|
||||||
|
1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
// fix up composite fields
|
||||||
|
for _, ar := range results {
|
||||||
|
for i, f := range ar.Document.Fields {
|
||||||
|
for _, cf := range ar.Document.CompositeFields {
|
||||||
|
cf.Compose(f.Name(), ar.Length[i], ar.Analyzed[i])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mem.NewFromAnalyzedDocs(results), fields
|
||||||
|
}
|
||||||
|
|
|
@ -151,7 +151,8 @@ func (di *docValueIterator) getDocValueLocs(docID uint64) (uint64, uint64) {
|
||||||
return math.MaxUint64, math.MaxUint64
|
return math.MaxUint64, math.MaxUint64
|
||||||
}
|
}
|
||||||
|
|
||||||
// VisitDocumentFieldTerms is an implementation of the UnInvertIndex interface
|
// VisitDocumentFieldTerms is an implementation of the
|
||||||
|
// DocumentFieldTermVisitable interface
|
||||||
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||||
visitor index.DocumentFieldTermVisitor) error {
|
visitor index.DocumentFieldTermVisitor) error {
|
||||||
fieldID := uint16(0)
|
fieldID := uint16(0)
|
||||||
|
@ -178,3 +179,22 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// VisitableDocValueFields returns the list of fields with
|
||||||
|
// persisted doc value terms that can be visited using the
|
||||||
|
// VisitDocumentFieldTerms method.
|
||||||
|
func (s *Segment) VisitableDocValueFields() ([]string, error) {
|
||||||
|
if len(s.fieldsInv) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var rv []string
|
||||||
|
for fieldID, field := range s.fieldsInv {
|
||||||
|
if dvIter, ok := s.fieldDvIterMap[uint16(fieldID)]; ok &&
|
||||||
|
dvIter != nil {
|
||||||
|
rv = append(rv, field)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return rv, nil
|
||||||
|
}
|
||||||
|
|
|
@ -19,6 +19,9 @@ import (
|
||||||
"os"
|
"os"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/blevesearch/bleve/index"
|
||||||
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestOpen(t *testing.T) {
|
func TestOpen(t *testing.T) {
|
||||||
|
@ -515,3 +518,83 @@ func TestOpenMultiWithTwoChunks(t *testing.T) {
|
||||||
t.Errorf("expected count to be 1, got %d", count)
|
t.Errorf("expected count to be 1, got %d", count)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestSegmentVisitableDocValueFieldsList(t *testing.T) {
|
||||||
|
_ = os.RemoveAll("/tmp/scorch.zap")
|
||||||
|
|
||||||
|
memSegment := buildMemSegmentMulti()
|
||||||
|
err := PersistSegment(memSegment, "/tmp/scorch.zap", 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error persisting segment: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
seg, err := Open("/tmp/scorch.zap")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error opening segment: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cerr := seg.Close()
|
||||||
|
if cerr != nil {
|
||||||
|
t.Fatalf("error closing segment: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok {
|
||||||
|
fields, err := zaps.VisitableDocValueFields()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("segment VisitableDocValueFields err: %v", err)
|
||||||
|
}
|
||||||
|
// no persisted doc value fields
|
||||||
|
if len(fields) != 0 {
|
||||||
|
t.Errorf("expected no persisted fields for doc values, got: %#v", fields)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
_ = os.RemoveAll("/tmp/scorch.zap")
|
||||||
|
|
||||||
|
memSegment, expectedFields := buildMemSegmentWithDefaultFieldMapping()
|
||||||
|
err = PersistSegment(memSegment, "/tmp/scorch.zap", 1)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error persisting segment: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
seg, err = Open("/tmp/scorch.zap")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("error opening segment: %v", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
cerr := seg.Close()
|
||||||
|
if cerr != nil {
|
||||||
|
t.Fatalf("error closing segment: %v", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
if zaps, ok := seg.(segment.DocumentFieldTermVisitable); ok {
|
||||||
|
fields, err := zaps.VisitableDocValueFields()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("segment VisitableDocValueFields err: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !reflect.DeepEqual(fields, expectedFields) {
|
||||||
|
t.Errorf("expected field terms: %#v, got: %#v", expectedFields, fields)
|
||||||
|
}
|
||||||
|
|
||||||
|
fieldTerms := make(index.FieldTerms)
|
||||||
|
err = zaps.VisitDocumentFieldTerms(0, fields, func(field string, term []byte) {
|
||||||
|
fieldTerms[field] = append(fieldTerms[field], string(term))
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
t.Error(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
expectedFieldTerms := index.FieldTerms{
|
||||||
|
"name": []string{"wow"},
|
||||||
|
"desc": []string{"some", "thing"},
|
||||||
|
"tag": []string{"cold"},
|
||||||
|
"_id": []string{"a"},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(fieldTerms, expectedFieldTerms) {
|
||||||
|
t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, fieldTerms)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -412,15 +412,64 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
||||||
ss := i.segment[segmentIndex]
|
ss := i.segment[segmentIndex]
|
||||||
|
|
||||||
if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok {
|
if zaps, ok := ss.segment.(segment.DocumentFieldTermVisitable); ok {
|
||||||
return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor)
|
// get the list of doc value persisted fields
|
||||||
|
pFields, err := zaps.VisitableDocValueFields()
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// collect the fields for which the term lookup has to
|
||||||
|
// be performed at runtime
|
||||||
|
dvPendingFields := extractDvPendingFields(fields, pFields)
|
||||||
|
if len(dvPendingFields) == 0 {
|
||||||
|
// all fields are doc value persisted
|
||||||
|
return zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor)
|
||||||
|
}
|
||||||
|
|
||||||
|
// concurrently trigger the runtime doc value preparations for
|
||||||
|
// pending fields as well as the visit of the persisted doc values
|
||||||
|
errCh := make(chan error, 1)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer close(errCh)
|
||||||
|
err := ss.cachedDocs.prepareFields(fields, ss)
|
||||||
|
if err != nil {
|
||||||
|
errCh <- err
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// visit the persisted dv while the cache preparation is in progress
|
||||||
|
err = zaps.VisitDocumentFieldTerms(localDocNum, fields, visitor)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// err out if fieldCache preparation failed
|
||||||
|
err = <-errCh
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
visitDocumentFieldCacheTerms(localDocNum, dvPendingFields, ss, visitor)
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// else fallback to the in memory fieldCache
|
return prepareCacheVisitDocumentFieldTerms(localDocNum, fields, ss, visitor)
|
||||||
err = ss.cachedDocs.prepareFields(fields, ss)
|
}
|
||||||
|
|
||||||
|
func prepareCacheVisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||||
|
ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) error {
|
||||||
|
err := ss.cachedDocs.prepareFields(fields, ss)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
visitDocumentFieldCacheTerms(localDocNum, fields, ss, visitor)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func visitDocumentFieldCacheTerms(localDocNum uint64, fields []string,
|
||||||
|
ss *SegmentSnapshot, visitor index.DocumentFieldTermVisitor) {
|
||||||
|
|
||||||
for _, field := range fields {
|
for _, field := range fields {
|
||||||
if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
|
if cachedFieldDocs, exists := ss.cachedDocs.cache[field]; exists {
|
||||||
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
if tlist, exists := cachedFieldDocs.docs[localDocNum]; exists {
|
||||||
|
@ -436,5 +485,19 @@ func (i *IndexSnapshot) DocumentVisitFieldTerms(id index.IndexInternalID,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
}
|
||||||
|
|
||||||
|
func extractDvPendingFields(requestedFields, persistedFields []string) []string {
|
||||||
|
removeMap := map[string]struct{}{}
|
||||||
|
for _, str := range persistedFields {
|
||||||
|
removeMap[str] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
rv := make([]string, 0, len(requestedFields))
|
||||||
|
for _, s := range requestedFields {
|
||||||
|
if _, ok := removeMap[s]; !ok {
|
||||||
|
rv = append(rv, s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return rv
|
||||||
}
|
}
|
||||||
|
|
|
@ -18,7 +18,6 @@ import (
|
||||||
"sync"
|
"sync"
|
||||||
|
|
||||||
"github.com/RoaringBitmap/roaring"
|
"github.com/RoaringBitmap/roaring"
|
||||||
"github.com/blevesearch/bleve/index"
|
|
||||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -84,50 +83,6 @@ func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.DocumentFiel
|
||||||
return s.segment.VisitDocument(num, visitor)
|
return s.segment.VisitDocument(num, visitor)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SegmentSnapshot) DocumentVisitFieldTerms(num uint64, fields []string,
|
|
||||||
visitor index.DocumentFieldTermVisitor) error {
|
|
||||||
collection := make(map[string][][]byte)
|
|
||||||
// collect field indexed values
|
|
||||||
for _, field := range fields {
|
|
||||||
dict, err := s.Dictionary(field)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
dictItr := dict.Iterator()
|
|
||||||
var next *index.DictEntry
|
|
||||||
next, err = dictItr.Next()
|
|
||||||
for next != nil && err == nil {
|
|
||||||
postings, err2 := dict.PostingsList(next.Term, nil)
|
|
||||||
if err2 != nil {
|
|
||||||
return err2
|
|
||||||
}
|
|
||||||
postingsItr := postings.Iterator()
|
|
||||||
nextPosting, err2 := postingsItr.Next()
|
|
||||||
for err2 == nil && nextPosting != nil && nextPosting.Number() <= num {
|
|
||||||
if nextPosting.Number() == num {
|
|
||||||
// got what we're looking for
|
|
||||||
collection[field] = append(collection[field], []byte(next.Term))
|
|
||||||
}
|
|
||||||
nextPosting, err = postingsItr.Next()
|
|
||||||
}
|
|
||||||
if err2 != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
next, err = dictItr.Next()
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// invoke callback
|
|
||||||
for field, values := range collection {
|
|
||||||
for _, value := range values {
|
|
||||||
visitor(field, value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (s *SegmentSnapshot) Count() uint64 {
|
func (s *SegmentSnapshot) Count() uint64 {
|
||||||
|
|
||||||
rv := s.segment.Count()
|
rv := s.segment.Count()
|
||||||
|
|
|
@ -28,6 +28,7 @@ import (
|
||||||
var (
|
var (
|
||||||
IndexDynamic = true
|
IndexDynamic = true
|
||||||
StoreDynamic = true
|
StoreDynamic = true
|
||||||
|
DocValues = true // TODO revisit default?
|
||||||
)
|
)
|
||||||
|
|
||||||
// A FieldMapping describes how a specific item
|
// A FieldMapping describes how a specific item
|
||||||
|
@ -54,6 +55,10 @@ type FieldMapping struct {
|
||||||
IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
|
IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
|
||||||
IncludeInAll bool `json:"include_in_all,omitempty"`
|
IncludeInAll bool `json:"include_in_all,omitempty"`
|
||||||
DateFormat string `json:"date_format,omitempty"`
|
DateFormat string `json:"date_format,omitempty"`
|
||||||
|
|
||||||
|
// DocValues, if true, makes index uninverting possible for this field.
|
||||||
|
// It is useful for faceting and sorting queries.
|
||||||
|
DocValues bool `json:"docvalues,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewTextFieldMapping returns a default field mapping for text
|
// NewTextFieldMapping returns a default field mapping for text
|
||||||
|
@ -64,6 +69,7 @@ func NewTextFieldMapping() *FieldMapping {
|
||||||
Index: true,
|
Index: true,
|
||||||
IncludeTermVectors: true,
|
IncludeTermVectors: true,
|
||||||
IncludeInAll: true,
|
IncludeInAll: true,
|
||||||
|
DocValues: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,6 +77,7 @@ func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||||
rv := NewTextFieldMapping()
|
rv := NewTextFieldMapping()
|
||||||
rv.Store = im.StoreDynamic
|
rv.Store = im.StoreDynamic
|
||||||
rv.Index = im.IndexDynamic
|
rv.Index = im.IndexDynamic
|
||||||
|
rv.DocValues = im.DocValues
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -81,6 +88,7 @@ func NewNumericFieldMapping() *FieldMapping {
|
||||||
Store: true,
|
Store: true,
|
||||||
Index: true,
|
Index: true,
|
||||||
IncludeInAll: true,
|
IncludeInAll: true,
|
||||||
|
DocValues: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -88,6 +96,7 @@ func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||||
rv := NewNumericFieldMapping()
|
rv := NewNumericFieldMapping()
|
||||||
rv.Store = im.StoreDynamic
|
rv.Store = im.StoreDynamic
|
||||||
rv.Index = im.IndexDynamic
|
rv.Index = im.IndexDynamic
|
||||||
|
rv.DocValues = im.DocValues
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -98,6 +107,7 @@ func NewDateTimeFieldMapping() *FieldMapping {
|
||||||
Store: true,
|
Store: true,
|
||||||
Index: true,
|
Index: true,
|
||||||
IncludeInAll: true,
|
IncludeInAll: true,
|
||||||
|
DocValues: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -105,6 +115,7 @@ func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||||
rv := NewDateTimeFieldMapping()
|
rv := NewDateTimeFieldMapping()
|
||||||
rv.Store = im.StoreDynamic
|
rv.Store = im.StoreDynamic
|
||||||
rv.Index = im.IndexDynamic
|
rv.Index = im.IndexDynamic
|
||||||
|
rv.DocValues = im.DocValues
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -115,6 +126,7 @@ func NewBooleanFieldMapping() *FieldMapping {
|
||||||
Store: true,
|
Store: true,
|
||||||
Index: true,
|
Index: true,
|
||||||
IncludeInAll: true,
|
IncludeInAll: true,
|
||||||
|
DocValues: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -122,6 +134,7 @@ func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
|
||||||
rv := NewBooleanFieldMapping()
|
rv := NewBooleanFieldMapping()
|
||||||
rv.Store = im.StoreDynamic
|
rv.Store = im.StoreDynamic
|
||||||
rv.Index = im.IndexDynamic
|
rv.Index = im.IndexDynamic
|
||||||
|
rv.DocValues = im.DocValues
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,6 +145,7 @@ func NewGeoPointFieldMapping() *FieldMapping {
|
||||||
Store: true,
|
Store: true,
|
||||||
Index: true,
|
Index: true,
|
||||||
IncludeInAll: true,
|
IncludeInAll: true,
|
||||||
|
DocValues: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -147,6 +161,9 @@ func (fm *FieldMapping) Options() document.IndexingOptions {
|
||||||
if fm.IncludeTermVectors {
|
if fm.IncludeTermVectors {
|
||||||
rv |= document.IncludeTermVectors
|
rv |= document.IncludeTermVectors
|
||||||
}
|
}
|
||||||
|
if fm.DocValues {
|
||||||
|
rv |= document.DocValues
|
||||||
|
}
|
||||||
return rv
|
return rv
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -308,6 +325,11 @@ func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
case "docvalues":
|
||||||
|
err := json.Unmarshal(v, &fm.DocValues)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
invalidKeys = append(invalidKeys, k)
|
invalidKeys = append(invalidKeys, k)
|
||||||
}
|
}
|
||||||
|
|
|
@ -50,6 +50,7 @@ type IndexMappingImpl struct {
|
||||||
DefaultField string `json:"default_field"`
|
DefaultField string `json:"default_field"`
|
||||||
StoreDynamic bool `json:"store_dynamic"`
|
StoreDynamic bool `json:"store_dynamic"`
|
||||||
IndexDynamic bool `json:"index_dynamic"`
|
IndexDynamic bool `json:"index_dynamic"`
|
||||||
|
DocValues bool `json:"docvalues,omitempty"`
|
||||||
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
|
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
|
||||||
cache *registry.Cache
|
cache *registry.Cache
|
||||||
}
|
}
|
||||||
|
@ -154,6 +155,7 @@ func NewIndexMapping() *IndexMappingImpl {
|
||||||
DefaultField: defaultField,
|
DefaultField: defaultField,
|
||||||
IndexDynamic: IndexDynamic,
|
IndexDynamic: IndexDynamic,
|
||||||
StoreDynamic: StoreDynamic,
|
StoreDynamic: StoreDynamic,
|
||||||
|
DocValues: DocValues,
|
||||||
CustomAnalysis: newCustomAnalysis(),
|
CustomAnalysis: newCustomAnalysis(),
|
||||||
cache: registry.NewCache(),
|
cache: registry.NewCache(),
|
||||||
}
|
}
|
||||||
|
@ -217,6 +219,7 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
|
||||||
im.TypeMapping = make(map[string]*DocumentMapping)
|
im.TypeMapping = make(map[string]*DocumentMapping)
|
||||||
im.StoreDynamic = StoreDynamic
|
im.StoreDynamic = StoreDynamic
|
||||||
im.IndexDynamic = IndexDynamic
|
im.IndexDynamic = IndexDynamic
|
||||||
|
im.DocValues = DocValues
|
||||||
|
|
||||||
var invalidKeys []string
|
var invalidKeys []string
|
||||||
for k, v := range tmp {
|
for k, v := range tmp {
|
||||||
|
@ -271,6 +274,11 @@ func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
case "docvalues":
|
||||||
|
err := json.Unmarshal(v, &im.DocValues)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
invalidKeys = append(invalidKeys, k)
|
invalidKeys = append(invalidKeys, k)
|
||||||
}
|
}
|
||||||
|
|
|
@ -40,7 +40,8 @@ var mappingSource = []byte(`{
|
||||||
"store": true,
|
"store": true,
|
||||||
"index": true,
|
"index": true,
|
||||||
"include_term_vectors": true,
|
"include_term_vectors": true,
|
||||||
"include_in_all": true
|
"include_in_all": true,
|
||||||
|
"docvalues": true
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue