parent
7bb58e1be4
commit
8efbd556a3
@@ -24,34 +24,55 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
 		Rows: make([]index.IndexRow, 0, 100),
 	}
 
+	// information we collate as we merge fields with same name
+	fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
+	fieldLengths := make(map[uint16]int)
+	fieldIncludeTermVectors := make(map[uint16]bool)
+	fieldNames := make(map[uint16]string)
+
 	for _, field := range d.Fields {
 		fieldIndex, newFieldRow := f.fieldIndexOrNewRow(field.Name())
 		if newFieldRow != nil {
 			rv.Rows = append(rv.Rows, newFieldRow)
 		}
+		fieldNames[fieldIndex] = field.Name()
 
 		// add the _id row
 		rv.Rows = append(rv.Rows, NewTermFreqRow(0, nil, []byte(d.ID), d.Number, 0, 0, nil))
 
 		if field.Options().IsIndexed() {
 
 			fieldLength, tokenFreqs := field.Analyze()
 
-			// see if any of the composite fields need this
-			for _, compositeField := range d.CompositeFields {
-				compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
+			existingFreqs := fieldTermFreqs[fieldIndex]
+			if existingFreqs == nil {
+				fieldTermFreqs[fieldIndex] = tokenFreqs
+			} else {
+				existingFreqs.MergeAll(field.Name(), tokenFreqs)
+				fieldTermFreqs[fieldIndex] = existingFreqs
 			}
 
-			// encode this field
-			indexRows := f.indexField(d.ID, d.Number, field, fieldIndex, fieldLength, tokenFreqs)
-			rv.Rows = append(rv.Rows, indexRows...)
+			fieldLengths[fieldIndex] += fieldLength
+			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
 		}
 
 		if field.Options().IsStored() {
 			storeRow := f.storeField(d.ID, d.Number, field, fieldIndex)
 			rv.Rows = append(rv.Rows, storeRow)
 		}
 	}
 
+	// walk through the collated information and process
+	// once for each indexed field (unique name)
+	for fieldIndex, tokenFreqs := range fieldTermFreqs {
+		fieldLength := fieldLengths[fieldIndex]
+		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
+
+		// see if any of the composite fields need this
+		for _, compositeField := range d.CompositeFields {
+			compositeField.Compose(fieldNames[fieldIndex], fieldLength, tokenFreqs)
+		}
+
+		// encode this field
+		indexRows := f.indexField(d.ID, d.Number, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
+		rv.Rows = append(rv.Rows, indexRows...)
+	}
+
 	// now index the composite fields
@@ -63,7 +84,7 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
 		if compositeField.Options().IsIndexed() {
 			fieldLength, tokenFreqs := compositeField.Analyze()
 			// encode this field
-			indexRows := f.indexField(d.ID, d.Number, compositeField, fieldIndex, fieldLength, tokenFreqs)
+			indexRows := f.indexField(d.ID, d.Number, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
 			rv.Rows = append(rv.Rows, indexRows...)
 		}
 	}
@@ -71,14 +92,14 @@ func (f *Firestorm) Analyze(d *document.Document) *index.AnalysisResult {
 	return rv
 }
 
-func (f *Firestorm) indexField(docID string, docNum uint64, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
+func (f *Firestorm) indexField(docID string, docNum uint64, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) []index.IndexRow {
 
 	rows := make([]index.IndexRow, 0, 100)
 	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
 
 	for _, tf := range tokenFreqs {
 		var termFreqRow *TermFreqRow
-		if field.Options().IncludeTermVectors() {
+		if includeTermVectors {
 			tv, newFieldRows := f.termVectorsFromTokenFreq(fieldIndex, tf)
 			rows = append(rows, newFieldRows...)
 			termFreqRow = NewTermFreqRow(fieldIndex, tf.Term, []byte(docID), docNum, uint64(tf.Frequency()), fieldNorm, tv)
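Editor's note: the heart of this change is the collation step, where token frequencies produced by several fields sharing one name (array elements) are merged into a single map entry, and index rows are then emitted once per unique field name. Below is a minimal, self-contained sketch of that pattern; the simplified TokenFreq/TokenFrequencies types and MergeAll method are illustrative stand-ins for bleve's analysis package, not its actual API.

package main

import "fmt"

// TokenFreq is a simplified stand-in for bleve's analysis.TokenFreq.
type TokenFreq struct {
	Term      string
	Positions []int // one entry per occurrence, across all array elements
}

// TokenFrequencies maps a term to its merged frequency information.
type TokenFrequencies map[string]*TokenFreq

// MergeAll folds another field instance's frequencies into this one,
// loosely mirroring what the commit relies on in bleve's analysis package.
func (t TokenFrequencies) MergeAll(other TokenFrequencies) {
	for term, tf := range other {
		if existing, ok := t[term]; ok {
			existing.Positions = append(existing.Positions, tf.Positions...)
		} else {
			t[term] = tf
		}
	}
}

func main() {
	// two instances of the same field name, e.g. Messages[0] and Messages[1]
	fieldTermFreqs := make(map[uint16]TokenFrequencies)
	instances := []TokenFrequencies{
		{"bleve": {Term: "bleve", Positions: []int{1}}},
		{"bleve": {Term: "bleve", Positions: []int{1}}},
	}
	const fieldIndex uint16 = 1
	for _, freqs := range instances {
		if existing := fieldTermFreqs[fieldIndex]; existing == nil {
			fieldTermFreqs[fieldIndex] = freqs
		} else {
			existing.MergeAll(freqs)
		}
	}
	// both occurrences survive under one field entry
	fmt.Println(len(fieldTermFreqs[fieldIndex]["bleve"].Positions)) // prints 2
}

Before this fix, the second instance of a same-named field overwrote (or re-composed) the first, which is exactly the array-positions loss tested further down.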
@@ -80,8 +80,8 @@ func TestAnalysis(t *testing.T) {
 			Rows: []index.IndexRow{
 				NewFieldRow(1, "name"),
 				NewTermFreqRow(0, nil, []byte("a"), 1, 0, 0.0, nil),
-				NewTermFreqRow(1, []byte("test"), []byte("a"), 1, 1, 1.0, []*TermVector{NewTermVector(1, 1, 0, 4, nil)}),
 				NewStoredRow([]byte("a"), 1, 1, nil, []byte("ttest")),
+				NewTermFreqRow(1, []byte("test"), []byte("a"), 1, 1, 1.0, []*TermVector{NewTermVector(1, 1, 0, 4, nil)}),
 			},
 		},
 	},
@@ -10,6 +10,7 @@
 package upside_down
 
 import (
+	"github.com/blevesearch/bleve/analysis"
 	"github.com/blevesearch/bleve/document"
 	"github.com/blevesearch/bleve/index"
 )
@@ -24,25 +25,34 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 	backIndexTermEntries := make([]*BackIndexTermEntry, 0)
 	backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
 
+	// information we collate as we merge fields with same name
+	fieldTermFreqs := make(map[uint16]analysis.TokenFrequencies)
+	fieldLengths := make(map[uint16]int)
+	fieldIncludeTermVectors := make(map[uint16]bool)
+	fieldNames := make(map[uint16]string)
+
+	// walk all the fields, record stored fields now
+	// place information about indexed fields into map
+	// this collates information across fields with
+	// same names (arrays)
 	for _, field := range d.Fields {
 		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
 		if newFieldRow != nil {
 			rv.Rows = append(rv.Rows, newFieldRow)
 		}
+		fieldNames[fieldIndex] = field.Name()
 
 		if field.Options().IsIndexed() {
 
 			fieldLength, tokenFreqs := field.Analyze()
 
-			// see if any of the composite fields need this
-			for _, compositeField := range d.CompositeFields {
-				compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
+			existingFreqs := fieldTermFreqs[fieldIndex]
+			if existingFreqs == nil {
+				fieldTermFreqs[fieldIndex] = tokenFreqs
+			} else {
+				existingFreqs.MergeAll(field.Name(), tokenFreqs)
+				fieldTermFreqs[fieldIndex] = existingFreqs
 			}
 
-			// encode this field
-			indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, field, fieldIndex, fieldLength, tokenFreqs)
-			rv.Rows = append(rv.Rows, indexRows...)
-			backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
+			fieldLengths[fieldIndex] += fieldLength
+			fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
 		}
 
 		if field.Options().IsStored() {
@@ -53,6 +63,23 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 
 	}
 
+	// walk through the collated information and process
+	// once for each indexed field (unique name)
+	for fieldIndex, tokenFreqs := range fieldTermFreqs {
+		fieldLength := fieldLengths[fieldIndex]
+		includeTermVectors := fieldIncludeTermVectors[fieldIndex]
+
+		// see if any of the composite fields need this
+		for _, compositeField := range d.CompositeFields {
+			compositeField.Compose(fieldNames[fieldIndex], fieldLength, tokenFreqs)
+		}
+
+		// encode this field
+		indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
+		rv.Rows = append(rv.Rows, indexRows...)
+		backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
+	}
+
 	// now index the composite fields
 	for _, compositeField := range d.CompositeFields {
 		fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(compositeField.Name())
@@ -62,7 +89,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
 		if compositeField.Options().IsIndexed() {
 			fieldLength, tokenFreqs := compositeField.Analyze()
 			// encode this field
-			indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, compositeField, fieldIndex, fieldLength, tokenFreqs)
+			indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
 			rv.Rows = append(rv.Rows, indexRows...)
 			backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
 		}
@@ -502,7 +502,7 @@ func encodeFieldType(f document.Field) byte {
 	return fieldType
 }
 
-func (udc *UpsideDownCouch) indexField(docID string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
+func (udc *UpsideDownCouch) indexField(docID string, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
 
 	rows := make([]index.IndexRow, 0, 100)
 	backIndexTermEntries := make([]*BackIndexTermEntry, 0, len(tokenFreqs))
@@ -510,7 +510,7 @@ func (udc *UpsideDownCouch) indexField(docID string, field document.Field, field
 
 	for k, tf := range tokenFreqs {
 		var termFreqRow *TermFrequencyRow
-		if field.Options().IncludeTermVectors() {
+		if includeTermVectors {
 			tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
 			rows = append(rows, newFieldRows...)
 			termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
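Editor's note: both backends make the same signature change here. indexField used to receive the document.Field and read field.Options().IncludeTermVectors() itself, but after collation a single map entry can represent several Field values, so there is no one Field to pass; the caller resolves the option once per unique field name and hands over a plain bool. A rough, self-contained sketch of the refactored shape (the string "rows" and TermFreq type are illustrative, not bleve's real row types):

package main

import (
	"fmt"
	"math"
)

// TermFreq is a simplified stand-in for a merged term's statistics.
type TermFreq struct {
	Term      string
	Frequency int
}

// indexField sketches the per-unique-field encoding pass: it no longer
// needs a document.Field, only the already-resolved includeTermVectors flag.
func indexField(docID string, includeTermVectors bool, fieldIndex uint16,
	fieldLength int, tokenFreqs map[string]TermFreq) []string {

	rows := make([]string, 0, len(tokenFreqs))
	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
	for _, tf := range tokenFreqs {
		row := fmt.Sprintf("doc=%s term=%s field=%d freq=%d norm=%f",
			docID, tf.Term, fieldIndex, tf.Frequency, fieldNorm)
		if includeTermVectors {
			row += " +vectors" // term vector rows would be built here
		}
		rows = append(rows, row)
	}
	return rows
}

func main() {
	// frequencies already merged across both "bleve" occurrences
	freqs := map[string]TermFreq{"bleve": {Term: "bleve", Frequency: 2}}
	for _, r := range indexField("k", true, 1, 6, freqs) {
		fmt.Println(r)
	}
}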
@@ -22,8 +22,9 @@ import (
 	"time"
 
 	"encoding/json"
-	"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
 	"strconv"
+
+	"github.com/blevesearch/bleve/analysis/analyzers/keyword_analyzer"
 )
 
 func TestCrud(t *testing.T) {
@@ -1229,3 +1230,84 @@ func TestDateTimeFieldMappingIssue287(t *testing.T) {
 		t.Fatal(err)
 	}
 }
+
+func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
+	defer func() {
+		err := os.RemoveAll("testidx")
+		if err != nil {
+			t.Fatal(err)
+		}
+	}()
+
+	index, err := New("testidx", NewIndexMapping())
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// index a document with an array of strings
+	err = index.Index("k", struct {
+		Messages []string
+		Another  string
+		MoreData []string
+	}{
+		Messages: []string{
+			"bleve",
+			"bleve",
+		},
+		Another: "text",
+		MoreData: []string{
+			"a",
+			"b",
+			"c",
+			"bleve",
+		},
+	})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// search for it in the messages field
+	tq := NewTermQuery("bleve").SetField("Messages")
+	tsr := NewSearchRequest(tq)
+	results, err := index.Search(tsr)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if results.Total != 1 {
+		t.Fatalf("expected 1 result, got %d", results.Total)
+	}
+	if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
+		t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
+	}
+	if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
+		t.Errorf("expected array position to be 0")
+	}
+	if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
+		t.Errorf("expected array position to be 1")
+	}
+
+	// search for it in all
+	tq = NewTermQuery("bleve")
+	tsr = NewSearchRequest(tq)
+	results, err = index.Search(tsr)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if results.Total != 1 {
+		t.Fatalf("expected 1 result, got %d", results.Total)
+	}
+	if len(results.Hits[0].Locations["Messages"]["bleve"]) != 2 {
+		t.Fatalf("expected 2 locations of 'bleve', got %d", len(results.Hits[0].Locations["Messages"]["bleve"]))
+	}
+	if results.Hits[0].Locations["Messages"]["bleve"][0].ArrayPositions[0] != 0 {
+		t.Errorf("expected array position to be 0")
+	}
+	if results.Hits[0].Locations["Messages"]["bleve"][1].ArrayPositions[0] != 1 {
+		t.Errorf("expected array position to be 1")
+	}
+
+	err = index.Close()
+	if err != nil {
+		t.Fatal(err)
+	}
+}
@@ -0,0 +1,44 @@
+{
+    "salary": 104561.8,
+    "_type": "emp",
+    "name": "Deirdre Reed",
+    "mutated": 0,
+    "is_manager": true,
+    "dept": "Accounts",
+    "join_date": "2003-05-28T21:29:00",
+    "manages": {
+        "team_size": 9,
+        "reports": [
+            "Gallia Julián",
+            "Duvessa Nicolás",
+            "Beryl Thomas",
+            "Deirdre Julián",
+            "Antonia Gerónimo",
+            "Ciara Young",
+            "Riona Richardson IX",
+            "Severin Jr.",
+            "Perdita Morgan"
+        ]
+    },
+    "languages_known": [
+        "English",
+        "Spanish",
+        "German",
+        "Italian",
+        "French",
+        "Arabic",
+        "Africans",
+        "Hindi",
+        "Vietnamese",
+        "Urdu",
+        "Dutch",
+        "Quechua",
+        "Japanese",
+        "Chinese",
+        "Nepalese",
+        "Thai",
+        "Malay"
+    ],
+    "emp_id": "10508560",
+    "email": "deirdre@mcdiabetes.com"
+}
@@ -0,0 +1 @@
+{}
@@ -0,0 +1,38 @@
+[
+    {
+        "search": {
+            "from": 0,
+            "size": 10,
+            "query": {
+                "field": "manages.reports",
+                "term": "julián"
+            }
+        },
+        "result": {
+            "total_hits": 1,
+            "hits": [
+                {
+                    "id": "emp10508560",
+                    "locations": {
+                        "manages.reports": {
+                            "julián": [
+                                {
+                                    "pos": 2,
+                                    "start": 7,
+                                    "end": 14,
+                                    "array_positions":[0]
+                                },
+                                {
+                                    "pos": 2,
+                                    "start": 8,
+                                    "end": 15,
+                                    "array_positions":[3]
+                                }
+                            ]
+                        }
+                    }
+                }
+            ]
+        }
+    }
+]
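Editor's note: this expected result asserts that "julián" matches at array positions 0 and 3 of manages.reports, i.e. in "Gallia Julián" and "Deirdre Julián" from the fixture above. The byte offsets agree too ("Gallia " is 7 bytes and "Deirdre " is 8, and "Julián" spans 7 bytes since "á" is two bytes in UTF-8), and "julián" is the second token of each name, hence "pos": 2. A quick stand-alone check of the array positions against the fixture:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// the manages.reports array from the fixture above
	reports := []string{
		"Gallia Julián", "Duvessa Nicolás", "Beryl Thomas", "Deirdre Julián",
		"Antonia Gerónimo", "Ciara Young", "Riona Richardson IX",
		"Severin Jr.", "Perdita Morgan",
	}
	for i, r := range reports {
		// lowercase to match the analyzed term "julián"
		if strings.Contains(strings.ToLower(r), "julián") {
			fmt.Println("array position:", i) // prints 0 and 3
		}
	}
}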