added support for composite fields

This commit is contained in:
parent 1f17195e7d
commit 70a8b03bed
@@ -8,7 +8,10 @@
// and limitations under the License.
package analysis

import ()

type TokenLocation struct {
    Field    string
    Start    int
    End      int
    Position int

@@ -19,7 +22,38 @@ type TokenFreq struct {
    Locations []*TokenLocation
}

func TokenFrequency(tokens TokenStream) []*TokenFreq {
type TokenFrequencies []*TokenFreq

func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) TokenFrequencies {
    // put existing tokens into a map
    index := make(map[string]*TokenFreq)
    for _, tf := range tfs {
        index[string(tf.Term)] = tf
    }
    // walk the new token frequencies
    for _, tf := range other {
        // set the remoteField value in incoming token freqs
        for _, l := range tf.Locations {
            l.Field = remoteField
        }
        existingTf, exists := index[string(tf.Term)]
        if exists {
            existingTf.Locations = append(existingTf.Locations, tf.Locations...)
        } else {
            index[string(tf.Term)] = tf
        }
    }
    // flatten map back to array
    rv := make(TokenFrequencies, len(index))
    i := 0
    for _, tf := range index {
        rv[i] = tf
        i += 1
    }
    return rv
}

func TokenFrequency(tokens TokenStream) TokenFrequencies {
    index := make(map[string]*TokenFreq)

    for _, token := range tokens {

@@ -44,7 +78,7 @@ func TokenFrequency(tokens TokenStream) []*TokenFreq {
        }
    }

    rv := make([]*TokenFreq, len(index))
    rv := make(TokenFrequencies, len(index))
    i := 0
    for _, tf := range index {
        rv[i] = tf
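For orientation, a brief usage sketch of the new MergeAll helper (not part of the commit; the field name "description" is illustrative): it folds the token frequencies of a second field into the receiver and tags the incoming locations with that field's name.

    title := TokenFrequency(TokenStream{
        &Token{Term: []byte("water"), Position: 1, Start: 0, End: 5},
    })
    desc := TokenFrequency(TokenStream{
        &Token{Term: []byte("water"), Position: 1, Start: 0, End: 5},
    })
    // merged holds a single *TokenFreq for "water"; the locations contributed
    // by desc now carry Field == "description".
    merged := title.MergeAll("description", desc)
    _ = merged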
@@ -0,0 +1,159 @@
package analysis

import (
    "reflect"
    "testing"
)

func TestTokenFrequency(t *testing.T) {
    tokens := TokenStream{
        &Token{
            Term:     []byte("water"),
            Position: 1,
            Start:    0,
            End:      5,
        },
        &Token{
            Term:     []byte("water"),
            Position: 2,
            Start:    6,
            End:      11,
        },
    }
    expectedResult := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    result := TokenFrequency(tokens)
    if !reflect.DeepEqual(result, expectedResult) {
        t.Errorf("expected %#v, got %#v", expectedResult, result)
    }
}

func TestTokenFrequenciesMergeAll(t *testing.T) {
    tf1 := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    tf2 := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    expectedResult := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
                &TokenLocation{
                    Field:    "tf2",
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Field:    "tf2",
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    tf1.MergeAll("tf2", tf2)
    if !reflect.DeepEqual(tf1, expectedResult) {
        t.Errorf("expected %#v, got %#v", expectedResult, tf1)
    }
}

func TestTokenFrequenciesMergeAllLeftEmpty(t *testing.T) {
    tf1 := TokenFrequencies{}
    tf2 := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    expectedResult := TokenFrequencies{
        &TokenFreq{
            Term: []byte("water"),
            Locations: []*TokenLocation{
                &TokenLocation{
                    Field:    "tf2",
                    Position: 1,
                    Start:    0,
                    End:      5,
                },
                &TokenLocation{
                    Field:    "tf2",
                    Position: 2,
                    Start:    6,
                    End:      11,
                },
            },
        },
    }
    result := tf1.MergeAll("tf2", tf2)
    if !reflect.DeepEqual(result, expectedResult) {
        t.Errorf("expected %#v, got %#v", expectedResult, result)
        //t.Logf("%#v", tf1[0])
    }
}
@@ -13,19 +13,26 @@ import (
)

type Document struct {
    ID     string  `json:"id"`
    Fields []Field `json:"fields"`
    ID              string  `json:"id"`
    Fields          []Field `json:"fields"`
    CompositeFields []*CompositeField
}

func NewDocument(id string) *Document {
    return &Document{
        ID:     id,
        Fields: make([]Field, 0),
        ID:              id,
        Fields:          make([]Field, 0),
        CompositeFields: make([]*CompositeField, 0),
    }
}

func (d *Document) AddField(f Field) *Document {
    d.Fields = append(d.Fields, f)
    switch f := f.(type) {
    case *CompositeField:
        d.CompositeFields = append(d.CompositeFields, f)
    default:
        d.Fields = append(d.Fields, f)
    }
    return d
}
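As a rough illustration of the routing above (not part of the commit; the "_all" name and the include/exclude lists are made up for the example), a composite field added through AddField lands in CompositeFields rather than Fields:

    doc := NewDocument("doc1")
    all := NewCompositeField("_all", true, []string{}, []string{"id"})
    doc.AddField(all)
    // len(doc.Fields) == 0, len(doc.CompositeFields) == 1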
@@ -15,6 +15,6 @@ import (
type Field interface {
    Name() string
    Options() IndexingOptions
    Analyzer() *analysis.Analyzer
    Analyze() (int, analysis.TokenFrequencies)
    Value() []byte
}
@@ -0,0 +1,73 @@
package document

import (
    "github.com/couchbaselabs/bleve/analysis"
)

const DEFAULT_COMPOSITE_INDEXING_OPTIONS = INDEX_FIELD

type CompositeField struct {
    name                 string
    includedFields       map[string]bool
    excludedFields       map[string]bool
    defaultInclude       bool
    options              IndexingOptions
    totalLength          int
    compositeFrequencies analysis.TokenFrequencies
}

func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
    return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DEFAULT_COMPOSITE_INDEXING_OPTIONS)
}

func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options IndexingOptions) *CompositeField {
    rv := &CompositeField{
        name:           name,
        options:        options,
        defaultInclude: defaultInclude,
        includedFields: make(map[string]bool, len(include)),
        excludedFields: make(map[string]bool, len(exclude)),
    }

    for _, i := range include {
        rv.includedFields[i] = true
    }
    for _, e := range exclude {
        rv.excludedFields[e] = true
    }

    return rv
}

func (c *CompositeField) Name() string {
    return c.name
}

func (c *CompositeField) Options() IndexingOptions {
    return c.options
}

func (c *CompositeField) Analyze() (int, analysis.TokenFrequencies) {
    return c.totalLength, c.compositeFrequencies
}

func (c *CompositeField) Value() []byte {
    return []byte{}
}

func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
    shouldInclude := c.defaultInclude
    _, fieldShouldBeIncluded := c.includedFields[field]
    if fieldShouldBeIncluded {
        shouldInclude = true
    }
    _, fieldShouldBeExcluded := c.excludedFields[field]
    if fieldShouldBeExcluded {
        shouldInclude = false
    }

    if shouldInclude {
        c.totalLength += length
        c.compositeFrequencies = c.compositeFrequencies.MergeAll(field, freq)
    }
}
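A small sketch of how Compose accumulates contributions, using only the constructors and types defined in this commit (the "_all", "title", and "id" names are illustrative; assume this runs inside the document package):

    cf := NewCompositeField("_all", true, nil, []string{"id"})
    tokens := analysis.TokenStream{
        &analysis.Token{Term: []byte("water"), Position: 1, Start: 0, End: 5},
    }
    freqs := analysis.TokenFrequency(tokens)
    cf.Compose("title", len(tokens), freqs) // included via defaultInclude
    cf.Compose("id", len(tokens), freqs)    // explicitly excluded, so ignored
    // totalLength == 1; the merged locations carry Field == "title"
    totalLength, merged := cf.Analyze()
    _, _ = totalLength, merged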
@@ -42,8 +42,11 @@ func (t *TextField) Options() IndexingOptions {
    return t.options
}

func (t *TextField) Analyzer() *analysis.Analyzer {
    return t.analyzer
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
    tokens := t.analyzer.Analyze(t.Value())
    fieldLength := len(tokens) // number of tokens in this doc field
    tokenFreqs := analysis.TokenFrequency(tokens)
    return fieldLength, tokenFreqs
}

func (t *TextField) Value() []byte {
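With this interface change, indexing code no longer reaches for a field's analyzer and tokenizes the value itself; it asks the field for its own analysis. A hedged fragment of the calling pattern, mirroring the norm computation used in indexField later in this commit (someField is an assumed document.Field value, not something defined here):

    fieldLength, tokenFreqs := someField.Analyze()
    fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
    _, _ = fieldNorm, tokenFreqs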
@@ -225,7 +225,7 @@ func (udc *UpsideDownCouch) Close() {
}

type termMap map[string]bool
type fieldTermMap map[int]termMap
type fieldTermMap map[uint16]termMap

func (udc *UpsideDownCouch) Update(doc *document.Document) error {
    // first we lookup the backindex row for the doc id if it exists

@@ -241,10 +241,10 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
    if backIndexRow != nil {
        isAdd = false
        for _, entry := range backIndexRow.entries {
            existingTermMap, fieldExists := existingTermFieldMaps[int(entry.field)]
            existingTermMap, fieldExists := existingTermFieldMaps[entry.field]
            if !fieldExists {
                existingTermMap = make(termMap, 0)
                existingTermFieldMaps[int(entry.field)] = existingTermMap
                existingTermFieldMaps[entry.field] = existingTermMap
            }
            existingTermMap[string(entry.term)] = true
        }

@@ -265,75 +265,55 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
    backIndexStoredFields := make([]uint16, 0)

    for _, field := range doc.Fields {
        fieldIndex, fieldExists := udc.fieldIndexes[field.Name()]
        if !fieldExists {
            // assign next field id
            fieldIndex = uint16(udc.lastFieldIndex + 1)
            udc.fieldIndexes[field.Name()] = fieldIndex
            // ensure this batch adds a row for this field
            row := NewFieldRow(uint16(fieldIndex), field.Name())
            updateRows = append(updateRows, row)
            udc.lastFieldIndex = int(fieldIndex)
        fieldIndex, newFieldRow := udc.fieldNameToFieldIndex(field.Name())
        if newFieldRow != nil {
            updateRows = append(updateRows, newFieldRow)
        }

        existingTermMap, fieldExistedInDoc := existingTermFieldMaps[int(fieldIndex)]
        existingTermMap := existingTermFieldMaps[fieldIndex]

        if field.Options().IsIndexed() {

            analyzer := field.Analyzer()
            tokens := analyzer.Analyze(field.Value())
            fieldLength := len(tokens) // number of tokens in this doc field
            fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
            tokenFreqs := analysis.TokenFrequency(tokens)
            for _, tf := range tokenFreqs {
                var termFreqRow *TermFrequencyRow
                if field.Options().IncludeTermVectors() {
                    tv := termVectorsFromTokenFreq(uint16(fieldIndex), tf)
                    termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
                } else {
                    termFreqRow = NewTermFrequencyRow(tf.Term, uint16(fieldIndex), doc.ID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
                }
            fieldLength, tokenFreqs := field.Analyze()

                // record the back index entry
                backIndexEntry := BackIndexEntry{tf.Term, uint16(fieldIndex)}
                backIndexEntries = append(backIndexEntries, &backIndexEntry)

                // remove the entry from the map of existing term fields if it exists
                if fieldExistedInDoc {
                    termString := string(tf.Term)
                    _, ok := existingTermMap[termString]
                    if ok {
                        // this is an update
                        updateRows = append(updateRows, termFreqRow)
                        // this term existed last time, delete it from that map
                        delete(existingTermMap, termString)
                    } else {
                        // this is an add
                        addRows = append(addRows, termFreqRow)
                    }
                } else {
                    // this is an add
                    addRows = append(addRows, termFreqRow)
                }
            // see if any of the composite fields need this
            for _, compositeField := range doc.CompositeFields {
                compositeField.Compose(field.Name(), fieldLength, tokenFreqs)
            }

            // encode this field
            indexAddRows, indexUpdateRows, indexBackIndexEntries := udc.indexField(doc.ID, field, fieldIndex, fieldLength, tokenFreqs, existingTermMap)
            addRows = append(addRows, indexAddRows...)
            updateRows = append(updateRows, indexUpdateRows...)
            backIndexEntries = append(backIndexEntries, indexBackIndexEntries...)
        }

        if field.Options().IsStored() {
            storedRow := NewStoredRow(doc.ID, uint16(fieldIndex), field.Value())
            storeAddRows, storeUpdateRows := udc.storeField(doc.ID, field, fieldIndex, existingStoredFieldMap)
            addRows = append(addRows, storeAddRows...)
            updateRows = append(updateRows, storeUpdateRows...)
            backIndexStoredFields = append(backIndexStoredFields, fieldIndex)
            _, ok := existingStoredFieldMap[uint16(fieldIndex)]
            if ok {
                // this is an update
                updateRows = append(updateRows, storedRow)
                // this field was stored last time, delete it from that map
                delete(existingStoredFieldMap, uint16(fieldIndex))
            } else {
                addRows = append(addRows, storedRow)
            }
        }

    }

    // now index the composite fields
    for _, compositeField := range doc.CompositeFields {
        fieldIndex, newFieldRow := udc.fieldNameToFieldIndex(compositeField.Name())
        if newFieldRow != nil {
            updateRows = append(updateRows, newFieldRow)
        }
        existingTermMap := existingTermFieldMaps[fieldIndex]
        if compositeField.Options().IsIndexed() {

            fieldLength, tokenFreqs := compositeField.Analyze()
            // encode this field
            indexAddRows, indexUpdateRows, indexBackIndexEntries := udc.indexField(doc.ID, compositeField, fieldIndex, fieldLength, tokenFreqs, existingTermMap)
            addRows = append(addRows, indexAddRows...)
            updateRows = append(updateRows, indexUpdateRows...)
            backIndexEntries = append(backIndexEntries, indexBackIndexEntries...)
        }
    }

    // build the back index row
    backIndexRow = NewBackIndexRow(doc.ID, backIndexEntries, backIndexStoredFields)
    updateRows = append(updateRows, backIndexRow)

@@ -361,6 +341,79 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
    return err
}

func (udc *UpsideDownCouch) storeField(docId string, field document.Field, fieldIndex uint16, existingStoredFieldMap map[uint16]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow) {
    updateRows := make([]UpsideDownCouchRow, 0)
    addRows := make([]UpsideDownCouchRow, 0)
    storedRow := NewStoredRow(docId, fieldIndex, field.Value())
    _, ok := existingStoredFieldMap[fieldIndex]
    if ok {
        // this is an update
        updateRows = append(updateRows, storedRow)
        // this field was stored last time, delete it from that map
        delete(existingStoredFieldMap, fieldIndex)
    } else {
        addRows = append(addRows, storedRow)
    }
    return addRows, updateRows
}

func (udc *UpsideDownCouch) indexField(docId string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, existingTermMap termMap) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexEntry) {

    updateRows := make([]UpsideDownCouchRow, 0)
    addRows := make([]UpsideDownCouchRow, 0)
    backIndexEntries := make([]*BackIndexEntry, 0)
    fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))

    for _, tf := range tokenFreqs {
        var termFreqRow *TermFrequencyRow
        if field.Options().IncludeTermVectors() {
            tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
            updateRows = append(updateRows, newFieldRows...)
            termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docId, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
        } else {
            termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docId, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
        }

        // record the back index entry
        backIndexEntry := BackIndexEntry{tf.Term, fieldIndex}
        backIndexEntries = append(backIndexEntries, &backIndexEntry)

        // remove the entry from the map of existing term fields if it exists
        if existingTermMap != nil {
            termString := string(tf.Term)
            _, ok := existingTermMap[termString]
            if ok {
                // this is an update
                updateRows = append(updateRows, termFreqRow)
                // this term existed last time, delete it from that map
                delete(existingTermMap, termString)
            } else {
                // this is an add
                addRows = append(addRows, termFreqRow)
            }
        } else {
            // this is an add
            addRows = append(addRows, termFreqRow)
        }
    }

    return addRows, updateRows, backIndexEntries
}

func (udc *UpsideDownCouch) fieldNameToFieldIndex(fieldName string) (uint16, *FieldRow) {
    var fieldRow *FieldRow
    fieldIndex, fieldExists := udc.fieldIndexes[fieldName]
    if !fieldExists {
        // assign next field id
        fieldIndex = uint16(udc.lastFieldIndex + 1)
        udc.fieldIndexes[fieldName] = fieldIndex
        // ensure this batch adds a row for this field
        fieldRow = NewFieldRow(uint16(fieldIndex), fieldName)
        udc.lastFieldIndex = int(fieldIndex)
    }
    return fieldIndex, fieldRow
}

func (udc *UpsideDownCouch) Delete(id string) error {
    // lookup the back index row
    backIndexRow, err := udc.backIndexRowForDoc(id)

@@ -453,7 +506,6 @@ func (udc *UpsideDownCouch) DumpDoc(id string) ([]interface{}, error) {
        keys = append(keys, key)
    }
    for _, entry := range back.entries {
        //log.Printf("term: `%s`, field: %d", entry.term, entry.field)
        tfr := NewTermFrequencyRow(entry.term, entry.field, id, 0, 0)
        key := tfr.Key()
        keys = append(keys, key)

@@ -515,12 +567,22 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
    return len(tf.Locations)
}

func termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) []*TermVector {
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) ([]*TermVector, []UpsideDownCouchRow) {
    rv := make([]*TermVector, len(tf.Locations))
    newFieldRows := make([]UpsideDownCouchRow, 0)

    for i, l := range tf.Locations {
        var newFieldRow *FieldRow
        fieldIndex := field
        if l.Field != "" {
            // lookup correct field
            fieldIndex, newFieldRow = udc.fieldNameToFieldIndex(l.Field)
            if newFieldRow != nil {
                newFieldRows = append(newFieldRows, newFieldRow)
            }
        }
        tv := TermVector{
            field: field,
            field: fieldIndex,
            pos:   uint64(l.Position),
            start: uint64(l.Start),
            end:   uint64(l.End),

@@ -528,7 +590,7 @@ func termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) []*TermVector {
        rv[i] = &tv
    }

    return rv
    return rv, newFieldRows
}

func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
@@ -14,10 +14,11 @@ import (
)

type SyntaxQuery struct {
    Query    string  `json:"query"`
    BoostVal float64 `json:"boost,omitempty"`
    Explain  bool    `json:"explain,omitempty"`
    mapping  document.Mapping
    Query        string  `json:"query"`
    BoostVal     float64 `json:"boost,omitempty"`
    Explain      bool    `json:"explain,omitempty"`
    DefaultField string  `json:"default_field,omitemtpy"`
    mapping      document.Mapping
}

func (q *SyntaxQuery) Boost() float64 {

@@ -25,7 +26,7 @@ func (q *SyntaxQuery) Boost() float64 {
}

func (q *SyntaxQuery) Searcher(index index.Index) (Searcher, error) {
    newQuery, err := ParseQuerySyntax(q.Query, q.mapping)
    newQuery, err := ParseQuerySyntax(q.Query, q.mapping, q.DefaultField)
    if err != nil {
        return nil, err
    }
@@ -21,11 +21,12 @@ var parsingMustNotList *TermDisjunctionQuery
var parsingShouldList *TermDisjunctionQuery
var parsingMapping document.Mapping

func ParseQuerySyntax(query string, mapping document.Mapping) (rq Query, err error) {
func ParseQuerySyntax(query string, mapping document.Mapping, defaultField string) (rq Query, err error) {
    parserMutex.Lock()
    defer parserMutex.Unlock()

    parsingMapping = mapping
    parsingDefaultField = defaultField

    parsingMustList = &TermConjunctionQuery{
        Terms: make([]Query, 0),
@@ -133,19 +133,32 @@ func (s *TermQueryScorer) Score(termMatch *index.TermFieldDoc) *DocumentMatch {
    }

    if termMatch.Vectors != nil && len(termMatch.Vectors) > 0 {
        locations := make(Locations, len(termMatch.Vectors))
        for i, v := range termMatch.Vectors {

        rv.Locations = make(FieldTermLocationMap)
        for _, v := range termMatch.Vectors {
            tlm := rv.Locations[v.Field]
            if tlm == nil {
                tlm = make(TermLocationMap)
            }

            loc := Location{
                Pos:   float64(v.Pos),
                Start: float64(v.Start),
                End:   float64(v.End),
            }
            locations[i] = &loc

            locations := tlm[s.query.Term]
            if locations == nil {
                locations = make(Locations, 1)
                locations[0] = &loc
            } else {
                locations = append(locations, &loc)
            }
            tlm[s.query.Term] = locations

            rv.Locations[v.Field] = tlm
        }
        tlm := make(TermLocationMap)
        tlm[s.query.Term] = locations
        rv.Locations = make(FieldTermLocationMap)
        rv.Locations[s.query.Field] = tlm

    }

    return &rv
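To make the new shape concrete, here is a hand-built value of the nested map the scorer now fills (the field and term names are illustrative; this assumes TermLocationMap and FieldTermLocationMap accept composite literals, as their make/index usage above suggests):

    loc := Location{Pos: 2, Start: 6, End: 11}
    tlm := TermLocationMap{"water": Locations{&loc}}
    ftlm := FieldTermLocationMap{"description": tlm}
    // ftlm maps field name -> term -> locations of that term within the field
    _ = ftlm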