Fix storing/retrieving numeric and date fields.
Also adds the ability to request that stored fields be returned with search results. Closes #55, closes #56, and closes #58.
This commit is contained in:
parent
d84187fd24
commit
41d4f67ee2
|
@ -16,7 +16,7 @@ import (
|
|||
"github.com/couchbaselabs/bleve/numeric_util"
|
||||
)
|
||||
|
||||
const DEFAULT_DATETIME_INDEXING_OPTIONS = INDEX_FIELD
|
||||
const DEFAULT_DATETIME_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
|
||||
|
||||
const DEFAULT_DATETIME_PRECISION_STEP uint = 4
|
||||
|
||||
|
@ -74,12 +74,28 @@ func (n *DateTimeField) Value() []byte {
|
|||
return n.value
|
||||
}
|
||||
|
||||
func (n *DateTimeField) DateTime() (time.Time, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return time.Time{}, err
|
||||
}
|
||||
return time.Unix(0, i64), nil
|
||||
}
|
||||
|
||||
func (n *DateTimeField) GoString() string {
|
||||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func NewDateTimeFieldFromBytes(name string, value []byte) *DateTimeField {
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
value: value,
|
||||
options: DEFAULT_DATETIME_INDEXING_OPTIONS,
|
||||
}
|
||||
}
|
||||
|
||||
func NewDateTimeField(name string, dt time.Time) *DateTimeField {
|
||||
return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_NUMERIC_INDEXING_OPTIONS)
|
||||
return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_DATETIME_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewDateTimeFieldWithIndexingOptions(name string, dt time.Time, options IndexingOptions) *DateTimeField {
|
||||
|
|
|
@ -15,7 +15,7 @@ import (
|
|||
"github.com/couchbaselabs/bleve/numeric_util"
|
||||
)
|
||||
|
||||
const DEFAULT_NUMERIC_INDEXING_OPTIONS = INDEX_FIELD
|
||||
const DEFAULT_NUMERIC_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
|
||||
|
||||
const DEFAULT_PRECISION_STEP uint = 4
|
||||
|
||||
|
@ -73,10 +73,26 @@ func (n *NumericField) Value() []byte {
|
|||
return n.value
|
||||
}
|
||||
|
||||
func (n *NumericField) Number() (float64, error) {
|
||||
i64, err := n.value.Int64()
|
||||
if err != nil {
|
||||
return 0.0, err
|
||||
}
|
||||
return numeric_util.Int64ToFloat64(i64), nil
|
||||
}
|
||||
|
||||
func (n *NumericField) GoString() string {
|
||||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func NewNumericFieldFromBytes(name string, value []byte) *NumericField {
|
||||
return &NumericField{
|
||||
name: name,
|
||||
value: value,
|
||||
options: DEFAULT_NUMERIC_INDEXING_OPTIONS,
|
||||
}
|
||||
}
|
||||
|
||||
func NewNumericField(name string, number float64) *NumericField {
|
||||
return NewNumericFieldWithIndexingOptions(name, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
|
||||
}
|
||||
|
|
|
@ -388,6 +388,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
|
|||
// StoredRow is the index row holding the stored value of one field of one
// document.
type StoredRow struct {
	// doc is the document identifier, as bytes.
	doc []byte
	// field is the numeric index of the field within the index.
	field uint16
	// typ is a one-byte tag identifying the kind of field the value came from.
	typ byte
	// value is the raw stored field content, without the type tag.
	value []byte
}
|
||||
|
||||
|
@ -403,11 +404,14 @@ func (s *StoredRow) Key() []byte {
|
|||
}
|
||||
|
||||
func (s *StoredRow) Value() []byte {
|
||||
return s.value
|
||||
rv := make([]byte, len(s.value)+1)
|
||||
rv[0] = s.typ
|
||||
copy(rv[1:], s.value)
|
||||
return rv
|
||||
}
|
||||
|
||||
func (s *StoredRow) String() string {
|
||||
return fmt.Sprintf("Document: %s Field %d, Value: %s", s.doc, s.field, s.value)
|
||||
return fmt.Sprintf("Document: %s Field %d, Type: %s Value: %s", s.doc, s.field, string(s.typ), s.value)
|
||||
}
|
||||
|
||||
func (s *StoredRow) ScanPrefixForDoc() []byte {
|
||||
|
@ -418,10 +422,11 @@ func (s *StoredRow) ScanPrefixForDoc() []byte {
|
|||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func NewStoredRow(doc string, field uint16, value []byte) *StoredRow {
|
||||
func NewStoredRow(doc string, field uint16, typ byte, value []byte) *StoredRow {
|
||||
return &StoredRow{
|
||||
doc: []byte(doc),
|
||||
field: field,
|
||||
typ: typ,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
@ -446,7 +451,9 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
rv.value = value
|
||||
rv.typ = value[0]
|
||||
|
||||
rv.value = value[1:]
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
|
|
@ -70,9 +70,9 @@ func TestRows(t *testing.T) {
|
|||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0, BYTE_SEPARATOR, 3, 0, BYTE_SEPARATOR, 4, 0, BYTE_SEPARATOR, 5, 0},
|
||||
},
|
||||
{
|
||||
NewStoredRow("budweiser", 0, []byte("an american beer")),
|
||||
NewStoredRow("budweiser", 0, byte('t'), []byte("an american beer")),
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', BYTE_SEPARATOR, 0, 0},
|
||||
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
}
|
||||
|
||||
|
@ -194,12 +194,12 @@ func TestInvalidRows(t *testing.T) {
|
|||
// type s, invalid key (missing id)
|
||||
{
|
||||
[]byte{'s'},
|
||||
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
// type b, invalid val (missing field)
|
||||
{
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', BYTE_SEPARATOR},
|
||||
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
}
|
||||
|
||||
|
|
|
@ -330,7 +330,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
}
|
||||
// any of the existing stored fields that weren't updated need to be deleted
|
||||
for storedFieldIndex, _ := range existingStoredFieldMap {
|
||||
storedRow := NewStoredRow(doc.ID, storedFieldIndex, nil)
|
||||
storedRow := NewStoredRow(doc.ID, storedFieldIndex, 'x', nil)
|
||||
deleteRows = append(deleteRows, storedRow)
|
||||
}
|
||||
|
||||
|
@ -344,7 +344,8 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
func (udc *UpsideDownCouch) storeField(docId string, field document.Field, fieldIndex uint16, existingStoredFieldMap map[uint16]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow) {
|
||||
updateRows := make([]UpsideDownCouchRow, 0)
|
||||
addRows := make([]UpsideDownCouchRow, 0)
|
||||
storedRow := NewStoredRow(docId, fieldIndex, field.Value())
|
||||
fieldType := encodeFieldType(field)
|
||||
storedRow := NewStoredRow(docId, fieldIndex, fieldType, field.Value())
|
||||
_, ok := existingStoredFieldMap[fieldIndex]
|
||||
if ok {
|
||||
// this is an update
|
||||
|
@ -357,6 +358,21 @@ func (udc *UpsideDownCouch) storeField(docId string, field document.Field, field
|
|||
return addRows, updateRows
|
||||
}
|
||||
|
||||
func encodeFieldType(f document.Field) byte {
|
||||
fieldType := byte('x')
|
||||
switch f.(type) {
|
||||
case *document.TextField:
|
||||
fieldType = 't'
|
||||
case *document.NumericField:
|
||||
fieldType = 'n'
|
||||
case *document.DateTimeField:
|
||||
fieldType = 'd'
|
||||
case *document.CompositeField:
|
||||
fieldType = 'c'
|
||||
}
|
||||
return fieldType
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) indexField(docId string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, existingTermMap termMap) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexEntry) {
|
||||
|
||||
updateRows := make([]UpsideDownCouchRow, 0)
|
||||
|
@ -431,7 +447,7 @@ func (udc *UpsideDownCouch) Delete(id string) error {
|
|||
rows = append(rows, tfr)
|
||||
}
|
||||
for _, sf := range backIndexRow.storedFields {
|
||||
sf := NewStoredRow(id, sf, nil)
|
||||
sf := NewStoredRow(id, sf, 'x', nil)
|
||||
rows = append(rows, sf)
|
||||
}
|
||||
|
||||
|
@ -551,7 +567,7 @@ func (udc *UpsideDownCouch) DumpDoc(id string) ([]interface{}, error) {
|
|||
}
|
||||
keys := make(keyset, 0)
|
||||
for _, stored := range back.storedFields {
|
||||
sr := NewStoredRow(id, stored, []byte{})
|
||||
sr := NewStoredRow(id, stored, 'x', []byte{})
|
||||
key := sr.Key()
|
||||
keys = append(keys, key)
|
||||
}
|
||||
|
@ -591,7 +607,7 @@ func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, e
|
|||
|
||||
func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
|
||||
rv := document.NewDocument(id)
|
||||
storedRow := NewStoredRow(id, 0, nil)
|
||||
storedRow := NewStoredRow(id, 0, 'x', nil)
|
||||
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
|
||||
it := udc.store.Iterator(storedRowScanPrefix)
|
||||
key, val, valid := it.Current()
|
||||
|
@ -604,7 +620,11 @@ func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
|
|||
return nil, err
|
||||
}
|
||||
if row != nil {
|
||||
rv.AddField(document.NewTextField(udc.fieldIndexToName(row.field), row.Value()))
|
||||
fieldName := udc.fieldIndexToName(row.field)
|
||||
field := decodeFieldType(row.typ, fieldName, row.value)
|
||||
if field != nil {
|
||||
rv.AddField(field)
|
||||
}
|
||||
}
|
||||
|
||||
it.Next()
|
||||
|
@ -613,6 +633,18 @@ func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
|
|||
return rv, nil
|
||||
}
|
||||
|
||||
func decodeFieldType(typ byte, name string, value []byte) document.Field {
|
||||
switch typ {
|
||||
case 't':
|
||||
return document.NewTextField(name, value)
|
||||
case 'n':
|
||||
return document.NewNumericFieldFromBytes(name, value)
|
||||
case 'd':
|
||||
return document.NewDateTimeFieldFromBytes(name, value)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
|
||||
return len(tf.Locations)
|
||||
}
|
||||
|
|
|
@ -318,7 +318,11 @@ func TestIndexInsertWithStore(t *testing.T) {
|
|||
if len(storedDoc.Fields) != 1 {
|
||||
t.Errorf("expected 1 stored field, got %d", len(storedDoc.Fields))
|
||||
}
|
||||
if string(storedDoc.Fields[0].Value()) != "test" {
|
||||
t.Errorf("expected field content 'test', got '%s'", string(storedDoc.Fields[0].Value()))
|
||||
textField, ok := storedDoc.Fields[0].(*document.TextField)
|
||||
if !ok {
|
||||
t.Errorf("expected text field")
|
||||
}
|
||||
if string(textField.Value()) != "test" {
|
||||
t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
|
||||
}
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ package bleve
|
|||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
|
@ -151,6 +152,40 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
|
|||
}
|
||||
}
|
||||
|
||||
if len(req.Fields) > 0 {
|
||||
for _, hit := range hits {
|
||||
// FIXME avoid loading doc second time
|
||||
// if we already loaded it for highlighting
|
||||
doc, err := i.Document(hit.ID)
|
||||
if err == nil {
|
||||
for _, f := range req.Fields {
|
||||
for _, docF := range doc.Fields {
|
||||
if docF.Name() == f {
|
||||
var value interface{}
|
||||
switch docF := docF.(type) {
|
||||
case *document.TextField:
|
||||
value = string(docF.Value())
|
||||
case *document.NumericField:
|
||||
num, err := docF.Number()
|
||||
if err == nil {
|
||||
value = num
|
||||
}
|
||||
case *document.DateTimeField:
|
||||
datetime, err := docF.DateTime()
|
||||
if err == nil {
|
||||
value = datetime.Format(time.RFC3339)
|
||||
}
|
||||
}
|
||||
if value != nil {
|
||||
hit.AddFieldValue(f, value)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return &SearchResult{
|
||||
Request: req,
|
||||
Hits: hits,
|
||||
|
|
|
@ -36,6 +36,7 @@ type SearchRequest struct {
|
|||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Explain bool `json:"explain"`
|
||||
}
|
||||
|
||||
|
@ -45,6 +46,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
Size int `json:"size"`
|
||||
From int `json:"from"`
|
||||
Highlight *HighlightRequest `json:"highlight"`
|
||||
Fields []string `json:"fields"`
|
||||
Explain bool `json:"explain"`
|
||||
}
|
||||
|
||||
|
@ -57,6 +59,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
|
|||
r.From = temp.From
|
||||
r.Explain = temp.Explain
|
||||
r.Highlight = temp.Highlight
|
||||
r.Fields = temp.Fields
|
||||
r.Query, err = ParseQuery(temp.Q)
|
||||
if err != nil {
|
||||
return err
|
||||
|
|
|
@ -72,11 +72,14 @@ func (s *SimpleHighlighter) BestFragmentsInField(dm *DocumentMatch, doc *documen
|
|||
heap.Init(&fq)
|
||||
for _, f := range doc.Fields {
|
||||
if f.Name() == field {
|
||||
fieldData := f.Value()
|
||||
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
|
||||
for _, fragment := range fragments {
|
||||
scorer.Score(fragment)
|
||||
heap.Push(&fq, fragment)
|
||||
_, ok := f.(*document.TextField)
|
||||
if ok {
|
||||
fieldData := f.Value()
|
||||
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
|
||||
for _, fragment := range fragments {
|
||||
scorer.Score(fragment)
|
||||
heap.Push(&fq, fragment)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -35,11 +35,19 @@ type FieldTermLocationMap map[string]TermLocationMap
|
|||
type FieldFragmentMap map[string][]string
|
||||
|
||||
type DocumentMatch struct {
|
||||
ID string `json:"id"`
|
||||
Score float64 `json:"score"`
|
||||
Expl *Explanation `json:"explanation,omitempty"`
|
||||
Locations FieldTermLocationMap `json:"locations,omitempty"`
|
||||
Fragments FieldFragmentMap `json:"fragments,omitempty"`
|
||||
ID string `json:"id"`
|
||||
Score float64 `json:"score"`
|
||||
Expl *Explanation `json:"explanation,omitempty"`
|
||||
Locations FieldTermLocationMap `json:"locations,omitempty"`
|
||||
Fragments FieldFragmentMap `json:"fragments,omitempty"`
|
||||
Fields map[string]interface{} `json:"fields,omitempty"`
|
||||
}
|
||||
|
||||
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
|
||||
if dm.Fields == nil {
|
||||
dm.Fields = make(map[string]interface{})
|
||||
}
|
||||
dm.Fields[name] = value
|
||||
}
|
||||
|
||||
type DocumentMatchCollection []*DocumentMatch
|
||||
|
|
Loading…
Reference in New Issue