
fix storing/retrieving numeric and date fields

also includes a new ability to request that stored fields be returned with results

closes #55 and closes #56 and closes #58
This commit is contained in:
Marty Schoch 2014-08-06 13:52:20 -04:00
parent d84187fd24
commit 41d4f67ee2
10 changed files with 153 additions and 29 deletions
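
Aside (not part of the commit): a minimal sketch of how the new stored-fields option might be used from the top-level API. The index, the query, and the field names "name" and "abv" are assumptions for illustration, as are the package's exported Index and Query types.

package example

import (
	"fmt"

	"github.com/couchbaselabs/bleve"
)

// printHits is a sketch only: it assumes an already-open bleve.Index and an
// already-built query value; the field names used are hypothetical.
func printHits(index bleve.Index, query bleve.Query) error {
	req := &bleve.SearchRequest{
		Query:  query,
		Size:   10,
		Fields: []string{"name", "abv"}, // new: stored fields to return with each hit
	}
	res, err := index.Search(req)
	if err != nil {
		return err
	}
	for _, hit := range res.Hits {
		// text values come back as string, numeric values as float64,
		// datetimes as RFC3339-formatted strings (see the search.go diff below)
		fmt.Println(hit.ID, hit.Fields["name"], hit.Fields["abv"])
	}
	return nil
}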

View File

@ -16,7 +16,7 @@ import (
"github.com/couchbaselabs/bleve/numeric_util"
)
const DEFAULT_DATETIME_INDEXING_OPTIONS = INDEX_FIELD
const DEFAULT_DATETIME_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
const DEFAULT_DATETIME_PRECISION_STEP uint = 4
@ -74,12 +74,28 @@ func (n *DateTimeField) Value() []byte {
return n.value
}
func (n *DateTimeField) DateTime() (time.Time, error) {
i64, err := n.value.Int64()
if err != nil {
return time.Time{}, err
}
return time.Unix(0, i64), nil
}
func (n *DateTimeField) GoString() string {
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func NewDateTimeFieldFromBytes(name string, value []byte) *DateTimeField {
return &DateTimeField{
name: name,
value: value,
options: DEFAULT_DATETIME_INDEXING_OPTIONS,
}
}
func NewDateTimeField(name string, dt time.Time) *DateTimeField {
return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_NUMERIC_INDEXING_OPTIONS)
return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_DATETIME_INDEXING_OPTIONS)
}
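
Aside (not part of the diff): with the new accessor above, a stored datetime can be read back as a time.Time. This sketch assumes the constructor stores the time as a prefix-coded count of nanoseconds since the epoch, which is what time.Unix(0, i64) in DateTime() undoes.

f := NewDateTimeField("updated", time.Date(2014, 8, 6, 13, 52, 20, 0, time.UTC))
if dt, err := f.DateTime(); err == nil {
	fmt.Println(dt.UTC().Format(time.RFC3339)) // 2014-08-06T13:52:20Z
}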
func NewDateTimeFieldWithIndexingOptions(name string, dt time.Time, options IndexingOptions) *DateTimeField {

View File

@ -15,7 +15,7 @@ import (
"github.com/couchbaselabs/bleve/numeric_util"
)
const DEFAULT_NUMERIC_INDEXING_OPTIONS = INDEX_FIELD
const DEFAULT_NUMERIC_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
const DEFAULT_PRECISION_STEP uint = 4
@ -73,10 +73,26 @@ func (n *NumericField) Value() []byte {
return n.value
}
func (n *NumericField) Number() (float64, error) {
i64, err := n.value.Int64()
if err != nil {
return 0.0, err
}
return numeric_util.Int64ToFloat64(i64), nil
}
func (n *NumericField) GoString() string {
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func NewNumericFieldFromBytes(name string, value []byte) *NumericField {
return &NumericField{
name: name,
value: value,
options: DEFAULT_NUMERIC_INDEXING_OPTIONS,
}
}
func NewNumericField(name string, number float64) *NumericField {
return NewNumericFieldWithIndexingOptions(name, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
}

View File

@ -388,6 +388,7 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
type StoredRow struct {
doc []byte
field uint16
typ byte
value []byte
}
@ -403,11 +404,14 @@ func (s *StoredRow) Key() []byte {
}
func (s *StoredRow) Value() []byte {
return s.value
rv := make([]byte, len(s.value)+1)
rv[0] = s.typ
copy(rv[1:], s.value)
return rv
}
func (s *StoredRow) String() string {
return fmt.Sprintf("Document: %s Field %d, Value: %s", s.doc, s.field, s.value)
return fmt.Sprintf("Document: %s Field %d, Type: %s Value: %s", s.doc, s.field, string(s.typ), s.value)
}
func (s *StoredRow) ScanPrefixForDoc() []byte {
@ -418,10 +422,11 @@ func (s *StoredRow) ScanPrefixForDoc() []byte {
return buf.Bytes()
}
func NewStoredRow(doc string, field uint16, value []byte) *StoredRow {
func NewStoredRow(doc string, field uint16, typ byte, value []byte) *StoredRow {
return &StoredRow{
doc: []byte(doc),
field: field,
typ: typ,
value: value,
}
}
@ -446,7 +451,9 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
return nil, err
}
rv.value = value
rv.typ = value[0]
rv.value = value[1:]
return &rv, nil
}
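
For clarity (illustration only, not part of the diff), the new value layout is simply the one-byte field type followed by the raw stored bytes, matching the test vectors in the next file:

row := NewStoredRow("budweiser", 0, 't', []byte("an american beer"))
// Key (unchanged): 's', the document ID, a separator, and the field number.
// Value (new):     the type byte followed by the stored bytes, i.e. "t" + "an american beer".
// NewStoredRowKV reverses the split: typ = value[0], value = value[1:].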

View File

@ -70,9 +70,9 @@ func TestRows(t *testing.T) {
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0, BYTE_SEPARATOR, 3, 0, BYTE_SEPARATOR, 4, 0, BYTE_SEPARATOR, 5, 0},
},
{
NewStoredRow("budweiser", 0, []byte("an american beer")),
NewStoredRow("budweiser", 0, byte('t'), []byte("an american beer")),
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', BYTE_SEPARATOR, 0, 0},
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
},
}
@ -194,12 +194,12 @@ func TestInvalidRows(t *testing.T) {
// type s, invalid key (missing id)
{
[]byte{'s'},
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
},
// type b, invalid val (missing field)
{
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', BYTE_SEPARATOR},
[]byte{'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
},
}

View File

@ -330,7 +330,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
}
// any of the existing stored fields that weren't updated need to be deleted
for storedFieldIndex, _ := range existingStoredFieldMap {
storedRow := NewStoredRow(doc.ID, storedFieldIndex, nil)
storedRow := NewStoredRow(doc.ID, storedFieldIndex, 'x', nil)
deleteRows = append(deleteRows, storedRow)
}
@ -344,7 +344,8 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
func (udc *UpsideDownCouch) storeField(docId string, field document.Field, fieldIndex uint16, existingStoredFieldMap map[uint16]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow) {
updateRows := make([]UpsideDownCouchRow, 0)
addRows := make([]UpsideDownCouchRow, 0)
storedRow := NewStoredRow(docId, fieldIndex, field.Value())
fieldType := encodeFieldType(field)
storedRow := NewStoredRow(docId, fieldIndex, fieldType, field.Value())
_, ok := existingStoredFieldMap[fieldIndex]
if ok {
// this is an update
@ -357,6 +358,21 @@ func (udc *UpsideDownCouch) storeField(docId string, field document.Field, field
return addRows, updateRows
}
func encodeFieldType(f document.Field) byte {
fieldType := byte('x')
switch f.(type) {
case *document.TextField:
fieldType = 't'
case *document.NumericField:
fieldType = 'n'
case *document.DateTimeField:
fieldType = 'd'
case *document.CompositeField:
fieldType = 'c'
}
return fieldType
}
func (udc *UpsideDownCouch) indexField(docId string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, existingTermMap termMap) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexEntry) {
updateRows := make([]UpsideDownCouchRow, 0)
@ -431,7 +447,7 @@ func (udc *UpsideDownCouch) Delete(id string) error {
rows = append(rows, tfr)
}
for _, sf := range backIndexRow.storedFields {
sf := NewStoredRow(id, sf, nil)
sf := NewStoredRow(id, sf, 'x', nil)
rows = append(rows, sf)
}
@ -551,7 +567,7 @@ func (udc *UpsideDownCouch) DumpDoc(id string) ([]interface{}, error) {
}
keys := make(keyset, 0)
for _, stored := range back.storedFields {
sr := NewStoredRow(id, stored, []byte{})
sr := NewStoredRow(id, stored, 'x', []byte{})
key := sr.Key()
keys = append(keys, key)
}
@ -591,7 +607,7 @@ func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, e
func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
rv := document.NewDocument(id)
storedRow := NewStoredRow(id, 0, nil)
storedRow := NewStoredRow(id, 0, 'x', nil)
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
it := udc.store.Iterator(storedRowScanPrefix)
key, val, valid := it.Current()
@ -604,7 +620,11 @@ func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
return nil, err
}
if row != nil {
rv.AddField(document.NewTextField(udc.fieldIndexToName(row.field), row.Value()))
fieldName := udc.fieldIndexToName(row.field)
field := decodeFieldType(row.typ, fieldName, row.value)
if field != nil {
rv.AddField(field)
}
}
it.Next()
@ -613,6 +633,18 @@ func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
return rv, nil
}
func decodeFieldType(typ byte, name string, value []byte) document.Field {
switch typ {
case 't':
return document.NewTextField(name, value)
case 'n':
return document.NewNumericFieldFromBytes(name, value)
case 'd':
return document.NewDateTimeFieldFromBytes(name, value)
}
return nil
}
func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
return len(tf.Locations)
}
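
A retrieval sketch (not part of the commit) showing what the type byte enables: Document() now reconstructs typed fields, so callers can recover numbers and dates rather than raw bytes. It would live alongside this code in the same package; udc is assumed to be an open *UpsideDownCouch and the document ID is hypothetical.

import (
	"fmt"

	"github.com/couchbaselabs/bleve/document"
)

// dumpTypedFields is a sketch only; it assumes an open *UpsideDownCouch.
func dumpTypedFields(udc *UpsideDownCouch, id string) error {
	doc, err := udc.Document(id)
	if err != nil {
		return err
	}
	for _, f := range doc.Fields {
		switch f := f.(type) {
		case *document.TextField:
			fmt.Println(f.Name(), string(f.Value()))
		case *document.NumericField:
			n, _ := f.Number() // prefix-coded int64 decoded back to float64
			fmt.Println(f.Name(), n)
		case *document.DateTimeField:
			dt, _ := f.DateTime() // nanoseconds since the epoch back to time.Time
			fmt.Println(f.Name(), dt)
		}
	}
	return nil
}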

View File

@ -318,7 +318,11 @@ func TestIndexInsertWithStore(t *testing.T) {
if len(storedDoc.Fields) != 1 {
t.Errorf("expected 1 stored field, got %d", len(storedDoc.Fields))
}
if string(storedDoc.Fields[0].Value()) != "test" {
t.Errorf("expected field content 'test', got '%s'", string(storedDoc.Fields[0].Value()))
textField, ok := storedDoc.Fields[0].(*document.TextField)
if !ok {
t.Errorf("expected text field")
}
if string(textField.Value()) != "test" {
t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
}
}

View File

@ -11,6 +11,7 @@ package bleve
import (
"encoding/json"
"fmt"
"time"
"github.com/couchbaselabs/bleve/document"
"github.com/couchbaselabs/bleve/index"
@ -151,6 +152,40 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
}
}
if len(req.Fields) > 0 {
for _, hit := range hits {
// FIXME avoid loading doc second time
// if we already loaded it for highlighting
doc, err := i.Document(hit.ID)
if err == nil {
for _, f := range req.Fields {
for _, docF := range doc.Fields {
if docF.Name() == f {
var value interface{}
switch docF := docF.(type) {
case *document.TextField:
value = string(docF.Value())
case *document.NumericField:
num, err := docF.Number()
if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
}
if value != nil {
hit.AddFieldValue(f, value)
}
}
}
}
}
}
}
return &SearchResult{
Request: req,
Hits: hits,

View File

@ -36,6 +36,7 @@ type SearchRequest struct {
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Explain bool `json:"explain"`
}
@ -45,6 +46,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Explain bool `json:"explain"`
}
@ -57,6 +59,7 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.From = temp.From
r.Explain = temp.Explain
r.Highlight = temp.Highlight
r.Fields = temp.Fields
r.Query, err = ParseQuery(temp.Q)
if err != nil {
return err

View File

@ -72,11 +72,14 @@ func (s *SimpleHighlighter) BestFragmentsInField(dm *DocumentMatch, doc *documen
heap.Init(&fq)
for _, f := range doc.Fields {
if f.Name() == field {
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
for _, fragment := range fragments {
scorer.Score(fragment)
heap.Push(&fq, fragment)
_, ok := f.(*document.TextField)
if ok {
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, orderedTermLocations)
for _, fragment := range fragments {
scorer.Score(fragment)
heap.Push(&fq, fragment)
}
}
}
}

View File

@ -35,11 +35,19 @@ type FieldTermLocationMap map[string]TermLocationMap
type FieldFragmentMap map[string][]string
type DocumentMatch struct {
ID string `json:"id"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
ID string `json:"id"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Fields map[string]interface{} `json:"fields,omitempty"`
}
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
if dm.Fields == nil {
dm.Fields = make(map[string]interface{})
}
dm.Fields[name] = value
}
type DocumentMatchCollection []*DocumentMatch
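
Illustration only (not part of the commit): how the new Fields map is populated and what it looks like on the wire. The document ID and values are hypothetical; the value types follow the conversion in search.go above.

hit := &DocumentMatch{ID: "budweiser", Score: 1.0}
hit.AddFieldValue("name", "Budweiser")               // text fields arrive as string
hit.AddFieldValue("abv", 5.0)                        // numeric fields as float64
hit.AddFieldValue("updated", "2014-08-06T13:52:20Z") // datetimes as RFC3339 strings
// AddFieldValue allocates the map on first use, so Fields stays nil (and the
// "fields" key is omitted from the JSON) for hits where no stored fields were
// requested. Marshalled, this hit gains:
//   "fields": {"abv": 5, "name": "Budweiser", "updated": "2014-08-06T13:52:20Z"}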