major change to fields
Fields can now track array positions for field values. Stored fields now include these positions in the key, and the back index now uses protobufs to simplify serialization. Closes #73.
This commit is contained in:
parent
ad3ba27cb8
commit
082a5b0b03
|
@ -12,4 +12,5 @@
|
|||
/examples/beer-search/beer-search
|
||||
/examples/beer-search/beer-search.bleve
|
||||
/utils/bleve_dump/bleve_dump
|
||||
/utils/bleve_registry/bleve_registry
|
||||
/y.output
|
||||
|
|
|
@ -14,6 +14,7 @@ import (
|
|||
|
||||
type Field interface {
|
||||
Name() string
|
||||
ArrayPositions() []uint64
|
||||
Options() IndexingOptions
|
||||
Analyze() (int, analysis.TokenFrequencies)
|
||||
Value() []byte
|
||||
|
|
|
@ -51,6 +51,10 @@ func (c *CompositeField) Name() string {
|
|||
return c.name
|
||||
}
|
||||
|
||||
func (c *CompositeField) ArrayPositions() []uint64 {
|
||||
return []uint64{}
|
||||
}
|
||||
|
||||
func (c *CompositeField) Options() IndexingOptions {
|
||||
return c.options
|
||||
}
|
||||
|
|
|
@ -21,15 +21,20 @@ const DEFAULT_DATETIME_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
|
|||
const DEFAULT_DATETIME_PRECISION_STEP uint = 4
|
||||
|
||||
type DateTimeField struct {
|
||||
name string
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *DateTimeField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Options() IndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
@ -86,24 +91,26 @@ func (n *DateTimeField) GoString() string {
|
|||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func NewDateTimeFieldFromBytes(name string, value []byte) *DateTimeField {
|
||||
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
value: value,
|
||||
options: DEFAULT_DATETIME_INDEXING_OPTIONS,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DEFAULT_DATETIME_INDEXING_OPTIONS,
|
||||
}
|
||||
}
|
||||
|
||||
func NewDateTimeField(name string, dt time.Time) *DateTimeField {
|
||||
return NewDateTimeFieldWithIndexingOptions(name, dt, DEFAULT_DATETIME_INDEXING_OPTIONS)
|
||||
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) *DateTimeField {
|
||||
return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, DEFAULT_DATETIME_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewDateTimeFieldWithIndexingOptions(name string, dt time.Time, options IndexingOptions) *DateTimeField {
|
||||
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options IndexingOptions) *DateTimeField {
|
||||
dtInt64 := dt.UnixNano()
|
||||
prefixCoded := numeric_util.MustNewPrefixCodedInt64(dtInt64, 0)
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -20,15 +20,20 @@ const DEFAULT_NUMERIC_INDEXING_OPTIONS = STORE_FIELD | INDEX_FIELD
|
|||
const DEFAULT_PRECISION_STEP uint = 4
|
||||
|
||||
type NumericField struct {
|
||||
name string
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
}
|
||||
|
||||
func (n *NumericField) Name() string {
|
||||
return n.name
|
||||
}
|
||||
|
||||
func (n *NumericField) ArrayPositions() []uint64 {
|
||||
return n.arrayPositions
|
||||
}
|
||||
|
||||
func (n *NumericField) Options() IndexingOptions {
|
||||
return n.options
|
||||
}
|
||||
|
@ -85,24 +90,26 @@ func (n *NumericField) GoString() string {
|
|||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func NewNumericFieldFromBytes(name string, value []byte) *NumericField {
|
||||
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
|
||||
return &NumericField{
|
||||
name: name,
|
||||
value: value,
|
||||
options: DEFAULT_NUMERIC_INDEXING_OPTIONS,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DEFAULT_NUMERIC_INDEXING_OPTIONS,
|
||||
}
|
||||
}
|
||||
|
||||
func NewNumericField(name string, number float64) *NumericField {
|
||||
return NewNumericFieldWithIndexingOptions(name, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
|
||||
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
|
||||
return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DEFAULT_NUMERIC_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewNumericFieldWithIndexingOptions(name string, number float64, options IndexingOptions) *NumericField {
|
||||
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField {
|
||||
numberInt64 := numeric_util.Float64ToInt64(number)
|
||||
prefixCoded := numeric_util.MustNewPrefixCodedInt64(numberInt64, 0)
|
||||
return &NumericField{
|
||||
name: name,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,7 +13,7 @@ import (
|
|||
)
|
||||
|
||||
func TestNumericField(t *testing.T) {
|
||||
nf := NewNumericField("age", 3.4)
|
||||
nf := NewNumericField("age", []uint64{}, 3.4)
|
||||
numTokens, tokenFreqs := nf.Analyze()
|
||||
if numTokens != 16 {
|
||||
t.Errorf("expected 16 tokens")
|
||||
|
|
|
@ -17,16 +17,21 @@ import (
|
|||
const DEFAULT_TEXT_INDEXING_OPTIONS = INDEX_FIELD
|
||||
|
||||
type TextField struct {
|
||||
name string
|
||||
options IndexingOptions
|
||||
analyzer *analysis.Analyzer
|
||||
value []byte
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
analyzer *analysis.Analyzer
|
||||
value []byte
|
||||
}
|
||||
|
||||
func (t *TextField) Name() string {
|
||||
return t.name
|
||||
}
|
||||
|
||||
func (t *TextField) ArrayPositions() []uint64 {
|
||||
return t.arrayPositions
|
||||
}
|
||||
|
||||
func (t *TextField) Options() IndexingOptions {
|
||||
return t.options
|
||||
}
|
||||
|
@ -59,32 +64,35 @@ func (t *TextField) GoString() string {
|
|||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %s, Value: %s}", t.name, t.options, t.analyzer, t.value)
|
||||
}
|
||||
|
||||
func NewTextField(name string, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, value, DEFAULT_TEXT_INDEXING_OPTIONS)
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DEFAULT_TEXT_INDEXING_OPTIONS)
|
||||
}
|
||||
|
||||
func NewTextFieldWithIndexingOptions(name string, value []byte, options IndexingOptions) *TextField {
|
||||
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
options: options,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldWithAnalyzer(name string, value []byte, analyzer *analysis.Analyzer) *TextField {
|
||||
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
options: DEFAULT_TEXT_INDEXING_OPTIONS,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: DEFAULT_TEXT_INDEXING_OPTIONS,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldCustom(name string, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
|
||||
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
options: options,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,7 @@ func CommonBenchmarkIndex(b *testing.B, s store.KVStore) {
|
|||
index := NewUpsideDownCouch(s)
|
||||
|
||||
indexDocument := document.NewDocument("").
|
||||
AddField(document.NewTextField("body", []byte("A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.")))
|
||||
AddField(document.NewTextField("body", []uint64{}, []byte("A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.")))
|
||||
|
||||
b.ResetTimer()
|
||||
for i := 0; i < b.N; i++ {
|
||||
|
|
|
@ -87,15 +87,15 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} {
|
|||
}
|
||||
// build sorted list of term keys
|
||||
keys := make(keyset, 0)
|
||||
for _, entry := range back.entries {
|
||||
tfr := NewTermFrequencyRow(entry.term, entry.field, id, 0, 0)
|
||||
for _, entry := range back.termEntries {
|
||||
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), id, 0, 0)
|
||||
key := tfr.Key()
|
||||
keys = append(keys, key)
|
||||
}
|
||||
sort.Sort(keys)
|
||||
|
||||
// first add all the stored rows
|
||||
storedRowPrefix := NewStoredRow(id, 0, 'x', []byte{}).ScanPrefixForDoc()
|
||||
storedRowPrefix := NewStoredRow(id, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
|
||||
udc.dumpPrefix(rv, storedRowPrefix)
|
||||
|
||||
// now walk term keys in order and add them as well
|
||||
|
|
|
@ -38,18 +38,18 @@ func TestDump(t *testing.T) {
|
|||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
}
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test2"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test2"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
|
|
@ -30,7 +30,7 @@ func TestIndexFieldReader(t *testing.T) {
|
|||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -38,9 +38,9 @@ func TestIndexFieldReader(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("prefix", []byte("bob cat cats catting dog doggy zoo"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
|
|
@ -31,7 +31,7 @@ func TestIndexReader(t *testing.T) {
|
|||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -39,8 +39,8 @@ func TestIndexReader(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS, testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -173,7 +173,7 @@ func TestIndexDocIdReader(t *testing.T) {
|
|||
|
||||
var expectedCount uint64 = 0
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -181,8 +181,8 @@ func TestIndexDocIdReader(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
|
|
@ -14,6 +14,8 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
|
||||
"code.google.com/p/goprotobuf/proto"
|
||||
)
|
||||
|
||||
const BYTE_SEPARATOR byte = 0xff
|
||||
|
@ -265,7 +267,7 @@ func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, f
|
|||
}
|
||||
}
|
||||
|
||||
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
|
||||
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
|
||||
rv := TermFrequencyRow{
|
||||
doc: []byte(""),
|
||||
}
|
||||
|
@ -292,7 +294,16 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
|
|||
rv.doc = doc
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer((value))
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
|
||||
rv, err := NewTermFrequencyRowK(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buf := bytes.NewBuffer((value))
|
||||
err = binary.Read(buf, binary.LittleEndian, &rv.freq)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -332,23 +343,38 @@ func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
|
|||
err = binary.Read(buf, binary.LittleEndian, &field)
|
||||
}
|
||||
|
||||
return &rv, nil
|
||||
return rv, nil
|
||||
|
||||
}
|
||||
|
||||
type BackIndexEntry struct {
|
||||
term []byte
|
||||
field uint16
|
||||
}
|
||||
|
||||
func (bie *BackIndexEntry) String() string {
|
||||
return fmt.Sprintf("Term: `%s` Field: %d", string(bie.term), bie.field)
|
||||
}
|
||||
|
||||
type BackIndexRow struct {
|
||||
doc []byte
|
||||
entries []*BackIndexEntry
|
||||
storedFields []uint16
|
||||
doc []byte
|
||||
termEntries []*BackIndexTermEntry
|
||||
storedEntries []*BackIndexStoreEntry
|
||||
}
|
||||
|
||||
func (br *BackIndexRow) AllTermKeys() [][]byte {
|
||||
if br == nil {
|
||||
return nil
|
||||
}
|
||||
rv := make([][]byte, len(br.termEntries))
|
||||
for i, termEntry := range br.termEntries {
|
||||
termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), string(br.doc), 0, 0)
|
||||
rv[i] = termRow.Key()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (br *BackIndexRow) AllStoredKeys() [][]byte {
|
||||
if br == nil {
|
||||
return nil
|
||||
}
|
||||
rv := make([][]byte, len(br.storedEntries))
|
||||
for i, storedEntry := range br.storedEntries {
|
||||
storedRow := NewStoredRow(string(br.doc), uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
|
||||
rv[i] = storedRow.Key()
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
||||
func (br *BackIndexRow) Key() []byte {
|
||||
|
@ -359,32 +385,23 @@ func (br *BackIndexRow) Key() []byte {
|
|||
}
|
||||
|
||||
func (br *BackIndexRow) Value() []byte {
|
||||
buf := new(bytes.Buffer)
|
||||
for _, e := range br.entries {
|
||||
buf.Write(e.term)
|
||||
buf.WriteByte(BYTE_SEPARATOR)
|
||||
fieldbuf := make([]byte, 2)
|
||||
binary.LittleEndian.PutUint16(fieldbuf, e.field)
|
||||
buf.Write(fieldbuf)
|
||||
birv := &BackIndexRowValue{
|
||||
TermEntries: br.termEntries,
|
||||
StoredEntries: br.storedEntries,
|
||||
}
|
||||
for _, sf := range br.storedFields {
|
||||
buf.WriteByte(BYTE_SEPARATOR)
|
||||
fieldbuf := make([]byte, 2)
|
||||
binary.LittleEndian.PutUint16(fieldbuf, sf)
|
||||
buf.Write(fieldbuf)
|
||||
}
|
||||
return buf.Bytes()
|
||||
bytes, _ := proto.Marshal(birv)
|
||||
return bytes
|
||||
}
|
||||
|
||||
func (br *BackIndexRow) String() string {
|
||||
return fmt.Sprintf("Backindex DocId: `%s` Entries: %v, Stored Fields: %v", string(br.doc), br.entries, br.storedFields)
|
||||
return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
|
||||
}
|
||||
|
||||
func NewBackIndexRow(doc string, entries []*BackIndexEntry, storedFields []uint16) *BackIndexRow {
|
||||
func NewBackIndexRow(doc string, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
|
||||
return &BackIndexRow{
|
||||
doc: []byte(doc),
|
||||
entries: entries,
|
||||
storedFields: storedFields,
|
||||
doc: []byte(doc),
|
||||
termEntries: entries,
|
||||
storedEntries: storedFields,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -403,44 +420,13 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
buf = bytes.NewBuffer(value)
|
||||
rv.entries = make([]*BackIndexEntry, 0)
|
||||
rv.storedFields = make([]uint16, 0)
|
||||
|
||||
var term []byte
|
||||
term, err = buf.ReadBytes(BYTE_SEPARATOR)
|
||||
if err == io.EOF && len(term) < 1 {
|
||||
err = fmt.Errorf("invalid term length 0")
|
||||
}
|
||||
if err != nil && err != io.EOF {
|
||||
var birv BackIndexRowValue
|
||||
err = proto.Unmarshal(value, &birv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for err != io.EOF {
|
||||
if len(term) > 2 {
|
||||
// this is a back index entry
|
||||
ent := BackIndexEntry{}
|
||||
ent.term = term[:len(term)-1] // trim off separator byte
|
||||
|
||||
err = binary.Read(buf, binary.LittleEndian, &ent.field)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.entries = append(rv.entries, &ent)
|
||||
} else {
|
||||
// this is a stored field entry
|
||||
var sf uint16
|
||||
err = binary.Read(buf, binary.LittleEndian, &sf)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.storedFields = append(rv.storedFields, sf)
|
||||
}
|
||||
|
||||
term, err = buf.ReadBytes(BYTE_SEPARATOR)
|
||||
if err != nil && err != io.EOF {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
rv.termEntries = birv.TermEntries
|
||||
rv.storedEntries = birv.StoredEntries
|
||||
|
||||
return &rv, nil
|
||||
}
|
||||
|
@ -448,10 +434,11 @@ func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
|
|||
// STORED
|
||||
|
||||
type StoredRow struct {
|
||||
doc []byte
|
||||
field uint16
|
||||
typ byte
|
||||
value []byte
|
||||
doc []byte
|
||||
field uint16
|
||||
arrayPositions []uint64
|
||||
typ byte
|
||||
value []byte
|
||||
}
|
||||
|
||||
func (s *StoredRow) Key() []byte {
|
||||
|
@ -462,6 +449,11 @@ func (s *StoredRow) Key() []byte {
|
|||
fieldbuf := make([]byte, 2)
|
||||
binary.LittleEndian.PutUint16(fieldbuf, s.field)
|
||||
buf.Write(fieldbuf)
|
||||
for _, arrayPosition := range s.arrayPositions {
|
||||
arrayPositionBuffer := make([]byte, binary.MaxVarintLen64)
|
||||
numBytes := binary.PutUvarint(arrayPositionBuffer, arrayPosition)
|
||||
buf.Write(arrayPositionBuffer[0:numBytes])
|
||||
}
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
|
@ -473,7 +465,7 @@ func (s *StoredRow) Value() []byte {
|
|||
}
|
||||
|
||||
func (s *StoredRow) String() string {
|
||||
return fmt.Sprintf("Document: %s Field %d, Type: %s Value: %s", s.doc, s.field, string(s.typ), s.value)
|
||||
return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
|
||||
}
|
||||
|
||||
func (s *StoredRow) ScanPrefixForDoc() []byte {
|
||||
|
@ -484,16 +476,17 @@ func (s *StoredRow) ScanPrefixForDoc() []byte {
|
|||
return buf.Bytes()
|
||||
}
|
||||
|
||||
func NewStoredRow(doc string, field uint16, typ byte, value []byte) *StoredRow {
|
||||
func NewStoredRow(doc string, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
|
||||
return &StoredRow{
|
||||
doc: []byte(doc),
|
||||
field: field,
|
||||
typ: typ,
|
||||
value: value,
|
||||
doc: []byte(doc),
|
||||
field: field,
|
||||
arrayPositions: arrayPositions,
|
||||
typ: typ,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
|
||||
func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
|
||||
func NewStoredRowK(key []byte) (*StoredRow, error) {
|
||||
rv := StoredRow{}
|
||||
|
||||
buf := bytes.NewBuffer(key)
|
||||
|
@ -513,9 +506,21 @@ func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
|
|||
return nil, err
|
||||
}
|
||||
|
||||
rv.typ = value[0]
|
||||
|
||||
rv.value = value[1:]
|
||||
|
||||
rv.arrayPositions = make([]uint64, 0)
|
||||
nextArrayPos, err := binary.ReadUvarint(buf)
|
||||
for err == nil {
|
||||
rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
|
||||
nextArrayPos, err = binary.ReadUvarint(buf)
|
||||
}
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
|
||||
rv, err := NewStoredRowK(key)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv.typ = value[0]
|
||||
rv.value = value[1:]
|
||||
return rv, nil
|
||||
}
|
||||
|
|
|
@ -11,6 +11,8 @@ package upside_down
|
|||
import (
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
"code.google.com/p/goprotobuf/proto"
|
||||
)
|
||||
|
||||
func TestRows(t *testing.T) {
|
||||
|
@ -55,22 +57,22 @@ func TestRows(t *testing.T) {
|
|||
[]byte{3, 0, 0, 0, 0, 0, 0, 0, 195, 245, 72, 64, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 23, 0, 0, 0, 0, 0, 0, 0, 31, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 43, 0, 0, 0, 0, 0, 0, 0, 51, 0, 0, 0, 0, 0, 0, 0},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}}, []uint16{}),
|
||||
NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
|
||||
[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}, &BackIndexEntry{[]byte{'b', 'e', 'a', 't'}, 1}}, []uint16{}),
|
||||
NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0},
|
||||
[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1},
|
||||
},
|
||||
{
|
||||
NewBackIndexRow("budweiser", []*BackIndexEntry{&BackIndexEntry{[]byte{'b', 'e', 'e', 'r'}, 0}, &BackIndexEntry{[]byte{'b', 'e', 'a', 't'}, 1}}, []uint16{3, 4, 5}),
|
||||
NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}),
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0, 'b', 'e', 'a', 't', BYTE_SEPARATOR, 1, 0, BYTE_SEPARATOR, 3, 0, BYTE_SEPARATOR, 4, 0, BYTE_SEPARATOR, 5, 0},
|
||||
[]byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1, 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5},
|
||||
},
|
||||
{
|
||||
NewStoredRow("budweiser", 0, byte('t'), []byte("an american beer")),
|
||||
NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer")),
|
||||
[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', BYTE_SEPARATOR, 0, 0},
|
||||
[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
|
||||
},
|
||||
|
@ -94,13 +96,13 @@ func TestRows(t *testing.T) {
|
|||
}
|
||||
|
||||
// now test going back from k/v bytes to struct
|
||||
for _, test := range tests {
|
||||
for i, test := range tests {
|
||||
row, err := ParseFromKeyValue(test.outKey, test.outVal)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !reflect.DeepEqual(row, test.input) {
|
||||
t.Fatalf("Expected: %#v got: %#v", test.input, row)
|
||||
t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -186,15 +188,10 @@ func TestInvalidRows(t *testing.T) {
|
|||
[]byte{'b'},
|
||||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR, 0, 0},
|
||||
},
|
||||
// type b, invalid val (missing term)
|
||||
{
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{},
|
||||
},
|
||||
// type b, invalid val (missing field)
|
||||
{
|
||||
[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
|
||||
[]byte{'b', 'e', 'e', 'r', BYTE_SEPARATOR},
|
||||
[]byte{'g', 'a', 'r', 'b', 'a', 'g', 'e'},
|
||||
},
|
||||
// type s, invalid key (missing id)
|
||||
{
|
||||
|
|
|
@ -17,6 +17,8 @@ import (
|
|||
"github.com/couchbaselabs/bleve/document"
|
||||
"github.com/couchbaselabs/bleve/index"
|
||||
"github.com/couchbaselabs/bleve/index/store"
|
||||
|
||||
"code.google.com/p/goprotobuf/proto"
|
||||
)
|
||||
|
||||
var VERSION_KEY []byte = []byte{'v'}
|
||||
|
@ -223,9 +225,6 @@ func (udc *UpsideDownCouch) Close() {
|
|||
udc.store.Close()
|
||||
}
|
||||
|
||||
type termMap map[string]bool
|
||||
type fieldTermMap map[uint16]termMap
|
||||
|
||||
func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
||||
// first we lookup the backindex row for the doc id if it exists
|
||||
// lookup the back index row
|
||||
|
@ -250,35 +249,25 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) error {
|
|||
|
||||
func (udc *UpsideDownCouch) updateSingle(doc *document.Document, backIndexRow *BackIndexRow, addRows, updateRows, deleteRows []UpsideDownCouchRow) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []UpsideDownCouchRow) {
|
||||
|
||||
// a map for each field, map key is term (string) bool true for existence
|
||||
existingTermFieldMaps := make(fieldTermMap, 0)
|
||||
if backIndexRow != nil {
|
||||
for _, entry := range backIndexRow.entries {
|
||||
existingTermMap, fieldExists := existingTermFieldMaps[entry.field]
|
||||
if !fieldExists {
|
||||
existingTermMap = make(termMap, 0)
|
||||
existingTermFieldMaps[entry.field] = existingTermMap
|
||||
}
|
||||
existingTermMap[string(entry.term)] = true
|
||||
}
|
||||
existingTermKeys := make(map[string]bool)
|
||||
for _, key := range backIndexRow.AllTermKeys() {
|
||||
existingTermKeys[string(key)] = true
|
||||
}
|
||||
existingStoredFieldMap := make(map[uint16]bool)
|
||||
if backIndexRow != nil {
|
||||
for _, sf := range backIndexRow.storedFields {
|
||||
existingStoredFieldMap[sf] = true
|
||||
}
|
||||
|
||||
existingStoredKeys := make(map[string]bool)
|
||||
for _, key := range backIndexRow.AllStoredKeys() {
|
||||
existingStoredKeys[string(key)] = true
|
||||
}
|
||||
|
||||
// track our back index entries
|
||||
backIndexEntries := make([]*BackIndexEntry, 0)
|
||||
backIndexStoredFields := make([]uint16, 0)
|
||||
backIndexTermEntries := make([]*BackIndexTermEntry, 0)
|
||||
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
|
||||
|
||||
for _, field := range doc.Fields {
|
||||
fieldIndex, newFieldRow := udc.fieldNameToFieldIndex(field.Name())
|
||||
if newFieldRow != nil {
|
||||
updateRows = append(updateRows, newFieldRow)
|
||||
}
|
||||
existingTermMap := existingTermFieldMaps[fieldIndex]
|
||||
|
||||
if field.Options().IsIndexed() {
|
||||
|
||||
|
@ -290,17 +279,17 @@ func (udc *UpsideDownCouch) updateSingle(doc *document.Document, backIndexRow *B
|
|||
}
|
||||
|
||||
// encode this field
|
||||
indexAddRows, indexUpdateRows, indexBackIndexEntries := udc.indexField(doc.ID, field, fieldIndex, fieldLength, tokenFreqs, existingTermMap)
|
||||
indexAddRows, indexUpdateRows, indexBackIndexTermEntries := udc.indexField(doc.ID, field, fieldIndex, fieldLength, tokenFreqs, existingTermKeys)
|
||||
addRows = append(addRows, indexAddRows...)
|
||||
updateRows = append(updateRows, indexUpdateRows...)
|
||||
backIndexEntries = append(backIndexEntries, indexBackIndexEntries...)
|
||||
backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
|
||||
}
|
||||
|
||||
if field.Options().IsStored() {
|
||||
storeAddRows, storeUpdateRows := udc.storeField(doc.ID, field, fieldIndex, existingStoredFieldMap)
|
||||
storeAddRows, storeUpdateRows, indexBackIndexStoreEntries := udc.storeField(doc.ID, field, fieldIndex, existingStoredKeys)
|
||||
addRows = append(addRows, storeAddRows...)
|
||||
updateRows = append(updateRows, storeUpdateRows...)
|
||||
backIndexStoredFields = append(backIndexStoredFields, fieldIndex)
|
||||
backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -311,55 +300,62 @@ func (udc *UpsideDownCouch) updateSingle(doc *document.Document, backIndexRow *B
|
|||
if newFieldRow != nil {
|
||||
updateRows = append(updateRows, newFieldRow)
|
||||
}
|
||||
existingTermMap := existingTermFieldMaps[fieldIndex]
|
||||
if compositeField.Options().IsIndexed() {
|
||||
|
||||
fieldLength, tokenFreqs := compositeField.Analyze()
|
||||
// encode this field
|
||||
indexAddRows, indexUpdateRows, indexBackIndexEntries := udc.indexField(doc.ID, compositeField, fieldIndex, fieldLength, tokenFreqs, existingTermMap)
|
||||
indexAddRows, indexUpdateRows, indexBackIndexTermEntries := udc.indexField(doc.ID, compositeField, fieldIndex, fieldLength, tokenFreqs, existingTermKeys)
|
||||
addRows = append(addRows, indexAddRows...)
|
||||
updateRows = append(updateRows, indexUpdateRows...)
|
||||
backIndexEntries = append(backIndexEntries, indexBackIndexEntries...)
|
||||
backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
|
||||
}
|
||||
}
|
||||
|
||||
// build the back index row
|
||||
backIndexRow = NewBackIndexRow(doc.ID, backIndexEntries, backIndexStoredFields)
|
||||
backIndexRow = NewBackIndexRow(doc.ID, backIndexTermEntries, backIndexStoredEntries)
|
||||
updateRows = append(updateRows, backIndexRow)
|
||||
|
||||
// any of the existing rows that weren't updated need to be deleted
|
||||
for fieldIndex, existingTermFieldMap := range existingTermFieldMaps {
|
||||
if existingTermFieldMap != nil {
|
||||
for termString, _ := range existingTermFieldMap {
|
||||
termFreqRow := NewTermFrequencyRow([]byte(termString), uint16(fieldIndex), doc.ID, 0, 0)
|
||||
deleteRows = append(deleteRows, termFreqRow)
|
||||
}
|
||||
for existingTermKey, _ := range existingTermKeys {
|
||||
termFreqRow, err := NewTermFrequencyRowK([]byte(existingTermKey))
|
||||
if err == nil {
|
||||
deleteRows = append(deleteRows, termFreqRow)
|
||||
}
|
||||
}
|
||||
|
||||
// any of the existing stored fields that weren't updated need to be deleted
|
||||
for storedFieldIndex, _ := range existingStoredFieldMap {
|
||||
storedRow := NewStoredRow(doc.ID, storedFieldIndex, 'x', nil)
|
||||
deleteRows = append(deleteRows, storedRow)
|
||||
for existingStoredKey, _ := range existingStoredKeys {
|
||||
storedRow, err := NewStoredRowK([]byte(existingStoredKey))
|
||||
if err == nil {
|
||||
deleteRows = append(deleteRows, storedRow)
|
||||
}
|
||||
}
|
||||
|
||||
return addRows, updateRows, deleteRows
|
||||
}
|
||||
|
||||
// storeField builds the stored row for one field of document docId and
// classifies that row as either an add or an update.
//
// The row counts as an update when its key is already present in
// existingKeys; the key is then removed from that map. updateSingle, as shown
// in this diff, turns every key still left in the map into a delete row.
// When the key is not present the row is an add.
//
// It returns the rows to add, the rows to update, and a single
// BackIndexStoreEntry recording the field index and the field's array
// positions.
//
// NOTE(review): this diff shows both forms of the function. The two-result
// signature and the existingStoredFieldMap lines are the pre-change form,
// which tracked stored fields by field index only.
func (udc *UpsideDownCouch) storeField(docId string, field document.Field, fieldIndex uint16, existingStoredFieldMap map[uint16]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow) {
|
||||
func (udc *UpsideDownCouch) storeField(docId string, field document.Field, fieldIndex uint16, existingKeys map[string]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexStoreEntry) {
|
||||
updateRows := make([]UpsideDownCouchRow, 0)
|
||||
addRows := make([]UpsideDownCouchRow, 0)
|
||||
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
|
||||
fieldType := encodeFieldType(field)
|
||||
storedRow := NewStoredRow(docId, fieldIndex, fieldType, field.Value())
|
||||
_, ok := existingStoredFieldMap[fieldIndex]
|
||||
if ok {
|
||||
storedRow := NewStoredRow(docId, fieldIndex, field.ArrayPositions(), fieldType, field.Value())
|
||||
|
||||
// record the back index entry
|
||||
backIndexStoredEntry := BackIndexStoreEntry{Field: proto.Uint32(uint32(fieldIndex)), ArrayPositions: field.ArrayPositions()}
|
||||
backIndexStoredEntries = append(backIndexStoredEntries, &backIndexStoredEntry)
|
||||
|
||||
// the serialized row key is what identifies a previously stored value
storedRowKey := string(storedRow.Key())
|
||||
_, existed := existingKeys[storedRowKey]
|
||||
if existed {
|
||||
// this is an update
|
||||
updateRows = append(updateRows, storedRow)
|
||||
// this field was stored last time, delete it from that map
|
||||
delete(existingStoredFieldMap, fieldIndex)
|
||||
delete(existingKeys, storedRowKey)
|
||||
} else {
|
||||
addRows = append(addRows, storedRow)
|
||||
}
|
||||
return addRows, updateRows
|
||||
return addRows, updateRows, backIndexStoredEntries
|
||||
}
|
||||
|
||||
func encodeFieldType(f document.Field) byte {
|
||||
|
@ -377,11 +373,11 @@ func encodeFieldType(f document.Field) byte {
|
|||
return fieldType
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) indexField(docId string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, existingTermMap termMap) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexEntry) {
|
||||
func (udc *UpsideDownCouch) indexField(docId string, field document.Field, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, existingKeys map[string]bool) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []*BackIndexTermEntry) {
|
||||
|
||||
updateRows := make([]UpsideDownCouchRow, 0)
|
||||
addRows := make([]UpsideDownCouchRow, 0)
|
||||
backIndexEntries := make([]*BackIndexEntry, 0)
|
||||
backIndexTermEntries := make([]*BackIndexTermEntry, 0)
|
||||
fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))
|
||||
|
||||
for _, tf := range tokenFreqs {
|
||||
|
@ -395,29 +391,23 @@ func (udc *UpsideDownCouch) indexField(docId string, field document.Field, field
|
|||
}
|
||||
|
||||
// record the back index entry
|
||||
backIndexEntry := BackIndexEntry{tf.Term, fieldIndex}
|
||||
backIndexEntries = append(backIndexEntries, &backIndexEntry)
|
||||
backIndexTermEntry := BackIndexTermEntry{Term: proto.String(string(tf.Term)), Field: proto.Uint32(uint32(fieldIndex))}
|
||||
backIndexTermEntries = append(backIndexTermEntries, &backIndexTermEntry)
|
||||
|
||||
// remove the entry from the map of existing term fields if it exists
|
||||
if existingTermMap != nil {
|
||||
termString := string(tf.Term)
|
||||
_, ok := existingTermMap[termString]
|
||||
if ok {
|
||||
// this is an update
|
||||
updateRows = append(updateRows, termFreqRow)
|
||||
// this term existed last time, delete it from that map
|
||||
delete(existingTermMap, termString)
|
||||
} else {
|
||||
// this is an add
|
||||
addRows = append(addRows, termFreqRow)
|
||||
}
|
||||
tfrKeyString := string(termFreqRow.Key())
|
||||
_, existed := existingKeys[tfrKeyString]
|
||||
if existed {
|
||||
// this is an update
|
||||
updateRows = append(updateRows, termFreqRow)
|
||||
// this term existed last time, delete it from that map
|
||||
delete(existingKeys, tfrKeyString)
|
||||
} else {
|
||||
// this is an add
|
||||
addRows = append(addRows, termFreqRow)
|
||||
}
|
||||
}
|
||||
|
||||
return addRows, updateRows, backIndexEntries
|
||||
return addRows, updateRows, backIndexTermEntries
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) fieldNameToFieldIndex(fieldName string) (uint16, *FieldRow) {
|
||||
|
@ -456,12 +446,12 @@ func (udc *UpsideDownCouch) Delete(id string) error {
|
|||
|
||||
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
|
||||
|
||||
for _, backIndexEntry := range backIndexRow.entries {
|
||||
tfr := NewTermFrequencyRow(backIndexEntry.term, backIndexEntry.field, id, 0, 0)
|
||||
for _, backIndexEntry := range backIndexRow.termEntries {
|
||||
tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), id, 0, 0)
|
||||
deleteRows = append(deleteRows, tfr)
|
||||
}
|
||||
for _, sf := range backIndexRow.storedFields {
|
||||
sf := NewStoredRow(id, sf, 'x', nil)
|
||||
for _, se := range backIndexRow.storedEntries {
|
||||
sf := NewStoredRow(id, uint16(*se.Field), se.ArrayPositions, 'x', nil)
|
||||
deleteRows = append(deleteRows, sf)
|
||||
}
|
||||
|
||||
|
@ -552,7 +542,7 @@ func (udc *UpsideDownCouch) DocIdReader(start, end string) (index.DocIdReader, e
|
|||
|
||||
func (udc *UpsideDownCouch) Document(id string) (*document.Document, error) {
|
||||
rv := document.NewDocument(id)
|
||||
storedRow := NewStoredRow(id, 0, 'x', nil)
|
||||
storedRow := NewStoredRow(id, 0, []uint64{}, 'x', nil)
|
||||
storedRowScanPrefix := storedRow.ScanPrefixForDoc()
|
||||
it := udc.store.Iterator(storedRowScanPrefix)
|
||||
key, val, valid := it.Current()
|
||||
|
@ -583,14 +573,14 @@ func (udc *UpsideDownCouch) DocumentFieldTerms(id string) (index.FieldTerms, err
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
rv := make(index.FieldTerms, len(back.entries))
|
||||
for _, entry := range back.entries {
|
||||
fieldName := udc.fieldIndexToName(entry.field)
|
||||
rv := make(index.FieldTerms, len(back.termEntries))
|
||||
for _, entry := range back.termEntries {
|
||||
fieldName := udc.fieldIndexToName(uint16(*entry.Field))
|
||||
terms, ok := rv[fieldName]
|
||||
if !ok {
|
||||
terms = make([]string, 0)
|
||||
}
|
||||
terms = append(terms, string(entry.term))
|
||||
terms = append(terms, *entry.Term)
|
||||
rv[fieldName] = terms
|
||||
}
|
||||
return rv, nil
|
||||
|
@ -599,11 +589,11 @@ func (udc *UpsideDownCouch) DocumentFieldTerms(id string) (index.FieldTerms, err
|
|||
// decodeFieldType rebuilds a document.Field named name from a stored value,
// picking the constructor from the type byte typ:
//
//	't' -> text field
//	'n' -> numeric field (from its encoded bytes)
//	'd' -> datetime field (from its encoded bytes)
//
// Any other type byte yields nil. The three-argument constructor calls pass an
// empty []uint64 as the field's array positions.
func decodeFieldType(typ byte, name string, value []byte) document.Field {
|
||||
switch typ {
|
||||
case 't':
|
||||
return document.NewTextField(name, value)
|
||||
return document.NewTextField(name, []uint64{}, value)
|
||||
case 'n':
|
||||
return document.NewNumericFieldFromBytes(name, value)
|
||||
return document.NewNumericFieldFromBytes(name, []uint64{}, value)
|
||||
case 'd':
|
||||
return document.NewDateTimeFieldFromBytes(name, value)
|
||||
return document.NewDateTimeFieldFromBytes(name, []uint64{}, value)
|
||||
}
|
||||
// unknown type byte
return nil
|
||||
}
|
||||
|
|
|
@ -0,0 +1,98 @@
|
|||
// Code generated by protoc-gen-go.
|
||||
// source: upside_down.proto
|
||||
// DO NOT EDIT!
|
||||
|
||||
/*
|
||||
Package upside_down is a generated protocol buffer package.
|
||||
|
||||
It is generated from these files:
|
||||
upside_down.proto
|
||||
|
||||
It has these top-level messages:
|
||||
BackIndexTermEntry
|
||||
BackIndexStoreEntry
|
||||
BackIndexRowValue
|
||||
*/
|
||||
package upside_down
|
||||
|
||||
import proto "code.google.com/p/goprotobuf/proto"
|
||||
import math "math"
|
||||
|
||||
// Reference imports to suppress errors if they are not otherwise used.
|
||||
var _ = proto.Marshal
|
||||
var _ = math.Inf
|
||||
|
||||
type BackIndexTermEntry struct {
|
||||
Term *string `protobuf:"bytes,1,req,name=term" json:"term,omitempty"`
|
||||
Field *uint32 `protobuf:"varint,2,req,name=field" json:"field,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexTermEntry) Reset() { *m = BackIndexTermEntry{} }
|
||||
func (m *BackIndexTermEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexTermEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexTermEntry) GetTerm() string {
|
||||
if m != nil && m.Term != nil {
|
||||
return *m.Term
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *BackIndexTermEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
type BackIndexStoreEntry struct {
|
||||
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
|
||||
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) Reset() { *m = BackIndexStoreEntry{} }
|
||||
func (m *BackIndexStoreEntry) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexStoreEntry) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetField() uint32 {
|
||||
if m != nil && m.Field != nil {
|
||||
return *m.Field
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func (m *BackIndexStoreEntry) GetArrayPositions() []uint64 {
|
||||
if m != nil {
|
||||
return m.ArrayPositions
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type BackIndexRowValue struct {
|
||||
TermEntries []*BackIndexTermEntry `protobuf:"bytes,1,rep,name=termEntries" json:"termEntries,omitempty"`
|
||||
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
|
||||
XXX_unrecognized []byte `json:"-"`
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) Reset() { *m = BackIndexRowValue{} }
|
||||
func (m *BackIndexRowValue) String() string { return proto.CompactTextString(m) }
|
||||
func (*BackIndexRowValue) ProtoMessage() {}
|
||||
|
||||
func (m *BackIndexRowValue) GetTermEntries() []*BackIndexTermEntry {
|
||||
if m != nil {
|
||||
return m.TermEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
|
||||
if m != nil {
|
||||
return m.StoredEntries
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func init() {
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
// BackIndexTermEntry records one term that was indexed for a document,
// together with the index of the field it was indexed under.
message BackIndexTermEntry {
|
||||
// the indexed term
required string term = 1;
|
||||
// field index (a uint16 in the Go code, widened to uint32 here)
required uint32 field = 2;
|
||||
}
|
||||
|
||||
// BackIndexStoreEntry records one stored field value of a document: the index
// of the field and the array positions of the value.
message BackIndexStoreEntry {
|
||||
// field index (a uint16 in the Go code, widened to uint32 here)
required uint32 field = 1;
|
||||
// array positions of the value, as returned by the field's ArrayPositions()
repeated uint64 arrayPositions = 2;
|
||||
}
|
||||
|
||||
// BackIndexRowValue groups everything the back index records for one document:
// its term entries and its stored-field entries.
// NOTE(review): presumably this is what gets serialized as the value of a
// back index row; confirm against the row encoding code.
message BackIndexRowValue {
|
||||
// one entry per indexed term
repeated BackIndexTermEntry termEntries = 1;
|
||||
// one entry per stored field value
repeated BackIndexStoreEntry storedEntries = 2;
|
||||
}
|
|
@ -81,7 +81,7 @@ func TestIndexInsert(t *testing.T) {
|
|||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -119,7 +119,7 @@ func TestIndexInsertThenDelete(t *testing.T) {
|
|||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -127,7 +127,7 @@ func TestIndexInsertThenDelete(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc2 := document.NewDocument("2")
|
||||
doc2.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc2.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc2)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -181,7 +181,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -189,7 +189,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
|
|||
|
||||
// this update should overwrite one term, and introduce one new one
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []byte("test fail"), testAnalyzer))
|
||||
doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test fail"), testAnalyzer))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
|
@ -204,7 +204,7 @@ func TestIndexInsertThenUpdate(t *testing.T) {
|
|||
|
||||
// now do another update that should remove one of the terms
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("fail")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("fail")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error deleting entry from index: %v", err)
|
||||
|
@ -231,7 +231,7 @@ func TestIndexInsertMultiple(t *testing.T) {
|
|||
var expectedCount uint64 = 0
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -239,7 +239,7 @@ func TestIndexInsertMultiple(t *testing.T) {
|
|||
expectedCount++
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -264,7 +264,7 @@ func TestIndexInsertMultiple(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc = document.NewDocument("3")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -298,7 +298,7 @@ func TestIndexInsertWithStore(t *testing.T) {
|
|||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -400,7 +400,7 @@ func TestIndexBatch(t *testing.T) {
|
|||
|
||||
// first create 2 docs the old fashioned way
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextField("name", []byte("test")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -408,7 +408,7 @@ func TestIndexBatch(t *testing.T) {
|
|||
expectedCount += 1
|
||||
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test2")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -423,10 +423,10 @@ func TestIndexBatch(t *testing.T) {
|
|||
|
||||
batch := make(index.Batch, 0)
|
||||
doc = document.NewDocument("3")
|
||||
doc.AddField(document.NewTextField("name", []byte("test3")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
|
||||
batch["3"] = doc
|
||||
doc = document.NewDocument("2")
|
||||
doc.AddField(document.NewTextField("name", []byte("test2updated")))
|
||||
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated")))
|
||||
batch["2"] = doc
|
||||
batch["1"] = nil
|
||||
|
||||
|
@ -480,9 +480,9 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
|
|||
}
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -553,8 +553,8 @@ func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
|
|||
|
||||
// now update the document, but omit one of the fields
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("testup"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", 36.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testup"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 36.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -621,9 +621,9 @@ func TestIndexInsertFields(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -656,8 +656,8 @@ func TestIndexUpdateComposites(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.INDEX_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
|
@ -679,8 +679,8 @@ func TestIndexUpdateComposites(t *testing.T) {
|
|||
|
||||
// now lets update it
|
||||
doc = document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("testupdated"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []byte("misterupdated"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testupdated"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("misterupdated"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.INDEX_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
|
@ -725,8 +725,8 @@ func TestIndexFieldsMisc(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
@ -762,8 +762,8 @@ func TestIndexTermReaderCompositeFields(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, document.INDEX_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
|
@ -802,8 +802,8 @@ func TestIndexDocumentFieldTerms(t *testing.T) {
|
|||
defer idx.Close()
|
||||
|
||||
doc := document.NewDocument("1")
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []byte("test"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), document.INDEX_FIELD|document.STORE_FIELD|document.INCLUDE_TERM_VECTORS))
|
||||
err = idx.Update(doc)
|
||||
if err != nil {
|
||||
t.Errorf("Error updating index: %v", err)
|
||||
|
|
|
@ -56,6 +56,7 @@ var people = []*Person{
|
|||
Age: 19,
|
||||
Birthday: time.Unix(1000000000, 0),
|
||||
Title: "mista",
|
||||
Tags: []string{"gopher", "belieber"},
|
||||
},
|
||||
&Person{
|
||||
Identifier: "b",
|
||||
|
@ -222,4 +223,63 @@ func TestIndex(t *testing.T) {
|
|||
t.Errorf("expected next hit id 'c', got '%s'", searchResult.Hits[1].ID)
|
||||
}
|
||||
}
|
||||
|
||||
// test behavior of arrays
|
||||
// make sure we can successfully find by all elements in array
|
||||
termQuery = NewTermQuery("gopher").SetField("tags")
|
||||
searchRequest = NewSearchRequest(termQuery)
|
||||
searchResult, err = index.Search(searchRequest)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
if searchResult.Total != uint64(1) {
|
||||
t.Errorf("expected 1 total hit for term query, got %d", searchResult.Total)
|
||||
} else {
|
||||
if searchResult.Hits[0].ID != "a" {
|
||||
t.Errorf("expected top hit id 'a', got '%s'", searchResult.Hits[0].ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
termQuery = NewTermQuery("belieber").SetField("tags")
|
||||
searchRequest = NewSearchRequest(termQuery)
|
||||
searchResult, err = index.Search(searchRequest)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
if searchResult.Total != uint64(1) {
|
||||
t.Errorf("expected 1 total hit for term query, got %d", searchResult.Total)
|
||||
} else {
|
||||
if searchResult.Hits[0].ID != "a" {
|
||||
t.Errorf("expected top hit id 'a', got '%s'", searchResult.Hits[0].ID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
termQuery = NewTermQuery("notintagsarray").SetField("tags")
|
||||
searchRequest = NewSearchRequest(termQuery)
|
||||
searchResult, err = index.Search(searchRequest)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if searchResult.Total != uint64(0) {
|
||||
t.Errorf("expected 0 total hits")
|
||||
}
|
||||
|
||||
// lookup document a
|
||||
// expect to find 2 values for field "tags"
|
||||
tagsCount := 0
|
||||
doc, err := index.Document("a")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
} else {
|
||||
for _, f := range doc.Fields {
|
||||
if f.Name() == "tags" {
|
||||
tagsCount++
|
||||
}
|
||||
}
|
||||
}
|
||||
if tagsCount != 2 {
|
||||
t.Errorf("expected to find 2 values for tags")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -223,7 +223,7 @@ func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) er
|
|||
docType := im.determineType(data)
|
||||
docMapping := im.MappingForType(docType)
|
||||
walkContext := newWalkContext(doc, docMapping)
|
||||
im.walkDocument(data, []string{}, walkContext)
|
||||
im.walkDocument(data, []string{}, []uint64{}, walkContext)
|
||||
|
||||
// see if the _all field was disabled
|
||||
allMapping := docMapping.DocumentMappingForPath("_all")
|
||||
|
@ -249,7 +249,7 @@ func newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
|
|||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) walkDocument(data interface{}, path []string, context *walkContext) {
|
||||
func (im *IndexMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
|
||||
val := reflect.ValueOf(data)
|
||||
typ := val.Type()
|
||||
switch typ.Kind() {
|
||||
|
@ -259,7 +259,7 @@ func (im *IndexMapping) walkDocument(data interface{}, path []string, context *w
|
|||
for _, key := range val.MapKeys() {
|
||||
fieldName := key.String()
|
||||
fieldVal := val.MapIndex(key).Interface()
|
||||
im.processProperty(fieldVal, append(path, fieldName), context)
|
||||
im.processProperty(fieldVal, append(path, fieldName), indexes, context)
|
||||
}
|
||||
}
|
||||
case reflect.Struct:
|
||||
|
@ -276,25 +276,25 @@ func (im *IndexMapping) walkDocument(data interface{}, path []string, context *w
|
|||
|
||||
if val.Field(i).CanInterface() {
|
||||
fieldVal := val.Field(i).Interface()
|
||||
im.processProperty(fieldVal, append(path, fieldName), context)
|
||||
im.processProperty(fieldVal, append(path, fieldName), indexes, context)
|
||||
}
|
||||
}
|
||||
case reflect.Slice, reflect.Array:
|
||||
for i := 0; i < val.Len(); i++ {
|
||||
if val.Index(i).CanInterface() {
|
||||
fieldVal := val.Index(i).Interface()
|
||||
im.processProperty(fieldVal, path, context)
|
||||
im.processProperty(fieldVal, path, append(indexes, uint64(i)), context)
|
||||
}
|
||||
}
|
||||
case reflect.Ptr:
|
||||
ptrElem := val.Elem()
|
||||
if ptrElem.IsValid() && ptrElem.CanInterface() {
|
||||
im.walkDocument(ptrElem.Interface(), path, context)
|
||||
im.walkDocument(ptrElem.Interface(), path, indexes, context)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (im *IndexMapping) processProperty(property interface{}, path []string, context *walkContext) {
|
||||
func (im *IndexMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
|
||||
pathString := encodePath(path)
|
||||
// look to see if there is a mapping for this field
|
||||
subDocMapping := context.dm.DocumentMappingForPath(pathString)
|
||||
|
@ -316,7 +316,7 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
options := fieldMapping.Options()
|
||||
if *fieldMapping.Type == "text" {
|
||||
analyzer := im.AnalyzerNamed(*fieldMapping.Analyzer)
|
||||
field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer)
|
||||
field := document.NewTextFieldCustom(fieldName, indexes, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
|
||||
if fieldMapping.IncludeInAll != nil && !*fieldMapping.IncludeInAll {
|
||||
|
@ -331,7 +331,7 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
if dateTimeParser != nil {
|
||||
parsedDateTime, err := dateTimeParser.ParseDateTime(propertyValueString)
|
||||
if err != nil {
|
||||
field := document.NewDateTimeFieldWithIndexingOptions(fieldName, parsedDateTime, options)
|
||||
field := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, parsedDateTime, options)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
}
|
||||
|
@ -352,11 +352,11 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
analyzerName = im.DefaultAnalyzer
|
||||
}
|
||||
analyzer := im.AnalyzerNamed(analyzerName)
|
||||
field := document.NewTextFieldCustom(pathString, []byte(propertyValueString), options, analyzer)
|
||||
field := document.NewTextFieldCustom(pathString, indexes, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
} else {
|
||||
// index as datetime
|
||||
field := document.NewDateTimeField(pathString, parsedDateTime)
|
||||
field := document.NewDateTimeField(pathString, indexes, parsedDateTime)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
}
|
||||
|
@ -369,13 +369,13 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
fieldName := getFieldName(pathString, path, fieldMapping)
|
||||
if *fieldMapping.Type == "number" {
|
||||
options := fieldMapping.Options()
|
||||
field := document.NewNumericFieldWithIndexingOptions(fieldName, propertyValFloat, options)
|
||||
field := document.NewNumericFieldWithIndexingOptions(fieldName, indexes, propertyValFloat, options)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// automatic indexing behavior
|
||||
field := document.NewNumericField(pathString, propertyValFloat)
|
||||
field := document.NewNumericField(pathString, indexes, propertyValFloat)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
case reflect.Struct:
|
||||
|
@ -388,21 +388,21 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
|||
fieldName := getFieldName(pathString, path, fieldMapping)
|
||||
if *fieldMapping.Type == "datetime" {
|
||||
options := fieldMapping.Options()
|
||||
field := document.NewDateTimeFieldWithIndexingOptions(fieldName, property, options)
|
||||
field := document.NewDateTimeFieldWithIndexingOptions(fieldName, indexes, property, options)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// automatic indexing behavior
|
||||
field := document.NewDateTimeField(pathString, property)
|
||||
field := document.NewDateTimeField(pathString, indexes, property)
|
||||
context.doc.AddField(field)
|
||||
}
|
||||
|
||||
default:
|
||||
im.walkDocument(property, path, context)
|
||||
im.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
default:
|
||||
im.walkDocument(property, path, context)
|
||||
im.walkDocument(property, path, indexes, context)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -41,29 +41,29 @@ var twoDocIndexDescIndexingOptions = document.DEFAULT_TEXT_INDEXING_OPTIONS | do
|
|||
var twoDocIndexDocs = []*document.Document{
|
||||
// must have 4/4 beer
|
||||
document.NewDocument("1").
|
||||
AddField(document.NewTextField("name", []byte("marty"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)),
|
||||
AddField(document.NewTextField("name", []uint64{}, []byte("marty"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)),
|
||||
// must have 1/4 beer
|
||||
document.NewDocument("2").
|
||||
AddField(document.NewTextField("name", []byte("steve"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []byte("couchbase way"), testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
AddField(document.NewTextField("name", []uint64{}, []byte("steve"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
|
||||
// must have 1/4 beer
|
||||
document.NewDocument("3").
|
||||
AddField(document.NewTextField("name", []byte("dustin"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
|
||||
// must have 65/65 beer
|
||||
document.NewDocument("4").
|
||||
AddField(document.NewTextField("name", []byte("ravi"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
|
||||
AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
|
||||
// must have 0/x beer
|
||||
document.NewDocument("5").
|
||||
AddField(document.NewTextField("name", []byte("bobert"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []byte("mister"), testAnalyzer)),
|
||||
AddField(document.NewTextField("name", []uint64{}, []byte("bobert"))).
|
||||
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
|
||||
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
|
||||
}
|
||||
|
||||
func scoresCloseEnough(a, b float64) bool {
|
||||
|
|
|
@ -43,7 +43,7 @@ func TestSimpleHighlighter(t *testing.T) {
|
|||
}
|
||||
|
||||
expectedFragment := DEFAULT_SEPARATOR + "the " + DEFAULT_ANSI_HIGHLIGHT + "quick" + reset + " brown " + DEFAULT_ANSI_HIGHLIGHT + "fox" + reset + " jumps over the lazy dog" + DEFAULT_SEPARATOR
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []byte("the quick brown fox jumps over the lazy dog")))
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []uint64{}, []byte("the quick brown fox jumps over the lazy dog")))
|
||||
|
||||
fragment := highlighter.BestFragmentInField(&docMatch, doc, "desc")
|
||||
if fragment != expectedFragment {
|
||||
|
@ -63,7 +63,7 @@ Fusce viverra eleifend iaculis. Maecenas tempor dictum cursus. Mauris faucibus,
|
|||
|
||||
Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttitor et aliquet sed, vulputate in odio. Aenean feugiat est quis neque vehicula, eget vulputate nunc tempor. Donec quis nulla ut quam feugiat consectetur ut et justo. Nulla congue, metus auctor facilisis scelerisque, nunc risus vulputate urna, in blandit urna nibh et neque. Etiam quis tortor ut nulla dignissim dictum non sed ligula. Vivamus accumsan ligula eget ipsum ultrices, a tincidunt urna blandit. In hac habitasse platea dictumst.`)
|
||||
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("full", fieldBytes))
|
||||
doc := document.NewDocument("a").AddField(document.NewTextField("full", []uint64{}, fieldBytes))
|
||||
docMatch := DocumentMatch{
|
||||
ID: "a",
|
||||
Score: 1.0,
|
||||
|
|
|
@ -29,61 +29,61 @@ func TestTermSearcher(t *testing.T) {
|
|||
i.Update(&document.Document{
|
||||
ID: "a",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "b",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "c",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "d",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "e",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "f",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "g",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "h",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "i",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("desc", []byte("beer")),
|
||||
document.NewTextField("desc", []uint64{}, []byte("beer")),
|
||||
},
|
||||
})
|
||||
i.Update(&document.Document{
|
||||
ID: "j",
|
||||
Fields: []document.Field{
|
||||
document.NewTextField("title", []byte("cat")),
|
||||
document.NewTextField("title", []uint64{}, []byte("cat")),
|
||||
},
|
||||
})
|
||||
|
||||
|
|
Binary file not shown.
Loading…
Reference in New Issue