160 lines
4.4 KiB
Go
160 lines
4.4 KiB
Go
// Copyright (c) 2015 Couchbase, Inc.
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
// and limitations under the License.
|
|
|
|
package firestorm
|
|
|
|
import (
|
|
"bytes"
|
|
|
|
"github.com/blevesearch/bleve/index"
|
|
"github.com/blevesearch/bleve/index/store"
|
|
)
|
|
|
|
type firestormTermFieldReader struct {
|
|
r *firestormReader
|
|
field uint16
|
|
term []byte
|
|
prefix []byte
|
|
count uint64
|
|
i store.KVIterator
|
|
}
|
|
|
|
func newFirestormTermFieldReader(r *firestormReader, field uint16, term []byte) (index.TermFieldReader, error) {
|
|
dictionaryKey := DictionaryRowKey(field, term)
|
|
dictionaryValue, err := r.r.Get(dictionaryKey)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
prefix := TermFreqIteratorStart(field, term)
|
|
logger.Printf("starting term freq iterator at: '%s' - % x", prefix, prefix)
|
|
i := r.r.PrefixIterator(prefix)
|
|
rv := firestormTermFieldReader{
|
|
r: r,
|
|
field: field,
|
|
term: term,
|
|
prefix: prefix,
|
|
i: i,
|
|
}
|
|
|
|
// NOTE: in firestorm the dictionary row is advisory in nature
|
|
// it *may* tell us the correct out
|
|
// if this record does not exist, it DOES not mean that there isno
|
|
// usage, we must scan the term frequencies to be sure
|
|
if dictionaryValue != nil {
|
|
dictionaryRow, err := NewDictionaryRowKV(dictionaryKey, dictionaryValue)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
rv.count = dictionaryRow.Count()
|
|
}
|
|
|
|
return &rv, nil
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
|
if r.i != nil {
|
|
key, val, valid := r.i.Current()
|
|
for valid {
|
|
logger.Printf("see key: '%s' - % x", key, key)
|
|
tfrsByDocNum := make(map[uint64]*TermFreqRow)
|
|
tfr, err := NewTermFreqRowKV(key, val)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
tfrsByDocNum[tfr.DocNum()] = tfr
|
|
|
|
// now we have a possible row, but there may be more rows for the same docid
|
|
// find these now
|
|
err = r.findNextTfrsWithSameDocId(tfrsByDocNum, tfr.DocID())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
docNumList := make(DocNumberList, 0, len(tfrsByDocNum))
|
|
for dn := range tfrsByDocNum {
|
|
docNumList = append(docNumList, dn)
|
|
}
|
|
|
|
logger.Printf("docNumList: %v", docNumList)
|
|
|
|
highestValidDocNum := r.r.s.Which(tfr.docID, docNumList)
|
|
if highestValidDocNum == 0 {
|
|
// no valid doc number
|
|
key, val, valid = r.i.Current()
|
|
continue
|
|
}
|
|
logger.Printf("highest valid: %d", highestValidDocNum)
|
|
|
|
tfr = tfrsByDocNum[highestValidDocNum]
|
|
|
|
return &index.TermFieldDoc{
|
|
ID: string(tfr.DocID()),
|
|
Freq: tfr.Freq(),
|
|
Norm: float64(tfr.Norm()),
|
|
Vectors: r.termFieldVectorsFromTermVectors(tfr.Vectors()),
|
|
}, nil
|
|
}
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) findNextTfrsWithSameDocId(tfrsByDocNum map[uint64]*TermFreqRow, docID []byte) error {
|
|
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(r.field, r.term, docID)
|
|
r.i.Next()
|
|
key, val, valid := r.i.Current()
|
|
for valid && bytes.HasPrefix(key, tfrDocIdPrefix) {
|
|
tfr, err := NewTermFreqRowKV(key, val)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
tfrsByDocNum[tfr.DocNum()] = tfr
|
|
r.i.Next()
|
|
key, val, valid = r.i.Current()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
|
|
if r.i != nil {
|
|
tfrDocIdPrefix := TermFreqPrefixFieldTermDocId(r.field, r.term, []byte(docID))
|
|
r.i.Seek(tfrDocIdPrefix)
|
|
return r.Next()
|
|
}
|
|
return nil, nil
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) Count() uint64 {
|
|
return r.count
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) Close() error {
|
|
if r.i != nil {
|
|
return r.i.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (r *firestormTermFieldReader) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
|
|
rv := make([]*index.TermFieldVector, len(in))
|
|
|
|
for i, tv := range in {
|
|
fieldName := r.r.f.fieldCache.FieldIndexed(uint16(tv.GetField()))
|
|
tfv := index.TermFieldVector{
|
|
Field: fieldName,
|
|
ArrayPositions: tv.GetArrayPositions(),
|
|
Pos: tv.GetPos(),
|
|
Start: tv.GetStart(),
|
|
End: tv.GetEnd(),
|
|
}
|
|
rv[i] = &tfv
|
|
}
|
|
return rv
|
|
}
|