Merge branch 'WIP-perf-20160106' of https://github.com/steveyen/bleve into steveyen-WIP-perf-20160106
commit 48fcd5a7d5
@@ -44,10 +44,16 @@ type Snapshot struct {
 // returns which doc number is valid
 // if none, then 0
 func (s *Snapshot) Which(docID []byte, docNumList DocNumberList) uint64 {
-    sort.Sort(docNumList)
-    highestValidDocNum := docNumList.HighestValid(s.maxRead)
-    if highestValidDocNum > 0 && s.Valid(docID, highestValidDocNum) {
-        return highestValidDocNum
+    inFlightVal := s.inFlight.Get(&InFlightItem{docID: docID})
+
+    sort.Sort(docNumList) // Descending ordering.
+
+    for _, docNum := range docNumList {
+        if docNum > 0 && docNum <= s.maxRead &&
+            (inFlightVal == nil || inFlightVal.(*InFlightItem).docNum == docNum) &&
+            !s.deletedDocNumbers.Test(uint(docNum)) {
+            return docNum
+        }
     }
     return 0
 }
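Note: the rewritten Which() drops the HighestValid/Valid pair and instead scans the doc numbers from highest to lowest, returning the first one that is within maxRead, consistent with any in-flight entry for the docID, and not in the deleted set. This relies on sort.Sort(docNumList) ordering descending, as the inline comment says. A minimal sketch of such a sort.Interface, assuming DocNumberList is a []uint64 (its actual definition is not shown in this diff):

    // Hypothetical sketch: a doc-number slice whose Less orders
    // descending, so the first match in a linear scan is the
    // highest valid doc number.
    type DocNumberList []uint64

    func (l DocNumberList) Len() int           { return len(l) }
    func (l DocNumberList) Swap(i, j int)      { l[i], l[j] = l[j], l[i] }
    func (l DocNumberList) Less(i, j int) bool { return l[i] > l[j] }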
@@ -146,7 +146,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
     aw := index.NewAnalysisWork(f, doc, resultChan)

     // put the work on the queue
-    go f.analysisQueue.Queue(aw)
+    f.analysisQueue.Queue(aw)

     // wait for the result
     result := <-resultChan
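Note: dropping the `go` makes the enqueue synchronous, so Update no longer spawns a goroutine per document; it hands the work over and then blocks on resultChan as before. This assumes the analysis queue is serviced by its own worker goroutines. A minimal sketch of that shape (not bleve's actual AnalysisQueue):

    // queue fans work out to a fixed pool of workers, so callers
    // can enqueue synchronously without a goroutine per item.
    type queue struct{ ch chan func() }

    func newQueue(workers int) *queue {
        q := &queue{ch: make(chan func())}
        for i := 0; i < workers; i++ {
            go func() {
                for w := range q.ch {
                    w() // run one unit of analysis work
                }
            }()
        }
        return q
    }

    func (q *queue) Queue(w func()) { q.ch <- w }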
@@ -135,9 +135,10 @@ func (gc *GarbageCollector) cleanup() {
     termFreqStart := TermFreqIteratorStart(0, []byte{ByteSeparator})
     termFreqEnd := TermFreqIteratorStart(math.MaxUint16, []byte{ByteSeparator})

+    var tfr TermFreqRow
     dictionaryDeltas := make(map[string]int64)
     err = visitRange(reader, termFreqStart, termFreqEnd, func(key, val []byte) (bool, error) {
-        tfr, err := NewTermFreqRowKV(key, val)
+        err := tfr.ParseKey(key)
         if err != nil {
             return false, err
         }
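Note: this hunk and the three like it below (stored rows, lookup, warmup) apply the same change: declare one row value outside the visitor closure and call ParseKey on it for every key, instead of allocating a fresh row via NewTermFreqRowKV/NewStoredRowKV per visited key, which also unmarshaled the value even when only the key was needed. The pattern, under those assumptions:

    // One reusable row for the whole scan; ParseKey overwrites its
    // fields in place, so no per-key allocation or value unmarshal.
    var tfr TermFreqRow
    err = visitRange(reader, start, end, func(key, val []byte) (bool, error) {
        if err := tfr.ParseKey(key); err != nil {
            return false, err
        }
        // ... use tfr.field / tfr.term / tfr.docID here ...
        return true, nil
    })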
@@ -158,8 +159,9 @@ func (gc *GarbageCollector) cleanup() {
     }

     // walk all the stored rows
+    var sr StoredRow
     err = visitPrefix(reader, StoredKeyPrefix, func(key, val []byte) (bool, error) {
-        sr, err := NewStoredRowKV(key, val)
+        err := sr.ParseKey(key)
         if err != nil {
             return false, err
         }
@@ -91,10 +91,11 @@ func (l *Lookuper) lookup(item *InFlightItem) {

     prefix := TermFreqPrefixFieldTermDocId(0, nil, item.docID)
     logger.Printf("lookuper prefix - % x", prefix)
+    var tfk TermFreqRow
     docNums := make(DocNumberList, 0)
     err = visitPrefix(reader, prefix, func(key, val []byte) (bool, error) {
         logger.Printf("lookuper sees key % x", key)
-        tfk, err := NewTermFreqRowKV(key, val)
+        err := tfk.ParseKey(key)
         if err != nil {
             return false, err
         }
@@ -41,46 +41,52 @@ func NewStoredRow(docID []byte, docNum uint64, field uint16, arrayPositions []ui

 func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
     rv := StoredRow{}
-    buf := bytes.NewBuffer(key)
-    _, err := buf.ReadByte() // type
+    err := rv.ParseKey(key)
     if err != nil {
         return nil, err
     }

-    rv.docID, err = buf.ReadBytes(ByteSeparator)
-    if len(rv.docID) < 2 { // 1 for min doc id length, 1 for separator
-        err = fmt.Errorf("invalid doc length 0")
-        return nil, err
-    }
-
-    rv.docID = rv.docID[:len(rv.docID)-1] // trim off separator byte
-
-    rv.docNum, err = binary.ReadUvarint(buf)
-    if err != nil {
-        return nil, err
-    }
-
-    err = binary.Read(buf, binary.LittleEndian, &rv.field)
-    if err != nil {
-        return nil, err
-    }
-
-    rv.arrayPositions = make([]uint64, 0)
-    nextArrayPos, err := binary.ReadUvarint(buf)
-    for err == nil {
-        rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
-        nextArrayPos, err = binary.ReadUvarint(buf)
-    }
-
     err = rv.value.Unmarshal(value)
     if err != nil {
         return nil, err
     }

     return &rv, nil
 }

+func (sr *StoredRow) ParseKey(key []byte) error {
+    buf := bytes.NewBuffer(key)
+    _, err := buf.ReadByte() // type
+    if err != nil {
+        return err
+    }
+
+    sr.docID, err = buf.ReadBytes(ByteSeparator)
+    if len(sr.docID) < 2 { // 1 for min doc id length, 1 for separator
+        err = fmt.Errorf("invalid doc length 0")
+        return err
+    }
+
+    sr.docID = sr.docID[:len(sr.docID)-1] // trim off separator byte
+
+    sr.docNum, err = binary.ReadUvarint(buf)
+    if err != nil {
+        return err
+    }
+
+    err = binary.Read(buf, binary.LittleEndian, &sr.field)
+    if err != nil {
+        return err
+    }
+
+    sr.arrayPositions = make([]uint64, 0)
+    nextArrayPos, err := binary.ReadUvarint(buf)
+    for err == nil {
+        sr.arrayPositions = append(sr.arrayPositions, nextArrayPos)
+        nextArrayPos, err = binary.ReadUvarint(buf)
+    }
+
+    return nil
+}
+
 func (sr *StoredRow) KeySize() int {
     return 1 + len(sr.docID) + 1 + binary.MaxVarintLen64 + 2 + (binary.MaxVarintLen64 * len(sr.arrayPositions))
 }
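Note: NewStoredRowKV keeps its behavior (parse key, then unmarshal value) but the key-parsing body moves into the new StoredRow.ParseKey method, which mutates the receiver and returns only an error; that is what lets the garbage collector reuse one StoredRow across its prefix scan. The arrayPositions loop is worth a second look: it reads uvarints until ReadUvarint errors (normally io.EOF at end of key), using the error as the loop terminator rather than a length prefix. A standalone illustration of that decode idiom, assuming the usual "bytes" and "encoding/binary" imports; the sample bytes are the encoded positions {2, 294, 3078} from the TestRows case later in this diff:

    // Decode trailing uvarints until the buffer is exhausted;
    // io.EOF is the expected terminator, not a failure.
    buf := bytes.NewBuffer([]byte{2, 166, 2, 134, 24})
    var positions []uint64
    for {
        v, err := binary.ReadUvarint(buf)
        if err != nil {
            break // end of key reached
        }
        positions = append(positions, v)
    }
    // positions is now [2 294 3078]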
@@ -62,32 +62,39 @@ func InitTermFreqRow(tfr *TermFreqRow, field uint16, term []byte, docID []byte,

 func NewTermFreqRowKV(key, value []byte) (*TermFreqRow, error) {
     rv := TermFreqRow{}
+    err := rv.ParseKey(key)
+    if err != nil {
+        return nil, err
+    }
+    err = rv.value.Unmarshal(value)
+    if err != nil {
+        return nil, err
+    }
+    return &rv, nil
+}
+
+func (tfr *TermFreqRow) ParseKey(key []byte) error {
     keyLen := len(key)
     if keyLen < 3 {
-        return nil, fmt.Errorf("invalid term frequency key, no valid field")
+        return fmt.Errorf("invalid term frequency key, no valid field")
     }
-    rv.field = binary.LittleEndian.Uint16(key[1:3])
+    tfr.field = binary.LittleEndian.Uint16(key[1:3])

     termStartPos := 3
     termEndPos := bytes.IndexByte(key[termStartPos:], ByteSeparator)
     if termEndPos < 0 {
-        return nil, fmt.Errorf("invalid term frequency key, no byte separator terminating term")
+        return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
     }
-    rv.term = key[termStartPos : termStartPos+termEndPos]
+    tfr.term = key[termStartPos : termStartPos+termEndPos]

     docStartPos := termStartPos + termEndPos + 1
     docEndPos := bytes.IndexByte(key[docStartPos:], ByteSeparator)
-    rv.docID = key[docStartPos : docStartPos+docEndPos]
+    tfr.docID = key[docStartPos : docStartPos+docEndPos]

     docNumPos := docStartPos + docEndPos + 1
-    rv.docNum, _ = binary.Uvarint(key[docNumPos:])
+    tfr.docNum, _ = binary.Uvarint(key[docNumPos:])

-    err := rv.value.Unmarshal(value)
-    if err != nil {
-        return nil, err
-    }
-
-    return &rv, nil
+    return nil
 }

 func (tfr *TermFreqRow) KeySize() int {
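Note: ParseKey implicitly documents the term-frequency key layout: one type byte, a little-endian uint16 field, the term bytes up to a ByteSeparator, the docID bytes up to the next ByteSeparator, then a uvarint doc number. An illustrative encoder for just that layout, inferred from the parse logic (the real key writer is not in this diff, and the leading 't' type byte is an assumption for illustration):

    // Build a key matching what ParseKey expects:
    // type | field (LE uint16) | term | sep | docID | sep | docNum varint.
    func termFreqKey(field uint16, term, docID []byte, docNum uint64) []byte {
        buf := make([]byte, 0, 3+len(term)+1+len(docID)+1+binary.MaxVarintLen64)
        buf = append(buf, 't')
        buf = append(buf, byte(field), byte(field>>8)) // little-endian
        buf = append(buf, term...)
        buf = append(buf, ByteSeparator)
        buf = append(buf, docID...)
        buf = append(buf, ByteSeparator)
        var tmp [binary.MaxVarintLen64]byte
        n := binary.PutUvarint(tmp[:], docNum)
        return append(buf, tmp[:n]...)
    }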
@@ -67,10 +67,11 @@ func (f *Firestorm) warmup(reader store.KVReader) error {

     tfkPrefix := TermFreqIteratorStart(idField, nil)

+    var tfk TermFreqRow
     var lastDocId []byte
     lastDocNumbers := make(DocNumberList, 1)
     err = visitPrefix(reader, tfkPrefix, func(key, val []byte) (bool, error) {
-        tfk, err := NewTermFreqRowKV(key, val)
+        err := tfk.ParseKey(key)
         if err != nil {
             return false, err
         }
@@ -21,8 +21,9 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
         Rows: make([]index.IndexRow, 0, 100),
     }

+    docIDBytes := []byte(d.ID)
+
     // track our back index entries
-    backIndexTermEntries := make([]*BackIndexTermEntry, 0)
     backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)

     // information we collate as we merge fields with same name
@@ -31,11 +32,7 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
     fieldIncludeTermVectors := make(map[uint16]bool)
     fieldNames := make(map[uint16]string)

-    // walk all the fields, record stored fields now
-    // place information about indexed fields into map
-    // this collates information across fields with
-    // same names (arrays)
-    for _, field := range d.Fields {
+    analyzeField := func(field document.Field, storable bool) {
         fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
         if newFieldRow != nil {
             rv.Rows = append(rv.Rows, newFieldRow)
@@ -55,48 +52,51 @@ func (udc *UpsideDownCouch) Analyze(d *document.Document) *index.AnalysisResult
             fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
         }

-        if field.Options().IsStored() {
-            storeRows, indexBackIndexStoreEntries := udc.storeField(d.ID, field, fieldIndex)
-            rv.Rows = append(rv.Rows, storeRows...)
-            backIndexStoredEntries = append(backIndexStoredEntries, indexBackIndexStoreEntries...)
+        if storable && field.Options().IsStored() {
+            rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
         }
     }

+    // walk all the fields, record stored fields now
+    // place information about indexed fields into map
+    // this collates information across fields with
+    // same names (arrays)
+    for _, field := range d.Fields {
+        analyzeField(field, true)
+    }
+
+    for fieldIndex, tokenFreqs := range fieldTermFreqs {
+        // see if any of the composite fields need this
+        for _, compositeField := range d.CompositeFields {
+            compositeField.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
+        }
+    }
+
+    for _, compositeField := range d.CompositeFields {
+        analyzeField(compositeField, false)
+    }
+
+    rowsCapNeeded := len(rv.Rows) + 1
+    for _, tokenFreqs := range fieldTermFreqs {
+        rowsCapNeeded += len(tokenFreqs)
+    }
+
+    rv.Rows = append(make([]index.IndexRow, 0, rowsCapNeeded), rv.Rows...)
+
+    backIndexTermEntries := make([]*BackIndexTermEntry, 0, rowsCapNeeded)
+
     // walk through the collated information and proccess
     // once for each indexed field (unique name)
     for fieldIndex, tokenFreqs := range fieldTermFreqs {
         fieldLength := fieldLengths[fieldIndex]
         includeTermVectors := fieldIncludeTermVectors[fieldIndex]

-        // see if any of the composite fields need this
-        for _, compositeField := range d.CompositeFields {
-            compositeField.Compose(fieldNames[fieldIndex], fieldLength, tokenFreqs)
-        }
-
         // encode this field
-        indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, includeTermVectors, fieldIndex, fieldLength, tokenFreqs)
-        rv.Rows = append(rv.Rows, indexRows...)
-        backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
-    }
-
-    // now index the composite fields
-    for _, compositeField := range d.CompositeFields {
-        fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(compositeField.Name())
-        if newFieldRow != nil {
-            rv.Rows = append(rv.Rows, newFieldRow)
-        }
-        if compositeField.Options().IsIndexed() {
-            fieldLength, tokenFreqs := compositeField.Analyze()
-            // encode this field
-            indexRows, indexBackIndexTermEntries := udc.indexField(d.ID, compositeField.Options().IncludeTermVectors(), fieldIndex, fieldLength, tokenFreqs)
-            rv.Rows = append(rv.Rows, indexRows...)
-            backIndexTermEntries = append(backIndexTermEntries, indexBackIndexTermEntries...)
-        }
+        rv.Rows, backIndexTermEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermEntries)
     }

     // build the back index row
-    backIndexRow := NewBackIndexRow(d.ID, backIndexTermEntries, backIndexStoredEntries)
+    backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermEntries, backIndexStoredEntries)
     rv.Rows = append(rv.Rows, backIndexRow)

     return rv
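Note: the Analyze rewrite folds the per-field work into a single analyzeField closure (its storable flag lets composite fields skip stored-row generation), composes composite fields before analyzing them, and pre-sizes rv.Rows and backIndexTermEntries from the rowsCapNeeded estimate so later appends do not repeatedly regrow backing arrays. The pre-sizing idiom in isolation, with hypothetical names:

    // Count first, allocate once: appends into a slice with enough
    // capacity never reallocate or copy.
    existing := []int{1, 2, 3}
    groups := [][]int{{4, 5}, {6}}
    capNeeded := len(existing) + 1
    for _, g := range groups {
        capNeeded += len(g)
    }
    merged := append(make([]int, 0, capNeeded), existing...)
    _ = merged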
@@ -131,6 +131,8 @@ func (k keyset) Less(i, j int) bool { return bytes.Compare(k[i], k[j]) < 0 }

 // DumpDoc returns all rows in the index related to this doc id
 func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} {
+    idBytes := []byte(id)
+
     rv := make(chan interface{})

     go func() {
@ -162,14 +164,14 @@ func (udc *UpsideDownCouch) DumpDoc(id string) chan interface{} {
|
|||||||
// build sorted list of term keys
|
// build sorted list of term keys
|
||||||
keys := make(keyset, 0)
|
keys := make(keyset, 0)
|
||||||
for _, entry := range back.termEntries {
|
for _, entry := range back.termEntries {
|
||||||
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), id, 0, 0)
|
tfr := NewTermFrequencyRow([]byte(*entry.Term), uint16(*entry.Field), idBytes, 0, 0)
|
||||||
key := tfr.Key()
|
key := tfr.Key()
|
||||||
keys = append(keys, key)
|
keys = append(keys, key)
|
||||||
}
|
}
|
||||||
sort.Sort(keys)
|
sort.Sort(keys)
|
||||||
|
|
||||||
// first add all the stored rows
|
// first add all the stored rows
|
||||||
storedRowPrefix := NewStoredRow(id, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
|
storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
|
||||||
udc.dumpPrefix(kvreader, rv, storedRowPrefix)
|
udc.dumpPrefix(kvreader, rv, storedRowPrefix)
|
||||||
|
|
||||||
// now walk term keys in order and add them as well
|
// now walk term keys in order and add them as well
|
||||||
@@ -60,7 +60,7 @@ func (i *IndexReader) Document(id string) (doc *document.Document, err error) {
         return
     }
     doc = document.NewDocument(id)
-    storedRow := NewStoredRow(id, 0, []uint64{}, 'x', nil)
+    storedRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
     storedRowScanPrefix := storedRow.ScanPrefixForDoc()
     it := i.kvreader.PrefixIterator(storedRowScanPrefix)
     defer func() {
@@ -41,7 +41,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi
         return nil, err
     }

-    tfr := NewTermFrequencyRow(term, field, "", 0, 0)
+    tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
     it := indexReader.kvreader.PrefixIterator(tfr.Key())

     return &UpsideDownCouchTermFieldReader{
@ -80,7 +80,7 @@ func (r *UpsideDownCouchTermFieldReader) Next() (*index.TermFieldDoc, error) {
|
|||||||
|
|
||||||
func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
|
func (r *UpsideDownCouchTermFieldReader) Advance(docID string) (*index.TermFieldDoc, error) {
|
||||||
if r.iterator != nil {
|
if r.iterator != nil {
|
||||||
tfr := NewTermFrequencyRow(r.term, r.field, docID, 0, 0)
|
tfr := NewTermFrequencyRow(r.term, r.field, []byte(docID), 0, 0)
|
||||||
r.iterator.Seek(tfr.Key())
|
r.iterator.Seek(tfr.Key())
|
||||||
key, val, valid := r.iterator.Current()
|
key, val, valid := r.iterator.Current()
|
||||||
if valid {
|
if valid {
|
||||||
@ -114,14 +114,16 @@ type UpsideDownCouchDocIDReader struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) {
|
func newUpsideDownCouchDocIDReader(indexReader *IndexReader, start, end string) (*UpsideDownCouchDocIDReader, error) {
|
||||||
|
startBytes := []byte(start)
|
||||||
if start == "" {
|
if start == "" {
|
||||||
start = string([]byte{0x0})
|
startBytes = []byte{0x0}
|
||||||
}
|
}
|
||||||
|
endBytes := []byte(end)
|
||||||
if end == "" {
|
if end == "" {
|
||||||
end = string([]byte{0xff})
|
endBytes = []byte{0xff}
|
||||||
}
|
}
|
||||||
bisr := NewBackIndexRow(start, nil, nil)
|
bisr := NewBackIndexRow(startBytes, nil, nil)
|
||||||
bier := NewBackIndexRow(end, nil, nil)
|
bier := NewBackIndexRow(endBytes, nil, nil)
|
||||||
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
it := indexReader.kvreader.RangeIterator(bisr.Key(), bier.Key())
|
||||||
|
|
||||||
return &UpsideDownCouchDocIDReader{
|
return &UpsideDownCouchDocIDReader{
|
||||||
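Note: the old code built []byte sentinels only to convert them to string, and NewBackIndexRow then converted them back to []byte; keeping []byte end to end removes that round trip. Each direction of a string/[]byte conversion copies in Go, which is the whole motivation for this theme across the commit:

    id := "budweiser"
    b := []byte(id) // copy #1
    s := string(b)  // copy #2, avoided by keeping []byte throughout
    _ = s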
@ -145,7 +147,7 @@ func (r *UpsideDownCouchDocIDReader) Next() (string, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) {
|
func (r *UpsideDownCouchDocIDReader) Advance(docID string) (string, error) {
|
||||||
bir := NewBackIndexRow(docID, nil, nil)
|
bir := NewBackIndexRow([]byte(docID), nil, nil)
|
||||||
r.iterator.Seek(bir.Key())
|
r.iterator.Seek(bir.Key())
|
||||||
key, val, valid := r.iterator.Current()
|
key, val, valid := r.iterator.Current()
|
||||||
if valid {
|
if valid {
|
||||||
@@ -459,21 +459,21 @@ func (tfr *TermFrequencyRow) String() string {
     return fmt.Sprintf("Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v", string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors)
 }

-func NewTermFrequencyRow(term []byte, field uint16, doc string, freq uint64, norm float32) *TermFrequencyRow {
+func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
     return &TermFrequencyRow{
         term:  term,
         field: field,
-        doc:   []byte(doc),
+        doc:   docID,
         freq:  freq,
         norm:  norm,
     }
 }

-func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, doc string, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
+func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
     return &TermFrequencyRow{
         term:    term,
         field:   field,
-        doc:     []byte(doc),
+        doc:     docID,
         freq:    freq,
         norm:    norm,
         vectors: vectors,
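Note: the row constructors now accept docID []byte and store it directly instead of copying via []byte(doc). Callers that already hold the ID as bytes (docIDBytes, br.doc) pay no conversion; callers starting from a string convert exactly once at the boundary, as the Advance and Document hunks above show. A hypothetical helper illustrating the intended call pattern:

    // Convert the document ID once, then reuse the same slice for
    // every row built for that document.
    func rowsForDoc(id string, term []byte, field uint16) []*TermFrequencyRow {
        docIDBytes := []byte(id) // the only string->[]byte copy
        return []*TermFrequencyRow{
            NewTermFrequencyRow(term, field, docIDBytes, 1, 1.0),
            NewTermFrequencyRow(term, field, docIDBytes, 2, 0.5),
        }
    }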
@@ -605,7 +605,7 @@ func (br *BackIndexRow) AllTermKeys() [][]byte {
     }
     rv := make([][]byte, len(br.termEntries))
     for i, termEntry := range br.termEntries {
-        termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), string(br.doc), 0, 0)
+        termRow := NewTermFrequencyRow([]byte(termEntry.GetTerm()), uint16(termEntry.GetField()), br.doc, 0, 0)
         rv[i] = termRow.Key()
     }
     return rv
@@ -617,7 +617,7 @@ func (br *BackIndexRow) AllStoredKeys() [][]byte {
     }
     rv := make([][]byte, len(br.storedEntries))
     for i, storedEntry := range br.storedEntries {
-        storedRow := NewStoredRow(string(br.doc), uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
+        storedRow := NewStoredRow(br.doc, uint16(storedEntry.GetField()), storedEntry.GetArrayPositions(), 'x', []byte{})
         rv[i] = storedRow.Key()
     }
     return rv
@@ -665,9 +665,9 @@ func (br *BackIndexRow) String() string {
     return fmt.Sprintf("Backindex DocId: `%s` Term Entries: %v, Stored Entries: %v", string(br.doc), br.termEntries, br.storedEntries)
 }

-func NewBackIndexRow(doc string, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
+func NewBackIndexRow(docID []byte, entries []*BackIndexTermEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
     return &BackIndexRow{
-        doc:           []byte(doc),
+        doc:           docID,
         termEntries:   entries,
         storedEntries: storedFields,
     }
@@ -766,9 +766,9 @@ func (s *StoredRow) ScanPrefixForDoc() []byte {
     return buf
 }

-func NewStoredRow(doc string, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
+func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
     return &StoredRow{
-        doc:            []byte(doc),
+        doc:            docID,
         field:          field,
         arrayPositions: arrayPositions,
         typ:            typ,
@@ -49,54 +49,54 @@ func TestRows(t *testing.T) {
            []byte{27},
        },
        {
-           NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "catz", 3, 3.14),
+           NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("catz"), 3, 3.14),
            []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'c', 'a', 't', 'z'},
            []byte{3, 195, 235, 163, 130, 4},
        },
        {
-           NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14),
+           NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14),
            []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{3, 195, 235, 163, 130, 4},
        },
        {
-           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
+           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14, []*TermVector{&TermVector{field: 0, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
            []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0},
        },
        // test larger varints
        {
-           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2198, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
+           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11}, &TermVector{field: 0, pos: 2198, start: 23, end: 31}, &TermVector{field: 0, pos: 3, start: 43, end: 51}}),
            []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 0, 0, 150, 17, 23, 31, 0, 0, 3, 43, 51, 0},
        },
        // test vectors with arrayPositions
        {
-           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, "budweiser", 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, &TermVector{field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, &TermVector{field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
+           NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{&TermVector{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, &TermVector{field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, &TermVector{field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
            []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 1, 0, 0, 150, 17, 23, 31, 2, 1, 2, 0, 3, 43, 51, 3, 3, 4, 5},
        },
        {
-           NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil),
+           NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}}, nil),
            []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0},
        },
        {
-           NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil),
+           NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, nil),
            []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1},
        },
        {
-           NewBackIndexRow("budweiser", []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}),
+           NewBackIndexRow([]byte("budweiser"), []*BackIndexTermEntry{&BackIndexTermEntry{Term: proto.String("beer"), Field: proto.Uint32(0)}, &BackIndexTermEntry{Term: proto.String("beat"), Field: proto.Uint32(1)}}, []*BackIndexStoreEntry{&BackIndexStoreEntry{Field: proto.Uint32(3)}, &BackIndexStoreEntry{Field: proto.Uint32(4)}, &BackIndexStoreEntry{Field: proto.Uint32(5)}}),
            []byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
            []byte{10, 8, 10, 4, 'b', 'e', 'e', 'r', 16, 0, 10, 8, 10, 4, 'b', 'e', 'a', 't', 16, 1, 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5},
        },
        {
-           NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer")),
+           NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer")),
            []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0},
            []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
        },
        {
-           NewStoredRow("budweiser", 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
+           NewStoredRow([]byte("budweiser"), 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
            []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24},
            []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
        },
@@ -259,7 +259,7 @@ func BenchmarkTermFrequencyRowEncode(b *testing.B) {
    row := NewTermFrequencyRowWithTermVectors(
        []byte{'b', 'e', 'e', 'r'},
        0,
-       "budweiser",
+       []byte("budweiser"),
        3,
        3.14,
        []*TermVector{
@ -304,7 +304,7 @@ func BenchmarkTermFrequencyRowDecode(b *testing.B) {
|
|||||||
func BenchmarkBackIndexRowEncode(b *testing.B) {
|
func BenchmarkBackIndexRowEncode(b *testing.B) {
|
||||||
field := uint32(1)
|
field := uint32(1)
|
||||||
t1 := "term1"
|
t1 := "term1"
|
||||||
row := NewBackIndexRow("beername",
|
row := NewBackIndexRow([]byte("beername"),
|
||||||
[]*BackIndexTermEntry{
|
[]*BackIndexTermEntry{
|
||||||
&BackIndexTermEntry{
|
&BackIndexTermEntry{
|
||||||
Term: &t1,
|
Term: &t1,
|
||||||
@ -336,7 +336,7 @@ func BenchmarkBackIndexRowDecode(b *testing.B) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func BenchmarkStoredRowEncode(b *testing.B) {
|
func BenchmarkStoredRowEncode(b *testing.B) {
|
||||||
row := NewStoredRow("budweiser", 0, []uint64{}, byte('t'), []byte("an american beer"))
|
row := NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer"))
|
||||||
b.ResetTimer()
|
b.ResetTimer()
|
||||||
for i := 0; i < b.N; i++ {
|
for i := 0; i < b.N; i++ {
|
||||||
row.Key()
|
row.Key()
|
||||||
|
@@ -29,7 +29,7 @@ import (
 const Name = "upside_down"

 // RowBufferSize should ideally this is sized to be the smallest
-// size that can cotain an index row key and its corresponding
+// size that can contain an index row key and its corresponding
 // value. It is not a limit, if need be a larger buffer is
 // allocated, but performance will be more optimal if *most*
 // rows fit this size.
@@ -344,6 +344,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
    analysisStart := time.Now()
    resultChan := make(chan *index.AnalysisResult)
    aw := index.NewAnalysisWork(udc, doc, resultChan)

    // put the work on the queue
    udc.analysisQueue.Queue(aw)
@@ -473,18 +474,14 @@ func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []in
    return addRows, updateRows, deleteRows
 }

-func (udc *UpsideDownCouch) storeField(docID string, field document.Field, fieldIndex uint16) ([]index.IndexRow, []*BackIndexStoreEntry) {
-   rows := make([]index.IndexRow, 0, 100)
-   backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
+func (udc *UpsideDownCouch) storeField(docID []byte, field document.Field, fieldIndex uint16, rows []index.IndexRow, backIndexStoredEntries []*BackIndexStoreEntry) ([]index.IndexRow, []*BackIndexStoreEntry) {
    fieldType := encodeFieldType(field)
    storedRow := NewStoredRow(docID, fieldIndex, field.ArrayPositions(), fieldType, field.Value())

    // record the back index entry
    backIndexStoredEntry := BackIndexStoreEntry{Field: proto.Uint32(uint32(fieldIndex)), ArrayPositions: field.ArrayPositions()}
-   backIndexStoredEntries = append(backIndexStoredEntries, &backIndexStoredEntry)

-   rows = append(rows, storedRow)
-   return rows, backIndexStoredEntries
+   return append(rows, storedRow), append(backIndexStoredEntries, &backIndexStoredEntry)
 }

 func encodeFieldType(f document.Field) byte {
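Note: storeField no longer allocates its own rows and back-index slices; the caller's slices come in as parameters, get appended to, and are returned, so there is no throwaway intermediate slice and no second copy at the call site. indexField and termVectorsFromTokenFreq below get the same treatment. The pattern reduced to essentials, with hypothetical names:

    // The helper appends into the caller-owned slice and returns it;
    // the call site reassigns: rows = appendEvens(rows, 10).
    func appendEvens(rows []int, upto int) []int {
        for i := 0; i < upto; i += 2 {
            rows = append(rows, i)
        }
        return rows
    }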
@@ -502,17 +499,14 @@ func encodeFieldType(f document.Field) byte {
    return fieldType
 }

-func (udc *UpsideDownCouch) indexField(docID string, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies) ([]index.IndexRow, []*BackIndexTermEntry) {
-
-   rows := make([]index.IndexRow, 0, 100)
-   backIndexTermEntries := make([]*BackIndexTermEntry, 0, len(tokenFreqs))
+func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs analysis.TokenFrequencies, rows []index.IndexRow, backIndexTermEntries []*BackIndexTermEntry) ([]index.IndexRow, []*BackIndexTermEntry) {
    fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))

    for k, tf := range tokenFreqs {
        var termFreqRow *TermFrequencyRow
        if includeTermVectors {
-           tv, newFieldRows := udc.termVectorsFromTokenFreq(fieldIndex, tf)
-           rows = append(rows, newFieldRows...)
+           var tv []*TermVector
+           tv, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
            termFreqRow = NewTermFrequencyRowWithTermVectors(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm, tv)
        } else {
            termFreqRow = NewTermFrequencyRow(tf.Term, fieldIndex, docID, uint64(frequencyFromTokenFreq(tf)), fieldNorm)
@@ -592,13 +586,14 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
 }

 func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
+   idBytes := []byte(id)

    for _, backIndexEntry := range backIndexRow.termEntries {
-       tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), id, 0, 0)
+       tfr := NewTermFrequencyRow([]byte(*backIndexEntry.Term), uint16(*backIndexEntry.Field), idBytes, 0, 0)
        deleteRows = append(deleteRows, tfr)
    }
    for _, se := range backIndexRow.storedEntries {
-       sf := NewStoredRow(id, uint16(*se.Field), se.ArrayPositions, 'x', nil)
+       sf := NewStoredRow(idBytes, uint16(*se.Field), se.ArrayPositions, 'x', nil)
        deleteRows = append(deleteRows, sf)
    }

@@ -667,9 +662,8 @@ func frequencyFromTokenFreq(tf *analysis.TokenFreq) int {
    return tf.Frequency()
 }

-func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq) ([]*TermVector, []index.IndexRow) {
+func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.TokenFreq, rows []index.IndexRow) ([]*TermVector, []index.IndexRow) {
    rv := make([]*TermVector, len(tf.Locations))
-   newFieldRows := make([]index.IndexRow, 0)

    for i, l := range tf.Locations {
        var newFieldRow *FieldRow
|
|||||||
// lookup correct field
|
// lookup correct field
|
||||||
fieldIndex, newFieldRow = udc.fieldIndexOrNewRow(l.Field)
|
fieldIndex, newFieldRow = udc.fieldIndexOrNewRow(l.Field)
|
||||||
if newFieldRow != nil {
|
if newFieldRow != nil {
|
||||||
newFieldRows = append(newFieldRows, newFieldRow)
|
rows = append(rows, newFieldRow)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tv := TermVector{
|
tv := TermVector{
|
||||||
@@ -691,7 +685,7 @@ func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *analysis.
        rv[i] = &tv
    }

-   return rv, newFieldRows
+   return rv, rows
 }

 func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {