0
0

Merge pull request #530 from steveyen/master

optimizations around search / DocumentMatchPool
This commit is contained in:
Marty Schoch 2017-02-09 15:44:04 -05:00 committed by GitHub
commit 4da7756f67
3 changed files with 62 additions and 26 deletions

View File

@ -29,41 +29,49 @@ type UpsideDownCouchTermFieldReader struct {
iterator store.KVIterator iterator store.KVIterator
term []byte term []byte
tfrNext *TermFrequencyRow tfrNext *TermFrequencyRow
tfrPrealloc TermFrequencyRow
keyBuf []byte keyBuf []byte
field uint16 field uint16
includeTermVectors bool includeTermVectors bool
} }
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) { func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0) bufNeeded := termFrequencyRowKeySize(term, nil)
val, err := indexReader.kvreader.Get(dictionaryRow.Key()) if bufNeeded < dictionaryRowKeySize(term) {
bufNeeded = dictionaryRowKeySize(term)
}
buf := make([]byte, bufNeeded)
bufUsed := dictionaryRowKeyTo(buf, field, term)
val, err := indexReader.kvreader.Get(buf[:bufUsed])
if err != nil { if err != nil {
return nil, err return nil, err
} }
if val == nil { if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{ rv := &UpsideDownCouchTermFieldReader{
count: 0, count: 0,
term: term, term: term,
tfrNext: &TermFrequencyRow{},
field: field, field: field,
includeTermVectors: includeTermVectors, includeTermVectors: includeTermVectors,
}, nil }
rv.tfrNext = &rv.tfrPrealloc
return rv, nil
} }
err = dictionaryRow.parseDictionaryV(val) count, err := dictionaryRowParseV(val)
if err != nil { if err != nil {
return nil, err return nil, err
} }
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0) bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
it := indexReader.kvreader.PrefixIterator(tfr.Key()) it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{ return &UpsideDownCouchTermFieldReader{
indexReader: indexReader, indexReader: indexReader,
iterator: it, iterator: it,
count: dictionaryRow.count, count: count,
term: term, term: term,
field: field, field: field,
includeTermVectors: includeTermVectors, includeTermVectors: includeTermVectors,
@ -82,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if r.tfrNext != nil { if r.tfrNext != nil {
r.iterator.Next() r.iterator.Next()
} else { } else {
r.tfrNext = &TermFrequencyRow{} r.tfrNext = &r.tfrPrealloc
} }
key, val, valid := r.iterator.Current() key, val, valid := r.iterator.Current()
if valid { if valid {

View File

@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
} }
func (dr *DictionaryRow) KeySize() int { func (dr *DictionaryRow) KeySize() int {
return len(dr.term) + 3 return dictionaryRowKeySize(dr.term)
}
func dictionaryRowKeySize(term []byte) int {
return len(term) + 3
} }
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) { func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
buf[0] = 'd' buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field) binary.LittleEndian.PutUint16(buf[1:3], field)
size := copy(buf[3:], dr.term) size := copy(buf[3:], term)
return size + 3, nil return size + 3
} }
func (dr *DictionaryRow) Value() []byte { func (dr *DictionaryRow) Value() []byte {
@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
} }
func (dr *DictionaryRow) parseDictionaryV(value []byte) error { func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, nread := binary.Uvarint(value) count, err := dictionaryRowParseV(value)
if nread <= 0 { if err != nil {
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) return err
} }
dr.count = count dr.count = count
return nil return nil
} }
// dictionaryRowParseV decodes the value portion of a dictionary row, which
// is a single unsigned varint, and returns the decoded number.
//
// An error is returned when binary.Uvarint reports a non-positive byte
// count, i.e. when the buffer is empty/truncated (0) or the encoded number
// overflows 64 bits (negative).
func dictionaryRowParseV(value []byte) (uint64, error) {
	decoded, consumed := binary.Uvarint(value)
	if consumed > 0 {
		return decoded, nil
	}
	return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", consumed)
}
// TERM FIELD FREQUENCY // TERM FIELD FREQUENCY
type TermVector struct { type TermVector struct {
@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
} }
func (tfr *TermFrequencyRow) KeySize() int { func (tfr *TermFrequencyRow) KeySize() int {
return 3 + len(tfr.term) + 1 + len(tfr.doc) return termFrequencyRowKeySize(tfr.term, tfr.doc)
}
func termFrequencyRowKeySize(term, doc []byte) int {
return 3 + len(term) + 1 + len(doc)
} }
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) { func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
buf[0] = 't' buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field) binary.LittleEndian.PutUint16(buf[1:3], field)
termLen := copy(buf[3:], tfr.term) termLen := copy(buf[3:], term)
buf[3+termLen] = ByteSeparator buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], tfr.doc) docLen := copy(buf[3+termLen+1:], doc)
return 3 + termLen + 1 + docLen, nil return 3 + termLen + 1 + docLen
} }
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) { func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {

View File

@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// pre-allocated to accommodate the requested number of DocumentMatch // pre-allocated to accommodate the requested number of DocumentMatch
// instances // instances
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool { func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size) avail := make(DocumentMatchCollection, size)
// pre-allocate the expected number of instances // pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size) startBlock := make([]DocumentMatch, size)
startSorts := make([]string, size*sortsize)
// make these initial instances available // make these initial instances available
for i := range startBlock { i, j := 0, 0
startBlock[i].Sort = make([]string, 0, sortsize) for i < size {
avail = append(avail, &startBlock[i]) avail[i] = &startBlock[i]
avail[i].Sort = startSorts[j:j]
i += 1
j += sortsize
} }
return &DocumentMatchPool{ return &DocumentMatchPool{
avail: avail, avail: avail,