0
0

Merge pull request #530 from steveyen/master

optimizations around search / DocumentMatchPool
This commit is contained in:
Marty Schoch 2017-02-09 15:44:04 -05:00 committed by GitHub
commit 4da7756f67
3 changed files with 62 additions and 26 deletions

View File

@ -29,41 +29,49 @@ type UpsideDownCouchTermFieldReader struct {
iterator store.KVIterator
term []byte
tfrNext *TermFrequencyRow
tfrPrealloc TermFrequencyRow
keyBuf []byte
field uint16
includeTermVectors bool
}
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
dictionaryRow := NewDictionaryRow(term, field, 0)
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
bufNeeded := termFrequencyRowKeySize(term, nil)
if bufNeeded < dictionaryRowKeySize(term) {
bufNeeded = dictionaryRowKeySize(term)
}
buf := make([]byte, bufNeeded)
bufUsed := dictionaryRowKeyTo(buf, field, term)
val, err := indexReader.kvreader.Get(buf[:bufUsed])
if err != nil {
return nil, err
}
if val == nil {
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
rv := &UpsideDownCouchTermFieldReader{
count: 0,
term: term,
tfrNext: &TermFrequencyRow{},
field: field,
includeTermVectors: includeTermVectors,
}, nil
}
rv.tfrNext = &rv.tfrPrealloc
return rv, nil
}
err = dictionaryRow.parseDictionaryV(val)
count, err := dictionaryRowParseV(val)
if err != nil {
return nil, err
}
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
it := indexReader.kvreader.PrefixIterator(tfr.Key())
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
return &UpsideDownCouchTermFieldReader{
indexReader: indexReader,
iterator: it,
count: dictionaryRow.count,
count: count,
term: term,
field: field,
includeTermVectors: includeTermVectors,
@ -82,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
if r.tfrNext != nil {
r.iterator.Next()
} else {
r.tfrNext = &TermFrequencyRow{}
r.tfrNext = &r.tfrPrealloc
}
key, val, valid := r.iterator.Current()
if valid {

View File

@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
}
func (dr *DictionaryRow) KeySize() int {
return len(dr.term) + 3
return dictionaryRowKeySize(dr.term)
}
func dictionaryRowKeySize(term []byte) int {
return len(term) + 3
}
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
buf[0] = 'd'
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
size := copy(buf[3:], dr.term)
return size + 3, nil
binary.LittleEndian.PutUint16(buf[1:3], field)
size := copy(buf[3:], term)
return size + 3
}
func (dr *DictionaryRow) Value() []byte {
@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
}
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
count, err := dictionaryRowParseV(value)
if err != nil {
return err
}
dr.count = count
return nil
}
func dictionaryRowParseV(value []byte) (uint64, error) {
count, nread := binary.Uvarint(value)
if nread <= 0 {
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
}
return count, nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
}
func (tfr *TermFrequencyRow) KeySize() int {
return 3 + len(tfr.term) + 1 + len(tfr.doc)
return termFrequencyRowKeySize(tfr.term, tfr.doc)
}
func termFrequencyRowKeySize(term, doc []byte) int {
return 3 + len(term) + 1 + len(doc)
}
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
buf[0] = 't'
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
termLen := copy(buf[3:], tfr.term)
binary.LittleEndian.PutUint16(buf[1:3], field)
termLen := copy(buf[3:], term)
buf[3+termLen] = ByteSeparator
docLen := copy(buf[3+termLen+1:], tfr.doc)
return 3 + termLen + 1 + docLen, nil
docLen := copy(buf[3+termLen+1:], doc)
return 3 + termLen + 1 + docLen
}
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {

View File

@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
// pre-allocated to accommodate the requested number of DocumentMatch
// instances
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
avail := make(DocumentMatchCollection, 0, size)
avail := make(DocumentMatchCollection, size)
// pre-allocate the expected number of instances
startBlock := make([]DocumentMatch, size)
startSorts := make([]string, size*sortsize)
// make these initial instances available
for i := range startBlock {
startBlock[i].Sort = make([]string, 0, sortsize)
avail = append(avail, &startBlock[i])
i, j := 0, 0
for i < size {
avail[i] = &startBlock[i]
avail[i].Sort = startSorts[j:j]
i += 1
j += sortsize
}
return &DocumentMatchPool{
avail: avail,