Merge pull request #530 from steveyen/master
optimizations around search / DocumentMatchPool
This commit is contained in:
commit
4da7756f67
@ -29,41 +29,49 @@ type UpsideDownCouchTermFieldReader struct {
|
||||
iterator store.KVIterator
|
||||
term []byte
|
||||
tfrNext *TermFrequencyRow
|
||||
tfrPrealloc TermFrequencyRow
|
||||
keyBuf []byte
|
||||
field uint16
|
||||
includeTermVectors bool
|
||||
}
|
||||
|
||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||
dictionaryRow := NewDictionaryRow(term, field, 0)
|
||||
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
|
||||
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||
if bufNeeded < dictionaryRowKeySize(term) {
|
||||
bufNeeded = dictionaryRowKeySize(term)
|
||||
}
|
||||
buf := make([]byte, bufNeeded)
|
||||
|
||||
bufUsed := dictionaryRowKeyTo(buf, field, term)
|
||||
val, err := indexReader.kvreader.Get(buf[:bufUsed])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if val == nil {
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
rv := &UpsideDownCouchTermFieldReader{
|
||||
count: 0,
|
||||
term: term,
|
||||
tfrNext: &TermFrequencyRow{},
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
}, nil
|
||||
}
|
||||
rv.tfrNext = &rv.tfrPrealloc
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
err = dictionaryRow.parseDictionaryV(val)
|
||||
count, err := dictionaryRowParseV(val)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
|
||||
it := indexReader.kvreader.PrefixIterator(tfr.Key())
|
||||
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
|
||||
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
|
||||
|
||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||
return &UpsideDownCouchTermFieldReader{
|
||||
indexReader: indexReader,
|
||||
iterator: it,
|
||||
count: dictionaryRow.count,
|
||||
count: count,
|
||||
term: term,
|
||||
field: field,
|
||||
includeTermVectors: includeTermVectors,
|
||||
@ -82,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
|
||||
if r.tfrNext != nil {
|
||||
r.iterator.Next()
|
||||
} else {
|
||||
r.tfrNext = &TermFrequencyRow{}
|
||||
r.tfrNext = &r.tfrPrealloc
|
||||
}
|
||||
key, val, valid := r.iterator.Current()
|
||||
if valid {
|
||||
|
@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) KeySize() int {
|
||||
return len(dr.term) + 3
|
||||
return dictionaryRowKeySize(dr.term)
|
||||
}
|
||||
|
||||
func dictionaryRowKeySize(term []byte) int {
|
||||
return len(term) + 3
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
|
||||
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
|
||||
}
|
||||
|
||||
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
|
||||
buf[0] = 'd'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
|
||||
size := copy(buf[3:], dr.term)
|
||||
return size + 3, nil
|
||||
binary.LittleEndian.PutUint16(buf[1:3], field)
|
||||
size := copy(buf[3:], term)
|
||||
return size + 3
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) Value() []byte {
|
||||
@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
|
||||
}
|
||||
|
||||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
|
||||
count, nread := binary.Uvarint(value)
|
||||
if nread <= 0 {
|
||||
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
|
||||
count, err := dictionaryRowParseV(value)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
dr.count = count
|
||||
return nil
|
||||
}
|
||||
|
||||
func dictionaryRowParseV(value []byte) (uint64, error) {
|
||||
count, nread := binary.Uvarint(value)
|
||||
if nread <= 0 {
|
||||
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
|
||||
}
|
||||
return count, nil
|
||||
}
|
||||
|
||||
// TERM FIELD FREQUENCY
|
||||
|
||||
type TermVector struct {
|
||||
@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) KeySize() int {
|
||||
return 3 + len(tfr.term) + 1 + len(tfr.doc)
|
||||
return termFrequencyRowKeySize(tfr.term, tfr.doc)
|
||||
}
|
||||
|
||||
func termFrequencyRowKeySize(term, doc []byte) int {
|
||||
return 3 + len(term) + 1 + len(doc)
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
|
||||
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
|
||||
}
|
||||
|
||||
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
|
||||
buf[0] = 't'
|
||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
||||
termLen := copy(buf[3:], tfr.term)
|
||||
binary.LittleEndian.PutUint16(buf[1:3], field)
|
||||
termLen := copy(buf[3:], term)
|
||||
buf[3+termLen] = ByteSeparator
|
||||
docLen := copy(buf[3+termLen+1:], tfr.doc)
|
||||
return 3 + termLen + 1 + docLen, nil
|
||||
docLen := copy(buf[3+termLen+1:], doc)
|
||||
return 3 + termLen + 1 + docLen
|
||||
}
|
||||
|
||||
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
|
||||
|
@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
|
||||
// pre-allocated to accommodate the requested number of DocumentMatch
|
||||
// instances
|
||||
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
|
||||
avail := make(DocumentMatchCollection, 0, size)
|
||||
avail := make(DocumentMatchCollection, size)
|
||||
// pre-allocate the expected number of instances
|
||||
startBlock := make([]DocumentMatch, size)
|
||||
startSorts := make([]string, size*sortsize)
|
||||
// make these initial instances available
|
||||
for i := range startBlock {
|
||||
startBlock[i].Sort = make([]string, 0, sortsize)
|
||||
avail = append(avail, &startBlock[i])
|
||||
i, j := 0, 0
|
||||
for i < size {
|
||||
avail[i] = &startBlock[i]
|
||||
avail[i].Sort = startSorts[j:j]
|
||||
i += 1
|
||||
j += sortsize
|
||||
}
|
||||
return &DocumentMatchPool{
|
||||
avail: avail,
|
||||
|
Loading…
Reference in New Issue
Block a user