Merge pull request #530 from steveyen/master
optimizations around search / DocumentMatchPool
This commit is contained in:
commit
4da7756f67
|
@ -29,41 +29,49 @@ type UpsideDownCouchTermFieldReader struct {
|
||||||
iterator store.KVIterator
|
iterator store.KVIterator
|
||||||
term []byte
|
term []byte
|
||||||
tfrNext *TermFrequencyRow
|
tfrNext *TermFrequencyRow
|
||||||
|
tfrPrealloc TermFrequencyRow
|
||||||
keyBuf []byte
|
keyBuf []byte
|
||||||
field uint16
|
field uint16
|
||||||
includeTermVectors bool
|
includeTermVectors bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
|
||||||
dictionaryRow := NewDictionaryRow(term, field, 0)
|
bufNeeded := termFrequencyRowKeySize(term, nil)
|
||||||
val, err := indexReader.kvreader.Get(dictionaryRow.Key())
|
if bufNeeded < dictionaryRowKeySize(term) {
|
||||||
|
bufNeeded = dictionaryRowKeySize(term)
|
||||||
|
}
|
||||||
|
buf := make([]byte, bufNeeded)
|
||||||
|
|
||||||
|
bufUsed := dictionaryRowKeyTo(buf, field, term)
|
||||||
|
val, err := indexReader.kvreader.Get(buf[:bufUsed])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
if val == nil {
|
if val == nil {
|
||||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||||
return &UpsideDownCouchTermFieldReader{
|
rv := &UpsideDownCouchTermFieldReader{
|
||||||
count: 0,
|
count: 0,
|
||||||
term: term,
|
term: term,
|
||||||
tfrNext: &TermFrequencyRow{},
|
|
||||||
field: field,
|
field: field,
|
||||||
includeTermVectors: includeTermVectors,
|
includeTermVectors: includeTermVectors,
|
||||||
}, nil
|
}
|
||||||
|
rv.tfrNext = &rv.tfrPrealloc
|
||||||
|
return rv, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
err = dictionaryRow.parseDictionaryV(val)
|
count, err := dictionaryRowParseV(val)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
tfr := NewTermFrequencyRow(term, field, []byte{}, 0, 0)
|
bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
|
||||||
it := indexReader.kvreader.PrefixIterator(tfr.Key())
|
it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])
|
||||||
|
|
||||||
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
|
||||||
return &UpsideDownCouchTermFieldReader{
|
return &UpsideDownCouchTermFieldReader{
|
||||||
indexReader: indexReader,
|
indexReader: indexReader,
|
||||||
iterator: it,
|
iterator: it,
|
||||||
count: dictionaryRow.count,
|
count: count,
|
||||||
term: term,
|
term: term,
|
||||||
field: field,
|
field: field,
|
||||||
includeTermVectors: includeTermVectors,
|
includeTermVectors: includeTermVectors,
|
||||||
|
@ -82,7 +90,7 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*
|
||||||
if r.tfrNext != nil {
|
if r.tfrNext != nil {
|
||||||
r.iterator.Next()
|
r.iterator.Next()
|
||||||
} else {
|
} else {
|
||||||
r.tfrNext = &TermFrequencyRow{}
|
r.tfrNext = &r.tfrPrealloc
|
||||||
}
|
}
|
||||||
key, val, valid := r.iterator.Current()
|
key, val, valid := r.iterator.Current()
|
||||||
if valid {
|
if valid {
|
||||||
|
|
|
@ -254,14 +254,22 @@ func (dr *DictionaryRow) Key() []byte {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dr *DictionaryRow) KeySize() int {
|
func (dr *DictionaryRow) KeySize() int {
|
||||||
return len(dr.term) + 3
|
return dictionaryRowKeySize(dr.term)
|
||||||
|
}
|
||||||
|
|
||||||
|
func dictionaryRowKeySize(term []byte) int {
|
||||||
|
return len(term) + 3
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
|
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
|
||||||
|
return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
|
||||||
buf[0] = 'd'
|
buf[0] = 'd'
|
||||||
binary.LittleEndian.PutUint16(buf[1:3], dr.field)
|
binary.LittleEndian.PutUint16(buf[1:3], field)
|
||||||
size := copy(buf[3:], dr.term)
|
size := copy(buf[3:], term)
|
||||||
return size + 3, nil
|
return size + 3
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dr *DictionaryRow) Value() []byte {
|
func (dr *DictionaryRow) Value() []byte {
|
||||||
|
@ -324,14 +332,22 @@ func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
|
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
|
||||||
count, nread := binary.Uvarint(value)
|
count, err := dictionaryRowParseV(value)
|
||||||
if nread <= 0 {
|
if err != nil {
|
||||||
return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
|
return err
|
||||||
}
|
}
|
||||||
dr.count = count
|
dr.count = count
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func dictionaryRowParseV(value []byte) (uint64, error) {
|
||||||
|
count, nread := binary.Uvarint(value)
|
||||||
|
if nread <= 0 {
|
||||||
|
return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
|
||||||
|
}
|
||||||
|
return count, nil
|
||||||
|
}
|
||||||
|
|
||||||
// TERM FIELD FREQUENCY
|
// TERM FIELD FREQUENCY
|
||||||
|
|
||||||
type TermVector struct {
|
type TermVector struct {
|
||||||
|
@ -394,16 +410,24 @@ func (tfr *TermFrequencyRow) Key() []byte {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tfr *TermFrequencyRow) KeySize() int {
|
func (tfr *TermFrequencyRow) KeySize() int {
|
||||||
return 3 + len(tfr.term) + 1 + len(tfr.doc)
|
return termFrequencyRowKeySize(tfr.term, tfr.doc)
|
||||||
|
}
|
||||||
|
|
||||||
|
func termFrequencyRowKeySize(term, doc []byte) int {
|
||||||
|
return 3 + len(term) + 1 + len(doc)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
|
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
|
||||||
|
return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
|
||||||
buf[0] = 't'
|
buf[0] = 't'
|
||||||
binary.LittleEndian.PutUint16(buf[1:3], tfr.field)
|
binary.LittleEndian.PutUint16(buf[1:3], field)
|
||||||
termLen := copy(buf[3:], tfr.term)
|
termLen := copy(buf[3:], term)
|
||||||
buf[3+termLen] = ByteSeparator
|
buf[3+termLen] = ByteSeparator
|
||||||
docLen := copy(buf[3+termLen+1:], tfr.doc)
|
docLen := copy(buf[3+termLen+1:], doc)
|
||||||
return 3 + termLen + 1 + docLen, nil
|
return 3 + termLen + 1 + docLen
|
||||||
}
|
}
|
||||||
|
|
||||||
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
|
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
|
||||||
|
|
|
@ -37,13 +37,17 @@ func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
|
||||||
// pre-allocated to accommodate the requested number of DocumentMatch
|
// pre-allocated to accommodate the requested number of DocumentMatch
|
||||||
// instances
|
// instances
|
||||||
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
|
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
|
||||||
avail := make(DocumentMatchCollection, 0, size)
|
avail := make(DocumentMatchCollection, size)
|
||||||
// pre-allocate the expected number of instances
|
// pre-allocate the expected number of instances
|
||||||
startBlock := make([]DocumentMatch, size)
|
startBlock := make([]DocumentMatch, size)
|
||||||
|
startSorts := make([]string, size*sortsize)
|
||||||
// make these initial instances available
|
// make these initial instances available
|
||||||
for i := range startBlock {
|
i, j := 0, 0
|
||||||
startBlock[i].Sort = make([]string, 0, sortsize)
|
for i < size {
|
||||||
avail = append(avail, &startBlock[i])
|
avail[i] = &startBlock[i]
|
||||||
|
avail[i].Sort = startSorts[j:j]
|
||||||
|
i += 1
|
||||||
|
j += sortsize
|
||||||
}
|
}
|
||||||
return &DocumentMatchPool{
|
return &DocumentMatchPool{
|
||||||
avail: avail,
|
avail: avail,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user