From 3c82086805892fd9269cbc495578b5baad2dda87 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Fri, 22 Jul 2016 16:49:33 -0700 Subject: [PATCH] optimize upside_down reader & 64-bit struct alignments The UpsideDownCouchTermFieldReader.Next() only needs the doc ID from the key, so this change provides a specialized parseKDoc() method for that optimization. Additionally, fields in various structs are more 64-bit aligned, in an attempt to reduce the invocations of runtime.typedmemmove() and runtime.heapBitsBulkBarrier(), which the go compiler seems to automatically insert to transparently handle misaligned data. --- index/upside_down/reader.go | 20 ++++++++++++-------- index/upside_down/row.go | 27 ++++++++++++++++++++------- search/searchers/search_term.go | 2 +- 3 files changed, 33 insertions(+), 16 deletions(-) diff --git a/index/upside_down/reader.go b/index/upside_down/reader.go index 4b01f927..5b3c2959 100644 --- a/index/upside_down/reader.go +++ b/index/upside_down/reader.go @@ -17,12 +17,12 @@ import ( ) type UpsideDownCouchTermFieldReader struct { + count uint64 indexReader *IndexReader iterator store.KVIterator - count uint64 term []byte + tfrNext *TermFrequencyRow field uint16 - tfrNext TermFrequencyRow } func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16) (*UpsideDownCouchTermFieldReader, error) { @@ -34,9 +34,10 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi if val == nil { atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1)) return &UpsideDownCouchTermFieldReader{ - count: 0, - term: term, - field: field, + count: 0, + term: term, + tfrNext: &TermFrequencyRow{}, + field: field, }, nil } @@ -54,6 +55,7 @@ func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, fi iterator: it, count: dictionaryRow.count, term: term, + tfrNext: &TermFrequencyRow{}, field: field, }, nil } @@ -66,8 +68,8 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* if r.iterator != nil { key, val, valid := r.iterator.Current() if valid { - tfr := &r.tfrNext - err := tfr.parseK(key) + tfr := r.tfrNext + err := tfr.parseKDoc(key) if err != nil { return nil, err } @@ -82,7 +84,9 @@ func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (* rv.ID = string(tfr.doc) rv.Freq = tfr.freq rv.Norm = float64(tfr.norm) - rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) + if tfr.vectors != nil { + rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(tfr.vectors) + } r.iterator.Next() return rv, nil } diff --git a/index/upside_down/row.go b/index/upside_down/row.go index 1a44126b..7327f5e3 100644 --- a/index/upside_down/row.go +++ b/index/upside_down/row.go @@ -350,11 +350,11 @@ func (tv *TermVector) String() string { type TermFrequencyRow struct { term []byte - field uint16 doc []byte freq uint64 - norm float32 vectors []*TermVector + norm float32 + field uint16 } func (tfr *TermFrequencyRow) Term() []byte { @@ -504,7 +504,7 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { } tfr.term = key[3 : 3+termEndPos] - docLen := len(key) - (3 + termEndPos + 1) + docLen := keyLen - (3 + termEndPos + 1) if docLen < 1 { return fmt.Errorf("invalid term frequency key, empty docid") } @@ -513,14 +513,27 @@ func (tfr *TermFrequencyRow) parseK(key []byte) error { return nil } +func (tfr *TermFrequencyRow) parseKDoc(key []byte) error { + termEndPos := bytes.IndexByte(key[3:], ByteSeparator) + if termEndPos < 0 { + return fmt.Errorf("invalid term frequency key, no byte separator terminating term") + } + + tfr.doc = key[3+termEndPos+1:] + if len(tfr.doc) <= 0 { + return fmt.Errorf("invalid term frequency key, empty docid") + } + + return nil +} + func (tfr *TermFrequencyRow) parseV(value []byte) error { - currOffset := 0 - bytesRead := 0 - tfr.freq, bytesRead = binary.Uvarint(value[currOffset:]) + var bytesRead int + tfr.freq, bytesRead = binary.Uvarint(value) if bytesRead <= 0 { return fmt.Errorf("invalid term frequency value, invalid frequency") } - currOffset += bytesRead + currOffset := bytesRead var norm uint64 norm, bytesRead = binary.Uvarint(value[currOffset:]) diff --git a/search/searchers/search_term.go b/search/searchers/search_term.go index c0d8bcc5..7baa649b 100644 --- a/search/searchers/search_term.go +++ b/search/searchers/search_term.go @@ -19,10 +19,10 @@ type TermSearcher struct { indexReader index.IndexReader term string field string - explain bool reader index.TermFieldReader scorer *scorers.TermQueryScorer tfd index.TermFieldDoc + explain bool } func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, explain bool) (*TermSearcher, error) {