From 2d72b542c0b5c6fbc52a3921f87d9aa36cfe1d74 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Mon, 10 Oct 2016 16:16:30 -0700 Subject: [PATCH 1/2] optimize upside-down FieldDict reader with prealloc'ed objects As part of this commit, there's also a newly added DictionaryRow.parseDictionaryK() helper method. --- index/upsidedown/field_dict.go | 18 ++++++++++++------ index/upsidedown/row.go | 22 +++++++++++++++------- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/index/upsidedown/field_dict.go b/index/upsidedown/field_dict.go index ab906b0d..ee8d2014 100644 --- a/index/upsidedown/field_dict.go +++ b/index/upsidedown/field_dict.go @@ -24,6 +24,8 @@ import ( type UpsideDownCouchFieldDict struct { indexReader *IndexReader iterator store.KVIterator + dictRow *DictionaryRow + dictEntry *index.DictEntry field uint16 } @@ -42,6 +44,8 @@ func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTe return &UpsideDownCouchFieldDict{ indexReader: indexReader, iterator: it, + dictRow: &DictionaryRow{}, // Pre-alloced, reused row. + dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry. 
field: field, }, nil @@ -53,17 +57,19 @@ func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) { return nil, nil } - currRow, err := NewDictionaryRowKV(key, val) + err := r.dictRow.parseDictionaryK(key) if err != nil { - return nil, fmt.Errorf("unexpected error parsing dictionary row kv: %v", err) + return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err) } - rv := index.DictEntry{ - Term: string(currRow.term), - Count: currRow.count, + err = r.dictRow.parseDictionaryV(val) + if err != nil { + return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err) } + r.dictEntry.Term = string(r.dictRow.term) + r.dictEntry.Count = r.dictRow.count // advance the iterator to the next term r.iterator.Next() - return &rv, nil + return r.dictEntry, nil } diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index 8d06b319..b2ab21ab 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -306,25 +306,33 @@ func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) { } func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { - rv := DictionaryRow{} + rv := &DictionaryRow{} + err := rv.parseDictionaryK(key) + if err != nil { + return nil, err + } + return rv, nil +} + +func (dr *DictionaryRow) parseDictionaryK(key []byte) error { buf := bytes.NewBuffer(key) _, err := buf.ReadByte() // type if err != nil { - return nil, err + return err } - err = binary.Read(buf, binary.LittleEndian, &rv.field) + err = binary.Read(buf, binary.LittleEndian, &dr.field) if err != nil { - return nil, err + return err } - rv.term, err = buf.ReadBytes(ByteSeparator) + dr.term, err = buf.ReadBytes(ByteSeparator) // there is no separator expected here, should get EOF if err != io.EOF { - return nil, err + return err } - return &rv, nil + return nil } func (dr *DictionaryRow) parseDictionaryV(value []byte) error { From 01fb59d293558dcbbce4c7cd02f8164570c58022 Mon Sep 17 00:00:00 2001 From: Steve Yen Date: Tue, 11 Oct 2016 
08:32:30 -0700 Subject: [PATCH 2/2] optimize upside-down DictionaryRow for fewer parsing alloc's --- analysis/token/lowercase/lowercase.go | 4 ++-- index/upsidedown/field_dict.go | 2 +- index/upsidedown/row.go | 31 +++++++-------------------- search/searcher/search_disjunction.go | 2 +- 4 files changed, 12 insertions(+), 27 deletions(-) diff --git a/analysis/token/lowercase/lowercase.go b/analysis/token/lowercase/lowercase.go index eca62ee8..adb740c3 100644 --- a/analysis/token/lowercase/lowercase.go +++ b/analysis/token/lowercase/lowercase.go @@ -78,11 +78,11 @@ func toLowerDeferredCopy(s []byte) []byte { // Handles the Unicode edge-case where the last // rune in a word on the greek Σ needs to be converted // differently. - if l == 'σ' && i + 2 == len(s) { + if l == 'σ' && i+2 == len(s) { l = 'ς' } - lwid := utf8.RuneLen(l) + lwid := utf8.RuneLen(l) if lwid > wid { // utf-8 encoded replacement is wider // for now, punt and defer diff --git a/index/upsidedown/field_dict.go b/index/upsidedown/field_dict.go index ee8d2014..20d4eb34 100644 --- a/index/upsidedown/field_dict.go +++ b/index/upsidedown/field_dict.go @@ -44,7 +44,7 @@ func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTe return &UpsideDownCouchFieldDict{ indexReader: indexReader, iterator: it, - dictRow: &DictionaryRow{}, // Pre-alloced, reused row. + dictRow: &DictionaryRow{}, // Pre-alloced, reused row. dictEntry: &index.DictEntry{}, // Pre-alloced, reused entry. 
field: field, }, nil diff --git a/index/upsidedown/row.go b/index/upsidedown/row.go index b2ab21ab..5d9c80ee 100644 --- a/index/upsidedown/row.go +++ b/index/upsidedown/row.go @@ -242,9 +242,9 @@ func NewFieldRowKV(key, value []byte) (*FieldRow, error) { const DictionaryRowMaxValueSize = binary.MaxVarintLen64 type DictionaryRow struct { - field uint16 term []byte count uint64 + field uint16 } func (dr *DictionaryRow) Key() []byte { @@ -315,35 +315,20 @@ func NewDictionaryRowK(key []byte) (*DictionaryRow, error) { } func (dr *DictionaryRow) parseDictionaryK(key []byte) error { - buf := bytes.NewBuffer(key) - _, err := buf.ReadByte() // type - if err != nil { - return err + dr.field = binary.LittleEndian.Uint16(key[1:3]) + if dr.term != nil { + dr.term = dr.term[:0] } - - err = binary.Read(buf, binary.LittleEndian, &dr.field) - if err != nil { - return err - } - - dr.term, err = buf.ReadBytes(ByteSeparator) - // there is no separator expected here, should get EOF - if err != io.EOF { - return err - } - + dr.term = append(dr.term, key[3:]...) return nil } func (dr *DictionaryRow) parseDictionaryV(value []byte) error { - buf := bytes.NewBuffer(value) - - count, err := binary.ReadUvarint(buf) - if err != nil { - return err + count, nread := binary.Uvarint(value) + if nread <= 0 { + return fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread) } dr.count = count - return nil } diff --git a/search/searcher/search_disjunction.go b/search/searcher/search_disjunction.go index bf416baa..bd2ff68a 100644 --- a/search/searcher/search_disjunction.go +++ b/search/searcher/search_disjunction.go @@ -127,7 +127,7 @@ func (s *DisjunctionSearcher) updateMatches() error { return err } - last := len(s.searchers)-1 + last := len(s.searchers) - 1 s.searchers[i] = s.searchers[last] s.searchers = s.searchers[0:last] s.currs[i] = s.currs[last]