refactor to make mem segment contents exported

2017-12-01 07:26:47 -05:00 · 2017-12-01 07:26:47 -05:00 · 395458ce83
parent f521d80835
commit 395458ce83
4 changed files with 86 additions and 86 deletions
--- a/index/scorch/segment/mem/build.go
+++ b/index/scorch/segment/mem/build.go
@ -23,7 +23,7 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
 	}

 	// go back and sort the dictKeys
-	for _, dict := range s.dictKeys {
+	for _, dict := range s.DictKeys {
 		sort.Strings(dict)
 	}

@ -81,9 +81,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	}

 	storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
-		s.stored[docNum][field] = append(s.stored[docNum][field], val)
-		s.storedTypes[docNum][field] = append(s.storedTypes[docNum][field], typ)
-		s.storedPos[docNum][field] = append(s.storedPos[docNum][field], pos)
+		s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
+		s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
+		s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
 	}

 	// walk each composite field
@ -107,7 +107,7 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	// now that its been rolled up into docMap, walk that
 	for fieldID, tokenFrequencies := range docMap {
 		for term, tokenFreq := range tokenFrequencies {
-			fieldTermPostings := s.dicts[fieldID][term]
+			fieldTermPostings := s.Dicts[fieldID][term]

 			// FIXME this if/else block has duplicate code that has resulted in
 			// bugs fixed/missed more than once, need to refactor
@ -116,12 +116,12 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 				bs := roaring.New()
 				bs.AddInt(int(docNum))

-				newPostingID := uint64(len(s.postings) + 1)
+				newPostingID := uint64(len(s.Postings) + 1)
 				// add this new bitset to the postings slice
-				s.postings = append(s.postings, bs)
+				s.Postings = append(s.Postings, bs)
 				// add this to the details slice
-				s.freqs = append(s.freqs, []uint64{uint64(tokenFreq.Frequency())})
-				s.norms = append(s.norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))})
+				s.Freqs = append(s.Freqs, []uint64{uint64(tokenFreq.Frequency())})
+				s.Norms = append(s.Norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))})
 				// add to locations
 				var locfields []uint16
 				var locstarts []uint64
@ -143,35 +143,35 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 						locarraypos = append(locarraypos, nil)
 					}
 				}
-				s.locfields = append(s.locfields, locfields)
-				s.locstarts = append(s.locstarts, locstarts)
-				s.locends = append(s.locends, locends)
-				s.locpos = append(s.locpos, locpos)
-				s.locarraypos = append(s.locarraypos, locarraypos)
+				s.Locfields = append(s.Locfields, locfields)
+				s.Locstarts = append(s.Locstarts, locstarts)
+				s.Locends = append(s.Locends, locends)
+				s.Locpos = append(s.Locpos, locpos)
+				s.Locarraypos = append(s.Locarraypos, locarraypos)
 				// record it
-				s.dicts[fieldID][term] = newPostingID
+				s.Dicts[fieldID][term] = newPostingID
 				// this term was new for this field, add it to dictKeys
-				s.dictKeys[fieldID] = append(s.dictKeys[fieldID], term)
+				s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term)
 			} else {
 				// posting already started for this field/term
 				// the actual offset is - 1, because 0 is zero value
-				bs := s.postings[fieldTermPostings-1]
+				bs := s.Postings[fieldTermPostings-1]
 				bs.AddInt(int(docNum))
-				s.freqs[fieldTermPostings-1] = append(s.freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency()))
-				s.norms[fieldTermPostings-1] = append(s.norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
+				s.Freqs[fieldTermPostings-1] = append(s.Freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency()))
+				s.Norms[fieldTermPostings-1] = append(s.Norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
 				for _, loc := range tokenFreq.Locations {
 					var locf = fieldID
 					if loc.Field != "" {
 						locf = uint16(s.getOrDefineField(loc.Field, false))
 					}
-					s.locfields[fieldTermPostings-1] = append(s.locfields[fieldTermPostings-1], locf)
-					s.locstarts[fieldTermPostings-1] = append(s.locstarts[fieldTermPostings-1], uint64(loc.Start))
-					s.locends[fieldTermPostings-1] = append(s.locends[fieldTermPostings-1], uint64(loc.End))
-					s.locpos[fieldTermPostings-1] = append(s.locpos[fieldTermPostings-1], uint64(loc.Position))
+					s.Locfields[fieldTermPostings-1] = append(s.Locfields[fieldTermPostings-1], locf)
+					s.Locstarts[fieldTermPostings-1] = append(s.Locstarts[fieldTermPostings-1], uint64(loc.Start))
+					s.Locends[fieldTermPostings-1] = append(s.Locends[fieldTermPostings-1], uint64(loc.End))
+					s.Locpos[fieldTermPostings-1] = append(s.Locpos[fieldTermPostings-1], uint64(loc.Position))
 					if len(loc.ArrayPositions) > 0 {
-						s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], loc.ArrayPositions)
+						s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], loc.ArrayPositions)
 					} else {
-						s.locarraypos[fieldTermPostings-1] = append(s.locarraypos[fieldTermPostings-1], nil)
+						s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], nil)
 					}
 				}
 			}
@ -180,23 +180,23 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 }

 func (s *Segment) getOrDefineField(name string, hasLoc bool) int {
-	fieldID, ok := s.fields[name]
+	fieldID, ok := s.FieldsMap[name]
 	if !ok {
-		fieldID = uint16(len(s.fieldsInv) + 1)
-		s.fields[name] = fieldID
-		s.fieldsInv = append(s.fieldsInv, name)
-		s.fieldsLoc = append(s.fieldsLoc, hasLoc)
-		s.dicts = append(s.dicts, make(map[string]uint64))
-		s.dictKeys = append(s.dictKeys, make([]string, 0))
+		fieldID = uint16(len(s.FieldsInv) + 1)
+		s.FieldsMap[name] = fieldID
+		s.FieldsInv = append(s.FieldsInv, name)
+		s.FieldsLoc = append(s.FieldsLoc, hasLoc)
+		s.Dicts = append(s.Dicts, make(map[string]uint64))
+		s.DictKeys = append(s.DictKeys, make([]string, 0))
 	}
 	return int(fieldID - 1)
 }

 func (s *Segment) addDocument() int {
-	docNum := len(s.stored)
-	s.stored = append(s.stored, map[uint16][][]byte{})
-	s.storedTypes = append(s.storedTypes, map[uint16][]byte{})
-	s.storedPos = append(s.storedPos, map[uint16][][]uint64{})
+	docNum := len(s.Stored)
+	s.Stored = append(s.Stored, map[uint16][][]byte{})
+	s.StoredTypes = append(s.StoredTypes, map[uint16][]byte{})
+	s.StoredPos = append(s.StoredPos, map[uint16][][]uint64{})
 	return docNum
 }

--- a/index/scorch/segment/mem/dict.go
+++ b/index/scorch/segment/mem/dict.go
@ -21,7 +21,7 @@ func (d *Dictionary) PostingsList(term string, except *roaring.Bitmap) segment.P
 	return &PostingsList{
 		dictionary: d,
 		term:       term,
-		postingsID: d.segment.dicts[d.fieldID][term],
+		postingsID: d.segment.Dicts[d.fieldID][term],
 		except:     except,
 	}
 }
@ -36,7 +36,7 @@ func (d *Dictionary) Iterator() segment.DictionaryIterator {
 // PrefixIterator returns an iterator which only visits terms having the
 // the specified prefix
 func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
-	offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], prefix)
+	offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], prefix)
 	return &DictionaryIterator{
 		d:      d,
 		prefix: prefix,
@ -47,7 +47,7 @@ func (d *Dictionary) PrefixIterator(prefix string) segment.DictionaryIterator {
 // RangeIterator returns an iterator which only visits terms between the
 // start and end terms.  NOTE: bleve.index API specifies the end is inclusive.
 func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator {
-	offset := sort.SearchStrings(d.segment.dictKeys[d.fieldID], start)
+	offset := sort.SearchStrings(d.segment.DictKeys[d.fieldID], start)
 	return &DictionaryIterator{
 		d:      d,
 		offset: offset,
@ -65,10 +65,10 @@ type DictionaryIterator struct {

 // Next returns the next entry in the dictionary
 func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
-	if d.offset > len(d.d.segment.dictKeys[d.d.fieldID])-1 {
+	if d.offset > len(d.d.segment.DictKeys[d.d.fieldID])-1 {
 		return nil, nil
 	}
-	next := d.d.segment.dictKeys[d.d.fieldID][d.offset]
+	next := d.d.segment.DictKeys[d.d.fieldID][d.offset]
 	// check prefix
 	if d.prefix != "" && !strings.HasPrefix(next, d.prefix) {
 		return nil, nil
@ -79,9 +79,9 @@ func (d *DictionaryIterator) Next() (*index.DictEntry, error) {
 	}

 	d.offset++
-	postingID := d.d.segment.dicts[d.d.fieldID][next]
+	postingID := d.d.segment.Dicts[d.d.fieldID][next]
 	return &index.DictEntry{
 		Term:  next,
-		Count: d.d.segment.postings[postingID-1].GetCardinality(),
+		Count: d.d.segment.Postings[postingID-1].GetCardinality(),
 	}, nil
 }
--- a/index/scorch/segment/mem/posting.go
+++ b/index/scorch/segment/mem/posting.go
@ -17,7 +17,7 @@ type PostingsList struct {
 func (p *PostingsList) Count() uint64 {
 	var rv uint64
 	if p.postingsID > 0 {
-		rv = p.dictionary.segment.postings[p.postingsID-1].GetCardinality()
+		rv = p.dictionary.segment.Postings[p.postingsID-1].GetCardinality()
 		if p.except != nil {
 			except := p.except.GetCardinality()
 			if except > rv {
@ -36,7 +36,7 @@ func (p *PostingsList) Iterator() segment.PostingsIterator {
 		postings: p,
 	}
 	if p.postingsID > 0 {
-		allbits := p.dictionary.segment.postings[p.postingsID-1]
+		allbits := p.dictionary.segment.Postings[p.postingsID-1]
 		rv.all = allbits.Iterator()
 		if p.except != nil {
 			allExcept := allbits.Clone()
@ -72,7 +72,7 @@ func (i *PostingsIterator) Next() segment.Posting {
 	// if they don't match, adjust offsets to factor in item we're skipping over
 	// incr the all iterator, and check again
 	for allN != n {
-		i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset])
+		i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
 		i.offset++
 		allN = i.all.Next()
 	}
@ -83,7 +83,7 @@ func (i *PostingsIterator) Next() segment.Posting {
 		locoffset: i.locoffset,
 	}

-	i.locoffset += int(i.postings.dictionary.segment.freqs[i.postings.postingsID-1][i.offset])
+	i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
 	i.offset++
 	return rv
 }
@ -103,17 +103,17 @@ func (p *Posting) Number() uint64 {

 // Frequency returns the frequence of occurance of this term in this doc/field
 func (p *Posting) Frequency() uint64 {
-	return p.iterator.postings.dictionary.segment.freqs[p.iterator.postings.postingsID-1][p.offset]
+	return p.iterator.postings.dictionary.segment.Freqs[p.iterator.postings.postingsID-1][p.offset]
 }

 // Norm returns the normalization factor for this posting
 func (p *Posting) Norm() float64 {
-	return float64(p.iterator.postings.dictionary.segment.norms[p.iterator.postings.postingsID-1][p.offset])
+	return float64(p.iterator.postings.dictionary.segment.Norms[p.iterator.postings.postingsID-1][p.offset])
 }

 // Locations returns the location information for each occurance
 func (p *Posting) Locations() []segment.Location {
-	if !p.iterator.postings.dictionary.segment.fieldsLoc[p.iterator.postings.dictionary.fieldID] {
+	if !p.iterator.postings.dictionary.segment.FieldsLoc[p.iterator.postings.dictionary.fieldID] {
 		return nil
 	}
 	freq := int(p.Frequency())
@ -136,25 +136,25 @@ type Location struct {
 // Field returns the name of the field (useful in composite fields to know
 // which original field the value came from)
 func (l *Location) Field() string {
-	return l.p.iterator.postings.dictionary.segment.fieldsInv[l.p.iterator.postings.dictionary.segment.locfields[l.p.iterator.postings.postingsID-1][l.offset]]
+	return l.p.iterator.postings.dictionary.segment.FieldsInv[l.p.iterator.postings.dictionary.segment.Locfields[l.p.iterator.postings.postingsID-1][l.offset]]
 }

 // Start returns the start byte offset of this occurance
 func (l *Location) Start() uint64 {
-	return l.p.iterator.postings.dictionary.segment.locstarts[l.p.iterator.postings.postingsID-1][l.offset]
+	return l.p.iterator.postings.dictionary.segment.Locstarts[l.p.iterator.postings.postingsID-1][l.offset]
 }

 // End returns the end byte offset of this occurance
 func (l *Location) End() uint64 {
-	return l.p.iterator.postings.dictionary.segment.locends[l.p.iterator.postings.postingsID-1][l.offset]
+	return l.p.iterator.postings.dictionary.segment.Locends[l.p.iterator.postings.postingsID-1][l.offset]
 }

 // Pos returns the 1-based phrase position of this occurance
 func (l *Location) Pos() uint64 {
-	return l.p.iterator.postings.dictionary.segment.locpos[l.p.iterator.postings.postingsID-1][l.offset]
+	return l.p.iterator.postings.dictionary.segment.Locpos[l.p.iterator.postings.postingsID-1][l.offset]
 }

 // ArrayPositions returns the array position vector associated with this occurance
 func (l *Location) ArrayPositions() []uint64 {
-	return l.p.iterator.postings.dictionary.segment.locarraypos[l.p.iterator.postings.postingsID-1][l.offset]
+	return l.p.iterator.postings.dictionary.segment.Locarraypos[l.p.iterator.postings.postingsID-1][l.offset]
 }
--- a/index/scorch/segment/mem/segment.go
+++ b/index/scorch/segment/mem/segment.go
@ -22,77 +22,77 @@ import (
 // Segment is an in memory implementation of scorch.Segment
 type Segment struct {

-	// fields name -> id+1
-	fields map[string]uint16
+	// FieldsMap maps field name -> field id + 1
+	FieldsMap map[string]uint16
 	// fields id -> name
-	fieldsInv []string
+	FieldsInv []string
 	// field id -> has location info
-	fieldsLoc []bool
+	FieldsLoc []bool

 	// term dictionary
 	//  field id -> term -> posting id + 1
-	dicts []map[string]uint64
+	Dicts []map[string]uint64

 	// term dictionary keys
 	//  field id -> []dictionary keys
-	dictKeys [][]string
+	DictKeys [][]string

-	// postings list
-	//  postings list id -> postings bitmap
-	postings []*roaring.Bitmap
+	// Postings holds the postings lists
+	//  postings list id -> postings bitmap
+	Postings []*roaring.Bitmap

 	// term frequencies
-	//  postings list id -> freqs (one for each hit in bitmap)
-	freqs [][]uint64
+	//  postings list id -> freqs (one for each hit in bitmap)
+	Freqs [][]uint64

-	// field norms
-	//  postings list id -> norms (one for each hit in bitmap)
-	norms [][]float32
+	// field norms
+	//  postings list id -> norms (one for each hit in bitmap)
+	Norms [][]float32

 	// field/start/end/pos/locarraypos
 	//  postings list id -> start/end/pos/locarraypos (one for each freq)
-	locfields   [][]uint16
-	locstarts   [][]uint64
-	locends     [][]uint64
-	locpos      [][]uint64
-	locarraypos [][][]uint64
+	Locfields   [][]uint16
+	Locstarts   [][]uint64
+	Locends     [][]uint64
+	Locpos      [][]uint64
+	Locarraypos [][][]uint64

-	// stored field values
+	// Stored field values
 	//  docNum -> field id -> slice of values (each value []byte)
-	stored []map[uint16][][]byte
+	Stored []map[uint16][][]byte

 	// stored field types
 	//  docNum -> field id -> slice of types (each type byte)
-	storedTypes []map[uint16][]byte
+	StoredTypes []map[uint16][]byte

 	// stored field array positions
 	//  docNum -> field id -> slice of array positions (each is []uint64)
-	storedPos []map[uint16][][]uint64
+	StoredPos []map[uint16][][]uint64
 }

 // New builds a new empty Segment
 func New() *Segment {
 	return &Segment{
-		fields: map[string]uint16{},
+		FieldsMap: map[string]uint16{},
 	}
 }

 // Fields returns the field names used in this segment
 func (s *Segment) Fields() []string {
-	return s.fieldsInv
+	return s.FieldsInv
 }

 // VisitDocument invokes the DocFieldValueVistor for each stored field
 // for the specified doc number
 func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVisitor) error {
 	// ensure document number exists
-	if int(num) > len(s.stored)-1 {
+	if int(num) > len(s.Stored)-1 {
 		return nil
 	}
-	docFields := s.stored[int(num)]
+	docFields := s.Stored[int(num)]
 	for field, values := range docFields {
 		for i, value := range values {
-			keepGoing := visitor(s.fieldsInv[field], s.storedTypes[int(num)][field][i], value, s.storedPos[int(num)][field][i])
+			keepGoing := visitor(s.FieldsInv[field], s.StoredTypes[int(num)][field][i], value, s.StoredPos[int(num)][field][i])
 			if !keepGoing {
 				return nil
 			}
@ -113,19 +113,19 @@ func (s *Segment) Dictionary(field string) segment.TermDictionary {
 // Count returns the number of documents in this segment
 // (this has no notion of deleted docs)
 func (s *Segment) Count() uint64 {
-	return uint64(len(s.stored))
+	return uint64(len(s.Stored))
 }

 // DocNumbers returns a bitset corresponding to the doc numbers of all the
 // provided _id strings
 func (s *Segment) DocNumbers(ids []string) *roaring.Bitmap {

-	idDictionary := s.dicts[s.getOrDefineField("_id", false)]
+	idDictionary := s.Dicts[s.getOrDefineField("_id", false)]
 	rv := roaring.New()
 	for _, id := range ids {
 		postingID := idDictionary[id]
 		if postingID > 0 {
-			rv.Or(s.postings[postingID-1])
+			rv.Or(s.Postings[postingID-1])
 		}
 	}
 	return rv