
Merge pull request #795 from steveyen/scorch-mem-optimizations

More scorch micro optimizations when processing mem segments
Steve Yen 2018-03-05 12:02:15 -08:00 committed by GitHub
commit c5ab1f61d7
3 changed files with 16 additions and 15 deletions


@@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		}
 	}
 
-	storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
-		s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
-		s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
-		s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
-	}
-
 	// walk each composite field
 	for _, field := range result.Document.CompositeFields {
 		fieldID := uint16(s.getOrDefineField(field.Name()))
@@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		processField(fieldID, field.Name(), l, tf)
 	}
 
+	docStored := s.Stored[docNum]
+	docStoredTypes := s.StoredTypes[docNum]
+	docStoredPos := s.StoredPos[docNum]
+
 	// walk each field
 	for i, field := range result.Document.Fields {
 		fieldID := uint16(s.getOrDefineField(field.Name()))
@@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 		tf := result.Analyzed[i]
 		processField(fieldID, field.Name(), l, tf)
 		if field.Options().IsStored() {
-			storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
+			docStored[fieldID] = append(docStored[fieldID], field.Value())
+			docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field))
+			docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions())
 		}
 
 		if field.Options().IncludeDocValues() {
@@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
 	// now that its been rolled up into docMap, walk that
 	for fieldID, tokenFrequencies := range docMap {
+		dict := s.Dicts[fieldID]
+		norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))
 		for term, tokenFreq := range tokenFrequencies {
-			pid := s.Dicts[fieldID][term] - 1
+			pid := dict[term] - 1
 			bs := s.Postings[pid]
 			bs.AddInt(int(docNum))
 			s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
-			s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
+			s.Norms[pid] = append(s.Norms[pid], norm)
 			locationBS := s.PostingsLocs[pid]
 			if len(tokenFreq.Locations) > 0 {
 				locationBS.AddInt(int(docNum))
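
The pattern in this hunk is hoisting loop-invariant work out of the inner term loop: the dictionary lookup s.Dicts[fieldID] and the norm computation depend only on fieldID, so they can be evaluated once per field rather than once per term. A minimal standalone sketch of the same idea, using hypothetical stand-in data (fieldLens, dicts, terms) rather than the scorch types:

package main

import (
	"fmt"
	"math"
)

func main() {
	fieldLens := map[uint16]int{0: 4}
	dicts := map[uint16]map[string]uint64{0: {"alpha": 1, "beta": 2}}
	terms := []string{"alpha", "beta"}

	for fieldID := range fieldLens {
		// Hoisted: both values are invariant across the inner loop,
		// so they are computed once per field instead of once per term.
		dict := dicts[fieldID]
		norm := float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))
		for _, term := range terms {
			pid := dict[term] - 1 // one map lookup per term instead of two
			fmt.Println(pid, norm)
		}
	}
}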


@@ -73,6 +73,7 @@ type PostingsIterator struct {
 	offset int
 	locoffset int
 	actual roaring.IntIterable
+	reuse Posting
 }
 
 // Next returns the next posting on the postings list, or nil at the end
@@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
 		i.offset++
 		allN = i.all.Next()
 	}
-	rv := &Posting{
+	i.reuse = Posting{
 		iterator: i,
 		docNum: uint64(n),
 		offset: i.offset,
 		locoffset: i.locoffset,
 		hasLoc: i.locations.Contains(n),
 	}
 	i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
 	i.offset++
-	return rv, nil
+	return &i.reuse, nil
 }
 
 // Posting is a single entry in a postings list
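
With the reuse field added above, Next() overwrites one iterator-owned Posting and returns its address, so the hot loop no longer allocates a Posting per call. The trade-off is aliasing: the returned pointer is only valid until the next call to Next(), so a caller that needs to keep a posting must copy it. A minimal sketch of the same pattern, using illustrative Item/Iter types rather than the scorch API:

package main

import "fmt"

type Item struct{ n int }

type Iter struct {
	nums  []int
	pos   int
	reuse Item // overwritten on every call to Next
}

// Next returns a pointer into the iterator itself; the pointed-to value
// is only valid until the following call to Next.
func (it *Iter) Next() *Item {
	if it.pos >= len(it.nums) {
		return nil
	}
	it.reuse = Item{n: it.nums[it.pos]}
	it.pos++
	return &it.reuse
}

func main() {
	it := &Iter{nums: []int{1, 2, 3}}
	for p := it.Next(); p != nil; p = it.Next() {
		v := *p // copy if the value must outlive this iteration
		fmt.Println(v.n)
	}
}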


@@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 			nextPosting, err2 := postingsItr.Next()
 			for err2 == nil && nextPosting != nil {
 				docNum := nextPosting.Number()
-				docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
-				docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
+				docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator)
 				nextPosting, err2 = postingsItr.Next()
 			}
 			if err2 != nil {
@@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
 		next, err = dictItr.Next()
 	}
 
 	if err != nil {
 		return nil, err
 	}
 	// sort wrt to docIDs
 	var docNumbers docIDRange
 	for k := range docTermMap {
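
Folding the two appends into one nested call means the docTermMap[docNum] entry is read and written once per posting instead of twice, trimming two map accesses from the inner loop; the inner append may grow the slice, and the outer append then adds the separator to the (possibly reallocated) result before the single map store. A standalone sketch of the rewrite, where termSeparator is a hypothetical separator byte rather than the actual zap constant:

package main

import "fmt"

const termSeparator byte = 0xff // hypothetical separator value

func main() {
	docTermMap := map[uint64][]byte{}
	docNum, term := uint64(7), "alpha"

	// Before: two map reads and two map writes per posting.
	// docTermMap[docNum] = append(docTermMap[docNum], []byte(term)...)
	// docTermMap[docNum] = append(docTermMap[docNum], termSeparator)

	// After: the map entry is read once and written once.
	docTermMap[docNum] = append(append(docTermMap[docNum], []byte(term)...), termSeparator)

	fmt.Printf("%q\n", docTermMap[docNum]) // "alpha\xff"
}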