Merge pull request #795 from steveyen/scorch-mem-optimizations
More scorch micro optimizations when processing mem segments
This commit is contained in:
commit
c5ab1f61d7
|
@ -222,12 +222,6 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
}
|
||||
}
|
||||
|
||||
storeField := func(docNum uint64, field uint16, typ byte, val []byte, pos []uint64) {
|
||||
s.Stored[docNum][field] = append(s.Stored[docNum][field], val)
|
||||
s.StoredTypes[docNum][field] = append(s.StoredTypes[docNum][field], typ)
|
||||
s.StoredPos[docNum][field] = append(s.StoredPos[docNum][field], pos)
|
||||
}
|
||||
|
||||
// walk each composite field
|
||||
for _, field := range result.Document.CompositeFields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
|
@ -235,6 +229,10 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
processField(fieldID, field.Name(), l, tf)
|
||||
}
|
||||
|
||||
docStored := s.Stored[docNum]
|
||||
docStoredTypes := s.StoredTypes[docNum]
|
||||
docStoredPos := s.StoredPos[docNum]
|
||||
|
||||
// walk each field
|
||||
for i, field := range result.Document.Fields {
|
||||
fieldID := uint16(s.getOrDefineField(field.Name()))
|
||||
|
@ -242,7 +240,9 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
tf := result.Analyzed[i]
|
||||
processField(fieldID, field.Name(), l, tf)
|
||||
if field.Options().IsStored() {
|
||||
storeField(docNum, fieldID, encodeFieldType(field), field.Value(), field.ArrayPositions())
|
||||
docStored[fieldID] = append(docStored[fieldID], field.Value())
|
||||
docStoredTypes[fieldID] = append(docStoredTypes[fieldID], encodeFieldType(field))
|
||||
docStoredPos[fieldID] = append(docStoredPos[fieldID], field.ArrayPositions())
|
||||
}
|
||||
|
||||
if field.Options().IncludeDocValues() {
|
||||
|
@ -252,12 +252,14 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
|
|||
|
||||
// now that it's been rolled up into docMap, walk that
|
||||
for fieldID, tokenFrequencies := range docMap {
|
||||
dict := s.Dicts[fieldID]
|
||||
norm := float32(1.0/math.Sqrt(float64(fieldLens[fieldID])))
|
||||
for term, tokenFreq := range tokenFrequencies {
|
||||
pid := s.Dicts[fieldID][term] - 1
|
||||
pid := dict[term] - 1
|
||||
bs := s.Postings[pid]
|
||||
bs.AddInt(int(docNum))
|
||||
s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
|
||||
s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
|
||||
s.Norms[pid] = append(s.Norms[pid], norm)
|
||||
locationBS := s.PostingsLocs[pid]
|
||||
if len(tokenFreq.Locations) > 0 {
|
||||
locationBS.AddInt(int(docNum))
|
||||
|
|
|
@ -73,6 +73,7 @@ type PostingsIterator struct {
|
|||
offset int
|
||||
locoffset int
|
||||
actual roaring.IntIterable
|
||||
reuse Posting
|
||||
}
|
||||
|
||||
// Next returns the next posting on the postings list, or nil at the end
|
||||
|
@ -92,17 +93,16 @@ func (i *PostingsIterator) Next() (segment.Posting, error) {
|
|||
i.offset++
|
||||
allN = i.all.Next()
|
||||
}
|
||||
rv := &Posting{
|
||||
i.reuse = Posting{
|
||||
iterator: i,
|
||||
docNum: uint64(n),
|
||||
offset: i.offset,
|
||||
locoffset: i.locoffset,
|
||||
hasLoc: i.locations.Contains(n),
|
||||
}
|
||||
|
||||
i.locoffset += int(i.postings.dictionary.segment.Freqs[i.postings.postingsID-1][i.offset])
|
||||
i.offset++
|
||||
return rv, nil
|
||||
return &i.reuse, nil
|
||||
}
|
||||
|
||||
// Posting is a single entry in a postings list
|
||||
|
|
|
@ -552,8 +552,7 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
nextPosting, err2 := postingsItr.Next()
|
||||
for err2 == nil && nextPosting != nil {
|
||||
docNum := nextPosting.Number()
|
||||
docTermMap[docNum] = append(docTermMap[docNum], []byte(next.Term)...)
|
||||
docTermMap[docNum] = append(docTermMap[docNum], termSeparator)
|
||||
docTermMap[docNum] = append(append(docTermMap[docNum], []byte(next.Term)...), termSeparator)
|
||||
nextPosting, err2 = postingsItr.Next()
|
||||
}
|
||||
if err2 != nil {
|
||||
|
@ -562,10 +561,10 @@ func persistDocValues(memSegment *mem.Segment, w *CountHashWriter,
|
|||
|
||||
next, err = dictItr.Next()
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// sort with respect to docIDs
|
||||
var docNumbers docIDRange
|
||||
for k := range docTermMap {
|
||||
|
|
Loading…
Reference in New Issue