Merge pull request #734 from steveyen/master
scorch mem segment optimizations
commit f4c3f984a4
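The changes fall into three groups, visible in the hunks below: (1) segment construction becomes two-pass, with a new initializeDict counting pass that lets NewFromAnalyzedDocs preallocate every postings list and its freq/norm/location slices out of a few large contiguous backing arrays; (2) processDocument collapses the duplicated if/else flagged by the old FIXME into a single code path, since every term's postings list now exists before any document is processed; (3) the persist* writers hoist loop-invariant map and slice lookups into locals and preallocate result slices whose lengths are known up front. The hunks touch, in order, the mem segment builder, the mem Segment struct definition, and the persist* functions.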
@@ -24,13 +24,16 @@ import (
     "github.com/blevesearch/bleve/index"
 )
 
-// NewFromAnalyzedDocs places the analyzed document mutations into this segment
+// NewFromAnalyzedDocs places the analyzed document mutations into a new segment
 func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
     s := New()
 
     // ensure that _id field get fieldID 0
     s.getOrDefineField("_id")
 
+    // fill Dicts/DictKeys and preallocate memory
+    s.initializeDict(results)
+
     // walk each doc
     for _, result := range results {
         s.processDocument(result)
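Note: segment construction is now two-pass. The s.initializeDict(results) call added above runs a counting pass over all analysis results before any document is processed; its implementation is the next hunk, and a sketch of the preallocation trick it enables follows that hunk.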
@@ -82,10 +85,117 @@ func NewFromAnalyzedDocs(results []*index.AnalysisResult) *Segment {
     return s
 }
 
+// fill Dicts/DictKeys and preallocate memory for postings
+func (s *Segment) initializeDict(results []*index.AnalysisResult) {
+    var numPostingsLists int
+
+    numTermsPerPostingsList := make([]int, 0, 64) // Keyed by postings list id.
+    numLocsPerPostingsList := make([]int, 0, 64)  // Keyed by postings list id.
+
+    var numTokenFrequencies int
+    var totLocs int
+
+    processField := func(fieldID uint16, tfs analysis.TokenFrequencies) {
+        for term, tf := range tfs {
+            pidPlus1, exists := s.Dicts[fieldID][term]
+            if !exists {
+                numPostingsLists++
+                pidPlus1 = uint64(numPostingsLists)
+                s.Dicts[fieldID][term] = pidPlus1
+                s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term)
+                numTermsPerPostingsList = append(numTermsPerPostingsList, 0)
+                numLocsPerPostingsList = append(numLocsPerPostingsList, 0)
+            }
+            pid := pidPlus1 - 1
+            numTermsPerPostingsList[pid] += 1
+            numLocsPerPostingsList[pid] += len(tf.Locations)
+            totLocs += len(tf.Locations)
+        }
+        numTokenFrequencies += len(tfs)
+    }
+
+    for _, result := range results {
+        // walk each composite field
+        for _, field := range result.Document.CompositeFields {
+            fieldID := uint16(s.getOrDefineField(field.Name()))
+            _, tf := field.Analyze()
+            processField(fieldID, tf)
+        }
+
+        // walk each field
+        for i, field := range result.Document.Fields {
+            fieldID := uint16(s.getOrDefineField(field.Name()))
+            tf := result.Analyzed[i]
+            processField(fieldID, tf)
+        }
+    }
+
+    s.Postings = make([]*roaring.Bitmap, numPostingsLists)
+    for i := 0; i < numPostingsLists; i++ {
+        s.Postings[i] = roaring.New()
+    }
+    s.PostingsLocs = make([]*roaring.Bitmap, numPostingsLists)
+    for i := 0; i < numPostingsLists; i++ {
+        s.PostingsLocs[i] = roaring.New()
+    }
+
+    // Preallocate big, contiguous backing arrays.
+    auint64Backing := make([][]uint64, numPostingsLists*4+totLocs) // For Freqs, Locstarts, Locends, Locpos, sub-Locarraypos.
+    uint64Backing := make([]uint64, numTokenFrequencies+totLocs*3) // For sub-Freqs, sub-Locstarts, sub-Locends, sub-Locpos.
+    float32Backing := make([]float32, numTokenFrequencies)         // For sub-Norms.
+    uint16Backing := make([]uint16, totLocs)                       // For sub-Locfields.
+
+    // Point top-level slices to the backing arrays.
+    s.Freqs = auint64Backing[0:numPostingsLists]
+    auint64Backing = auint64Backing[numPostingsLists:]
+
+    s.Norms = make([][]float32, numPostingsLists)
+
+    s.Locfields = make([][]uint16, numPostingsLists)
+
+    s.Locstarts = auint64Backing[0:numPostingsLists]
+    auint64Backing = auint64Backing[numPostingsLists:]
+
+    s.Locends = auint64Backing[0:numPostingsLists]
+    auint64Backing = auint64Backing[numPostingsLists:]
+
+    s.Locpos = auint64Backing[0:numPostingsLists]
+    auint64Backing = auint64Backing[numPostingsLists:]
+
+    s.Locarraypos = make([][][]uint64, numPostingsLists)
+
+    // Point sub-slices to the backing arrays.
+    for pid, numTerms := range numTermsPerPostingsList {
+        s.Freqs[pid] = uint64Backing[0:0]
+        uint64Backing = uint64Backing[numTerms:]
+
+        s.Norms[pid] = float32Backing[0:0]
+        float32Backing = float32Backing[numTerms:]
+    }
+
+    for pid, numLocs := range numLocsPerPostingsList {
+        s.Locfields[pid] = uint16Backing[0:0]
+        uint16Backing = uint16Backing[numLocs:]
+
+        s.Locstarts[pid] = uint64Backing[0:0]
+        uint64Backing = uint64Backing[numLocs:]
+
+        s.Locends[pid] = uint64Backing[0:0]
+        uint64Backing = uint64Backing[numLocs:]
+
+        s.Locpos[pid] = uint64Backing[0:0]
+        uint64Backing = uint64Backing[numLocs:]
+
+        s.Locarraypos[pid] = auint64Backing[0:0]
+        auint64Backing = auint64Backing[numLocs:]
+    }
+}
+
 func (s *Segment) processDocument(result *index.AnalysisResult) {
     // used to collate information across fields
-    docMap := map[uint16]analysis.TokenFrequencies{}
-    fieldLens := map[uint16]int{}
+    docMap := make(map[uint16]analysis.TokenFrequencies, len(s.FieldsMap))
+    fieldLens := make(map[uint16]int, len(s.FieldsMap))
 
     docNum := uint64(s.addDocument())
 
     processField := func(field uint16, name string, l int, tf analysis.TokenFrequencies) {
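The heart of initializeDict is carving per-postings-list slices out of one big allocation: each sub-slice starts as a zero-length view (backing[0:0]) whose capacity extends into the shared array, the backing slice is then advanced past the reserved region, and the second pass appends exactly the number of items counted in the first pass. A minimal, self-contained sketch of the pattern (counts and names are illustrative, not from the patch):

    package main

    import "fmt"

    func main() {
        // pass one counted how many freq entries each postings list needs
        numTermsPerPostingsList := []int{3, 1, 2}

        total := 0
        for _, n := range numTermsPerPostingsList {
            total += n
        }

        backing := make([]uint64, total) // one allocation instead of one per list
        freqs := make([][]uint64, len(numTermsPerPostingsList))
        for pid, n := range numTermsPerPostingsList {
            freqs[pid] = backing[0:0] // len 0; cap reaches into the shared array
            backing = backing[n:]     // reserve n slots for this postings list
        }

        // pass two appends exactly the counted number of items, so the
        // appends fill the reserved regions in place and never reallocate
        // (appending more than counted would spill into a neighbor's region)
        freqs[0] = append(freqs[0], 7, 8, 9)
        freqs[1] = append(freqs[1], 42)
        freqs[2] = append(freqs[2], 1, 2)

        fmt.Println(freqs) // [[7 8 9] [42] [1 2]]
    }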
@@ -128,81 +238,27 @@ func (s *Segment) processDocument(result *index.AnalysisResult) {
     // now that its been rolled up into docMap, walk that
     for fieldID, tokenFrequencies := range docMap {
         for term, tokenFreq := range tokenFrequencies {
-            fieldTermPostings := s.Dicts[fieldID][term]
-
-            // FIXME this if/else block has duplicate code that has resulted in
-            // bugs fixed/missed more than once, need to refactor
-            if fieldTermPostings == 0 {
-                // need to build new posting
-                bs := roaring.New()
-                bs.AddInt(int(docNum))
-
-                newPostingID := uint64(len(s.Postings) + 1)
-                // add this new bitset to the postings slice
-                s.Postings = append(s.Postings, bs)
-
-                locationBS := roaring.New()
-                s.PostingsLocs = append(s.PostingsLocs, locationBS)
-                // add this to the details slice
-                s.Freqs = append(s.Freqs, []uint64{uint64(tokenFreq.Frequency())})
-                s.Norms = append(s.Norms, []float32{float32(1.0 / math.Sqrt(float64(fieldLens[fieldID])))})
-                // add to locations
-                var locfields []uint16
-                var locstarts []uint64
-                var locends []uint64
-                var locpos []uint64
-                var locarraypos [][]uint64
-                if len(tokenFreq.Locations) > 0 {
-                    locationBS.AddInt(int(docNum))
-                }
-                for _, loc := range tokenFreq.Locations {
-                    var locf = fieldID
-                    if loc.Field != "" {
-                        locf = uint16(s.getOrDefineField(loc.Field))
-                    }
-                    locfields = append(locfields, locf)
-                    locstarts = append(locstarts, uint64(loc.Start))
-                    locends = append(locends, uint64(loc.End))
-                    locpos = append(locpos, uint64(loc.Position))
-                    if len(loc.ArrayPositions) > 0 {
-                        locarraypos = append(locarraypos, loc.ArrayPositions)
-                    } else {
-                        locarraypos = append(locarraypos, nil)
-                    }
-                }
-                s.Locfields = append(s.Locfields, locfields)
-                s.Locstarts = append(s.Locstarts, locstarts)
-                s.Locends = append(s.Locends, locends)
-                s.Locpos = append(s.Locpos, locpos)
-                s.Locarraypos = append(s.Locarraypos, locarraypos)
-                // record it
-                s.Dicts[fieldID][term] = newPostingID
-                // this term was new for this field, add it to dictKeys
-                s.DictKeys[fieldID] = append(s.DictKeys[fieldID], term)
-            } else {
-                // posting already started for this field/term
-                // the actual offset is - 1, because 0 is zero value
-                bs := s.Postings[fieldTermPostings-1]
-                bs.AddInt(int(docNum))
-                locationBS := s.PostingsLocs[fieldTermPostings-1]
-                s.Freqs[fieldTermPostings-1] = append(s.Freqs[fieldTermPostings-1], uint64(tokenFreq.Frequency()))
-                s.Norms[fieldTermPostings-1] = append(s.Norms[fieldTermPostings-1], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
-                if len(tokenFreq.Locations) > 0 {
-                    locationBS.AddInt(int(docNum))
-                }
-                for _, loc := range tokenFreq.Locations {
-                    var locf = fieldID
-                    if loc.Field != "" {
-                        locf = uint16(s.getOrDefineField(loc.Field))
-                    }
-                    s.Locfields[fieldTermPostings-1] = append(s.Locfields[fieldTermPostings-1], locf)
-                    s.Locstarts[fieldTermPostings-1] = append(s.Locstarts[fieldTermPostings-1], uint64(loc.Start))
-                    s.Locends[fieldTermPostings-1] = append(s.Locends[fieldTermPostings-1], uint64(loc.End))
-                    s.Locpos[fieldTermPostings-1] = append(s.Locpos[fieldTermPostings-1], uint64(loc.Position))
-                    if len(loc.ArrayPositions) > 0 {
-                        s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], loc.ArrayPositions)
-                    } else {
-                        s.Locarraypos[fieldTermPostings-1] = append(s.Locarraypos[fieldTermPostings-1], nil)
-                    }
+            pid := s.Dicts[fieldID][term] - 1
+            bs := s.Postings[pid]
+            bs.AddInt(int(docNum))
+            s.Freqs[pid] = append(s.Freqs[pid], uint64(tokenFreq.Frequency()))
+            s.Norms[pid] = append(s.Norms[pid], float32(1.0/math.Sqrt(float64(fieldLens[fieldID]))))
+            locationBS := s.PostingsLocs[pid]
+            if len(tokenFreq.Locations) > 0 {
+                locationBS.AddInt(int(docNum))
+                for _, loc := range tokenFreq.Locations {
+                    var locf = fieldID
+                    if loc.Field != "" {
+                        locf = uint16(s.getOrDefineField(loc.Field))
+                    }
+                    s.Locfields[pid] = append(s.Locfields[pid], locf)
+                    s.Locstarts[pid] = append(s.Locstarts[pid], uint64(loc.Start))
+                    s.Locends[pid] = append(s.Locends[pid], uint64(loc.End))
+                    s.Locpos[pid] = append(s.Locpos[pid], uint64(loc.Position))
+                    if len(loc.ArrayPositions) > 0 {
+                        s.Locarraypos[pid] = append(s.Locarraypos[pid], loc.ArrayPositions)
+                    } else {
+                        s.Locarraypos[pid] = append(s.Locarraypos[pid], nil)
+                    }
                 }
             }
         }
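With every postings list predefined by initializeDict, processDocument no longer needs separate new-posting and existing-posting branches, which is what retires the old FIXME about duplicated code. The Dicts lookup keeps its postings-list-id-plus-one encoding so the map's zero value can mean "term absent". A tiny runnable sketch of that convention (names are illustrative, not from the patch):

    package main

    import "fmt"

    func main() {
        // dict stores postings list id + 1, so the uint64 zero value
        // returned for a missing key safely means "term not seen yet"
        dict := map[string]uint64{}
        numPostingsLists := 0

        define := func(term string) uint64 {
            pidPlus1, exists := dict[term]
            if !exists {
                numPostingsLists++
                pidPlus1 = uint64(numPostingsLists)
                dict[term] = pidPlus1
            }
            return pidPlus1 - 1 // the actual index into the postings slices
        }

        fmt.Println(define("apple"), define("pear"), define("apple")) // 0 1 0
    }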
@@ -46,7 +46,7 @@ type Segment struct {
     FieldsInv []string
 
     // term dictionary
-    // field id -> term -> posting id + 1
+    // field id -> term -> postings list id + 1
     Dicts []map[string]uint64
 
     // term dictionary keys
@@ -54,7 +54,7 @@ type Segment struct {
     DictKeys [][]string
 
     // Postings list
-    // Postings list id -> Postings bitmap
+    // postings list id -> Postings bitmap
     Postings []*roaring.Bitmap
 
     // Postings List has locations
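The two single-line hunks above only bring the Segment struct comments in line with the new terminology: Dicts maps a term to its postings list id plus one, and Postings is indexed by postings list id.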
@@ -188,9 +188,11 @@ func (s *Segment) VisitDocument(num uint64, visitor segment.DocumentFieldValueVi
         return nil
     }
     docFields := s.Stored[int(num)]
+    st := s.StoredTypes[int(num)]
+    sp := s.StoredPos[int(num)]
     for field, values := range docFields {
         for i, value := range values {
-            keepGoing := visitor(s.FieldsInv[field], s.StoredTypes[int(num)][field][i], value, s.StoredPos[int(num)][field][i])
+            keepGoing := visitor(s.FieldsInv[field], st[field][i], value, sp[field][i])
             if !keepGoing {
                 return nil
             }
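This hunk and the persist* hunks that follow apply one pattern: a lookup that is invariant across an inner loop (s.StoredTypes[int(num)], memSegment.Freqs[postingID], and so on) is evaluated once and bound to a local (st, sp, stf, spf, freqs, norms, loc*). A trivial runnable sketch of the rewrite, with made-up data:

    package main

    import "fmt"

    func main() {
        stored := map[uint16][]string{0: {"a", "b"}, 1: {"c"}}

        for field := range stored {
            values := stored[field] // hoisted: one map lookup per field,
            for i := range values { // not one per value in the inner loop
                fmt.Println(field, values[i])
            }
        }
    }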
@@ -140,12 +140,18 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error)
 
         metaEncoder := govarint.NewU64Base128Encoder(&metaBuf)
 
+        st := memSegment.StoredTypes[docNum]
+        sp := memSegment.StoredPos[docNum]
+
         // encode fields in order
         for fieldID := range memSegment.FieldsInv {
             if storedFieldValues, ok := storedValues[uint16(fieldID)]; ok {
                 // has stored values for this field
                 num := len(storedFieldValues)
 
+                stf := st[uint16(fieldID)]
+                spf := sp[uint16(fieldID)]
+
                 // process each value
                 for i := 0; i < num; i++ {
                     // encode field
|
@ -154,7 +160,7 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error)
|
||||||
return 0, err2
|
return 0, err2
|
||||||
}
|
}
|
||||||
// encode type
|
// encode type
|
||||||
_, err2 = metaEncoder.PutU64(uint64(memSegment.StoredTypes[docNum][uint16(fieldID)][i]))
|
_, err2 = metaEncoder.PutU64(uint64(stf[i]))
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return 0, err2
|
return 0, err2
|
||||||
}
|
}
|
||||||
|
@ -169,13 +175,13 @@ func persistStored(memSegment *mem.Segment, w *CountHashWriter) (uint64, error)
|
||||||
return 0, err2
|
return 0, err2
|
||||||
}
|
}
|
||||||
// encode number of array pos
|
// encode number of array pos
|
||||||
_, err2 = metaEncoder.PutU64(uint64(len(memSegment.StoredPos[docNum][uint16(fieldID)][i])))
|
_, err2 = metaEncoder.PutU64(uint64(len(spf[i])))
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return 0, err2
|
return 0, err2
|
||||||
}
|
}
|
||||||
// encode all array positions
|
// encode all array positions
|
||||||
for j := 0; j < len(memSegment.StoredPos[docNum][uint16(fieldID)][i]); j++ {
|
for _, pos := range spf[i] {
|
||||||
_, err2 = metaEncoder.PutU64(memSegment.StoredPos[docNum][uint16(fieldID)][i][j])
|
_, err2 = metaEncoder.PutU64(pos)
|
||||||
if err2 != nil {
|
if err2 != nil {
|
||||||
return 0, err2
|
return 0, err2
|
||||||
}
|
}
|
||||||
|
@ -235,6 +241,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
|
||||||
if postingID != 0 {
|
if postingID != 0 {
|
||||||
tfEncoder.Reset()
|
tfEncoder.Reset()
|
||||||
}
|
}
|
||||||
|
freqs := memSegment.Freqs[postingID]
|
||||||
|
norms := memSegment.Norms[postingID]
|
||||||
postingsListItr := memSegment.Postings[postingID].Iterator()
|
postingsListItr := memSegment.Postings[postingID].Iterator()
|
||||||
var offset int
|
var offset int
|
||||||
for postingsListItr.HasNext() {
|
for postingsListItr.HasNext() {
|
||||||
|
@@ -242,13 +250,13 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
             docNum := uint64(postingsListItr.Next())
 
             // put freq
-            err := tfEncoder.Add(docNum, memSegment.Freqs[postingID][offset])
+            err := tfEncoder.Add(docNum, freqs[offset])
             if err != nil {
                 return nil, nil, err
             }
 
             // put norm
-            norm := memSegment.Norms[postingID][offset]
+            norm := norms[offset]
             normBits := math.Float32bits(norm)
             err = tfEncoder.Add(docNum, uint64(normBits))
             if err != nil {
@@ -275,40 +283,46 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
         if postingID != 0 {
             locEncoder.Reset()
         }
+        freqs := memSegment.Freqs[postingID]
+        locfields := memSegment.Locfields[postingID]
+        locpos := memSegment.Locpos[postingID]
+        locstarts := memSegment.Locstarts[postingID]
+        locends := memSegment.Locends[postingID]
+        locarraypos := memSegment.Locarraypos[postingID]
         postingsListItr := memSegment.Postings[postingID].Iterator()
         var offset int
         var locOffset int
         for postingsListItr.HasNext() {
             docNum := uint64(postingsListItr.Next())
-            for i := 0; i < int(memSegment.Freqs[postingID][offset]); i++ {
-                if len(memSegment.Locfields[postingID]) > 0 {
+            for i := 0; i < int(freqs[offset]); i++ {
+                if len(locfields) > 0 {
                     // put field
-                    err := locEncoder.Add(docNum, uint64(memSegment.Locfields[postingID][locOffset]))
+                    err := locEncoder.Add(docNum, uint64(locfields[locOffset]))
                     if err != nil {
                         return nil, nil, err
                     }
 
                     // put pos
 
-                    err = locEncoder.Add(docNum, memSegment.Locpos[postingID][locOffset])
+                    err = locEncoder.Add(docNum, locpos[locOffset])
                     if err != nil {
                         return nil, nil, err
                     }
 
                     // put start
-                    err = locEncoder.Add(docNum, memSegment.Locstarts[postingID][locOffset])
+                    err = locEncoder.Add(docNum, locstarts[locOffset])
                     if err != nil {
                         return nil, nil, err
                     }
 
                     // put end
-                    err = locEncoder.Add(docNum, memSegment.Locends[postingID][locOffset])
+                    err = locEncoder.Add(docNum, locends[locOffset])
                     if err != nil {
                         return nil, nil, err
                     }
 
                     // put array positions
-                    num := len(memSegment.Locarraypos[postingID][locOffset])
+                    num := len(locarraypos[locOffset])
 
                     // put the number of array positions to follow
                     err = locEncoder.Add(docNum, uint64(num))
@@ -317,8 +331,8 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
                     }
 
                     // put each array position
-                    for j := 0; j < num; j++ {
-                        err = locEncoder.Add(docNum, memSegment.Locarraypos[postingID][locOffset][j])
+                    for _, pos := range locarraypos[locOffset] {
+                        err = locEncoder.Add(docNum, pos)
                         if err != nil {
                             return nil, nil, err
                         }
@@ -341,6 +355,7 @@ func persistPostingDetails(memSegment *mem.Segment, w *CountHashWriter, chunkFac
 }
 
 func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint64, err error) {
+    rv = make([]uint64, 0, len(memSegment.PostingsLocs))
     for postingID := range memSegment.PostingsLocs {
         // record where we start this posting loc
         rv = append(rv, uint64(w.Count()))
@@ -355,6 +370,7 @@ func persistPostingsLocs(memSegment *mem.Segment, w *CountHashWriter) (rv []uint
 
 func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
     postingsListLocs, freqOffsets, locOffsets []uint64) (rv []uint64, err error) {
+    rv = make([]uint64, 0, len(memSegment.Postings))
     for postingID := range memSegment.Postings {
         // record where we start this posting list
         rv = append(rv, uint64(w.Count()))
@@ -376,7 +392,7 @@ func persistPostingsLists(memSegment *mem.Segment, w *CountHashWriter,
 }
 
 func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs []uint64) ([]uint64, error) {
-    var rv []uint64
+    rv := make([]uint64, 0, len(memSegment.DictKeys))
 
     var buffer bytes.Buffer
     for fieldID, fieldTerms := range memSegment.DictKeys {
@@ -392,10 +408,10 @@ func persistDictionary(memSegment *mem.Segment, w *CountHashWriter, postingsLocs
 
         dict := memSegment.Dicts[fieldID]
         // now walk the dictionary in order of fieldTerms (already sorted)
-        for i := range fieldTerms {
-            postingID := dict[fieldTerms[i]] - 1
+        for _, fieldTerm := range fieldTerms {
+            postingID := dict[fieldTerm] - 1
             postingsAddr := postingsLocs[postingID]
-            err = builder.Insert([]byte(fieldTerms[i]), postingsAddr)
+            err = builder.Insert([]byte(fieldTerm), postingsAddr)
             if err != nil {
                 return nil, err
             }
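Finally, the rv slices in persistPostingsLocs, persistPostingsLists, and persistDictionary are now created with their capacity set up front (one entry per postings list, or per field), so append never reallocates mid-loop. A minimal sketch (the size is illustrative):

    package main

    import "fmt"

    func main() {
        n := 4 // e.g. len(memSegment.Postings); the value is made up
        rv := make([]uint64, 0, n)
        for i := 0; i < n; i++ {
            rv = append(rv, uint64(i)*100) // fills reserved capacity; no regrow
        }
        fmt.Println(rv, len(rv), cap(rv)) // [0 100 200 300] 4 4
    }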