
Merge pull request #316 from steveyen/WIP-perf-20160110

upside-down backindex read concurrency
Marty Schoch 2016-01-12 12:53:37 -05:00
commit c3ddf038ab
2 changed files with 205 additions and 136 deletions
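The heart of the change is in upside_down's Batch: back-index lookups, which previously ran serially after analysis, now run on their own goroutine and feed a channel buffered to the batch size, so the KV reads overlap with document analysis. A minimal, self-contained sketch of that producer/consumer shape (the names item, fetchRow, and the string stand-ins are illustrative, not from the diff):

package main

import "fmt"

// item mirrors the commit's docBackIndexRow: a doc ID paired with
// whatever was read back for it from the store.
type item struct {
	docID string
	row   string
}

// fetchRow is a stand-in for a KV-store back-index lookup.
func fetchRow(docID string) string { return "backindex:" + docID }

func main() {
	batch := []string{"doc1", "doc2", "doc3"}

	// Producer: lookups run on their own goroutine; the channel is
	// buffered to the batch size so sends never block the reader loop.
	ch := make(chan item, len(batch))
	go func() {
		defer close(ch)
		for _, id := range batch {
			ch <- item{docID: id, row: fetchRow(id)}
		}
	}()

	// Consumer: the main goroutine is free to do other work here
	// (document analysis, in the commit) and then drains the channel.
	for it := range ch {
		fmt.Println(it.docID, "->", it.row)
	}
}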

index/firestorm/firestorm.go

@@ -346,6 +346,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
 	}
+	indexStart := time.Now()
 	// start a writer for this batch
 	var kvwriter store.KVWriter
 	kvwriter, err = f.store.Writer()
@@ -362,10 +363,12 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
 	}
 	f.compensator.MutateBatch(inflightItems, lastDocNumber)
-	err = kvwriter.Close()
 	f.lookuper.NotifyBatch(inflightItems)
 	f.dictUpdater.NotifyBatch(dictionaryDeltas)
+	err = kvwriter.Close()
+	atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
 	if err == nil {
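In the firestorm half of the change above, kvwriter.Close() moves to after the NotifyBatch calls, letting the lookuper and dictUpdater workers start on the batch while the writer shuts down, and the new indexStart/indexTime pair times the indexing phase. A toy illustration of why starting asynchronous consumers before a blocking step shortens the critical path (every name here is invented):

package main

import (
	"fmt"
	"sync"
	"time"
)

func main() {
	var wg sync.WaitGroup
	notify := func(name string) { // stand-in for NotifyBatch
		wg.Add(1)
		go func() {
			defer wg.Done()
			time.Sleep(30 * time.Millisecond) // background processing
			fmt.Println(name, "finished")
		}()
	}

	start := time.Now()
	notify("lookuper")
	notify("dictUpdater")
	time.Sleep(30 * time.Millisecond) // stand-in for the blocking Close()
	wg.Wait()
	// With the workers started first, everything overlaps and the total
	// is ~30ms; closing before notifying would cost ~60ms.
	fmt.Println("elapsed:", time.Since(start).Round(time.Millisecond))
}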

index/upside_down/upside_down.go

@@ -10,6 +10,7 @@
 package upside_down
 import (
+	"encoding/binary"
 	"encoding/json"
 	"fmt"
 	"math"
@@ -60,6 +61,12 @@ type UpsideDownCouch struct {
 	writeMutex sync.Mutex
 }
+type docBackIndexRow struct {
+	docID        string
+	doc          *document.Document // If deletion, doc will be nil.
+	backIndexRow *BackIndexRow
+}
 func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
 	return &UpsideDownCouch{
 		version: Version,
@@ -72,13 +79,12 @@ func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, an
 }
 func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) {
-	// prepare a list of rows
-	rows := make([]UpsideDownCouchRow, 0)
 	// version marker
-	rows = append(rows, NewVersionRow(udc.version))
-	err = udc.batchRows(kvwriter, nil, rows, nil)
+	rowsAll := [][]UpsideDownCouchRow{
+		[]UpsideDownCouchRow{NewVersionRow(udc.version)},
+	}
+	err = udc.batchRows(kvwriter, nil, rowsAll, nil)
 	return
 }
@@ -135,7 +141,7 @@ func PutRowBuffer(buf []byte) {
 	rowBufferPool.Put(buf)
 }
-func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) (err error) {
+func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
 	// prepare batch
 	wb := writer.NewBatch()
@@ -146,68 +152,84 @@ func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRows []UpsideDow
 	// buffer to work with
 	rowBuf := GetRowBuffer()
+	dictionaryDeltas := make(map[string]int64)
 	// add
-	for _, row := range addRows {
-		tfr, ok := row.(*TermFrequencyRow)
-		if ok {
-			if tfr.DictionaryRowKeySize() > len(rowBuf) {
-				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
-			}
-			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
-			if err != nil {
-				return err
-			}
-			wb.Merge(rowBuf[:dictKeySize], dictionaryTermIncr)
-		}
-		if row.KeySize()+row.ValueSize() > len(rowBuf) {
-			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
-		}
-		keySize, err := row.KeyTo(rowBuf)
-		if err != nil {
-			return err
-		}
-		valSize, err := row.ValueTo(rowBuf[keySize:])
-		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
-	}
+	for _, addRows := range addRowsAll {
+		for _, row := range addRows {
+			tfr, ok := row.(*TermFrequencyRow)
+			if ok {
+				if tfr.DictionaryRowKeySize() > len(rowBuf) {
+					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
+				}
+				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
+				if err != nil {
+					return err
+				}
+				dictionaryDeltas[string(rowBuf[:dictKeySize])] += 1
+			}
+			if row.KeySize()+row.ValueSize() > len(rowBuf) {
+				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
+			}
+			keySize, err := row.KeyTo(rowBuf)
+			if err != nil {
+				return err
+			}
+			valSize, err := row.ValueTo(rowBuf[keySize:])
+			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
+		}
+	}
 	// update
-	for _, row := range updateRows {
-		if row.KeySize()+row.ValueSize() > len(rowBuf) {
-			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
-		}
-		keySize, err := row.KeyTo(rowBuf)
-		if err != nil {
-			return err
-		}
-		valSize, err := row.ValueTo(rowBuf[keySize:])
-		if err != nil {
-			return err
-		}
-		wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
-	}
+	for _, updateRows := range updateRowsAll {
+		for _, row := range updateRows {
+			if row.KeySize()+row.ValueSize() > len(rowBuf) {
+				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
+			}
+			keySize, err := row.KeyTo(rowBuf)
+			if err != nil {
+				return err
+			}
+			valSize, err := row.ValueTo(rowBuf[keySize:])
+			if err != nil {
+				return err
+			}
+			wb.Set(rowBuf[:keySize], rowBuf[keySize:keySize+valSize])
+		}
+	}
 	// delete
-	for _, row := range deleteRows {
-		tfr, ok := row.(*TermFrequencyRow)
-		if ok {
-			// need to decrement counter
-			if tfr.DictionaryRowKeySize() > len(rowBuf) {
-				rowBuf = make([]byte, tfr.DictionaryRowKeySize())
-			}
-			dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
-			if err != nil {
-				return err
-			}
-			wb.Merge(rowBuf[:dictKeySize], dictionaryTermDecr)
-		}
-		if row.KeySize()+row.ValueSize() > len(rowBuf) {
-			rowBuf = make([]byte, row.KeySize()+row.ValueSize())
-		}
-		keySize, err := row.KeyTo(rowBuf)
-		if err != nil {
-			return err
-		}
-		wb.Delete(rowBuf[:keySize])
-	}
+	for _, deleteRows := range deleteRowsAll {
+		for _, row := range deleteRows {
+			tfr, ok := row.(*TermFrequencyRow)
+			if ok {
+				// need to decrement counter
+				if tfr.DictionaryRowKeySize() > len(rowBuf) {
+					rowBuf = make([]byte, tfr.DictionaryRowKeySize())
+				}
+				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf)
+				if err != nil {
+					return err
+				}
+				dictionaryDeltas[string(rowBuf[:dictKeySize])] -= 1
+			}
+			if row.KeySize()+row.ValueSize() > len(rowBuf) {
+				rowBuf = make([]byte, row.KeySize()+row.ValueSize())
+			}
+			keySize, err := row.KeyTo(rowBuf)
+			if err != nil {
+				return err
+			}
+			wb.Delete(rowBuf[:keySize])
+		}
+	}
+	if 8 > len(rowBuf) {
+		rowBuf = make([]byte, 8)
+	}
+	for dictRowKey, delta := range dictionaryDeltas {
+		binary.LittleEndian.PutUint64(rowBuf, uint64(delta))
+		wb.Merge([]byte(dictRowKey), rowBuf[0:8])
+	}
 	PutRowBuffer(rowBuf)
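batchRows now also coalesces dictionary updates: instead of enqueueing one merge operation per added or deleted term row, it sums per-key deltas in the dictionaryDeltas map and flushes a single 8-byte little-endian merge per distinct key. A small sketch of that coalescing step (the Printf stands in for the KV batch's Merge call):

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// Adds increment a term's delta and deletes decrement it, so
	// repeated touches of one key collapse into a single net value.
	deltas := map[string]int64{}
	for _, added := range []string{"couch", "couch", "index"} {
		deltas[added] += 1
	}
	for _, deleted := range []string{"index"} {
		deltas[deleted] -= 1
	}

	// One merge per distinct key, its delta encoded in 8 little-endian
	// bytes, matching the flush loop at the end of the hunk above.
	buf := make([]byte, 8)
	for key, delta := range deltas {
		binary.LittleEndian.PutUint64(buf, uint64(delta))
		fmt.Printf("merge key=%q delta=%d bytes=%v\n", key, delta, buf)
	}
}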
@@ -395,13 +417,22 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
 	}()
 	// prepare a list of rows
-	addRows := make([]UpsideDownCouchRow, 0)
-	updateRows := make([]UpsideDownCouchRow, 0)
-	deleteRows := make([]UpsideDownCouchRow, 0)
-	addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, result.Rows, addRows, updateRows, deleteRows)
-	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
+	var addRowsAll [][]UpsideDownCouchRow
+	var updateRowsAll [][]UpsideDownCouchRow
+	var deleteRowsAll [][]UpsideDownCouchRow
+	addRows, updateRows, deleteRows := udc.mergeOldAndNew(backIndexRow, result.Rows)
+	if len(addRows) > 0 {
+		addRowsAll = append(addRowsAll, addRows)
+	}
+	if len(updateRows) > 0 {
+		updateRowsAll = append(updateRowsAll, updateRows)
+	}
+	if len(deleteRows) > 0 {
+		deleteRowsAll = append(deleteRowsAll, deleteRows)
+	}
+	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
 	if err == nil && backIndexRow == nil {
 		udc.m.Lock()
 		udc.docCount++
@@ -416,7 +447,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
 	return
 }
-func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow, addRows, updateRows, deleteRows []UpsideDownCouchRow) ([]UpsideDownCouchRow, []UpsideDownCouchRow, []UpsideDownCouchRow) {
+func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []index.IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
+	addRows = make([]UpsideDownCouchRow, 0, len(rows))
+	updateRows = make([]UpsideDownCouchRow, 0, len(rows))
+	deleteRows = make([]UpsideDownCouchRow, 0, len(rows))
 	existingTermKeys := make(map[string]bool)
 	for _, key := range backIndexRow.AllTermKeys() {
 		existingTermKeys[string(key)] = true
@@ -570,10 +605,14 @@ func (udc *UpsideDownCouch) Delete(id string) (err error) {
 	}
 	}()
-	deleteRows := make([]UpsideDownCouchRow, 0)
-	deleteRows = udc.deleteSingle(id, backIndexRow, deleteRows)
-	err = udc.batchRows(kvwriter, nil, nil, deleteRows)
+	var deleteRowsAll [][]UpsideDownCouchRow
+	deleteRows := udc.deleteSingle(id, backIndexRow, nil)
+	if len(deleteRows) > 0 {
+		deleteRowsAll = append(deleteRowsAll, deleteRows)
+	}
+	err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll)
 	if err == nil {
 		udc.m.Lock()
 		udc.docCount--
@@ -635,20 +674,6 @@ func (udc *UpsideDownCouch) backIndexRowForDoc(kvreader store.KVReader, docID st
 	return backIndexRow, nil
 }
-func (udc *UpsideDownCouch) backIndexRowsForBatch(kvreader store.KVReader, batch *index.Batch) (map[string]*BackIndexRow, error) {
-	// FIXME faster to order the ids and scan sequentially
-	// for now just get it working
-	rv := make(map[string]*BackIndexRow, 0)
-	for docID := range batch.IndexOps {
-		backIndexRow, err := udc.backIndexRowForDoc(kvreader, docID)
-		if err != nil {
-			return nil, err
-		}
-		rv[docID] = backIndexRow
-	}
-	return rv, nil
-}
 func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document.Field {
 	switch typ {
 	case 't':
@@ -710,7 +735,8 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
 func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
 	analysisStart := time.Now()
-	resultChan := make(chan *index.AnalysisResult)
+	resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
 	var numUpdates uint64
 	for _, doc := range batch.IndexOps {
@@ -740,8 +766,43 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
 	}
 	}()
+	// retrieve back index rows concurrent with analysis
+	docBackIndexRowErr := error(nil)
+	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))
+	udc.writeMutex.Lock()
+	defer udc.writeMutex.Unlock()
+	go func() {
+		defer close(docBackIndexRowCh)
+		// open a reader for backindex lookup
+		var kvreader store.KVReader
+		kvreader, err = udc.store.Reader()
+		if err != nil {
+			docBackIndexRowErr = err
+			return
+		}
+		for docID, doc := range batch.IndexOps {
+			backIndexRow, err := udc.backIndexRowForDoc(kvreader, docID)
+			if err != nil {
+				docBackIndexRowErr = err
+				return
+			}
+			docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
+		}
+		err = kvreader.Close()
+		if err != nil {
+			docBackIndexRowErr = err
+			return
+		}
+	}()
-	// wait for the result
+	// wait for analysis result
 	newRowsMap := make(map[string][]index.IndexRow)
 	var itemsDeQueued uint64
 	for itemsDeQueued < numUpdates {
 		result := <-resultChan
@@ -750,68 +811,22 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
 	}
 	close(resultChan)
-	detectedUnsafeMutex.RLock()
-	defer detectedUnsafeMutex.RUnlock()
-	if detectedUnsafe {
-		return UnsafeBatchUseDetected
-	}
 	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))
-	indexStart := time.Now()
-	udc.writeMutex.Lock()
-	defer udc.writeMutex.Unlock()
-	// open a reader for backindex lookup
-	var kvreader store.KVReader
-	kvreader, err = udc.store.Reader()
-	if err != nil {
-		return
-	}
-	// first lookup all the back index rows
-	var backIndexRows map[string]*BackIndexRow
-	backIndexRows, err = udc.backIndexRowsForBatch(kvreader, batch)
-	if err != nil {
-		_ = kvreader.Close()
-		return
-	}
-	err = kvreader.Close()
-	if err != nil {
-		return
-	}
-	// start a writer for this batch
-	var kvwriter store.KVWriter
-	kvwriter, err = udc.store.Writer()
-	if err != nil {
-		return
-	}
-	// prepare a list of rows
-	addRows := make([]UpsideDownCouchRow, 0)
-	updateRows := make([]UpsideDownCouchRow, 0)
-	deleteRows := make([]UpsideDownCouchRow, 0)
 	docsAdded := uint64(0)
 	docsDeleted := uint64(0)
-	for docID, doc := range batch.IndexOps {
-		backIndexRow := backIndexRows[docID]
-		if doc == nil && backIndexRow != nil {
-			// delete
-			deleteRows = udc.deleteSingle(docID, backIndexRow, deleteRows)
-			docsDeleted++
-		} else if doc != nil {
-			addRows, updateRows, deleteRows = udc.mergeOldAndNew(backIndexRow, newRowsMap[docID], addRows, updateRows, deleteRows)
-			if backIndexRow == nil {
-				docsAdded++
-			}
-		}
-	}
+	indexStart := time.Now()
+	// prepare a list of rows
+	var addRowsAll [][]UpsideDownCouchRow
+	var updateRowsAll [][]UpsideDownCouchRow
+	var deleteRowsAll [][]UpsideDownCouchRow
 	// add the internal ops
+	var updateRows []UpsideDownCouchRow
+	var deleteRows []UpsideDownCouchRow
 	for internalKey, internalValue := range batch.InternalOps {
 		if internalValue == nil {
 			// delete
@@ -823,7 +838,57 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
 		}
 	}
+	if len(updateRows) > 0 {
+		updateRowsAll = append(updateRowsAll, updateRows)
+	}
+	if len(deleteRows) > 0 {
+		deleteRowsAll = append(deleteRowsAll, deleteRows)
+	}
+	// process back index rows as they arrive
+	for dbir := range docBackIndexRowCh {
+		if dbir.doc == nil && dbir.backIndexRow != nil {
+			// delete
+			deleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
+			if len(deleteRows) > 0 {
+				deleteRowsAll = append(deleteRowsAll, deleteRows)
+			}
+			docsDeleted++
+		} else if dbir.doc != nil {
+			addRows, updateRows, deleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID])
+			if len(addRows) > 0 {
+				addRowsAll = append(addRowsAll, addRows)
+			}
+			if len(updateRows) > 0 {
+				updateRowsAll = append(updateRowsAll, updateRows)
+			}
+			if len(deleteRows) > 0 {
+				deleteRowsAll = append(deleteRowsAll, deleteRows)
+			}
+			if dbir.backIndexRow == nil {
+				docsAdded++
+			}
+		}
+	}
+	if docBackIndexRowErr != nil {
+		return docBackIndexRowErr
+	}
+	detectedUnsafeMutex.RLock()
+	defer detectedUnsafeMutex.RUnlock()
+	if detectedUnsafe {
+		return UnsafeBatchUseDetected
+	}
+	// start a writer for this batch
+	var kvwriter store.KVWriter
+	kvwriter, err = udc.store.Writer()
+	if err != nil {
+		return
+	}
-	err = udc.batchRows(kvwriter, addRows, updateRows, deleteRows)
+	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
 	if err != nil {
 		_ = kvwriter.Close()
 		atomic.AddUint64(&udc.stats.errors, 1)
@@ -831,6 +896,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
 	}
 	err = kvwriter.Close()
 	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
 	if err == nil {