0
0
Fork 0

add support for numPlainTextBytesIndexed metric

This commit is contained in:
Marty Schoch 2016-03-05 14:05:08 -05:00
parent 81780f97d0
commit 23a323bc9d
12 changed files with 193 additions and 49 deletions

View File

@ -55,3 +55,18 @@ func (d *Document) GoString() string {
}
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
}
func (d *Document) NumPlainTextBytes() uint64 {
rv := uint64(0)
for _, field := range d.Fields {
rv += field.NumPlainTextBytes()
}
for _, compositeField := range d.CompositeFields {
for _, field := range d.Fields {
if compositeField.includesField(field.Name()) {
rv += field.NumPlainTextBytes()
}
}
}
return rv
}

68
document/document_test.go Normal file
View File

@ -0,0 +1,68 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package document
import (
"testing"
)
func TestDocumentNumPlainTextBytes(t *testing.T) {
tests := []struct {
doc *Document
num uint64
}{
{
doc: NewDocument("a"),
num: 0,
},
{
doc: NewDocument("b").
AddField(NewTextField("name", nil, []byte("hello"))),
num: 5,
},
{
doc: NewDocument("c").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))),
num: 6,
},
{
doc: NewDocument("d").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)),
num: 14,
},
{
doc: NewDocument("e").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)).
AddField(NewCompositeField("_all", true, nil, nil)),
num: 28,
},
{
doc: NewDocument("e").
AddField(NewTextField("name", nil, []byte("hello"))).
AddField(NewTextField("desc", nil, []byte("x"))).
AddField(NewNumericField("age", nil, 1.0)).
AddField(NewCompositeField("_all", true, nil, []string{"age"})),
num: 20,
},
}
for _, test := range tests {
actual := test.doc.NumPlainTextBytes()
if actual != test.num {
t.Errorf("expected doc '%s' to have %d plain text bytes, got %d", test.doc.ID, test.num, actual)
}
}
}

View File

@ -26,4 +26,9 @@ type Field interface {
Options() IndexingOptions
Analyze() (int, analysis.TokenFrequencies)
Value() []byte
// NumPlainTextBytes should return the number of plain text bytes
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
}

View File

@ -18,10 +18,11 @@ import (
const DefaultBooleanIndexingOptions = StoreField | IndexField
type BooleanField struct {
name string
arrayPositions []uint64
options IndexingOptions
value []byte
name string
arrayPositions []uint64
options IndexingOptions
value []byte
numPlainTextBytes uint64
}
func (b *BooleanField) Name() string {
@ -66,12 +67,17 @@ func (b *BooleanField) GoString() string {
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}
func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@ -80,14 +86,17 @@ func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField
}
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
numPlainTextBytes := 5
v := []byte("F")
if b {
numPlainTextBytes = 4
v = []byte("T")
}
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: v,
options: options,
name: name,
arrayPositions: arrayPositions,
value: v,
options: options,
numPlainTextBytes: uint64(numPlainTextBytes),
}
}

View File

@ -69,7 +69,11 @@ func (c *CompositeField) Value() []byte {
return []byte{}
}
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
func (c *CompositeField) NumPlainTextBytes() uint64 {
return 0
}
func (c *CompositeField) includesField(field string) bool {
shouldInclude := c.defaultInclude
_, fieldShouldBeIncluded := c.includedFields[field]
if fieldShouldBeIncluded {
@ -79,8 +83,11 @@ func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFr
if fieldShouldBeExcluded {
shouldInclude = false
}
return shouldInclude
}
if shouldInclude {
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
if c.includesField(field) {
c.totalLength += length
c.compositeFrequencies.MergeAll(field, freq)
}

View File

@ -25,10 +25,11 @@ var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
type DateTimeField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
numPlainTextBytes uint64
}
func (n *DateTimeField) Name() string {
@ -95,12 +96,17 @@ func (n *DateTimeField) GoString() string {
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
return &DateTimeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@ -117,6 +123,9 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}, nil
}
return nil, fmt.Errorf("cannot represent %s in this type", dt)

View File

@ -21,10 +21,11 @@ const DefaultNumericIndexingOptions = StoreField | IndexField
const DefaultPrecisionStep uint = 4
type NumericField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
name string
arrayPositions []uint64
options IndexingOptions
value numeric_util.PrefixCoded
numPlainTextBytes uint64
}
func (n *NumericField) Name() string {
@ -91,12 +92,17 @@ func (n *NumericField) GoString() string {
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
func (n *NumericField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
return &NumericField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
@ -112,5 +118,8 @@ func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, nu
arrayPositions: arrayPositions,
value: prefixCoded,
options: options,
// not correct, just a place holder until we revisit how fields are
// represented and can fix this better
numPlainTextBytes: uint64(8),
}
}

View File

@ -18,11 +18,12 @@ import (
const DefaultTextIndexingOptions = IndexField
type TextField struct {
name string
arrayPositions []uint64
options IndexingOptions
analyzer *analysis.Analyzer
value []byte
name string
arrayPositions []uint64
options IndexingOptions
analyzer *analysis.Analyzer
value []byte
numPlainTextBytes uint64
}
func (t *TextField) Name() string {
@ -72,35 +73,42 @@ func (t *TextField) GoString() string {
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
value: value,
name: name,
arrayPositions: arrayPositions,
options: options,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: DefaultTextIndexingOptions,
analyzer: analyzer,
value: value,
name: name,
arrayPositions: arrayPositions,
options: DefaultTextIndexingOptions,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
return &TextField{
name: name,
arrayPositions: arrayPositions,
options: options,
analyzer: analyzer,
value: value,
name: name,
arrayPositions: arrayPositions,
options: options,
analyzer: analyzer,
value: value,
numPlainTextBytes: uint64(len(value)),
}
}

View File

@ -146,6 +146,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(f, doc, resultChan)
@ -183,6 +184,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
f.dictUpdater.NotifyBatch(dictionaryDeltas)
atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
return
}
@ -302,11 +304,13 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
var docsUpdated uint64
var docsDeleted uint64
var numPlainTextBytes uint64
for _, doc := range batch.IndexOps {
if doc != nil {
doc.Number = firstDocNumber // actually assign doc numbers here
firstDocNumber++
docsUpdated++
numPlainTextBytes += doc.NumPlainTextBytes()
} else {
docsDeleted++
}
@ -411,6 +415,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&f.stats.updates, docsUpdated)
atomic.AddUint64(&f.stats.deletes, docsDeleted)
atomic.AddUint64(&f.stats.batches, 1)
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&f.stats.errors, 1)
}

View File

@ -20,6 +20,7 @@ type indexStat struct {
analysisTime, indexTime uint64
termSearchersStarted uint64
termSearchersFinished uint64
numPlainTextBytesIndexed uint64
kvStats json.Marshaler
}
@ -34,6 +35,7 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
m["lookup_queue_len"] = len(i.f.lookuper.workChan)
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
if i.kvStats != nil {
m["kv"] = i.kvStats
}

View File

@ -19,6 +19,7 @@ type indexStat struct {
analysisTime, indexTime uint64
termSearchersStarted uint64
termSearchersFinished uint64
numPlainTextBytesIndexed uint64
kvStats json.Marshaler
}
@ -32,6 +33,7 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
m["index_time"] = atomic.LoadUint64(&i.indexTime)
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
if i.kvStats != nil {
m["kv"] = i.kvStats
}

View File

@ -419,6 +419,7 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -493,6 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -795,9 +797,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
var numUpdates uint64
var numPlainTextBytes uint64
for _, doc := range batch.IndexOps {
if doc != nil {
numUpdates++
numPlainTextBytes += doc.NumPlainTextBytes()
}
}
@ -963,6 +967,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.updates, numUpdates)
atomic.AddUint64(&udc.stats.deletes, docsDeleted)
atomic.AddUint64(&udc.stats.batches, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}