add support for numPlainTextBytesIndexed metric
This commit is contained in:
parent
81780f97d0
commit
23a323bc9d
|
@ -55,3 +55,18 @@ func (d *Document) GoString() string {
|
|||
}
|
||||
return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID, fields, compositeFields)
|
||||
}
|
||||
|
||||
func (d *Document) NumPlainTextBytes() uint64 {
|
||||
rv := uint64(0)
|
||||
for _, field := range d.Fields {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
for _, compositeField := range d.CompositeFields {
|
||||
for _, field := range d.Fields {
|
||||
if compositeField.includesField(field.Name()) {
|
||||
rv += field.NumPlainTextBytes()
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv
|
||||
}
|
||||
|
|
|
@ -0,0 +1,68 @@
|
|||
// Copyright (c) 2014 Couchbase, Inc.
|
||||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
||||
// except in compliance with the License. You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
// Unless required by applicable law or agreed to in writing, software distributed under the
|
||||
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
||||
// either express or implied. See the License for the specific language governing permissions
|
||||
// and limitations under the License.
|
||||
|
||||
package document
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDocumentNumPlainTextBytes(t *testing.T) {
|
||||
|
||||
tests := []struct {
|
||||
doc *Document
|
||||
num uint64
|
||||
}{
|
||||
{
|
||||
doc: NewDocument("a"),
|
||||
num: 0,
|
||||
},
|
||||
{
|
||||
doc: NewDocument("b").
|
||||
AddField(NewTextField("name", nil, []byte("hello"))),
|
||||
num: 5,
|
||||
},
|
||||
{
|
||||
doc: NewDocument("c").
|
||||
AddField(NewTextField("name", nil, []byte("hello"))).
|
||||
AddField(NewTextField("desc", nil, []byte("x"))),
|
||||
num: 6,
|
||||
},
|
||||
{
|
||||
doc: NewDocument("d").
|
||||
AddField(NewTextField("name", nil, []byte("hello"))).
|
||||
AddField(NewTextField("desc", nil, []byte("x"))).
|
||||
AddField(NewNumericField("age", nil, 1.0)),
|
||||
num: 14,
|
||||
},
|
||||
{
|
||||
doc: NewDocument("e").
|
||||
AddField(NewTextField("name", nil, []byte("hello"))).
|
||||
AddField(NewTextField("desc", nil, []byte("x"))).
|
||||
AddField(NewNumericField("age", nil, 1.0)).
|
||||
AddField(NewCompositeField("_all", true, nil, nil)),
|
||||
num: 28,
|
||||
},
|
||||
{
|
||||
doc: NewDocument("e").
|
||||
AddField(NewTextField("name", nil, []byte("hello"))).
|
||||
AddField(NewTextField("desc", nil, []byte("x"))).
|
||||
AddField(NewNumericField("age", nil, 1.0)).
|
||||
AddField(NewCompositeField("_all", true, nil, []string{"age"})),
|
||||
num: 20,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
actual := test.doc.NumPlainTextBytes()
|
||||
if actual != test.num {
|
||||
t.Errorf("expected doc '%s' to have %d plain text bytes, got %d", test.doc.ID, test.num, actual)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -26,4 +26,9 @@ type Field interface {
|
|||
Options() IndexingOptions
|
||||
Analyze() (int, analysis.TokenFrequencies)
|
||||
Value() []byte
|
||||
|
||||
// NumPlainTextBytes should return the number of plain text bytes
|
||||
// that this field represents - this is a common metric for tracking
|
||||
// the rate of indexing
|
||||
NumPlainTextBytes() uint64
|
||||
}
|
||||
|
|
|
@ -18,10 +18,11 @@ import (
|
|||
const DefaultBooleanIndexingOptions = StoreField | IndexField
|
||||
|
||||
type BooleanField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value []byte
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value []byte
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (b *BooleanField) Name() string {
|
||||
|
@ -66,12 +67,17 @@ func (b *BooleanField) GoString() string {
|
|||
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
|
||||
}
|
||||
|
||||
func (b *BooleanField) NumPlainTextBytes() uint64 {
|
||||
return b.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -80,14 +86,17 @@ func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField
|
|||
}
|
||||
|
||||
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
|
||||
numPlainTextBytes := 5
|
||||
v := []byte("F")
|
||||
if b {
|
||||
numPlainTextBytes = 4
|
||||
v = []byte("T")
|
||||
}
|
||||
return &BooleanField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: v,
|
||||
options: options,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: v,
|
||||
options: options,
|
||||
numPlainTextBytes: uint64(numPlainTextBytes),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -69,7 +69,11 @@ func (c *CompositeField) Value() []byte {
|
|||
return []byte{}
|
||||
}
|
||||
|
||||
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
|
||||
func (c *CompositeField) NumPlainTextBytes() uint64 {
|
||||
return 0
|
||||
}
|
||||
|
||||
func (c *CompositeField) includesField(field string) bool {
|
||||
shouldInclude := c.defaultInclude
|
||||
_, fieldShouldBeIncluded := c.includedFields[field]
|
||||
if fieldShouldBeIncluded {
|
||||
|
@ -79,8 +83,11 @@ func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFr
|
|||
if fieldShouldBeExcluded {
|
||||
shouldInclude = false
|
||||
}
|
||||
return shouldInclude
|
||||
}
|
||||
|
||||
if shouldInclude {
|
||||
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
|
||||
if c.includesField(field) {
|
||||
c.totalLength += length
|
||||
c.compositeFrequencies.MergeAll(field, freq)
|
||||
}
|
||||
|
|
|
@ -25,10 +25,11 @@ var MinTimeRepresentable = time.Unix(0, math.MinInt64)
|
|||
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
|
||||
|
||||
type DateTimeField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *DateTimeField) Name() string {
|
||||
|
@ -95,12 +96,17 @@ func (n *DateTimeField) GoString() string {
|
|||
return fmt.Sprintf("&document.DateField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *DateTimeField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
|
||||
return &DateTimeField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultDateTimeIndexingOptions,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultDateTimeIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -117,6 +123,9 @@ func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, d
|
|||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a placeholder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}, nil
|
||||
}
|
||||
return nil, fmt.Errorf("cannot represent %s in this type", dt)
|
||||
|
|
|
@ -21,10 +21,11 @@ const DefaultNumericIndexingOptions = StoreField | IndexField
|
|||
const DefaultPrecisionStep uint = 4
|
||||
|
||||
type NumericField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
value numeric_util.PrefixCoded
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (n *NumericField) Name() string {
|
||||
|
@ -91,12 +92,17 @@ func (n *NumericField) GoString() string {
|
|||
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
|
||||
}
|
||||
|
||||
func (n *NumericField) NumPlainTextBytes() uint64 {
|
||||
return n.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
|
||||
return &NumericField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
value: value,
|
||||
options: DefaultNumericIndexingOptions,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -112,5 +118,8 @@ func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, nu
|
|||
arrayPositions: arrayPositions,
|
||||
value: prefixCoded,
|
||||
options: options,
|
||||
// not correct, just a placeholder until we revisit how fields are
|
||||
// represented and can fix this better
|
||||
numPlainTextBytes: uint64(8),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -18,11 +18,12 @@ import (
|
|||
const DefaultTextIndexingOptions = IndexField
|
||||
|
||||
type TextField struct {
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
analyzer *analysis.Analyzer
|
||||
value []byte
|
||||
name string
|
||||
arrayPositions []uint64
|
||||
options IndexingOptions
|
||||
analyzer *analysis.Analyzer
|
||||
value []byte
|
||||
numPlainTextBytes uint64
|
||||
}
|
||||
|
||||
func (t *TextField) Name() string {
|
||||
|
@ -72,35 +73,42 @@ func (t *TextField) GoString() string {
|
|||
return fmt.Sprintf("&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}", t.name, t.options, t.analyzer, t.value, t.arrayPositions)
|
||||
}
|
||||
|
||||
func (t *TextField) NumPlainTextBytes() uint64 {
|
||||
return t.numPlainTextBytes
|
||||
}
|
||||
|
||||
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
|
||||
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
|
||||
}
|
||||
|
||||
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: DefaultTextIndexingOptions,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: DefaultTextIndexingOptions,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
||||
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
|
||||
return &TextField{
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
name: name,
|
||||
arrayPositions: arrayPositions,
|
||||
options: options,
|
||||
analyzer: analyzer,
|
||||
value: value,
|
||||
numPlainTextBytes: uint64(len(value)),
|
||||
}
|
||||
}
|
||||
|
|
|
@ -146,6 +146,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
|
|||
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
numPlainTextBytes := doc.NumPlainTextBytes()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(f, doc, resultChan)
|
||||
|
||||
|
@ -183,6 +184,7 @@ func (f *Firestorm) Update(doc *document.Document) (err error) {
|
|||
f.dictUpdater.NotifyBatch(dictionaryDeltas)
|
||||
|
||||
atomic.AddUint64(&f.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
return
|
||||
}
|
||||
|
||||
|
@ -302,11 +304,13 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
|
|||
|
||||
var docsUpdated uint64
|
||||
var docsDeleted uint64
|
||||
var numPlainTextBytes uint64
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
doc.Number = firstDocNumber // actually assign doc numbers here
|
||||
firstDocNumber++
|
||||
docsUpdated++
|
||||
numPlainTextBytes += doc.NumPlainTextBytes()
|
||||
} else {
|
||||
docsDeleted++
|
||||
}
|
||||
|
@ -411,6 +415,7 @@ func (f *Firestorm) Batch(batch *index.Batch) (err error) {
|
|||
atomic.AddUint64(&f.stats.updates, docsUpdated)
|
||||
atomic.AddUint64(&f.stats.deletes, docsDeleted)
|
||||
atomic.AddUint64(&f.stats.batches, 1)
|
||||
atomic.AddUint64(&f.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
} else {
|
||||
atomic.AddUint64(&f.stats.errors, 1)
|
||||
}
|
||||
|
|
|
@ -20,6 +20,7 @@ type indexStat struct {
|
|||
analysisTime, indexTime uint64
|
||||
termSearchersStarted uint64
|
||||
termSearchersFinished uint64
|
||||
numPlainTextBytesIndexed uint64
|
||||
kvStats json.Marshaler
|
||||
}
|
||||
|
||||
|
@ -34,6 +35,7 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
|
|||
m["lookup_queue_len"] = len(i.f.lookuper.workChan)
|
||||
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
|
||||
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
|
||||
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
|
||||
if i.kvStats != nil {
|
||||
m["kv"] = i.kvStats
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@ type indexStat struct {
|
|||
analysisTime, indexTime uint64
|
||||
termSearchersStarted uint64
|
||||
termSearchersFinished uint64
|
||||
numPlainTextBytesIndexed uint64
|
||||
kvStats json.Marshaler
|
||||
}
|
||||
|
||||
|
@ -32,6 +33,7 @@ func (i *indexStat) MarshalJSON() ([]byte, error) {
|
|||
m["index_time"] = atomic.LoadUint64(&i.indexTime)
|
||||
m["term_searchers_started"] = atomic.LoadUint64(&i.termSearchersStarted)
|
||||
m["term_searchers_finished"] = atomic.LoadUint64(&i.termSearchersFinished)
|
||||
m["num_plain_text_bytes_indexed"] = atomic.LoadUint64(&i.numPlainTextBytesIndexed)
|
||||
if i.kvStats != nil {
|
||||
m["kv"] = i.kvStats
|
||||
}
|
||||
|
|
|
@ -419,6 +419,7 @@ func (udc *UpsideDownCouch) Close() error {
|
|||
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
numPlainTextBytes := doc.NumPlainTextBytes()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
|
||||
|
@ -493,6 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
if err == nil {
|
||||
atomic.AddUint64(&udc.stats.updates, 1)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
@ -795,9 +797,11 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
|
||||
|
||||
var numUpdates uint64
|
||||
var numPlainTextBytes uint64
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
numUpdates++
|
||||
numPlainTextBytes += doc.NumPlainTextBytes()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -963,6 +967,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
atomic.AddUint64(&udc.stats.updates, numUpdates)
|
||||
atomic.AddUint64(&udc.stats.deletes, docsDeleted)
|
||||
atomic.AddUint64(&udc.stats.batches, 1)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue