2014-04-17 22:55:53 +02:00
|
|
|
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
|
2014-09-02 16:54:50 +02:00
|
|
|
|
2014-04-17 22:55:53 +02:00
|
|
|
package index
|
|
|
|
|
|
|
|
import (
|
2014-10-02 20:12:22 +02:00
|
|
|
"encoding/json"
|
2015-04-08 16:41:42 +02:00
|
|
|
"fmt"
|
2014-10-02 20:12:22 +02:00
|
|
|
|
2014-08-28 21:38:57 +02:00
|
|
|
"github.com/blevesearch/bleve/document"
|
2014-04-17 22:55:53 +02:00
|
|
|
)
|
|
|
|
|
2015-09-23 20:25:47 +02:00
|
|
|
var (
	// ErrorUnknownStorageType is a package-level error value whose message
	// is "unknown storage type".
	// NOTE(review): presumably returned when a requested storage back end is
	// not recognised — confirm against the call sites.
	ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
)
|
|
|
|
|
2014-04-17 22:55:53 +02:00
|
|
|
type Index interface {
|
|
|
|
Open() error
|
2014-10-31 14:40:23 +01:00
|
|
|
Close() error
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2014-10-31 14:40:23 +01:00
|
|
|
DocCount() (uint64, error)
|
2014-09-12 23:21:35 +02:00
|
|
|
|
2014-04-17 22:55:53 +02:00
|
|
|
Update(doc *document.Document) error
|
|
|
|
Delete(id string) error
|
2014-10-31 14:40:23 +01:00
|
|
|
Batch(batch *Batch) error
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2014-09-12 23:21:35 +02:00
|
|
|
SetInternal(key, val []byte) error
|
|
|
|
DeleteInternal(key []byte) error
|
|
|
|
|
|
|
|
DumpAll() chan interface{}
|
|
|
|
DumpDoc(id string) chan interface{}
|
|
|
|
DumpFields() chan interface{}
|
|
|
|
|
2015-10-17 18:40:26 +02:00
|
|
|
// Reader returns a low-level accessor on the index data. Close it to
|
|
|
|
// release associated resources.
|
2014-10-31 14:40:23 +01:00
|
|
|
Reader() (IndexReader, error)
|
2014-10-02 20:12:22 +02:00
|
|
|
|
|
|
|
Stats() json.Marshaler
|
2015-09-02 19:12:08 +02:00
|
|
|
|
|
|
|
Analyze(d *document.Document) *AnalysisResult
|
2014-09-12 23:21:35 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
type IndexReader interface {
|
2014-04-17 22:55:53 +02:00
|
|
|
TermFieldReader(term []byte, field string) (TermFieldReader, error)
|
2015-10-17 18:40:26 +02:00
|
|
|
|
2015-10-18 10:56:20 +02:00
|
|
|
// DocIDReader returns an iterator over documents which identifiers are
|
2015-10-23 18:03:31 +02:00
|
|
|
// greater than or equal to start and smaller than end. Set start to the
|
|
|
|
// empty string to iterate from the first document, end to the empty string
|
|
|
|
// to iterate to the last one.
|
|
|
|
// The caller must close returned instance to release associated resources.
|
2014-09-04 01:53:59 +02:00
|
|
|
DocIDReader(start, end string) (DocIDReader, error)
|
2014-04-17 22:55:53 +02:00
|
|
|
|
2015-03-10 21:22:19 +01:00
|
|
|
FieldDict(field string) (FieldDict, error)
|
2015-09-23 20:25:47 +02:00
|
|
|
|
|
|
|
// FieldDictRange is currently defined to include the start and end terms
|
2015-03-10 21:22:19 +01:00
|
|
|
FieldDictRange(field string, startTerm []byte, endTerm []byte) (FieldDict, error)
|
|
|
|
FieldDictPrefix(field string, termPrefix []byte) (FieldDict, error)
|
2014-08-07 19:45:39 +02:00
|
|
|
|
2014-06-26 17:43:13 +02:00
|
|
|
Document(id string) (*document.Document, error)
|
2014-08-11 17:03:29 +02:00
|
|
|
DocumentFieldTerms(id string) (FieldTerms, error)
|
2014-06-26 17:43:13 +02:00
|
|
|
|
2014-07-31 17:47:36 +02:00
|
|
|
Fields() ([]string, error)
|
|
|
|
|
2014-08-14 03:14:47 +02:00
|
|
|
GetInternal(key []byte) ([]byte, error)
|
|
|
|
|
2014-09-12 23:21:35 +02:00
|
|
|
DocCount() uint64
|
|
|
|
|
2015-03-06 20:46:29 +01:00
|
|
|
Close() error
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
2014-08-11 17:03:29 +02:00
|
|
|
// FieldTerms is a map from a string key to a list of strings. It is the
// result type of IndexReader.DocumentFieldTerms.
// NOTE(review): presumably keyed by field name with that field's terms as the
// value — confirm against implementations.
type FieldTerms map[string][]string
|
|
|
|
|
2014-04-17 22:55:53 +02:00
|
|
|
// TermFieldVector describes one occurrence record attached to a
// TermFieldDoc.
//
// NOTE(review): field meanings are not established by this file. Presumably
// Field is the field name, ArrayPositions locates the value inside nested
// arrays, Pos is the token position, and Start/End delimit the occurrence —
// confirm against the code that populates these values.
type TermFieldVector struct {
	Field           string
	ArrayPositions  []uint64
	Pos, Start, End uint64
}
|
|
|
|
|
|
|
|
type TermFieldDoc struct {
|
2014-08-07 19:45:39 +02:00
|
|
|
Term string
|
2014-04-17 22:55:53 +02:00
|
|
|
ID string
|
|
|
|
Freq uint64
|
|
|
|
Norm float64
|
|
|
|
Vectors []*TermFieldVector
|
|
|
|
}
|
|
|
|
|
2015-10-27 14:44:28 +01:00
|
|
|
// TermFieldReader is the interface exposing the enumeration of documents
|
|
|
|
// containing a given term in a given field. Documents are returned in byte
|
|
|
|
// lexicographic order over their identifiers.
|
2014-04-17 22:55:53 +02:00
|
|
|
type TermFieldReader interface {
|
2015-10-27 14:44:28 +01:00
|
|
|
// Next returns the next document containing the term in this field, or nil
|
|
|
|
// when it reaches the end of the enumeration.
|
2014-04-17 22:55:53 +02:00
|
|
|
Next() (*TermFieldDoc, error)
|
2015-10-27 14:44:28 +01:00
|
|
|
|
|
|
|
// Advance resets the enumeration at specified document or its immediate
|
|
|
|
// follower.
|
2014-04-17 22:55:53 +02:00
|
|
|
Advance(ID string) (*TermFieldDoc, error)
|
2015-10-27 14:44:28 +01:00
|
|
|
|
|
|
|
// Count returns the number of documents contains the term in this field.
|
2014-04-17 22:55:53 +02:00
|
|
|
Count() uint64
|
2015-03-06 20:46:29 +01:00
|
|
|
Close() error
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
2014-07-11 20:24:28 +02:00
|
|
|
|
2015-03-10 21:22:19 +01:00
|
|
|
// DictEntry is the element type produced by FieldDict.Next: a term paired
// with a count.
// NOTE(review): what Count counts (documents or occurrences) is not
// established by this file — confirm against implementations.
type DictEntry struct {
	Term  string
	Count uint64
}
|
|
|
|
|
|
|
|
type FieldDict interface {
|
|
|
|
Next() (*DictEntry, error)
|
2015-03-06 20:46:29 +01:00
|
|
|
Close() error
|
2014-08-07 19:45:39 +02:00
|
|
|
}
|
|
|
|
|
2015-10-18 10:56:20 +02:00
|
|
|
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {
	// Next returns the next document identifier in ascending lexicographic
	// byte order, or io.EOF when the end of the sequence is reached.
	Next() (string, error)

	// Advance resets the iteration to the first identifier greater than or
	// equal to ID. If ID is smaller than the start of the range, the iteration
	// will start there instead. If ID is greater than or equal to the end of
	// the range, Next() call will return io.EOF.
	Advance(ID string) (string, error)

	// Close reports failure through the returned error.
	Close() error
}
|
2014-08-11 22:27:18 +02:00
|
|
|
|
2014-10-31 14:40:23 +01:00
|
|
|
type Batch struct {
|
|
|
|
IndexOps map[string]*document.Document
|
|
|
|
InternalOps map[string][]byte
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewBatch() *Batch {
|
|
|
|
return &Batch{
|
|
|
|
IndexOps: make(map[string]*document.Document),
|
|
|
|
InternalOps: make(map[string][]byte),
|
|
|
|
}
|
|
|
|
}
|
2014-08-11 22:27:18 +02:00
|
|
|
|
2015-04-08 16:41:42 +02:00
|
|
|
func (b *Batch) Update(doc *document.Document) {
|
2014-10-31 14:40:23 +01:00
|
|
|
b.IndexOps[doc.ID] = doc
|
2014-08-11 22:27:18 +02:00
|
|
|
}
|
|
|
|
|
2015-04-08 16:41:42 +02:00
|
|
|
func (b *Batch) Delete(id string) {
|
2014-10-31 14:40:23 +01:00
|
|
|
b.IndexOps[id] = nil
|
|
|
|
}
|
|
|
|
|
2015-04-08 16:41:42 +02:00
|
|
|
func (b *Batch) SetInternal(key, val []byte) {
|
2014-10-31 14:40:23 +01:00
|
|
|
b.InternalOps[string(key)] = val
|
|
|
|
}
|
|
|
|
|
2015-04-08 16:41:42 +02:00
|
|
|
func (b *Batch) DeleteInternal(key []byte) {
|
2014-10-31 14:40:23 +01:00
|
|
|
b.InternalOps[string(key)] = nil
|
2014-08-11 22:27:18 +02:00
|
|
|
}
|
2015-04-08 16:41:42 +02:00
|
|
|
|
|
|
|
func (b *Batch) String() string {
|
|
|
|
rv := fmt.Sprintf("Batch (%d ops, %d internal ops)\n", len(b.IndexOps), len(b.InternalOps))
|
|
|
|
for k, v := range b.IndexOps {
|
|
|
|
if v != nil {
|
|
|
|
rv += fmt.Sprintf("\tINDEX - '%s'\n", k)
|
|
|
|
} else {
|
|
|
|
rv += fmt.Sprintf("\tDELETE - '%s'\n", k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for k, v := range b.InternalOps {
|
|
|
|
if v != nil {
|
|
|
|
rv += fmt.Sprintf("\tSET INTERNAL - '%s'\n", k)
|
|
|
|
} else {
|
|
|
|
rv += fmt.Sprintf("\tDELETE INTERNAL - '%s'\n", k)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return rv
|
|
|
|
}
|
2015-05-15 21:04:52 +02:00
|
|
|
|
|
|
|
func (b *Batch) Reset() {
|
|
|
|
for k, _ := range b.IndexOps {
|
|
|
|
delete(b.IndexOps, k)
|
|
|
|
}
|
|
|
|
for k, _ := range b.InternalOps {
|
|
|
|
delete(b.InternalOps, k)
|
|
|
|
}
|
|
|
|
}
|