Merge branch 'master' into newkvstore
This commit is contained in:
commit
64ce81c283
|
@ -16,7 +16,7 @@ Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/sea
|
|||
* Term, Phrase, Match, Match Phrase, Prefix
|
||||
* Conjunction, Disjunction, Boolean
|
||||
* Numeric Range, Date Range
|
||||
* Simple query [syntax](https://github.com/blevesearch/bleve/wiki/Query-String-Query) for human entry
|
||||
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
|
||||
* tf-idf Scoring
|
||||
* Search result match highlighting
|
||||
* Supports Aggregating Facets:
|
||||
|
|
|
@ -13,23 +13,27 @@ import (
|
|||
"github.com/blevesearch/bleve/analysis"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
|
||||
"github.com/ikawaha/kagome"
|
||||
"github.com/ikawaha/kagome/tokenizer"
|
||||
)
|
||||
|
||||
const TokenizerName = "kagome"
|
||||
|
||||
type KagomeMorphTokenizer struct {
|
||||
tok *kagome.Tokenizer
|
||||
tok tokenizer.Tokenizer
|
||||
}
|
||||
|
||||
func init() {
|
||||
_ = tokenizer.SysDic() // prepare system dictionary
|
||||
}
|
||||
|
||||
func NewKagomeMorphTokenizer() *KagomeMorphTokenizer {
|
||||
return &KagomeMorphTokenizer{
|
||||
tok: kagome.NewTokenizer(),
|
||||
tok: tokenizer.New(),
|
||||
}
|
||||
}
|
||||
|
||||
func NewKagomeMorphTokenizerWithUserDic(userdic *kagome.UserDic) *KagomeMorphTokenizer {
|
||||
k := kagome.NewTokenizer()
|
||||
func NewKagomeMorphTokenizerWithUserDic(userdic tokenizer.UserDic) *KagomeMorphTokenizer {
|
||||
k := tokenizer.New()
|
||||
k.SetUserDic(userdic)
|
||||
return &KagomeMorphTokenizer{
|
||||
tok: k,
|
||||
|
@ -38,7 +42,7 @@ func NewKagomeMorphTokenizerWithUserDic(userdic *kagome.UserDic) *KagomeMorphTok
|
|||
|
||||
func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
||||
var (
|
||||
morphs []kagome.Token
|
||||
morphs []tokenizer.Token
|
||||
prevstart int
|
||||
)
|
||||
|
||||
|
@ -47,7 +51,7 @@ func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
|
|||
return rv
|
||||
}
|
||||
|
||||
morphs = t.tok.Tokenize(string(input))
|
||||
morphs = t.tok.Analyze(string(input), tokenizer.Search)
|
||||
|
||||
for i, m := range morphs {
|
||||
if m.Surface == "EOS" || m.Surface == "BOS" {
|
||||
|
|
|
@ -4,8 +4,3 @@
|
|||
BUILD_NUM=$(curl -s 'https://api.travis-ci.org/repos/blevesearch/beer-search/builds' | grep -o '^\[{"id":[0-9]*,' | grep -o '[0-9]' | tr -d '\n')
|
||||
# Restart last child project build
|
||||
curl -X POST https://api.travis-ci.org/builds/$BUILD_NUM/restart --header "Authorization: token "$AUTH_TOKEN
|
||||
|
||||
# Get last child project build number
|
||||
BUILD_NUM=$(curl -s 'https://api.travis-ci.org/repos/blevesearch/bleve-wiki-indexer/builds' | grep -o '^\[{"id":[0-9]*,' | grep -o '[0-9]' | tr -d '\n')
|
||||
# Restart last child project build
|
||||
curl -X POST https://api.travis-ci.org/builds/$BUILD_NUM/restart --header "Authorization: token "$AUTH_TOKEN
|
34
error.go
34
error.go
|
@ -25,6 +25,7 @@ const (
|
|||
ErrorAliasMulti
|
||||
ErrorAliasEmpty
|
||||
ErrorUnknownIndexType
|
||||
ErrorEmptyID
|
||||
)
|
||||
|
||||
// Error represents a more strongly typed bleve error for detecting
|
||||
|
@ -32,22 +33,23 @@ const (
|
|||
type Error int
|
||||
|
||||
func (e Error) Error() string {
|
||||
return errorMessages[int(e)]
|
||||
return errorMessages[e]
|
||||
}
|
||||
|
||||
var errorMessages = map[int]string{
|
||||
int(ErrorIndexPathExists): "cannot create new index, path already exists",
|
||||
int(ErrorIndexPathDoesNotExist): "cannot open index, path does not exist",
|
||||
int(ErrorIndexMetaMissing): "cannot open index, metadata missing",
|
||||
int(ErrorIndexMetaCorrupt): "cannot open index, metadata corrupt",
|
||||
int(ErrorDisjunctionFewerThanMinClauses): "disjunction query has fewer than the minimum number of clauses to satisfy",
|
||||
int(ErrorBooleanQueryNeedsMustOrShouldOrNotMust): "boolean query must contain at least one must or should or not must clause",
|
||||
int(ErrorNumericQueryNoBounds): "numeric range query must specify min or max",
|
||||
int(ErrorPhraseQueryNoTerms): "phrase query must contain at least one term",
|
||||
int(ErrorUnknownQueryType): "unknown query type",
|
||||
int(ErrorUnknownStorageType): "unknown storage type",
|
||||
int(ErrorIndexClosed): "index is closed",
|
||||
int(ErrorAliasMulti): "cannot perform single index operation on multiple index alias",
|
||||
int(ErrorAliasEmpty): "cannot perform operation on empty alias",
|
||||
int(ErrorUnknownIndexType): "unknown index type",
|
||||
var errorMessages = map[Error]string{
|
||||
ErrorIndexPathExists: "cannot create new index, path already exists",
|
||||
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist",
|
||||
ErrorIndexMetaMissing: "cannot open index, metadata missing",
|
||||
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt",
|
||||
ErrorDisjunctionFewerThanMinClauses: "disjunction query has fewer than the minimum number of clauses to satisfy",
|
||||
ErrorBooleanQueryNeedsMustOrShouldOrNotMust: "boolean query must contain at least one must or should or not must clause",
|
||||
ErrorNumericQueryNoBounds: "numeric range query must specify min or max",
|
||||
ErrorPhraseQueryNoTerms: "phrase query must contain at least one term",
|
||||
ErrorUnknownQueryType: "unknown query type",
|
||||
ErrorUnknownStorageType: "unknown storage type",
|
||||
ErrorIndexClosed: "index is closed",
|
||||
ErrorAliasMulti: "cannot perform single index operation on multiple index alias",
|
||||
ErrorAliasEmpty: "cannot perform operation on empty alias",
|
||||
ErrorUnknownIndexType: "unknown index type",
|
||||
ErrorEmptyID: "document ID cannot be empty",
|
||||
}
|
||||
|
|
71
index.go
71
index.go
|
@ -30,6 +30,9 @@ type Batch struct {
|
|||
// batch. NOTE: the bleve Index is not updated
|
||||
// until the batch is executed.
|
||||
func (b *Batch) Index(id string, data interface{}) error {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
doc := document.NewDocument(id)
|
||||
err := b.index.Mapping().mapDocument(doc, data)
|
||||
if err != nil {
|
||||
|
@ -43,7 +46,9 @@ func (b *Batch) Index(id string, data interface{}) error {
|
|||
// batch. NOTE: the bleve Index is not updated until
|
||||
// the batch is executed.
|
||||
func (b *Batch) Delete(id string) {
|
||||
b.internal.Delete(id)
|
||||
if id != "" {
|
||||
b.internal.Delete(id)
|
||||
}
|
||||
}
|
||||
|
||||
// SetInternal adds the specified set internal
|
||||
|
@ -81,7 +86,71 @@ func (b *Batch) Reset() {
|
|||
// An Index implements all the indexing and searching
|
||||
// capabilities of bleve. An Index can be created
|
||||
// using the New() and Open() methods.
|
||||
//
|
||||
// Index() takes an input value, deduces a DocumentMapping for its type,
|
||||
// assigns string paths to its fields or values then applies field mappings on
|
||||
// them.
|
||||
//
|
||||
// If the value is a []byte, the indexer attempts to convert it to something
|
||||
// else using the ByteArrayConverter registered as
|
||||
// IndexMapping.ByteArrayConverter. By default, it interprets the value as a
|
||||
// JSON payload and unmarshals it to map[string]interface{}.
|
||||
//
|
||||
// The DocumentMapping used to index a value is deduced by the following rules:
|
||||
// 1) If value implements Classifier interface, resolve the mapping from Type().
|
||||
// 2) If value has a string field or value at IndexMapping.TypeField
|
||||
// (defaulting to "_type"), use it to resolve the mapping. Fields addressing
|
||||
// is described below.
|
||||
// 3) If IndexMapping.DefaultType is registered, return it.
|
||||
// 4) Return IndexMapping.DefaultMapping.
|
||||
//
|
||||
// Each field or nested field of the value is identified by a string path, then
|
||||
// mapped to one or several FieldMappings which extract the result for analysis.
|
||||
//
|
||||
// Struct value fields are identified by their "json:" tag, or by their name.
|
||||
// Nested fields are identified by prefixing with their parent identifier,
|
||||
// separated by a dot.
|
||||
//
|
||||
// Map value entries are identified by their string key. Entries not indexed
|
||||
// by strings are ignored. Entry values are identified recursively like struct
|
||||
// fields.
|
||||
//
|
||||
// Slice and array values are identified by their field name. Their elements
|
||||
// are processed sequentially with the same FieldMapping.
|
||||
//
|
||||
// String, float64 and time.Time values are identified by their field name.
|
||||
// Other types are ignored.
|
||||
//
|
||||
// Each value identifier is decomposed into its parts, which recursively address
|
||||
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
|
||||
// mapping is found, all its FieldMappings are applied to the value. If no
|
||||
// mapping is found and the root DocumentMapping is dynamic, default mappings
|
||||
// are used based on value type and IndexMapping default configurations.
|
||||
//
|
||||
// Finally, mapped values are analyzed, indexed or stored. Examples:
|
||||
//
|
||||
// type Date struct {
|
||||
// Day string `json:"day"`
|
||||
// Month string
|
||||
// Year string
|
||||
// }
|
||||
//
|
||||
// type Person struct {
|
||||
// FirstName string `json:"first_name"`
|
||||
// LastName string
|
||||
// BirthDate Date `json:"birth_date"`
|
||||
// }
|
||||
//
|
||||
// A Person value FirstName is mapped by the SubDocumentMapping at
|
||||
// "first_name". Its LastName is mapped by the one at "LastName". The day of
|
||||
// BirthDate is mapped to the SubDocumentMapping "day" of the root
|
||||
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
|
||||
// field in the index. The month is mapped to "birth_date.Month".
|
||||
type Index interface {
|
||||
// Index analyzes, indexes or stores mapped data fields. Supplied
|
||||
// identifier is bound to analyzed data and will be retrieved by search
|
||||
// requests. See Index interface documentation for details about mapping
|
||||
// rules.
|
||||
Index(id string, data interface{}) error
|
||||
Delete(id string) error
|
||||
|
||||
|
|
|
@ -21,6 +21,7 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/store"
|
||||
"github.com/blevesearch/bleve/index/store/gtreap"
|
||||
"github.com/blevesearch/bleve/index/upside_down"
|
||||
"github.com/blevesearch/bleve/registry"
|
||||
"github.com/blevesearch/bleve/search"
|
||||
"github.com/blevesearch/bleve/search/collectors"
|
||||
|
@ -165,6 +166,11 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
|
|||
return nil, err
|
||||
}
|
||||
|
||||
// backwards compatibility if index type is missing
|
||||
if rv.meta.IndexType == "" {
|
||||
rv.meta.IndexType = upside_down.Name
|
||||
}
|
||||
|
||||
storeConfig := rv.meta.Config
|
||||
if storeConfig == nil {
|
||||
storeConfig = map[string]interface{}{}
|
||||
|
@ -250,7 +256,11 @@ func (i *indexImpl) Mapping() *IndexMapping {
|
|||
// Index the object with the specified identifier.
|
||||
// The IndexMapping for this index will determine
|
||||
// how the object is indexed.
|
||||
func (i *indexImpl) Index(id string, data interface{}) error {
|
||||
func (i *indexImpl) Index(id string, data interface{}) (err error) {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
|
@ -259,20 +269,21 @@ func (i *indexImpl) Index(id string, data interface{}) error {
|
|||
}
|
||||
|
||||
doc := document.NewDocument(id)
|
||||
err := i.m.mapDocument(doc, data)
|
||||
err = i.m.mapDocument(doc, data)
|
||||
if err != nil {
|
||||
return err
|
||||
return
|
||||
}
|
||||
err = i.i.Update(doc)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
return
|
||||
}
|
||||
|
||||
// Delete entries for the specified identifier from
|
||||
// the index.
|
||||
func (i *indexImpl) Delete(id string) error {
|
||||
func (i *indexImpl) Delete(id string) (err error) {
|
||||
if id == "" {
|
||||
return ErrorEmptyID
|
||||
}
|
||||
|
||||
i.mutex.RLock()
|
||||
defer i.mutex.RUnlock()
|
||||
|
||||
|
@ -280,11 +291,8 @@ func (i *indexImpl) Delete(id string) error {
|
|||
return ErrorIndexClosed
|
||||
}
|
||||
|
||||
err := i.i.Delete(id)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
err = i.i.Delete(id)
|
||||
return
|
||||
}
|
||||
|
||||
// Batch executes multiple Index and Delete
|
||||
|
|
101
index_test.go
101
index_test.go
|
@ -15,6 +15,7 @@ import (
|
|||
"log"
|
||||
"os"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
@ -1038,3 +1039,103 @@ func TestTermVectorArrayPositions(t *testing.T) {
|
|||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDocumentStaticMapping(t *testing.T) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
m := NewIndexMapping()
|
||||
m.DefaultMapping = NewDocumentStaticMapping()
|
||||
m.DefaultMapping.AddFieldMappingsAt("Text", NewTextFieldMapping())
|
||||
m.DefaultMapping.AddFieldMappingsAt("Date", NewDateTimeFieldMapping())
|
||||
m.DefaultMapping.AddFieldMappingsAt("Numeric", NewNumericFieldMapping())
|
||||
|
||||
index, err := New("testidx", m)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
doc1 := struct {
|
||||
Text string
|
||||
IgnoredText string
|
||||
Numeric float64
|
||||
IgnoredNumeric float64
|
||||
Date time.Time
|
||||
IgnoredDate time.Time
|
||||
}{
|
||||
Text: "valid text",
|
||||
IgnoredText: "ignored text",
|
||||
Numeric: 10,
|
||||
IgnoredNumeric: 20,
|
||||
Date: time.Unix(1, 0),
|
||||
IgnoredDate: time.Unix(2, 0),
|
||||
}
|
||||
|
||||
err = index.Index("a", doc1)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
fields, err := index.Fields()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
sort.Strings(fields)
|
||||
expectedFields := []string{"Date", "Numeric", "Text", "_all"}
|
||||
if len(fields) != len(expectedFields) {
|
||||
t.Fatalf("invalid field count: %d", len(fields))
|
||||
}
|
||||
for i, expected := range expectedFields {
|
||||
if expected != fields[i] {
|
||||
t.Fatalf("unexpected field[%d]: %s", i, fields[i])
|
||||
}
|
||||
}
|
||||
|
||||
err = index.Close()
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIndexEmptyDocId(t *testing.T) {
|
||||
defer func() {
|
||||
err := os.RemoveAll("testidx")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}()
|
||||
|
||||
index, err := New("testidx", NewIndexMapping())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
doc := map[string]interface{}{
|
||||
"body": "nodocid",
|
||||
}
|
||||
|
||||
err = index.Index("", doc)
|
||||
if err != ErrorEmptyID {
|
||||
t.Errorf("expect index empty doc id to fail")
|
||||
}
|
||||
|
||||
err = index.Delete("")
|
||||
if err != ErrorEmptyID {
|
||||
t.Errorf("expect delete empty doc id to fail")
|
||||
}
|
||||
|
||||
batch := index.NewBatch()
|
||||
err = batch.Index("", doc)
|
||||
if err != ErrorEmptyID {
|
||||
t.Errorf("expect index empty doc id in batch to fail")
|
||||
}
|
||||
|
||||
batch.Delete("")
|
||||
if batch.Size() > 0 {
|
||||
t.Errorf("expect delete empty doc id in batch to be ignored")
|
||||
}
|
||||
}
|
||||
|
|
|
@ -322,7 +322,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
|
||||
}
|
||||
} else {
|
||||
} else if dm.Dynamic {
|
||||
// automatic indexing behavior
|
||||
|
||||
// first see if it can be parsed by the default date parser
|
||||
|
@ -347,7 +347,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
||||
}
|
||||
} else {
|
||||
} else if dm.Dynamic {
|
||||
// automatic indexing behavior
|
||||
fieldMapping := NewNumericFieldMapping()
|
||||
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
|
||||
|
@ -361,7 +361,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
|
|||
for _, fieldMapping := range subDocMapping.Fields {
|
||||
fieldMapping.processTime(property, pathString, path, indexes, context)
|
||||
}
|
||||
} else {
|
||||
} else if dm.Dynamic {
|
||||
fieldMapping := NewDateTimeFieldMapping()
|
||||
fieldMapping.processTime(property, pathString, path, indexes, context)
|
||||
}
|
||||
|
|
|
@ -50,19 +50,20 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
|
|||
dm.Fields = make(map[string]interface{})
|
||||
}
|
||||
existingVal, ok := dm.Fields[name]
|
||||
if ok {
|
||||
valSlice, ok := existingVal.([]interface{})
|
||||
if ok {
|
||||
// already a slice, append to it
|
||||
valSlice = append(valSlice, value)
|
||||
} else {
|
||||
// create a slice
|
||||
valSlice = []interface{}{existingVal, value}
|
||||
}
|
||||
dm.Fields[name] = valSlice
|
||||
} else {
|
||||
if !ok {
|
||||
dm.Fields[name] = value
|
||||
return
|
||||
}
|
||||
|
||||
valSlice, ok := existingVal.([]interface{})
|
||||
if ok {
|
||||
// already a slice, append to it
|
||||
valSlice = append(valSlice, value)
|
||||
} else {
|
||||
// create a slice
|
||||
valSlice = []interface{}{existingVal, value}
|
||||
}
|
||||
dm.Fields[name] = valSlice
|
||||
}
|
||||
|
||||
type DocumentMatchCollection []*DocumentMatch
|
||||
|
|
Loading…
Reference in New Issue