0
0
Fork 0

Merge branch 'master' into newkvstore

This commit is contained in:
Marty Schoch 2015-09-29 14:06:27 -04:00
commit 64ce81c283
9 changed files with 237 additions and 57 deletions

View File

@ -16,7 +16,7 @@ Try out bleve live by [searching our wiki](http://wikisearch.blevesearch.com/sea
* Term, Phrase, Match, Match Phrase, Prefix
* Conjunction, Disjunction, Boolean
* Numeric Range, Date Range
* Simple query [syntax](https://github.com/blevesearch/bleve/wiki/Query-String-Query) for human entry
* Simple query [syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* tf-idf Scoring
* Search result match highlighting
* Supports Aggregating Facets:

View File

@ -13,23 +13,27 @@ import (
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/registry"
"github.com/ikawaha/kagome"
"github.com/ikawaha/kagome/tokenizer"
)
const TokenizerName = "kagome"
type KagomeMorphTokenizer struct {
tok *kagome.Tokenizer
tok tokenizer.Tokenizer
}
// init calls tokenizer.SysDic once when the package is loaded and discards
// the result.
// NOTE(review): presumably this forces the kagome system dictionary to be
// loaded eagerly, so the first Tokenize call does not pay that cost — confirm
// against the kagome tokenizer package.
func init() {
_ = tokenizer.SysDic() // prepare system dictionary
}
func NewKagomeMorphTokenizer() *KagomeMorphTokenizer {
return &KagomeMorphTokenizer{
tok: kagome.NewTokenizer(),
tok: tokenizer.New(),
}
}
func NewKagomeMorphTokenizerWithUserDic(userdic *kagome.UserDic) *KagomeMorphTokenizer {
k := kagome.NewTokenizer()
func NewKagomeMorphTokenizerWithUserDic(userdic tokenizer.UserDic) *KagomeMorphTokenizer {
k := tokenizer.New()
k.SetUserDic(userdic)
return &KagomeMorphTokenizer{
tok: k,
@ -38,7 +42,7 @@ func NewKagomeMorphTokenizerWithUserDic(userdic *kagome.UserDic) *KagomeMorphTok
func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
var (
morphs []kagome.Token
morphs []tokenizer.Token
prevstart int
)
@ -47,7 +51,7 @@ func (t *KagomeMorphTokenizer) Tokenize(input []byte) analysis.TokenStream {
return rv
}
morphs = t.tok.Tokenize(string(input))
morphs = t.tok.Analyze(string(input), tokenizer.Search)
for i, m := range morphs {
if m.Surface == "EOS" || m.Surface == "BOS" {

View File

@ -4,8 +4,3 @@
BUILD_NUM=$(curl -s 'https://api.travis-ci.org/repos/blevesearch/beer-search/builds' | grep -o '^\[{"id":[0-9]*,' | grep -o '[0-9]' | tr -d '\n')
# Restart last child project build
curl -X POST https://api.travis-ci.org/builds/$BUILD_NUM/restart --header "Authorization: token "$AUTH_TOKEN
# Get last child project build number
BUILD_NUM=$(curl -s 'https://api.travis-ci.org/repos/blevesearch/bleve-wiki-indexer/builds' | grep -o '^\[{"id":[0-9]*,' | grep -o '[0-9]' | tr -d '\n')
# Restart last child project build
curl -X POST https://api.travis-ci.org/builds/$BUILD_NUM/restart --header "Authorization: token "$AUTH_TOKEN

View File

@ -25,6 +25,7 @@ const (
ErrorAliasMulti
ErrorAliasEmpty
ErrorUnknownIndexType
ErrorEmptyID
)
// Error represents a more strongly typed bleve error for detecting
@ -32,22 +33,23 @@ const (
type Error int
func (e Error) Error() string {
return errorMessages[int(e)]
return errorMessages[e]
}
var errorMessages = map[int]string{
int(ErrorIndexPathExists): "cannot create new index, path already exists",
int(ErrorIndexPathDoesNotExist): "cannot open index, path does not exist",
int(ErrorIndexMetaMissing): "cannot open index, metadata missing",
int(ErrorIndexMetaCorrupt): "cannot open index, metadata corrupt",
int(ErrorDisjunctionFewerThanMinClauses): "disjunction query has fewer than the minimum number of clauses to satisfy",
int(ErrorBooleanQueryNeedsMustOrShouldOrNotMust): "boolean query must contain at least one must or should or not must clause",
int(ErrorNumericQueryNoBounds): "numeric range query must specify min or max",
int(ErrorPhraseQueryNoTerms): "phrase query must contain at least one term",
int(ErrorUnknownQueryType): "unknown query type",
int(ErrorUnknownStorageType): "unknown storage type",
int(ErrorIndexClosed): "index is closed",
int(ErrorAliasMulti): "cannot perform single index operation on multiple index alias",
int(ErrorAliasEmpty): "cannot perform operation on empty alias",
int(ErrorUnknownIndexType): "unknown index type",
// errorMessages maps each Error code to the human-readable message that
// Error.Error returns for it. The map is keyed by the Error type itself, so
// no integer conversion is needed on lookup. Every new Error constant needs
// an entry here; a code without one yields the empty string from the map
// lookup.
var errorMessages = map[Error]string{
ErrorIndexPathExists: "cannot create new index, path already exists",
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist",
ErrorIndexMetaMissing: "cannot open index, metadata missing",
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt",
ErrorDisjunctionFewerThanMinClauses: "disjunction query has fewer than the minimum number of clauses to satisfy",
ErrorBooleanQueryNeedsMustOrShouldOrNotMust: "boolean query must contain at least one must or should or not must clause",
ErrorNumericQueryNoBounds: "numeric range query must specify min or max",
ErrorPhraseQueryNoTerms: "phrase query must contain at least one term",
ErrorUnknownQueryType: "unknown query type",
ErrorUnknownStorageType: "unknown storage type",
ErrorIndexClosed: "index is closed",
ErrorAliasMulti: "cannot perform single index operation on multiple index alias",
ErrorAliasEmpty: "cannot perform operation on empty alias",
ErrorUnknownIndexType: "unknown index type",
ErrorEmptyID: "document ID cannot be empty",
}

View File

@ -30,6 +30,9 @@ type Batch struct {
// batch. NOTE: the bleve Index is not updated
// until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
if id == "" {
return ErrorEmptyID
}
doc := document.NewDocument(id)
err := b.index.Mapping().mapDocument(doc, data)
if err != nil {
@ -43,7 +46,9 @@ func (b *Batch) Index(id string, data interface{}) error {
// batch. NOTE: the bleve Index is not updated until
// the batch is executed.
func (b *Batch) Delete(id string) {
b.internal.Delete(id)
if id != "" {
b.internal.Delete(id)
}
}
// SetInternal adds the specified set internal
@ -81,7 +86,71 @@ func (b *Batch) Reset() {
// An Index implements all the indexing and searching
// capabilities of bleve. An Index can be created
// using the New() and Open() methods.
//
// Index() takes an input value, deduces a DocumentMapping for its type,
// assigns string paths to its fields or values then applies field mappings on
// them.
//
// If the value is a []byte, the indexer attempts to convert it to something
// else using the ByteArrayConverter registered as
// IndexMapping.ByteArrayConverter. By default, it interprets the value as a
// JSON payload and unmarshals it to map[string]interface{}.
//
// The DocumentMapping used to index a value is deduced by the following rules:
// 1) If value implements Classifier interface, resolve the mapping from Type().
// 2) If value has a string field or value at IndexMapping.TypeField
// (defaulting to "_type"), use it to resolve the mapping. Field addressing
// is described below.
// 3) If IndexMapping.DefaultType is registered, return it.
// 4) Return IndexMapping.DefaultMapping.
//
// Each field or nested field of the value is identified by a string path, then
// mapped to one or several FieldMappings which extract the result for analysis.
//
// Struct value fields are identified by their "json:" tag, or by their name.
// Nested fields are identified by prefixing with their parent identifier,
// separated by a dot.
//
// Map values entries are identified by their string key. Entries not indexed
// by strings are ignored. Entry values are identified recursively like struct
// fields.
//
// Slice and array values are identified by their field name. Their elements
// are processed sequentially with the same FieldMapping.
//
// String, float64 and time.Time values are identified by their field name.
// Other types are ignored.
//
// Each value identifier is decomposed into its parts, which recursively address
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
// mapping is found, all its FieldMappings are applied to the value. If no
// mapping is found and the root DocumentMapping is dynamic, default mappings
// are used based on value type and IndexMapping default configurations.
//
// Finally, mapped values are analyzed, indexed or stored. Examples:
//
// type Date struct {
// Day string `json:"day"`
// Month string
// Year string
// }
//
// type Person struct {
// FirstName string `json:"first_name"`
// LastName string
// BirthDate Date `json:"birth_date"`
// }
//
// A Person value FirstName is mapped by the SubDocumentMapping at
// "first_name". Its LastName is mapped by the one at "LastName". The day of
// BirthDate is mapped to the SubDocumentMapping "day" of the root
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
// field in the index. The month is mapped to "birth_date.Month".
type Index interface {
// Index analyzes, indexes or stores mapped data fields. Supplied
// identifier is bound to analyzed data and will be retrieved by search
// requests. See Index interface documentation for details about mapping
// rules.
Index(id string, data interface{}) error
Delete(id string) error

View File

@ -21,6 +21,7 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/store"
"github.com/blevesearch/bleve/index/store/gtreap"
"github.com/blevesearch/bleve/index/upside_down"
"github.com/blevesearch/bleve/registry"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/collectors"
@ -165,6 +166,11 @@ func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *inde
return nil, err
}
// backwards compatibility if index type is missing
if rv.meta.IndexType == "" {
rv.meta.IndexType = upside_down.Name
}
storeConfig := rv.meta.Config
if storeConfig == nil {
storeConfig = map[string]interface{}{}
@ -250,7 +256,11 @@ func (i *indexImpl) Mapping() *IndexMapping {
// Index the object with the specified identifier.
// The IndexMapping for this index will determine
// how the object is indexed.
func (i *indexImpl) Index(id string, data interface{}) error {
func (i *indexImpl) Index(id string, data interface{}) (err error) {
if id == "" {
return ErrorEmptyID
}
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -259,20 +269,21 @@ func (i *indexImpl) Index(id string, data interface{}) error {
}
doc := document.NewDocument(id)
err := i.m.mapDocument(doc, data)
err = i.m.mapDocument(doc, data)
if err != nil {
return err
return
}
err = i.i.Update(doc)
if err != nil {
return err
}
return nil
return
}
// Delete entries for the specified identifier from
// the index.
func (i *indexImpl) Delete(id string) error {
func (i *indexImpl) Delete(id string) (err error) {
if id == "" {
return ErrorEmptyID
}
i.mutex.RLock()
defer i.mutex.RUnlock()
@ -280,11 +291,8 @@ func (i *indexImpl) Delete(id string) error {
return ErrorIndexClosed
}
err := i.i.Delete(id)
if err != nil {
return err
}
return nil
err = i.i.Delete(id)
return
}
// Batch executes multiple Index and Delete

View File

@ -15,6 +15,7 @@ import (
"log"
"os"
"reflect"
"sort"
"strings"
"sync"
"testing"
@ -1038,3 +1039,103 @@ func TestTermVectorArrayPositions(t *testing.T) {
t.Fatal(err)
}
}
// TestDocumentStaticMapping indexes a struct through a static default
// document mapping that declares only the "Text", "Date" and "Numeric"
// fields, then checks that the index reports exactly those fields plus
// "_all" — i.e. the undeclared "Ignored*" fields were not indexed.
func TestDocumentStaticMapping(t *testing.T) {
	// Always remove the on-disk test index, whatever the outcome.
	defer func() {
		if rmErr := os.RemoveAll("testidx"); rmErr != nil {
			t.Fatal(rmErr)
		}
	}()

	// Static default mapping: only explicitly declared fields are mapped.
	mapping := NewIndexMapping()
	mapping.DefaultMapping = NewDocumentStaticMapping()
	mapping.DefaultMapping.AddFieldMappingsAt("Text", NewTextFieldMapping())
	mapping.DefaultMapping.AddFieldMappingsAt("Date", NewDateTimeFieldMapping())
	mapping.DefaultMapping.AddFieldMappingsAt("Numeric", NewNumericFieldMapping())

	idx, err := New("testidx", mapping)
	if err != nil {
		t.Fatal(err)
	}

	// One mapped and one unmapped field for each supported value type.
	sample := struct {
		Text, IgnoredText       string
		Numeric, IgnoredNumeric float64
		Date, IgnoredDate       time.Time
	}{
		Text:           "valid text",
		IgnoredText:    "ignored text",
		Numeric:        10,
		IgnoredNumeric: 20,
		Date:           time.Unix(1, 0),
		IgnoredDate:    time.Unix(2, 0),
	}

	if err = idx.Index("a", sample); err != nil {
		t.Fatal(err)
	}

	fields, err := idx.Fields()
	if err != nil {
		t.Fatal(err)
	}
	// Sort so the comparison does not depend on the order Fields returns.
	sort.Strings(fields)

	want := []string{"Date", "Numeric", "Text", "_all"}
	if got := len(fields); got != len(want) {
		t.Fatalf("invalid field count: %d", got)
	}
	for i := range want {
		if fields[i] != want[i] {
			t.Fatalf("unexpected field[%d]: %s", i, fields[i])
		}
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexEmptyDocId verifies how an empty document ID is handled: Index
// and Delete on the index must return ErrorEmptyID, Batch.Index must return
// ErrorEmptyID, and Batch.Delete must ignore the request so the batch stays
// empty.
func TestIndexEmptyDocId(t *testing.T) {
	// Registered first so that it runs last: the directory is removed only
	// after the deferred Close below has released the index.
	defer func() {
		err := os.RemoveAll("testidx")
		if err != nil {
			t.Fatal(err)
		}
	}()

	index, err := New("testidx", NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	// Close the index before its directory is removed; without this the
	// index is leaked and the cleanup runs against a still-open store.
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := map[string]interface{}{
		"body": "nodocid",
	}

	// Direct index operations reject an empty ID.
	err = index.Index("", doc)
	if err != ErrorEmptyID {
		t.Errorf("expect index empty doc id to fail")
	}

	err = index.Delete("")
	if err != ErrorEmptyID {
		t.Errorf("expect delete empty doc id to fail")
	}

	// Batch operations: Index rejects, Delete is a silent no-op.
	batch := index.NewBatch()
	err = batch.Index("", doc)
	if err != ErrorEmptyID {
		t.Errorf("expect index empty doc id in batch to fail")
	}

	batch.Delete("")
	if batch.Size() > 0 {
		t.Errorf("expect delete empty doc id in batch to be ignored")
	}
}

View File

@ -322,7 +322,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
} else {
} else if dm.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
@ -347,7 +347,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else {
} else if dm.Dynamic {
// automatic indexing behavior
fieldMapping := NewNumericFieldMapping()
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
@ -361,7 +361,7 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, pathString, path, indexes, context)
}
} else {
} else if dm.Dynamic {
fieldMapping := NewDateTimeFieldMapping()
fieldMapping.processTime(property, pathString, path, indexes, context)
}

View File

@ -50,19 +50,20 @@ func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
dm.Fields = make(map[string]interface{})
}
existingVal, ok := dm.Fields[name]
if ok {
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
} else {
if !ok {
dm.Fields[name] = value
return
}
valSlice, ok := existingVal.([]interface{})
if ok {
// already a slice, append to it
valSlice = append(valSlice, value)
} else {
// create a slice
valSlice = []interface{}{existingVal, value}
}
dm.Fields[name] = valSlice
}
type DocumentMatchCollection []*DocumentMatch