diff --git a/byte_array_converter.go b/byte_array_converter.go new file mode 100644 index 00000000..02642073 --- /dev/null +++ b/byte_array_converter.go @@ -0,0 +1,52 @@ +// Copyright (c) 2014 Couchbase, Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file +// except in compliance with the License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software distributed under the +// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, +// either express or implied. See the License for the specific language governing permissions +// and limitations under the License. +package bleve + +import ( + "encoding/json" +) + +type ByteArrayConverter interface { + Convert([]byte) (interface{}, error) +} + +type StringByteArrayConverter struct{} + +func NewStringByteArrayConverter() *StringByteArrayConverter { + return &StringByteArrayConverter{} +} + +func (c *StringByteArrayConverter) Convert(in []byte) (interface{}, error) { + return string(in), nil +} + +type JSONByteArrayConverter struct{} + +func NewJSONByteArrayConverter() *JSONByteArrayConverter { + return &JSONByteArrayConverter{} +} + +func (c *JSONByteArrayConverter) Convert(in []byte) (interface{}, error) { + var rv map[string]interface{} + err := json.Unmarshal(in, &rv) + if err != nil { + return nil, err + } + return rv, nil +} + +type IgnoreByteArrayConverter struct{} + +func NewIgnoreByteArrayConverter() *IgnoreByteArrayConverter { + return &IgnoreByteArrayConverter{} +} + +func (c *IgnoreByteArrayConverter) Convert(in []byte) (interface{}, error) { + return nil, nil +} diff --git a/config.go b/config.go index 11bb07ec..4a79f10b 100644 --- a/config.go +++ b/config.go @@ -59,6 +59,7 @@ type Configuration struct { CreateIfMissing bool DefaultDateTimeFormat *string DefaultField *string + ByteArrayConverters 
map[string]ByteArrayConverter } func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) { @@ -120,6 +121,7 @@ func NewConfiguration() *Configuration { Highlight: &HighlightConfig{ Highlighters: make(map[string]search.Highlighter), }, + ByteArrayConverters: make(map[string]ByteArrayConverter), } } @@ -130,6 +132,11 @@ func init() { // build the default configuration Config = NewConfiguration() + // register byte array converters + Config.ByteArrayConverters["string"] = NewStringByteArrayConverter() + Config.ByteArrayConverters["json"] = NewJSONByteArrayConverter() + Config.ByteArrayConverters["ignore"] = NewIgnoreByteArrayConverter() + // register stop token maps Config.Analysis.TokenMaps["da_stop"] = Config.MustLoadStopWords(stop_words_filter.DanishStopWords) Config.Analysis.TokenMaps["nl_stop"] = Config.MustLoadStopWords(stop_words_filter.DutchStopWords) diff --git a/examples/beer-search/main.go b/examples/beer-search/main.go index ad62813c..41832adb 100644 --- a/examples/beer-search/main.go +++ b/examples/beer-search/main.go @@ -83,7 +83,7 @@ func indexBeer(i bleve.Index) error { // // shred them into a document ext := filepath.Ext(filename) docId := filename[:(len(filename) - len(ext))] - err = i.IndexJSONID(docId, jsonBytes) + err = i.Index(docId, jsonBytes) if err != nil { return err } diff --git a/examples/bleve_index_json/main.go b/examples/bleve_index_json/main.go index 206734e1..de223b10 100644 --- a/examples/bleve_index_json/main.go +++ b/examples/bleve_index_json/main.go @@ -35,7 +35,7 @@ func main() { for jsonFile := range walkDirectory(*jsonDir) { // index the json files - err = index.IndexJSONID(jsonFile.filename, jsonFile.contents) + err = index.Index(jsonFile.filename, jsonFile.contents) if err != nil { log.Fatal(err) } diff --git a/index.go b/index.go index fae5761a..b5ff3fb7 100644 --- a/index.go +++ b/index.go @@ -12,23 +12,14 @@ import ( 
"github.com/couchbaselabs/bleve/document" ) -type Identifier interface { - ID() string -} - type Classifier interface { Type() string } type Index interface { - Index(data interface{}) error - IndexID(id string, data interface{}) error + Index(id string, data interface{}) error - IndexJSON(data []byte) error - IndexJSONID(id string, data []byte) error - - Delete(data interface{}) error - DeleteID(id string) error + Delete(id string) error Document(id string) (*document.Document, error) DocCount() uint64 diff --git a/index_impl.go b/index_impl.go index f052a67a..a1b58d5c 100644 --- a/index_impl.go +++ b/index_impl.go @@ -9,7 +9,6 @@ package bleve import ( - "encoding/json" "fmt" "time" @@ -44,17 +43,7 @@ func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) { }, nil } -// Index the provided data. -func (i *indexImpl) Index(data interface{}) error { - id, ok := i.determineID(data) - if ok { - return i.IndexID(id, data) - } - - return ERROR_NO_ID -} - -func (i *indexImpl) IndexID(id string, data interface{}) error { +func (i *indexImpl) Index(id string, data interface{}) error { doc := document.NewDocument(id) err := i.m.MapDocument(doc, data) if err != nil { @@ -67,34 +56,7 @@ func (i *indexImpl) IndexID(id string, data interface{}) error { return nil } -func (i *indexImpl) IndexJSON(data []byte) error { - var obj interface{} - err := json.Unmarshal(data, &obj) - if err != nil { - return err - } - return i.Index(obj) -} - -func (i *indexImpl) IndexJSONID(id string, data []byte) error { - var obj interface{} - err := json.Unmarshal(data, &obj) - if err != nil { - return err - } - return i.IndexID(id, obj) -} - -func (i *indexImpl) Delete(data interface{}) error { - id, ok := i.determineID(data) - if ok { - return i.DeleteID(id) - } - - return ERROR_NO_ID -} - -func (i *indexImpl) DeleteID(id string) error { +func (i *indexImpl) Delete(id string) error { err := i.i.Delete(id) if err != nil { return err @@ -242,21 +204,3 @@ func (i *indexImpl) DumpDoc(id 
string) ([]interface{}, error) { func (i *indexImpl) Close() { i.i.Close() } - -func (i *indexImpl) determineID(data interface{}) (string, bool) { - // first see if the object implements Identifier - identifier, ok := data.(Identifier) - if ok { - return identifier.ID(), true - } - - // now see if we can find an ID using the mapping - if i.m.IdField != nil { - id, ok := mustString(lookupPropertyPath(data, *i.m.IdField)) - if ok { - return id, true - } - } - - return "", false -} diff --git a/index_test.go b/index_test.go index 7c2be20e..8dacf8aa 100644 --- a/index_test.go +++ b/index_test.go @@ -28,10 +28,6 @@ type Person struct { Tags []string `json:"tags"` } -func (p *Person) ID() string { - return p.Identifier -} - func (p *Person) Type() string { return "person" } @@ -83,7 +79,7 @@ func TestIndex(t *testing.T) { Tags: []string{"amped", "bogus", "gnarley", "tubed"}, } - err = index.Index(&obj) + err = index.Index(obj.Identifier, &obj) if err != nil { t.Error(err) } diff --git a/mapping_index.go b/mapping_index.go index 3831b29f..a47723c1 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -26,15 +26,17 @@ var DEFAULT_ID_FIELD = "_id" var DEFAULT_TYPE_FIELD = "_type" var DEFAULT_TYPE = "_default" var DEFAULT_FIELD = "_all" +var DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER = "json" type IndexMapping struct { - TypeMapping map[string]*DocumentMapping `json:"types"` - DefaultMapping *DocumentMapping `json:"default_mapping"` - IdField *string `json:"id_field"` - TypeField *string `json:"type_field"` - DefaultType *string `json:"default_type"` - DefaultAnalyzer *string `json:"default_analyzer"` - DefaultField *string `json:"default_field"` + TypeMapping map[string]*DocumentMapping `json:"types"` + DefaultMapping *DocumentMapping `json:"default_mapping"` + IdField *string `json:"id_field"` + TypeField *string `json:"type_field"` + DefaultType *string `json:"default_type"` + DefaultAnalyzer *string `json:"default_analyzer"` + DefaultField *string `json:"default_field"` + 
ByteArrayConverter *string `json:"byte_array_converter"` } func (im *IndexMapping) GoString() string { @@ -43,12 +45,13 @@ func (im *IndexMapping) GoString() string { func NewIndexMapping() *IndexMapping { return &IndexMapping{ - TypeMapping: make(map[string]*DocumentMapping), - DefaultMapping: NewDocumentMapping(), - IdField: &DEFAULT_ID_FIELD, - TypeField: &DEFAULT_TYPE_FIELD, - DefaultType: &DEFAULT_TYPE, - DefaultField: &DEFAULT_FIELD, + TypeMapping: make(map[string]*DocumentMapping), + DefaultMapping: NewDocumentMapping(), + IdField: &DEFAULT_ID_FIELD, + TypeField: &DEFAULT_TYPE_FIELD, + DefaultType: &DEFAULT_TYPE, + DefaultField: &DEFAULT_FIELD, + ByteArrayConverter: &DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER, } } @@ -82,13 +85,14 @@ func (im *IndexMapping) MappingForType(docType string) *DocumentMapping { func (im *IndexMapping) UnmarshalJSON(data []byte) error { var tmp struct { - TypeMapping map[string]*DocumentMapping `json:"types"` - DefaultMapping *DocumentMapping `json:"default_mapping"` - IdField *string `json:"id_field"` - TypeField *string `json:"type_field"` - DefaultType *string `json:"default_type"` - DefaultAnalyzer *string `json:"default_analyzer"` - DefaultField *string `json:"default_field"` + TypeMapping map[string]*DocumentMapping `json:"types"` + DefaultMapping *DocumentMapping `json:"default_mapping"` + IdField *string `json:"id_field"` + TypeField *string `json:"type_field"` + DefaultType *string `json:"default_type"` + DefaultAnalyzer *string `json:"default_analyzer"` + DefaultField *string `json:"default_field"` + ByteArrayConverter *string `json:"byte_array_converter"` } err := json.Unmarshal(data, &tmp) if err != nil { @@ -123,6 +127,10 @@ func (im *IndexMapping) UnmarshalJSON(data []byte) error { if tmp.DefaultField != nil { im.DefaultField = tmp.DefaultField } + im.ByteArrayConverter = &DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER + if tmp.ByteArrayConverter != nil { + im.ByteArrayConverter = tmp.ByteArrayConverter + } im.TypeMapping = 
make(map[string]*DocumentMapping, len(tmp.TypeMapping)) for typeName, typeDocMapping := range tmp.TypeMapping { @@ -155,6 +163,19 @@ func (im *IndexMapping) determineType(data interface{}) (string, bool) { } func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) error { + // see if the top level object is a byte array, and possibly run through converter + byteArrayData, ok := data.([]byte) + if ok && im.ByteArrayConverter != nil { + byteArrayConverter, valid := Config.ByteArrayConverters[*im.ByteArrayConverter] + if valid { + convertedData, err := byteArrayConverter.Convert(byteArrayData) + if err != nil { + return err + } + data = convertedData + } + } + docType, ok := im.determineType(data) if !ok { return ERROR_NO_TYPE