0
0
Fork 0

further streamlined the API

introduced concept of byte array converters
right now only wired up to top-level index mapping
allowing the removal of the JSON methods, now at the top level
we default to parsing []byte as JSON; override this if that's not
the behavior you want.

future enhancements will allow use of these byte array converters
to control how byte arrays are handled elsewhere in documents
this would allow for handling binary attachments, etc. in the future

closes #59
This commit is contained in:
Marty Schoch 2014-08-11 12:47:29 -04:00
parent 7bbaa8ecd5
commit 42895649de
8 changed files with 107 additions and 96 deletions

52
byte_array_converter.go Normal file
View File

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package bleve
import (
"encoding/json"
)
// ByteArrayConverter turns a raw byte slice into a value, or reports an
// error when the bytes cannot be converted.
type ByteArrayConverter interface {
	// Convert receives the raw bytes and returns the converted value
	// together with any conversion error.
	Convert(in []byte) (interface{}, error)
}
// StringByteArrayConverter is a ByteArrayConverter that exposes the bytes
// as a Go string.
type StringByteArrayConverter struct{}

// NewStringByteArrayConverter returns a ready-to-use StringByteArrayConverter.
func NewStringByteArrayConverter() *StringByteArrayConverter {
	return new(StringByteArrayConverter)
}

// Convert returns in converted to a string. It never returns an error.
func (c *StringByteArrayConverter) Convert(in []byte) (interface{}, error) {
	text := string(in)
	return text, nil
}
// JSONByteArrayConverter is a ByteArrayConverter that parses the bytes as a
// JSON object.
type JSONByteArrayConverter struct{}

// NewJSONByteArrayConverter returns a ready-to-use JSONByteArrayConverter.
func NewJSONByteArrayConverter() *JSONByteArrayConverter {
	return new(JSONByteArrayConverter)
}

// Convert unmarshals in into a map[string]interface{} and returns that map.
// If json.Unmarshal fails (for example because the input is not a JSON
// object), the error is returned and the value is nil.
func (c *JSONByteArrayConverter) Convert(in []byte) (interface{}, error) {
	decoded := map[string]interface{}(nil)
	if err := json.Unmarshal(in, &decoded); err != nil {
		return nil, err
	}
	return decoded, nil
}
// IgnoreByteArrayConverter is a ByteArrayConverter that discards its input.
type IgnoreByteArrayConverter struct{}

// NewIgnoreByteArrayConverter returns a ready-to-use IgnoreByteArrayConverter.
func NewIgnoreByteArrayConverter() *IgnoreByteArrayConverter {
	return new(IgnoreByteArrayConverter)
}

// Convert ignores in and always returns a nil value and a nil error.
func (c *IgnoreByteArrayConverter) Convert(in []byte) (interface{}, error) {
	var nothing interface{}
	return nothing, nil
}

View File

@ -59,6 +59,7 @@ type Configuration struct {
CreateIfMissing bool
DefaultDateTimeFormat *string
DefaultField *string
ByteArrayConverters map[string]ByteArrayConverter
}
func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
@ -120,6 +121,7 @@ func NewConfiguration() *Configuration {
Highlight: &HighlightConfig{
Highlighters: make(map[string]search.Highlighter),
},
ByteArrayConverters: make(map[string]ByteArrayConverter),
}
}
@ -130,6 +132,11 @@ func init() {
// build the default configuration
Config = NewConfiguration()
// register byte array converters
Config.ByteArrayConverters["string"] = NewStringByteArrayConverter()
Config.ByteArrayConverters["json"] = NewJSONByteArrayConverter()
Config.ByteArrayConverters["ignore"] = NewIgnoreByteArrayConverter()
// register stop token maps
Config.Analysis.TokenMaps["da_stop"] = Config.MustLoadStopWords(stop_words_filter.DanishStopWords)
Config.Analysis.TokenMaps["nl_stop"] = Config.MustLoadStopWords(stop_words_filter.DutchStopWords)

View File

@ -83,7 +83,7 @@ func indexBeer(i bleve.Index) error {
// // shred them into a document
ext := filepath.Ext(filename)
docId := filename[:(len(filename) - len(ext))]
err = i.IndexJSONID(docId, jsonBytes)
err = i.Index(docId, jsonBytes)
if err != nil {
return err
}

View File

@ -35,7 +35,7 @@ func main() {
for jsonFile := range walkDirectory(*jsonDir) {
// index the json files
err = index.IndexJSONID(jsonFile.filename, jsonFile.contents)
err = index.Index(jsonFile.filename, jsonFile.contents)
if err != nil {
log.Fatal(err)
}

View File

@ -12,23 +12,14 @@ import (
"github.com/couchbaselabs/bleve/document"
)
// Identifier is implemented by values that can report their own ID.
// indexImpl.determineID checks for this interface first when it needs a
// document ID for a value.
type Identifier interface {
// ID returns the identifier as a string.
ID() string
}
// Classifier is implemented by values that can report their own type name.
// NOTE(review): presumably consulted when choosing a document mapping; the
// lookup itself is not visible here — confirm.
type Classifier interface {
// Type returns the type name as a string.
Type() string
}
type Index interface {
Index(data interface{}) error
IndexID(id string, data interface{}) error
Index(id string, data interface{}) error
IndexJSON(data []byte) error
IndexJSONID(id string, data []byte) error
Delete(data interface{}) error
DeleteID(id string) error
Delete(id string) error
Document(id string) (*document.Document, error)
DocCount() uint64

View File

@ -9,7 +9,6 @@
package bleve
import (
"encoding/json"
"fmt"
"time"
@ -44,17 +43,7 @@ func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
}, nil
}
// Index the provided data.
// The document ID is obtained from determineID; when one is found the call
// is forwarded to IndexID, otherwise ERROR_NO_ID is returned.
func (i *indexImpl) Index(data interface{}) error {
id, ok := i.determineID(data)
if ok {
return i.IndexID(id, data)
}
return ERROR_NO_ID
}
func (i *indexImpl) IndexID(id string, data interface{}) error {
func (i *indexImpl) Index(id string, data interface{}) error {
doc := document.NewDocument(id)
err := i.m.MapDocument(doc, data)
if err != nil {
@ -67,34 +56,7 @@ func (i *indexImpl) IndexID(id string, data interface{}) error {
return nil
}
// IndexJSON decodes data as JSON into a generic value and passes the result
// to Index. If decoding fails, the json.Unmarshal error is returned and
// nothing is indexed.
func (i *indexImpl) IndexJSON(data []byte) error {
var obj interface{}
err := json.Unmarshal(data, &obj)
if err != nil {
return err
}
return i.Index(obj)
}
// IndexJSONID decodes data as JSON into a generic value and passes the
// result to IndexID together with the caller-supplied id. If decoding fails,
// the json.Unmarshal error is returned and nothing is indexed.
func (i *indexImpl) IndexJSONID(id string, data []byte) error {
var obj interface{}
err := json.Unmarshal(data, &obj)
if err != nil {
return err
}
return i.IndexID(id, obj)
}
// Delete determines the ID of data via determineID and forwards it to
// DeleteID. When no ID can be determined, ERROR_NO_ID is returned.
func (i *indexImpl) Delete(data interface{}) error {
id, ok := i.determineID(data)
if ok {
return i.DeleteID(id)
}
return ERROR_NO_ID
}
func (i *indexImpl) DeleteID(id string) error {
func (i *indexImpl) Delete(id string) error {
err := i.i.Delete(id)
if err != nil {
return err
@ -242,21 +204,3 @@ func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
// Close delegates to the Close method of the wrapped i.i value.
func (i *indexImpl) Close() {
i.i.Close()
}
// determineID tries to find a document ID for data.
// It returns the ID and true on success, or "" and false when no ID is found.
func (i *indexImpl) determineID(data interface{}) (string, bool) {
// first see if the object implements Identifier
identifier, ok := data.(Identifier)
if ok {
return identifier.ID(), true
}
// now see if we can find an ID using the mapping
if i.m.IdField != nil {
// NOTE(review): lookupPropertyPath and mustString are defined elsewhere;
// presumably they resolve the configured field path inside data and
// coerce the result to a string — confirm.
id, ok := mustString(lookupPropertyPath(data, *i.m.IdField))
if ok {
return id, true
}
}
return "", false
}

View File

@ -28,10 +28,6 @@ type Person struct {
Tags []string `json:"tags"`
}
// ID returns the value of the person's Identifier field.
func (p *Person) ID() string {
return p.Identifier
}
// Type returns the fixed type name "person" for every Person.
func (p *Person) Type() string {
return "person"
}
@ -83,7 +79,7 @@ func TestIndex(t *testing.T) {
Tags: []string{"amped", "bogus", "gnarley", "tubed"},
}
err = index.Index(&obj)
err = index.Index(obj.Identifier, &obj)
if err != nil {
t.Error(err)
}

View File

@ -26,15 +26,17 @@ var DEFAULT_ID_FIELD = "_id"
// DEFAULT_TYPE_FIELD is the value NewIndexMapping points TypeField at.
var DEFAULT_TYPE_FIELD = "_type"
// DEFAULT_TYPE is the value NewIndexMapping points DefaultType at.
var DEFAULT_TYPE = "_default"
// DEFAULT_FIELD is the value NewIndexMapping points DefaultField at.
var DEFAULT_FIELD = "_all"
// DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER is the converter name used for an
// IndexMapping's ByteArrayConverter when none is specified: NewIndexMapping
// and IndexMapping.UnmarshalJSON both fall back to it. MapDocument looks the
// name up in Config.ByteArrayConverters.
var DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER = "json"
type IndexMapping struct {
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
DefaultField *string `json:"default_field"`
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
DefaultField *string `json:"default_field"`
ByteArrayConverter *string `json:"byte_array_converter"`
}
func (im *IndexMapping) GoString() string {
@ -43,12 +45,13 @@ func (im *IndexMapping) GoString() string {
func NewIndexMapping() *IndexMapping {
return &IndexMapping{
TypeMapping: make(map[string]*DocumentMapping),
DefaultMapping: NewDocumentMapping(),
IdField: &DEFAULT_ID_FIELD,
TypeField: &DEFAULT_TYPE_FIELD,
DefaultType: &DEFAULT_TYPE,
DefaultField: &DEFAULT_FIELD,
TypeMapping: make(map[string]*DocumentMapping),
DefaultMapping: NewDocumentMapping(),
IdField: &DEFAULT_ID_FIELD,
TypeField: &DEFAULT_TYPE_FIELD,
DefaultType: &DEFAULT_TYPE,
DefaultField: &DEFAULT_FIELD,
ByteArrayConverter: &DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER,
}
}
@ -82,13 +85,14 @@ func (im *IndexMapping) MappingForType(docType string) *DocumentMapping {
func (im *IndexMapping) UnmarshalJSON(data []byte) error {
var tmp struct {
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
DefaultField *string `json:"default_field"`
TypeMapping map[string]*DocumentMapping `json:"types"`
DefaultMapping *DocumentMapping `json:"default_mapping"`
IdField *string `json:"id_field"`
TypeField *string `json:"type_field"`
DefaultType *string `json:"default_type"`
DefaultAnalyzer *string `json:"default_analyzer"`
DefaultField *string `json:"default_field"`
ByteArrayConverter *string `json:"byte_array_converter"`
}
err := json.Unmarshal(data, &tmp)
if err != nil {
@ -123,6 +127,10 @@ func (im *IndexMapping) UnmarshalJSON(data []byte) error {
if tmp.DefaultField != nil {
im.DefaultField = tmp.DefaultField
}
im.ByteArrayConverter = &DEFAULT_TOP_LEVEL_BYTE_ARRAY_CONVERTER
if tmp.ByteArrayConverter != nil {
im.ByteArrayConverter = tmp.ByteArrayConverter
}
im.TypeMapping = make(map[string]*DocumentMapping, len(tmp.TypeMapping))
for typeName, typeDocMapping := range tmp.TypeMapping {
@ -155,6 +163,19 @@ func (im *IndexMapping) determineType(data interface{}) (string, bool) {
}
func (im *IndexMapping) MapDocument(doc *document.Document, data interface{}) error {
// see if the top level object is a byte array, and possibly run it through a converter
byteArrayData, ok := data.([]byte)
if ok && im.ByteArrayConverter != nil {
byteArrayConverter, valid := Config.ByteArrayConverters[*im.ByteArrayConverter]
if valid {
convertedData, err := byteArrayConverter.Convert(byteArrayData)
if err != nil {
return err
}
data = convertedData
}
}
docType, ok := im.determineType(data)
if !ok {
return ERROR_NO_TYPE