2014-04-17 22:55:53 +02:00
|
|
|
// Copyright (c) 2014 Couchbase, Inc.
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
|
|
|
|
// except in compliance with the License. You may obtain a copy of the License at
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
// Unless required by applicable law or agreed to in writing, software distributed under the
|
|
|
|
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
|
|
|
|
// either express or implied. See the License for the specific language governing permissions
|
|
|
|
// and limitations under the License.
|
|
|
|
package shredder
|
|
|
|
|
|
|
|
import (
	"bytes"
	"encoding/json"

	"github.com/couchbaselabs/bleve/analysis"
	"github.com/couchbaselabs/bleve/document"
	"github.com/dustin/go-jsonpointer"
)
|
|
|
|
|
|
|
|
// A simple automatic JSON shredder which parses the whole document body.
|
|
|
|
// Any strings found in the JSON are added as text fields
|
|
|
|
|
|
|
|
type JsonPointerShredder struct {
|
|
|
|
fieldPaths map[string]string
|
|
|
|
paths []string
|
2014-07-11 20:26:25 +02:00
|
|
|
analyzers map[string]*analysis.Analyzer
|
|
|
|
options map[string]document.IndexingOptions
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
func NewJsonPointerShredder() *JsonPointerShredder {
|
|
|
|
return &JsonPointerShredder{
|
|
|
|
fieldPaths: make(map[string]string),
|
|
|
|
paths: make([]string, 0),
|
2014-07-11 20:26:25 +02:00
|
|
|
analyzers: make(map[string]*analysis.Analyzer),
|
|
|
|
options: make(map[string]document.IndexingOptions),
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *JsonPointerShredder) AddTextField(name string, path string) {
|
|
|
|
s.fieldPaths[name] = path
|
|
|
|
s.paths = append(s.paths, path)
|
|
|
|
}
|
|
|
|
|
2014-07-11 20:26:25 +02:00
|
|
|
func (s *JsonPointerShredder) AddFieldCustom(name string, path string, options document.IndexingOptions, analyzer *analysis.Analyzer) {
|
2014-04-17 22:55:53 +02:00
|
|
|
s.fieldPaths[name] = path
|
2014-07-11 20:26:25 +02:00
|
|
|
s.analyzers[name] = analyzer
|
|
|
|
s.options[name] = options
|
2014-04-17 22:55:53 +02:00
|
|
|
s.paths = append(s.paths, path)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *JsonPointerShredder) Shred(id string, body []byte) (*document.Document, error) {
|
|
|
|
rv := document.NewDocument(id)
|
|
|
|
|
|
|
|
values, err := jsonpointer.FindMany(body, s.paths)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for fieldName, fieldPath := range s.fieldPaths {
|
2014-07-11 20:26:25 +02:00
|
|
|
fieldValue := bytes.TrimSpace(values[fieldPath])
|
|
|
|
if bytes.HasPrefix(fieldValue, []byte{'"'}) {
|
|
|
|
fieldValue = fieldValue[1:]
|
|
|
|
}
|
|
|
|
if bytes.HasSuffix(fieldValue, []byte{'"'}) {
|
|
|
|
fieldValue = fieldValue[:len(fieldValue)-1]
|
|
|
|
}
|
|
|
|
analyzer, custom := s.analyzers[fieldName]
|
|
|
|
if custom {
|
|
|
|
options := s.options[fieldName]
|
|
|
|
field := document.NewField(fieldName, fieldValue, options, analyzer)
|
|
|
|
rv.AddField(field)
|
|
|
|
} else {
|
|
|
|
field := document.NewTextField(fieldName, fieldValue)
|
|
|
|
rv.AddField(field)
|
|
|
|
}
|
2014-04-17 22:55:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return rv, nil
|
|
|
|
}
|