0
0
bleve/analysis/type.go
Marty Schoch 8debf26cb7 changed many components to not have defaults
many of these defaults were arbitrary, and not having
defaults lets us more easily flag them for configuration
added a shingle filter
introduce new toke type for shingles
2014-09-09 18:15:14 -04:00

84 lines
2.0 KiB
Go

// Copyright (c) 2014 Couchbase, Inc.
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file
// except in compliance with the License. You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software distributed under the
// License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
package analysis
import (
"fmt"
"time"
)
type CharFilter interface {
Filter([]byte) []byte
}
type TokenType int
const (
AlphaNumeric TokenType = iota
Ideographic
Numeric
DateTime
Shingle
)
type Token struct {
Start int `json:"start"`
End int `json:"end"`
Term []byte `json:"term"`
Position int `json:"position"`
Type TokenType `json:"type"`
KeyWord bool `json:"keyword"`
}
func (t *Token) String() string {
return fmt.Sprintf("Start: %d End: %d Position: %d Token: %s Type: %d", t.Start, t.End, t.Position, string(t.Term), t.Type)
}
type TokenStream []*Token
type Tokenizer interface {
Tokenize([]byte) TokenStream
}
type TokenFilter interface {
Filter(TokenStream) TokenStream
}
type Analyzer struct {
CharFilters []CharFilter
Tokenizer Tokenizer
TokenFilters []TokenFilter
}
func (a *Analyzer) Analyze(input []byte) TokenStream {
if a.CharFilters != nil {
for _, cf := range a.CharFilters {
input = cf.Filter(input)
}
}
tokens := a.Tokenizer.Tokenize(input)
if a.TokenFilters != nil {
for _, tf := range a.TokenFilters {
tokens = tf.Filter(tokens)
}
}
return tokens
}
var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")
type DateTimeParser interface {
ParseDateTime(string) (time.Time, error)
}
type ByteArrayConverter interface {
Convert([]byte) (interface{}, error)
}