Introduce a config option (CreateIfMissing) that disables creating an index when it does not already exist
closes #23 and closes #24
This commit is contained in:
parent
67ff4a97a6
commit
216767953c
97
config.go
97
config.go
@ -40,14 +40,15 @@ type HighlightConfig struct {
|
||||
Highlighters map[string]search.Highlighter
|
||||
}
|
||||
|
||||
type Config struct {
|
||||
type Configuration struct {
|
||||
Analysis *AnalysisConfig
|
||||
DefaultAnalyzer *string
|
||||
Highlight *HighlightConfig
|
||||
DefaultHighlighter *string
|
||||
CreateIfMissing bool
|
||||
}
|
||||
|
||||
func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
|
||||
func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
|
||||
rv := analysis.Analyzer{}
|
||||
if len(charFilterNames) > 0 {
|
||||
rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames))
|
||||
@ -76,7 +77,7 @@ func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string
|
||||
return &rv, nil
|
||||
}
|
||||
|
||||
func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
|
||||
func (c *Configuration) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
|
||||
analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@ -84,8 +85,8 @@ func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName st
|
||||
return analyzer
|
||||
}
|
||||
|
||||
func NewConfig() *Config {
|
||||
return &Config{
|
||||
func NewConfiguration() *Configuration {
|
||||
return &Configuration{
|
||||
Analysis: &AnalysisConfig{
|
||||
CharFilters: make(map[string]analysis.CharFilter),
|
||||
Tokenizers: make(map[string]analysis.Tokenizer),
|
||||
@ -98,75 +99,77 @@ func NewConfig() *Config {
|
||||
}
|
||||
}
|
||||
|
||||
var config *Config
|
||||
var Config *Configuration
|
||||
|
||||
func init() {
|
||||
|
||||
// build the default configuration
|
||||
config = NewConfig()
|
||||
Config = NewConfiguration()
|
||||
|
||||
// register char filters
|
||||
htmlCharFilterRegexp := regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
|
||||
htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '})
|
||||
config.Analysis.CharFilters["html"] = htmlCharFilter
|
||||
Config.Analysis.CharFilters["html"] = htmlCharFilter
|
||||
|
||||
// register tokenizers
|
||||
whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`)
|
||||
config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
|
||||
config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
|
||||
config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
|
||||
config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
|
||||
Config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
|
||||
Config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
|
||||
Config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
|
||||
Config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
|
||||
|
||||
// register token filters
|
||||
config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
|
||||
config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
|
||||
config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
|
||||
config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
|
||||
config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
|
||||
config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
|
||||
config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
|
||||
config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
|
||||
config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
|
||||
config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
|
||||
config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
|
||||
config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
|
||||
config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
|
||||
config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
|
||||
config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
|
||||
config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
|
||||
config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
|
||||
config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
|
||||
config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
|
||||
config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
|
||||
config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
|
||||
Config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
|
||||
Config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
|
||||
Config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
|
||||
Config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
|
||||
Config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
|
||||
Config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
|
||||
Config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
|
||||
Config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
|
||||
Config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
|
||||
Config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
|
||||
Config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
|
||||
Config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
|
||||
Config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
|
||||
Config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
|
||||
Config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
|
||||
Config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
|
||||
Config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
|
||||
Config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
|
||||
Config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
|
||||
Config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
|
||||
Config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
|
||||
|
||||
// register analyzers
|
||||
keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{})
|
||||
config.Analysis.Analyzers["keyword"] = keywordAnalyzer
|
||||
simpleAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
|
||||
config.Analysis.Analyzers["simple"] = simpleAnalyzer
|
||||
standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
|
||||
config.Analysis.Analyzers["standard"] = standardAnalyzer
|
||||
englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
|
||||
config.Analysis.Analyzers["english"] = englishAnalyzer
|
||||
detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
|
||||
config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
|
||||
keywordAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{})
|
||||
Config.Analysis.Analyzers["keyword"] = keywordAnalyzer
|
||||
simpleAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
|
||||
Config.Analysis.Analyzers["simple"] = simpleAnalyzer
|
||||
standardAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
|
||||
Config.Analysis.Analyzers["standard"] = standardAnalyzer
|
||||
englishAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
|
||||
Config.Analysis.Analyzers["english"] = englishAnalyzer
|
||||
detectLangAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
|
||||
Config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
|
||||
|
||||
// register ansi highlighter
|
||||
config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
|
||||
Config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
|
||||
|
||||
// register html highlighter
|
||||
htmlFormatter := search.NewHTMLFragmentFormatterCustom(`<span class="highlight">`, `</span>`)
|
||||
htmlHighlighter := search.NewSimpleHighlighter()
|
||||
htmlHighlighter.SetFragmentFormatter(htmlFormatter)
|
||||
config.Highlight.Highlighters["html"] = htmlHighlighter
|
||||
Config.Highlight.Highlighters["html"] = htmlHighlighter
|
||||
|
||||
// set the default analyzer
|
||||
simpleAnalyzerName := "simple"
|
||||
config.DefaultAnalyzer = &simpleAnalyzerName
|
||||
Config.DefaultAnalyzer = &simpleAnalyzerName
|
||||
|
||||
// set the default highlighter
|
||||
htmlHighlighterName := "html"
|
||||
config.DefaultHighlighter = &htmlHighlighterName
|
||||
Config.DefaultHighlighter = &htmlHighlighterName
|
||||
|
||||
// default CreateIfMissing to true
|
||||
Config.CreateIfMissing = true
|
||||
}
|
||||
|
@ -32,6 +32,9 @@ func main() {
|
||||
log.Fatal("Specify search query")
|
||||
}
|
||||
|
||||
// don't create an index if it doesn't exist
|
||||
bleve.Config.CreateIfMissing = false
|
||||
|
||||
// create a new default mapping
|
||||
mapping := bleve.NewIndexMapping()
|
||||
|
||||
|
2
index.go
2
index.go
@ -35,7 +35,9 @@ type Index interface {
|
||||
|
||||
Search(req *SearchRequest) (*SearchResult, error)
|
||||
|
||||
Dump()
|
||||
DumpDoc(id string) ([]interface{}, error)
|
||||
DumpFields()
|
||||
|
||||
Close()
|
||||
}
|
||||
|
@ -28,6 +28,7 @@ type Index interface {
|
||||
|
||||
Dump()
|
||||
DumpDoc(id string) ([]interface{}, error)
|
||||
DumpFields()
|
||||
}
|
||||
|
||||
type TermFieldVector struct {
|
||||
|
@ -19,13 +19,13 @@ type LevelDBStore struct {
|
||||
db *levigo.DB
|
||||
}
|
||||
|
||||
func Open(path string) (*LevelDBStore, error) {
|
||||
func Open(path string, createIfMissing bool) (*LevelDBStore, error) {
|
||||
rv := LevelDBStore{
|
||||
path: path,
|
||||
}
|
||||
|
||||
opts := levigo.NewOptions()
|
||||
opts.SetCreateIfMissing(true)
|
||||
opts.SetCreateIfMissing(createIfMissing)
|
||||
rv.opts = opts
|
||||
|
||||
var err error
|
||||
|
@ -16,7 +16,7 @@ import (
|
||||
)
|
||||
|
||||
func TestLevelDBStore(t *testing.T) {
|
||||
s, err := Open("test")
|
||||
s, err := Open("test", true)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
@ -16,7 +16,7 @@ import (
|
||||
)
|
||||
|
||||
func BenchmarkLevelDBIndexing(b *testing.B) {
|
||||
s, err := leveldb.Open("test")
|
||||
s, err := leveldb.Open("test", true)
|
||||
if err != nil {
|
||||
b.Fatal(err)
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ type indexImpl struct {
|
||||
}
|
||||
|
||||
func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
|
||||
store, err := leveldb.Open(path)
|
||||
store, err := leveldb.Open(path, Config.CreateIfMissing)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@ -124,9 +124,9 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
|
||||
|
||||
if req.Highlight != nil {
|
||||
// get the right highlighter
|
||||
highlighter := config.Highlight.Highlighters[*config.DefaultHighlighter]
|
||||
highlighter := Config.Highlight.Highlighters[*Config.DefaultHighlighter]
|
||||
if req.Highlight.Style != nil {
|
||||
highlighter = config.Highlight.Highlighters[*req.Highlight.Style]
|
||||
highlighter = Config.Highlight.Highlighters[*req.Highlight.Style]
|
||||
if highlighter == nil {
|
||||
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
|
||||
}
|
||||
@ -160,6 +160,14 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Dump forwards to the Dump method of the wrapped i.i value. It takes no
// arguments and returns nothing; whatever output is produced comes from
// i.i.Dump itself.
func (i *indexImpl) Dump() {
|
||||
i.i.Dump()
|
||||
}
|
||||
|
||||
// DumpFields forwards to the DumpFields method of the wrapped i.i value.
// It takes no arguments and returns nothing.
func (i *indexImpl) DumpFields() {
|
||||
i.i.DumpFields()
|
||||
}
|
||||
|
||||
// DumpDoc forwards id to the DumpDoc method of the wrapped i.i value and
// returns its result slice and error unchanged.
func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
|
||||
return i.i.DumpDoc(id)
|
||||
}
|
||||
|
@ -124,7 +124,7 @@ func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer {
|
||||
break
|
||||
}
|
||||
if current.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*current.DefaultAnalyzer]
|
||||
rv = Config.Analysis.Analyzers[*current.DefaultAnalyzer]
|
||||
}
|
||||
}
|
||||
return rv
|
||||
|
@ -247,7 +247,7 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
|
||||
fieldName = parentName + *fieldMapping.Name
|
||||
}
|
||||
options := fieldMapping.Options()
|
||||
analyzer := config.Analysis.Analyzers[*fieldMapping.Analyzer]
|
||||
analyzer := Config.Analysis.Analyzers[*fieldMapping.Analyzer]
|
||||
if analyzer != nil {
|
||||
field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer)
|
||||
context.doc.AddField(field)
|
||||
@ -276,9 +276,9 @@ func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *ana
|
||||
rv := dm.defaultAnalyzer(path)
|
||||
if rv == nil {
|
||||
if im.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*im.DefaultAnalyzer]
|
||||
} else if config.DefaultAnalyzer != nil {
|
||||
rv = config.Analysis.Analyzers[*config.DefaultAnalyzer]
|
||||
rv = Config.Analysis.Analyzers[*im.DefaultAnalyzer]
|
||||
} else if Config.DefaultAnalyzer != nil {
|
||||
rv = Config.Analysis.Analyzers[*Config.DefaultAnalyzer]
|
||||
}
|
||||
}
|
||||
return rv
|
||||
@ -297,7 +297,7 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
|
||||
if pathMapping != nil {
|
||||
if len(pathMapping.Fields) > 0 {
|
||||
if pathMapping.Fields[0].Analyzer != nil {
|
||||
return config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
|
||||
return Config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -312,5 +312,5 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
|
||||
}
|
||||
|
||||
// finally just return the system-wide default analyzer
|
||||
return config.Analysis.Analyzers[*config.DefaultAnalyzer]
|
||||
return Config.Analysis.Analyzers[*Config.DefaultAnalyzer]
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, erro
|
||||
|
||||
var analyzer *analysis.Analyzer
|
||||
if q.Analyzer != "" {
|
||||
analyzer = config.Analysis.Analyzers[q.Analyzer]
|
||||
analyzer = Config.Analysis.Analyzers[q.Analyzer]
|
||||
} else {
|
||||
analyzer = i.m.analyzerForPath(q.FieldVal)
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher
|
||||
|
||||
var analyzer *analysis.Analyzer
|
||||
if q.Analyzer != "" {
|
||||
analyzer = config.Analysis.Analyzers[q.Analyzer]
|
||||
analyzer = Config.Analysis.Analyzers[q.Analyzer]
|
||||
} else {
|
||||
analyzer = i.m.analyzerForPath(q.FieldVal)
|
||||
}
|
||||
|
@ -12,8 +12,7 @@ import (
|
||||
"flag"
|
||||
"log"
|
||||
|
||||
"github.com/couchbaselabs/bleve/index/store/leveldb"
|
||||
"github.com/couchbaselabs/bleve/index/upside_down"
|
||||
"github.com/couchbaselabs/bleve"
|
||||
)
|
||||
|
||||
var indexDir = flag.String("indexDir", "index", "index directory")
|
||||
@ -23,12 +22,8 @@ var fieldsOnly = flag.Bool("fields", false, "fields only")
|
||||
func main() {
|
||||
flag.Parse()
|
||||
|
||||
store, err := leveldb.Open(*indexDir)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
index := upside_down.NewUpsideDownCouch(store)
|
||||
err = index.Open()
|
||||
bleve.Config.CreateIfMissing = false
|
||||
index, err := bleve.Open(*indexDir, bleve.NewIndexMapping())
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user