From 216767953cdce473a7c2fb61e1d60ae4be36269b Mon Sep 17 00:00:00 2001 From: Marty Schoch Date: Wed, 30 Jul 2014 14:29:26 -0400 Subject: [PATCH] introduced a config option to disable creating indexes if they don't already exist closes #23 and closes #24 --- config.go | 97 +++++++++++---------- examples/bleve_query/main.go | 3 + index.go | 2 + index/index.go | 1 + index/store/leveldb/store.go | 4 +- index/store/leveldb/store_test.go | 2 +- index/upside_down/benchmark_leveldb_test.go | 2 +- index_impl.go | 14 ++- mapping_document.go | 2 +- mapping_index.go | 12 +-- query_match.go | 2 +- query_match_phrase.go | 2 +- utils/bleve_dump/main.go | 11 +-- 13 files changed, 83 insertions(+), 71 deletions(-) diff --git a/config.go b/config.go index addb7033..aa2af6eb 100644 --- a/config.go +++ b/config.go @@ -40,14 +40,15 @@ type HighlightConfig struct { Highlighters map[string]search.Highlighter } -type Config struct { +type Configuration struct { Analysis *AnalysisConfig DefaultAnalyzer *string Highlight *HighlightConfig DefaultHighlighter *string + CreateIfMissing bool } -func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) { +func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) { rv := analysis.Analyzer{} if len(charFilterNames) > 0 { rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames)) @@ -76,7 +77,7 @@ func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string return &rv, nil } -func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer { +func (c *Configuration) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer { analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames) if err != nil { panic(err) @@ -84,8 +85,8 @@ func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName st return analyzer } -func NewConfig() *Config { - return &Config{ +func NewConfiguration() *Configuration { + return &Configuration{ Analysis: &AnalysisConfig{ CharFilters: make(map[string]analysis.CharFilter), Tokenizers: make(map[string]analysis.Tokenizer), @@ -98,75 +99,77 @@ func NewConfig() *Config { } } -var config *Config +var Config *Configuration func init() { // build the default configuration - config = NewConfig() + Config = NewConfiguration() // register char filters htmlCharFilterRegexp := regexp.MustCompile(`\s]+))?)+\s*|\s*)/?>`) htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '}) - config.Analysis.CharFilters["html"] = htmlCharFilter + Config.Analysis.CharFilters["html"] = htmlCharFilter // register tokenizers whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`) - config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer() - config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer() - config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH") - config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp) + Config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer() + Config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer() + Config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH") + Config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp) // register token filters - config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter() - config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1) - config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255) - config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter() - config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish") - config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch") - config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english") - config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish") - config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french") - config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german") - config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian") - config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian") - config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian") - config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter") - config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese") - config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian") - config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian") - config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish") - config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish") - config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish") - config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter() + Config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter() + Config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1) + Config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255) + Config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter() + Config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish") + Config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch") + Config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english") + Config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish") + Config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french") + Config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german") + Config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian") + Config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian") + Config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian") + Config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter") + Config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese") + Config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian") + Config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian") + Config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish") + Config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish") + Config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish") + Config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter() // register analyzers - keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{}) - config.Analysis.Analyzers["keyword"] = keywordAnalyzer - simpleAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"}) - config.Analysis.Analyzers["simple"] = simpleAnalyzer - standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"}) - config.Analysis.Analyzers["standard"] = standardAnalyzer - englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"}) - config.Analysis.Analyzers["english"] = englishAnalyzer - detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"}) - config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer + keywordAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{}) + Config.Analysis.Analyzers["keyword"] = keywordAnalyzer + simpleAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"}) + Config.Analysis.Analyzers["simple"] = simpleAnalyzer + standardAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"}) + Config.Analysis.Analyzers["standard"] = standardAnalyzer + englishAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"}) + Config.Analysis.Analyzers["english"] = englishAnalyzer + detectLangAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"}) + Config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer // register ansi highlighter - config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter() + Config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter() // register html highlighter htmlFormatter := search.NewHTMLFragmentFormatterCustom(``, ``) htmlHighlighter := search.NewSimpleHighlighter() htmlHighlighter.SetFragmentFormatter(htmlFormatter) - config.Highlight.Highlighters["html"] = htmlHighlighter + Config.Highlight.Highlighters["html"] = htmlHighlighter // set the default analyzer simpleAnalyzerName := "simple" - config.DefaultAnalyzer = &simpleAnalyzerName + Config.DefaultAnalyzer = &simpleAnalyzerName // set the default highlighter htmlHighlighterName := "html" - config.DefaultHighlighter = &htmlHighlighterName + Config.DefaultHighlighter = &htmlHighlighterName + // default CreateIfMissing to true + Config.CreateIfMissing = true } diff --git a/examples/bleve_query/main.go b/examples/bleve_query/main.go index ab2321fb..bd3c06d8 100644 --- a/examples/bleve_query/main.go +++ b/examples/bleve_query/main.go @@ -32,6 +32,9 @@ func main() { log.Fatal("Specify search query") } + // don't create an index if it doesn't exist + bleve.Config.CreateIfMissing = false + // create a new default mapping mapping := bleve.NewIndexMapping() diff --git a/index.go b/index.go index a454d1e2..8bc09d70 100644 --- a/index.go +++ b/index.go @@ -35,7 +35,9 @@ type Index interface { Search(req *SearchRequest) (*SearchResult, error) + Dump() DumpDoc(id string) ([]interface{}, error) + DumpFields() Close() } diff --git a/index/index.go b/index/index.go index 9cbc413d..e018a6e2 100644 --- a/index/index.go +++ b/index/index.go @@ -28,6 +28,7 @@ type Index interface { Dump() DumpDoc(id string) ([]interface{}, error) + DumpFields() } type TermFieldVector struct { diff --git a/index/store/leveldb/store.go b/index/store/leveldb/store.go index 4735fe1a..22185486 100644 --- a/index/store/leveldb/store.go +++ b/index/store/leveldb/store.go @@ -19,13 +19,13 @@ type LevelDBStore struct { db *levigo.DB } -func Open(path string) (*LevelDBStore, error) { +func Open(path string, createIfMissing bool) (*LevelDBStore, error) { rv := LevelDBStore{ path: path, } opts := levigo.NewOptions() - opts.SetCreateIfMissing(true) + opts.SetCreateIfMissing(createIfMissing) rv.opts = opts var err error diff --git a/index/store/leveldb/store_test.go b/index/store/leveldb/store_test.go index 6432bd29..73b05fd0 100644 --- a/index/store/leveldb/store_test.go +++ b/index/store/leveldb/store_test.go @@ -16,7 +16,7 @@ import ( ) func TestLevelDBStore(t *testing.T) { - s, err := Open("test") + s, err := Open("test", true) if err != nil { t.Fatal(err) } diff --git a/index/upside_down/benchmark_leveldb_test.go b/index/upside_down/benchmark_leveldb_test.go index 7429edc3..b5e55cc2 100644 --- a/index/upside_down/benchmark_leveldb_test.go +++ b/index/upside_down/benchmark_leveldb_test.go @@ -16,7 +16,7 @@ import ( ) func BenchmarkLevelDBIndexing(b *testing.B) { - s, err := leveldb.Open("test") + s, err := leveldb.Open("test", true) if err != nil { b.Fatal(err) } diff --git a/index_impl.go b/index_impl.go index 5882999e..0a8ce515 100644 --- a/index_impl.go +++ b/index_impl.go @@ -27,7 +27,7 @@ type indexImpl struct { } func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) { - store, err := leveldb.Open(path) + store, err := leveldb.Open(path, Config.CreateIfMissing) if err != nil { return nil, err } @@ -124,9 +124,9 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) { if req.Highlight != nil { // get the right highlighter - highlighter := config.Highlight.Highlighters[*config.DefaultHighlighter] + highlighter := Config.Highlight.Highlighters[*Config.DefaultHighlighter] if req.Highlight.Style != nil { - highlighter = config.Highlight.Highlighters[*req.Highlight.Style] + highlighter = Config.Highlight.Highlighters[*req.Highlight.Style] if highlighter == nil { return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style) } @@ -160,6 +160,14 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) { }, nil } +func (i *indexImpl) Dump() { + i.i.Dump() +} + +func (i *indexImpl) DumpFields() { + i.i.DumpFields() +} + func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) { return i.i.DumpDoc(id) } diff --git a/mapping_document.go b/mapping_document.go index 9fd1bb0e..280930ed 100644 --- a/mapping_document.go +++ b/mapping_document.go @@ -124,7 +124,7 @@ func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer { break } if current.DefaultAnalyzer != nil { - rv = config.Analysis.Analyzers[*current.DefaultAnalyzer] + rv = Config.Analysis.Analyzers[*current.DefaultAnalyzer] } } return rv diff --git a/mapping_index.go b/mapping_index.go index 60d41b2c..f9029328 100644 --- a/mapping_index.go +++ b/mapping_index.go @@ -247,7 +247,7 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con fieldName = parentName + *fieldMapping.Name } options := fieldMapping.Options() - analyzer := config.Analysis.Analyzers[*fieldMapping.Analyzer] + analyzer := Config.Analysis.Analyzers[*fieldMapping.Analyzer] if analyzer != nil { field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer) context.doc.AddField(field) @@ -276,9 +276,9 @@ func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *ana rv := dm.defaultAnalyzer(path) if rv == nil { if im.DefaultAnalyzer != nil { - rv = config.Analysis.Analyzers[*im.DefaultAnalyzer] - } else if config.DefaultAnalyzer != nil { - rv = config.Analysis.Analyzers[*config.DefaultAnalyzer] + rv = Config.Analysis.Analyzers[*im.DefaultAnalyzer] + } else if Config.DefaultAnalyzer != nil { + rv = Config.Analysis.Analyzers[*Config.DefaultAnalyzer] } } return rv @@ -297,7 +297,7 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer { if pathMapping != nil { if len(pathMapping.Fields) > 0 { if pathMapping.Fields[0].Analyzer != nil { - return config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer] + return Config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer] } } } @@ -312,5 +312,5 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer { } // finally just return the system-wide default analyzer - return config.Analysis.Analyzers[*config.DefaultAnalyzer] + return Config.Analysis.Analyzers[*Config.DefaultAnalyzer] } diff --git a/query_match.go b/query_match.go index f9b296da..9fad055f 100644 --- a/query_match.go +++ b/query_match.go @@ -51,7 +51,7 @@ func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, erro var analyzer *analysis.Analyzer if q.Analyzer != "" { - analyzer = config.Analysis.Analyzers[q.Analyzer] + analyzer = Config.Analysis.Analyzers[q.Analyzer] } else { analyzer = i.m.analyzerForPath(q.FieldVal) } diff --git a/query_match_phrase.go b/query_match_phrase.go index 50b4140a..ad80c7b3 100644 --- a/query_match_phrase.go +++ b/query_match_phrase.go @@ -51,7 +51,7 @@ func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher var analyzer *analysis.Analyzer if q.Analyzer != "" { - analyzer = config.Analysis.Analyzers[q.Analyzer] + analyzer = Config.Analysis.Analyzers[q.Analyzer] } else { analyzer = i.m.analyzerForPath(q.FieldVal) } diff --git a/utils/bleve_dump/main.go b/utils/bleve_dump/main.go index c240d05d..a7669a70 100644 --- a/utils/bleve_dump/main.go +++ b/utils/bleve_dump/main.go @@ -12,8 +12,7 @@ import ( "flag" "log" - "github.com/couchbaselabs/bleve/index/store/leveldb" - "github.com/couchbaselabs/bleve/index/upside_down" + "github.com/couchbaselabs/bleve" ) var indexDir = flag.String("indexDir", "index", "index directory") @@ -23,12 +22,8 @@ var fieldsOnly = flag.Bool("fields", false, "fields only") func main() { flag.Parse() - store, err := leveldb.Open(*indexDir) - if err != nil { - log.Fatal(err) - } - index := upside_down.NewUpsideDownCouch(store) - err = index.Open() + bleve.Config.CreateIfMissing = false + index, err := bleve.Open(*indexDir, bleve.NewIndexMapping()) if err != nil { log.Fatal(err) }