0
0
Fork 0

Introduced a config option to disable creating indexes if they don't already exist.

closes #23 and closes #24
This commit is contained in:
Marty Schoch 2014-07-30 14:29:26 -04:00
parent 67ff4a97a6
commit 216767953c
13 changed files with 83 additions and 71 deletions

View File

@ -40,14 +40,15 @@ type HighlightConfig struct {
Highlighters map[string]search.Highlighter
}
type Config struct {
type Configuration struct {
Analysis *AnalysisConfig
DefaultAnalyzer *string
Highlight *HighlightConfig
DefaultHighlighter *string
CreateIfMissing bool
}
func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
func (c *Configuration) BuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) (*analysis.Analyzer, error) {
rv := analysis.Analyzer{}
if len(charFilterNames) > 0 {
rv.CharFilters = make([]analysis.CharFilter, len(charFilterNames))
@ -76,7 +77,7 @@ func (c *Config) BuildNewAnalyzer(charFilterNames []string, tokenizerName string
return &rv, nil
}
func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
func (c *Configuration) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName string, tokenFilterNames []string) *analysis.Analyzer {
analyzer, err := c.BuildNewAnalyzer(charFilterNames, tokenizerName, tokenFilterNames)
if err != nil {
panic(err)
@ -84,8 +85,8 @@ func (c *Config) MustBuildNewAnalyzer(charFilterNames []string, tokenizerName st
return analyzer
}
func NewConfig() *Config {
return &Config{
func NewConfiguration() *Configuration {
return &Configuration{
Analysis: &AnalysisConfig{
CharFilters: make(map[string]analysis.CharFilter),
Tokenizers: make(map[string]analysis.Tokenizer),
@ -98,75 +99,77 @@ func NewConfig() *Config {
}
}
var config *Config
var Config *Configuration
func init() {
// build the default configuration
config = NewConfig()
Config = NewConfiguration()
// register char filters
htmlCharFilterRegexp := regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
htmlCharFilter := regexp_char_filter.NewRegexpCharFilter(htmlCharFilterRegexp, []byte{' '})
config.Analysis.CharFilters["html"] = htmlCharFilter
Config.Analysis.CharFilters["html"] = htmlCharFilter
// register tokenizers
whitespaceTokenizerRegexp := regexp.MustCompile(`\w+`)
config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
Config.Analysis.Tokenizers["single"] = single_token.NewSingleTokenTokenizer()
Config.Analysis.Tokenizers["unicode"] = unicode_word_boundary.NewUnicodeWordBoundaryTokenizer()
Config.Analysis.Tokenizers["unicode_th"] = unicode_word_boundary.NewUnicodeWordBoundaryCustomLocaleTokenizer("th_TH")
Config.Analysis.Tokenizers["whitespace"] = regexp_tokenizer.NewRegexpTokenizer(whitespaceTokenizerRegexp)
// register token filters
config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
Config.Analysis.TokenFilters["detect_lang"] = cld2.NewCld2Filter()
Config.Analysis.TokenFilters["short"] = length_filter.NewLengthFilter(3, -1)
Config.Analysis.TokenFilters["long"] = length_filter.NewLengthFilter(-1, 255)
Config.Analysis.TokenFilters["to_lower"] = lower_case_filter.NewLowerCaseFilter()
Config.Analysis.TokenFilters["stemmer_da"] = stemmer_filter.MustNewStemmerFilter("danish")
Config.Analysis.TokenFilters["stemmer_nl"] = stemmer_filter.MustNewStemmerFilter("dutch")
Config.Analysis.TokenFilters["stemmer_en"] = stemmer_filter.MustNewStemmerFilter("english")
Config.Analysis.TokenFilters["stemmer_fi"] = stemmer_filter.MustNewStemmerFilter("finnish")
Config.Analysis.TokenFilters["stemmer_fr"] = stemmer_filter.MustNewStemmerFilter("french")
Config.Analysis.TokenFilters["stemmer_de"] = stemmer_filter.MustNewStemmerFilter("german")
Config.Analysis.TokenFilters["stemmer_hu"] = stemmer_filter.MustNewStemmerFilter("hungarian")
Config.Analysis.TokenFilters["stemmer_it"] = stemmer_filter.MustNewStemmerFilter("italian")
Config.Analysis.TokenFilters["stemmer_no"] = stemmer_filter.MustNewStemmerFilter("norwegian")
Config.Analysis.TokenFilters["stemmer_porter"] = stemmer_filter.MustNewStemmerFilter("porter")
Config.Analysis.TokenFilters["stemmer_pt"] = stemmer_filter.MustNewStemmerFilter("portuguese")
Config.Analysis.TokenFilters["stemmer_ro"] = stemmer_filter.MustNewStemmerFilter("romanian")
Config.Analysis.TokenFilters["stemmer_ru"] = stemmer_filter.MustNewStemmerFilter("russian")
Config.Analysis.TokenFilters["stemmer_es"] = stemmer_filter.MustNewStemmerFilter("spanish")
Config.Analysis.TokenFilters["stemmer_sv"] = stemmer_filter.MustNewStemmerFilter("swedish")
Config.Analysis.TokenFilters["stemmer_tr"] = stemmer_filter.MustNewStemmerFilter("turkish")
Config.Analysis.TokenFilters["stop_token"] = stop_words_filter.NewStopWordsFilter()
// register analyzers
keywordAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{})
config.Analysis.Analyzers["keyword"] = keywordAnalyzer
simpleAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
config.Analysis.Analyzers["simple"] = simpleAnalyzer
standardAnalyzer := config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
config.Analysis.Analyzers["standard"] = standardAnalyzer
englishAnalyzer := config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
config.Analysis.Analyzers["english"] = englishAnalyzer
detectLangAnalyzer := config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
keywordAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{})
Config.Analysis.Analyzers["keyword"] = keywordAnalyzer
simpleAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower"})
Config.Analysis.Analyzers["simple"] = simpleAnalyzer
standardAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "whitespace", []string{"to_lower", "stop_token"})
Config.Analysis.Analyzers["standard"] = standardAnalyzer
englishAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "unicode", []string{"to_lower", "stemmer_en", "stop_token"})
Config.Analysis.Analyzers["english"] = englishAnalyzer
detectLangAnalyzer := Config.MustBuildNewAnalyzer([]string{}, "single", []string{"to_lower", "detect_lang"})
Config.Analysis.Analyzers["detect_lang"] = detectLangAnalyzer
// register ansi highlighter
config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
Config.Highlight.Highlighters["ansi"] = search.NewSimpleHighlighter()
// register html highlighter
htmlFormatter := search.NewHTMLFragmentFormatterCustom(`<span class="highlight">`, `</span>`)
htmlHighlighter := search.NewSimpleHighlighter()
htmlHighlighter.SetFragmentFormatter(htmlFormatter)
config.Highlight.Highlighters["html"] = htmlHighlighter
Config.Highlight.Highlighters["html"] = htmlHighlighter
// set the default analyzer
simpleAnalyzerName := "simple"
config.DefaultAnalyzer = &simpleAnalyzerName
Config.DefaultAnalyzer = &simpleAnalyzerName
// set the default highlighter
htmlHighlighterName := "html"
config.DefaultHighlighter = &htmlHighlighterName
Config.DefaultHighlighter = &htmlHighlighterName
// default CreateIfMissing to true
Config.CreateIfMissing = true
}

View File

@ -32,6 +32,9 @@ func main() {
log.Fatal("Specify search query")
}
// don't create an index if it doesn't exist
bleve.Config.CreateIfMissing = false
// create a new default mapping
mapping := bleve.NewIndexMapping()

View File

@ -35,7 +35,9 @@ type Index interface {
Search(req *SearchRequest) (*SearchResult, error)
Dump()
DumpDoc(id string) ([]interface{}, error)
DumpFields()
Close()
}

View File

@ -28,6 +28,7 @@ type Index interface {
Dump()
DumpDoc(id string) ([]interface{}, error)
DumpFields()
}
type TermFieldVector struct {

View File

@ -19,13 +19,13 @@ type LevelDBStore struct {
db *levigo.DB
}
func Open(path string) (*LevelDBStore, error) {
func Open(path string, createIfMissing bool) (*LevelDBStore, error) {
rv := LevelDBStore{
path: path,
}
opts := levigo.NewOptions()
opts.SetCreateIfMissing(true)
opts.SetCreateIfMissing(createIfMissing)
rv.opts = opts
var err error

View File

@ -16,7 +16,7 @@ import (
)
func TestLevelDBStore(t *testing.T) {
s, err := Open("test")
s, err := Open("test", true)
if err != nil {
t.Fatal(err)
}

View File

@ -16,7 +16,7 @@ import (
)
func BenchmarkLevelDBIndexing(b *testing.B) {
s, err := leveldb.Open("test")
s, err := leveldb.Open("test", true)
if err != nil {
b.Fatal(err)
}

View File

@ -27,7 +27,7 @@ type indexImpl struct {
}
func newIndex(path string, mapping *IndexMapping) (*indexImpl, error) {
store, err := leveldb.Open(path)
store, err := leveldb.Open(path, Config.CreateIfMissing)
if err != nil {
return nil, err
}
@ -124,9 +124,9 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
if req.Highlight != nil {
// get the right highlighter
highlighter := config.Highlight.Highlighters[*config.DefaultHighlighter]
highlighter := Config.Highlight.Highlighters[*Config.DefaultHighlighter]
if req.Highlight.Style != nil {
highlighter = config.Highlight.Highlighters[*req.Highlight.Style]
highlighter = Config.Highlight.Highlighters[*req.Highlight.Style]
if highlighter == nil {
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
}
@ -160,6 +160,14 @@ func (i *indexImpl) Search(req *SearchRequest) (*SearchResult, error) {
}, nil
}
func (i *indexImpl) Dump() {
i.i.Dump()
}
func (i *indexImpl) DumpFields() {
i.i.DumpFields()
}
func (i *indexImpl) DumpDoc(id string) ([]interface{}, error) {
return i.i.DumpDoc(id)
}

View File

@ -124,7 +124,7 @@ func (dm *DocumentMapping) defaultAnalyzer(path []string) *analysis.Analyzer {
break
}
if current.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*current.DefaultAnalyzer]
rv = Config.Analysis.Analyzers[*current.DefaultAnalyzer]
}
}
return rv

View File

@ -247,7 +247,7 @@ func (im *IndexMapping) processProperty(property interface{}, path []string, con
fieldName = parentName + *fieldMapping.Name
}
options := fieldMapping.Options()
analyzer := config.Analysis.Analyzers[*fieldMapping.Analyzer]
analyzer := Config.Analysis.Analyzers[*fieldMapping.Analyzer]
if analyzer != nil {
field := document.NewTextFieldCustom(fieldName, []byte(propertyValueString), options, analyzer)
context.doc.AddField(field)
@ -276,9 +276,9 @@ func (im *IndexMapping) defaultAnalyzer(dm *DocumentMapping, path []string) *ana
rv := dm.defaultAnalyzer(path)
if rv == nil {
if im.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*im.DefaultAnalyzer]
} else if config.DefaultAnalyzer != nil {
rv = config.Analysis.Analyzers[*config.DefaultAnalyzer]
rv = Config.Analysis.Analyzers[*im.DefaultAnalyzer]
} else if Config.DefaultAnalyzer != nil {
rv = Config.Analysis.Analyzers[*Config.DefaultAnalyzer]
}
}
return rv
@ -297,7 +297,7 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
if pathMapping != nil {
if len(pathMapping.Fields) > 0 {
if pathMapping.Fields[0].Analyzer != nil {
return config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
return Config.Analysis.Analyzers[*pathMapping.Fields[0].Analyzer]
}
}
}
@ -312,5 +312,5 @@ func (im *IndexMapping) analyzerForPath(path string) *analysis.Analyzer {
}
// finally just return the system-wide default analyzer
return config.Analysis.Analyzers[*config.DefaultAnalyzer]
return Config.Analysis.Analyzers[*Config.DefaultAnalyzer]
}

View File

@ -51,7 +51,7 @@ func (q *MatchQuery) Searcher(i *indexImpl, explain bool) (search.Searcher, erro
var analyzer *analysis.Analyzer
if q.Analyzer != "" {
analyzer = config.Analysis.Analyzers[q.Analyzer]
analyzer = Config.Analysis.Analyzers[q.Analyzer]
} else {
analyzer = i.m.analyzerForPath(q.FieldVal)
}

View File

@ -51,7 +51,7 @@ func (q *MatchPhraseQuery) Searcher(i *indexImpl, explain bool) (search.Searcher
var analyzer *analysis.Analyzer
if q.Analyzer != "" {
analyzer = config.Analysis.Analyzers[q.Analyzer]
analyzer = Config.Analysis.Analyzers[q.Analyzer]
} else {
analyzer = i.m.analyzerForPath(q.FieldVal)
}

View File

@ -12,8 +12,7 @@ import (
"flag"
"log"
"github.com/couchbaselabs/bleve/index/store/leveldb"
"github.com/couchbaselabs/bleve/index/upside_down"
"github.com/couchbaselabs/bleve"
)
var indexDir = flag.String("indexDir", "index", "index directory")
@ -23,12 +22,8 @@ var fieldsOnly = flag.Bool("fields", false, "fields only")
func main() {
flag.Parse()
store, err := leveldb.Open(*indexDir)
if err != nil {
log.Fatal(err)
}
index := upside_down.NewUpsideDownCouch(store)
err = index.Open()
bleve.Config.CreateIfMissing = false
index, err := bleve.Open(*indexDir, bleve.NewIndexMapping())
if err != nil {
log.Fatal(err)
}