multi-term searches check DisjunctionMaxClauseCount earlier
regexp, fuzzy and numeric range searchers now check to see if they will be exceeding a configured DisjunctionMaxClauseCount and stop work earlier, this does a better job of avoiding situations which consume all available memory for an operation they cannot complete
This commit is contained in:
parent
c7ae842b33
commit
53f7eb2891
|
@ -35,9 +35,20 @@ type DisjunctionSearcher struct {
|
|||
min float64
|
||||
}
|
||||
|
||||
func tooManyClauses(count int) bool {
|
||||
if DisjunctionMaxClauseCount != 0 && count > DisjunctionMaxClauseCount {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func tooManyClausesErr() error {
|
||||
return fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount)
|
||||
}
|
||||
|
||||
func NewDisjunctionSearcher(indexReader index.IndexReader, qsearchers []search.Searcher, min float64, explain bool) (*DisjunctionSearcher, error) {
|
||||
if DisjunctionMaxClauseCount != 0 && len(qsearchers) > DisjunctionMaxClauseCount {
|
||||
return nil, fmt.Errorf("TooManyClauses[maxClauseCount is set to %d]", DisjunctionMaxClauseCount)
|
||||
if tooManyClauses(len(qsearchers)) {
|
||||
return nil, tooManyClausesErr()
|
||||
}
|
||||
// build the downstream searchers
|
||||
searchers := make(OrderedSearcherList, len(qsearchers))
|
||||
|
|
|
@ -32,36 +32,13 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
|
|||
}
|
||||
}
|
||||
|
||||
// find the terms with this prefix
|
||||
var fieldDict index.FieldDict
|
||||
var err error
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
|
||||
// enumerate terms and check levenshtein distance
|
||||
candidateTerms := make([]string, 0)
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
ld, exceeded := search.LevenshteinDistanceMax(&term, &tfd.Term, fuzziness)
|
||||
if !exceeded && ld <= fuzziness {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = fieldDict.Close()
|
||||
candidateTerms, err := findFuzzyCandidateTerms(indexReader, &term, fuzziness, field, prefixTerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]search.Searcher, 0, 25)
|
||||
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
|
||||
|
||||
for _, cterm := range candidateTerms {
|
||||
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
|
||||
|
@ -87,6 +64,37 @@ func NewFuzzySearcher(indexReader index.IndexReader, term string, prefix, fuzzin
|
|||
searcher: searcher,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func findFuzzyCandidateTerms(indexReader index.IndexReader, term *string, fuzziness int, field, prefixTerm string) (rv []string, err error) {
|
||||
rv = make([]string, 0)
|
||||
var fieldDict index.FieldDict
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// enumerate terms and check levenshtein distance
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
ld, exceeded := search.LevenshteinDistanceMax(term, &tfd.Term, fuzziness)
|
||||
if !exceeded && ld <= fuzziness {
|
||||
rv = append(rv, tfd.Term)
|
||||
if tooManyClauses(len(rv)) {
|
||||
return rv, tooManyClausesErr()
|
||||
}
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func (s *FuzzySearcher) Count() uint64 {
|
||||
return s.searcher.Count()
|
||||
}
|
||||
|
|
|
@ -57,6 +57,9 @@ func NewNumericRangeSearcher(indexReader index.IndexReader, min *float64, max *f
|
|||
// FIXME hard-coded precision, should match field declaration
|
||||
termRanges := splitInt64Range(minInt64, maxInt64, 4)
|
||||
terms := termRanges.Enumerate()
|
||||
if tooManyClauses(len(terms)) {
|
||||
return nil, tooManyClausesErr()
|
||||
}
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]search.Searcher, len(terms))
|
||||
for i, term := range terms {
|
||||
|
|
|
@ -27,39 +27,20 @@ type RegexpSearcher struct {
|
|||
func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, explain bool) (*RegexpSearcher, error) {
|
||||
|
||||
prefixTerm, complete := pattern.LiteralPrefix()
|
||||
candidateTerms := make([]string, 0)
|
||||
var candidateTerms []string
|
||||
if complete {
|
||||
// there is no pattern
|
||||
candidateTerms = append(candidateTerms, prefixTerm)
|
||||
candidateTerms = []string{prefixTerm}
|
||||
} else {
|
||||
var fieldDict index.FieldDict
|
||||
var err error
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
|
||||
// enumerate the terms and check against regexp
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
if pattern.MatchString(tfd.Term) {
|
||||
candidateTerms = append(candidateTerms, tfd.Term)
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = fieldDict.Close()
|
||||
candidateTerms, err = findRegexpCandidateTerms(indexReader, pattern, field, prefixTerm)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
// enumerate all the terms in the range
|
||||
qsearchers := make([]search.Searcher, 0, 25)
|
||||
qsearchers := make([]search.Searcher, 0, len(candidateTerms))
|
||||
|
||||
for _, cterm := range candidateTerms {
|
||||
qsearcher, err := NewTermSearcher(indexReader, cterm, field, boost, explain)
|
||||
|
@ -83,6 +64,36 @@ func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, fi
|
|||
searcher: searcher,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Regexp, field, prefixTerm string) (rv []string, err error) {
|
||||
rv = make([]string, 0)
|
||||
var fieldDict index.FieldDict
|
||||
if len(prefixTerm) > 0 {
|
||||
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
|
||||
} else {
|
||||
fieldDict, err = indexReader.FieldDict(field)
|
||||
}
|
||||
defer func() {
|
||||
if cerr := fieldDict.Close(); cerr != nil && err == nil {
|
||||
err = cerr
|
||||
}
|
||||
}()
|
||||
|
||||
// enumerate the terms and check against regexp
|
||||
tfd, err := fieldDict.Next()
|
||||
for err == nil && tfd != nil {
|
||||
if pattern.MatchString(tfd.Term) {
|
||||
rv = append(rv, tfd.Term)
|
||||
if tooManyClauses(len(rv)) {
|
||||
return rv, tooManyClausesErr()
|
||||
}
|
||||
}
|
||||
tfd, err = fieldDict.Next()
|
||||
}
|
||||
|
||||
return rv, err
|
||||
}
|
||||
|
||||
func (s *RegexpSearcher) Count() uint64 {
|
||||
return s.searcher.Count()
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue