diff --git a/search/query/regexp.go b/search/query/regexp.go index 65f9a656..09544fcf 100644 --- a/search/query/regexp.go +++ b/search/query/regexp.go @@ -33,7 +33,9 @@ type RegexpQuery struct { // NewRegexpQuery creates a new Query which finds // documents containing terms that match the -// specified regular expression. +// specified regular expression. The regexp pattern +// SHOULD NOT include ^ or $ modifiers, the search +// will only match entire terms even without them. func NewRegexpQuery(regexp string) *RegexpQuery { return &RegexpQuery{ Regexp: regexp, @@ -76,14 +78,14 @@ func (q *RegexpQuery) Validate() error { func (q *RegexpQuery) compile() error { if q.compiled == nil { - // require that pattern be anchored to start and end of term + // require that pattern NOT be anchored to start and end of term actualRegexp := q.Regexp - if !strings.HasPrefix(actualRegexp, "^") { - actualRegexp = "^" + actualRegexp - } - if !strings.HasSuffix(actualRegexp, "$") { - actualRegexp = actualRegexp + "$" + if strings.HasPrefix(actualRegexp, "^") { + actualRegexp = actualRegexp[1:] // remove leading ^ } + // do not attempt to remove trailing $, its presence is not + // known to interfere with LiteralPrefix() the way ^ does + // and removing $ introduces possible ambiguities with escaped \$, \\$, etc var err error q.compiled, err = regexp.Compile(actualRegexp) if err != nil { diff --git a/search/query/wildcard.go b/search/query/wildcard.go index 9fbf846d..7fd7482c 100644 --- a/search/query/wildcard.go +++ b/search/query/wildcard.go @@ -101,6 +101,6 @@ func (q *WildcardQuery) Validate() error { } func (q *WildcardQuery) convertToRegexp() (*regexp.Regexp, error) { - regexpString := "^" + wildcardRegexpReplacer.Replace(q.Wildcard) + "$" + regexpString := wildcardRegexpReplacer.Replace(q.Wildcard) return regexp.Compile(regexpString) } diff --git a/search/searcher/search_regexp.go b/search/searcher/search_regexp.go index f2134d66..84cea434 100644 --- 
a/search/searcher/search_regexp.go +++ b/search/searcher/search_regexp.go @@ -21,6 +21,11 @@ import ( "github.com/blevesearch/bleve/search" ) +// NewRegexpSearcher creates a searcher which will match documents that +// contain terms which match the pattern regexp. The match must be EXACT +// matching the entire term. The provided regexp SHOULD NOT start with ^ +// or end with $ as this can interfere with the implementation. Separately, +// matches will be checked to ensure they match the entire term. func NewRegexpSearcher(indexReader index.IndexReader, pattern *regexp.Regexp, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) { prefixTerm, complete := pattern.LiteralPrefix() @@ -79,7 +84,8 @@ func findRegexpCandidateTerms(indexReader index.IndexReader, pattern *regexp.Reg // enumerate the terms and check against regexp tfd, err := fieldDict.Next() for err == nil && tfd != nil { - if pattern.MatchString(tfd.Term) { + matchPos := pattern.FindStringIndex(tfd.Term) + if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) { rv = append(rv, tfd.Term) if tooManyClauses(len(rv)) { return rv, tooManyClausesErr()