Merge pull request #365 from bcampbell/documenting
some minor godoc additions
This commit is contained in:
commit
1f0509fe48
|
@ -7,6 +7,13 @@
|
||||||
// either express or implied. See the License for the specific language governing permissions
|
// either express or implied. See the License for the specific language governing permissions
|
||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
|
|
||||||
|
// Package en implements an analyzer with reasonable defaults for processing
|
||||||
|
// English text.
|
||||||
|
//
|
||||||
|
// It strips possessive suffixes ('s), transforms tokens to lower case,
|
||||||
|
// removes stopwords from a built-in list, and applies Porter stemming.
|
||||||
|
//
|
||||||
|
// The built-in stopwords list is defined in EnglishStopWords.
|
||||||
package en
|
package en
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
|
|
@ -16,6 +16,8 @@ import (
|
||||||
"github.com/blevesearch/bleve/registry"
|
"github.com/blevesearch/bleve/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// PossessiveName is the name PossessiveFilter is registered as
|
||||||
|
// in the bleve registry.
|
||||||
const PossessiveName = "possessive_en"
|
const PossessiveName = "possessive_en"
|
||||||
|
|
||||||
const rightSingleQuotationMark = '’'
|
const rightSingleQuotationMark = '’'
|
||||||
|
@ -24,6 +26,11 @@ const fullWidthApostrophe = '＇'
|
||||||
|
|
||||||
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
|
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
|
||||||
|
|
||||||
|
// PossessiveFilter implements a TokenFilter which
|
||||||
|
// strips the English possessive suffix ('s) from tokens.
|
||||||
|
// It handles a variety of apostrophe types, is case-insensitive
|
||||||
|
// and doesn't distinguish between possessive and contraction.
|
||||||
|
// (e.g. "She's So Rad" becomes "She So Rad")
|
||||||
type PossessiveFilter struct {
|
type PossessiveFilter struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,10 +7,11 @@ import (
|
||||||
|
|
||||||
const StopName = "stop_en"
|
const StopName = "stop_en"
|
||||||
|
|
||||||
|
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
|
||||||
|
//
|
||||||
// this content was obtained from:
|
// this content was obtained from:
|
||||||
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
|
||||||
// ` was changed to ' to allow for literal string
|
// ` was changed to ' to allow for literal string
|
||||||
|
|
||||||
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
|
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
|
||||||
| This file is distributed under the BSD License.
|
| This file is distributed under the BSD License.
|
||||||
| See http://snowball.tartarus.org/license.php
|
| See http://snowball.tartarus.org/license.php
|
||||||
|
|
|
@ -7,6 +7,8 @@
|
||||||
// either express or implied. See the License for the specific language governing permissions
|
// either express or implied. See the License for the specific language governing permissions
|
||||||
// and limitations under the License.
|
// and limitations under the License.
|
||||||
|
|
||||||
|
// Package lower_case_filter implements a TokenFilter which converts
|
||||||
|
// tokens to lower case according to Unicode rules.
|
||||||
package lower_case_filter
|
package lower_case_filter
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
@ -18,6 +20,7 @@ import (
|
||||||
"github.com/blevesearch/bleve/registry"
|
"github.com/blevesearch/bleve/registry"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Name is the name used to register LowerCaseFilter in the bleve registry
|
||||||
const Name = "to_lower"
|
const Name = "to_lower"
|
||||||
|
|
||||||
type LowerCaseFilter struct {
|
type LowerCaseFilter struct {
|
||||||
|
|
|
@ -23,6 +23,9 @@ func NewTokenMap() TokenMap {
|
||||||
return make(TokenMap, 0)
|
return make(TokenMap, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LoadFile reads in a list of tokens from a text file,
|
||||||
|
// one per line.
|
||||||
|
// Comments are supported using `#` or `|`
|
||||||
func (t TokenMap) LoadFile(filename string) error {
|
func (t TokenMap) LoadFile(filename string) error {
|
||||||
data, err := ioutil.ReadFile(filename)
|
data, err := ioutil.ReadFile(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -31,6 +34,9 @@ func (t TokenMap) LoadFile(filename string) error {
|
||||||
return t.LoadBytes(data)
|
return t.LoadBytes(data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LoadBytes reads in a list of tokens from memory,
|
||||||
|
// one per line.
|
||||||
|
// Comments are supported using `#` or `|`
|
||||||
func (t TokenMap) LoadBytes(data []byte) error {
|
func (t TokenMap) LoadBytes(data []byte) error {
|
||||||
bytesReader := bytes.NewReader(data)
|
bytesReader := bytes.NewReader(data)
|
||||||
bufioReader := bufio.NewReader(bytesReader)
|
bufioReader := bufio.NewReader(bytesReader)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user