added some godoc documentation for the en analyzer

2015-11-18 15:28:57 +13:00 · 2015-11-18 15:28:57 +13:00 · 994f4b4d11
commit 994f4b4d11
parent 7dd52a5463
5 changed files with 25 additions and 1 deletions
--- a/analysis/language/en/analyzer_en.go
+++ b/analysis/language/en/analyzer_en.go
@@ -7,6 +7,13 @@
 //  either express or implied. See the License for the specific language governing permissions
 //  and limitations under the License.

+// Package en implements an analyzer with reasonable defaults for processing
+// English text.
+//
+// It strips possessive suffixes ('s), transforms tokens to lower case,
+// removes stopwords from a built-in list, and applies Porter stemming.
+//
+// The built-in stopwords list is defined in EnglishStopWords.
 package en

 import (
--- a/analysis/language/en/possessive_filter_en.go
+++ b/analysis/language/en/possessive_filter_en.go
@@ -16,6 +16,8 @@ import (
 	"github.com/blevesearch/bleve/registry"
 )

+// PossessiveName is the name PossessiveFilter is registered as
+// in the bleve registry.
 const PossessiveName = "possessive_en"

 const rightSingleQuotationMark = '’'
@@ -24,6 +26,11 @@ const fullWidthApostrophe = '＇'

 const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe

+// PossessiveFilter implements a TokenFilter which
+// strips the English possessive suffix ('s) from tokens.
+// It handles a variety of apostrophe types, is case-insensitive
+// and doesn't distinguish between possessive and contraction.
+// (e.g. "She's So Rad" becomes "She So Rad")
 type PossessiveFilter struct {
 }

--- a/analysis/language/en/stop_words_en.go
+++ b/analysis/language/en/stop_words_en.go
@@ -7,10 +7,11 @@ import (

 const StopName = "stop_en"

+// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
+//
 // this content was obtained from:
 // lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
 // ` was changed to ' to allow for literal string
-
 var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
 | This file is distributed under the BSD License.
 | See http://snowball.tartarus.org/license.php
--- a/analysis/token_filters/lower_case_filter/lower_case_filter.go
+++ b/analysis/token_filters/lower_case_filter/lower_case_filter.go
@@ -7,6 +7,8 @@
 //  either express or implied. See the License for the specific language governing permissions
 //  and limitations under the License.

+// Package lower_case_filter implements a TokenFilter which converts
+// tokens to lower case according to unicode rules.
 package lower_case_filter

 import (
@@ -18,6 +20,7 @@ import (
 	"github.com/blevesearch/bleve/registry"
 )

+// Name is the name used to register LowerCaseFilter in the bleve registry.
 const Name = "to_lower"

 type LowerCaseFilter struct {
--- a/analysis/token_map.go
+++ b/analysis/token_map.go
@@ -23,6 +23,9 @@ func NewTokenMap() TokenMap {
 	return make(TokenMap, 0)
 }

+// LoadFile reads in a list of tokens from a text file,
+// one per line.
+// Comments are supported using `#` or `|`.
 func (t TokenMap) LoadFile(filename string) error {
 	data, err := ioutil.ReadFile(filename)
 	if err != nil {
@@ -31,6 +34,9 @@ func (t TokenMap) LoadFile(filename string) error {
 	return t.LoadBytes(data)
 }

+// LoadBytes reads in a list of tokens from memory,
+// one per line.
+// Comments are supported using `#` or `|`.
 func (t TokenMap) LoadBytes(data []byte) error {
 	bytesReader := bytes.NewReader(data)
 	bufioReader := bufio.NewReader(bytesReader)