0
0

Merge pull request #273 from pmezard/document-token-map

token_map: document it along with stop_token_filter
This commit is contained in:
Marty Schoch 2015-11-12 15:40:40 +00:00
commit bfbc2a6244
3 changed files with 19 additions and 0 deletions

View File

@@ -7,6 +7,13 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// Package stop_tokens_filter implements a TokenFilter removing tokens found in
// a TokenMap.
//
// Its constructor takes the following arguments:
//
// "stop_token_map" (string): the name of the token map identifying tokens to
// remove.
package stop_tokens_filter
import (

View File

@@ -7,6 +7,17 @@
// either express or implied. See the License for the specific language governing permissions
// and limitations under the License.
// Package token_map implements a generic TokenMap, often used in conjunction
// with filters to remove or process specific tokens.
//
// Its constructor takes the following arguments:
//
// "filename" (string): the path of a file listing the tokens. Each line may
// contain one or more whitespace separated tokens, followed by an optional
// comment starting with a "#" or "|" character.
//
// "tokens" ([]interface{}): if "filename" is not specified, tokens can be
// passed directly as a sequence of strings wrapped in a []interface{}.
package token_map
import (

View File

@@ -60,6 +60,7 @@ type Tokenizer interface {
Tokenize([]byte) TokenStream
}
// A TokenFilter adds, transforms or removes tokens from a token stream.
type TokenFilter interface {
	// Filter consumes a TokenStream and returns the resulting TokenStream;
	// implementations may add, modify, or drop tokens in the process.
	Filter(TokenStream) TokenStream
}