Merge pull request #273 from pmezard/document-token-map
token_map: document it along with stop_token_filter
Commit bfbc2a6244
@@ -7,6 +7,13 @@
 // either express or implied. See the License for the specific language governing permissions
 // and limitations under the License.
 
+// package stop_tokens_filter implements a TokenFilter removing tokens found in
+// a TokenMap.
+//
+// Its constructor takes the following arguments:
+//
+// "stop_token_map" (string): the name of the token map identifying tokens to
+// remove.
 package stop_tokens_filter
 
 import (
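For context (not part of the diff): the "stop_token_map" argument names a token map that must already be registered. Below is a minimal, hedged sketch of the kind of map[string]interface{} configuration such a constructor could receive; the map name "articles_en" and the fmt-based harness are illustrative assumptions, not taken from the commit.

package main

import "fmt"

func main() {
	// "stop_token_map" refers to a previously defined token map by name;
	// "articles_en" here is a hypothetical example name.
	filterConfig := map[string]interface{}{
		"stop_token_map": "articles_en",
	}
	fmt.Println(filterConfig)
}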
@@ -7,6 +7,17 @@
 // either express or implied. See the License for the specific language governing permissions
 // and limitations under the License.
 
+// package token_map implements a generic TokenMap, often used in conjunction
+// with filters to remove or process specific tokens.
+//
+// Its constructor takes the following arguments:
+//
+// "filename" (string): the path of a file listing the tokens. Each line may
+// contain one or more whitespace separated tokens, followed by an optional
+// comment starting with a "#" or "|" character.
+//
+// "tokens" ([]interface{}): if "filename" is not specified, tokens can be
+// passed directly as a sequence of strings wrapped in a []interface{}.
 package token_map
 
 import (
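Again for context (not part of the diff): a hedged sketch of configurations matching the two documented token_map arguments. The file name "stopwords_en.txt", the sample file contents in the comment, and the inline token list are assumptions chosen for illustration only.

package main

import "fmt"

func main() {
	// Variant 1: load tokens from a file (the "filename" argument). The path
	// is hypothetical; per the documentation, each line may hold several
	// whitespace separated tokens plus an optional "#" or "|" comment, e.g.:
	//
	//   a an the      # English articles
	//   und oder      | German conjunctions
	fromFile := map[string]interface{}{
		"filename": "stopwords_en.txt",
	}

	// Variant 2: pass the tokens inline (the "tokens" argument), wrapped in a
	// []interface{} as the documentation describes.
	inline := map[string]interface{}{
		"tokens": []interface{}{"a", "an", "the"},
	}

	fmt.Println(fromFile, inline)
}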
@@ -60,6 +60,7 @@ type Tokenizer interface {
 	Tokenize([]byte) TokenStream
 }
 
+// A TokenFilter adds, transforms or removes tokens from a token stream.
 type TokenFilter interface {
 	Filter(TokenStream) TokenStream
 }
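To illustrate the newly documented TokenFilter interface, here is a hedged, self-contained sketch of a trivial filter. The stand-in Token and TokenStream types are simplified assumptions (the repository's real types carry positions, offsets, and token types), and upperCaseFilter is hypothetical.

package main

import (
	"bytes"
	"fmt"
)

// Simplified stand-ins for the repository's analysis types.
type Token struct {
	Term []byte
}

type TokenStream []*Token

// A TokenFilter adds, transforms or removes tokens from a token stream.
type TokenFilter interface {
	Filter(TokenStream) TokenStream
}

// upperCaseFilter is a hypothetical filter that upper-cases every term,
// illustrating the "transforms" case of the interface.
type upperCaseFilter struct{}

func (upperCaseFilter) Filter(in TokenStream) TokenStream {
	for _, t := range in {
		t.Term = bytes.ToUpper(t.Term)
	}
	return in
}

func main() {
	var f TokenFilter = upperCaseFilter{}
	ts := TokenStream{{Term: []byte("hello")}, {Term: []byte("world")}}
	for _, t := range f.Filter(ts) {
		fmt.Println(string(t.Term))
	}
}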