From 8b17787a6500c426237ad327e1c476bf87461daf Mon Sep 17 00:00:00 2001 From: Patrick Mezard Date: Tue, 27 Oct 2015 16:51:54 +0100 Subject: [PATCH] analysis: document "exception" tokenizer, and Tokenizer interface --- analysis/tokenizers/exception/exception.go | 12 ++++++++++++ analysis/type.go | 2 ++ 2 files changed, 14 insertions(+) diff --git a/analysis/tokenizers/exception/exception.go b/analysis/tokenizers/exception/exception.go index 0000f4c6..51da89ac 100644 --- a/analysis/tokenizers/exception/exception.go +++ b/analysis/tokenizers/exception/exception.go @@ -7,6 +7,18 @@ // either express or implied. See the License for the specific language governing permissions // and limitations under the License. +// Package exception implements a Tokenizer which extracts pieces matched by a +// regular expression from the input data, delegates the rest to another +// tokenizer, then inserts the extracted parts back into the token stream. Use it to +// preserve sequences which a regular tokenizer would alter or remove. +// +// Its constructor takes the following arguments: +// +// "exceptions" ([]string): one or more Go regular expressions matching the +// sequences to preserve. Multiple expressions are combined with "|". +// +// "tokenizer" (string): the name of the tokenizer processing the data not +// matched by "exceptions". package exception import ( diff --git a/analysis/type.go b/analysis/type.go index 13759ec8..a8feeabd 100644 --- a/analysis/type.go +++ b/analysis/type.go @@ -54,6 +54,8 @@ func (t *Token) String() string { type TokenStream []*Token +// A Tokenizer splits an input string into tokens, the usual behaviour being to +// map words to tokens. type Tokenizer interface { Tokenize([]byte) TokenStream }