0
0
Fork 0

doc: document Token, TokenFrequencies and Field structs

It helps in understanding what is going on in the indexing code.
ArrayPositions() was particularly puzzling.
This commit is contained in:
Patrick Mezard 2015-10-09 10:54:40 +02:00
parent aee82f8b49
commit e2fa3d6351
4 changed files with 29 additions and 5 deletions

View File

@ -9,6 +9,10 @@
package analysis
// TokenLocation represents one occurrence of a term at a particular location in
// a field. Start, End and Position have the same meaning as in analysis.Token.
// Field and ArrayPositions identify the field value in the source document.
// See document.Field for details.
type TokenLocation struct {
Field string
ArrayPositions []uint64
@ -17,11 +21,15 @@ type TokenLocation struct {
Position int
}
// TokenFreq represents all the occurrences of a term in all fields of a
// document.
type TokenFreq struct {
// Term is the term itself, stored as raw bytes.
Term []byte
// Locations holds one TokenLocation per occurrence of Term.
Locations []*TokenLocation
}
// TokenFrequencies maps document terms to their combined frequencies from all
// fields.
// NOTE(review): keys are presumably the string form of TokenFreq.Term —
// confirm against the code that populates the map.
type TokenFrequencies map[string]*TokenFreq
func (tfs TokenFrequencies) MergeAll(remoteField string, other TokenFrequencies) {

View File

@ -30,10 +30,19 @@ const (
Double
)
// Token represents one occurrence of a term at a particular location in a
// field.
type Token struct {
Start int `json:"start"`
End int `json:"end"`
Term []byte `json:"term"`
// Start specifies the byte offset of the beginning of the term in the
// field.
Start int `json:"start"`
// End specifies the byte offset of the end of the term in the field.
End int `json:"end"`
Term []byte `json:"term"`
// Position specifies the 1-based index of the token in the sequence of
// occurrences of its term in the field.
Position int `json:"position"`
Type TokenType `json:"type"`
KeyWord bool `json:"keyword"`

View File

@ -14,7 +14,14 @@ import (
)
type Field interface {
// Name returns the path of the field from the root DocumentMapping.
// A root field path is "field", a subdocument field is "parent.field".
Name() string
// ArrayPositions returns the intermediate document and field indices
// required to resolve the field value in the document. For example, if the
// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
// "doc1", then "field" in "doc2".
ArrayPositions() []uint64
Options() IndexingOptions
Analyze() (int, analysis.TokenFrequencies)

View File

@ -253,7 +253,7 @@ func ExampleNewFacetRequest() {
fmt.Println(searchResults.Facets["facet name"].Total)
// number of docs with no value for this field
fmt.Println(searchResults.Facets["facet name"].Missing)
// term with highest occurences in field name
// term with highest occurrences in field name
fmt.Println(searchResults.Facets["facet name"].Terms[0].Term)
// Output:
// 5
@ -339,7 +339,7 @@ func ExampleSearchRequest_AddFacet() {
fmt.Println(searchResults.Facets["facet name"].Total)
// number of docs with no value for this field
fmt.Println(searchResults.Facets["facet name"].Missing)
// term with highest occurences in field name
// term with highest occurrences in field name
fmt.Println(searchResults.Facets["facet name"].Terms[0].Term)
// Output:
// 5