aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/cases/cases.go
blob: 752cdf03167329ce9b6d1374022d185c96cd0b8f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:generate go run gen.go gen_trieval.go

// Package cases provides general and language-specific case mappers.
package cases // import "golang.org/x/text/cases"

import (
	"golang.org/x/text/language"
	"golang.org/x/text/transform"
)

// References:
// - Unicode Reference Manual Chapter 3.13, 4.2, and 5.18.
// - https://www.unicode.org/reports/tr29/
// - https://www.unicode.org/Public/6.3.0/ucd/CaseFolding.txt
// - https://www.unicode.org/Public/6.3.0/ucd/SpecialCasing.txt
// - https://www.unicode.org/Public/6.3.0/ucd/DerivedCoreProperties.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakProperty.txt
// - https://www.unicode.org/Public/6.3.0/ucd/auxiliary/WordBreakTest.txt
// - http://userguide.icu-project.org/transforms/casemappings

// TODO:
// - Case folding
// - Wide and Narrow?
// - Segmenter option for title casing.
// - ASCII fast paths
// - Encode Soft-Dotted property within trie somehow.

// A Caser transforms given input to a certain case. It implements
// transform.Transformer.
//
// A Caser may be stateful and should therefore not be shared between
// goroutines.
type Caser struct {
	t transform.SpanningTransformer
}

// Bytes returns a new byte slice with the result of converting b to the case
// form implemented by c.
func (c Caser) Bytes(b []byte) []byte {
	b, _, _ = transform.Bytes(c.t, b)
	return b
}

// String returns a string with the result of transforming s to the case form
// implemented by c.
func (c Caser) String(s string) string {
	s, _, _ = transform.String(c.t, s)
	return s
}

// Reset resets the Caser to be reused for new input after a previous call to
// Transform.
func (c Caser) Reset() { c.t.Reset() }

// Transform implements the transform.Transformer interface and transforms the
// given input to the case form implemented by c.
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	return c.t.Transform(dst, src, atEOF)
}

// Span implements the transform.SpanningTransformer interface.
func (c Caser) Span(src []byte, atEOF bool) (n int, err error) {
	return c.t.Span(src, atEOF)
}

// Upper returns a Caser for language-specific uppercasing.
func Upper(t language.Tag, opts ...Option) Caser {
	return Caser{makeUpper(t, getOpts(opts...))}
}

// Lower returns a Caser for language-specific lowercasing.
func Lower(t language.Tag, opts ...Option) Caser {
	return Caser{makeLower(t, getOpts(opts...))}
}

// Title returns a Caser for language-specific title casing. It uses an
// approximation of the default Unicode Word Break algorithm.
func Title(t language.Tag, opts ...Option) Caser {
	return Caser{makeTitle(t, getOpts(opts...))}
}

// Fold returns a Caser that implements Unicode case folding. The returned Caser
// is stateless and safe to use concurrently by multiple goroutines.
//
// Case folding does not normalize the input and may not preserve a normal form.
// Use the collate or search package for more convenient and linguistically
// sound comparisons. Use golang.org/x/text/secure/precis for string comparisons
// where security aspects are a concern.
func Fold(opts ...Option) Caser {
	return Caser{makeFold(getOpts(opts...))}
}

// An Option is used to modify the behavior of a Caser.
type Option func(o options) options

// TODO: consider these options to take a boolean as well, like FinalSigma.
// The advantage of using this approach is that other providers of a lower-case
// algorithm could set different defaults by prefixing a user-provided slice
// of options with their own. This is handy, for instance, for the precis
// package which would override the default to not handle the Greek final sigma.

var (
	// NoLower disables the lowercasing of non-leading letters for a title
	// caser.
	NoLower Option = noLower

	// Compact omits mappings in case folding for characters that would grow the
	// input. (Unimplemented.)
	Compact Option = compact
)

// TODO: option to preserve a normal form, if applicable?

type options struct {
	noLower bool
	simple  bool

	// TODO: segmenter, max ignorable, alternative versions, etc.

	ignoreFinalSigma bool
}

func getOpts(o ...Option) (res options) {
	for _, f := range o {
		res = f(res)
	}
	return
}

func noLower(o options) options {
	o.noLower = true
	return o
}

func compact(o options) options {
	o.simple = true
	return o
}

// HandleFinalSigma specifies whether the special handling of Greek final sigma
// should be enabled. Unicode prescribes handling the Greek final sigma for all
// locales, but standards like IDNA and PRECIS override this default.
func HandleFinalSigma(enable bool) Option {
	if enable {
		return handleFinalSigma
	}
	return ignoreFinalSigma
}

func ignoreFinalSigma(o options) options {
	o.ignoreFinalSigma = true
	return o
}

func handleFinalSigma(o options) options {
	o.ignoreFinalSigma = false
	return o
}