aboutsummaryrefslogtreecommitdiff
path: root/vendor/golang.org/x/text/cases/map.go
blob: 0f7c6a14bb73c5a7a70230281b079cb26e45ef0e (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package cases

// This file contains the definitions of case mappings for all supported
// languages. The rules for the language-specific tailorings were taken and
// modified from the CLDR transform definitions in common/transforms.

import (
	"strings"
	"unicode"
	"unicode/utf8"

	"golang.org/x/text/internal"
	"golang.org/x/text/language"
	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

// A mapFunc takes a context set to the current rune and writes the mapped
// version to the same context. It may advance the context to the next rune. It
// returns whether a checkpoint is possible: whether the pDst bytes written to
// dst so far won't need changing as we see more source bytes.
type mapFunc func(*context) bool

// A spanFunc takes a context set to the current rune and returns whether this
// rune would be altered when written to the output. It may advance the context
// to the next rune. It returns whether a checkpoint is possible.
type spanFunc func(*context) bool

// maxIgnorable defines the maximum number of ignorables to consider for
// lookahead operations.
const maxIgnorable = 30

// supported lists the language tags for which we have tailorings.
const supported = "und af az el lt nl tr"

func init() {
	tags := []language.Tag{}
	for _, s := range strings.Split(supported, " ") {
		tags = append(tags, language.MustParse(s))
	}
	matcher = internal.NewInheritanceMatcher(tags)
	Supported = language.NewCoverage(tags)
}

var (
	// matcher is used by makeUpper, makeLower and makeTitle to turn a language
	// tag into an index into the tables below. It is assigned in init.
	matcher *internal.InheritanceMatcher

	// Supported is the coverage built in init from the tags listed in
	// supported.
	Supported language.Coverage

	// We keep the following lists separate, instead of having a single per-
	// language struct, to give the compiler a chance to remove unused code.

	// Some uppercase mappers are stateless, so we can precompute the
	// Transformers and save a bit on runtime allocations.
	//
	// upperFunc holds, per language, the upper-case mapping function and the
	// matching span function. A nil upper entry makes makeUpper fall back to
	// undUpper.
	upperFunc = []struct {
		upper mapFunc
		span  spanFunc
	}{
		{nil, nil},                  // und
		{nil, nil},                  // af
		{aztrUpper(upper), isUpper}, // az
		{elUpper, noSpan},           // el
		{ltUpper(upper), noSpan},    // lt
		{nil, nil},                  // nl
		{aztrUpper(upper), isUpper}, // tr
	}

	// Preallocated transformers for the root locale (und); they are returned
	// by makeUpper and makeLower for languages without a tailoring.
	undUpper            transform.SpanningTransformer = &undUpperCaser{}
	undLower            transform.SpanningTransformer = &undLowerCaser{}
	undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{}

	// lowerFunc holds the per-language lower-case mapping function. A nil
	// entry makes makeLower fall back to undLower or undLowerIgnoreSigma.
	lowerFunc = []mapFunc{
		nil,       // und
		nil,       // af
		aztrLower, // az
		nil,       // el
		ltLower,   // lt
		nil,       // nl
		aztrLower, // tr
	}

	// titleInfos holds, per language, the functions makeTitle copies into a
	// titleCaser: the mapping for the first cased rune of a word (title), the
	// mapping for the remaining runes (lower), the span check for the first
	// cased rune (titleSpan) and an optional hook that runs before each rune
	// is processed (rewrite).
	titleInfos = []struct {
		title     mapFunc
		lower     mapFunc
		titleSpan spanFunc
		rewrite   func(*context)
	}{
		{title, lower, isTitle, nil},                // und
		{title, lower, isTitle, afnlRewrite},        // af
		{aztrUpper(title), aztrLower, isTitle, nil}, // az
		{title, lower, isTitle, nil},                // el
		{ltUpper(title), ltLower, noSpan, nil},      // lt
		{nlTitle, lower, nlTitleSpan, afnlRewrite},  // nl
		{aztrUpper(title), aztrLower, isTitle, nil}, // tr
	}
)

// makeUpper returns the upper-casing transformer for language t. Languages
// without an upper-case tailoring share the preallocated undUpper; all others
// get a new simpleCaser built from their upperFunc entry. The options o are
// not consulted.
func makeUpper(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	entry := upperFunc[idx]
	if entry.upper != nil {
		return &simpleCaser{f: entry.upper, span: entry.span}
	}
	return undUpper
}

// makeLower returns the lower-casing transformer for language t.
//
// Languages without a tailoring use one of the two preallocated root-locale
// transformers, chosen by o.ignoreFinalSigma. Tailored languages get either a
// simpleCaser (final sigma ignored) or a lowerCaser whose mid-word mapping is
// the tailored function wrapped by finalSigma.
func makeLower(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	tailored := lowerFunc[idx]
	switch {
	case tailored == nil && o.ignoreFinalSigma:
		return undLowerIgnoreSigma
	case tailored == nil:
		return undLower
	case o.ignoreFinalSigma:
		return &simpleCaser{f: tailored, span: isLower}
	default:
		return &lowerCaser{
			first:   tailored,
			midWord: finalSigma(tailored),
		}
	}
}

// makeTitle returns a title-casing transformer for language t, built from the
// language's titleInfos entry.
//
// The mapping used for the runes after the first cased rune of a word depends
// on the options: with o.noLower those runes are copied unchanged; otherwise
// the language's lower mapping is used, wrapped by finalSigma unless
// o.ignoreFinalSigma is set.
func makeTitle(t language.Tag, o options) transform.SpanningTransformer {
	_, idx, _ := matcher.Match(t)
	ti := &titleInfos[idx]

	rest := ti.lower
	switch {
	case o.noLower:
		rest = (*context).copy
	case !o.ignoreFinalSigma:
		rest = finalSigma(rest)
	}

	return &titleCaser{
		title:     ti.title,
		lower:     rest,
		titleSpan: ti.titleSpan,
		rewrite:   ti.rewrite,
	}
}

// noSpan is the spanFunc for mappings that have no span implementation. It
// records transform.ErrEndOfSpan on the context and returns false, so no
// checkpoint is taken for the current rune.
func noSpan(c *context) bool {
	c.err = transform.ErrEndOfSpan
	return false
}

// TODO: consider a similar special case for the fast majority lower case. This
// is a bit more involved so will require some more precise benchmarking to
// justify it.

// undUpperCaser is the upper-casing transformer for the root locale (und). It
// has no fields of its own; it only embeds transform.NopResetter.
type undUpperCaser struct{ transform.NopResetter }

// Transform implements the Transformer interface for undUpperCaser: it applies
// the root locale (und) upper-case mapping to every rune of src, taking a
// checkpoint after each one. Calling upper directly, rather than through a
// function pointer, prevents the context from escaping and so eliminates the
// need for an allocation.
func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for {
		if !ctx.next() {
			break
		}
		upper(&ctx)
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span advances over src for as long as isUpper accepts the current rune,
// taking a checkpoint after each accepted rune, and reports the result through
// retSpan.
func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isUpper(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}

// undLowerIgnoreSigmaCaser implements the Transformer interface for doing
// a lower case mapping for the root locale (und) ignoring final sigma
// handling. This casing algorithm is used in some performance-critical packages
// like secure/precis and x/net/http/idna, which warrants its special-casing.
//
// The type has no fields of its own; it only embeds transform.NopResetter.
type undLowerIgnoreSigmaCaser struct{ transform.NopResetter }

// Transform lower-cases src into dst one rune at a time with lower, without
// any final sigma handling. It stops at the first rune for which lower
// returns false and takes a checkpoint after every rune that succeeds.
func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for ctx.next() {
		if !lower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span implements a generic lower-casing span. isLower can serve every
// lowercasing variant because the variants differ only in how they transform a
// letter that is not yet lowercase; none of them changes a letter that already
// is lowercase. The check also needs no state.
func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isLower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}

// simpleCaser applies one mapping function to every rune of the input,
// independent of the rune's position within a word.
type simpleCaser struct {
	// context is embedded, but Transform and Span below each build a local
	// context and do not read this one.
	context
	// f is the mapping applied by Transform; span is the check applied by
	// Span.
	f    mapFunc
	span spanFunc
}

// Transform implements the Transformer interface for simpleCaser by doing a
// case operation on a rune-by-rune basis: t.f is applied to each rune in turn
// until it returns false, with a checkpoint after every successful rune.
func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}
	for ctx.next() {
		if !t.f(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span advances over src for as long as t.span accepts the current rune,
// taking a checkpoint after each accepted rune, and reports the result through
// retSpan.
func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !t.span(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}

// undLowerCaser implements the Transformer interface for doing a lower case
// mapping for the root locale (und), including Greek final sigma handling. It
// is the counterpart of undLowerIgnoreSigmaCaser and is what makeLower returns
// for untailored languages when final sigma is not ignored. Specializing it
// for the root locale lets Transform call lower and finalSigmaBody directly
// instead of going through function values.
type undLowerCaser struct{ transform.NopResetter }

// Transform lower-cases src into dst for the root locale, including final
// sigma handling.
//
// It tracks whether the current rune lies between words. Between words, a
// cased rune is lower-cased and starts a word, and any other rune is copied.
// Inside a word, a rune that is neither cased nor case-ignorable is copied and
// ends the word, a capital sigma is handled by finalSigmaBody, and every other
// rune is lower-cased. Processing stops at the first operation that fails; a
// checkpoint is taken after every rune that succeeds.
func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	ctx := context{dst: dst, src: src, atEOF: atEOF}

	betweenWords := true
	for ctx.next() {
		var ok bool
		switch {
		case betweenWords && ctx.info.isCased():
			// First cased rune of a word.
			if ok = lower(&ctx); ok {
				betweenWords = false
			}
		case betweenWords:
			ok = ctx.copy()
		case ctx.info.isNotCasedAndNotCaseIgnorable():
			// The word ends here.
			if ok = ctx.copy(); ok {
				betweenWords = true
			}
		case ctx.hasPrefix("Σ"):
			ok = finalSigmaBody(&ctx)
		default:
			ok = lower(&ctx)
		}
		if !ok {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// Span advances over src for as long as isLower accepts the current rune,
// taking a checkpoint after each accepted rune, and reports the result through
// retSpan.
func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) {
	ctx := context{src: src, atEOF: atEOF}
	for ctx.next() {
		if !isLower(&ctx) {
			break
		}
		ctx.checkpoint()
	}
	return ctx.retSpan()
}

// lowerCaser implements the Transformer interface. The default Unicode lower
// casing requires different treatment for the first and subsequent characters
// of a word, most notably to handle the Greek final Sigma.
type lowerCaser struct {
	// Embedded so that lowerCaser picks up the promoted methods of
	// undLowerIgnoreSigmaCaser that it does not define itself, such as Span.
	undLowerIgnoreSigmaCaser

	// context is reinitialized at the start of every Transform call.
	context

	// first maps the first cased rune of a word; midWord maps the remaining
	// runes of that word.
	first, midWord mapFunc
}

// Transform lower-cases src into dst using the tailored mapping functions.
//
// It tracks whether the current rune lies between words. Between words, a
// cased rune is mapped with t.first and starts a word, and any other rune is
// copied. Inside a word, a rune that is neither cased nor case-ignorable is
// copied and ends the word, and every other rune is mapped with t.midWord.
// Processing stops at the first operation that fails; a checkpoint is taken
// after every rune that succeeds.
func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF}
	ctx := &t.context

	betweenWords := true
	for ctx.next() {
		var ok bool
		switch {
		case betweenWords && ctx.info.isCased():
			// First cased rune of a word.
			if ok = t.first(ctx); ok {
				betweenWords = false
			}
		case betweenWords:
			ok = ctx.copy()
		case ctx.info.isNotCasedAndNotCaseIgnorable():
			// The word ends here.
			if ok = ctx.copy(); ok {
				betweenWords = true
			}
		default:
			ok = t.midWord(ctx)
		}
		if !ok {
			break
		}
		ctx.checkpoint()
	}
	return ctx.ret()
}

// titleCaser implements the Transformer interface. Title casing algorithms
// distinguish between the first letter of a word and subsequent letters of the
// same word. It uses state to avoid requiring a potentially infinite lookahead.
type titleCaser struct {
	// context is rebuilt on every Transform and Span call; only isMidWord is
	// carried over from the previous call.
	context

	// rune mappings used by the actual casing algorithms.
	title     mapFunc  // applied to a cased rune when not mid-word
	lower     mapFunc  // applied to a cased rune when mid-word
	titleSpan spanFunc // Span counterpart of title

	// rewrite, if non-nil, is called on the context before each rune is
	// processed.
	rewrite func(*context)
}

// Transform implements the standard Unicode title case algorithm as defined in
// Chapter 3 of The Unicode Standard:
// toTitlecase(X): Find the word boundaries in X according to Unicode Standard
// Annex #29, "Unicode Text Segmentation." For each word boundary, find the
// first cased character F following the word boundary. If F exists, map F to
// Titlecase_Mapping(F); then map all characters C between F and the following
// word boundary to Lowercase_Mapping(C).
//
// The isMidWord flag is carried over from the previous call, so a word that is
// split over several calls is still treated as one word.
func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.ret()
	}

	for {
		// Remember the properties of the current rune: the mapping functions
		// below may advance the context to a later rune.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				// First cased rune of a word.
				if !t.title(c) {
					break
				}
				c.isMidWord = true
			} else if !t.lower(c) {
				break
			}
		} else if !c.copy() {
			break
		} else if p.isBreak() {
			c.isMidWord = false
		}

		// As we save the state of the transformer, it is safe to call
		// checkpoint after any successful write.
		// The checkpoint is skipped after a mid rune inside a word: the next
		// rune may still reset isMidWord (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.ret()
}

// Span is the counterpart of Transform that writes nothing. It runs the same
// word-tracking state machine, but checks each cased rune with t.titleSpan
// (first cased rune of a word) or isLower (mid-word) and stops at the first
// rune that fails the check. The isMidWord flag is carried over from the
// previous call.
func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) {
	t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord}
	c := &t.context

	if !c.next() {
		return c.retSpan()
	}

	for {
		// Remember the properties of the current rune: the span functions
		// below may advance the context to a later rune.
		p := c.info
		if t.rewrite != nil {
			t.rewrite(c)
		}

		wasMid := p.isMid()
		// Break out of this loop on failure to ensure we do not modify the
		// state incorrectly.
		if p.isCased() {
			if !c.isMidWord {
				// First cased rune of a word.
				if !t.titleSpan(c) {
					break
				}
				c.isMidWord = true
			} else if !isLower(c) {
				break
			}
		} else if p.isBreak() {
			c.isMidWord = false
		}
		// As we save the state of the transformer, it is safe to call
		// checkpoint after any rune that passed its check. The checkpoint is
		// skipped after a mid rune inside a word: the next rune may still
		// reset isMidWord (see below).
		if !(c.isMidWord && wasMid) {
			c.checkpoint()
		}

		if !c.next() {
			break
		}
		// Two mid runes in a row end the current word.
		if wasMid && c.info.isMid() {
			c.isMidWord = false
		}
	}
	return c.retSpan()
}

// finalSigma adds Greek final sigma handling to another casing function. The
// returned function passes every rune to f, except for a capital sigma: that
// one goes to finalSigmaBody, which determines whether the lowercased sigma
// should be σ or ς by looking ahead for case-ignorables and a cased letter.
func finalSigma(f mapFunc) mapFunc {
	return func(c *context) bool {
		if c.hasPrefix("Σ") {
			return finalSigmaBody(c)
		}
		return f(c)
	}
}

// finalSigmaBody lowercases a capital sigma. It first writes the final form ς
// and then looks ahead over at most maxIgnorable case-ignorable runes, copying
// them to the output. If the first rune that is not case-ignorable is cased,
// the ς already written is patched into σ. That rune is then unread so the
// caller processes it normally.
//
// It returns false if the source runs out (or an error is pending) before the
// lookahead is resolved, and true otherwise.
func finalSigmaBody(c *context) bool {
	// Current rune must be Σ (U+03A3 GREEK CAPITAL LETTER SIGMA).

	// ::NFD();
	// # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA
	// Σ } [:case-ignorable:]* [:cased:] → σ;
	// [:cased:] [:case-ignorable:]* { Σ → ς;
	// ::Any-Lower;
	// ::NFC();

	// Remember where ς starts in dst so it can be patched later.
	p := c.pDst
	c.writeString("ς")

	// TODO: we should do this here, but right now this will never have an
	// effect as this is called when the prefix is Sigma, whereas Dutch and
	// Afrikaans only test for an apostrophe.
	//
	// if t.rewrite != nil {
	// 	t.rewrite(c)
	// }

	// We need to do one more iteration after maxIgnorable, as a cased
	// letter is not an ignorable and may modify the result.
	wasMid := false
	for i := 0; i < maxIgnorable+1; i++ {
		if !c.next() {
			return false
		}
		if !c.info.isCaseIgnorable() {
			// All Midword runes are also case ignorable, so we are
			// guaranteed to have a letter or word break here. As we are
			// unreading the rune, there is no need to unset c.isMidWord;
			// the title caser will handle this.
			if c.info.isCased() {
				// p+1 is guaranteed to be in bounds: if writing ς was
				// successful, p+1 will contain the second byte of ς. If not,
				// this function will have returned after c.next returned false.
				c.dst[p+1]++ // ς → σ
			}
			c.unreadRune()
			return true
		}
		// A case ignorable may also introduce a word break, so we may need
		// to continue searching even after detecting a break.
		isMid := c.info.isMid()
		if (wasMid && isMid) || c.info.isBreak() {
			c.isMidWord = false
		}
		wasMid = isMid
		c.copy()
	}
	return true
}

// finalSigmaSpan would be the same as isLower.

// elUpper implements Greek upper casing, which entails removing a predefined
// set of non-blocked modifiers. Note that these accents should not be removed
// for title casing!
// Example: "Οδός" -> "ΟΔΟΣ".
//
// It returns false if upper fails, or if the source runs out before the scan
// over the following modifiers is resolved by a rune with ccc zero or an
// Above modifier, or by reaching maxIgnorable.
func elUpper(c *context) bool {
	// From CLDR:
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ;
	// [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ;

	// Decode the source rune and remember the output position before upper
	// writes the mapped rune and possibly moves the context.
	r, _ := utf8.DecodeRune(c.src[c.pSrc:])
	oldPDst := c.pDst
	if !upper(c) {
		return false
	}
	// Only Greek runes get the modifier removal below.
	if !unicode.Is(unicode.Greek, r) {
		return true
	}
	i := 0
	// Take the properties of the uppercased rune that is already written to the
	// destination. This saves us the trouble of having to uppercase the
	// decomposed rune again.
	if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil {
		// Restore the destination position and process the decomposed rune.
		r, sz := utf8.DecodeRune(b)
		if r <= 0xFF { // See A.6.1
			return true
		}
		c.pDst = oldPDst
		// Insert the first rune and ignore the modifiers. See A.6.2.
		c.writeBytes(b[:sz])
		// The dropped modifiers count towards the maxIgnorable budget of the
		// loop below.
		i = len(b[sz:]) / 2 // Greek modifiers are always of length 2.
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r {
		// Above and Iota Subscript
		case 0x0300, // U+0300 COMBINING GRAVE ACCENT
			0x0301, // U+0301 COMBINING ACUTE ACCENT
			0x0304, // U+0304 COMBINING MACRON
			0x0306, // U+0306 COMBINING BREVE
			0x0308, // U+0308 COMBINING DIAERESIS
			0x0313, // U+0313 COMBINING COMMA ABOVE
			0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE
			0x0342, // U+0342 COMBINING GREEK PERISPOMENI
			0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI
			// No-op. Gobble the modifier.

		default:
			switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() {
			case cccZero:
				// A rune with ccc zero ends the modifier sequence; leave it
				// for the caller.
				c.unreadRune()
				return true

			// We don't need to test for IotaSubscript as the only rune that
			// qualifies (U+0345) was already excluded in the switch statement
			// above. See A.4.

			case cccAbove:
				// Any other Above modifier is copied and ends the scan.
				return c.copy()
			default:
				// Some other modifier. We're still allowed to gobble Greek
				// modifiers after this.
				c.copy()
			}
		}
	}
	return i == maxIgnorable
}

// TODO: implement elUpperSpan (low-priority: complex and infrequent).

// ltLower implements Lithuanian lower casing: when a capital I or J (or a
// precomposed rune whose decomposition starts with one) is followed by a
// modifier with CCC Above, an explicit U+0307 COMBINING DOT ABOVE is inserted
// before that modifier. All other runes are handled by lower.
//
// For an ASCII rune the result of lower is not checked. For I and J the
// function returns false if the source runs out before the scan over the
// following modifiers is resolved by a rune with ccc zero or an Above
// modifier, or by reaching maxIgnorable.
func ltLower(c *context) bool {
	// From CLDR:
	// # Introduce an explicit dot above when lowercasing capital I's and J's
	// # whenever there are more accents above.
	// # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek)
	// # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I
	// # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J
	// # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK
	// # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE
	// # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
	// # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
	// ::NFD();
	// I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307;
	// J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307;
	// I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307;
	// I \u0300 (Ì) → i \u0307 \u0300;
	// I \u0301 (Í) → i \u0307 \u0301;
	// I \u0303 (Ĩ) → i \u0307 \u0303;
	// ::Any-Lower();
	// ::NFC();

	// i counts the modifiers seen so far against maxIgnorable.
	i := 0
	if r := c.src[c.pSrc]; r < utf8.RuneSelf {
		// ASCII: lowercase it; only I and J need the lookahead below.
		lower(c)
		if r != 'I' && r != 'J' {
			return true
		}
	} else {
		// Non-ASCII: check whether the rune decomposes into I or J followed
		// by at least one two-byte modifier.
		p := norm.NFD.Properties(c.src[c.pSrc:])
		if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') {
			// UTF-8 optimization: the decomposition will only have an above
			// modifier if the last rune of the decomposition is in [U+300-U+311].
			// In all other cases, a decomposition starting with I is always
			// an I followed by modifiers that are not cased themselves. See A.2.
			if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4.
				if !c.writeBytes(d[:1]) {
					return false
				}
				// Lowercase the ASCII letter that was just written, in place.
				c.dst[c.pDst-1] += 'a' - 'A' // lower

				// Assumption: modifier never changes on lowercase. See A.1.
				// Assumption: all modifiers added have CCC = Above. See A.2.3.
				return c.writeString("\u0307") && c.writeBytes(d[1:])
			}
			// In all other cases the additional modifiers will have a CCC
			// that is less than 230 (Above). We will insert the U+0307, if
			// needed, after these modifiers so that a string in FCD form
			// will remain so. See A.2.2.
			lower(c)
			i = 1
		} else {
			return lower(c)
		}
	}

	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccZero:
			// A rune with ccc zero ends the modifier sequence; leave it for
			// the caller.
			c.unreadRune()
			return true
		case cccAbove:
			// Insert the explicit dot before the first Above modifier.
			return c.writeString("\u0307") && c.copy() // See A.1.
		default:
			c.copy() // See A.1.
		}
	}
	return i == maxIgnorable
}

// ltLowerSpan would be the same as isLower.

// ltUpper wraps the upper- or title-casing function f with the Lithuanian
// rule that removes a U+0307 COMBINING DOT ABOVE following a soft-dotted rune,
// with possible intervening modifiers that do not have CCC Above.
//
// The returned function first maps the current rune with f. If the source rune
// is soft-dotted it then scans the following modifiers: a U+0307 is dropped,
// and when the mapped rune is a plain 'I' that is directly followed by a
// grave, acute, tilde or diaeresis, the pair is written as the corresponding
// precomposed capital. It returns false if f fails, if the source runs out
// before the scan is resolved, or if a write fails.
func ltUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// Unicode:
		// 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE
		//
		// From CLDR:
		// # Remove \u0307 following soft-dotteds (i, j, and the like), with possible
		// # intervening non-230 marks.
		// ::NFD();
		// [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ;
		// ::Any-Upper();
		// ::NFC();

		// TODO: See A.5. A soft-dotted rune never has an exception. This would
		// allow us to overload the exception bit and encode this property in
		// info. Need to measure performance impact of this.
		r, _ := utf8.DecodeRune(c.src[c.pSrc:])
		oldPDst := c.pDst
		if !f(c) {
			return false
		}
		if !unicode.Is(unicode.Soft_Dotted, r) {
			return true
		}

		// We don't need to do an NFD normalization, as a soft-dotted rune never
		// contains U+0307. See A.3.

		i := 0
		for ; i < maxIgnorable && c.next(); i++ {
			switch c.info.cccType() {
			case cccZero:
				// A rune with ccc zero ends the modifier sequence; leave it
				// for the caller.
				c.unreadRune()
				return true
			case cccAbove:
				if c.hasPrefix("\u0307") {
					// We don't do a full NFC, but rather combine runes for
					// some of the common cases. (Returning NFC or
					// preserving normal form is neither a requirement nor
					// a possibility anyway).
					if !c.next() {
						return false
					}
					// Only look at the second byte of the current rune when
					// that byte exists. A lone 0xCC lead byte at the very end
					// of src would otherwise cause an out-of-range index.
					// NOTE(review): presumably next reports such a truncated
					// byte as a rune of its own at EOF; confirm against
					// context.next.
					if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 &&
						c.src[c.pSrc] == 0xcc && c.pSrc+1 < len(c.src) {
						s := ""
						switch c.src[c.pSrc+1] {
						case 0x80: // U+0300 COMBINING GRAVE ACCENT
							s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE
						case 0x81: // U+0301 COMBINING ACUTE ACCENT
							s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE
						case 0x83: // U+0303 COMBINING TILDE
							s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE
						case 0x88: // U+0308 COMBINING DIAERESIS
							s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS
						default:
						}
						if s != "" {
							// Replace the 'I' already written with the
							// precomposed rune.
							c.pDst = oldPDst
							return c.writeString(s)
						}
					}
				}
				return c.copy()
			default:
				c.copy()
			}
		}
		return i == maxIgnorable
	}
}

// TODO: implement ltUpperSpan (low priority: complex and infrequent).

// aztrUpper wraps the casing function f with the Azeri/Turkish rule that maps
// a lowercase ASCII 'i' to İ. Every other rune is passed on to f.
func aztrUpper(f mapFunc) mapFunc {
	return func(c *context) bool {
		// i→İ;
		if c.src[c.pSrc] != 'i' {
			return f(c)
		}
		return c.writeString("İ")
	}
}

// aztrLower implements Azeri/Turkish lower casing. İ (U+0130) becomes "i". A
// capital I becomes "i" when it is followed by U+0307 COMBINING DOT ABOVE
// (which is dropped), possibly after intervening modifiers that do not have
// CCC Above, and becomes dotless "ı" otherwise. All other runes are handled
// by lower.
//
// For a capital I not followed by U+0307, the result is true only if the
// writes succeed and the lookahead was resolved: by a rune with ccc zero, by
// another Above modifier, or by reaching maxIgnorable.
func aztrLower(c *context) (done bool) {
	// From CLDR:
	// # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
	// # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE
	// İ→i;
	// # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
	// # This matches the behavior of the canonically equivalent I-dot_above
	// # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE
	// # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
	// # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I
	// I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ;
	// I→ı ;
	// ::Any-Lower();
	if c.hasPrefix("\u0130") { // İ
		return c.writeString("i")
	}
	if c.src[c.pSrc] != 'I' {
		return lower(c)
	}

	// We ignore the lower-case I for now, but insert it later when we know
	// which form we need.
	// start is the source offset just past the I; the modifiers skipped by the
	// loop below are written out from there.
	start := c.pSrc + c.sz

	i := 0
Loop:
	// We check for up to n ignorables before \u0307. As \u0307 is an
	// ignorable as well, n is maxIgnorable-1.
	for ; i < maxIgnorable && c.next(); i++ {
		switch c.info.cccType() {
		case cccAbove:
			if c.hasPrefix("\u0307") {
				// Dotted form: write "i" plus the skipped modifiers, leaving
				// out the U+0307 itself.
				return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307
			}
			done = true
			break Loop
		case cccZero:
			// A rune with ccc zero ends the modifier sequence; leave it for
			// the caller.
			c.unreadRune()
			done = true
			break Loop
		default:
			// We'll write this rune after we know which starter to use.
		}
	}
	if i == maxIgnorable {
		done = true
	}
	// Dotless form: write "ı" plus everything skipped so far, up to and
	// including the current rune.
	return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done
}

// aztrLowerSpan would be the same as isLower.

// nlTitle implements Dutch title casing: a word-initial "ij", in any
// combination of cases, becomes "IJ". Runes other than an ASCII 'I' or 'i' are
// handled by title.
//
// It returns false if writing fails or if no rune is available after the
// initial I.
func nlTitle(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	switch c.src[c.pSrc] {
	case 'I', 'i':
		// Handled below.
	default:
		return title(c)
	}

	if !c.writeString("I") {
		return false
	}
	if !c.next() {
		return false
	}
	switch c.src[c.pSrc] {
	case 'j', 'J':
		return c.writeString("J")
	}
	// Not a J: leave the rune for the caller.
	c.unreadRune()
	return true
}

// nlTitleSpan is the span counterpart of nlTitle. Runes other than an ASCII
// 'I' are checked with isTitle. After an 'I', a following 'j' fails the check,
// a following 'J' is consumed, and any other rune is unread. It also returns
// false if no rune is available after the 'I'.
func nlTitleSpan(c *context) bool {
	// From CLDR:
	// # Special titlecasing for Dutch initial "ij".
	// ::Any-Title();
	// # Fix up Ij at the beginning of a "word" (per Any-Title, not UAX #29)
	// [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ;
	if c.src[c.pSrc] != 'I' {
		return isTitle(c)
	}
	if !c.next() {
		return false
	}
	switch c.src[c.pSrc] {
	case 'j':
		return false
	case 'J':
		// Keep the J consumed.
	default:
		c.unreadRune()
	}
	return true
}

// afnlRewrite is the rewrite hook for Afrikaans and Dutch. It marks the
// context as mid-word when the input at the current position starts with an
// ASCII apostrophe or U+2019.
//
// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078.
func afnlRewrite(c *context) {
	switch {
	case c.hasPrefix("'"), c.hasPrefix("’"):
		c.isMidWord = true
	}
}