From 6ea4d2c82de80efc87708e5e182034b7c6c2019e Mon Sep 17 00:00:00 2001 From: Gibheer Date: Thu, 5 Sep 2024 19:38:25 +0200 Subject: switch from github.com/lib/pq to github.com/jackc/pgx/v5 lib/pq is out of maintenance for some time now, so switch to the newer more active library. Looks like it finally stabilized after a long time. --- vendor/golang.org/x/text/cases/map.go | 816 ++++++++++++++++++++++++++++++++++ 1 file changed, 816 insertions(+) create mode 100644 vendor/golang.org/x/text/cases/map.go (limited to 'vendor/golang.org/x/text/cases/map.go') diff --git a/vendor/golang.org/x/text/cases/map.go b/vendor/golang.org/x/text/cases/map.go new file mode 100644 index 0000000..0f7c6a1 --- /dev/null +++ b/vendor/golang.org/x/text/cases/map.go @@ -0,0 +1,816 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package cases + +// This file contains the definitions of case mappings for all supported +// languages. The rules for the language-specific tailorings were taken and +// modified from the CLDR transform definitions in common/transforms. + +import ( + "strings" + "unicode" + "unicode/utf8" + + "golang.org/x/text/internal" + "golang.org/x/text/language" + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" +) + +// A mapFunc takes a context set to the current rune and writes the mapped +// version to the same context. It may advance the context to the next rune. It +// returns whether a checkpoint is possible: whether the pDst bytes written to +// dst so far won't need changing as we see more source bytes. +type mapFunc func(*context) bool + +// A spanFunc takes a context set to the current rune and returns whether this +// rune would be altered when written to the output. It may advance the context +// to the next rune. It returns whether a checkpoint is possible. +type spanFunc func(*context) bool + +// maxIgnorable defines the maximum number of ignorables to consider for +// lookahead operations. +const maxIgnorable = 30 + +// supported lists the language tags for which we have tailorings. +const supported = "und af az el lt nl tr" + +func init() { + tags := []language.Tag{} + for _, s := range strings.Split(supported, " ") { + tags = append(tags, language.MustParse(s)) + } + matcher = internal.NewInheritanceMatcher(tags) + Supported = language.NewCoverage(tags) +} + +var ( + matcher *internal.InheritanceMatcher + + Supported language.Coverage + + // We keep the following lists separate, instead of having a single per- + // language struct, to give the compiler a chance to remove unused code. + + // Some uppercase mappers are stateless, so we can precompute the + // Transformers and save a bit on runtime allocations. + upperFunc = []struct { + upper mapFunc + span spanFunc + }{ + {nil, nil}, // und + {nil, nil}, // af + {aztrUpper(upper), isUpper}, // az + {elUpper, noSpan}, // el + {ltUpper(upper), noSpan}, // lt + {nil, nil}, // nl + {aztrUpper(upper), isUpper}, // tr + } + + undUpper transform.SpanningTransformer = &undUpperCaser{} + undLower transform.SpanningTransformer = &undLowerCaser{} + undLowerIgnoreSigma transform.SpanningTransformer = &undLowerIgnoreSigmaCaser{} + + lowerFunc = []mapFunc{ + nil, // und + nil, // af + aztrLower, // az + nil, // el + ltLower, // lt + nil, // nl + aztrLower, // tr + } + + titleInfos = []struct { + title mapFunc + lower mapFunc + titleSpan spanFunc + rewrite func(*context) + }{ + {title, lower, isTitle, nil}, // und + {title, lower, isTitle, afnlRewrite}, // af + {aztrUpper(title), aztrLower, isTitle, nil}, // az + {title, lower, isTitle, nil}, // el + {ltUpper(title), ltLower, noSpan, nil}, // lt + {nlTitle, lower, nlTitleSpan, afnlRewrite}, // nl + {aztrUpper(title), aztrLower, isTitle, nil}, // tr + } +) + +func makeUpper(t language.Tag, o options) transform.SpanningTransformer { + _, i, _ := matcher.Match(t) + f := upperFunc[i].upper + if f == nil { + return undUpper + } + return &simpleCaser{f: f, span: upperFunc[i].span} +} + +func makeLower(t language.Tag, o options) transform.SpanningTransformer { + _, i, _ := matcher.Match(t) + f := lowerFunc[i] + if f == nil { + if o.ignoreFinalSigma { + return undLowerIgnoreSigma + } + return undLower + } + if o.ignoreFinalSigma { + return &simpleCaser{f: f, span: isLower} + } + return &lowerCaser{ + first: f, + midWord: finalSigma(f), + } +} + +func makeTitle(t language.Tag, o options) transform.SpanningTransformer { + _, i, _ := matcher.Match(t) + x := &titleInfos[i] + lower := x.lower + if o.noLower { + lower = (*context).copy + } else if !o.ignoreFinalSigma { + lower = finalSigma(lower) + } + return &titleCaser{ + title: x.title, + lower: lower, + titleSpan: x.titleSpan, + rewrite: x.rewrite, + } +} + +func noSpan(c *context) bool { + c.err = transform.ErrEndOfSpan + return false +} + +// TODO: consider a similar special case for the fast majority lower case. This +// is a bit more involved so will require some more precise benchmarking to +// justify it. + +type undUpperCaser struct{ transform.NopResetter } + +// undUpperCaser implements the Transformer interface for doing an upper case +// mapping for the root locale (und). It eliminates the need for an allocation +// as it prevents escaping by not using function pointers. +func (t undUpperCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + c := context{dst: dst, src: src, atEOF: atEOF} + for c.next() { + upper(&c) + c.checkpoint() + } + return c.ret() +} + +func (t undUpperCaser) Span(src []byte, atEOF bool) (n int, err error) { + c := context{src: src, atEOF: atEOF} + for c.next() && isUpper(&c) { + c.checkpoint() + } + return c.retSpan() +} + +// undLowerIgnoreSigmaCaser implements the Transformer interface for doing +// a lower case mapping for the root locale (und) ignoring final sigma +// handling. This casing algorithm is used in some performance-critical packages +// like secure/precis and x/net/http/idna, which warrants its special-casing. +type undLowerIgnoreSigmaCaser struct{ transform.NopResetter } + +func (t undLowerIgnoreSigmaCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + c := context{dst: dst, src: src, atEOF: atEOF} + for c.next() && lower(&c) { + c.checkpoint() + } + return c.ret() + +} + +// Span implements a generic lower-casing. This is possible as isLower works +// for all lowercasing variants. All lowercase variants only vary in how they +// transform a non-lowercase letter. They will never change an already lowercase +// letter. In addition, there is no state. +func (t undLowerIgnoreSigmaCaser) Span(src []byte, atEOF bool) (n int, err error) { + c := context{src: src, atEOF: atEOF} + for c.next() && isLower(&c) { + c.checkpoint() + } + return c.retSpan() +} + +type simpleCaser struct { + context + f mapFunc + span spanFunc +} + +// simpleCaser implements the Transformer interface for doing a case operation +// on a rune-by-rune basis. +func (t *simpleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + c := context{dst: dst, src: src, atEOF: atEOF} + for c.next() && t.f(&c) { + c.checkpoint() + } + return c.ret() +} + +func (t *simpleCaser) Span(src []byte, atEOF bool) (n int, err error) { + c := context{src: src, atEOF: atEOF} + for c.next() && t.span(&c) { + c.checkpoint() + } + return c.retSpan() +} + +// undLowerCaser implements the Transformer interface for doing a lower case +// mapping for the root locale (und) ignoring final sigma handling. This casing +// algorithm is used in some performance-critical packages like secure/precis +// and x/net/http/idna, which warrants its special-casing. +type undLowerCaser struct{ transform.NopResetter } + +func (t undLowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + c := context{dst: dst, src: src, atEOF: atEOF} + + for isInterWord := true; c.next(); { + if isInterWord { + if c.info.isCased() { + if !lower(&c) { + break + } + isInterWord = false + } else if !c.copy() { + break + } + } else { + if c.info.isNotCasedAndNotCaseIgnorable() { + if !c.copy() { + break + } + isInterWord = true + } else if !c.hasPrefix("Σ") { + if !lower(&c) { + break + } + } else if !finalSigmaBody(&c) { + break + } + } + c.checkpoint() + } + return c.ret() +} + +func (t undLowerCaser) Span(src []byte, atEOF bool) (n int, err error) { + c := context{src: src, atEOF: atEOF} + for c.next() && isLower(&c) { + c.checkpoint() + } + return c.retSpan() +} + +// lowerCaser implements the Transformer interface. The default Unicode lower +// casing requires different treatment for the first and subsequent characters +// of a word, most notably to handle the Greek final Sigma. +type lowerCaser struct { + undLowerIgnoreSigmaCaser + + context + + first, midWord mapFunc +} + +func (t *lowerCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + t.context = context{dst: dst, src: src, atEOF: atEOF} + c := &t.context + + for isInterWord := true; c.next(); { + if isInterWord { + if c.info.isCased() { + if !t.first(c) { + break + } + isInterWord = false + } else if !c.copy() { + break + } + } else { + if c.info.isNotCasedAndNotCaseIgnorable() { + if !c.copy() { + break + } + isInterWord = true + } else if !t.midWord(c) { + break + } + } + c.checkpoint() + } + return c.ret() +} + +// titleCaser implements the Transformer interface. Title casing algorithms +// distinguish between the first letter of a word and subsequent letters of the +// same word. It uses state to avoid requiring a potentially infinite lookahead. +type titleCaser struct { + context + + // rune mappings used by the actual casing algorithms. + title mapFunc + lower mapFunc + titleSpan spanFunc + + rewrite func(*context) +} + +// Transform implements the standard Unicode title case algorithm as defined in +// Chapter 3 of The Unicode Standard: +// toTitlecase(X): Find the word boundaries in X according to Unicode Standard +// Annex #29, "Unicode Text Segmentation." For each word boundary, find the +// first cased character F following the word boundary. If F exists, map F to +// Titlecase_Mapping(F); then map all characters C between F and the following +// word boundary to Lowercase_Mapping(C). +func (t *titleCaser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + t.context = context{dst: dst, src: src, atEOF: atEOF, isMidWord: t.isMidWord} + c := &t.context + + if !c.next() { + return c.ret() + } + + for { + p := c.info + if t.rewrite != nil { + t.rewrite(c) + } + + wasMid := p.isMid() + // Break out of this loop on failure to ensure we do not modify the + // state incorrectly. + if p.isCased() { + if !c.isMidWord { + if !t.title(c) { + break + } + c.isMidWord = true + } else if !t.lower(c) { + break + } + } else if !c.copy() { + break + } else if p.isBreak() { + c.isMidWord = false + } + + // As we save the state of the transformer, it is safe to call + // checkpoint after any successful write. + if !(c.isMidWord && wasMid) { + c.checkpoint() + } + + if !c.next() { + break + } + if wasMid && c.info.isMid() { + c.isMidWord = false + } + } + return c.ret() +} + +func (t *titleCaser) Span(src []byte, atEOF bool) (n int, err error) { + t.context = context{src: src, atEOF: atEOF, isMidWord: t.isMidWord} + c := &t.context + + if !c.next() { + return c.retSpan() + } + + for { + p := c.info + if t.rewrite != nil { + t.rewrite(c) + } + + wasMid := p.isMid() + // Break out of this loop on failure to ensure we do not modify the + // state incorrectly. + if p.isCased() { + if !c.isMidWord { + if !t.titleSpan(c) { + break + } + c.isMidWord = true + } else if !isLower(c) { + break + } + } else if p.isBreak() { + c.isMidWord = false + } + // As we save the state of the transformer, it is safe to call + // checkpoint after any successful write. + if !(c.isMidWord && wasMid) { + c.checkpoint() + } + + if !c.next() { + break + } + if wasMid && c.info.isMid() { + c.isMidWord = false + } + } + return c.retSpan() +} + +// finalSigma adds Greek final Sigma handing to another casing function. It +// determines whether a lowercased sigma should be σ or ς, by looking ahead for +// case-ignorables and a cased letters. +func finalSigma(f mapFunc) mapFunc { + return func(c *context) bool { + if !c.hasPrefix("Σ") { + return f(c) + } + return finalSigmaBody(c) + } +} + +func finalSigmaBody(c *context) bool { + // Current rune must be ∑. + + // ::NFD(); + // # 03A3; 03C2; 03A3; 03A3; Final_Sigma; # GREEK CAPITAL LETTER SIGMA + // Σ } [:case-ignorable:]* [:cased:] → σ; + // [:cased:] [:case-ignorable:]* { Σ → ς; + // ::Any-Lower; + // ::NFC(); + + p := c.pDst + c.writeString("ς") + + // TODO: we should do this here, but right now this will never have an + // effect as this is called when the prefix is Sigma, whereas Dutch and + // Afrikaans only test for an apostrophe. + // + // if t.rewrite != nil { + // t.rewrite(c) + // } + + // We need to do one more iteration after maxIgnorable, as a cased + // letter is not an ignorable and may modify the result. + wasMid := false + for i := 0; i < maxIgnorable+1; i++ { + if !c.next() { + return false + } + if !c.info.isCaseIgnorable() { + // All Midword runes are also case ignorable, so we are + // guaranteed to have a letter or word break here. As we are + // unreading the run, there is no need to unset c.isMidWord; + // the title caser will handle this. + if c.info.isCased() { + // p+1 is guaranteed to be in bounds: if writing ς was + // successful, p+1 will contain the second byte of ς. If not, + // this function will have returned after c.next returned false. + c.dst[p+1]++ // ς → σ + } + c.unreadRune() + return true + } + // A case ignorable may also introduce a word break, so we may need + // to continue searching even after detecting a break. + isMid := c.info.isMid() + if (wasMid && isMid) || c.info.isBreak() { + c.isMidWord = false + } + wasMid = isMid + c.copy() + } + return true +} + +// finalSigmaSpan would be the same as isLower. + +// elUpper implements Greek upper casing, which entails removing a predefined +// set of non-blocked modifiers. Note that these accents should not be removed +// for title casing! +// Example: "Οδός" -> "ΟΔΟΣ". +func elUpper(c *context) bool { + // From CLDR: + // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Above:]]*? { [\u0313\u0314\u0301\u0300\u0306\u0342\u0308\u0304] → ; + // [:Greek:] [^[:ccc=Not_Reordered:][:ccc=Iota_Subscript:]]*? { \u0345 → ; + + r, _ := utf8.DecodeRune(c.src[c.pSrc:]) + oldPDst := c.pDst + if !upper(c) { + return false + } + if !unicode.Is(unicode.Greek, r) { + return true + } + i := 0 + // Take the properties of the uppercased rune that is already written to the + // destination. This saves us the trouble of having to uppercase the + // decomposed rune again. + if b := norm.NFD.Properties(c.dst[oldPDst:]).Decomposition(); b != nil { + // Restore the destination position and process the decomposed rune. + r, sz := utf8.DecodeRune(b) + if r <= 0xFF { // See A.6.1 + return true + } + c.pDst = oldPDst + // Insert the first rune and ignore the modifiers. See A.6.2. + c.writeBytes(b[:sz]) + i = len(b[sz:]) / 2 // Greek modifiers are always of length 2. + } + + for ; i < maxIgnorable && c.next(); i++ { + switch r, _ := utf8.DecodeRune(c.src[c.pSrc:]); r { + // Above and Iota Subscript + case 0x0300, // U+0300 COMBINING GRAVE ACCENT + 0x0301, // U+0301 COMBINING ACUTE ACCENT + 0x0304, // U+0304 COMBINING MACRON + 0x0306, // U+0306 COMBINING BREVE + 0x0308, // U+0308 COMBINING DIAERESIS + 0x0313, // U+0313 COMBINING COMMA ABOVE + 0x0314, // U+0314 COMBINING REVERSED COMMA ABOVE + 0x0342, // U+0342 COMBINING GREEK PERISPOMENI + 0x0345: // U+0345 COMBINING GREEK YPOGEGRAMMENI + // No-op. Gobble the modifier. + + default: + switch v, _ := trie.lookup(c.src[c.pSrc:]); info(v).cccType() { + case cccZero: + c.unreadRune() + return true + + // We don't need to test for IotaSubscript as the only rune that + // qualifies (U+0345) was already excluded in the switch statement + // above. See A.4. + + case cccAbove: + return c.copy() + default: + // Some other modifier. We're still allowed to gobble Greek + // modifiers after this. + c.copy() + } + } + } + return i == maxIgnorable +} + +// TODO: implement elUpperSpan (low-priority: complex and infrequent). + +func ltLower(c *context) bool { + // From CLDR: + // # Introduce an explicit dot above when lowercasing capital I's and J's + // # whenever there are more accents above. + // # (of the accents used in Lithuanian: grave, acute, tilde above, and ogonek) + // # 0049; 0069 0307; 0049; 0049; lt More_Above; # LATIN CAPITAL LETTER I + // # 004A; 006A 0307; 004A; 004A; lt More_Above; # LATIN CAPITAL LETTER J + // # 012E; 012F 0307; 012E; 012E; lt More_Above; # LATIN CAPITAL LETTER I WITH OGONEK + // # 00CC; 0069 0307 0300; 00CC; 00CC; lt; # LATIN CAPITAL LETTER I WITH GRAVE + // # 00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE + // # 0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE + // ::NFD(); + // I } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0307; + // J } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → j \u0307; + // I \u0328 (Į) } [^[:ccc=Not_Reordered:][:ccc=Above:]]* [:ccc=Above:] → i \u0328 \u0307; + // I \u0300 (Ì) → i \u0307 \u0300; + // I \u0301 (Í) → i \u0307 \u0301; + // I \u0303 (Ĩ) → i \u0307 \u0303; + // ::Any-Lower(); + // ::NFC(); + + i := 0 + if r := c.src[c.pSrc]; r < utf8.RuneSelf { + lower(c) + if r != 'I' && r != 'J' { + return true + } + } else { + p := norm.NFD.Properties(c.src[c.pSrc:]) + if d := p.Decomposition(); len(d) >= 3 && (d[0] == 'I' || d[0] == 'J') { + // UTF-8 optimization: the decomposition will only have an above + // modifier if the last rune of the decomposition is in [U+300-U+311]. + // In all other cases, a decomposition starting with I is always + // an I followed by modifiers that are not cased themselves. See A.2. + if d[1] == 0xCC && d[2] <= 0x91 { // A.2.4. + if !c.writeBytes(d[:1]) { + return false + } + c.dst[c.pDst-1] += 'a' - 'A' // lower + + // Assumption: modifier never changes on lowercase. See A.1. + // Assumption: all modifiers added have CCC = Above. See A.2.3. + return c.writeString("\u0307") && c.writeBytes(d[1:]) + } + // In all other cases the additional modifiers will have a CCC + // that is less than 230 (Above). We will insert the U+0307, if + // needed, after these modifiers so that a string in FCD form + // will remain so. See A.2.2. + lower(c) + i = 1 + } else { + return lower(c) + } + } + + for ; i < maxIgnorable && c.next(); i++ { + switch c.info.cccType() { + case cccZero: + c.unreadRune() + return true + case cccAbove: + return c.writeString("\u0307") && c.copy() // See A.1. + default: + c.copy() // See A.1. + } + } + return i == maxIgnorable +} + +// ltLowerSpan would be the same as isLower. + +func ltUpper(f mapFunc) mapFunc { + return func(c *context) bool { + // Unicode: + // 0307; 0307; ; ; lt After_Soft_Dotted; # COMBINING DOT ABOVE + // + // From CLDR: + // # Remove \u0307 following soft-dotteds (i, j, and the like), with possible + // # intervening non-230 marks. + // ::NFD(); + // [:Soft_Dotted:] [^[:ccc=Not_Reordered:][:ccc=Above:]]* { \u0307 → ; + // ::Any-Upper(); + // ::NFC(); + + // TODO: See A.5. A soft-dotted rune never has an exception. This would + // allow us to overload the exception bit and encode this property in + // info. Need to measure performance impact of this. + r, _ := utf8.DecodeRune(c.src[c.pSrc:]) + oldPDst := c.pDst + if !f(c) { + return false + } + if !unicode.Is(unicode.Soft_Dotted, r) { + return true + } + + // We don't need to do an NFD normalization, as a soft-dotted rune never + // contains U+0307. See A.3. + + i := 0 + for ; i < maxIgnorable && c.next(); i++ { + switch c.info.cccType() { + case cccZero: + c.unreadRune() + return true + case cccAbove: + if c.hasPrefix("\u0307") { + // We don't do a full NFC, but rather combine runes for + // some of the common cases. (Returning NFC or + // preserving normal form is neither a requirement nor + // a possibility anyway). + if !c.next() { + return false + } + if c.dst[oldPDst] == 'I' && c.pDst == oldPDst+1 && c.src[c.pSrc] == 0xcc { + s := "" + switch c.src[c.pSrc+1] { + case 0x80: // U+0300 COMBINING GRAVE ACCENT + s = "\u00cc" // U+00CC LATIN CAPITAL LETTER I WITH GRAVE + case 0x81: // U+0301 COMBINING ACUTE ACCENT + s = "\u00cd" // U+00CD LATIN CAPITAL LETTER I WITH ACUTE + case 0x83: // U+0303 COMBINING TILDE + s = "\u0128" // U+0128 LATIN CAPITAL LETTER I WITH TILDE + case 0x88: // U+0308 COMBINING DIAERESIS + s = "\u00cf" // U+00CF LATIN CAPITAL LETTER I WITH DIAERESIS + default: + } + if s != "" { + c.pDst = oldPDst + return c.writeString(s) + } + } + } + return c.copy() + default: + c.copy() + } + } + return i == maxIgnorable + } +} + +// TODO: implement ltUpperSpan (low priority: complex and infrequent). + +func aztrUpper(f mapFunc) mapFunc { + return func(c *context) bool { + // i→İ; + if c.src[c.pSrc] == 'i' { + return c.writeString("İ") + } + return f(c) + } +} + +func aztrLower(c *context) (done bool) { + // From CLDR: + // # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri + // # 0130; 0069; 0130; 0130; tr; # LATIN CAPITAL LETTER I WITH DOT ABOVE + // İ→i; + // # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i. + // # This matches the behavior of the canonically equivalent I-dot_above + // # 0307; ; 0307; 0307; tr After_I; # COMBINING DOT ABOVE + // # When lowercasing, unless an I is before a dot_above, it turns into a dotless i. + // # 0049; 0131; 0049; 0049; tr Not_Before_Dot; # LATIN CAPITAL LETTER I + // I([^[:ccc=Not_Reordered:][:ccc=Above:]]*)\u0307 → i$1 ; + // I→ı ; + // ::Any-Lower(); + if c.hasPrefix("\u0130") { // İ + return c.writeString("i") + } + if c.src[c.pSrc] != 'I' { + return lower(c) + } + + // We ignore the lower-case I for now, but insert it later when we know + // which form we need. + start := c.pSrc + c.sz + + i := 0 +Loop: + // We check for up to n ignorables before \u0307. As \u0307 is an + // ignorable as well, n is maxIgnorable-1. + for ; i < maxIgnorable && c.next(); i++ { + switch c.info.cccType() { + case cccAbove: + if c.hasPrefix("\u0307") { + return c.writeString("i") && c.writeBytes(c.src[start:c.pSrc]) // ignore U+0307 + } + done = true + break Loop + case cccZero: + c.unreadRune() + done = true + break Loop + default: + // We'll write this rune after we know which starter to use. + } + } + if i == maxIgnorable { + done = true + } + return c.writeString("ı") && c.writeBytes(c.src[start:c.pSrc+c.sz]) && done +} + +// aztrLowerSpan would be the same as isLower. + +func nlTitle(c *context) bool { + // From CLDR: + // # Special titlecasing for Dutch initial "ij". + // ::Any-Title(); + // # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29) + // [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; + if c.src[c.pSrc] != 'I' && c.src[c.pSrc] != 'i' { + return title(c) + } + + if !c.writeString("I") || !c.next() { + return false + } + if c.src[c.pSrc] == 'j' || c.src[c.pSrc] == 'J' { + return c.writeString("J") + } + c.unreadRune() + return true +} + +func nlTitleSpan(c *context) bool { + // From CLDR: + // # Special titlecasing for Dutch initial "ij". + // ::Any-Title(); + // # Fix up Ij at the beginning of a "word" (per Any-Title, notUAX #29) + // [:^WB=ALetter:] [:WB=Extend:]* [[:WB=MidLetter:][:WB=MidNumLet:]]? { Ij } → IJ ; + if c.src[c.pSrc] != 'I' { + return isTitle(c) + } + if !c.next() || c.src[c.pSrc] == 'j' { + return false + } + if c.src[c.pSrc] != 'J' { + c.unreadRune() + } + return true +} + +// Not part of CLDR, but see https://unicode.org/cldr/trac/ticket/7078. +func afnlRewrite(c *context) { + if c.hasPrefix("'") || c.hasPrefix("’") { + c.isMidWord = true + } +} -- cgit v1.2.3-70-g09d2