diff options
Diffstat (limited to 'vendor/golang.org/x/text/width/width.go')
-rw-r--r-- | vendor/golang.org/x/text/width/width.go | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/width/width.go b/vendor/golang.org/x/text/width/width.go new file mode 100644 index 0000000..29c7509 --- /dev/null +++ b/vendor/golang.org/x/text/width/width.go @@ -0,0 +1,206 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:generate stringer -type=Kind +//go:generate go run gen.go gen_common.go gen_trieval.go + +// Package width provides functionality for handling different widths in text. +// +// Wide characters behave like ideographs; they tend to allow line breaks after +// each character and remain upright in vertical text layout. Narrow characters +// are kept together in words or runs that are rotated sideways in vertical text +// layout. +// +// For more information, see https://unicode.org/reports/tr11/. +package width // import "golang.org/x/text/width" + +import ( + "unicode/utf8" + + "golang.org/x/text/transform" +) + +// TODO +// 1) Reduce table size by compressing blocks. +// 2) API proposition for computing display length +// (approximation, fixed pitch only). +// 3) Implement display length. + +// Kind indicates the type of width property as defined in https://unicode.org/reports/tr11/. +type Kind int + +const ( + // Neutral characters do not occur in legacy East Asian character sets. + Neutral Kind = iota + + // EastAsianAmbiguous characters that can be sometimes wide and sometimes + // narrow and require additional information not contained in the character + // code to further resolve their width. + EastAsianAmbiguous + + // EastAsianWide characters are wide in its usual form. They occur only in + // the context of East Asian typography. These runes may have explicit + // halfwidth counterparts. + EastAsianWide + + // EastAsianNarrow characters are narrow in its usual form. They often have + // fullwidth counterparts. + EastAsianNarrow + + // Note: there exist Narrow runes that do not have fullwidth or wide + // counterparts, despite what the definition says (e.g. U+27E6). + + // EastAsianFullwidth characters have a compatibility decompositions of type + // wide that map to a narrow counterpart. + EastAsianFullwidth + + // EastAsianHalfwidth characters have a compatibility decomposition of type + // narrow that map to a wide or ambiguous counterpart, plus U+20A9 ₩ WON + // SIGN. + EastAsianHalfwidth + + // Note: there exist runes that have a halfwidth counterparts but that are + // classified as Ambiguous, rather than wide (e.g. U+2190). +) + +// TODO: the generated tries need to return size 1 for invalid runes for the +// width to be computed correctly (each byte should render width 1) + +var trie = newWidthTrie(0) + +// Lookup reports the Properties of the first rune in b and the number of bytes +// of its UTF-8 encoding. +func Lookup(b []byte) (p Properties, size int) { + v, sz := trie.lookup(b) + return Properties{elem(v), b[sz-1]}, sz +} + +// LookupString reports the Properties of the first rune in s and the number of +// bytes of its UTF-8 encoding. +func LookupString(s string) (p Properties, size int) { + v, sz := trie.lookupString(s) + return Properties{elem(v), s[sz-1]}, sz +} + +// LookupRune reports the Properties of rune r. +func LookupRune(r rune) Properties { + var buf [4]byte + n := utf8.EncodeRune(buf[:], r) + v, _ := trie.lookup(buf[:n]) + last := byte(r) + if r >= utf8.RuneSelf { + last = 0x80 + byte(r&0x3f) + } + return Properties{elem(v), last} +} + +// Properties provides access to width properties of a rune. +type Properties struct { + elem elem + last byte +} + +func (e elem) kind() Kind { + return Kind(e >> typeShift) +} + +// Kind returns the Kind of a rune as defined in Unicode TR #11. +// See https://unicode.org/reports/tr11/ for more details. +func (p Properties) Kind() Kind { + return p.elem.kind() +} + +// Folded returns the folded variant of a rune or 0 if the rune is canonical. +func (p Properties) Folded() rune { + if p.elem&tagNeedsFold != 0 { + buf := inverseData[byte(p.elem)] + buf[buf[0]] ^= p.last + r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) + return r + } + return 0 +} + +// Narrow returns the narrow variant of a rune or 0 if the rune is already +// narrow or doesn't have a narrow variant. +func (p Properties) Narrow() rune { + if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianFullwidth || k == EastAsianWide || k == EastAsianAmbiguous) { + buf := inverseData[byte(p.elem)] + buf[buf[0]] ^= p.last + r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) + return r + } + return 0 +} + +// Wide returns the wide variant of a rune or 0 if the rune is already +// wide or doesn't have a wide variant. +func (p Properties) Wide() rune { + if k := p.elem.kind(); byte(p.elem) != 0 && (k == EastAsianHalfwidth || k == EastAsianNarrow) { + buf := inverseData[byte(p.elem)] + buf[buf[0]] ^= p.last + r, _ := utf8.DecodeRune(buf[1 : 1+buf[0]]) + return r + } + return 0 +} + +// TODO for Properties: +// - Add Fullwidth/Halfwidth or Inverted methods for computing variants +// mapping. +// - Add width information (including information on non-spacing runes). + +// Transformer implements the transform.Transformer interface. +type Transformer struct { + t transform.SpanningTransformer +} + +// Reset implements the transform.Transformer interface. +func (t Transformer) Reset() { t.t.Reset() } + +// Transform implements the transform.Transformer interface. +func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { + return t.t.Transform(dst, src, atEOF) +} + +// Span implements the transform.SpanningTransformer interface. +func (t Transformer) Span(src []byte, atEOF bool) (n int, err error) { + return t.t.Span(src, atEOF) +} + +// Bytes returns a new byte slice with the result of applying t to b. +func (t Transformer) Bytes(b []byte) []byte { + b, _, _ = transform.Bytes(t, b) + return b +} + +// String returns a string with the result of applying t to s. +func (t Transformer) String(s string) string { + s, _, _ = transform.String(t, s) + return s +} + +var ( + // Fold is a transform that maps all runes to their canonical width. + // + // Note that the NFKC and NFKD transforms in golang.org/x/text/unicode/norm + // provide a more generic folding mechanism. + Fold Transformer = Transformer{foldTransform{}} + + // Widen is a transform that maps runes to their wide variant, if + // available. + Widen Transformer = Transformer{wideTransform{}} + + // Narrow is a transform that maps runes to their narrow variant, if + // available. + Narrow Transformer = Transformer{narrowTransform{}} +) + +// TODO: Consider the following options: +// - Treat Ambiguous runes that have a halfwidth counterpart as wide, or some +// generalized variant of this. +// - Consider a wide Won character to be the default width (or some generalized +// variant of this). +// - Filter the set of characters that gets converted (the preferred approach is +// to allow applying filters to transforms). |