blob: 11e0ccbb19272a3c14ad4c67f29cd22adb3212f8 (
plain) (
tree)
|
|
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package precis
import (
"unicode"
"unicode/utf8"
"golang.org/x/text/transform"
)
type nickAdditionalMapping struct {
// TODO: This transformer needs to be stateless somehow…
notStart bool
prevSpace bool
}
func (t *nickAdditionalMapping) Reset() {
t.prevSpace = false
t.notStart = false
}
func (t *nickAdditionalMapping) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
// RFC 8266 §2.1. Rules
//
// 2. Additional Mapping Rule: The additional mapping rule consists of
// the following sub-rules.
//
// a. Map any instances of non-ASCII space to SPACE (U+0020); a
// non-ASCII space is any Unicode code point having a general
// category of "Zs", naturally with the exception of SPACE
// (U+0020). (The inclusion of only ASCII space prevents
// confusion with various non-ASCII space code points, many of
// which are difficult to reproduce across different input
// methods.)
//
// b. Remove any instances of the ASCII space character at the
// beginning or end of a nickname (e.g., "stpeter " is mapped to
// "stpeter").
//
// c. Map interior sequences of more than one ASCII space character
// to a single ASCII space character (e.g., "St Peter" is
// mapped to "St Peter").
for nSrc < len(src) {
r, size := utf8.DecodeRune(src[nSrc:])
if size == 0 { // Incomplete UTF-8 encoding
if !atEOF {
return nDst, nSrc, transform.ErrShortSrc
}
size = 1
}
if unicode.Is(unicode.Zs, r) {
t.prevSpace = true
} else {
if t.prevSpace && t.notStart {
dst[nDst] = ' '
nDst += 1
}
if size != copy(dst[nDst:], src[nSrc:nSrc+size]) {
nDst += size
return nDst, nSrc, transform.ErrShortDst
}
nDst += size
t.prevSpace = false
t.notStart = true
}
nSrc += size
}
return nDst, nSrc, nil
}
|