diff options
author | Gibheer <gibheer+git@zero-knowledge.org> | 2024-09-05 19:38:25 +0200 |
---|---|---|
committer | Gibheer <gibheer+git@zero-knowledge.org> | 2024-09-05 19:38:25 +0200 |
commit | 6ea4d2c82de80efc87708e5e182034b7c6c2019e (patch) | |
tree | 35c0856a929040216c82153ca62d43b27530a887 /vendor/golang.org/x/text/unicode/bidi/bracket.go | |
parent | 6f64eeace1b66639b9380b44e88a8d54850a4306 (diff) |
lib/pq is out of maintenance for some time now, so switch to the newer
more active library. Looks like it finally stabilized after a long time.
Diffstat (limited to 'vendor/golang.org/x/text/unicode/bidi/bracket.go')
-rw-r--r-- | vendor/golang.org/x/text/unicode/bidi/bracket.go | 335 |
1 files changed, 335 insertions, 0 deletions
diff --git a/vendor/golang.org/x/text/unicode/bidi/bracket.go b/vendor/golang.org/x/text/unicode/bidi/bracket.go new file mode 100644 index 0000000..1853939 --- /dev/null +++ b/vendor/golang.org/x/text/unicode/bidi/bracket.go @@ -0,0 +1,335 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bidi + +import ( + "container/list" + "fmt" + "sort" +) + +// This file contains a port of the reference implementation of the +// Bidi Parentheses Algorithm: +// https://www.unicode.org/Public/PROGRAMS/BidiReferenceJava/BidiPBAReference.java +// +// The implementation in this file covers definitions BD14-BD16 and rule N0 +// of UAX#9. +// +// Some preprocessing is done for each rune before data is passed to this +// algorithm: +// - opening and closing brackets are identified +// - a bracket pair type, like '(' and ')' is assigned a unique identifier that +// is identical for the opening and closing bracket. It is left to do these +// mappings. +// - The BPA algorithm requires that bracket characters that are canonical +// equivalents of each other be able to be substituted for each other. +// It is the responsibility of the caller to do this canonicalization. +// +// In implementing BD16, this implementation departs slightly from the "logical" +// algorithm defined in UAX#9. In particular, the stack referenced there +// supports operations that go beyond a "basic" stack. An equivalent +// implementation based on a linked list is used here. + +// Bidi_Paired_Bracket_Type +// BD14. An opening paired bracket is a character whose +// Bidi_Paired_Bracket_Type property value is Open. +// +// BD15. A closing paired bracket is a character whose +// Bidi_Paired_Bracket_Type property value is Close. +type bracketType byte + +const ( + bpNone bracketType = iota + bpOpen + bpClose +) + +// bracketPair holds a pair of index values for opening and closing bracket +// location of a bracket pair. +type bracketPair struct { + opener int + closer int +} + +func (b *bracketPair) String() string { + return fmt.Sprintf("(%v, %v)", b.opener, b.closer) +} + +// bracketPairs is a slice of bracketPairs with a sort.Interface implementation. +type bracketPairs []bracketPair + +func (b bracketPairs) Len() int { return len(b) } +func (b bracketPairs) Swap(i, j int) { b[i], b[j] = b[j], b[i] } +func (b bracketPairs) Less(i, j int) bool { return b[i].opener < b[j].opener } + +// resolvePairedBrackets runs the paired bracket part of the UBA algorithm. +// +// For each rune, it takes the indexes into the original string, the class the +// bracket type (in pairTypes) and the bracket identifier (pairValues). It also +// takes the direction type for the start-of-sentence and the embedding level. +// +// The identifiers for bracket types are the rune of the canonicalized opening +// bracket for brackets (open or close) or 0 for runes that are not brackets. +func resolvePairedBrackets(s *isolatingRunSequence) { + p := bracketPairer{ + sos: s.sos, + openers: list.New(), + codesIsolatedRun: s.types, + indexes: s.indexes, + } + dirEmbed := L + if s.level&1 != 0 { + dirEmbed = R + } + p.locateBrackets(s.p.pairTypes, s.p.pairValues) + p.resolveBrackets(dirEmbed, s.p.initialTypes) +} + +type bracketPairer struct { + sos Class // direction corresponding to start of sequence + + // The following is a restatement of BD 16 using non-algorithmic language. + // + // A bracket pair is a pair of characters consisting of an opening + // paired bracket and a closing paired bracket such that the + // Bidi_Paired_Bracket property value of the former equals the latter, + // subject to the following constraints. + // - both characters of a pair occur in the same isolating run sequence + // - the closing character of a pair follows the opening character + // - any bracket character can belong at most to one pair, the earliest possible one + // - any bracket character not part of a pair is treated like an ordinary character + // - pairs may nest properly, but their spans may not overlap otherwise + + // Bracket characters with canonical decompositions are supposed to be + // treated as if they had been normalized, to allow normalized and non- + // normalized text to give the same result. In this implementation that step + // is pushed out to the caller. The caller has to ensure that the pairValue + // slices contain the rune of the opening bracket after normalization for + // any opening or closing bracket. + + openers *list.List // list of positions for opening brackets + + // bracket pair positions sorted by location of opening bracket + pairPositions bracketPairs + + codesIsolatedRun []Class // directional bidi codes for an isolated run + indexes []int // array of index values into the original string + +} + +// matchOpener reports whether characters at given positions form a matching +// bracket pair. +func (p *bracketPairer) matchOpener(pairValues []rune, opener, closer int) bool { + return pairValues[p.indexes[opener]] == pairValues[p.indexes[closer]] +} + +const maxPairingDepth = 63 + +// locateBrackets locates matching bracket pairs according to BD16. +// +// This implementation uses a linked list instead of a stack, because, while +// elements are added at the front (like a push) they are not generally removed +// in atomic 'pop' operations, reducing the benefit of the stack archetype. +func (p *bracketPairer) locateBrackets(pairTypes []bracketType, pairValues []rune) { + // traverse the run + // do that explicitly (not in a for-each) so we can record position + for i, index := range p.indexes { + + // look at the bracket type for each character + if pairTypes[index] == bpNone || p.codesIsolatedRun[i] != ON { + // continue scanning + continue + } + switch pairTypes[index] { + case bpOpen: + // check if maximum pairing depth reached + if p.openers.Len() == maxPairingDepth { + p.openers.Init() + return + } + // remember opener location, most recent first + p.openers.PushFront(i) + + case bpClose: + // see if there is a match + count := 0 + for elem := p.openers.Front(); elem != nil; elem = elem.Next() { + count++ + opener := elem.Value.(int) + if p.matchOpener(pairValues, opener, i) { + // if the opener matches, add nested pair to the ordered list + p.pairPositions = append(p.pairPositions, bracketPair{opener, i}) + // remove up to and including matched opener + for ; count > 0; count-- { + p.openers.Remove(p.openers.Front()) + } + break + } + } + sort.Sort(p.pairPositions) + // if we get here, the closing bracket matched no openers + // and gets ignored + } + } +} + +// Bracket pairs within an isolating run sequence are processed as units so +// that both the opening and the closing paired bracket in a pair resolve to +// the same direction. +// +// N0. Process bracket pairs in an isolating run sequence sequentially in +// the logical order of the text positions of the opening paired brackets +// using the logic given below. Within this scope, bidirectional types EN +// and AN are treated as R. +// +// Identify the bracket pairs in the current isolating run sequence +// according to BD16. For each bracket-pair element in the list of pairs of +// text positions: +// +// a Inspect the bidirectional types of the characters enclosed within the +// bracket pair. +// +// b If any strong type (either L or R) matching the embedding direction is +// found, set the type for both brackets in the pair to match the embedding +// direction. +// +// o [ e ] o -> o e e e o +// +// o [ o e ] -> o e o e e +// +// o [ NI e ] -> o e NI e e +// +// c Otherwise, if a strong type (opposite the embedding direction) is +// found, test for adjacent strong types as follows: 1 First, check +// backwards before the opening paired bracket until the first strong type +// (L, R, or sos) is found. If that first preceding strong type is opposite +// the embedding direction, then set the type for both brackets in the pair +// to that type. 2 Otherwise, set the type for both brackets in the pair to +// the embedding direction. +// +// o [ o ] e -> o o o o e +// +// o [ o NI ] o -> o o o NI o o +// +// e [ o ] o -> e e o e o +// +// e [ o ] e -> e e o e e +// +// e ( o [ o ] NI ) e -> e e o o o o NI e e +// +// d Otherwise, do not set the type for the current bracket pair. Note that +// if the enclosed text contains no strong types the paired brackets will +// both resolve to the same level when resolved individually using rules N1 +// and N2. +// +// e ( NI ) o -> e ( NI ) o + +// getStrongTypeN0 maps character's directional code to strong type as required +// by rule N0. +// +// TODO: have separate type for "strong" directionality. +func (p *bracketPairer) getStrongTypeN0(index int) Class { + switch p.codesIsolatedRun[index] { + // in the scope of N0, number types are treated as R + case EN, AN, AL, R: + return R + case L: + return L + default: + return ON + } +} + +// classifyPairContent reports the strong types contained inside a Bracket Pair, +// assuming the given embedding direction. +// +// It returns ON if no strong type is found. If a single strong type is found, +// it returns this type. Otherwise it returns the embedding direction. +// +// TODO: use separate type for "strong" directionality. +func (p *bracketPairer) classifyPairContent(loc bracketPair, dirEmbed Class) Class { + dirOpposite := ON + for i := loc.opener + 1; i < loc.closer; i++ { + dir := p.getStrongTypeN0(i) + if dir == ON { + continue + } + if dir == dirEmbed { + return dir // type matching embedding direction found + } + dirOpposite = dir + } + // return ON if no strong type found, or class opposite to dirEmbed + return dirOpposite +} + +// classBeforePair determines which strong types are present before a Bracket +// Pair. Return R or L if strong type found, otherwise ON. +func (p *bracketPairer) classBeforePair(loc bracketPair) Class { + for i := loc.opener - 1; i >= 0; i-- { + if dir := p.getStrongTypeN0(i); dir != ON { + return dir + } + } + // no strong types found, return sos + return p.sos +} + +// assignBracketType implements rule N0 for a single bracket pair. +func (p *bracketPairer) assignBracketType(loc bracketPair, dirEmbed Class, initialTypes []Class) { + // rule "N0, a", inspect contents of pair + dirPair := p.classifyPairContent(loc, dirEmbed) + + // dirPair is now L, R, or N (no strong type found) + + // the following logical tests are performed out of order compared to + // the statement of the rules but yield the same results + if dirPair == ON { + return // case "d" - nothing to do + } + + if dirPair != dirEmbed { + // case "c": strong type found, opposite - check before (c.1) + dirPair = p.classBeforePair(loc) + if dirPair == dirEmbed || dirPair == ON { + // no strong opposite type found before - use embedding (c.2) + dirPair = dirEmbed + } + } + // else: case "b", strong type found matching embedding, + // no explicit action needed, as dirPair is already set to embedding + // direction + + // set the bracket types to the type found + p.setBracketsToType(loc, dirPair, initialTypes) +} + +func (p *bracketPairer) setBracketsToType(loc bracketPair, dirPair Class, initialTypes []Class) { + p.codesIsolatedRun[loc.opener] = dirPair + p.codesIsolatedRun[loc.closer] = dirPair + + for i := loc.opener + 1; i < loc.closer; i++ { + index := p.indexes[i] + if initialTypes[index] != NSM { + break + } + p.codesIsolatedRun[i] = dirPair + } + + for i := loc.closer + 1; i < len(p.indexes); i++ { + index := p.indexes[i] + if initialTypes[index] != NSM { + break + } + p.codesIsolatedRun[i] = dirPair + } +} + +// resolveBrackets implements rule N0 for a list of pairs. +func (p *bracketPairer) resolveBrackets(dirEmbed Class, initialTypes []Class) { + for _, loc := range p.pairPositions { + p.assignBracketType(loc, dirEmbed, initialTypes) + } +} |