Bump golang.org/x/text from 0.3.7 to 0.4.0 (#981)

Bumps [golang.org/x/text](https://github.com/golang/text) from 0.3.7 to 0.4.0.
- [Release notes](https://github.com/golang/text/releases)
- [Commits](https://github.com/golang/text/compare/v0.3.7...v0.4.0)

---
updated-dependencies:
- dependency-name: golang.org/x/text
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
dependabot[bot] 2022-11-07 10:18:13 +00:00 committed by GitHub
parent 1a23fb0dc9
commit 1638470388
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 649 additions and 636 deletions

2
go.mod
View file

@ -51,7 +51,7 @@ require (
golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d
golang.org/x/net v0.0.0-20220909164309-bea034e7d591 golang.org/x/net v0.0.0-20220909164309-bea034e7d591
golang.org/x/oauth2 v0.0.0-20220822191816-0ebed06d0094 golang.org/x/oauth2 v0.0.0-20220822191816-0ebed06d0094
golang.org/x/text v0.3.7 golang.org/x/text v0.4.0
gopkg.in/mcuadros/go-syslog.v2 v2.3.0 gopkg.in/mcuadros/go-syslog.v2 v2.3.0
modernc.org/sqlite v1.18.2 modernc.org/sqlite v1.18.2
mvdan.cc/xurls/v2 v2.4.0 mvdan.cc/xurls/v2 v2.4.0

3
go.sum
View file

@ -888,8 +888,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

3
vendor/golang.org/x/text/AUTHORS generated vendored
View file

@ -1,3 +0,0 @@
# This source code refers to The Go Authors for copyright purposes.
# The master list of authors is in the main Go distribution,
# visible at http://tip.golang.org/AUTHORS.

View file

@ -1,3 +0,0 @@
# This source code was written by the Go contributors.
# The master list of contributors is in the main Go distribution,
# visible at http://tip.golang.org/CONTRIBUTORS.

View file

@ -14,19 +14,19 @@
// //
// The per-rune values have the following format: // The per-rune values have the following format:
// //
// if (exception) { // if (exception) {
// 15..4 unsigned exception index // 15..4 unsigned exception index
// } else { // } else {
// 15..8 XOR pattern or index to XOR pattern for case mapping // 15..8 XOR pattern or index to XOR pattern for case mapping
// Only 13..8 are used for XOR patterns. // Only 13..8 are used for XOR patterns.
// 7 inverseFold (fold to upper, not to lower) // 7 inverseFold (fold to upper, not to lower)
// 6 index: interpret the XOR pattern as an index // 6 index: interpret the XOR pattern as an index
// or isMid if case mode is cIgnorableUncased. // or isMid if case mode is cIgnorableUncased.
// 5..4 CCC: zero (normal or break), above or other // 5..4 CCC: zero (normal or break), above or other
// } // }
// 3 exception: interpret this value as an exception index // 3 exception: interpret this value as an exception index
// (TODO: is this bit necessary? Probably implied from case mode.) // (TODO: is this bit necessary? Probably implied from case mode.)
// 2..0 case mode // 2..0 case mode
// //
// For the non-exceptional cases, a rune must be either uncased, lowercase or // For the non-exceptional cases, a rune must be either uncased, lowercase or
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase // uppercase. If the rune is cased, the XOR pattern maps either a lowercase
@ -128,37 +128,40 @@ func (c info) isMid() bool {
// The entry is pointed to by the exception index in an entry. It has the // The entry is pointed to by the exception index in an entry. It has the
// following format: // following format:
// //
// Header // Header:
// byte 0:
// 7..6 unused
// 5..4 CCC type (same bits as entry)
// 3 unused
// 2..0 length of fold
// //
// byte 1: // byte 0:
// 7..6 unused // 7..6 unused
// 5..3 length of 1st mapping of case type // 5..4 CCC type (same bits as entry)
// 2..0 length of 2nd mapping of case type // 3 unused
// 2..0 length of fold
// //
// case 1st 2nd // byte 1:
// lower -> upper, title // 7..6 unused
// upper -> lower, title // 5..3 length of 1st mapping of case type
// title -> lower, upper // 2..0 length of 2nd mapping of case type
//
// case 1st 2nd
// lower -> upper, title
// upper -> lower, title
// title -> lower, upper
// //
// Lengths with the value 0x7 indicate no value and implies no change. // Lengths with the value 0x7 indicate no value and implies no change.
// A length of 0 indicates a mapping to zero-length string. // A length of 0 indicates a mapping to zero-length string.
// //
// Body bytes: // Body bytes:
// case folding bytes //
// lowercase mapping bytes // case folding bytes
// uppercase mapping bytes // lowercase mapping bytes
// titlecase mapping bytes // uppercase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO) // titlecase mapping bytes
// closure mapping bytes (for NFKC_Casefold). (TODO)
// //
// Fallbacks: // Fallbacks:
// missing fold -> lower //
// missing title -> upper // missing fold -> lower
// all missing -> original rune // missing title -> upper
// all missing -> original rune
// //
// exceptions starts with a dummy byte to enforce that there is no zero index // exceptions starts with a dummy byte to enforce that there is no zero index
// value. // value.

View file

@ -966,7 +966,7 @@
0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c, 0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3, 0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
0x40200000, 0x4020004c, 0x40700000, 0x40800000, 0x40200000, 0x4020004c, 0x40700000, 0x40800000,
0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba, 0x4085a000, 0x4085a0ba, 0x408e8000, 0x408e80ba,
0x40c00000, 0x40c000b3, 0x41200000, 0x41200111, 0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
0x41600000, 0x4160010f, 0x41c00000, 0x41d00000, 0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
// Entry 280 - 29F // Entry 280 - 29F
@ -994,7 +994,7 @@
0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8, 0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000, 0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000,
0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000, 0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000,
0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4, 0x4be5a000, 0x4be5a0b4, 0x4bef1000, 0x4bef10b4,
0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000, 0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
// Entry 2E0 - 2FF // Entry 2E0 - 2FF
0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000, 0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
@ -1012,4 +1012,4 @@
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix" const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
// Total table size 3147 bytes (3KiB); checksum: BE816D44 // Total table size 3147 bytes (3KiB); checksum: 6772C83C

View file

@ -50,7 +50,7 @@ func (id Language) Canonicalize() (Language, AliasType) {
return normLang(id) return normLang(id)
} }
// mapLang returns the mapped langID of id according to mapping m. // normLang returns the mapped langID of id according to mapping m.
func normLang(id Language) (Language, AliasType) { func normLang(id Language) (Language, AliasType) {
k := sort.Search(len(AliasMap), func(i int) bool { k := sort.Search(len(AliasMap), func(i int) bool {
return AliasMap[i].From >= uint16(id) return AliasMap[i].From >= uint16(id)
@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool {
return r.typ()&iso3166UserAssigned != 0 return r.typ()&iso3166UserAssigned != 0
} }
type Script uint8 type Script uint16
// getScriptID returns the script id for string s. It assumes that s // getScriptID returns the script id for string s. It assumes that s
// is of the format [A-Z][a-z]{3}. // is of the format [A-Z][a-z]{3}.

View file

@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) {
} else if n >= 4 { } else if n >= 4 {
return Und, ErrSyntax return Und, ErrSyntax
} else { // the usual case } else { // the usual case
t, end = parseTag(scan) t, end = parseTag(scan, true)
if n := len(scan.token); n == 1 { if n := len(scan.token); n == 1 {
t.pExt = uint16(end) t.pExt = uint16(end)
end = parseExtensions(scan) end = parseExtensions(scan)
@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) {
// parseTag parses language, script, region and variants. // parseTag parses language, script, region and variants.
// It returns a Tag and the end position in the input that was parsed. // It returns a Tag and the end position in the input that was parsed.
func parseTag(scan *scanner) (t Tag, end int) { // If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
var e error var e error
// TODO: set an error if an unknown lang, script or region is encountered. // TODO: set an error if an unknown lang, script or region is encountered.
t.LangID, e = getLangID(scan.token) t.LangID, e = getLangID(scan.token)
@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) {
for len(scan.token) == 3 && isAlpha(scan.token[0]) { for len(scan.token) == 3 && isAlpha(scan.token[0]) {
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent // From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
// to a tag of the form <extlang>. // to a tag of the form <extlang>.
lang, e := getLangID(scan.token) if doNorm {
if lang != 0 { lang, e := getLangID(scan.token)
t.LangID = lang if lang != 0 {
copy(scan.b[langStart:], lang.String()) t.LangID = lang
scan.b[langStart+3] = '-' langStr := lang.String()
scan.start = langStart + 4 copy(scan.b[langStart:], langStr)
scan.b[langStart+len(langStr)] = '-'
scan.start = langStart + len(langStr) + 1
}
scan.gobble(e)
} }
scan.gobble(e)
end = scan.scan() end = scan.scan()
} }
if len(scan.token) == 4 && isAlpha(scan.token[0]) { if len(scan.token) == 4 && isAlpha(scan.token[0]) {
@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int {
case 't': // https://www.ietf.org/rfc/rfc6497.txt case 't': // https://www.ietf.org/rfc/rfc6497.txt
scan.scan() scan.scan()
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) { if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
_, end = parseTag(scan) _, end = parseTag(scan, false)
scan.toLower(start, end) scan.toLower(start, end)
} }
for len(scan.token) == 2 && !isAlpha(scan.token[1]) { for len(scan.token) == 2 && !isAlpha(scan.token[1]) {

File diff suppressed because it is too large Load diff

View file

@ -10,18 +10,17 @@
// and provides the user with the best experience // and provides the user with the best experience
// (see https://blog.golang.org/matchlang). // (see https://blog.golang.org/matchlang).
// //
// // # Matching preferred against supported languages
// Matching preferred against supported languages
// //
// A Matcher for an application that supports English, Australian English, // A Matcher for an application that supports English, Australian English,
// Danish, and standard Mandarin can be created as follows: // Danish, and standard Mandarin can be created as follows:
// //
// var matcher = language.NewMatcher([]language.Tag{ // var matcher = language.NewMatcher([]language.Tag{
// language.English, // The first language is used as fallback. // language.English, // The first language is used as fallback.
// language.MustParse("en-AU"), // language.MustParse("en-AU"),
// language.Danish, // language.Danish,
// language.Chinese, // language.Chinese,
// }) // })
// //
// This list of supported languages is typically implied by the languages for // This list of supported languages is typically implied by the languages for
// which there exists translations of the user interface. // which there exists translations of the user interface.
@ -30,14 +29,14 @@
// language tags. // language tags.
// The MatchString finds best matches for such strings: // The MatchString finds best matches for such strings:
// //
// handler(w http.ResponseWriter, r *http.Request) { // handler(w http.ResponseWriter, r *http.Request) {
// lang, _ := r.Cookie("lang") // lang, _ := r.Cookie("lang")
// accept := r.Header.Get("Accept-Language") // accept := r.Header.Get("Accept-Language")
// tag, _ := language.MatchStrings(matcher, lang.String(), accept) // tag, _ := language.MatchStrings(matcher, lang.String(), accept)
// //
// // tag should now be used for the initialization of any // // tag should now be used for the initialization of any
// // locale-specific service. // // locale-specific service.
// } // }
// //
// The Matcher's Match method can be used to match Tags directly. // The Matcher's Match method can be used to match Tags directly.
// //
@ -48,8 +47,7 @@
// For instance, it will know that a reader of Bokmål Danish can read Norwegian // For instance, it will know that a reader of Bokmål Danish can read Norwegian
// and will know that Cantonese ("yue") is a good match for "zh-HK". // and will know that Cantonese ("yue") is a good match for "zh-HK".
// //
// // # Using match results
// Using match results
// //
// To guarantee a consistent user experience to the user it is important to // To guarantee a consistent user experience to the user it is important to
// use the same language tag for the selection of any locale-specific services. // use the same language tag for the selection of any locale-specific services.
@ -58,9 +56,9 @@
// More subtly confusing is using the wrong sorting order or casing // More subtly confusing is using the wrong sorting order or casing
// algorithm for a certain language. // algorithm for a certain language.
// //
// All the packages in x/text that provide locale-specific services // All the packages in x/text that provide locale-specific services
// (e.g. collate, cases) should be initialized with the tag that was // (e.g. collate, cases) should be initialized with the tag that was
// obtained at the start of an interaction with the user. // obtained at the start of an interaction with the user.
// //
// Note that Tag that is returned by Match and MatchString may differ from any // Note that Tag that is returned by Match and MatchString may differ from any
// of the supported languages, as it may contain carried over settings from // of the supported languages, as it may contain carried over settings from
@ -70,8 +68,7 @@
// Match and MatchString both return the index of the matched supported tag // Match and MatchString both return the index of the matched supported tag
// to simplify associating such data with the matched tag. // to simplify associating such data with the matched tag.
// //
// // # Canonicalization
// Canonicalization
// //
// If one uses the Matcher to compare languages one does not need to // If one uses the Matcher to compare languages one does not need to
// worry about canonicalization. // worry about canonicalization.
@ -92,10 +89,9 @@
// equivalence relations. The CanonType type can be used to alter the // equivalence relations. The CanonType type can be used to alter the
// canonicalization form. // canonicalization form.
// //
// References // # References
// //
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47 // BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
//
package language // import "golang.org/x/text/language" package language // import "golang.org/x/text/language"
// TODO: explanation on how to match languages for your own locale-specific // TODO: explanation on how to match languages for your own locale-specific

View file

@ -1,39 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !go1.2
// +build !go1.2
package language
import "sort"
func sortStable(s sort.Interface) {
ss := stableSort{
s: s,
pos: make([]int, s.Len()),
}
for i := range ss.pos {
ss.pos[i] = i
}
sort.Sort(&ss)
}
type stableSort struct {
s sort.Interface
pos []int
}
func (s *stableSort) Len() int {
return len(s.pos)
}
func (s *stableSort) Less(i, j int) bool {
return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
}
func (s *stableSort) Swap(i, j int) {
s.s.Swap(i, j)
s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
}

View file

@ -1,12 +0,0 @@
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build go1.2
// +build go1.2
package language
import "sort"
var sortStable = sort.Stable

View file

@ -545,7 +545,7 @@ type bestMatch struct {
// match as the preferred match. // match as the preferred match.
// //
// If pin is true and have and tag are a strong match, it will henceforth only // If pin is true and have and tag are a strong match, it will henceforth only
// consider matches for this language. This corresponds to the nothing that most // consider matches for this language. This corresponds to the idea that most
// users have a strong preference for the first defined language. A user can // users have a strong preference for the first defined language. A user can
// still prefer a second language over a dialect of the preferred language by // still prefer a second language over a dialect of the preferred language by
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should // explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should

View file

@ -6,6 +6,7 @@
import ( import (
"errors" "errors"
"sort"
"strconv" "strconv"
"strings" "strings"
@ -147,6 +148,7 @@ func update(b *language.Builder, part ...interface{}) (err error) {
} }
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight") var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
var errTagListTooLarge = errors.New("tag list exceeds max length")
// ParseAcceptLanguage parses the contents of an Accept-Language header as // ParseAcceptLanguage parses the contents of an Accept-Language header as
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and // defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
@ -164,6 +166,10 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
} }
}() }()
if strings.Count(s, "-") > 1000 {
return nil, nil, errTagListTooLarge
}
var entry string var entry string
for s != "" { for s != "" {
if entry, s = split(s, ','); entry == "" { if entry, s = split(s, ','); entry == "" {
@ -201,7 +207,7 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
tag = append(tag, t) tag = append(tag, t)
q = append(q, float32(w)) q = append(q, float32(w))
} }
sortStable(&tagSort{tag, q}) sort.Stable(&tagSort{tag, q})
return tag, q, nil return tag, q, nil
} }

View file

@ -39,12 +39,12 @@
_Hani = 57 _Hani = 57
_Hans = 59 _Hans = 59
_Hant = 60 _Hant = 60
_Qaaa = 143 _Qaaa = 147
_Qaai = 151 _Qaai = 155
_Qabx = 192 _Qabx = 196
_Zinh = 245 _Zinh = 252
_Zyyy = 250 _Zyyy = 257
_Zzzz = 251 _Zzzz = 258
) )
var regionToGroups = []uint8{ // 358 elements var regionToGroups = []uint8{ // 358 elements
@ -265,9 +265,9 @@ type regionIntelligibility struct {
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa}, 13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa},
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa}, 16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd4, haveScript: 0x5a, distance: 0xa},
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa}, 17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe3, haveScript: 0x5a, distance: 0xa},
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa}, 18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5a, distance: 0xa},
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa}, 19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa},
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa}, 20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa}, 21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},

View file

@ -33,7 +33,7 @@ func In(rt *unicode.RangeTable) Set {
return setFunc(func(r rune) bool { return unicode.Is(rt, r) }) return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
} }
// In creates a Set with a Contains method that returns true for all runes not // NotIn creates a Set with a Contains method that returns true for all runes not
// in the given RangeTable. // in the given RangeTable.
func NotIn(rt *unicode.RangeTable) Set { func NotIn(rt *unicode.RangeTable) Set {
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) }) return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })

View file

@ -316,7 +316,7 @@ func (p *Profile) Compare(a, b string) bool {
return false return false
} }
return bytes.Compare(akey, bkey) == 0 return bytes.Equal(akey, bkey)
} }
// Allowed returns a runes.Set containing every rune that is a member of the // Allowed returns a runes.Set containing every rune that is a member of the

View file

@ -193,14 +193,14 @@ func (p *paragraph) run() {
// //
// At the end of this function: // At the end of this function:
// //
// - The member variable matchingPDI is set to point to the index of the // - The member variable matchingPDI is set to point to the index of the
// matching PDI character for each isolate initiator character. If there is // matching PDI character for each isolate initiator character. If there is
// no matching PDI, it is set to the length of the input text. For other // no matching PDI, it is set to the length of the input text. For other
// characters, it is set to -1. // characters, it is set to -1.
// - The member variable matchingIsolateInitiator is set to point to the // - The member variable matchingIsolateInitiator is set to point to the
// index of the matching isolate initiator character for each PDI character. // index of the matching isolate initiator character for each PDI character.
// If there is no matching isolate initiator, or the character is not a PDI, // If there is no matching isolate initiator, or the character is not a PDI,
// it is set to -1. // it is set to -1.
func (p *paragraph) determineMatchingIsolates() { func (p *paragraph) determineMatchingIsolates() {
p.matchingPDI = make([]int, p.Len()) p.matchingPDI = make([]int, p.Len())
p.matchingIsolateInitiator = make([]int, p.Len()) p.matchingIsolateInitiator = make([]int, p.Len())
@ -435,7 +435,7 @@ func maxLevel(a, b level) level {
} }
// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types, // Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
// either L or R, for each isolating run sequence. // either L or R, for each isolating run sequence.
func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence { func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
length := len(indexes) length := len(indexes)
types := make([]Class, length) types := make([]Class, length)
@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
if t == NSM { if t == NSM {
s.types[i] = precedingCharacterType s.types[i] = precedingCharacterType
} else { } else {
if t.in(LRI, RLI, FSI, PDI) { // if t.in(LRI, RLI, FSI, PDI) {
precedingCharacterType = ON // precedingCharacterType = ON
} // }
precedingCharacterType = t precedingCharacterType = t
} }
} }
@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level {
// Lines are concatenated from left to right. So for example, the fifth // Lines are concatenated from left to right. So for example, the fifth
// character from the left on the third line is // character from the left on the third line is
// //
// getReordering(linebreaks)[linebreaks[1] + 4] // getReordering(linebreaks)[linebreaks[1] + 4]
// //
// (linebreaks[1] is the position after the last character of the second // (linebreaks[1] is the position after the last character of the second
// line, which is also the index of the first character on the third line, // line, which is also the index of the first character on the third line,

View file

@ -110,10 +110,11 @@ func (p Properties) BoundaryAfter() bool {
} }
// We pack quick check data in 4 bits: // We pack quick check data in 4 bits:
// 5: Combines forward (0 == false, 1 == true) //
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11) // 5: Combines forward (0 == false, 1 == true)
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition. // 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
// 1..0: Number of trailing non-starters. // 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
// 1..0: Number of trailing non-starters.
// //
// When all 4 bits are zero, the character is inert, meaning it is never // When all 4 bits are zero, the character is inert, meaning it is never
// influenced by normalization. // influenced by normalization.

View file

@ -18,16 +18,17 @@
// A Form denotes a canonical representation of Unicode code points. // A Form denotes a canonical representation of Unicode code points.
// The Unicode-defined normalization and equivalence forms are: // The Unicode-defined normalization and equivalence forms are:
// //
// NFC Unicode Normalization Form C // NFC Unicode Normalization Form C
// NFD Unicode Normalization Form D // NFD Unicode Normalization Form D
// NFKC Unicode Normalization Form KC // NFKC Unicode Normalization Form KC
// NFKD Unicode Normalization Form KD // NFKD Unicode Normalization Form KD
// //
// For a Form f, this documentation uses the notation f(x) to mean // For a Form f, this documentation uses the notation f(x) to mean
// the bytes or string x converted to the given form. // the bytes or string x converted to the given form.
// A position n in x is called a boundary if conversion to the form can // A position n in x is called a boundary if conversion to the form can
// proceed independently on both sides: // proceed independently on both sides:
// f(x) == append(f(x[0:n]), f(x[n:])...) //
// f(x) == append(f(x[0:n]), f(x[n:])...)
// //
// References: https://unicode.org/reports/tr15/ and // References: https://unicode.org/reports/tr15/ and
// https://unicode.org/notes/tn5/. // https://unicode.org/notes/tn5/.

View file

@ -7315,7 +7315,7 @@ func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16 {
"\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C "\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C
"\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D "\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D
"\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E "\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E
"\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F "\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F
"\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80 "\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80
"\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81 "\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81
"\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82 "\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82
@ -7342,7 +7342,7 @@ func (t *nfkcTrie) lookupValue(n uint32, b byte) uint16 {
"\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97 "\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97
"\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98 "\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98
"\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99 "\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99
"\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B "\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
"\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0 "\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0
"\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1 "\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1
"\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2 "\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2

View file

@ -1146,21 +1146,31 @@ func (t *widthTrie) lookupValue(n uint32, b byte) uint16 {
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding> //
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
// -> A (U+FF21 -> U+0041) //
// -> B (U+FF22 -> U+0042) // -> A (U+FF21 -> U+0041)
// ... // -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 } //
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41. //
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42. //
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View file

@ -1158,21 +1158,31 @@ func (t *widthTrie) lookupValue(n uint32, b byte) uint16 {
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding> //
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
// -> A (U+FF21 -> U+0041) //
// -> B (U+FF22 -> U+0042) // -> A (U+FF21 -> U+0041)
// ... // -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 } //
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41. //
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42. //
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View file

@ -1178,21 +1178,31 @@ func (t *widthTrie) lookupValue(n uint32, b byte) uint16 {
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding> //
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
// -> A (U+FF21 -> U+0041) //
// -> B (U+FF22 -> U+0042) // -> A (U+FF21 -> U+0041)
// ... // -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 } //
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41. //
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42. //
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View file

@ -1179,21 +1179,31 @@ func (t *widthTrie) lookupValue(n uint32, b byte) uint16 {
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding> //
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
// -> A (U+FF21 -> U+0041) //
// -> B (U+FF22 -> U+0042) // -> A (U+FF21 -> U+0041)
// ... // -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 } //
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41. //
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42. //
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

View file

@ -1114,21 +1114,31 @@ func (t *widthTrie) lookupValue(n uint32, b byte) uint16 {
} }
// inverseData contains 4-byte entries of the following format: // inverseData contains 4-byte entries of the following format:
// <length> <modified UTF-8-encoded rune> <0 padding> //
// <length> <modified UTF-8-encoded rune> <0 padding>
//
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the // The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
// UTF-8 encoding of the original rune. Mappings often have the following // UTF-8 encoding of the original rune. Mappings often have the following
// pattern: // pattern:
// -> A (U+FF21 -> U+0041) //
// -> B (U+FF22 -> U+0042) // -> A (U+FF21 -> U+0041)
// ... // -> B (U+FF22 -> U+0042)
// ...
//
// By xor-ing the last byte the same entry can be shared by many mappings. This // By xor-ing the last byte the same entry can be shared by many mappings. This
// reduces the total number of distinct entries by about two thirds. // reduces the total number of distinct entries by about two thirds.
// The resulting entry for the aforementioned mappings is // The resulting entry for the aforementioned mappings is
// { 0x01, 0xE0, 0x00, 0x00 } //
// { 0x01, 0xE0, 0x00, 0x00 }
//
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get // Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
// E0 ^ A1 = 41. //
// E0 ^ A1 = 41.
//
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get // Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
// E0 ^ A2 = 42. //
// E0 ^ A2 = 42.
//
// Note that because of the xor-ing, the byte sequence stored in the entry is // Note that because of the xor-ing, the byte sequence stored in the entry is
// not valid UTF-8. // not valid UTF-8.
var inverseData = [150][4]byte{ var inverseData = [150][4]byte{

2
vendor/modules.txt vendored
View file

@ -683,7 +683,7 @@ golang.org/x/sys/execabs
golang.org/x/sys/internal/unsafeheader golang.org/x/sys/internal/unsafeheader
golang.org/x/sys/unix golang.org/x/sys/unix
golang.org/x/sys/windows golang.org/x/sys/windows
# golang.org/x/text v0.3.7 # golang.org/x/text v0.4.0
## explicit; go 1.17 ## explicit; go 1.17
golang.org/x/text/cases golang.org/x/text/cases
golang.org/x/text/internal golang.org/x/text/internal