Update dependencies

This commit is contained in:
bluepython508
2025-04-09 01:00:12 +01:00
parent f0641ffd6e
commit 5a9cfc022c
882 changed files with 68930 additions and 24201 deletions

View File

@@ -74,7 +74,7 @@ func ConsumeTrue(b []byte) int {
func ConsumeLiteral(b []byte, lit string) (n int, err error) {
for i := 0; i < len(b) && i < len(lit); i++ {
if b[i] != lit[i] {
return i, NewInvalidCharacterError(b[i:], "within literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
return i, NewInvalidCharacterError(b[i:], "in literal "+lit+" (expecting "+strconv.QuoteRune(rune(lit[i]))+")")
}
}
if len(b) < len(lit) {
@@ -240,7 +240,7 @@ func ConsumeStringResumable(flags *ValueFlags, b []byte, resumeOffset int, valid
// Handle invalid control characters.
case r < ' ':
flags.Join(stringNonVerbatim | stringNonCanonical)
return n, NewInvalidCharacterError(b[n:], "within string (expecting non-control character)")
return n, NewInvalidCharacterError(b[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(b[n:]))
}
@@ -374,7 +374,7 @@ func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, er
// Handle invalid control characters.
case r < ' ':
dst = append(dst, src[i:n]...)
return dst, NewInvalidCharacterError(src[n:], "within string (expecting non-control character)")
return dst, NewInvalidCharacterError(src[n:], "in string (expecting non-control character)")
default:
panic("BUG: unhandled character " + QuoteRune(src[n:]))
}
@@ -386,7 +386,7 @@ func AppendUnquote[Bytes ~[]byte | ~string](dst []byte, src Bytes) (v []byte, er
// hasEscapedUTF16Prefix reports whether b is possibly
// the truncated prefix of a \uFFFF escape sequence.
func hasEscapedUTF16Prefix[Bytes ~[]byte | ~string](b Bytes, lowerSurrogateHalf bool) bool {
for i := 0; i < len(b); i++ {
for i := range len(b) {
switch c := b[i]; {
case i == 0 && c != '\\':
return false
@@ -513,7 +513,7 @@ beforeInteger:
}
state = withinIntegerDigits
default:
return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
// Consume optional fractional component.
@@ -527,7 +527,7 @@ beforeFractional:
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
@@ -549,7 +549,7 @@ beforeExponent:
case '0' <= b[n] && b[n] <= '9':
n++
default:
return n, state, NewInvalidCharacterError(b[n:], "within number (expecting digit)")
return n, state, NewInvalidCharacterError(b[n:], "in number (expecting digit)")
}
for uint(len(b)) > uint(n) && ('0' <= b[n] && b[n] <= '9') {
n++
@@ -567,7 +567,7 @@ func parseHexUint16[Bytes ~[]byte | ~string](b Bytes) (v uint16, ok bool) {
if len(b) != 4 {
return 0, false
}
for i := 0; i < 4; i++ {
for i := range 4 {
c := b[i]
switch {
case '0' <= c && c <= '9':
@@ -610,19 +610,6 @@ func ParseUint(b []byte) (v uint64, ok bool) {
// then we return MaxFloat since any finite value will always be infinitely
// more accurate at representing another finite value than an infinite value.
func ParseFloat(b []byte, bits int) (v float64, ok bool) {
// Fast path for exact integer numbers which fit in the
// 24-bit or 53-bit significand of a float32 or float64.
var negLen int // either 0 or 1
if len(b) > 0 && b[0] == '-' {
negLen = 1
}
u, ok := ParseUint(b[negLen:])
if ok && ((bits == 32 && u <= 1<<24) || (bits == 64 && u <= 1<<53)) {
return math.Copysign(float64(u), float64(-1*negLen)), true
}
// Note that the []byte->string conversion unfortunately allocates.
// See https://go.dev/issue/42429 for more information.
fv, err := strconv.ParseFloat(string(b), bits)
if math.IsInf(fv, 0) {
switch {

View File

@@ -66,35 +66,41 @@ func AppendQuote[Bytes ~[]byte | ~string](dst []byte, src Bytes, flags *jsonflag
dst = slices.Grow(dst, len(`"`)+len(src)+len(`"`))
dst = append(dst, '"')
for uint(len(src)) > uint(n) {
// Handle single-byte ASCII.
if c := src[n]; c < utf8.RuneSelf {
// Handle single-byte ASCII.
n++
if escapeASCII[c] > 0 {
if (c == '<' || c == '>' || c == '&') && !flags.Get(jsonflags.EscapeForHTML) {
continue
}
if escapeASCII[c] == 0 {
continue // no escaping possibly needed
}
// Handle escaping of single-byte ASCII.
if !(c == '<' || c == '>' || c == '&') || flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[i:n-1]...)
dst = appendEscapedASCII(dst, c)
i = n
}
continue
}
// Handle multi-byte Unicode.
switch r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:]))); {
case r == utf8.RuneError && rn == 1:
hasInvalidUTF8 = true
dst = append(dst, src[i:n]...)
dst = append(dst, "\ufffd"...)
n += rn
i = n
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
dst = append(dst, src[i:n]...)
dst = appendEscapedUnicode(dst, r)
n += rn
i = n
default:
} else {
// Handle multi-byte Unicode.
r, rn := utf8.DecodeRuneInString(string(truncateMaxUTF8(src[n:])))
n += rn
if r != utf8.RuneError && r != '\u2028' && r != '\u2029' {
continue // no escaping possibly needed
}
// Handle escaping of multi-byte Unicode.
switch {
case isInvalidUTF8(r, rn):
hasInvalidUTF8 = true
dst = append(dst, src[i:n-rn]...)
if flags.Get(jsonflags.EscapeInvalidUTF8) {
dst = append(dst, `\ufffd`...)
} else {
dst = append(dst, "\ufffd"...)
}
i = n
case (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS):
dst = append(dst, src[i:n-rn]...)
dst = appendEscapedUnicode(dst, r)
i = n
}
}
}
dst = append(dst, src[i:n]...)
@@ -141,7 +147,7 @@ func appendEscapedUTF16(dst []byte, x uint16) []byte {
}
// ReformatString consumes a JSON string from src and appends it to dst,
// reformatting it if necessary for the given escapeRune parameter.
// reformatting it if necessary according to the specified flags.
// It returns the appended output and the number of consumed input bytes.
func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
// TODO: Should this update ValueFlags as input?
@@ -150,18 +156,48 @@ func ReformatString(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error
if err != nil {
return dst, n, err
}
isCanonical := !flags.Get(jsonflags.EscapeForHTML | jsonflags.EscapeForJS)
if flags.Get(jsonflags.PreserveRawStrings) || (isCanonical && valFlags.IsCanonical()) {
// If the output requires no special escapes, and the input
// is already in canonical form or should be preserved verbatim,
// then directly copy the input to the output.
if !flags.Get(jsonflags.AnyEscape) &&
(valFlags.IsCanonical() || flags.Get(jsonflags.PreserveRawStrings)) {
dst = append(dst, src[:n]...) // copy the string verbatim
return dst, n, nil
}
// TODO: Implement a direct, raw-to-raw reformat for strings.
// If the escapeRune option would have resulted in no changes to the output,
// it would be faster to simply append src to dst without going through
// an intermediary representation in a separate buffer.
// Under [jsonflags.PreserveRawStrings], any pre-escaped sequences
// remain escaped, however we still need to respect the
// [jsonflags.EscapeForHTML] and [jsonflags.EscapeForJS] options.
if flags.Get(jsonflags.PreserveRawStrings) {
var i, lastAppendIndex int
for i < n {
if c := src[i]; c < utf8.RuneSelf {
if (c == '<' || c == '>' || c == '&') && flags.Get(jsonflags.EscapeForHTML) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedASCII(dst, c)
lastAppendIndex = i + 1
}
i++
} else {
r, rn := utf8.DecodeRune(truncateMaxUTF8(src[i:]))
if (r == '\u2028' || r == '\u2029') && flags.Get(jsonflags.EscapeForJS) {
dst = append(dst, src[lastAppendIndex:i]...)
dst = appendEscapedUnicode(dst, r)
lastAppendIndex = i + rn
}
i += rn
}
}
return append(dst, src[lastAppendIndex:n]...), n, nil
}
// The input contains characters that might need escaping,
// unnecessary escape sequences, or invalid UTF-8.
// Perform a round-trip unquote and quote to properly reformat
// these sequences according to the current flags.
b, _ := AppendUnquote(nil, src[:n])
dst, _ = AppendQuote(dst, string(b), flags)
dst, _ = AppendQuote(dst, b, flags)
return dst, n, nil
}
@@ -204,23 +240,45 @@ func AppendFloat(dst []byte, src float64, bits int) []byte {
// ReformatNumber consumes a JSON string from src and appends it to dst,
// canonicalizing it if specified.
// It returns the appended output and the number of consumed input bytes.
func ReformatNumber(dst, src []byte, canonicalize bool) ([]byte, int, error) {
func ReformatNumber(dst, src []byte, flags *jsonflags.Flags) ([]byte, int, error) {
n, err := ConsumeNumber(src)
if err != nil {
return dst, n, err
}
if !canonicalize {
if !flags.Get(jsonflags.CanonicalizeNumbers) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
// Canonicalize the number per RFC 8785, section 3.2.2.3.
// As an optimization, we can copy integer numbers below 2⁵³ verbatim.
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
if n < maxExactIntegerDigits && ConsumeSimpleNumber(src[:n]) == n {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
// Identify the kind of number.
var isFloat bool
for _, c := range src[:n] {
if c == '.' || c == 'e' || c == 'E' {
isFloat = true // has fraction or exponent
break
}
}
// Check if we need to canonicalize this kind of number.
switch {
case string(src[:n]) == "-0":
break // canonicalize -0 as 0 regardless of kind
case isFloat:
if !flags.Get(jsonflags.CanonicalizeRawFloats) {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
default:
// As an optimization, we can copy integer numbers below 2⁵³ verbatim
// since the canonical form is always identical.
const maxExactIntegerDigits = 16 // len(strconv.AppendUint(nil, 1<<53, 10))
if !flags.Get(jsonflags.CanonicalizeRawInts) || n < maxExactIntegerDigits {
dst = append(dst, src[:n]...) // copy the number verbatim
return dst, n, nil
}
}
// Parse and reformat the number (which uses a canonical format).
fv, _ := strconv.ParseFloat(string(src[:n]), 64)
switch {
case fv == 0:

View File

@@ -76,13 +76,8 @@ func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
return ('\u0000' <= r && r <= '\uD7FF') || ('\uE000' <= r && r <= '\uFFFF')
}
var invalidUTF8 bool
x0, y0 := x, y
for {
if len(x) == 0 || len(y) == 0 {
if len(x) == len(y) && invalidUTF8 {
return strings.Compare(string(x0), string(y0))
}
return cmp.Compare(len(x), len(y))
}
@@ -114,7 +109,14 @@ func CompareUTF16[Bytes ~[]byte | ~string](x, y Bytes) int {
if rx != ry {
return cmp.Compare(rx, ry)
}
invalidUTF8 = invalidUTF8 || (rx == utf8.RuneError && nx == 1) || (ry == utf8.RuneError && ny == 1)
// Check for invalid UTF-8, in which case,
// we just perform a byte-for-byte comparison.
if isInvalidUTF8(rx, nx) || isInvalidUTF8(ry, ny) {
if x[0] != y[0] {
return cmp.Compare(x[0], y[0])
}
}
x, y = x[nx:], y[ny:]
}
}
@@ -141,16 +143,12 @@ func truncateMaxUTF8[Bytes ~[]byte | ~string](b Bytes) Bytes {
return b
}
// NewError and ErrInvalidUTF8 are injected by the "jsontext" package,
// so that these error types use the jsontext.SyntacticError type.
var (
NewError = errors.New
ErrInvalidUTF8 = errors.New("invalid UTF-8 within string")
)
// TODO(https://go.dev/issue/70547): Use utf8.ErrInvalid instead.
var ErrInvalidUTF8 = errors.New("invalid UTF-8")
// NewInvalidCharacterError returns an error whose message has the form
// "invalid character <what> <where>", where <what> is the result of
// QuoteRune(prefix) and <where> is the caller-supplied description of
// the location (for example "in string (expecting non-control character)").
//
// NOTE(review): this listing shows two consecutive return statements, which
// look like the before/after lines of a diff rendered without +/- markers;
// as written only the first one can execute — confirm which one is intended.
func NewInvalidCharacterError[Bytes ~[]byte | ~string](prefix Bytes, where string) error {
what := QuoteRune(prefix)
return NewError("invalid character " + what + " " + where)
return errors.New("invalid character " + what + " " + where)
}
func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
@@ -162,8 +160,56 @@ func NewInvalidEscapeSequenceError[Bytes ~[]byte | ~string](what Bytes) error {
return r == '`' || r == utf8.RuneError || unicode.IsSpace(r) || !unicode.IsPrint(r)
}) >= 0
if needEscape {
return NewError("invalid " + label + " " + strconv.Quote(string(what)) + " within string")
return errors.New("invalid " + label + " " + strconv.Quote(string(what)) + " in string")
} else {
return NewError("invalid " + label + " `" + string(what) + "` within string")
return errors.New("invalid " + label + " `" + string(what) + "` in string")
}
}
// TruncatePointer optionally truncates the JSON pointer,
// enforcing that the length roughly does not exceed n.
//
// If s is at most n bytes long it is returned unchanged. Otherwise about
// n/2 bytes are kept from each end of s and the removed middle is replaced
// by an ellipsis marker: "…" when no '/' separator was removed, "…/…" when
// exactly one was removed, and "…/…/…" when more were removed. A leading or
// trailing "…" of a multi-part marker is dropped when the removed span itself
// starts or ends with '/'.
//
// The cut points are adjusted so that, when possible, they fall on a '/'
// boundary, and so that they never split a UTF-8 encoded rune.
// A negative n keeps nothing from either end of s instead of panicking.
func TruncatePointer(s string, n int) string {
	if len(s) <= n {
		return s
	}

	// Number of bytes to keep on each side. Clamp to zero so that a negative
	// n cannot produce negative slice bounds below.
	half := n / 2
	if half < 0 {
		half = 0
	}
	i := half
	j := len(s) - half

	// Avoid truncating a name if there are multiple names present.
	// k > 0 (rather than k >= 0) keeps at least the first name fragment.
	if k := strings.LastIndexByte(s[:i], '/'); k > 0 {
		i = k
	}
	if k := strings.IndexByte(s[j:], '/'); k >= 0 {
		j += k + len("/")
	}

	// Avoid truncation in the middle of a UTF-8 rune:
	// shrink the head and the tail until both decode cleanly at the cut.
	for i > 0 && isInvalidUTF8(utf8.DecodeLastRuneInString(s[:i])) {
		i--
	}
	for j < len(s) && isInvalidUTF8(utf8.DecodeRuneInString(s[j:])) {
		j++
	}

	// Determine the right middle fragment to use,
	// based on how many separators are being elided.
	var middle string
	switch strings.Count(s[i:j], "/") {
	case 0:
		middle = "…"
	case 1:
		middle = "…/…"
	default:
		middle = "…/…/…"
	}
	// If the elided span begins or ends exactly on a separator, there is no
	// partial name on that side, so drop the corresponding ellipsis.
	if strings.HasPrefix(s[i:j], "/") && middle != "…" {
		middle = strings.TrimPrefix(middle, "…")
	}
	if strings.HasSuffix(s[i:j], "/") && middle != "…" {
		middle = strings.TrimSuffix(middle, "…")
	}
	return s[:i] + middle + s[j:]
}

// isInvalidUTF8 reports whether the pair (r, rn) is utf8.RuneError with a
// width of one byte, which is what the utf8 decode functions return for an
// invalid or incomplete encoding (as opposed to a validly encoded U+FFFD,
// which has a width of three bytes).
func isInvalidUTF8(r rune, rn int) bool {
	return r == utf8.RuneError && rn == 1
}