Update dependencies
481 vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go (generated, vendored, new file)
@@ -0,0 +1,481 @@
|
||||
package lz4block
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"math/bits"
|
||||
"sync"
|
||||
|
||||
"github.com/pierrec/lz4/v4/internal/lz4errors"
|
||||
)
|
||||
|
||||
const (
|
||||
// The following constants are used to set up the compression algorithm.
|
||||
minMatch = 4 // the minimum length of a match sequence (4 bytes)
|
||||
winSizeLog = 16 // LZ4 64Kb window size limit
|
||||
winSize = 1 << winSizeLog
|
||||
winMask = winSize - 1 // 64Kb window of previous data for dependent blocks
|
||||
|
||||
// hashLog determines the size of the hash table used to quickly find a previous match position.
|
||||
// Its value influences the compression speed and memory usage, the lower the faster,
|
||||
// but at the expense of the compression ratio.
|
||||
// 16 seems to be the best compromise for fast compression.
|
||||
hashLog = 16
|
||||
htSize = 1 << hashLog
|
||||
|
||||
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
|
||||
)
|
||||
|
||||
func recoverBlock(e *error) {
|
||||
if r := recover(); r != nil && *e == nil {
|
||||
*e = lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
}
|
||||
|
||||
// blockHash hashes the lower 6 bytes into a value < htSize.
|
||||
func blockHash(x uint64) uint32 {
|
||||
const prime6bytes = 227718039650203
|
||||
return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog))
|
||||
}
|
||||
|
||||
func CompressBlockBound(n int) int {
|
||||
return n + n/255 + 16
|
||||
}
|
||||
|
||||
func UncompressBlock(src, dst, dict []byte) (int, error) {
|
||||
if len(src) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if di := decodeBlock(dst, src, dict); di >= 0 {
|
||||
return di, nil
|
||||
}
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
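// Editorial sketch (not part of the original vendored file): how the package-level
// helpers above and below are typically combined for a single block round trip.
// The buffer sizing via CompressBlockBound and the "0 bytes written means
// incompressible" convention come from this file; exampleRoundTrip itself is
// hypothetical.
func exampleRoundTrip(data []byte) ([]byte, error) {
	comp := make([]byte, CompressBlockBound(len(data)))
	n, err := CompressBlock(data, comp)
	if err != nil {
		return nil, err
	}
	if n == 0 {
		// Incompressible input: the caller is expected to store it uncompressed.
		return append([]byte(nil), data...), nil
	}
	out := make([]byte, len(data))
	dn, err := UncompressBlock(comp[:n], out, nil)
	if err != nil {
		return nil, err
	}
	return out[:dn], nil
}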
|
||||
|
||||
type Compressor struct {
|
||||
// Offsets are at most 64kiB, so we can store only the lower 16 bits of
|
||||
// match positions: effectively, an offset from some 64kiB block boundary.
|
||||
//
|
||||
// When we retrieve such an offset, we interpret it as relative to the last
|
||||
// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
|
||||
// depending on which of these is inside the current window. If a table
|
||||
// entry was generated more than 64kiB back in the input, we find out by
|
||||
// inspecting the input stream.
|
||||
table [htSize]uint16
|
||||
|
||||
// Bitmap indicating which positions in the table are in use.
|
||||
// This allows us to quickly reset the table for reuse,
|
||||
// without having to zero everything.
|
||||
inUse [htSize / 32]uint32
|
||||
}
|
||||
|
||||
// Get returns the position of a presumptive match for the hash h.
|
||||
// The match may be a false positive due to a hash collision or an old entry.
|
||||
// If si < winSize, the return value may be negative.
|
||||
func (c *Compressor) get(h uint32, si int) int {
|
||||
h &= htSize - 1
|
||||
i := 0
|
||||
if c.inUse[h/32]&(1<<(h%32)) != 0 {
|
||||
i = int(c.table[h])
|
||||
}
|
||||
i += si &^ winMask
|
||||
if i >= si {
|
||||
// Try previous 64kiB block (negative when in first block).
|
||||
i -= winSize
|
||||
}
|
||||
return i
|
||||
}
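// Worked example (editorial note, not part of the original vendored file): with
// si = 0x21234 the block boundary si &^ winMask is 0x20000. A stored table entry
// of 0x1000 yields candidate 0x21000, which is below si and is used as-is. A
// stored entry of 0x8000 yields 0x28000, which is not below si, so winSize is
// subtracted to give 0x18000 in the previous 64KiB block. Whether that position
// really matches is then verified against the input by the caller.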
|
||||
|
||||
func (c *Compressor) put(h uint32, si int) {
|
||||
h &= htSize - 1
|
||||
c.table[h] = uint16(si)
|
||||
c.inUse[h/32] |= 1 << (h % 32)
|
||||
}
|
||||
|
||||
func (c *Compressor) reset() { c.inUse = [htSize / 32]uint32{} }
|
||||
|
||||
var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }}
|
||||
|
||||
func CompressBlock(src, dst []byte) (int, error) {
|
||||
c := compressorPool.Get().(*Compressor)
|
||||
n, err := c.CompressBlock(src, dst)
|
||||
compressorPool.Put(c)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
|
||||
// Zero out reused table to avoid non-deterministic output (issue #65).
|
||||
c.reset()
|
||||
|
||||
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
|
||||
isNotCompressible := len(dst) < CompressBlockBound(len(src))
|
||||
|
||||
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
|
||||
// This significantly speeds up incompressible data and usually has very small impact on compression.
|
||||
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
|
||||
const adaptSkipLog = 7
|
||||
|
||||
// si: Current position of the search.
|
||||
// anchor: Position of the current literals.
|
||||
var si, di, anchor int
|
||||
sn := len(src) - mfLimit
|
||||
if sn <= 0 {
|
||||
goto lastLiterals
|
||||
}
|
||||
|
||||
// Fast scan strategy: the hash table only stores the last 4-byte sequences.
|
||||
for si < sn {
|
||||
// Hash the next 6 bytes (sequence)...
|
||||
match := binary.LittleEndian.Uint64(src[si:])
|
||||
h := blockHash(match)
|
||||
h2 := blockHash(match >> 8)
|
||||
|
||||
// We check a match at s, s+1 and s+2 and pick the first one we get.
|
||||
// Checking 3 only requires us to load the source once.
|
||||
ref := c.get(h, si)
|
||||
ref2 := c.get(h2, si+1)
|
||||
c.put(h, si)
|
||||
c.put(h2, si+1)
|
||||
|
||||
offset := si - ref
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
|
||||
// No match. Start calculating another hash.
|
||||
// The processor can usually do this out-of-order.
|
||||
h = blockHash(match >> 16)
|
||||
ref3 := c.get(h, si+2)
|
||||
|
||||
// Check the second match at si+1
|
||||
si += 1
|
||||
offset = si - ref2
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
|
||||
// No match. Check the third match at si+2
|
||||
si += 1
|
||||
offset = si - ref3
|
||||
c.put(h, si)
|
||||
|
||||
if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
|
||||
// Skip one extra byte (at si+3) before we check 3 matches again.
|
||||
si += 2 + (si-anchor)>>adaptSkipLog
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Match found.
|
||||
lLen := si - anchor // Literal length.
|
||||
// We already matched 4 bytes.
|
||||
mLen := 4
|
||||
|
||||
// Extend backwards if we can, reducing literals.
|
||||
tOff := si - offset - 1
|
||||
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
|
||||
si--
|
||||
tOff--
|
||||
lLen--
|
||||
mLen++
|
||||
}
|
||||
|
||||
// Add the match length, so we continue search at the end.
|
||||
// Use mLen to store the offset base.
|
||||
si, mLen = si+mLen, si+minMatch
|
||||
|
||||
// Find the longest match by looking by batches of 8 bytes.
|
||||
for si+8 <= sn {
|
||||
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
|
||||
if x == 0 {
|
||||
si += 8
|
||||
} else {
|
||||
// Stop at the first non-zero byte.
|
||||
si += bits.TrailingZeros64(x) >> 3
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
mLen = si - mLen
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
if mLen < 0xF {
|
||||
dst[di] = byte(mLen)
|
||||
} else {
|
||||
dst[di] = 0xF
|
||||
}
|
||||
|
||||
// Encode literals length.
|
||||
if lLen < 0xF {
|
||||
dst[di] |= byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] |= 0xF0
|
||||
di++
|
||||
l := lLen - 0xF
|
||||
for ; l >= 0xFF && di < len(dst); l -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(l)
|
||||
}
|
||||
di++
|
||||
|
||||
// Literals.
|
||||
if di+lLen > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
|
||||
di += lLen + 2
|
||||
anchor = si
|
||||
|
||||
// Encode offset.
|
||||
if di > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
|
||||
|
||||
// Encode match length part 2.
|
||||
if mLen >= 0xF {
|
||||
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(mLen)
|
||||
di++
|
||||
}
|
||||
// Check if we can load next values.
|
||||
if si >= sn {
|
||||
break
|
||||
}
|
||||
// Hash match end-2
|
||||
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
|
||||
c.put(h, si-2)
|
||||
}
|
||||
|
||||
lastLiterals:
|
||||
if isNotCompressible && anchor == 0 {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Last literals.
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
lLen := len(src) - anchor
|
||||
if lLen < 0xF {
|
||||
dst[di] = byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] = 0xF0
|
||||
di++
|
||||
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
if di >= len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
dst[di] = byte(lLen)
|
||||
}
|
||||
di++
|
||||
|
||||
// Write the last literals.
|
||||
if isNotCompressible && di >= anchor {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
if di+len(src)-anchor > len(dst) {
|
||||
return 0, lz4errors.ErrInvalidSourceShortBuffer
|
||||
}
|
||||
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
|
||||
return di, nil
|
||||
}
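// Editorial sketch (not part of the original vendored file): how a sequence header
// is laid out in the LZ4 block format produced above. The high nibble of the token
// holds the literal length and the low nibble the match length minus minMatch; a
// nibble value of 0xF means extension bytes follow, each adding up to 0xFF, until a
// byte below 0xFF terminates the length. exampleReadLength is hypothetical and only
// mirrors the encoding loops above.
func exampleReadLength(nibble uint, extra []byte) (length uint, consumed int) {
	length = nibble
	if nibble < 0xF {
		return length, 0
	}
	for _, b := range extra {
		length += uint(b)
		consumed++
		if b != 0xFF {
			break
		}
	}
	return length, consumed
}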
|
||||
|
||||
// blockHashHC hashes 4 bytes into a value < winSize.
|
||||
func blockHashHC(x uint32) uint32 {
|
||||
const hasher uint32 = 2654435761 // Knuth multiplicative hash.
|
||||
return x * hasher >> (32 - winSizeLog)
|
||||
}
|
||||
|
||||
type CompressorHC struct {
|
||||
// hashTable: stores the last position found for a given hash
|
||||
// chainTable: stores previous positions for a given hash
|
||||
hashTable, chainTable [htSize]int
|
||||
needsReset bool
|
||||
}
|
||||
|
||||
var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }}
|
||||
|
||||
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
|
||||
c := compressorHCPool.Get().(*CompressorHC)
|
||||
n, err := c.CompressBlock(src, dst, depth)
|
||||
compressorHCPool.Put(c)
|
||||
return n, err
|
||||
}
|
||||
|
||||
func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
|
||||
if c.needsReset {
|
||||
// Zero out reused table to avoid non-deterministic output (issue #65).
|
||||
c.hashTable = [htSize]int{}
|
||||
c.chainTable = [htSize]int{}
|
||||
}
|
||||
c.needsReset = true // Only false on first call.
|
||||
|
||||
defer recoverBlock(&err)
|
||||
|
||||
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
|
||||
isNotCompressible := len(dst) < CompressBlockBound(len(src))
|
||||
|
||||
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
|
||||
// This significantly speeds up incompressible data and usually has very small impact on compression.
|
||||
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
|
||||
const adaptSkipLog = 7
|
||||
|
||||
var si, di, anchor int
|
||||
sn := len(src) - mfLimit
|
||||
if sn <= 0 {
|
||||
goto lastLiterals
|
||||
}
|
||||
|
||||
if depth == 0 {
|
||||
depth = winSize
|
||||
}
|
||||
|
||||
for si < sn {
|
||||
// Hash the next 4 bytes (sequence).
|
||||
match := binary.LittleEndian.Uint32(src[si:])
|
||||
h := blockHashHC(match)
|
||||
|
||||
// Follow the chain until out of window and give the longest match.
|
||||
mLen := 0
|
||||
offset := 0
|
||||
for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
|
||||
// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
|
||||
// must match to improve on the match length.
|
||||
if src[next+mLen] != src[si+mLen] {
|
||||
continue
|
||||
}
|
||||
ml := 0
|
||||
// Compare the current position with a previous with the same hash.
|
||||
for ml < sn-si {
|
||||
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
|
||||
if x == 0 {
|
||||
ml += 8
|
||||
} else {
|
||||
// Stop at the first non-zero byte.
|
||||
ml += bits.TrailingZeros64(x) >> 3
|
||||
break
|
||||
}
|
||||
}
|
||||
if ml < minMatch || ml <= mLen {
|
||||
// Match too small (<minMatch) or smaller than the current match.
|
||||
continue
|
||||
}
|
||||
// Found a longer match, keep its position and length.
|
||||
mLen = ml
|
||||
offset = si - next
|
||||
// Try another previous position with the same hash.
|
||||
}
|
||||
c.chainTable[si&winMask] = c.hashTable[h]
|
||||
c.hashTable[h] = si
|
||||
|
||||
// No match found.
|
||||
if mLen == 0 {
|
||||
si += 1 + (si-anchor)>>adaptSkipLog
|
||||
continue
|
||||
}
|
||||
|
||||
// Match found.
|
||||
// Update hash/chain tables with overlapping bytes:
|
||||
// si already hashed, add everything from si+1 up to the match length.
|
||||
winStart := si + 1
|
||||
if ws := si + mLen - winSize; ws > winStart {
|
||||
winStart = ws
|
||||
}
|
||||
for si, ml := winStart, si+mLen; si < ml; {
|
||||
match >>= 8
|
||||
match |= uint32(src[si+3]) << 24
|
||||
h := blockHashHC(match)
|
||||
c.chainTable[si&winMask] = c.hashTable[h]
|
||||
c.hashTable[h] = si
|
||||
si++
|
||||
}
|
||||
|
||||
lLen := si - anchor
|
||||
si += mLen
|
||||
mLen -= minMatch // Match length does not include minMatch.
|
||||
|
||||
if mLen < 0xF {
|
||||
dst[di] = byte(mLen)
|
||||
} else {
|
||||
dst[di] = 0xF
|
||||
}
|
||||
|
||||
// Encode literals length.
|
||||
if lLen < 0xF {
|
||||
dst[di] |= byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] |= 0xF0
|
||||
di++
|
||||
l := lLen - 0xF
|
||||
for ; l >= 0xFF; l -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(l)
|
||||
}
|
||||
di++
|
||||
|
||||
// Literals.
|
||||
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
|
||||
di += lLen
|
||||
anchor = si
|
||||
|
||||
// Encode offset.
|
||||
di += 2
|
||||
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
|
||||
|
||||
// Encode match length part 2.
|
||||
if mLen >= 0xF {
|
||||
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(mLen)
|
||||
di++
|
||||
}
|
||||
}
|
||||
|
||||
if isNotCompressible && anchor == 0 {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Last literals.
|
||||
lastLiterals:
|
||||
lLen := len(src) - anchor
|
||||
if lLen < 0xF {
|
||||
dst[di] = byte(lLen << 4)
|
||||
} else {
|
||||
dst[di] = 0xF0
|
||||
di++
|
||||
lLen -= 0xF
|
||||
for ; lLen >= 0xFF; lLen -= 0xFF {
|
||||
dst[di] = 0xFF
|
||||
di++
|
||||
}
|
||||
dst[di] = byte(lLen)
|
||||
}
|
||||
di++
|
||||
|
||||
// Write the last literals.
|
||||
if isNotCompressible && di >= anchor {
|
||||
// Incompressible.
|
||||
return 0, nil
|
||||
}
|
||||
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
|
||||
return di, nil
|
||||
}
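// Editorial sketch (not part of the original vendored file): the HC entry point
// takes a search depth; per the check above, a depth of 0 falls back to a
// full-window search (winSize). Only CompressBlockHC and CompressBlockBound come
// from this package; exampleCompressHC itself is hypothetical.
func exampleCompressHC(data []byte) ([]byte, error) {
	buf := make([]byte, CompressBlockBound(len(data)))
	n, err := CompressBlockHC(data, buf, 0) // depth 0: search the whole 64KiB window.
	if err != nil || n == 0 {
		return nil, err
	}
	return buf[:n], nil
}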
|
||||
87 vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go (generated, vendored, new file)
@@ -0,0 +1,87 @@
|
||||
// Package lz4block provides LZ4 BlockSize types and pools of buffers.
|
||||
package lz4block
|
||||
|
||||
import "sync"
|
||||
|
||||
const (
|
||||
Block64Kb uint32 = 1 << (16 + iota*2)
|
||||
Block256Kb
|
||||
Block1Mb
|
||||
Block4Mb
|
||||
Block8Mb = 2 * Block4Mb
|
||||
)
|
||||
|
||||
var (
|
||||
BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
|
||||
BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
|
||||
BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
|
||||
BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
|
||||
BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, Block8Mb) }}
|
||||
)
|
||||
|
||||
func Index(b uint32) BlockSizeIndex {
|
||||
switch b {
|
||||
case Block64Kb:
|
||||
return 4
|
||||
case Block256Kb:
|
||||
return 5
|
||||
case Block1Mb:
|
||||
return 6
|
||||
case Block4Mb:
|
||||
return 7
|
||||
case Block8Mb: // only valid in legacy mode
|
||||
return 3
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
func IsValid(b uint32) bool {
|
||||
return Index(b) > 0
|
||||
}
|
||||
|
||||
type BlockSizeIndex uint8
|
||||
|
||||
func (b BlockSizeIndex) IsValid() bool {
|
||||
switch b {
|
||||
case 4, 5, 6, 7:
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (b BlockSizeIndex) Get() []byte {
|
||||
var buf interface{}
|
||||
switch b {
|
||||
case 4:
|
||||
buf = BlockPool64K.Get()
|
||||
case 5:
|
||||
buf = BlockPool256K.Get()
|
||||
case 6:
|
||||
buf = BlockPool1M.Get()
|
||||
case 7:
|
||||
buf = BlockPool4M.Get()
|
||||
case 3:
|
||||
buf = BlockPool8M.Get()
|
||||
}
|
||||
return buf.([]byte)
|
||||
}
|
||||
|
||||
func Put(buf []byte) {
|
||||
// Safeguard: do not allow invalid buffers.
|
||||
switch c := cap(buf); uint32(c) {
|
||||
case Block64Kb:
|
||||
BlockPool64K.Put(buf[:c])
|
||||
case Block256Kb:
|
||||
BlockPool256K.Put(buf[:c])
|
||||
case Block1Mb:
|
||||
BlockPool1M.Put(buf[:c])
|
||||
case Block4Mb:
|
||||
BlockPool4M.Put(buf[:c])
|
||||
case Block8Mb:
|
||||
BlockPool8M.Put(buf[:c])
|
||||
}
|
||||
}
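// Editorial sketch (not part of the original vendored file): the intended round
// trip for the pooled block buffers defined above. Only Index, BlockSizeIndex.Get,
// BlockSizeIndex.IsValid and Put come from this file; examplePooledBuffer is
// hypothetical.
func examplePooledBuffer() {
	idx := Index(Block64Kb) // 4 for the 64KiB block size.
	if !idx.IsValid() {
		return
	}
	buf := idx.Get() // borrow a 64KiB buffer from BlockPool64K
	defer Put(buf)   // return it; Put matches the pool by capacity
	_ = buf
}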
|
||||
|
||||
type CompressionLevel uint32
|
||||
|
||||
const Fast CompressionLevel = 0
|
||||
448 vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s (generated, vendored, new file)
@@ -0,0 +1,448 @@
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// AX scratch
|
||||
// BX scratch
|
||||
// CX literal and match lengths
|
||||
// DX token, match offset
|
||||
//
|
||||
// DI &dst
|
||||
// SI &src
|
||||
// R8 &dst + len(dst)
|
||||
// R9 &src + len(src)
|
||||
// R11 &dst
|
||||
// R12 short output end
|
||||
// R13 short input end
|
||||
// R14 &dict
|
||||
// R15 len(dict)
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOSPLIT, $48-80
|
||||
MOVQ dst_base+0(FP), DI
|
||||
MOVQ DI, R11
|
||||
MOVQ dst_len+8(FP), R8
|
||||
ADDQ DI, R8
|
||||
|
||||
MOVQ src_base+24(FP), SI
|
||||
MOVQ src_len+32(FP), R9
|
||||
CMPQ R9, $0
|
||||
JE err_corrupt
|
||||
ADDQ SI, R9
|
||||
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
|
||||
// shortcut ends
|
||||
// short output end
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
// short input end
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
XORL CX, CX
|
||||
|
||||
loop:
|
||||
// token := uint32(src[si])
|
||||
MOVBLZX (SI), DX
|
||||
INCQ SI
|
||||
|
||||
// lit_len = token >> 4
|
||||
// if lit_len > 0
|
||||
// CX = lit_len
|
||||
MOVL DX, CX
|
||||
SHRL $4, CX
|
||||
|
||||
// if lit_len != 0xF
|
||||
CMPL CX, $0xF
|
||||
JEQ lit_len_loop
|
||||
CMPQ DI, R12
|
||||
JAE copy_literal
|
||||
CMPQ SI, R13
|
||||
JAE copy_literal
|
||||
|
||||
// copy shortcut
|
||||
|
||||
// A two-stage shortcut for the most common case:
|
||||
// 1) If the literal length is 0..14, and there is enough space,
|
||||
// enter the shortcut and copy 16 bytes on behalf of the literals
|
||||
// (in the fast mode, only 8 bytes can be safely copied this way).
|
||||
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
|
||||
// manner; but we ensure that there's enough space in the output for
|
||||
// those 18 bytes earlier, upon entering the shortcut (in other words,
|
||||
// there is a combined check for both stages).
|
||||
|
||||
// copy literal
|
||||
MOVOU (SI), X0
|
||||
MOVOU X0, (DI)
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
|
||||
MOVL DX, CX
|
||||
ANDL $0xF, CX
|
||||
|
||||
// The second stage: prepare for match copying, decode full info.
|
||||
// If it doesn't work out, the info won't be wasted.
|
||||
// offset := uint16(data[:2])
|
||||
MOVWLZX (SI), DX
|
||||
TESTL DX, DX
|
||||
JE err_corrupt
|
||||
ADDQ $2, SI
|
||||
JC err_short_buf
|
||||
|
||||
MOVQ DI, AX
|
||||
SUBQ DX, AX
|
||||
JC err_corrupt
|
||||
CMPQ AX, DI
|
||||
JA err_short_buf
|
||||
|
||||
// if we can't do the second stage then jump straight to read the
|
||||
// match length, we already have the offset.
|
||||
CMPL CX, $0xF
|
||||
JEQ match_len_loop_pre
|
||||
CMPL DX, $8
|
||||
JLT match_len_loop_pre
|
||||
CMPQ AX, R11
|
||||
JB match_len_loop_pre
|
||||
|
||||
// memcpy(op + 0, match + 0, 8);
|
||||
MOVQ (AX), BX
|
||||
MOVQ BX, (DI)
|
||||
// memcpy(op + 8, match + 8, 8);
|
||||
MOVQ 8(AX), BX
|
||||
MOVQ BX, 8(DI)
|
||||
// memcpy(op +16, match +16, 2);
|
||||
MOVW 16(AX), BX
|
||||
MOVW BX, 16(DI)
|
||||
|
||||
LEAQ const_minMatch(DI)(CX*1), DI
|
||||
|
||||
// shortcut complete, load next token
|
||||
JMP loopcheck
|
||||
|
||||
// Read the rest of the literal length:
|
||||
// do { BX = src[si++]; lit_len += BX } while (BX == 0xFF).
|
||||
lit_len_loop:
|
||||
CMPQ SI, R9
|
||||
JAE err_short_buf
|
||||
|
||||
MOVBLZX (SI), BX
|
||||
INCQ SI
|
||||
ADDQ BX, CX
|
||||
|
||||
CMPB BX, $0xFF
|
||||
JE lit_len_loop
|
||||
|
||||
copy_literal:
|
||||
// bounds check src and dst
|
||||
MOVQ SI, AX
|
||||
ADDQ CX, AX
|
||||
JC err_short_buf
|
||||
CMPQ AX, R9
|
||||
JA err_short_buf
|
||||
|
||||
MOVQ DI, BX
|
||||
ADDQ CX, BX
|
||||
JC err_short_buf
|
||||
CMPQ BX, R8
|
||||
JA err_short_buf
|
||||
|
||||
// Copy literals of <=48 bytes through the XMM registers.
|
||||
CMPQ CX, $48
|
||||
JGT memmove_lit
|
||||
|
||||
// if len(dst[di:]) < 48
|
||||
MOVQ R8, AX
|
||||
SUBQ DI, AX
|
||||
CMPQ AX, $48
|
||||
JLT memmove_lit
|
||||
|
||||
// if len(src[si:]) < 48
|
||||
MOVQ R9, BX
|
||||
SUBQ SI, BX
|
||||
CMPQ BX, $48
|
||||
JLT memmove_lit
|
||||
|
||||
MOVOU (SI), X0
|
||||
MOVOU 16(SI), X1
|
||||
MOVOU 32(SI), X2
|
||||
MOVOU X0, (DI)
|
||||
MOVOU X1, 16(DI)
|
||||
MOVOU X2, 32(DI)
|
||||
|
||||
ADDQ CX, SI
|
||||
ADDQ CX, DI
|
||||
|
||||
JMP finish_lit_copy
|
||||
|
||||
memmove_lit:
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ SI, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
|
||||
// Spill registers. Increment SI, DI now so we don't need to save CX.
|
||||
ADDQ CX, DI
|
||||
ADDQ CX, SI
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
MOVL DX, 40(SP)
|
||||
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
MOVL 40(SP), DX
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
finish_lit_copy:
|
||||
// CX := mLen
|
||||
// free up DX to use for offset
|
||||
MOVL DX, CX
|
||||
ANDL $0xF, CX
|
||||
|
||||
CMPQ SI, R9
|
||||
JAE end
|
||||
|
||||
// offset
|
||||
// si += 2
|
||||
// DX := int(src[si-2]) | int(src[si-1])<<8
|
||||
ADDQ $2, SI
|
||||
JC err_short_buf
|
||||
CMPQ SI, R9
|
||||
JA err_short_buf
|
||||
MOVWQZX -2(SI), DX
|
||||
|
||||
// 0 offset is invalid
|
||||
TESTL DX, DX
|
||||
JEQ err_corrupt
|
||||
|
||||
match_len_loop_pre:
|
||||
// if mlen != 0xF
|
||||
CMPB CX, $0xF
|
||||
JNE copy_match
|
||||
|
||||
// do { BX = src[si++]; mlen += BX } while (BX == 0xFF).
|
||||
match_len_loop:
|
||||
CMPQ SI, R9
|
||||
JAE err_short_buf
|
||||
|
||||
MOVBLZX (SI), BX
|
||||
INCQ SI
|
||||
ADDQ BX, CX
|
||||
|
||||
CMPB BX, $0xFF
|
||||
JE match_len_loop
|
||||
|
||||
copy_match:
|
||||
ADDQ $const_minMatch, CX
|
||||
|
||||
// check we have match_len bytes left in dst
|
||||
// di+match_len < len(dst)
|
||||
MOVQ DI, AX
|
||||
ADDQ CX, AX
|
||||
JC err_short_buf
|
||||
CMPQ AX, R8
|
||||
JA err_short_buf
|
||||
|
||||
// DX = offset
|
||||
// CX = match_len
|
||||
// BX = &dst + (di - offset)
|
||||
MOVQ DI, BX
|
||||
SUBQ DX, BX
|
||||
|
||||
// check BX is within dst
|
||||
// if BX < &dst
|
||||
JC copy_match_from_dict
|
||||
CMPQ BX, R11
|
||||
JBE copy_match_from_dict
|
||||
|
||||
// if offset + match_len < di
|
||||
LEAQ (BX)(CX*1), AX
|
||||
CMPQ DI, AX
|
||||
JA copy_interior_match
|
||||
|
||||
// AX := len(dst[:di])
|
||||
// MOVQ DI, AX
|
||||
// SUBQ R11, AX
|
||||
|
||||
// copy 16 bytes at a time
|
||||
// if di-offset < 16 copy 16-(di-offset) bytes to di
|
||||
// then do the remaining
|
||||
|
||||
copy_match_loop:
|
||||
// for match_len >= 0
|
||||
// dst[di] = dst[i]
|
||||
// di++
|
||||
// i++
|
||||
MOVB (BX), AX
|
||||
MOVB AX, (DI)
|
||||
INCQ DI
|
||||
INCQ BX
|
||||
DECQ CX
|
||||
JNZ copy_match_loop
|
||||
|
||||
JMP loopcheck
|
||||
|
||||
copy_interior_match:
|
||||
CMPQ CX, $16
|
||||
JGT memmove_match
|
||||
|
||||
// if len(dst[di:]) < 16
|
||||
MOVQ R8, AX
|
||||
SUBQ DI, AX
|
||||
CMPQ AX, $16
|
||||
JLT memmove_match
|
||||
|
||||
MOVOU (BX), X0
|
||||
MOVOU X0, (DI)
|
||||
|
||||
ADDQ CX, DI
|
||||
XORL CX, CX
|
||||
JMP loopcheck
|
||||
|
||||
copy_match_from_dict:
|
||||
// CX = match_len
|
||||
// BX = &dst + (di - offset)
|
||||
|
||||
// AX = offset - di = dict_bytes_available => count of bytes potentially covered by the dictionary
|
||||
MOVQ R11, AX
|
||||
SUBQ BX, AX
|
||||
|
||||
// BX = len(dict) - dict_bytes_available
|
||||
MOVQ R15, BX
|
||||
SUBQ AX, BX
|
||||
JS err_short_dict
|
||||
|
||||
ADDQ R14, BX
|
||||
|
||||
// if match_len > dict_bytes_available, match fits entirely within external dictionary : just copy
|
||||
CMPQ CX, AX
|
||||
JLT memmove_match
|
||||
|
||||
// The match stretches over the dictionary and our block
|
||||
// 1) copy what comes from the dictionary
|
||||
// AX = dict_bytes_available = copy_size
|
||||
// BX = &dict_end - copy_size
|
||||
// CX = match_len
|
||||
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ BX, 8(SP)
|
||||
MOVQ AX, 16(SP)
|
||||
// store extra stuff we want to recover
|
||||
// spill
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
MOVQ CX, 40(SP)
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 16(SP), AX // copy_size
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
MOVQ 40(SP), CX // match_len
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11 // TODO: make these sensible numbers
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
|
||||
// di+=copy_size
|
||||
ADDQ AX, DI
|
||||
|
||||
// 2) copy the rest from the current block
|
||||
// CX = match_len - copy_size = rest_size
|
||||
SUBQ AX, CX
|
||||
MOVQ R11, BX
|
||||
|
||||
// check if we have a copy overlap
|
||||
// AX = &dst + rest_size
|
||||
MOVQ CX, AX
|
||||
ADDQ BX, AX
|
||||
// if &dst + rest_size > di, copy byte by byte
|
||||
CMPQ AX, DI
|
||||
|
||||
JA copy_match_loop
|
||||
|
||||
memmove_match:
|
||||
// memmove(to, from, len)
|
||||
MOVQ DI, 0(SP)
|
||||
MOVQ BX, 8(SP)
|
||||
MOVQ CX, 16(SP)
|
||||
|
||||
// Spill registers. Increment DI now so we don't need to save CX.
|
||||
ADDQ CX, DI
|
||||
MOVQ DI, 24(SP)
|
||||
MOVQ SI, 32(SP)
|
||||
|
||||
CALL runtime·memmove(SB)
|
||||
|
||||
// restore registers
|
||||
MOVQ 24(SP), DI
|
||||
MOVQ 32(SP), SI
|
||||
|
||||
// recalc initial values
|
||||
MOVQ dst_base+0(FP), R8
|
||||
MOVQ R8, R11 // TODO: make these sensible numbers
|
||||
ADDQ dst_len+8(FP), R8
|
||||
MOVQ src_base+24(FP), R9
|
||||
ADDQ src_len+32(FP), R9
|
||||
MOVQ R8, R12
|
||||
SUBQ $32, R12
|
||||
MOVQ R9, R13
|
||||
SUBQ $16, R13
|
||||
MOVQ dict_base+48(FP), R14
|
||||
MOVQ dict_len+56(FP), R15
|
||||
XORL CX, CX
|
||||
|
||||
loopcheck:
|
||||
// for si < len(src)
|
||||
CMPQ SI, R9
|
||||
JB loop
|
||||
|
||||
end:
|
||||
// Remaining length must be zero.
|
||||
TESTQ CX, CX
|
||||
JNE err_corrupt
|
||||
|
||||
SUBQ R11, DI
|
||||
MOVQ DI, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_corrupt:
|
||||
MOVQ $-1, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_short_buf:
|
||||
MOVQ $-2, ret+72(FP)
|
||||
RET
|
||||
|
||||
err_short_dict:
|
||||
MOVQ $-3, ret+72(FP)
|
||||
RET
|
||||
231 vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s (generated, vendored, new file)
@@ -0,0 +1,231 @@
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
#define dst R0
|
||||
#define dstorig R1
|
||||
#define src R2
|
||||
#define dstend R3
|
||||
#define srcend R4
|
||||
#define match R5 // Match address.
|
||||
#define dictend R6
|
||||
#define token R7
|
||||
#define len R8 // Literal and match lengths.
|
||||
#define offset R7 // Match offset; overlaps with token.
|
||||
#define tmp1 R9
|
||||
#define tmp2 R11
|
||||
#define tmp3 R12
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $-4-40
|
||||
MOVW dst_base +0(FP), dst
|
||||
MOVW dst_len +4(FP), dstend
|
||||
MOVW src_base +12(FP), src
|
||||
MOVW src_len +16(FP), srcend
|
||||
|
||||
CMP $0, srcend
|
||||
BEQ shortSrc
|
||||
|
||||
ADD dst, dstend
|
||||
ADD src, srcend
|
||||
|
||||
MOVW dst, dstorig
|
||||
|
||||
loop:
|
||||
// Read token. Extract literal length.
|
||||
MOVBU.P 1(src), token
|
||||
MOVW token >> 4, len
|
||||
CMP $15, len
|
||||
BNE readLitlenDone
|
||||
|
||||
readLitlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADD.S tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readLitlenLoop
|
||||
|
||||
readLitlenDone:
|
||||
CMP $0, len
|
||||
BEQ copyLiteralDone
|
||||
|
||||
// Bounds check dst+len and src+len.
|
||||
ADD.S dst, len, tmp1
|
||||
ADD.CC.S src, len, tmp2
|
||||
BCS shortSrc
|
||||
CMP dstend, tmp1
|
||||
//BHI shortDst // Uncomment for distinct error codes.
|
||||
CMP.LS srcend, tmp2
|
||||
BHI shortSrc
|
||||
|
||||
// Copy literal.
|
||||
CMP $4, len
|
||||
BLO copyLiteralFinish
|
||||
|
||||
// Copy 0-3 bytes until src is aligned.
|
||||
TST $1, src
|
||||
MOVBU.NE.P 1(src), tmp1
|
||||
MOVB.NE.P tmp1, 1(dst)
|
||||
SUB.NE $1, len
|
||||
|
||||
TST $2, src
|
||||
MOVHU.NE.P 2(src), tmp2
|
||||
MOVB.NE.P tmp2, 1(dst)
|
||||
MOVW.NE tmp2 >> 8, tmp1
|
||||
MOVB.NE.P tmp1, 1(dst)
|
||||
SUB.NE $2, len
|
||||
|
||||
B copyLiteralLoopCond
|
||||
|
||||
copyLiteralLoop:
|
||||
// Aligned load, unaligned write.
|
||||
MOVW.P 4(src), tmp1
|
||||
MOVW tmp1 >> 8, tmp2
|
||||
MOVB tmp2, 1(dst)
|
||||
MOVW tmp1 >> 16, tmp3
|
||||
MOVB tmp3, 2(dst)
|
||||
MOVW tmp1 >> 24, tmp2
|
||||
MOVB tmp2, 3(dst)
|
||||
MOVB.P tmp1, 4(dst)
|
||||
copyLiteralLoopCond:
|
||||
// Loop until len-4 < 0.
|
||||
SUB.S $4, len
|
||||
BPL copyLiteralLoop
|
||||
|
||||
copyLiteralFinish:
|
||||
// Copy remaining 0-3 bytes.
|
||||
// At this point, len may be < 0, but len&3 is still accurate.
|
||||
TST $1, len
|
||||
MOVB.NE.P 1(src), tmp3
|
||||
MOVB.NE.P tmp3, 1(dst)
|
||||
TST $2, len
|
||||
MOVB.NE.P 2(src), tmp1
|
||||
MOVB.NE.P tmp1, 2(dst)
|
||||
MOVB.NE -1(src), tmp2
|
||||
MOVB.NE tmp2, -1(dst)
|
||||
|
||||
copyLiteralDone:
|
||||
// Initial part of match length.
|
||||
// This frees up the token register for reuse as offset.
|
||||
AND $15, token, len
|
||||
|
||||
CMP src, srcend
|
||||
BEQ end
|
||||
|
||||
// Read offset.
|
||||
ADD.S $2, src
|
||||
BCS shortSrc
|
||||
CMP srcend, src
|
||||
BHI shortSrc
|
||||
MOVBU -2(src), offset
|
||||
MOVBU -1(src), tmp1
|
||||
ORR.S tmp1 << 8, offset
|
||||
BEQ corrupt
|
||||
|
||||
// Read rest of match length.
|
||||
CMP $15, len
|
||||
BNE readMatchlenDone
|
||||
|
||||
readMatchlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADD.S tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readMatchlenLoop
|
||||
|
||||
readMatchlenDone:
|
||||
// Bounds check dst+len+minMatch.
|
||||
ADD.S dst, len, tmp1
|
||||
ADD.CC.S $const_minMatch, tmp1
|
||||
BCS shortDst
|
||||
CMP dstend, tmp1
|
||||
BHI shortDst
|
||||
|
||||
RSB dst, offset, match
|
||||
CMP dstorig, match
|
||||
BGE copyMatch4
|
||||
|
||||
// match < dstorig means the match starts in the dictionary,
|
||||
// at len(dict) - offset + (dst - dstorig).
|
||||
MOVW dict_base+24(FP), match
|
||||
MOVW dict_len +28(FP), dictend
|
||||
|
||||
ADD $const_minMatch, len
|
||||
|
||||
RSB dst, dstorig, tmp1
|
||||
RSB dictend, offset, tmp2
|
||||
ADD.S tmp2, tmp1
|
||||
BMI shortDict
|
||||
ADD match, dictend
|
||||
ADD tmp1, match
|
||||
|
||||
copyDict:
|
||||
MOVBU.P 1(match), tmp1
|
||||
MOVB.P tmp1, 1(dst)
|
||||
SUB.S $1, len
|
||||
CMP.NE match, dictend
|
||||
BNE copyDict
|
||||
|
||||
// If the match extends beyond the dictionary, the rest is at dstorig.
|
||||
CMP $0, len
|
||||
BEQ copyMatchDone
|
||||
MOVW dstorig, match
|
||||
B copyMatch
|
||||
|
||||
// Copy a regular match.
|
||||
// Since len+minMatch is at least four, we can do a 4× unrolled
|
||||
// byte copy loop. Using MOVW instead of four byte loads is faster,
|
||||
// but to remain portable we'd have to align match first, which is
|
||||
// too expensive. By alternating loads and stores, we also handle
|
||||
// the case offset < 4.
|
||||
copyMatch4:
|
||||
SUB.S $4, len
|
||||
MOVBU.P 4(match), tmp1
|
||||
MOVB.P tmp1, 4(dst)
|
||||
MOVBU -3(match), tmp2
|
||||
MOVB tmp2, -3(dst)
|
||||
MOVBU -2(match), tmp3
|
||||
MOVB tmp3, -2(dst)
|
||||
MOVBU -1(match), tmp1
|
||||
MOVB tmp1, -1(dst)
|
||||
BPL copyMatch4
|
||||
|
||||
// Restore len, which is now negative.
|
||||
ADD.S $4, len
|
||||
BEQ copyMatchDone
|
||||
|
||||
copyMatch:
|
||||
// Finish with a byte-at-a-time copy.
|
||||
SUB.S $1, len
|
||||
MOVBU.P 1(match), tmp2
|
||||
MOVB.P tmp2, 1(dst)
|
||||
BNE copyMatch
|
||||
|
||||
copyMatchDone:
|
||||
CMP src, srcend
|
||||
BNE loop
|
||||
|
||||
end:
|
||||
CMP $0, len
|
||||
BNE corrupt
|
||||
SUB dstorig, dst, tmp1
|
||||
MOVW tmp1, ret+36(FP)
|
||||
RET
|
||||
|
||||
// The error cases have distinct labels so we can put different
|
||||
// return codes here when debugging, or if the error returns need to
|
||||
// be changed.
|
||||
shortDict:
|
||||
shortDst:
|
||||
shortSrc:
|
||||
corrupt:
|
||||
MOVW $-1, tmp1
|
||||
MOVW tmp1, ret+36(FP)
|
||||
RET
|
||||
241 vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm64.s (generated, vendored, new file)
@@ -0,0 +1,241 @@
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
// This implementation assumes that strict alignment checking is turned off.
|
||||
// The Go compiler makes the same assumption.
|
||||
|
||||
#include "go_asm.h"
|
||||
#include "textflag.h"
|
||||
|
||||
// Register allocation.
|
||||
#define dst R0
|
||||
#define dstorig R1
|
||||
#define src R2
|
||||
#define dstend R3
|
||||
#define dstend16 R4 // dstend - 16
|
||||
#define srcend R5
|
||||
#define srcend16 R6 // srcend - 16
|
||||
#define match R7 // Match address.
|
||||
#define dict R8
|
||||
#define dictlen R9
|
||||
#define dictend R10
|
||||
#define token R11
|
||||
#define len R12 // Literal and match lengths.
|
||||
#define lenRem R13
|
||||
#define offset R14 // Match offset.
|
||||
#define tmp1 R15
|
||||
#define tmp2 R16
|
||||
#define tmp3 R17
|
||||
#define tmp4 R19
|
||||
|
||||
// func decodeBlock(dst, src, dict []byte) int
|
||||
TEXT ·decodeBlock(SB), NOFRAME+NOSPLIT, $0-80
|
||||
LDP dst_base+0(FP), (dst, dstend)
|
||||
ADD dst, dstend
|
||||
MOVD dst, dstorig
|
||||
|
||||
LDP src_base+24(FP), (src, srcend)
|
||||
CBZ srcend, shortSrc
|
||||
ADD src, srcend
|
||||
|
||||
// dstend16 = max(dstend-16, 0) and similarly for srcend16.
|
||||
SUBS $16, dstend, dstend16
|
||||
CSEL LO, ZR, dstend16, dstend16
|
||||
SUBS $16, srcend, srcend16
|
||||
CSEL LO, ZR, srcend16, srcend16
|
||||
|
||||
LDP dict_base+48(FP), (dict, dictlen)
|
||||
ADD dict, dictlen, dictend
|
||||
|
||||
loop:
|
||||
// Read token. Extract literal length.
|
||||
MOVBU.P 1(src), token
|
||||
LSR $4, token, len
|
||||
CMP $15, len
|
||||
BNE readLitlenDone
|
||||
|
||||
readLitlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADDS tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readLitlenLoop
|
||||
|
||||
readLitlenDone:
|
||||
CBZ len, copyLiteralDone
|
||||
|
||||
// Bounds check dst+len and src+len.
|
||||
ADDS dst, len, tmp1
|
||||
BCS shortSrc
|
||||
ADDS src, len, tmp2
|
||||
BCS shortSrc
|
||||
CMP dstend, tmp1
|
||||
BHI shortDst
|
||||
CMP srcend, tmp2
|
||||
BHI shortSrc
|
||||
|
||||
// Copy literal.
|
||||
SUBS $16, len
|
||||
BLO copyLiteralShort
|
||||
|
||||
copyLiteralLoop:
|
||||
LDP.P 16(src), (tmp1, tmp2)
|
||||
STP.P (tmp1, tmp2), 16(dst)
|
||||
SUBS $16, len
|
||||
BPL copyLiteralLoop
|
||||
|
||||
// Copy (final part of) literal of length 0-15.
|
||||
// If we have >=16 bytes left in src and dst, just copy 16 bytes.
|
||||
copyLiteralShort:
|
||||
CMP dstend16, dst
|
||||
CCMP LO, src, srcend16, $0b0010 // 0010 = preserve carry (LO).
|
||||
BHS copyLiteralShortEnd
|
||||
|
||||
AND $15, len
|
||||
|
||||
LDP (src), (tmp1, tmp2)
|
||||
ADD len, src
|
||||
STP (tmp1, tmp2), (dst)
|
||||
ADD len, dst
|
||||
|
||||
B copyLiteralDone
|
||||
|
||||
// Safe but slow copy near the end of src, dst.
|
||||
copyLiteralShortEnd:
|
||||
TBZ $3, len, 3(PC)
|
||||
MOVD.P 8(src), tmp1
|
||||
MOVD.P tmp1, 8(dst)
|
||||
TBZ $2, len, 3(PC)
|
||||
MOVW.P 4(src), tmp2
|
||||
MOVW.P tmp2, 4(dst)
|
||||
TBZ $1, len, 3(PC)
|
||||
MOVH.P 2(src), tmp3
|
||||
MOVH.P tmp3, 2(dst)
|
||||
TBZ $0, len, 3(PC)
|
||||
MOVBU.P 1(src), tmp4
|
||||
MOVB.P tmp4, 1(dst)
|
||||
|
||||
copyLiteralDone:
|
||||
// Initial part of match length.
|
||||
AND $15, token, len
|
||||
|
||||
CMP src, srcend
|
||||
BEQ end
|
||||
|
||||
// Read offset.
|
||||
ADDS $2, src
|
||||
BCS shortSrc
|
||||
CMP srcend, src
|
||||
BHI shortSrc
|
||||
MOVHU -2(src), offset
|
||||
CBZ offset, corrupt
|
||||
|
||||
// Read rest of match length.
|
||||
CMP $15, len
|
||||
BNE readMatchlenDone
|
||||
|
||||
readMatchlenLoop:
|
||||
CMP src, srcend
|
||||
BEQ shortSrc
|
||||
MOVBU.P 1(src), tmp1
|
||||
ADDS tmp1, len
|
||||
BVS shortDst
|
||||
CMP $255, tmp1
|
||||
BEQ readMatchlenLoop
|
||||
|
||||
readMatchlenDone:
|
||||
ADD $const_minMatch, len
|
||||
|
||||
// Bounds check dst+len.
|
||||
ADDS dst, len, tmp2
|
||||
BCS shortDst
|
||||
CMP dstend, tmp2
|
||||
BHI shortDst
|
||||
|
||||
SUB offset, dst, match
|
||||
CMP dstorig, match
|
||||
BHS copyMatchTry8
|
||||
|
||||
// match < dstorig means the match starts in the dictionary,
|
||||
// at len(dict) - offset + (dst - dstorig).
|
||||
SUB dstorig, dst, tmp1
|
||||
SUB offset, dictlen, tmp2
|
||||
ADDS tmp2, tmp1
|
||||
BMI shortDict
|
||||
ADD dict, tmp1, match
|
||||
|
||||
copyDict:
|
||||
MOVBU.P 1(match), tmp3
|
||||
MOVB.P tmp3, 1(dst)
|
||||
SUBS $1, len
|
||||
CCMP NE, dictend, match, $0b0100 // 0100 sets the Z (EQ) flag.
|
||||
BNE copyDict
|
||||
|
||||
CBZ len, copyMatchDone
|
||||
|
||||
// If the match extends beyond the dictionary, the rest is at dstorig.
|
||||
// Recompute the offset for the next check.
|
||||
MOVD dstorig, match
|
||||
SUB dstorig, dst, offset
|
||||
|
||||
copyMatchTry8:
|
||||
// Copy doublewords if both len and offset are at least eight.
|
||||
// A 16-at-a-time loop doesn't provide a further speedup.
|
||||
CMP $8, len
|
||||
CCMP HS, offset, $8, $0
|
||||
BLO copyMatchTry4
|
||||
|
||||
AND $7, len, lenRem
|
||||
SUB $8, len
|
||||
copyMatchLoop8:
|
||||
MOVD.P 8(match), tmp1
|
||||
MOVD.P tmp1, 8(dst)
|
||||
SUBS $8, len
|
||||
BPL copyMatchLoop8
|
||||
|
||||
MOVD (match)(len), tmp2 // match+len == match+lenRem-8.
|
||||
ADD lenRem, dst
|
||||
MOVD $0, len
|
||||
MOVD tmp2, -8(dst)
|
||||
B copyMatchDone
|
||||
|
||||
copyMatchTry4:
|
||||
// Copy words if both len and offset are at least four.
|
||||
CMP $4, len
|
||||
CCMP HS, offset, $4, $0
|
||||
BLO copyMatchLoop1
|
||||
|
||||
MOVWU.P 4(match), tmp2
|
||||
MOVWU.P tmp2, 4(dst)
|
||||
SUBS $4, len
|
||||
BEQ copyMatchDone
|
||||
|
||||
copyMatchLoop1:
|
||||
// Byte-at-a-time copy for small offsets <= 3.
|
||||
MOVBU.P 1(match), tmp2
|
||||
MOVB.P tmp2, 1(dst)
|
||||
SUBS $1, len
|
||||
BNE copyMatchLoop1
|
||||
|
||||
copyMatchDone:
|
||||
CMP src, srcend
|
||||
BNE loop
|
||||
|
||||
end:
|
||||
CBNZ len, corrupt
|
||||
SUB dstorig, dst, tmp1
|
||||
MOVD tmp1, ret+72(FP)
|
||||
RET
|
||||
|
||||
// The error cases have distinct labels so we can put different
|
||||
// return codes here when debugging, or if the error returns need to
|
||||
// be changed.
|
||||
shortDict:
|
||||
shortDst:
|
||||
shortSrc:
|
||||
corrupt:
|
||||
MOVD $-1, tmp1
|
||||
MOVD tmp1, ret+72(FP)
|
||||
RET
|
||||
10 vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go (generated, vendored, new file)
@@ -0,0 +1,10 @@
|
||||
//go:build (amd64 || arm || arm64) && !appengine && gc && !noasm
|
||||
// +build amd64 arm arm64
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
package lz4block
|
||||
|
||||
//go:noescape
|
||||
func decodeBlock(dst, src, dict []byte) int
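// Editorial note (not part of the original vendored file): decodeBlock reports
// failure with a negative result. The amd64 assembly distinguishes -1 (corrupt
// input), -2 (short buffer) and -3 (short dictionary); the arm and arm64 ports
// return -1 for every error, and the pure Go fallback returns -2. UncompressBlock
// in block.go only checks whether the result is negative.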
|
||||
139 vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go (generated, vendored, new file)
@@ -0,0 +1,139 @@
|
||||
//go:build (!amd64 && !arm && !arm64) || appengine || !gc || noasm
|
||||
// +build !amd64,!arm,!arm64 appengine !gc noasm
|
||||
|
||||
package lz4block
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
)
|
||||
|
||||
func decodeBlock(dst, src, dict []byte) (ret int) {
|
||||
// Restrict capacities so we don't read or write out of bounds.
|
||||
dst = dst[:len(dst):len(dst)]
|
||||
src = src[:len(src):len(src)]
|
||||
|
||||
const hasError = -2
|
||||
|
||||
if len(src) == 0 {
|
||||
return hasError
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if recover() != nil {
|
||||
ret = hasError
|
||||
}
|
||||
}()
|
||||
|
||||
var si, di uint
|
||||
for si < uint(len(src)) {
|
||||
// Literals and match lengths (token).
|
||||
b := uint(src[si])
|
||||
si++
|
||||
|
||||
// Literals.
|
||||
if lLen := b >> 4; lLen > 0 {
|
||||
switch {
|
||||
case lLen < 0xF && si+16 < uint(len(src)):
|
||||
// Shortcut 1
|
||||
// if we have enough room in src and dst, and the literals length
|
||||
// is small enough (0..14) then copy all 16 bytes, even if not all
|
||||
// are part of the literals.
|
||||
copy(dst[di:], src[si:si+16])
|
||||
si += lLen
|
||||
di += lLen
|
||||
if mLen := b & 0xF; mLen < 0xF {
|
||||
// Shortcut 2
|
||||
// if the match length (4..18) fits within the literals, then copy
|
||||
// all 18 bytes, even if not all are part of the literals.
|
||||
mLen += 4
|
||||
if offset := u16(src[si:]); mLen <= offset && offset < di {
|
||||
i := di - offset
|
||||
// The remaining buffer may not hold 18 bytes.
|
||||
// See https://github.com/pierrec/lz4/issues/51.
|
||||
if end := i + 18; end <= uint(len(dst)) {
|
||||
copy(dst[di:], dst[i:end])
|
||||
si += 2
|
||||
di += mLen
|
||||
continue
|
||||
}
|
||||
}
|
||||
}
|
||||
case lLen == 0xF:
|
||||
for {
|
||||
x := uint(src[si])
|
||||
if lLen += x; int(lLen) < 0 {
|
||||
return hasError
|
||||
}
|
||||
si++
|
||||
if x != 0xFF {
|
||||
break
|
||||
}
|
||||
}
|
||||
fallthrough
|
||||
default:
|
||||
copy(dst[di:di+lLen], src[si:si+lLen])
|
||||
si += lLen
|
||||
di += lLen
|
||||
}
|
||||
}
|
||||
|
||||
mLen := b & 0xF
|
||||
if si == uint(len(src)) && mLen == 0 {
|
||||
break
|
||||
} else if si >= uint(len(src)) {
|
||||
return hasError
|
||||
}
|
||||
|
||||
offset := u16(src[si:])
|
||||
if offset == 0 {
|
||||
return hasError
|
||||
}
|
||||
si += 2
|
||||
|
||||
// Match.
|
||||
mLen += minMatch
|
||||
if mLen == minMatch+0xF {
|
||||
for {
|
||||
x := uint(src[si])
|
||||
if mLen += x; int(mLen) < 0 {
|
||||
return hasError
|
||||
}
|
||||
si++
|
||||
if x != 0xFF {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the match.
|
||||
if di < offset {
|
||||
// The match is beyond our block, meaning the first part
|
||||
// is in the dictionary.
|
||||
fromDict := dict[uint(len(dict))+di-offset:]
|
||||
n := uint(copy(dst[di:di+mLen], fromDict))
|
||||
di += n
|
||||
if mLen -= n; mLen == 0 {
|
||||
continue
|
||||
}
|
||||
// We copied n = offset-di bytes from the dictionary,
|
||||
// then set di = di+n = offset, so the following code
|
||||
// copies from dst[di-offset:] = dst[0:].
|
||||
}
|
||||
|
||||
expanded := dst[di-offset:]
|
||||
if mLen > offset {
|
||||
// Efficiently copy the match dst[di-offset:di] into the dst slice.
|
||||
bytesToCopy := offset * (mLen / offset)
|
||||
for n := offset; n <= bytesToCopy+offset; n *= 2 {
|
||||
copy(expanded[n:], expanded[:n])
|
||||
}
|
||||
di += bytesToCopy
|
||||
mLen -= bytesToCopy
|
||||
}
|
||||
di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
|
||||
}
|
||||
|
||||
return int(di)
|
||||
}
|
||||
|
||||
func u16(p []byte) uint { return uint(binary.LittleEndian.Uint16(p)) }
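// Editorial sketch (not part of the original vendored file): the overlapping-match
// copy used by decodeBlock above. When the match length exceeds the offset, the
// match overlaps the bytes being written, so the already-decoded prefix is doubled
// repeatedly instead of copied byte by byte. For example, with offset 1 and a match
// length of 8, the single seed byte is doubled three times to produce an 8-byte
// run. exampleExpandMatch is a hypothetical standalone version of that technique.
func exampleExpandMatch(seed []byte, offset, mLen uint) []byte {
	out := make([]byte, offset+mLen)
	copy(out, seed[:offset]) // the already-decoded bytes the match refers back to
	for n := offset; n < offset+mLen; n *= 2 {
		copy(out[n:], out[:n]) // double the filled prefix
	}
	return out[offset : offset+mLen]
}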