1
0
Fork 0
forked from forgejo/forgejo

Vendor Update Go Libs (#13444)

* denisenkom/go-mssqldb untagged -> v0.9.0

* github.com/editorconfig/editorconfig-core-go v2.3.7 -> v2.3.8

* github.com/go-testfixtures/testfixtures v3.4.0 -> v3.4.1

* github.com/mholt/archiver v3.3.2 -> v3.5.0

* github.com/olivere/elastic v7.0.20 -> v7.0.21

* github.com/urfave/cli v1.22.4 -> v1.22.5

* github.com/xanzy/go-gitlab v0.38.1 -> v0.39.0

* github.com/yuin/goldmark-meta untagged -> v1.0.0

* github.com/ethantkoenig/rupture 0a76f03a811a -> c3b3b810dc77

* github.com/jaytaylor/html2text 8fb95d837f7d -> 3577fbdbcff7

* github.com/kballard/go-shellquote cd60e84ee657 -> 95032a82bc51

* github.com/msteinert/pam 02ccfbfaf0cc -> 913b8f8cdf8b

* github.com/unknwon/paginater 7748a72e0141 -> 042474bd0eae

* CI.restart()

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
6543 2020-11-06 19:41:42 +01:00 committed by GitHub
parent eebaa81f43
commit 30ce3731a1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
184 changed files with 12387 additions and 2975 deletions

View file

@ -0,0 +1,469 @@
package lz4block
import (
"encoding/binary"
"math/bits"
"sync"
"github.com/pierrec/lz4/v4/internal/lz4errors"
)
const (
// The following constants are used to set up the compression algorithm.
minMatch = 4 // the minimum length of a match sequence (4 bytes)
winSizeLog = 16 // log2 of the LZ4 64KiB window size limit
winSize = 1 << winSizeLog // window size in bytes
winMask = winSize - 1 // 64KiB window of previous data for dependent blocks
// hashLog determines the size of the hash table used to quickly find a previous match position.
// Its value influences the compression speed and memory usage, the lower the faster,
// but at the expense of the compression ratio.
// 16 seems to be the best compromise for fast compression.
hashLog = 16
htSize = 1 << hashLog // number of entries in the hash table
mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes.
)
// recoverBlock is meant to be deferred directly by a block routine.
// If the routine panicked and *e does not already hold an error, *e is set
// to ErrInvalidSourceShortBuffer; otherwise *e is left untouched.
func recoverBlock(e *error) {
	panicked := recover()
	if panicked == nil || *e != nil {
		return
	}
	*e = lz4errors.ErrInvalidSourceShortBuffer
}
// blockHash hashes the lower 6 bytes of x into a value < htSize.
func blockHash(x uint64) uint32 {
	const prime6bytes = 227718039650203
	// Shift out the upper 2 bytes so only the lower 6 take part in the hash.
	x <<= 64 - 48
	x *= prime6bytes
	// Keep the top hashLog bits of the product.
	return uint32(x >> (64 - hashLog))
}
// CompressBlockBound returns n + n/255 + 16 for an input of n bytes.
// The block compressors compare the destination length against this value:
// they may report input as incompressible (0, nil) only when the destination
// is smaller than it.
func CompressBlockBound(n int) int {
	const fixedOverhead = 16
	perRunOverhead := n / 255
	return n + perRunOverhead + fixedOverhead
}
// UncompressBlock decodes the LZ4 block in src into dst and returns the
// number of bytes written to dst.
// An empty src yields 0, nil. Any negative result from decodeBlock is
// reported as ErrInvalidSourceShortBuffer.
func UncompressBlock(src, dst []byte) (int, error) {
	if len(src) == 0 {
		return 0, nil
	}
	written := decodeBlock(dst, src)
	if written < 0 {
		return 0, lz4errors.ErrInvalidSourceShortBuffer
	}
	return written, nil
}
// Compressor holds the hash table state used by the fast block compressor
// (see the CompressBlock method).
type Compressor struct {
// Offsets are at most 64kiB, so we can store only the lower 16 bits of
// match positions: effectively, an offset from some 64kiB block boundary.
//
// When we retrieve such an offset, we interpret it as relative to the last
// block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000,
// depending on which of these is inside the current window. If a table
// entry was generated more than 64kiB back in the input, we find out by
// inspecting the input stream.
table [htSize]uint16
// needsReset is set by CompressBlock once it has run, so that every later
// call zeroes the table before use.
needsReset bool
}
// get returns the position of a presumptive match for the hash h, given the
// current search position si.
// The match may be a false positive due to a hash collision or an old entry.
// If si < winSize, the return value may be negative.
func (c *Compressor) get(h uint32, si int) int {
	// Stored entries hold only the low 16 bits of a position; rebuild the
	// full position relative to the 64kiB boundary at or below si.
	boundary := si &^ winMask
	pos := boundary + int(c.table[h&(htSize-1)])
	if pos < si {
		return pos
	}
	// Not strictly before si: the entry belongs to the previous 64kiB block
	// (negative when in the first block).
	return pos - winSize
}
// put records position si for hash h, keeping only the low 16 bits of si.
func (c *Compressor) put(h uint32, si int) {
	slot := h & (htSize - 1)
	c.table[slot] = uint16(si)
}
// compressorPool recycles Compressor values between CompressBlock calls.
var compressorPool = sync.Pool{
	New: func() interface{} {
		return new(Compressor)
	},
}

// CompressBlock compresses src into dst using a Compressor taken from a
// shared pool, and returns that Compressor's result unchanged.
func CompressBlock(src, dst []byte) (int, error) {
	comp := compressorPool.Get().(*Compressor)
	written, err := comp.CompressBlock(src, dst)
	compressorPool.Put(comp)
	return written, err
}
// CompressBlock compresses src into dst using the receiver's hash table and
// returns the number of bytes written to dst.
//
// It returns 0, nil when dst is smaller than CompressBlockBound(len(src)) and
// either no match was emitted or the output would not be smaller than the
// position of the final literals. It returns ErrInvalidSourceShortBuffer when
// one of the explicit bounds checks finds dst too small.
//
// NOTE(review): the token and literal-length writes to dst[di] in the match
// path are not bounds-checked explicitly, and this method has no deferred
// recover (unlike CompressorHC.CompressBlock) - confirm callers always size
// dst so these writes cannot go out of range.
func (c *Compressor) CompressBlock(src, dst []byte) (int, error) {
if c.needsReset {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.table = [htSize]uint16{}
}
c.needsReset = true // Only false on first call.
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
// si: Current position of the search.
// di: Current write position in dst.
// anchor: Position of the current literals.
var si, di, anchor int
// sn: matches may only start before this position (len(src) - mfLimit).
sn := len(src) - mfLimit
if sn <= 0 {
// Input too short to hold a match: emit everything as literals.
goto lastLiterals
}
// Fast scan strategy: the hash table only stores the last 4 bytes sequences.
for si < sn {
// Hash the next 6 bytes (sequence)...
match := binary.LittleEndian.Uint64(src[si:])
h := blockHash(match)
h2 := blockHash(match >> 8)
// We check a match at si, si+1 and si+2 and pick the first one we get.
// Checking 3 only requires us to load the source once.
ref := c.get(h, si)
ref2 := c.get(h2, si)
c.put(h, si)
c.put(h2, si+1)
offset := si - ref
if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) {
// No match. Start calculating another hash.
// The processor can usually do this out-of-order.
h = blockHash(match >> 16)
ref3 := c.get(h, si+2)
// Check the second match at si+1
si += 1
offset = si - ref2
if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) {
// No match. Check the third match at si+2
si += 1
offset = si - ref3
c.put(h, si)
if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) {
// Skip one extra byte (at si+3) before we check 3 matches again.
si += 2 + (si-anchor)>>adaptSkipLog
continue
}
}
}
// Match found.
lLen := si - anchor // Literal length.
// We already matched 4 bytes.
mLen := 4
// Extend backwards if we can, reducing literals.
tOff := si - offset - 1
for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] {
si--
tOff--
lLen--
mLen++
}
// Add the match length, so we continue search at the end.
// Use mLen to store the offset base.
si, mLen = si+mLen, si+minMatch
// Find the longest match by looking by batches of 8 bytes.
for si+8 < sn {
x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:])
if x == 0 {
si += 8
} else {
// Stop at the first non-zero (differing) byte.
si += bits.TrailingZeros64(x) >> 3
break
}
}
// mLen now becomes the match length minus minMatch, as stored in the block.
mLen = si - mLen
// Encode match length part 1 (low nibble of the token).
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
if di+lLen > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
// Advance past the literals and reserve 2 bytes for the offset.
di += lLen + 2
anchor = si
// Encode offset.
if di > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(mLen)
di++
}
// Check if we can load next values.
if si >= sn {
break
}
// Hash match end-2
h = blockHash(binary.LittleEndian.Uint64(src[si-2:]))
c.put(h, si-2)
}
lastLiterals:
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF {
dst[di] = 0xFF
di++
}
if di >= len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
if di+len(src)-anchor > len(dst) {
return 0, lz4errors.ErrInvalidSourceShortBuffer
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}
// blockHashHC hashes 4 bytes into a value < winSize.
func blockHashHC(x uint32) uint32 {
	const hasher uint32 = 2654435761 // Knuth multiplicative hash.
	product := x * hasher
	// Keep the top winSizeLog bits of the 32-bit product.
	return product >> (32 - winSizeLog)
}
// CompressorHC holds the hash and chain tables used by the chain-following
// block compressor (see the CompressBlock method).
type CompressorHC struct {
// hashTable: stores the last position found for a given hash
// chainTable: stores previous positions for a given hash
hashTable, chainTable [htSize]int
// needsReset is set by CompressBlock once it has run, so that every later
// call zeroes both tables before use.
needsReset bool
}
// compressorHCPool recycles CompressorHC values between CompressBlockHC calls.
var compressorHCPool = sync.Pool{
	New: func() interface{} {
		return new(CompressorHC)
	},
}

// CompressBlockHC compresses src into dst using a CompressorHC taken from a
// shared pool. depth is passed through to CompressorHC.CompressBlock, whose
// result is returned unchanged.
func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) {
	hc := compressorHCPool.Get().(*CompressorHC)
	written, err := hc.CompressBlock(src, dst, depth)
	compressorHCPool.Put(hc)
	return written, err
}
// CompressBlock compresses src into dst by following hash chains to find the
// longest match at each position, and returns the number of bytes written.
//
// depth bounds how many chain entries are tried per position; a depth of 0 is
// replaced by winSize. The function returns 0, nil when dst is smaller than
// CompressBlockBound(len(src)) and the data is judged incompressible. Writes
// to dst are not bounds-checked explicitly: a panic is converted by the
// deferred recoverBlock into ErrInvalidSourceShortBuffer.
func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) {
if c.needsReset {
// Zero out reused table to avoid non-deterministic output (issue #65).
c.hashTable = [htSize]int{}
c.chainTable = [htSize]int{}
}
c.needsReset = true // Only false on first call.
defer recoverBlock(&err)
// Return 0, nil only if the destination buffer size is < CompressBlockBound.
isNotCompressible := len(dst) < CompressBlockBound(len(src))
// adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible.
// This significantly speeds up incompressible data and usually has very small impact on compression.
// bytes to skip = 1 + (bytes since last match >> adaptSkipLog)
const adaptSkipLog = 7
// si: current search position, di: write position in dst,
// anchor: start of the pending literals.
var si, di, anchor int
sn := len(src) - mfLimit
if sn <= 0 {
// Input too short to hold a match: emit everything as literals.
goto lastLiterals
}
if depth == 0 {
depth = winSize
}
for si < sn {
// Hash the next 4 bytes (sequence).
match := binary.LittleEndian.Uint32(src[si:])
h := blockHashHC(match)
// Follow the chain until out of window and give the longest match.
mLen := 0
offset := 0
for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 {
// The first (mLen==0) or next byte (mLen>=minMatch) at current match length
// must match to improve on the match length.
if src[next+mLen] != src[si+mLen] {
continue
}
ml := 0
// Compare the current position with a previous with the same hash.
for ml < sn-si {
x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:])
if x == 0 {
ml += 8
} else {
// Stop at the first non-zero (differing) byte.
ml += bits.TrailingZeros64(x) >> 3
break
}
}
if ml < minMatch || ml <= mLen {
// Match too small (<minMatch) or smaller than the current match.
continue
}
// Found a longer match, keep its position and length.
mLen = ml
offset = si - next
// Try another previous position with the same hash.
}
// Record the current position at the head of its hash chain.
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
// No match found.
if mLen == 0 {
si += 1 + (si-anchor)>>adaptSkipLog
continue
}
// Match found.
// Update hash/chain tables with overlapping bytes:
// si already hashed, add everything from si+1 up to the match length.
winStart := si + 1
if ws := si + mLen - winSize; ws > winStart {
winStart = ws
}
// This loop uses its own si, shadowing the outer one.
for si, ml := winStart, si+mLen; si < ml; {
match >>= 8
match |= uint32(src[si+3]) << 24
h := blockHashHC(match)
c.chainTable[si&winMask] = c.hashTable[h]
c.hashTable[h] = si
si++
}
lLen := si - anchor
si += mLen
mLen -= minMatch // Match length does not include minMatch.
// Encode match length part 1 (low nibble of the token).
if mLen < 0xF {
dst[di] = byte(mLen)
} else {
dst[di] = 0xF
}
// Encode literals length.
if lLen < 0xF {
dst[di] |= byte(lLen << 4)
} else {
dst[di] |= 0xF0
di++
l := lLen - 0xF
for ; l >= 0xFF; l -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(l)
}
di++
// Literals.
copy(dst[di:di+lLen], src[anchor:anchor+lLen])
di += lLen
anchor = si
// Encode offset.
di += 2
dst[di-2], dst[di-1] = byte(offset), byte(offset>>8)
// Encode match length part 2.
if mLen >= 0xF {
for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(mLen)
di++
}
}
if isNotCompressible && anchor == 0 {
// Incompressible.
return 0, nil
}
// Last literals.
lastLiterals:
lLen := len(src) - anchor
if lLen < 0xF {
dst[di] = byte(lLen << 4)
} else {
dst[di] = 0xF0
di++
lLen -= 0xF
for ; lLen >= 0xFF; lLen -= 0xFF {
dst[di] = 0xFF
di++
}
dst[di] = byte(lLen)
}
di++
// Write the last literals.
if isNotCompressible && di >= anchor {
// Incompressible.
return 0, nil
}
di += copy(dst[di:di+len(src)-anchor], src[anchor:])
return di, nil
}

View file

@ -0,0 +1,88 @@
// Package lz4block provides LZ4 BlockSize types and pools of buffers.
package lz4block
import "sync"
// Supported block sizes in bytes. The first four are generated from iota:
// each one is four times the previous, starting at 1<<16.
const (
Block64Kb uint32 = 1 << (16 + iota*2)
Block256Kb
Block1Mb
Block4Mb
Block8Mb = 2 * Block4Mb
// legacyBlockSize is the capacity of the buffers held in BlockPool8M.
legacyBlockSize = Block8Mb + Block8Mb/255 + 16 // CompressBound(Block8Mb)
)
// Pools of byte slices, one per supported block size. Each pool allocates
// slices whose length equals the size named in the pool; BlockPool8M
// allocates legacyBlockSize bytes rather than Block8Mb.
var (
BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }}
BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }}
BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }}
BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }}
BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, legacyBlockSize) }}
)
// Index returns the BlockSizeIndex associated with the block size b,
// or 0 when b is not one of the known block sizes.
func Index(b uint32) BlockSizeIndex {
	var idx BlockSizeIndex // stays 0 for unknown sizes
	switch b {
	case Block8Mb: // only valid in legacy mode
		idx = 3
	case Block64Kb:
		idx = 4
	case Block256Kb:
		idx = 5
	case Block1Mb:
		idx = 6
	case Block4Mb:
		idx = 7
	}
	return idx
}
// IsValid reports whether b is a block size known to Index
// (the legacy 8Mb size included).
func IsValid(b uint32) bool {
	idx := Index(b)
	return idx != 0
}
// BlockSizeIndex is the small integer code that identifies a block size.
type BlockSizeIndex uint8

// IsValid reports whether b is one of the indexes 4, 5, 6 or 7.
func (b BlockSizeIndex) IsValid() bool {
	return b >= 4 && b <= 7
}
// Get fetches a buffer from the pool associated with b.
// For an index with no associated pool, the final type assertion on a nil
// interface panics.
func (b BlockSizeIndex) Get() []byte {
	var pool *sync.Pool
	switch b {
	case 3:
		pool = &BlockPool8M
	case 4:
		pool = &BlockPool64K
	case 5:
		pool = &BlockPool256K
	case 6:
		pool = &BlockPool1M
	case 7:
		pool = &BlockPool4M
	}
	var item interface{}
	if pool != nil {
		item = pool.Get()
	}
	return item.([]byte)
}
// Put returns buf, resliced to its full capacity, to the pool whose buffer
// size equals cap(buf). Buffers of any other capacity are silently dropped.
func Put(buf []byte) {
	size := cap(buf)
	var pool *sync.Pool
	// Safeguard: do not allow invalid buffers.
	switch uint32(size) {
	case Block64Kb:
		pool = &BlockPool64K
	case Block256Kb:
		pool = &BlockPool256K
	case Block1Mb:
		pool = &BlockPool1M
	case Block4Mb:
		pool = &BlockPool4M
	case legacyBlockSize:
		pool = &BlockPool8M
	}
	if pool == nil {
		return
	}
	pool.Put(buf[:size])
}
// CompressionLevel is the depth parameter type accepted by CompressBlockHC.
type CompressionLevel uint32
// Fast is the zero CompressionLevel.
const Fast CompressionLevel = 0

View file

@ -0,0 +1,369 @@
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// Register usage:
// AX scratch
// BX scratch
// CX scratch (literal length, then match length)
// DX token, then match offset
//
// DI &dst (write cursor)
// SI &src (read cursor)
// R8 &dst + len(dst)
// R9 &src + len(src)
// R11 &dst (start of dst, kept for bounds checks and the returned length)
// R12 short output end (R8 - 32)
// R13 short input end (R9 - 16)
// func decodeBlock(dst, src []byte) int
//
// Returns the number of bytes written to dst, -1 via err_corrupt (empty src or
// a zero match offset) or -2 via err_short_buf (a bounds check failed).
// SP offsets 0 through 48 of the 64-byte frame hold the memmove arguments and
// the registers spilled around the call.
TEXT ·decodeBlock(SB), NOSPLIT, $64-56
MOVQ dst_base+0(FP), DI
MOVQ DI, R11
MOVQ dst_len+8(FP), R8
ADDQ DI, R8
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R9
CMPQ R9, $0
JE err_corrupt
ADDQ SI, R9
// shortcut ends
// short output end
MOVQ R8, R12
SUBQ $32, R12
// short input end
MOVQ R9, R13
SUBQ $16, R13
loop:
// for si < len(src)
CMPQ SI, R9
JGE end
// token := uint32(src[si])
MOVBQZX (SI), DX
INCQ SI
// lit_len = token >> 4
// if lit_len > 0
// CX = lit_len
MOVQ DX, CX
SHRQ $4, CX
// Take the slow path unless lit_len != 0xF and both cursors are before
// their short ends.
CMPQ CX, $0xF
JEQ lit_len_loop_pre
CMPQ DI, R12
JGE lit_len_loop_pre
CMPQ SI, R13
JGE lit_len_loop_pre
// copy shortcut
// A two-stage shortcut for the most common case:
// 1) If the literal length is 0..14, and there is enough space,
// enter the shortcut and copy 16 bytes on behalf of the literals
// (in the fast mode, only 8 bytes can be safely copied this way).
// 2) Further if the match length is 4..18, copy 18 bytes in a similar
// manner; but we ensure that there's enough space in the output for
// those 18 bytes earlier, upon entering the shortcut (in other words,
// there is a combined check for both stages).
// copy literal
MOVOU (SI), X0
MOVOU X0, (DI)
ADDQ CX, DI
ADDQ CX, SI
// CX = low nibble of the token (match length before minMatch is added)
MOVQ DX, CX
ANDQ $0xF, CX
// The second stage: prepare for match copying, decode full info.
// If it doesn't work out, the info won't be wasted.
// offset := uint16(data[:2])
MOVWQZX (SI), DX
ADDQ $2, SI
// AX = match address = di - offset
MOVQ DI, AX
SUBQ DX, AX
CMPQ AX, DI
JGT err_short_buf
// if we can't do the second stage then jump straight to read the
// match length, we already have the offset.
CMPQ CX, $0xF
JEQ match_len_loop_pre
CMPQ DX, $8
JLT match_len_loop_pre
// the match must start inside dst
CMPQ AX, R11
JLT err_short_buf
// memcpy(op + 0, match + 0, 8);
MOVQ (AX), BX
MOVQ BX, (DI)
// memcpy(op + 8, match + 8, 8);
MOVQ 8(AX), BX
MOVQ BX, 8(DI)
// memcpy(op +16, match +16, 2);
MOVW 16(AX), BX
MOVW BX, 16(DI)
LEAQ 4(DI)(CX*1), DI // minmatch
// shortcut complete, load next token
JMP loop
lit_len_loop_pre:
// if lit_len > 0
CMPQ CX, $0
JEQ offset
CMPQ CX, $0xF
JNE copy_literal
lit_len_loop:
// for src[si] == 0xFF
CMPB (SI), $0xFF
JNE lit_len_finalise
// bounds check src[si+1]
LEAQ 1(SI), AX
CMPQ AX, R9
JGT err_short_buf
// lit_len += 0xFF
ADDQ $0xFF, CX
INCQ SI
JMP lit_len_loop
lit_len_finalise:
// lit_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_literal:
// bounds check src and dst
LEAQ (SI)(CX*1), AX
CMPQ AX, R9
JGT err_short_buf
LEAQ (DI)(CX*1), AX
CMPQ AX, R8
JGT err_short_buf
// Literals longer than 16 bytes always go through memmove.
CMPQ CX, $16
JGT memmove_lit
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_lit
// if len(src[si:]) < 16
MOVQ R9, AX
SUBQ SI, AX
CMPQ AX, $16
JLT memmove_lit
// Single unaligned 16-byte copy; the cursors advance by lit_len only.
MOVOU (SI), X0
MOVOU X0, (DI)
JMP finish_lit_copy
memmove_lit:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
MOVB DX, 48(SP)
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVB 48(SP), DX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
finish_lit_copy:
ADDQ CX, SI
ADDQ CX, DI
// a block may end right after its literals
CMPQ SI, R9
JGE end
offset:
// CX := mLen
// free up DX to use for offset
MOVQ DX, CX
// bounds check the two offset bytes
LEAQ 2(SI), AX
CMPQ AX, R9
JGT err_short_buf
// offset
// DX := int(src[si]) | int(src[si+1])<<8
MOVWQZX (SI), DX
ADDQ $2, SI
// 0 offset is invalid
CMPQ DX, $0
JEQ err_corrupt
ANDB $0xF, CX
match_len_loop_pre:
// if mlen != 0xF
CMPB CX, $0xF
JNE copy_match
match_len_loop:
// for src[si] == 0xFF
// match_len += 0xFF
CMPB (SI), $0xFF
JNE match_len_finalise
// bounds check src[si+1]
LEAQ 1(SI), AX
CMPQ AX, R9
JGT err_short_buf
ADDQ $0xFF, CX
INCQ SI
JMP match_len_loop
match_len_finalise:
// match_len += int(src[si])
// si++
MOVBQZX (SI), AX
ADDQ AX, CX
INCQ SI
copy_match:
// mLen += minMatch
ADDQ $4, CX
// check we have match_len bytes left in dst
// di+match_len <= len(dst)
LEAQ (DI)(CX*1), AX
CMPQ AX, R8
JGT err_short_buf
// DX = offset
// CX = match_len
// BX = &dst + (di - offset)
MOVQ DI, BX
SUBQ DX, BX
// check BX is within dst
// if BX < &dst
CMPQ BX, R11
JLT err_short_buf
// if (di - offset) + match_len < di, source and destination do not overlap
LEAQ (BX)(CX*1), AX
CMPQ DI, AX
JGT copy_interior_match
// AX := len(dst[:di])
// MOVQ DI, AX
// SUBQ R11, AX
// copy 16 bytes at a time
// if di-offset < 16 copy 16-(di-offset) bytes to di
// then do the remaining
copy_match_loop:
// Overlapping match: copy one byte at a time.
// do {
// dst[di] = dst[i]
// di++
// i++
// } while --match_len > 0
MOVB (BX), AX
MOVB AX, (DI)
INCQ DI
INCQ BX
DECQ CX
CMPQ CX, $0
JGT copy_match_loop
JMP loop
copy_interior_match:
CMPQ CX, $16
JGT memmove_match
// if len(dst[di:]) < 16
MOVQ R8, AX
SUBQ DI, AX
CMPQ AX, $16
JLT memmove_match
// Single unaligned 16-byte copy; di advances by match_len only.
MOVOU (BX), X0
MOVOU X0, (DI)
ADDQ CX, DI
JMP loop
memmove_match:
// memmove(to, from, len)
MOVQ DI, 0(SP)
MOVQ BX, 8(SP)
MOVQ CX, 16(SP)
// spill
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP) // need len to inc SI, DI after
CALL runtime·memmove(SB)
// restore registers
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
// recalc initial values
MOVQ dst_base+0(FP), R8
MOVQ R8, R11 // TODO: make these sensible numbers
ADDQ dst_len+8(FP), R8
MOVQ src_base+24(FP), R9
ADDQ src_len+32(FP), R9
MOVQ R8, R12
SUBQ $32, R12
MOVQ R9, R13
SUBQ $16, R13
ADDQ CX, DI
JMP loop
err_corrupt:
// return -1
MOVQ $-1, ret+48(FP)
RET
err_short_buf:
// return -2
MOVQ $-2, ret+48(FP)
RET
end:
// return di - &dst, the number of bytes written
SUBQ R11, DI
MOVQ DI, ret+48(FP)
RET

View file

@ -0,0 +1,201 @@
// +build gc
// +build !noasm
#include "textflag.h"
// Register allocation.
#define dst R0
#define dstorig R1
#define src R2
#define dstend R3
#define srcend R4
#define match R5 // Match address.
#define token R6
#define len R7 // Literal and match lengths.
#define offset R6 // Match offset; overlaps with token.
#define tmp1 R8
#define tmp2 R9
#define tmp3 R12
#define minMatch $4
// func decodeBlock(dst, src []byte) int
//
// Returns the number of bytes written to dst, or -1 on any error
// (the shortDst, shortSrc and corrupt labels share one return path).
TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28
MOVW dst_base +0(FP), dst
MOVW dst_len +4(FP), dstend
MOVW src_base+12(FP), src
MOVW src_len +16(FP), srcend
// An empty src is reported as an error.
CMP $0, srcend
BEQ shortSrc
// Turn the lengths into end pointers and remember the start of dst.
ADD dst, dstend
ADD src, srcend
MOVW dst, dstorig
loop:
// Read token. Extract literal length.
MOVBU.P 1(src), token
MOVW token >> 4, len
CMP $15, len
BNE readLitlenDone
readLitlenLoop:
// Add extension bytes to len until one is not 255.
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD tmp1, len
CMP $255, tmp1
BEQ readLitlenLoop
readLitlenDone:
CMP $0, len
BEQ copyLiteralDone
// Bounds check dst+len and src+len.
ADD dst, len, tmp1
CMP dstend, tmp1
//BHI shortDst // Uncomment for distinct error codes.
ADD src, len, tmp2
CMP.LS srcend, tmp2
BHI shortSrc
// Copy literal.
CMP $4, len
BLO copyLiteralFinish
// Copy 0-3 bytes until src is aligned.
TST $1, src
MOVBU.NE.P 1(src), tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $1, len
TST $2, src
MOVHU.NE.P 2(src), tmp2
MOVB.NE.P tmp2, 1(dst)
MOVW.NE tmp2 >> 8, tmp1
MOVB.NE.P tmp1, 1(dst)
SUB.NE $2, len
B copyLiteralLoopCond
copyLiteralLoop:
// Aligned load, unaligned write.
MOVW.P 4(src), tmp1
MOVW tmp1 >> 8, tmp2
MOVB tmp2, 1(dst)
MOVW tmp1 >> 16, tmp3
MOVB tmp3, 2(dst)
MOVW tmp1 >> 24, tmp2
MOVB tmp2, 3(dst)
MOVB.P tmp1, 4(dst)
copyLiteralLoopCond:
// Loop until len-4 < 0.
SUB.S $4, len
BPL copyLiteralLoop
// Restore len, which is now negative.
ADD $4, len
copyLiteralFinish:
// Copy remaining 0-3 bytes.
TST $2, len
MOVHU.NE.P 2(src), tmp2
MOVB.NE.P tmp2, 1(dst)
MOVW.NE tmp2 >> 8, tmp1
MOVB.NE.P tmp1, 1(dst)
TST $1, len
MOVBU.NE.P 1(src), tmp1
MOVB.NE.P tmp1, 1(dst)
copyLiteralDone:
// A block may end right after its literals.
CMP src, srcend
BEQ end
// Initial part of match length.
// This frees up the token register for reuse as offset.
AND $15, token, len
// Read offset.
ADD $2, src
CMP srcend, src
BHI shortSrc
MOVBU -2(src), offset
MOVBU -1(src), tmp1
ORR tmp1 << 8, offset
// A zero offset is invalid.
CMP $0, offset
BEQ corrupt
// Read rest of match length.
CMP $15, len
BNE readMatchlenDone
readMatchlenLoop:
// Add extension bytes to len until one is not 255.
CMP src, srcend
BEQ shortSrc
MOVBU.P 1(src), tmp1
ADD tmp1, len
CMP $255, tmp1
BEQ readMatchlenLoop
readMatchlenDone:
ADD minMatch, len
// Bounds check dst+len and match = dst-offset.
ADD dst, len, tmp1
CMP dstend, tmp1
//BHI shortDst // Uncomment for distinct error codes.
SUB offset, dst, match
CMP.LS match, dstorig
BHI corrupt
// If the offset is at least four (len is, because of minMatch),
// do a four-way unrolled byte copy loop. Using MOVD instead of four
// byte loads is much faster, but to remain portable we'd have to
// align match first, which in turn is too expensive.
CMP $4, offset
BLO copyMatch
SUB $4, len
copyMatch4:
MOVBU.P 4(match), tmp1
MOVB.P tmp1, 4(dst)
MOVBU -3(match), tmp2
MOVB tmp2, -3(dst)
MOVBU -2(match), tmp3
MOVB tmp3, -2(dst)
MOVBU -1(match), tmp1
MOVB tmp1, -1(dst)
SUB.S $4, len
BPL copyMatch4
// Restore len, which is now negative.
ADD.S $4, len
BEQ copyMatchDone
copyMatch:
// Simple byte-at-a-time copy.
SUB.S $1, len
MOVBU.P 1(match), tmp2
MOVB.P tmp2, 1(dst)
BNE copyMatch
copyMatchDone:
// Keep decoding until src is exhausted.
CMP src, srcend
BNE loop
end:
// Return dst - dstorig, the number of bytes written.
SUB dstorig, dst, tmp1
MOVW tmp1, ret+24(FP)
RET
// The three error cases have distinct labels so we can put different
// return codes here when debugging, or if the error returns need to
// be changed.
shortDst:
shortSrc:
corrupt:
MOVW $-1, tmp1
MOVW tmp1, ret+24(FP)
RET

View file

@ -0,0 +1,9 @@
// +build amd64 arm
// +build !appengine
// +build gc
// +build !noasm
package lz4block
// decodeBlock is implemented in assembly for the platforms selected by the
// build constraints of this file. UncompressBlock treats a non-negative
// result as the number of bytes written to dst and a negative one as failure.
//
//go:noescape
func decodeBlock(dst, src []byte) int

View file

@ -0,0 +1,100 @@
// +build !amd64,!arm appengine !gc noasm
package lz4block
// decodeBlock decodes the LZ4 block in src into dst and returns the number of
// bytes written to dst, or hasError (-2) when the input is invalid.
//
// Most bounds checking is implicit: an out-of-range index or slice panics,
// and the deferred function turns any panic into hasError.
func decodeBlock(dst, src []byte) (ret int) {
const hasError = -2
defer func() {
if recover() != nil {
ret = hasError
}
}()
// si and di are the read position in src and the write position in dst.
var si, di uint
for {
// Literals and match lengths (token).
b := uint(src[si])
si++
// Literals.
if lLen := b >> 4; lLen > 0 {
switch {
case lLen < 0xF && si+16 < uint(len(src)):
// Shortcut 1
// if we have enough room in src and dst, and the literals length
// is small enough (0..14) then copy all 16 bytes, even if not all
// are part of the literals.
copy(dst[di:], src[si:si+16])
si += lLen
di += lLen
if mLen := b & 0xF; mLen < 0xF {
// Shortcut 2
// if the match length (4..18) does not exceed the offset (so the
// match does not overlap its own output), then copy all 18 bytes,
// even if not all are part of the match.
mLen += 4
if offset := uint(src[si]) | uint(src[si+1])<<8; mLen <= offset {
i := di - offset
end := i + 18
if end > uint(len(dst)) {
// The remaining buffer may not hold 18 bytes.
// See https://github.com/pierrec/lz4/issues/51.
end = uint(len(dst))
}
copy(dst[di:], dst[i:end])
si += 2
di += mLen
continue
}
}
case lLen == 0xF:
// Extended literal length: add bytes until one is not 0xFF.
for src[si] == 0xFF {
lLen += 0xFF
si++
}
lLen += uint(src[si])
si++
fallthrough
default:
copy(dst[di:di+lLen], src[si:si+lLen])
si += lLen
di += lLen
}
}
// A block ends right after the literals of its last sequence.
if si == uint(len(src)) {
return int(di)
} else if si > uint(len(src)) {
return hasError
}
// Match offset: 2 bytes, little endian. A zero offset is invalid.
offset := uint(src[si]) | uint(src[si+1])<<8
if offset == 0 {
return hasError
}
si += 2
// Match.
mLen := b & 0xF
if mLen == 0xF {
// Extended match length: add bytes until one is not 0xFF.
for src[si] == 0xFF {
mLen += 0xFF
si++
}
mLen += uint(src[si])
si++
}
mLen += minMatch
// Copy the match.
expanded := dst[di-offset:]
if mLen > offset {
// Efficiently copy the match dst[di-offset:di] into the dst slice.
// The match overlaps its own output, so the copied region is doubled
// on every iteration.
bytesToCopy := offset * (mLen / offset)
for n := offset; n <= bytesToCopy+offset; n *= 2 {
copy(expanded[n:], expanded[:n])
}
di += bytesToCopy
mLen -= bytesToCopy
}
di += uint(copy(dst[di:di+mLen], expanded[:mLen]))
}
}
}