1
0
Fork 0
forked from forgejo/forgejo

Macaron 1.5 (#12596)

* update macaron to v1.5 of fork

* update macaron to v1.5 of fork

* test gzip PR

* add push method impl to context_tests

* use proper gzip commit

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
techknowlogick 2020-08-27 22:47:17 -04:00 committed by GitHub
parent 211321fb93
commit c5d5d63c9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
53 changed files with 2622 additions and 665 deletions

View file

@ -6,6 +6,7 @@ package zstd
import (
"fmt"
"math"
"math/bits"
"github.com/klauspost/compress/zstd/internal/xxhash"
@ -23,26 +24,29 @@ type tableEntry struct {
offset int32
}
type fastEncoder struct {
o encParams
type fastBase struct {
// cur is the offset at the start of hist
cur int32
// maximum offset. Should be at least 2x block size.
maxMatchOff int32
hist []byte
crc *xxhash.Digest
table [tableSize]tableEntry
tmp [8]byte
blk *blockEnc
}
type fastEncoder struct {
fastBase
table [tableSize]tableEntry
}
// CRC returns the underlying CRC writer.
func (e *fastEncoder) CRC() *xxhash.Digest {
func (e *fastBase) CRC() *xxhash.Digest {
return e.crc
}
// AppendCRC will append the CRC to the destination slice and return it.
func (e *fastEncoder) AppendCRC(dst []byte) []byte {
func (e *fastBase) AppendCRC(dst []byte) []byte {
crc := e.crc.Sum(e.tmp[:0])
dst = append(dst, crc[7], crc[6], crc[5], crc[4])
return dst
@ -50,7 +54,7 @@ func (e *fastEncoder) AppendCRC(dst []byte) []byte {
// WindowSize returns the window size of the encoder,
// or a window size small enough to contain the input size, if > 0.
func (e *fastEncoder) WindowSize(size int) int32 {
func (e *fastBase) WindowSize(size int) int32 {
if size > 0 && size < int(e.maxMatchOff) {
b := int32(1) << uint(bits.Len(uint(size)))
// Keep minimum window.
@ -63,7 +67,7 @@ func (e *fastEncoder) WindowSize(size int) int32 {
}
// Block returns the current block.
func (e *fastEncoder) Block() *blockEnc {
func (e *fastBase) Block() *blockEnc {
return e.blk
}
@ -112,11 +116,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 2.
stepSize := int32(e.o.targetLength)
if stepSize == 0 {
stepSize++
}
stepSize++
const stepSize = 2
// TEMPLATE
const hashLog = tableBits
@ -169,9 +169,22 @@ encodeLoop:
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+6, repIndex+4, src)
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
seq.matchLen = uint32(lenght - zstdMinMatch)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@ -197,11 +210,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + 2
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
println("repeat ended", s, length)
}
break encodeLoop
@ -257,7 +270,20 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
l := e.matchlen(s+4, t+4, src) + 4
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Extend backwards
tMin := s - e.maxMatchOff
@ -294,7 +320,20 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
@ -344,6 +383,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
panic("src too big")
}
}
// Protect against e.cur wraparound.
if e.cur >= bufferReset {
for i := range e.table[:] {
@ -412,10 +452,23 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// lenght := 4 + e.matchlen(s+6, repIndex+4, src)
lenght := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
seq.matchLen = uint32(lenght - zstdMinMatch)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
@ -441,11 +494,11 @@ encodeLoop:
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + 2
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
println("repeat ended", s, length)
}
break encodeLoop
@ -464,6 +517,9 @@ encodeLoop:
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
}
break
}
@ -496,9 +552,25 @@ encodeLoop:
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && t < 0 {
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
l := int32(matchLen(src[s+4:], src[t+4:])) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Extend backwards
tMin := s - e.maxMatchOff
@ -536,7 +608,20 @@ encodeLoop:
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
@ -569,9 +654,13 @@ encodeLoop:
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
if e.cur < bufferReset {
e.cur += int32(len(src))
}
}
func (e *fastEncoder) addBlock(src []byte) int32 {
func (e *fastBase) addBlock(src []byte) int32 {
if debugAsserts && e.cur > bufferReset {
panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset))
}
@ -602,17 +691,17 @@ func (e *fastEncoder) addBlock(src []byte) int32 {
// useBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastEncoder) UseBlock(enc *blockEnc) {
func (e *fastBase) UseBlock(enc *blockEnc) {
enc.reset(e.blk)
e.blk = enc
}
func (e *fastEncoder) matchlenNoHist(s, t int32, src []byte) int32 {
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
err := fmt.Sprintf("s (%d) < 0", s)
@ -626,18 +715,17 @@ func (e *fastEncoder) matchlen(s, t int32, src []byte) int32 {
err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
panic(err)
}
}
s1 := int(s) + maxMatchLength - 4
if s1 > len(src) {
s1 = len(src)
if len(src)-int(s) > maxCompressedBlockSize {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:s1], src[t:]))
return int32(matchLen(src[s:], src[t:]))
}
// Reset the encoding table.
func (e *fastEncoder) Reset() {
func (e *fastBase) Reset(singleBlock bool) {
if e.blk == nil {
e.blk = &blockEnc{}
e.blk.init()
@ -650,7 +738,7 @@ func (e *fastEncoder) Reset() {
} else {
e.crc.Reset()
}
if cap(e.hist) < int(e.maxMatchOff*2) {
if !singleBlock && cap(e.hist) < int(e.maxMatchOff*2) {
l := e.maxMatchOff * 2
// Make it at least 1MB.
if l < 1<<20 {