1
0
Fork 0
forked from forgejo/forgejo

Vendor Update (#16121)

* update github.com/PuerkitoBio/goquery

* update github.com/alecthomas/chroma

* update github.com/blevesearch/bleve/v2

* update github.com/caddyserver/certmagic

* update github.com/go-enry/go-enry/v2

* update github.com/go-git/go-billy/v5

* update github.com/go-git/go-git/v5

* update github.com/go-redis/redis/v8

* update github.com/go-testfixtures/testfixtures/v3

* update github.com/jaytaylor/html2text

* update github.com/json-iterator/go

* update github.com/klauspost/compress

* update github.com/markbates/goth

* update github.com/mattn/go-isatty

* update github.com/mholt/archiver/v3

* update github.com/microcosm-cc/bluemonday

* update github.com/minio/minio-go/v7

* update github.com/prometheus/client_golang

* update github.com/unrolled/render

* update github.com/xanzy/go-gitlab

* update github.com/yuin/goldmark

* update github.com/yuin/goldmark-highlighting

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
6543 2021-06-10 16:44:25 +02:00 committed by GitHub
parent f088dc4ea1
commit 86e2789960
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
819 changed files with 38072 additions and 34969 deletions

View file

@ -644,7 +644,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logNewTablePenalty = 8
d.w.logNewTablePenalty = 10
d.window = make([]byte, 32<<10)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff

View file

@ -45,7 +45,7 @@ const (
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history.
allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)

View file

@ -6,6 +6,7 @@ package flate
import (
"encoding/binary"
"fmt"
"io"
)
@ -27,7 +28,7 @@ const (
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
bufferFlushSize = 246
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
@ -59,19 +60,31 @@ var offsetExtraBits = [64]int8{
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
}
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
var offsetCombined = [32]uint32{}
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
func init() {
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
}
for i := range offsetCombined[:] {
// Don't use extended window values...
if offsetBase[i] > 0x006000 {
continue
}
offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
}
}
// The odd order in which the codegen code sizes are written.
@ -88,15 +101,16 @@ type huffmanBitWriter struct {
bits uint64
nbits uint16
nbytes uint8
lastHuffMan bool
literalEncoding *huffmanEncoder
tmpLitEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
bytes [256 + 8]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
@ -128,6 +142,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalEncoding: newHuffmanEncoder(literalCount),
tmpLitEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
@ -745,9 +760,31 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]
// Go 1.16 LOVES having these on stack.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
for _, t := range tokens {
if t < matchType {
w.writeCode(lits[t.literal()])
//w.writeCode(lits[t.literal()])
c := lits[t.literal()]
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
continue
}
@ -759,38 +796,99 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode&31]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
//w.writeBits(extraLength, extraLengthBits)
extraLength := int32(length - lengthBase[lengthCode&31])
w.writeBits(extraLength, extraLengthBits)
bits |= uint64(extraLength) << nbits
nbits += extraLengthBits
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
offsetCode := offset >> 16
offset &= matchOffsetOnlyMask
if false {
w.writeCode(offs[offsetCode&31])
} else {
// inlined
c := offs[offsetCode&31]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
c := offs[offsetCode]
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode&63])
w.writeBits(extraOffset, extraOffsetBits)
offsetComb := offsetCombined[offsetCode]
if offsetComb > 1<<16 {
//w.writeBits(extraOffset, extraOffsetBits)
bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << nbits
nbits += uint16(offsetComb >> 16)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
}
// Restore...
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
if deferEOB {
w.writeCode(leCodes[endBlockMarker])
}
@ -825,13 +923,28 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}
// Fill is rarely better...
const fill = false
const numLiterals = endBlockMarker + 1
const numOffsets = 1
// Add everything as literals
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + len(input)/32
histogram(input, w.literalFreq[:numLiterals], fill)
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
// Clear fill...
for i := range w.literalFreq[:numLiterals] {
w.literalFreq[i] = 0
}
histogram(input, w.literalFreq[:numLiterals], false)
}
estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
estBits += w.lastHeader
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
@ -839,33 +952,31 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize < estBits {
if storable && ssize <= estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
reuseSize := 0
if w.lastHeader > 0 {
reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256])
reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])
if estBits < reuseSize {
if debugDeflate {
//fmt.Println("not reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
}
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
} else if debugDeflate {
fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
}
}
const numLiterals = endBlockMarker + 1
const numOffsets = 1
count := 0
if w.lastHeader == 0 {
if !eof && !sync {
// Generate a slightly suboptimal tree that can be used for all.
fillHist(w.literalFreq[:numLiterals])
}
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
// Use the temp encoding, so swap.
w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
@ -876,34 +987,47 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHuffMan = true
w.lastHeader, _ = w.headerSize()
if debugDeflate {
count += w.lastHeader
fmt.Println("header:", count/8)
}
}
encoding := w.literalEncoding.codes[:257]
encoding := w.literalEncoding.codes[:256]
// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
if n >= bufferFlushSize {
bits |= uint64(c.code) << nbits
nbits += c.len
if debugDeflate {
count += int(c.len)
}
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
n = 0
nbytes = 0
return
}
w.write(w.bytes[:n])
n = 0
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
w.nbytes = n
}
}
// Restore...
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
if debugDeflate {
fmt.Println("wrote", count/8, "bytes")
}
if eof || sync {
w.writeCode(encoding[endBlockMarker])
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}

View file

@ -21,9 +21,13 @@ type hcode struct {
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
codes []hcode
bitCount [17]int32
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
freqcache [literalCount + 1]literalNode
}
type literalNode struct {
@ -132,6 +136,21 @@ func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
return total
}
// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
code := h.codes[i]
if code.len == 0 {
return math.MaxInt32
}
total += int(f) * int(code.len)
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
@ -291,12 +310,6 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
@ -330,10 +343,14 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
h.assignEncodingAndSize(bitCount, list)
}
// atLeastOne clamps the result between 1 and 15.
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
if v > 15 {
return 15
}
return v
}
@ -346,31 +363,12 @@ func fillHist(b []uint16) {
}
}
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) (bits int) {
func histogram(b []byte, h []uint16, fill bool) {
h = h[:256]
for _, t := range b {
h[t]++
}
total := len(b)
if fill {
for _, v := range h {
if v == 0 {
total++
}
}
fillHist(h)
}
invTotal := 1.0 / float32(total)
shannon := float32(0.0)
for _, v := range h {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
return int(shannon + 0.99)
}

View file

@ -182,12 +182,27 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
// Test current
t2 := eLong - e.cur - l
off := s - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--

View file

@ -211,6 +211,31 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit {
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current
t2 := eLong.Cur.offset - e.cur - l
off := s - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
// Test next:
t2 = eLong.Prev.offset - e.cur - l
off := s - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--

View file

@ -13,14 +13,17 @@ import (
)
const (
// From top
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
// 5 bits offsetcode
// 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
matchOffsetOnlyMask = 0xffff
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
@ -187,7 +190,7 @@ func (t *tokens) indexTokens(in []token) {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset())
t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
}
}
@ -232,7 +235,7 @@ func (t *tokens) EstimatedBits() int {
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
// Just add 15 for EOB
@ -240,7 +243,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
@ -251,7 +254,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
@ -270,11 +273,13 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oCode := offsetCode(xoffset)
xoffset |= oCode << 16
t.nLits++
lengthCode := lengthCodes1[uint8(xlength)] & 31
t.extraHist[lengthCodes1[uint8(xlength)]]++
t.offHist[oCode]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[offsetCode(xoffset)&31]++
t.n++
}
@ -286,7 +291,8 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset) & 31
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
@ -294,12 +300,11 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
xl = 258 - baseMatchLength
}
xlength -= xl
xl -= 3
xl -= baseMatchLength
t.nLits++
lengthCode := lengthCodes1[uint8(xl)] & 31
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.extraHist[lengthCodes1[uint8(xl)]]++
t.offHist[oc]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
}