Vendor Update (#16121)

* update github.com/PuerkitoBio/goquery * update github.com/alecthomas/chroma * update github.com/blevesearch/bleve/v2 * update github.com/caddyserver/certmagic * update github.com/go-enry/go-enry/v2 * update github.com/go-git/go-billy/v5 * update github.com/go-git/go-git/v5 * update github.com/go-redis/redis/v8 * update github.com/go-testfixtures/testfixtures/v3 * update github.com/jaytaylor/html2text * update github.com/json-iterator/go * update github.com/klauspost/compress * update github.com/markbates/goth * update github.com/mattn/go-isatty * update github.com/mholt/archiver/v3 * update github.com/microcosm-cc/bluemonday * update github.com/minio/minio-go/v7 * update github.com/prometheus/client_golang * update github.com/unrolled/render * update github.com/xanzy/go-gitlab * update github.com/yuin/goldmark * update github.com/yuin/goldmark-highlighting Co-authored-by: techknowlogick <techknowlogick@gitea.io>
2021-06-10 16:44:25 +02:00 · 2021-06-10 16:44:25 +02:00 · 86e2789960
commit 86e2789960
parent f088dc4ea1
819 changed files with 38072 additions and 34969 deletions
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@ -644,7 +644,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).store
 	case level == ConstantCompression:
-		d.w.logNewTablePenalty = 8
+		d.w.logNewTablePenalty = 10
 		d.window = make([]byte, 32<<10)
 		d.fill = (*compressor).fillBlock
 		d.step = (*compressor).storeHuff
--- a/vendor/github.com/klauspost/compress/flate/fast_encoder.go
+++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go
@ -45,7 +45,7 @@ const (

 	bTableBits   = 17                                               // Bits used in the big tables
 	bTableSize   = 1 << bTableBits                                  // Size of the table
-	allocHistory = maxStoreBlockSize * 10                           // Size to preallocate for history.
+	allocHistory = maxStoreBlockSize * 5                            // Size to preallocate for history.
 	bufferReset  = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
 )

--- a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go
@ -6,6 +6,7 @@ package flate

 import (
 	"encoding/binary"
+	"fmt"
 	"io"
 )

@ -27,7 +28,7 @@ const (
 	// after which bytes are flushed to the writer.
 	// Should preferably be a multiple of 6, since
 	// we accumulate 6 bytes between writes to the buffer.
-	bufferFlushSize = 240
+	bufferFlushSize = 246

 	// bufferSize is the actual output byte buffer size.
 	// It must have additional headroom for a flush
@ -59,19 +60,31 @@ var offsetExtraBits = [64]int8{
 	14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
 }

-var offsetBase = [64]uint32{
-	/* normal deflate */
-	0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
-	0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
-	0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
-	0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
-	0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
-	0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+var offsetCombined = [32]uint32{}

-	/* extended window */
-	0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
-	0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
-	0x100000, 0x180000, 0x200000, 0x300000,
+func init() {
+	var offsetBase = [64]uint32{
+		/* normal deflate */
+		0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
+		0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
+		0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
+		0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
+		0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
+		0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
+
+		/* extended window */
+		0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
+		0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
+		0x100000, 0x180000, 0x200000, 0x300000,
+	}
+
+	for i := range offsetCombined[:] {
+		// Don't use extended window values...
+		if offsetBase[i] > 0x006000 {
+			continue
+		}
+		offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
+	}
 }

 // The odd order in which the codegen code sizes are written.
@ -88,15 +101,16 @@ type huffmanBitWriter struct {
 	bits            uint64
 	nbits           uint16
 	nbytes          uint8
+	lastHuffMan     bool
 	literalEncoding *huffmanEncoder
+	tmpLitEncoding  *huffmanEncoder
 	offsetEncoding  *huffmanEncoder
 	codegenEncoding *huffmanEncoder
 	err             error
 	lastHeader      int
 	// Set between 0 (reused block can be up to 2x the size)
 	logNewTablePenalty uint
-	lastHuffMan        bool
-	bytes              [256]byte
+	bytes              [256 + 8]byte
 	literalFreq        [lengthCodesStart + 32]uint16
 	offsetFreq         [32]uint16
 	codegenFreq        [codegenCodeCount]uint16
@ -128,6 +142,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
 	return &huffmanBitWriter{
 		writer:          w,
 		literalEncoding: newHuffmanEncoder(literalCount),
+		tmpLitEncoding:  newHuffmanEncoder(literalCount),
 		codegenEncoding: newHuffmanEncoder(codegenCodeCount),
 		offsetEncoding:  newHuffmanEncoder(offsetCodeCount),
 	}
@ -745,9 +760,31 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 	offs := oeCodes[:32]
 	lengths := leCodes[lengthCodesStart:]
 	lengths = lengths[:32]
+
+	// Go 1.16 LOVES having these on stack.
+	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
+
 	for _, t := range tokens {
 		if t < matchType {
-			w.writeCode(lits[t.literal()])
+			//w.writeCode(lits[t.literal()])
+			c := lits[t.literal()]
+			bits |= uint64(c.code) << nbits
+			nbits += c.len
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
 			continue
 		}

@ -759,38 +796,99 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
 		} else {
 			// inlined
 			c := lengths[lengthCode&31]
-			w.bits |= uint64(c.code) << w.nbits
-			w.nbits += c.len
-			if w.nbits >= 48 {
-				w.writeOutBits()
+			bits |= uint64(c.code) << nbits
+			nbits += c.len
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
 			}
 		}

 		extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
 		if extraLengthBits > 0 {
+			//w.writeBits(extraLength, extraLengthBits)
 			extraLength := int32(length - lengthBase[lengthCode&31])
-			w.writeBits(extraLength, extraLengthBits)
+			bits |= uint64(extraLength) << nbits
+			nbits += extraLengthBits
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
 		}
 		// Write the offset
 		offset := t.offset()
-		offsetCode := offsetCode(offset)
+		offsetCode := offset >> 16
+		offset &= matchOffsetOnlyMask
 		if false {
 			w.writeCode(offs[offsetCode&31])
 		} else {
 			// inlined
-			c := offs[offsetCode&31]
-			w.bits |= uint64(c.code) << w.nbits
-			w.nbits += c.len
-			if w.nbits >= 48 {
-				w.writeOutBits()
+			c := offs[offsetCode]
+			bits |= uint64(c.code) << nbits
+			nbits += c.len
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
 			}
 		}
-		extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
-		if extraOffsetBits > 0 {
-			extraOffset := int32(offset - offsetBase[offsetCode&63])
-			w.writeBits(extraOffset, extraOffsetBits)
+		offsetComb := offsetCombined[offsetCode]
+		if offsetComb > 1<<16 {
+			//w.writeBits(extraOffset, extraOffsetBits)
+			bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << nbits
+			nbits += uint16(offsetComb >> 16)
+			if nbits >= 48 {
+				binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+				//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+				bits >>= 48
+				nbits -= 48
+				nbytes += 6
+				if nbytes >= bufferFlushSize {
+					if w.err != nil {
+						nbytes = 0
+						return
+					}
+					_, w.err = w.writer.Write(w.bytes[:nbytes])
+					nbytes = 0
+				}
+			}
 		}
 	}
+	// Restore...
+	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
 	if deferEOB {
 		w.writeCode(leCodes[endBlockMarker])
 	}
@ -825,13 +923,28 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
 		}
 	}

+	// Fill is rarely better...
+	const fill = false
+	const numLiterals = endBlockMarker + 1
+	const numOffsets = 1
+
 	// Add everything as literals
 	// We have to estimate the header size.
 	// Assume header is around 70 bytes:
 	// https://stackoverflow.com/a/25454430
 	const guessHeaderSizeBits = 70 * 8
-	estBits := histogramSize(input, w.literalFreq[:], !eof && !sync)
-	estBits += w.lastHeader + len(input)/32
+	histogram(input, w.literalFreq[:numLiterals], fill)
+	w.literalFreq[endBlockMarker] = 1
+	w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
+	if fill {
+		// Clear fill...
+		for i := range w.literalFreq[:numLiterals] {
+			w.literalFreq[i] = 0
+		}
+		histogram(input, w.literalFreq[:numLiterals], false)
+	}
+	estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
+	estBits += w.lastHeader
 	if w.lastHeader == 0 {
 		estBits += guessHeaderSizeBits
 	}
@ -839,33 +952,31 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {

 	// Store bytes, if we don't get a reasonable improvement.
 	ssize, storable := w.storedSize(input)
-	if storable && ssize < estBits {
+	if storable && ssize <= estBits {
 		w.writeStoredHeader(len(input), eof)
 		w.writeBytes(input)
 		return
 	}

-	reuseSize := 0
 	if w.lastHeader > 0 {
-		reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256])
+		reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])

 		if estBits < reuseSize {
+			if debugDeflate {
+				//fmt.Println("not reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
+			}
 			// We owe an EOB
 			w.writeCode(w.literalEncoding.codes[endBlockMarker])
 			w.lastHeader = 0
+		} else if debugDeflate {
+			fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
 		}
 	}

-	const numLiterals = endBlockMarker + 1
-	const numOffsets = 1
+	count := 0
 	if w.lastHeader == 0 {
-		if !eof && !sync {
-			// Generate a slightly suboptimal tree that can be used for all.
-			fillHist(w.literalFreq[:numLiterals])
-		}
-		w.literalFreq[endBlockMarker] = 1
-		w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
-
+		// Use the temp encoding, so swap.
+		w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
 		// Generate codegen and codegenFrequencies, which indicates how to encode
 		// the literalEncoding and the offsetEncoding.
 		w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
@ -876,34 +987,47 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
 		w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
 		w.lastHuffMan = true
 		w.lastHeader, _ = w.headerSize()
+		if debugDeflate {
+			count += w.lastHeader
+			fmt.Println("header:", count/8)
+		}
 	}

-	encoding := w.literalEncoding.codes[:257]
+	encoding := w.literalEncoding.codes[:256]
+	// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
+	bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
 	for _, t := range input {
 		// Bitwriting inlined, ~30% speedup
 		c := encoding[t]
-		w.bits |= uint64(c.code) << w.nbits
-		w.nbits += c.len
-		if w.nbits >= 48 {
-			bits := w.bits
-			w.bits >>= 48
-			w.nbits -= 48
-			n := w.nbytes
-			binary.LittleEndian.PutUint64(w.bytes[n:], bits)
-			n += 6
-			if n >= bufferFlushSize {
+		bits |= uint64(c.code) << nbits
+		nbits += c.len
+		if debugDeflate {
+			count += int(c.len)
+		}
+		if nbits >= 48 {
+			binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
+			//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
+			bits >>= 48
+			nbits -= 48
+			nbytes += 6
+			if nbytes >= bufferFlushSize {
 				if w.err != nil {
-					n = 0
+					nbytes = 0
 					return
 				}
-				w.write(w.bytes[:n])
-				n = 0
+				_, w.err = w.writer.Write(w.bytes[:nbytes])
+				nbytes = 0
 			}
-			w.nbytes = n
 		}
 	}
+	// Restore...
+	w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
+
+	if debugDeflate {
+		fmt.Println("wrote", count/8, "bytes")
+	}
 	if eof || sync {
-		w.writeCode(encoding[endBlockMarker])
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
 		w.lastHeader = 0
 		w.lastHuffMan = false
 	}
--- a/vendor/github.com/klauspost/compress/flate/huffman_code.go
+++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go
@ -21,9 +21,13 @@ type hcode struct {
 }

 type huffmanEncoder struct {
-	codes     []hcode
-	freqcache []literalNode
-	bitCount  [17]int32
+	codes    []hcode
+	bitCount [17]int32
+
+	// Allocate a reusable buffer with the longest possible frequency table.
+	// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
+	// The largest of these is literalCount, so we allocate for that case.
+	freqcache [literalCount + 1]literalNode
 }

 type literalNode struct {
@ -132,6 +136,21 @@ func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
 	return total
 }

+// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
+func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
+	var total int
+	for i, f := range freq {
+		if f != 0 {
+			code := h.codes[i]
+			if code.len == 0 {
+				return math.MaxInt32
+			}
+			total += int(f) * int(code.len)
+		}
+	}
+	return total
+}
+
 // Return the number of literals assigned to each bit size in the Huffman encoding
 //
 // This method is only called when list.length >= 3
@ -291,12 +310,6 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
 // freq  An array of frequencies, in which frequency[i] gives the frequency of literal i.
 // maxBits  The maximum number of bits to use for any literal.
 func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
-	if h.freqcache == nil {
-		// Allocate a reusable buffer with the longest possible frequency table.
-		// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
-		// The largest of these is literalCount, so we allocate for that case.
-		h.freqcache = make([]literalNode, literalCount+1)
-	}
 	list := h.freqcache[:len(freq)+1]
 	// Number of non-zero literals
 	count := 0
@ -330,10 +343,14 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
 	h.assignEncodingAndSize(bitCount, list)
 }

+// atLeastOne clamps the result between 1 and 15.
 func atLeastOne(v float32) float32 {
 	if v < 1 {
 		return 1
 	}
+	if v > 15 {
+		return 15
+	}
 	return v
 }

@ -346,31 +363,12 @@ func fillHist(b []uint16) {
 	}
 }

-// histogramSize accumulates a histogram of b in h.
-// An estimated size in bits is returned.
-// len(h) must be >= 256, and h's elements must be all zeroes.
-func histogramSize(b []byte, h []uint16, fill bool) (bits int) {
+func histogram(b []byte, h []uint16, fill bool) {
 	h = h[:256]
 	for _, t := range b {
 		h[t]++
 	}
-	total := len(b)
 	if fill {
-		for _, v := range h {
-			if v == 0 {
-				total++
-			}
-		}
+		fillHist(h)
 	}
-
-	invTotal := 1.0 / float32(total)
-	shannon := float32(0.0)
-	for _, v := range h {
-		if v > 0 {
-			n := float32(v)
-			shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
-		}
-	}
-
-	return int(shannon + 0.99)
 }
--- a/vendor/github.com/klauspost/compress/flate/level5.go
+++ b/vendor/github.com/klauspost/compress/flate/level5.go
@ -182,12 +182,27 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
 		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
 		// them as literal bytes.

-		// Extend the 4-byte match as long as possible.
 		if l == 0 {
+			// Extend the 4-byte match as long as possible.
 			l = e.matchlenLong(s+4, t+4, src) + 4
 		} else if l == maxMatchLength {
 			l += e.matchlenLong(s+l, t+l, src)
 		}
+
+		// Try to locate a better match by checking the end of best match...
+		if sAt := s + l; l < 30 && sAt < sLimit {
+			eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
+			// Test current
+			t2 := eLong - e.cur - l
+			off := s - t2
+			if t2 >= 0 && off < maxMatchOffset && off > 0 {
+				if l2 := e.matchlenLong(s, t2, src); l2 > l {
+					t = t2
+					l = l2
+				}
+			}
+		}
+
 		// Extend backwards
 		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
 			s--
--- a/vendor/github.com/klauspost/compress/flate/level6.go
+++ b/vendor/github.com/klauspost/compress/flate/level6.go
@ -211,6 +211,31 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
 			l += e.matchlenLong(s+l, t+l, src)
 		}

+		// Try to locate a better match by checking the end-of-match...
+		if sAt := s + l; sAt < sLimit {
+			eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
+			// Test current
+			t2 := eLong.Cur.offset - e.cur - l
+			off := s - t2
+			if off < maxMatchOffset {
+				if off > 0 && t2 >= 0 {
+					if l2 := e.matchlenLong(s, t2, src); l2 > l {
+						t = t2
+						l = l2
+					}
+				}
+				// Test next:
+				t2 = eLong.Prev.offset - e.cur - l
+				off := s - t2
+				if off > 0 && off < maxMatchOffset && t2 >= 0 {
+					if l2 := e.matchlenLong(s, t2, src); l2 > l {
+						t = t2
+						l = l2
+					}
+				}
+			}
+		}
+
 		// Extend backwards
 		for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
 			s--
--- a/vendor/github.com/klauspost/compress/flate/token.go
+++ b/vendor/github.com/klauspost/compress/flate/token.go
@ -13,14 +13,17 @@ import (
 )

 const (
+	// From top
 	// 2 bits:   type   0 = literal  1=EOF  2=Match   3=Unused
 	// 8 bits:   xlength = length - MIN_MATCH_LENGTH
-	// 22 bits   xoffset = offset - MIN_OFFSET_SIZE, or literal
-	lengthShift = 22
-	offsetMask  = 1<<lengthShift - 1
-	typeMask    = 3 << 30
-	literalType = 0 << 30
-	matchType   = 1 << 30
+	// 5 bits    offsetcode
+	// 16 bits   xoffset = offset - MIN_OFFSET_SIZE, or literal
+	lengthShift         = 22
+	offsetMask          = 1<<lengthShift - 1
+	typeMask            = 3 << 30
+	literalType         = 0 << 30
+	matchType           = 1 << 30
+	matchOffsetOnlyMask = 0xffff
 )

 // The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
@ -187,7 +190,7 @@ func (t *tokens) indexTokens(in []token) {
 			t.AddLiteral(tok.literal())
 			continue
 		}
-		t.AddMatch(uint32(tok.length()), tok.offset())
+		t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
 	}
 }

@ -232,7 +235,7 @@ func (t *tokens) EstimatedBits() int {
 		for _, v := range t.litHist[:] {
 			if v > 0 {
 				n := float32(v)
-				shannon += -mFastLog2(n*invTotal) * n
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
 			}
 		}
 		// Just add 15 for EOB
@ -240,7 +243,7 @@ func (t *tokens) EstimatedBits() int {
 		for i, v := range t.extraHist[1 : literalCount-256] {
 			if v > 0 {
 				n := float32(v)
-				shannon += -mFastLog2(n*invTotal) * n
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
 				bits += int(lengthExtraBits[i&31]) * int(v)
 				nMatches += int(v)
 			}
@ -251,7 +254,7 @@ func (t *tokens) EstimatedBits() int {
 		for i, v := range t.offHist[:offsetCodeCount] {
 			if v > 0 {
 				n := float32(v)
-				shannon += -mFastLog2(n*invTotal) * n
+				shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
 				bits += int(offsetExtraBits[i&31]) * int(v)
 			}
 		}
@ -270,11 +273,13 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
 			panic(fmt.Errorf("invalid offset: %v", xoffset))
 		}
 	}
+	oCode := offsetCode(xoffset)
+	xoffset |= oCode << 16
 	t.nLits++
-	lengthCode := lengthCodes1[uint8(xlength)] & 31
+
+	t.extraHist[lengthCodes1[uint8(xlength)]]++
+	t.offHist[oCode]++
 	t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
-	t.extraHist[lengthCode]++
-	t.offHist[offsetCode(xoffset)&31]++
 	t.n++
 }

@ -286,7 +291,8 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
 			panic(fmt.Errorf("invalid offset: %v", xoffset))
 		}
 	}
-	oc := offsetCode(xoffset) & 31
+	oc := offsetCode(xoffset)
+	xoffset |= oc << 16
 	for xlength > 0 {
 		xl := xlength
 		if xl > 258 {
@ -294,12 +300,11 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
 			xl = 258 - baseMatchLength
 		}
 		xlength -= xl
-		xl -= 3
+		xl -= baseMatchLength
 		t.nLits++
-		lengthCode := lengthCodes1[uint8(xl)] & 31
-		t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
-		t.extraHist[lengthCode]++
+		t.extraHist[lengthCodes1[uint8(xl)]]++
 		t.offHist[oc]++
+		t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
 		t.n++
 	}
 }
--- a/vendor/github.com/klauspost/compress/gzip/gunzip.go
+++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go
@ -75,6 +75,7 @@ type Header struct {
 type Reader struct {
 	Header       // valid after NewReader or Reader.Reset
 	r            flate.Reader
+	br           *bufio.Reader
 	decompressor io.ReadCloser
 	digest       uint32 // CRC-32, IEEE polynomial (section 8)
 	size         uint32 // Uncompressed size (section 2.3.1)
@ -109,7 +110,13 @@ func (z *Reader) Reset(r io.Reader) error {
 	if rr, ok := r.(flate.Reader); ok {
 		z.r = rr
 	} else {
-		z.r = bufio.NewReader(r)
+		// Reuse if we can.
+		if z.br != nil {
+			z.br.Reset(r)
+		} else {
+			z.br = bufio.NewReader(r)
+		}
+		z.r = z.br
 	}
 	z.Header, z.err = z.readHeader()
 	return z.err
--- a/vendor/github.com/klauspost/compress/zstd/README.md
+++ b/vendor/github.com/klauspost/compress/zstd/README.md
@ -16,8 +16,7 @@ Currently the package is heavily optimized for 64 bit processors and will be sig

 Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.

-Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
-
+[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd)

 ## Compressor

@ -152,8 +151,8 @@ This package:
 file    out     level   insize      outsize     millis  mb/s
 silesia.tar zskp    1   211947520   73101992    643     313.87
 silesia.tar zskp    2   211947520   67504318    969     208.38
-silesia.tar zskp    3   211947520   65177448    1899    106.44
-silesia.tar zskp    4   211947520   61381950    8115    24.91
+silesia.tar zskp    3   211947520   64595893    2007    100.68
+silesia.tar zskp    4   211947520   60995370    7691    26.28

 cgo zstd:
 silesia.tar zstd    1   211947520   73605392    543     371.56
@ -171,8 +170,8 @@ https://files.klauspost.com/compress/gob-stream.7z
 file        out     level   insize  outsize     millis  mb/s
 gob-stream  zskp    1   1911399616  235022249   3088    590.30
 gob-stream  zskp    2   1911399616  205669791   3786    481.34
-gob-stream  zskp    3   1911399616  185792019   9324    195.48
-gob-stream  zskp    4   1911399616  171537212   32113   56.76
+gob-stream  zskp    3   1911399616  175034659   9636    189.17
+gob-stream  zskp    4   1911399616  167273881   29337   62.13
 gob-stream  zstd    1   1911399616  249810424   2637    691.26
 gob-stream  zstd    3   1911399616  208192146   3490    522.31
 gob-stream  zstd    6   1911399616  193632038   6687    272.56
@ -187,8 +186,8 @@ http://mattmahoney.net/dc/textdata.html
 file    out level   insize      outsize     millis  mb/s
 enwik9  zskp    1   1000000000  343848582   3609    264.18
 enwik9  zskp    2   1000000000  317276632   5746    165.97
-enwik9  zskp    3   1000000000  294540704   11725   81.34
-enwik9  zskp    4   1000000000  276609671   44029   21.66
+enwik9  zskp    3   1000000000  292243069   12162   78.41
+enwik9  zskp    4   1000000000  275241169   36430   26.18
 enwik9  zstd    1   1000000000  358072021   3110    306.65
 enwik9  zstd    3   1000000000  313734672   4784    199.35
 enwik9  zstd    6   1000000000  295138875   10290   92.68
@ -202,8 +201,8 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst
 file                        out level   insize      outsize     millis  mb/s
 github-june-2days-2019.json zskp    1   6273951764  699045015   10620   563.40
 github-june-2days-2019.json zskp    2   6273951764  617881763   11687   511.96
-github-june-2days-2019.json zskp    3   6273951764  537511906   29252   204.54
-github-june-2days-2019.json zskp    4   6273951764  512796117   97791   61.18
+github-june-2days-2019.json zskp    3   6273951764  524340691   34043   175.75
+github-june-2days-2019.json zskp    4   6273951764  503314661   93811   63.78
 github-june-2days-2019.json zstd    1   6273951764  766284037   8450    708.00
 github-june-2days-2019.json zstd    3   6273951764  661889476   10927   547.57
 github-june-2days-2019.json zstd    6   6273951764  642756859   22996   260.18
@ -217,8 +216,8 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z
 file                    out level   insize      outsize     millis  mb/s
 rawstudio-mint14.tar    zskp    1   8558382592  3667489370  20210   403.84
 rawstudio-mint14.tar    zskp    2   8558382592  3364592300  31873   256.07
-rawstudio-mint14.tar    zskp    3   8558382592  3224594213  71751   113.75
-rawstudio-mint14.tar    zskp    4   8558382592  3027332295  486243  16.79
+rawstudio-mint14.tar    zskp    3   8558382592  3158085214  77675   105.08
+rawstudio-mint14.tar    zskp    4   8558382592  3020370044  404956  20.16
 rawstudio-mint14.tar    zstd    1   8558382592  3609250104  17136   476.27
 rawstudio-mint14.tar    zstd    3   8558382592  3341679997  29262   278.92
 rawstudio-mint14.tar    zstd    6   8558382592  3235846406  77904   104.77
@ -232,8 +231,8 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
 file                    out level   insize      outsize     millis  mb/s
 nyc-taxi-data-10M.csv   zskp    1   3325605752  641339945   8925    355.35
 nyc-taxi-data-10M.csv   zskp    2   3325605752  591748091   11268   281.44
-nyc-taxi-data-10M.csv   zskp    3   3325605752  538490114   19880   159.53
-nyc-taxi-data-10M.csv   zskp    4   3325605752  495986829   89368   35.49
+nyc-taxi-data-10M.csv   zskp    3   3325605752  530289687   25239   125.66
+nyc-taxi-data-10M.csv   zskp    4   3325605752  490907191   65939   48.10
 nyc-taxi-data-10M.csv   zstd    1   3325605752  687399637   8233    385.18
 nyc-taxi-data-10M.csv   zstd    3   3325605752  598514411   10065   315.07
 nyc-taxi-data-10M.csv   zstd    6   3325605752  570522953   20038   158.27
@ -405,13 +404,28 @@ BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16        749938

 This reflects the performance around May 2020, but this may be out of date.

+## Zstd inside ZIP files
+
+It is possible to use zstandard to compress individual files inside zip archives.
+While this isn't widely supported, it can be useful for internal files.
+
+To support the compression and decompression of these files you must register a compressor and decompressor.
+
+It is highly recommended to register the (de)compressors on individual zip Reader/Writer and NOT
+to use the global registration functions. The main reason for this is that 2 registrations from
+different packages will result in a panic.
+
+It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip
+files concurrently, and using a single instance will allow reusing some resources.
+
+See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for 
+how to compress and decompress files inside zip archives.
+
 # Contributions

 Contributions are always welcome. 
 For new features/fixes, remember to add tests and for performance enhancements include benchmarks.

-For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
-
 For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).

 This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@ -123,12 +123,10 @@ func newBlockDec(lowMem bool) *blockDec {
 // Input must be a start of a block and will be at the end of the block when returned.
 func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	b.WindowSize = windowSize
-	tmp := br.readSmall(3)
-	if tmp == nil {
-		if debug {
-			println("Reading block header:", io.ErrUnexpectedEOF)
-		}
-		return io.ErrUnexpectedEOF
+	tmp, err := br.readSmall(3)
+	if err != nil {
+		println("Reading block header:", err)
+		return err
 	}
 	bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
 	b.Last = bh&1 != 0
@ -146,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 		}
 		cSize = 1
 	case blockTypeCompressed:
-		if debug {
+		if debugDecoder {
 			println("Data size on stream:", cSize)
 		}
 		b.RLESize = 0
@ -155,7 +153,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 			maxSize = int(windowSize)
 		}
 		if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
-			if debug {
+			if debugDecoder {
 				printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
 			}
 			return ErrCompressedSizeTooBig
@ -179,10 +177,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
 	if cap(b.dst) <= maxSize {
 		b.dst = make([]byte, 0, maxSize+1)
 	}
-	var err error
 	b.data, err = br.readBig(cSize, b.dataStorage)
 	if err != nil {
-		if debug {
+		if debugDecoder {
 			println("Reading block:", err, "(", cSize, ")", len(b.data))
 			printf("%T", br)
 		}
@ -252,7 +249,7 @@ func (b *blockDec) startDecoder() {
 				b:   b.dst,
 				err: err,
 			}
-			if debug {
+			if debugDecoder {
 				println("Decompressed to", len(b.dst), "bytes, error:", err)
 			}
 			b.result <- o
@ -267,7 +264,7 @@ func (b *blockDec) startDecoder() {
 		default:
 			panic("Invalid block type")
 		}
-		if debug {
+		if debugDecoder {
 			println("blockDec: Finished block")
 		}
 	}
@ -300,7 +297,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
 		b.dst = hist.b
 		hist.b = nil
 		err := b.decodeCompressed(hist)
-		if debug {
+		if debugDecoder {
 			println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
 		}
 		hist.b = b.dst
@ -393,7 +390,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			in = in[5:]
 		}
 	}
-	if debug {
+	if debugDecoder {
 		println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
 	}
 	var literals []byte
@ -431,7 +428,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 			literals[i] = v
 		}
 		in = in[1:]
-		if debug {
+		if debugDecoder {
 			printf("Found %d RLE compressed literals\n", litRegenSize)
 		}
 	case literalsBlockTreeless:
@ -442,7 +439,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		// Store compressed literals, so we defer decoding until we get history.
 		literals = in[:litCompSize]
 		in = in[litCompSize:]
-		if debug {
+		if debugDecoder {
 			printf("Found %d compressed literals\n", litCompSize)
 		}
 	case literalsBlockCompressed:
@ -484,7 +481,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		if len(literals) != litRegenSize {
 			return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
 		}
-		if debug {
+		if debugDecoder {
 			printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
 		}
 	}
@ -535,12 +532,12 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 		br := byteReader{b: in, off: 0}
 		compMode := br.Uint8()
 		br.advance(1)
-		if debug {
+		if debugDecoder {
 			printf("Compression modes: 0b%b", compMode)
 		}
 		for i := uint(0); i < 3; i++ {
 			mode := seqCompMode((compMode >> (6 - i*2)) & 3)
-			if debug {
+			if debugDecoder {
 				println("Table", tableIndex(i), "is", mode)
 			}
 			var seq *sequenceDec
@ -571,7 +568,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 				}
 				dec.setRLE(symb)
 				seq.fse = dec
-				if debug {
+				if debugDecoder {
 					printf("RLE set to %+v, code: %v", symb, v)
 				}
 			case compModeFSE:
@ -587,7 +584,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 					println("Transform table error:", err)
 					return err
 				}
-				if debug {
+				if debugDecoder {
 					println("Read table ok", "symbolLen:", dec.symbolLen)
 				}
 				seq.fse = dec
@ -655,7 +652,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	if huff != nil {
 		hist.huffTree = huff
 	}
-	if debug {
+	if debugDecoder {
 		println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
 	}

@ -672,7 +669,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	if err != nil {
 		return err
 	}
-	if debug {
+	if debugDecoder {
 		println("History merged ok")
 	}
 	br := &bitReader{}
@ -731,7 +728,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
 	}
 	hist.append(b.dst)
 	hist.recentOffsets = seqs.prevOffset
-	if debug {
+	if debugDecoder {
 		println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
 	}

--- a/vendor/github.com/klauspost/compress/zstd/blockenc.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go
@ -156,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) {
 	switch {
 	case inBits < 5:
 		lh |= (uint64(regenLen) << 3) | (1 << 60)
-		if debug {
+		if debugEncoder {
 			got := int(lh>>3) & 0xff
 			if got != regenLen {
 				panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
@ -184,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
 			lh |= 1 << 2
 		}
 		lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
-		if debug {
+		if debugEncoder {
 			const mmask = (1 << 24) - 1
 			n := (lh >> 4) & mmask
 			if int(n&1023) != inLen {
@ -312,7 +312,7 @@ func (b *blockEnc) encodeRaw(a []byte) {
 	bh.setType(blockTypeRaw)
 	b.output = bh.appendTo(b.output[:0])
 	b.output = append(b.output, a...)
-	if debug {
+	if debugEncoder {
 		println("Adding RAW block, length", len(a), "last:", b.last)
 	}
 }
@ -325,7 +325,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
 	bh.setType(blockTypeRaw)
 	dst = bh.appendTo(dst)
 	dst = append(dst, src...)
-	if debug {
+	if debugEncoder {
 		println("Adding RAW block, length", len(src), "last:", b.last)
 	}
 	return dst
@ -339,7 +339,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {

 	// Don't compress extremely small blocks
 	if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
-		if debug {
+		if debugEncoder {
 			println("Adding RAW block, length", len(lits), "last:", b.last)
 		}
 		bh.setType(blockTypeRaw)
@ -371,7 +371,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {

 	switch err {
 	case huff0.ErrIncompressible:
-		if debug {
+		if debugEncoder {
 			println("Adding RAW block, length", len(lits), "last:", b.last)
 		}
 		bh.setType(blockTypeRaw)
@ -379,7 +379,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
 		b.output = append(b.output, lits...)
 		return nil
 	case huff0.ErrUseRLE:
-		if debug {
+		if debugEncoder {
 			println("Adding RLE block, length", len(lits))
 		}
 		bh.setType(blockTypeRLE)
@ -396,12 +396,12 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
 	bh.setType(blockTypeCompressed)
 	var lh literalsHeader
 	if reUsed {
-		if debug {
+		if debugEncoder {
 			println("Reused tree, compressed to", len(out))
 		}
 		lh.setType(literalsBlockTreeless)
 	} else {
-		if debug {
+		if debugEncoder {
 			println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
 		}
 		lh.setType(literalsBlockCompressed)
@ -517,7 +517,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		lh.setSize(len(b.literals))
 		b.output = lh.appendTo(b.output)
 		b.output = append(b.output, b.literals...)
-		if debug {
+		if debugEncoder {
 			println("Adding literals RAW, length", len(b.literals))
 		}
 	case huff0.ErrUseRLE:
@ -525,22 +525,22 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		lh.setSize(len(b.literals))
 		b.output = lh.appendTo(b.output)
 		b.output = append(b.output, b.literals[0])
-		if debug {
+		if debugEncoder {
 			println("Adding literals RLE")
 		}
 	case nil:
 		// Compressed litLen...
 		if reUsed {
-			if debug {
+			if debugEncoder {
 				println("reused tree")
 			}
 			lh.setType(literalsBlockTreeless)
 		} else {
-			if debug {
+			if debugEncoder {
 				println("new tree, size:", len(b.litEnc.OutTable))
 			}
 			lh.setType(literalsBlockCompressed)
-			if debug {
+			if debugEncoder {
 				_, _, err := huff0.ReadTable(out, nil)
 				if err != nil {
 					panic(err)
@ -548,18 +548,18 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 			}
 		}
 		lh.setSizes(len(out), len(b.literals), single)
-		if debug {
+		if debugEncoder {
 			printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
 			println("Adding literal header:", lh)
 		}
 		b.output = lh.appendTo(b.output)
 		b.output = append(b.output, out...)
 		b.litEnc.Reuse = huff0.ReusePolicyAllow
-		if debug {
+		if debugEncoder {
 			println("Adding literals compressed")
 		}
 	default:
-		if debug {
+		if debugEncoder {
 			println("Adding literals ERROR:", err)
 		}
 		return err
@ -577,7 +577,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		n := len(b.sequences) - 0x7f00
 		b.output = append(b.output, 255, uint8(n), uint8(n>>8))
 	}
-	if debug {
+	if debugEncoder {
 		println("Encoding", len(b.sequences), "sequences")
 	}
 	b.genCodes()
@ -611,17 +611,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		nSize = nSize + (nSize+2*8*16)>>4
 		switch {
 		case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
-			if debug {
+			if debugEncoder {
 				println("Using predefined", predefSize>>3, "<=", nSize>>3)
 			}
 			return preDef, compModePredefined
 		case prevSize <= nSize:
-			if debug {
+			if debugEncoder {
 				println("Using previous", prevSize>>3, "<=", nSize>>3)
 			}
 			return prev, compModeRepeat
 		default:
-			if debug {
+			if debugEncoder {
 				println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
 				println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
 			}
@ -634,7 +634,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if llEnc.useRLE {
 		mode |= uint8(compModeRLE) << 6
 		llEnc.setRLE(b.sequences[0].llCode)
-		if debug {
+		if debugEncoder {
 			println("llEnc.useRLE")
 		}
 	} else {
@ -645,7 +645,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if ofEnc.useRLE {
 		mode |= uint8(compModeRLE) << 4
 		ofEnc.setRLE(b.sequences[0].ofCode)
-		if debug {
+		if debugEncoder {
 			println("ofEnc.useRLE")
 		}
 	} else {
@ -657,7 +657,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 	if mlEnc.useRLE {
 		mode |= uint8(compModeRLE) << 2
 		mlEnc.setRLE(b.sequences[0].mlCode)
-		if debug {
+		if debugEncoder {
 			println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
 		}
 	} else {
@ -666,7 +666,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
 		mode |= uint8(m) << 2
 	}
 	b.output = append(b.output, mode)
-	if debug {
+	if debugEncoder {
 		printf("Compression modes: 0b%b", mode)
 	}
 	b.output, err = llEnc.writeCount(b.output)
@ -786,7 +786,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {

 	// Size is output minus block header.
 	bh.setSize(uint32(len(b.output)-bhOffset) - 3)
-	if debug {
+	if debugEncoder {
 		println("Rewriting block header", bh)
 	}
 	_ = bh.appendTo(b.output[bhOffset:bhOffset])
--- a/vendor/github.com/klauspost/compress/zstd/bytebuf.go
+++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go
@ -12,8 +12,8 @@ import (

 type byteBuffer interface {
 	// Read up to 8 bytes.
-	// Returns nil if no more input is available.
-	readSmall(n int) []byte
+	// Returns io.ErrUnexpectedEOF if this cannot be satisfied.
+	readSmall(n int) ([]byte, error)

 	// Read >8 bytes.
 	// MAY use the destination slice.
@ -29,17 +29,17 @@ type byteBuffer interface {
 // in-memory buffer
 type byteBuf []byte

-func (b *byteBuf) readSmall(n int) []byte {
+func (b *byteBuf) readSmall(n int) ([]byte, error) {
 	if debugAsserts && n > 8 {
 		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
 	}
 	bb := *b
 	if len(bb) < n {
-		return nil
+		return nil, io.ErrUnexpectedEOF
 	}
 	r := bb[:n]
 	*b = bb[n:]
-	return r
+	return r, nil
 }

 func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
@ -81,19 +81,22 @@ type readerWrapper struct {
 	tmp [8]byte
 }

-func (r *readerWrapper) readSmall(n int) []byte {
+func (r *readerWrapper) readSmall(n int) ([]byte, error) {
 	if debugAsserts && n > 8 {
 		panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
 	}
 	n2, err := io.ReadFull(r.r, r.tmp[:n])
 	// We only really care about the actual bytes read.
-	if n2 != n {
-		if debug {
+	if err != nil {
+		if err == io.EOF {
+			return nil, io.ErrUnexpectedEOF
+		}
+		if debugDecoder {
 			println("readSmall: got", n2, "want", n, "err", err)
 		}
-		return nil
+		return nil, err
 	}
-	return r.tmp[:n]
+	return r.tmp[:n], nil
 }

 func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {
--- a/vendor/github.com/klauspost/compress/zstd/decoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/decoder.go
@ -113,9 +113,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
 // Returns the number of bytes written and any error that occurred.
 // When the stream is done, io.EOF will be returned.
 func (d *Decoder) Read(p []byte) (int, error) {
-	if d.stream == nil {
-		return 0, ErrDecoderNilInput
-	}
 	var n int
 	for {
 		if len(d.current.b) > 0 {
@ -138,7 +135,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
 		}
 	}
 	if len(d.current.b) > 0 {
-		if debug {
+		if debugDecoder {
 			println("returning", n, "still bytes left:", len(d.current.b))
 		}
 		// Only return error at end of block
@ -147,7 +144,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
 	if d.current.err != nil {
 		d.drainOutput()
 	}
-	if debug {
+	if debugDecoder {
 		println("returning", n, d.current.err, len(d.decoders))
 	}
 	return n, d.current.err
@ -167,20 +164,17 @@ func (d *Decoder) Reset(r io.Reader) error {

 	if r == nil {
 		d.current.err = ErrDecoderNilInput
+		if len(d.current.b) > 0 {
+			d.current.b = d.current.b[:0]
+		}
 		d.current.flushed = true
 		return nil
 	}

-	if d.stream == nil {
-		d.stream = make(chan decodeStream, 1)
-		d.streamWg.Add(1)
-		go d.startStreamDecoder(d.stream)
-	}
-
-	// If bytes buffer and < 1MB, do sync decoding anyway.
-	if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
+	// If bytes buffer and < 5MB, do sync decoding anyway.
+	if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
 		bb2 := bb
-		if debug {
+		if debugDecoder {
 			println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
 		}
 		b := bb2.Bytes()
@ -196,12 +190,18 @@ func (d *Decoder) Reset(r io.Reader) error {
 		d.current.b = dst
 		d.current.err = err
 		d.current.flushed = true
-		if debug {
+		if debugDecoder {
 			println("sync decode to", len(dst), "bytes, err:", err)
 		}
 		return nil
 	}

+	if d.stream == nil {
+		d.stream = make(chan decodeStream, 1)
+		d.streamWg.Add(1)
+		go d.startStreamDecoder(d.stream)
+	}
+
 	// Remove current block.
 	d.current.decodeOutput = decodeOutput{}
 	d.current.err = nil
@ -225,7 +225,7 @@ func (d *Decoder) drainOutput() {
 		d.current.cancel = nil
 	}
 	if d.current.d != nil {
-		if debug {
+		if debugDecoder {
 			printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
 		}
 		d.decoders <- d.current.d
@ -238,7 +238,7 @@ func (d *Decoder) drainOutput() {
 	}
 	for v := range d.current.output {
 		if v.d != nil {
-			if debug {
+			if debugDecoder {
 				printf("re-adding decoder %p", v.d)
 			}
 			d.decoders <- v.d
@ -255,9 +255,6 @@ func (d *Decoder) drainOutput() {
 // The return value n is the number of bytes written.
 // Any error encountered during the write is also returned.
 func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
-	if d.stream == nil {
-		return 0, ErrDecoderNilInput
-	}
 	var n int64
 	for {
 		if len(d.current.b) > 0 {
@ -297,7 +294,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 	block := <-d.decoders
 	frame := block.localFrame
 	defer func() {
-		if debug {
+		if debugDecoder {
 			printf("re-adding decoder: %p", block)
 		}
 		frame.rawInput = nil
@ -310,7 +307,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 		frame.history.reset()
 		err := frame.reset(&frame.bBuf)
 		if err == io.EOF {
-			if debug {
+			if debugDecoder {
 				println("frame reset return EOF")
 			}
 			return dst, nil
@ -355,7 +352,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 			return dst, err
 		}
 		if len(frame.bBuf) == 0 {
-			if debug {
+			if debugDecoder {
 				println("frame dbuf empty")
 			}
 			break
@ -371,7 +368,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
 // if no data was available without blocking.
 func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 	if d.current.d != nil {
-		if debug {
+		if debugDecoder {
 			printf("re-adding current decoder %p", d.current.d)
 		}
 		d.decoders <- d.current.d
@ -391,7 +388,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
 			return false
 		}
 	}
-	if debug {
+	if debugDecoder {
 		println("got", len(d.current.b), "bytes, error:", d.current.err)
 	}
 	return true
@ -485,7 +482,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
 	defer d.streamWg.Done()
 	frame := newFrameDec(d.o)
 	for stream := range inStream {
-		if debug {
+		if debugDecoder {
 			println("got new stream")
 		}
 		br := readerWrapper{r: stream.r}
@ -493,7 +490,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
 		for {
 			frame.history.reset()
 			err := frame.reset(&br)
-			if debug && err != nil {
+			if debugDecoder && err != nil {
 				println("Frame decoder returned", err)
 			}
 			if err == nil && frame.DictionaryID != nil {
@ -510,7 +507,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
 				}
 				break
 			}
-			if debug {
+			if debugDecoder {
 				println("starting frame decoder")
 			}

--- a/vendor/github.com/klauspost/compress/zstd/dict.go
+++ b/vendor/github.com/klauspost/compress/zstd/dict.go
@ -82,7 +82,7 @@ func loadDict(b []byte) (*dict, error) {
 			println("Transform table error:", err)
 			return err
 		}
-		if debug {
+		if debugDecoder || debugEncoder {
 			println("Read table ok", "symbolLen:", dec.symbolLen)
 		}
 		// Set decoders as predefined so they aren't reused.
--- a/vendor/github.com/klauspost/compress/zstd/enc_best.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_best.go
@ -132,7 +132,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
 	}
 	_ = addLiterals

-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -220,6 +220,20 @@ encodeLoop:
 			best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
 			best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
 			best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
+
+			// See if we can find a better match by checking where the current best ends.
+			// Use that offset to see if we can find a better full match.
+			if sAt := best.s + best.length; sAt < sLimit {
+				nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
+				candidateEnd := e.longTable[nextHashL]
+				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
+					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
+					if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
+						bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
+					}
+					best = bestEnd
+				}
+			}
 		}

 		// We have a match, we can store the forward value
@ -260,7 +274,7 @@ encodeLoop:

 			nextEmit = s
 			if s >= sLimit {
-				if debug {
+				if debugEncoder {
 					println("repeat ended", s, best.length)

 				}
@ -398,7 +412,7 @@ encodeLoop:
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
 	blk.recentOffsets[2] = uint32(offset3)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@ -138,7 +138,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -204,7 +204,7 @@ encodeLoop:

 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -264,7 +264,7 @@ encodeLoop:
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -412,8 +412,41 @@ encodeLoop:
 			cv = load6432(src, s)
 		}

-		// A 4-byte match has been found. Update recent offsets.
-		// We'll later see if more than 4 bytes.
+		// Try to find a better match by searching for a long match at the end of the current best match
+		if true && s+matched < sLimit {
+			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+			cv := load3232(src, s)
+			candidateL := e.longTable[nextHashL]
+			coffsetL := candidateL.offset - e.cur - matched
+			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+				// Found a long match, at least 4 bytes.
+				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+				if matchedNext > matched {
+					t = coffsetL
+					matched = matchedNext
+					if debugMatches {
+						println("long match at end-of-match")
+					}
+				}
+			}
+
+			// Check prev long...
+			if true {
+				coffsetL = candidateL.prev - e.cur - matched
+				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+					// Found a long match, at least 4 bytes.
+					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+					if matchedNext > matched {
+						t = coffsetL
+						matched = matchedNext
+						if debugMatches {
+							println("prev long match at end-of-match")
+						}
+					}
+				}
+			}
+		}
+		// A match has been found. Update recent offsets.
 		offset2 = offset1
 		offset1 = s - t

@ -520,7 +553,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
@ -623,7 +656,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -691,7 +724,7 @@ encodeLoop:

 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -754,7 +787,7 @@ encodeLoop:
 					s += lenght + repOff2
 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -905,9 +938,41 @@ encodeLoop:
 			}
 			cv = load6432(src, s)
 		}
+		// Try to find a better match by searching for a long match at the end of the current best match
+		if s+matched < sLimit {
+			nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+			cv := load3232(src, s)
+			candidateL := e.longTable[nextHashL]
+			coffsetL := candidateL.offset - e.cur - matched
+			if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+				// Found a long match, at least 4 bytes.
+				matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+				if matchedNext > matched {
+					t = coffsetL
+					matched = matchedNext
+					if debugMatches {
+						println("long match at end-of-match")
+					}
+				}
+			}

-		// A 4-byte match has been found. Update recent offsets.
-		// We'll later see if more than 4 bytes.
+			// Check prev long...
+			if true {
+				coffsetL = candidateL.prev - e.cur - matched
+				if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
+					// Found a long match, at least 4 bytes.
+					matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
+					if matchedNext > matched {
+						t = coffsetL
+						matched = matchedNext
+						if debugMatches {
+							println("prev long match at end-of-match")
+						}
+					}
+				}
+			}
+		}
+		// A match has been found. Update recent offsets.
 		offset2 = offset1
 		offset1 = s - t

@ -1019,7 +1084,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@ -109,7 +109,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -170,7 +170,7 @@ encodeLoop:
 					s += lenght + repOff
 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -368,7 +368,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
@ -427,7 +427,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -483,7 +483,7 @@ encodeLoop:
 					s += length + repOff
 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, length)

 						}
@ -677,7 +677,7 @@ encodeLoop:
 		blk.literals = append(blk.literals, src[nextEmit:]...)
 		blk.extraLits = len(src) - int(nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}

@ -767,7 +767,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -830,7 +830,7 @@ encodeLoop:
 					s += lenght + repOff
 					nextEmit = s
 					if s >= sLimit {
-						if debug {
+						if debugEncoder {
 							println("repeat ended", s, lenght)

 						}
@ -1039,7 +1039,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	// If we encoded more than 64K mark all dirty.
--- a/vendor/github.com/klauspost/compress/zstd/enc_fast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go
@ -103,7 +103,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -178,7 +178,7 @@ encodeLoop:
 				s += length + 2
 				nextEmit = s
 				if s >= sLimit {
-					if debug {
+					if debugEncoder {
 						println("repeat ended", s, length)

 					}
@ -330,7 +330,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
@ -343,7 +343,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 		inputMargin            = 8
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
 	)
-	if debug {
+	if debugEncoder {
 		if len(src) > maxBlockSize {
 			panic("src too big")
 		}
@ -391,7 +391,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -462,7 +462,7 @@ encodeLoop:
 				s += length + 2
 				nextEmit = s
 				if s >= sLimit {
-					if debug {
+					if debugEncoder {
 						println("repeat ended", s, length)

 					}
@ -616,7 +616,7 @@ encodeLoop:
 		blk.literals = append(blk.literals, src[nextEmit:]...)
 		blk.extraLits = len(src) - int(nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 	// We do not store history, so we must offset e.cur to avoid false matches for next user.
@ -696,7 +696,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
 		blk.literals = append(blk.literals, src[nextEmit:until]...)
 		s.litLen = uint32(until - nextEmit)
 	}
-	if debug {
+	if debugEncoder {
 		println("recent offsets:", blk.recentOffsets)
 	}

@ -773,7 +773,7 @@ encodeLoop:
 				s += length + 2
 				nextEmit = s
 				if s >= sLimit {
-					if debug {
+					if debugEncoder {
 						println("repeat ended", s, length)

 					}
@ -926,7 +926,7 @@ encodeLoop:
 	}
 	blk.recentOffsets[0] = uint32(offset1)
 	blk.recentOffsets[1] = uint32(offset2)
-	if debug {
+	if debugEncoder {
 		println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
 	}
 }
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@ -245,7 +245,7 @@ func (e *Encoder) nextBlock(final bool) error {
 	s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
 	s.wg.Add(1)
 	go func(src []byte) {
-		if debug {
+		if debugEncoder {
 			println("Adding block,", len(src), "bytes, final:", final)
 		}
 		defer func() {
@ -290,7 +290,7 @@ func (e *Encoder) nextBlock(final bool) error {
 			}
 			switch err {
 			case errIncompressible:
-				if debug {
+				if debugEncoder {
 					println("Storing incompressible block as raw")
 				}
 				blk.encodeRaw(src)
@ -313,7 +313,7 @@ func (e *Encoder) nextBlock(final bool) error {
 //
 // The Copy function uses ReaderFrom if available.
 func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
-	if debug {
+	if debugEncoder {
 		println("Using ReadFrom")
 	}

@ -336,20 +336,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
 		switch err {
 		case io.EOF:
 			e.state.filling = e.state.filling[:len(e.state.filling)-len(src)]
-			if debug {
+			if debugEncoder {
 				println("ReadFrom: got EOF final block:", len(e.state.filling))
 			}
 			return n, nil
 		case nil:
 		default:
-			if debug {
+			if debugEncoder {
 				println("ReadFrom: got error:", err)
 			}
 			e.state.err = err
 			return n, err
 		}
 		if len(src) > 0 {
-			if debug {
+			if debugEncoder {
 				println("ReadFrom: got space left in source:", len(src))
 			}
 			continue
@ -512,7 +512,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {

 		switch err {
 		case errIncompressible:
-			if debug {
+			if debugEncoder {
 				println("Storing incompressible block as raw")
 			}
 			dst = blk.encodeRawTo(dst, src)
@ -548,7 +548,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {

 			switch err {
 			case errIncompressible:
-				if debug {
+				if debugEncoder {
 					println("Storing incompressible block as raw")
 				}
 				dst = blk.encodeRawTo(dst, todo)
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@ -78,44 +78,68 @@ func newFrameDec(o decoderOptions) *frameDec {
 func (d *frameDec) reset(br byteBuffer) error {
 	d.HasCheckSum = false
 	d.WindowSize = 0
-	var b []byte
+	var signature [4]byte
 	for {
-		b = br.readSmall(4)
-		if b == nil {
+		var err error
+		// Check if we can read more...
+		b, err := br.readSmall(1)
+		switch err {
+		case io.EOF, io.ErrUnexpectedEOF:
 			return io.EOF
+		default:
+			return err
+		case nil:
+			signature[0] = b[0]
 		}
-		if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
-			if debug {
-				println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic))
+		// Read the rest, don't allow io.ErrUnexpectedEOF
+		b, err = br.readSmall(3)
+		switch err {
+		case io.EOF:
+			return io.EOF
+		default:
+			return err
+		case nil:
+			copy(signature[1:], b)
+		}
+
+		if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
+			if debugDecoder {
+				println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
 			}
 			// Break if not skippable frame.
 			break
 		}
 		// Read size to skip
-		b = br.readSmall(4)
-		if b == nil {
-			println("Reading Frame Size EOF")
-			return io.ErrUnexpectedEOF
+		b, err = br.readSmall(4)
+		if err != nil {
+			if debugDecoder {
+				println("Reading Frame Size", err)
+			}
+			return err
 		}
 		n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
 		println("Skipping frame with", n, "bytes.")
-		err := br.skipN(int(n))
+		err = br.skipN(int(n))
 		if err != nil {
-			if debug {
+			if debugDecoder {
 				println("Reading discarded frame", err)
 			}
 			return err
 		}
 	}
-	if !bytes.Equal(b, frameMagic) {
-		println("Got magic numbers: ", b, "want:", frameMagic)
+	if !bytes.Equal(signature[:], frameMagic) {
+		if debugDecoder {
+			println("Got magic numbers: ", signature, "want:", frameMagic)
+		}
 		return ErrMagicMismatch
 	}

 	// Read Frame_Header_Descriptor
 	fhd, err := br.readByte()
 	if err != nil {
-		println("Reading Frame_Header_Descriptor", err)
+		if debugDecoder {
+			println("Reading Frame_Header_Descriptor", err)
+		}
 		return err
 	}
 	d.SingleSegment = fhd&(1<<5) != 0
@ -130,7 +154,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 	if !d.SingleSegment {
 		wd, err := br.readByte()
 		if err != nil {
-			println("Reading Window_Descriptor", err)
+			if debugDecoder {
+				println("Reading Window_Descriptor", err)
+			}
 			return err
 		}
 		printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
@ -147,12 +173,11 @@ func (d *frameDec) reset(br byteBuffer) error {
 		if size == 3 {
 			size = 4
 		}
-		b = br.readSmall(int(size))
-		if b == nil {
-			if debug {
-				println("Reading Dictionary_ID", io.ErrUnexpectedEOF)
-			}
-			return io.ErrUnexpectedEOF
+
+		b, err := br.readSmall(int(size))
+		if err != nil {
+			println("Reading Dictionary_ID", err)
+			return err
 		}
 		var id uint32
 		switch size {
@ -163,7 +188,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 		case 4:
 			id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
 		}
-		if debug {
+		if debugDecoder {
 			println("Dict size", size, "ID:", id)
 		}
 		if id > 0 {
@ -187,10 +212,10 @@ func (d *frameDec) reset(br byteBuffer) error {
 	}
 	d.FrameContentSize = 0
 	if fcsSize > 0 {
-		b := br.readSmall(fcsSize)
-		if b == nil {
-			println("Reading Frame content", io.ErrUnexpectedEOF)
-			return io.ErrUnexpectedEOF
+		b, err := br.readSmall(fcsSize)
+		if err != nil {
+			println("Reading Frame content", err)
+			return err
 		}
 		switch fcsSize {
 		case 1:
@ -205,7 +230,7 @@ func (d *frameDec) reset(br byteBuffer) error {
 			d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
 			d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
 		}
-		if debug {
+		if debugDecoder {
 			println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
 		}
 	}
@ -248,7 +273,7 @@ func (d *frameDec) reset(br byteBuffer) error {

 // next will start decoding the next block from stream.
 func (d *frameDec) next(block *blockDec) error {
-	if debug {
+	if debugDecoder {
 		printf("decoding new block %p:%p", block, block.data)
 	}
 	err := block.reset(d.rawInput, d.WindowSize)
@ -259,7 +284,7 @@ func (d *frameDec) next(block *blockDec) error {
 		return err
 	}
 	block.input <- struct{}{}
-	if debug {
+	if debugDecoder {
 		println("next block:", block)
 	}
 	d.asyncRunningMu.Lock()
@ -307,19 +332,19 @@ func (d *frameDec) checkCRC() error {
 	tmp[3] = byte(got >> 24)

 	// We can overwrite upper tmp now
-	want := d.rawInput.readSmall(4)
-	if want == nil {
-		println("CRC missing?")
-		return io.ErrUnexpectedEOF
+	want, err := d.rawInput.readSmall(4)
+	if err != nil {
+		println("CRC missing?", err)
+		return err
 	}

 	if !bytes.Equal(tmp[:], want) {
-		if debug {
+		if debugDecoder {
 			println("CRC Check Failed:", tmp[:], "!=", want)
 		}
 		return ErrCRCMismatch
 	}
-	if debug {
+	if debugDecoder {
 		println("CRC ok", tmp[:])
 	}
 	return nil
@ -340,7 +365,7 @@ func (d *frameDec) initAsync() {
 	if cap(d.decoding) < d.o.concurrent {
 		d.decoding = make(chan *blockDec, d.o.concurrent)
 	}
-	if debug {
+	if debugDecoder {
 		h := d.history
 		printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
 	}
@ -388,7 +413,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
 			output <- r
 			return
 		}
-		if debug {
+		if debugDecoder {
 			println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
 			d.offset += int64(len(r.b))
 		}
@ -396,7 +421,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
 			// Send history to next block
 			select {
 			case next = <-d.decoding:
-				if debug {
+				if debugDecoder {
 					println("Sending ", len(d.history.b), "bytes as history")
 				}
 				next.history <- &d.history
@ -434,7 +459,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
 		output <- r
 		if next == nil {
 			// There was no decoder available, we wait for one now that we have sent to the writer.
-			if debug {
+			if debugDecoder {
 				println("Sending ", len(d.history.b), " bytes as history")
 			}
 			next = <-d.decoding
@ -458,7 +483,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
 		if err != nil {
 			break
 		}
-		if debug {
+		if debugDecoder {
 			println("next block:", dec)
 		}
 		err = dec.decodeBuf(&d.history)
--- a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go
@ -229,7 +229,7 @@ func (s *fseEncoder) setRLE(val byte) {
 		deltaFindState: 0,
 		deltaNbBits:    0,
 	}
-	if debug {
+	if debugEncoder {
 		println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
 	}
 	s.rleVal = val
--- a/vendor/github.com/klauspost/compress/zstd/snappy.go
+++ b/vendor/github.com/klauspost/compress/zstd/snappy.go
@ -203,7 +203,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 			written += int64(n)
 			continue
 		case chunkTypeUncompressedData:
-			if debug {
+			if debugEncoder {
 				println("Uncompressed, chunklen", chunkLen)
 			}
 			// Section 4.3. Uncompressed data (chunk type 0x01).
@ -246,7 +246,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
 			continue

 		case chunkTypeStreamIdentifier:
-			if debug {
+			if debugEncoder {
 				println("stream id", chunkLen, len(snappyMagicBody))
 			}
 			// Section 4.1. Stream identifier (chunk type 0xff).
--- a/vendor/github.com/klauspost/compress/zstd/zip.go
+++ b/vendor/github.com/klauspost/compress/zstd/zip.go
@ -0,0 +1,121 @@
+// Copyright 2019+ Klaus Post. All rights reserved.
+// License information can be found in the LICENSE file.
+
+package zstd
+
+import (
+	"errors"
+	"io"
+	"sync"
+)
+
+// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
+// See https://www.winzip.com/win/en/comp_info.html
+const ZipMethodWinZip = 93
+
+// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression.
+// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
+//
+// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression.
+const ZipMethodPKWare = 20
+
+var zipReaderPool sync.Pool
+
+// newZipReader wraps a *Decoder reading from r in a pooledZipReader.
+// A decoder obtained from zipReaderPool is reset onto r when one is available;
+// otherwise a new decoder is created with WithDecoderConcurrency(1) and
+// WithDecoderLowmem(true), and a failure to create it panics.
+//
+// newZipReader cannot be used since we would leak goroutines...
+func newZipReader(r io.Reader) io.ReadCloser {
+	if pooled, ok := zipReaderPool.Get().(*Decoder); ok {
+		pooled.Reset(r)
+		return &pooledZipReader{dec: pooled}
+	}
+	fresh, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+	if err != nil {
+		panic(err)
+	}
+	return &pooledZipReader{dec: fresh}
+}
+
+// pooledZipReader is an io.ReadCloser around a *Decoder. Close resets the
+// decoder and hands it to zipReaderPool; dec is nil from then on.
+type pooledZipReader struct {
+	mu  sync.Mutex // guards Close and Read
+	dec *Decoder
+}
+
+// Read forwards to the wrapped decoder while holding r.mu. Once Close has
+// released the decoder it returns 0 and a "Read after Close" error.
+func (r *pooledZipReader) Read(p []byte) (n int, err error) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	if r.dec != nil {
+		return r.dec.Read(p)
+	}
+	return 0, errors.New("Read after Close")
+}
+
+func (r *pooledZipReader) Close() error {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+	var err error
+	if r.dec != nil {
+		err = r.dec.Reset(nil)
+		zipReaderPool.Put(r.dec)
+		r.dec = nil
+	}
+	return err
+}
+
+// pooledZipWriter is an io.WriteCloser around an *Encoder. Close closes the
+// encoder and returns it to the pool it was taken from.
+type pooledZipWriter struct {
+	mu   sync.Mutex // guards Close and Write
+	enc  *Encoder
+	pool *sync.Pool // receives enc on Close; nil means the encoder is not recycled
+}
+
+// Write forwards p to the wrapped encoder while holding w.mu.
+// After Close it returns 0 and a "Write after Close" error.
+func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	if w.enc == nil {
+		return 0, errors.New("Write after Close")
+	}
+	return w.enc.Write(p)
+}
+
+// Close closes the wrapped encoder, hands it back to its pool and clears
+// w.enc. The error from the encoder's Close is returned. Calling Close again
+// is a no-op that returns nil.
+func (w *pooledZipWriter) Close() error {
+	w.mu.Lock()
+	defer w.mu.Unlock()
+	var err error
+	if w.enc != nil {
+		err = w.enc.Close()
+		// Recycle into the encoder pool that ZipCompressor reads from.
+		// Encoders must not go into zipReaderPool: newZipReader only accepts
+		// *Decoder values from it, so an encoder stored there is never reused.
+		if w.pool != nil {
+			w.pool.Put(w.enc)
+		}
+		w.enc = nil
+	}
+	return err
+}
+
+// ZipCompressor returns a compressor that can be registered with zip libraries.
+// The provided encoder options will be used on all encodes.
+//
+// Encoders are kept in a pool private to the returned function: a pooled
+// encoder is reset onto w when available, otherwise a new one is created with
+// opts and any creation error is returned.
+func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
+	var pool sync.Pool
+	return func(w io.Writer) (io.WriteCloser, error) {
+		enc, ok := pool.Get().(*Encoder)
+		if ok {
+			enc.Reset(w)
+		} else {
+			var err error
+			enc, err = NewWriter(w, opts...)
+			if err != nil {
+				return nil, err
+			}
+		}
+		// Pass the pool along so Close can return the encoder to it.
+		return &pooledZipWriter{enc: enc, pool: &pool}, nil
+	}
+}
+
+// ZipDecompressor returns a decompressor that can be registered with zip libraries.
+// Each call of the returned function creates a new decoder with
+// WithDecoderConcurrency(1) and WithDecoderLowmem(true), panics if that fails,
+// and returns the decoder's IOReadCloser.
+// See ZipCompressor for example.
+func ZipDecompressor() func(r io.Reader) io.ReadCloser {
+	open := func(src io.Reader) io.ReadCloser {
+		dec, err := NewReader(src, WithDecoderConcurrency(1), WithDecoderLowmem(true))
+		if err != nil {
+			panic(err)
+		}
+		return dec.IOReadCloser()
+	}
+	return open
+}
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@ -15,6 +15,12 @@ import (
 // enable debug printing
 const debug = false

+// enable encoding debug printing
+const debugEncoder = debug
+
+// enable decoding debug printing
+const debugDecoder = debug
+
 // Enable extra assertions.
 const debugAsserts = debug || false

@ -82,13 +88,13 @@ var (
 )

 func println(a ...interface{}) {
-	if debug {
+	if debug || debugDecoder || debugEncoder {
 		log.Println(a...)
 	}
 }

 func printf(format string, a ...interface{}) {
-	if debug {
+	if debug || debugDecoder || debugEncoder {
 		log.Printf(format, a...)
 	}
 }
--- a/vendor/github.com/klauspost/cpuid/.gitignore
+++ b/vendor/github.com/klauspost/cpuid/.gitignore
@ -1,24 +0,0 @@
-# Compiled Object files, Static and Dynamic libs (Shared Objects)
-*.o
-*.a
-*.so
-
-# Folders
-_obj
-_test
-
-# Architecture specific extensions/prefixes
-*.[568vq]
-[568vq].out
-
-*.cgo1.go
-*.cgo2.c
-_cgo_defun.c
-_cgo_gotypes.go
-_cgo_export.*
-
-_testmain.go
-
-*.exe
-*.test
-*.prof
--- a/vendor/github.com/klauspost/cpuid/.travis.yml
+++ b/vendor/github.com/klauspost/cpuid/.travis.yml
@ -1,46 +0,0 @@
-language: go
-
-os:
-  - linux
-  - osx
-  - windows
-
-arch:
-  - amd64
-  - arm64
-
-go:
-  - 1.12.x
-  - 1.13.x
-  - 1.14.x
-  - master
-
-script:
-  - go vet ./...
-  - go test -race ./...
-  - go test -tags=noasm ./...
-
-stages:
-  - gofmt
-  - test
-
-matrix:
-  allow_failures:
-    - go: 'master'
-  fast_finish: true
-  include:
-    - stage: gofmt
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - diff <(gofmt -d .) <(printf "")
-        - diff <(gofmt -d ./private) <(printf "")
-        - go install github.com/klauspost/asmfmt/cmd/asmfmt
-        - diff <(asmfmt -d .) <(printf "")
-    - stage: i386
-      go: 1.14.x
-      os: linux
-      arch: amd64
-      script:
-        - GOOS=linux GOARCH=386 go test .
--- a/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
+++ b/vendor/github.com/klauspost/cpuid/CONTRIBUTING.txt
@ -1,35 +0,0 @@
-Developer Certificate of Origin
-Version 1.1
-
-Copyright (C) 2015- Klaus Post & Contributors.
-Email: klauspost@gmail.com
-
-Everyone is permitted to copy and distribute verbatim copies of this
-license document, but changing it is not allowed.
-
-
-Developer's Certificate of Origin 1.1
-
-By making a contribution to this project, I certify that:
-
-(a) The contribution was created in whole or in part by me and I
-    have the right to submit it under the open source license
-    indicated in the file; or
-
-(b) The contribution is based upon previous work that, to the best
-    of my knowledge, is covered under an appropriate open source
-    license and I have the right under that license to submit that
-    work with modifications, whether created in whole or in part
-    by me, under the same open source license (unless I am
-    permitted to submit under a different license), as indicated
-    in the file; or
-
-(c) The contribution was provided directly to me by some other
-    person who certified (a), (b) or (c) and I have not modified
-    it.
-
-(d) I understand and agree that this project and the contribution
-    are public and that a record of the contribution (including all
-    personal information I submit with it, including my sign-off) is
-    maintained indefinitely and may be redistributed consistent with
-    this project or the open source license(s) involved.
--- a/vendor/github.com/klauspost/cpuid/LICENSE
+++ b/vendor/github.com/klauspost/cpuid/LICENSE
@ -1,22 +0,0 @@
-The MIT License (MIT)
-
-Copyright (c) 2015 Klaus Post
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
--- a/vendor/github.com/klauspost/cpuid/README.md
+++ b/vendor/github.com/klauspost/cpuid/README.md
@ -1,191 +0,0 @@
-# cpuid
-Package cpuid provides information about the CPU running the current program.
-
-CPU features are detected on startup, and kept for fast access through the life of the application.
-Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
-
-You can access the CPU information by accessing the shared CPU variable of the cpuid library.
-
-Package home: https://github.com/klauspost/cpuid
-
-[![GoDoc][1]][2] [![Build Status][3]][4]
-
-[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
-[2]: https://godoc.org/github.com/klauspost/cpuid
-[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
-[4]: https://travis-ci.org/klauspost/cpuid
-
-# features
-
-## x86 CPU Instructions
-*  **CMOV** (i686 CMOV)
-*  **NX** (NX (No-Execute) bit)
-*  **AMD3DNOW** (AMD 3DNOW)
-*  **AMD3DNOWEXT** (AMD 3DNowExt)
-*  **MMX** (standard MMX)
-*  **MMXEXT** (SSE integer functions or AMD MMX ext)
-*  **SSE** (SSE functions)
-*  **SSE2** (P4 SSE functions)
-*  **SSE3** (Prescott SSE3 functions)
-*  **SSSE3** (Conroe SSSE3 functions)
-*  **SSE4** (Penryn SSE4.1 functions)
-*  **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
-*  **SSE42** (Nehalem SSE4.2 functions)
-*  **AVX** (AVX functions)
-*  **AVX2** (AVX2 functions)
-*  **FMA3** (Intel FMA 3)
-*  **FMA4** (Bulldozer FMA4 functions)
-*  **XOP** (Bulldozer XOP functions)
-*  **F16C** (Half-precision floating-point conversion)
-*  **BMI1** (Bit Manipulation Instruction Set 1)
-*  **BMI2** (Bit Manipulation Instruction Set 2)
-*  **TBM** (AMD Trailing Bit Manipulation)
-*  **LZCNT** (LZCNT instruction)
-*  **POPCNT** (POPCNT instruction)
-*  **AESNI** (Advanced Encryption Standard New Instructions)
-*  **CLMUL** (Carry-less Multiplication)
-*  **HTT** (Hyperthreading (enabled))
-*  **HLE** (Hardware Lock Elision)
-*  **RTM** (Restricted Transactional Memory)
-*  **RDRAND** (RDRAND instruction is available)
-*  **RDSEED** (RDSEED instruction is available)
-*  **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
-*  **SHA** (Intel SHA Extensions)
-*  **AVX512F** (AVX-512 Foundation)
-*  **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
-*  **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
-*  **AVX512PF** (AVX-512 Prefetch Instructions)
-*  **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
-*  **AVX512CD** (AVX-512 Conflict Detection Instructions)
-*  **AVX512BW** (AVX-512 Byte and Word Instructions)
-*  **AVX512VL** (AVX-512 Vector Length Extensions)
-*  **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
-*  **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
-*  **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
-*  **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
-*  **GFNI** (Galois Field New Instructions)
-*  **VAES** (Vector AES)
-*  **AVX512BITALG** (AVX-512 Bit Algorithms)
-*  **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
-*  **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
-*  **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
-*  **MPX** (Intel MPX (Memory Protection Extensions))
-*  **ERMS** (Enhanced REP MOVSB/STOSB)
-*  **RDTSCP** (RDTSCP Instruction)
-*  **CX16** (CMPXCHG16B Instruction)
-*  **SGX** (Software Guard Extensions, with activation details)
-*  **VMX** (Virtual Machine Extensions)
-
-## Performance
-*  **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
-*  **SSE2SLOW** (SSE2 is supported, but usually not faster)
-*  **SSE3SLOW** (SSE3 is supported, but usually not faster)
-*  **ATOM** (Atom processor, some SSSE3 instructions are slower)
-*  **Cache line** (Probable size of a cache line).
-*  **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
-
-## ARM CPU features
-
-# ARM FEATURE DETECTION DISABLED!
-
-See [#52](https://github.com/klauspost/cpuid/issues/52).
- 
-Currently only `arm64` platforms are implemented. 
-
-*  **FP**  Single-precision and double-precision floating point
-*  **ASIMD**  Advanced SIMD
-*  **EVTSTRM**  Generic timer
-*  **AES**  AES instructions
-*  **PMULL**  Polynomial Multiply instructions (PMULL/PMULL2)
-*  **SHA1**  SHA-1 instructions (SHA1C, etc)
-*  **SHA2**      SHA-2 instructions (SHA256H, etc)
-*  **CRC32**   CRC32/CRC32C instructions
-*  **ATOMICS**   Large System Extensions (LSE)
-*  **FPHP** Half-precision floating point
-*  **ASIMDHP**  Advanced SIMD half-precision floating point
-*  **ARMCPUID**  Some CPU ID registers readable at user-level
-*  **ASIMDRDM**  Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
-*  **JSCVT** Javascript-style double->int convert (FJCVTZS)
-*  **FCMA**  Floating point complex number addition and multiplication
-*  **LRCPC**  Weaker release consistency (LDAPR, etc)
-*  **DCPOP**  Data cache clean to Point of Persistence (DC CVAP)
-*  **SHA3**  SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
-*  **SM3** SM3 instructions
-*  **SM4**  SM4 instructions
-*  **ASIMDDP**  SIMD Dot Product
-*  **SHA512**  SHA512 instructions
-*  **SVE** Scalable Vector Extension
-*  **GPA**  Generic Pointer Authentication
-
-## Cpu Vendor/VM
-* **Intel**
-* **AMD**
-* **VIA**
-* **Transmeta**
-* **NSC**
-* **KVM**  (Kernel-based Virtual Machine)
-* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
-* **VMware**
-* **XenHVM**
-* **Bhyve**
-* **Hygon**
-
-# installing
-
-```go get github.com/klauspost/cpuid```
-
-# example
-
-```Go
-package main
-
-import (
-	"fmt"
-	"github.com/klauspost/cpuid"
-)
-
-func main() {
-	// Print basic CPU information:
-	fmt.Println("Name:", cpuid.CPU.BrandName)
-	fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
-	fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
-	fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
-	fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
-	fmt.Println("Features:", cpuid.CPU.Features)
-	fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
-	fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
-	fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
-	fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
-
-	// Test if we have a specific feature:
-	if cpuid.CPU.SSE() {
-		fmt.Println("We have Streaming SIMD Extensions")
-	}
-}
-```
-
-Sample output:
-```
->go run main.go
-Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
-PhysicalCores: 2
-ThreadsPerCore: 2
-LogicalCores: 4
-Family 6 Model: 42
-Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
-Cacheline bytes: 64
-We have Streaming SIMD Extensions
-```
-
-# private package
-
-In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
-
-For this purpose all exports are removed, and functions and constants are lowercased.
-
-This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
-
-# license
-
-This code is published under an MIT license. See LICENSE file for more information.
--- a/vendor/github.com/klauspost/cpuid/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/cpuid.go
--- a/vendor/github.com/klauspost/cpuid/cpuid_386.s
+++ b/vendor/github.com/klauspost/cpuid/cpuid_386.s
@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build 386,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
-	XORL CX, CX
-	MOVL op+0(FP), AX
-	CPUID
-	MOVL AX, eax+4(FP)
-	MOVL BX, ebx+8(FP)
-	MOVL CX, ecx+12(FP)
-	MOVL DX, edx+16(FP)
-	RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
-	MOVL op+0(FP), AX
-	MOVL op2+4(FP), CX
-	CPUID
-	MOVL AX, eax+8(FP)
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func xgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
-	MOVL index+0(FP), CX
-	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
-	MOVL AX, eax+4(FP)
-	MOVL DX, edx+8(FP)
-	RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
-	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
-	MOVL AX, eax+0(FP)
-	MOVL BX, ebx+4(FP)
-	MOVL CX, ecx+8(FP)
-	MOVL DX, edx+12(FP)
-	RET
--- a/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
+++ b/vendor/github.com/klauspost/cpuid/cpuid_amd64.s
@ -1,42 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build amd64,!gccgo,!noasm,!appengine
-
-// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuid(SB), 7, $0
-	XORQ CX, CX
-	MOVL op+0(FP), AX
-	CPUID
-	MOVL AX, eax+8(FP)
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-TEXT ·asmCpuidex(SB), 7, $0
-	MOVL op+0(FP), AX
-	MOVL op2+4(FP), CX
-	CPUID
-	MOVL AX, eax+8(FP)
-	MOVL BX, ebx+12(FP)
-	MOVL CX, ecx+16(FP)
-	MOVL DX, edx+20(FP)
-	RET
-
-// func asmXgetbv(index uint32) (eax, edx uint32)
-TEXT ·asmXgetbv(SB), 7, $0
-	MOVL index+0(FP), CX
-	BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
-	MOVL AX, eax+8(FP)
-	MOVL DX, edx+12(FP)
-	RET
-
-// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-TEXT ·asmRdtscpAsm(SB), 7, $0
-	BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
-	MOVL AX, eax+0(FP)
-	MOVL BX, ebx+4(FP)
-	MOVL CX, ecx+8(FP)
-	MOVL DX, edx+12(FP)
-	RET
--- a/vendor/github.com/klauspost/cpuid/cpuid_arm64.s
+++ b/vendor/github.com/klauspost/cpuid/cpuid_arm64.s
@ -1,26 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build arm64,!gccgo
-
-// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
-
-// func getMidr
-TEXT ·getMidr(SB), 7, $0
-	WORD $0xd5380000    // mrs x0, midr_el1         /* Main ID Register */
-	MOVD R0, midr+0(FP)
-	RET
-
-// func getProcFeatures
-TEXT ·getProcFeatures(SB), 7, $0
-	WORD $0xd5380400            // mrs x0, id_aa64pfr0_el1  /* Processor Feature Register 0 */
-	MOVD R0, procFeatures+0(FP)
-	RET
-
-// func getInstAttributes
-TEXT ·getInstAttributes(SB), 7, $0
-	WORD $0xd5380600            // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
-	WORD $0xd5380621            // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
-	MOVD R0, instAttrReg0+0(FP)
-	MOVD R1, instAttrReg1+8(FP)
-	RET
-
--- a/vendor/github.com/klauspost/cpuid/detect_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/detect_arm64.go
@ -1,219 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build arm64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-func getMidr() (midr uint64)
-func getProcFeatures() (procFeatures uint64)
-func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
-
-func initCPU() {
-	cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
-	cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
-	xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
-	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
-}
-
-func addInfo(c *CPUInfo) {
-	// ARM64 disabled for now.
-	if true {
-		return
-	}
-	// 	midr := getMidr()
-
-	// MIDR_EL1 - Main ID Register
-	//  x--------------------------------------------------x
-	//  | Name                         |  bits   | visible |
-	//  |--------------------------------------------------|
-	//  | Implementer                  | [31-24] |    y    |
-	//  |--------------------------------------------------|
-	//  | Variant                      | [23-20] |    y    |
-	//  |--------------------------------------------------|
-	//  | Architecture                 | [19-16] |    y    |
-	//  |--------------------------------------------------|
-	//  | PartNum                      | [15-4]  |    y    |
-	//  |--------------------------------------------------|
-	//  | Revision                     | [3-0]   |    y    |
-	//  x--------------------------------------------------x
-
-	// 	fmt.Printf(" implementer:  0x%02x\n", (midr>>24)&0xff)
-	// 	fmt.Printf("     variant:   0x%01x\n", (midr>>20)&0xf)
-	// 	fmt.Printf("architecture:   0x%01x\n", (midr>>16)&0xf)
-	// 	fmt.Printf("    part num: 0x%03x\n", (midr>>4)&0xfff)
-	// 	fmt.Printf("    revision:   0x%01x\n", (midr>>0)&0xf)
-
-	procFeatures := getProcFeatures()
-
-	// ID_AA64PFR0_EL1 - Processor Feature Register 0
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | DIT                          | [51-48] |    y    |
-	// |--------------------------------------------------|
-	// | SVE                          | [35-32] |    y    |
-	// |--------------------------------------------------|
-	// | GIC                          | [27-24] |    n    |
-	// |--------------------------------------------------|
-	// | AdvSIMD                      | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | FP                           | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | EL3                          | [15-12] |    n    |
-	// |--------------------------------------------------|
-	// | EL2                          | [11-8]  |    n    |
-	// |--------------------------------------------------|
-	// | EL1                          | [7-4]   |    n    |
-	// |--------------------------------------------------|
-	// | EL0                          | [3-0]   |    n    |
-	// x--------------------------------------------------x
-
-	var f ArmFlags
-	// if procFeatures&(0xf<<48) != 0 {
-	// 	fmt.Println("DIT")
-	// }
-	if procFeatures&(0xf<<32) != 0 {
-		f |= SVE
-	}
-	if procFeatures&(0xf<<20) != 15<<20 {
-		f |= ASIMD
-		if procFeatures&(0xf<<20) == 1<<20 {
-			// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
-			// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
-			f |= FPHP
-			f |= ASIMDHP
-		}
-	}
-	if procFeatures&(0xf<<16) != 0 {
-		f |= FP
-	}
-
-	instAttrReg0, instAttrReg1 := getInstAttributes()
-
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-	//
-	// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | TS                           | [55-52] |    y    |
-	// |--------------------------------------------------|
-	// | FHM                          | [51-48] |    y    |
-	// |--------------------------------------------------|
-	// | DP                           | [47-44] |    y    |
-	// |--------------------------------------------------|
-	// | SM4                          | [43-40] |    y    |
-	// |--------------------------------------------------|
-	// | SM3                          | [39-36] |    y    |
-	// |--------------------------------------------------|
-	// | SHA3                         | [35-32] |    y    |
-	// |--------------------------------------------------|
-	// | RDM                          | [31-28] |    y    |
-	// |--------------------------------------------------|
-	// | ATOMICS                      | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | CRC32                        | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | SHA2                         | [15-12] |    y    |
-	// |--------------------------------------------------|
-	// | SHA1                         | [11-8]  |    y    |
-	// |--------------------------------------------------|
-	// | AES                          | [7-4]   |    y    |
-	// x--------------------------------------------------x
-
-	// if instAttrReg0&(0xf<<52) != 0 {
-	// 	fmt.Println("TS")
-	// }
-	// if instAttrReg0&(0xf<<48) != 0 {
-	// 	fmt.Println("FHM")
-	// }
-	if instAttrReg0&(0xf<<44) != 0 {
-		f |= ASIMDDP
-	}
-	if instAttrReg0&(0xf<<40) != 0 {
-		f |= SM4
-	}
-	if instAttrReg0&(0xf<<36) != 0 {
-		f |= SM3
-	}
-	if instAttrReg0&(0xf<<32) != 0 {
-		f |= SHA3
-	}
-	if instAttrReg0&(0xf<<28) != 0 {
-		f |= ASIMDRDM
-	}
-	if instAttrReg0&(0xf<<20) != 0 {
-		f |= ATOMICS
-	}
-	if instAttrReg0&(0xf<<16) != 0 {
-		f |= CRC32
-	}
-	if instAttrReg0&(0xf<<12) != 0 {
-		f |= SHA2
-	}
-	if instAttrReg0&(0xf<<12) == 2<<12 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
-		f |= SHA512
-	}
-	if instAttrReg0&(0xf<<8) != 0 {
-		f |= SHA1
-	}
-	if instAttrReg0&(0xf<<4) != 0 {
-		f |= AES
-	}
-	if instAttrReg0&(0xf<<4) == 2<<4 {
-		// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
-		// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
-		f |= PMULL
-	}
-
-	// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
-	//
-	// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
-	// x--------------------------------------------------x
-	// | Name                         |  bits   | visible |
-	// |--------------------------------------------------|
-	// | GPI                          | [31-28] |    y    |
-	// |--------------------------------------------------|
-	// | GPA                          | [27-24] |    y    |
-	// |--------------------------------------------------|
-	// | LRCPC                        | [23-20] |    y    |
-	// |--------------------------------------------------|
-	// | FCMA                         | [19-16] |    y    |
-	// |--------------------------------------------------|
-	// | JSCVT                        | [15-12] |    y    |
-	// |--------------------------------------------------|
-	// | API                          | [11-8]  |    y    |
-	// |--------------------------------------------------|
-	// | APA                          | [7-4]   |    y    |
-	// |--------------------------------------------------|
-	// | DPB                          | [3-0]   |    y    |
-	// x--------------------------------------------------x
-
-	// if instAttrReg1&(0xf<<28) != 0 {
-	// 	fmt.Println("GPI")
-	// }
-	if instAttrReg1&(0xf<<28) != 24 {
-		f |= GPA
-	}
-	if instAttrReg1&(0xf<<20) != 0 {
-		f |= LRCPC
-	}
-	if instAttrReg1&(0xf<<16) != 0 {
-		f |= FCMA
-	}
-	if instAttrReg1&(0xf<<12) != 0 {
-		f |= JSCVT
-	}
-	// if instAttrReg1&(0xf<<8) != 0 {
-	// 	fmt.Println("API")
-	// }
-	// if instAttrReg1&(0xf<<4) != 0 {
-	// 	fmt.Println("APA")
-	// }
-	if instAttrReg1&(0xf<<0) != 0 {
-		f |= DCPOP
-	}
-	c.Arm = f
-}
--- a/vendor/github.com/klauspost/cpuid/detect_intel.go
+++ b/vendor/github.com/klauspost/cpuid/detect_intel.go
@ -1,33 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
-
-package cpuid
-
-func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
-func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
-func asmXgetbv(index uint32) (eax, edx uint32)
-func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
-
-func initCPU() {
-	cpuid = asmCpuid
-	cpuidex = asmCpuidex
-	xgetbv = asmXgetbv
-	rdtscpAsm = asmRdtscpAsm
-}
-
-func addInfo(c *CPUInfo) {
-	c.maxFunc = maxFunctionID()
-	c.maxExFunc = maxExtendedFunction()
-	c.BrandName = brandName()
-	c.CacheLine = cacheLine()
-	c.Family, c.Model = familyModel()
-	c.Features = support()
-	c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
-	c.ThreadsPerCore = threadsPerCore()
-	c.LogicalCores = logicalCores()
-	c.PhysicalCores = physicalCores()
-	c.VendorID, c.VendorString = vendorID()
-	c.Hz = hertz(c.BrandName)
-	c.cacheSize()
-}
--- a/vendor/github.com/klauspost/cpuid/detect_ref.go
+++ b/vendor/github.com/klauspost/cpuid/detect_ref.go
@ -1,14 +0,0 @@
-// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
-
-//+build !amd64,!386,!arm64 gccgo noasm appengine
-
-package cpuid
-
-func initCPU() {
-	cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
-	cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
-	xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
-	rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
-}
-
-func addInfo(info *CPUInfo) {}
--- a/vendor/github.com/klauspost/cpuid/go.mod
+++ b/vendor/github.com/klauspost/cpuid/go.mod
@ -1,3 +0,0 @@
-module github.com/klauspost/cpuid
-
-go 1.12
--- a/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
+++ b/vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml
@ -0,0 +1,74 @@
+# GoReleaser configuration for building and packaging the cpuid command-line tool.
+# See the GoReleaser documentation at https://goreleaser.com
+
+builds:
+  -
+    id: "cpuid"
+    binary: cpuid
+    main: ./cmd/cpuid/main.go
+    env:
+      - CGO_ENABLED=0
+    flags:
+      - -ldflags=-s -w
+    goos:
+      - aix
+      - linux
+      - freebsd
+      - netbsd
+      - windows
+      - darwin
+    goarch:
+      - 386
+      - amd64
+      - arm64
+    goarm:
+      - 7
+
+archives:
+  -
+    id: cpuid
+    name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
+    replacements:
+      aix: AIX
+      darwin: OSX
+      linux: Linux
+      windows: Windows
+      386: i386
+      amd64: x86_64
+      freebsd: FreeBSD
+      netbsd: NetBSD
+    format_overrides:
+      - goos: windows
+        format: zip
+    files:
+      - LICENSE
+checksum:
+  name_template: 'checksums.txt'
+snapshot:
+  name_template: "{{ .Tag }}-next"
+changelog:
+  sort: asc
+  filters:
+    exclude:
+    - '^doc:'
+    - '^docs:'
+    - '^test:'
+    - '^tests:'
+    - '^Update\sREADME.md'
+
+# Linux package (deb/rpm) metadata for the cpuid tool.
+nfpms:
+  -
+    file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
+    vendor: Klaus Post
+    homepage: https://github.com/klauspost/cpuid
+    maintainer: Klaus Post <klauspost@gmail.com>
+    description: CPUID Tool
+    # Must match the license stated in the package sources (MIT).
+    license: MIT
+    formats:
+      - deb
+      - rpm
+    replacements:
+      darwin: Darwin
+      linux: Linux
+      freebsd: FreeBSD
+      amd64: x86_64
--- a/vendor/github.com/klauspost/cpuid/v2/.travis.yml
+++ b/vendor/github.com/klauspost/cpuid/v2/.travis.yml
@ -16,41 +16,52 @@ go:
  - 1.16.x
  - master

+env:
+  - CGO_ENABLED=0
+
 script:
  - go vet ./...
  - go test -test.v -test.run ^TestCPUID$
-  - go test -race ./...
+  - CGO_ENABLED=1 go test -race ./...
+  - go test -tags=nounsafe -test.v -test.run ^TestCPUID$
  - go test -tags=noasm ./...
+  - go run ./cmd/cpuid/main.go
+  - go run ./cmd/cpuid/main.go -json

 matrix:
  allow_failures:
    - go: 'master'
  fast_finish: true
  include:
-    - stage: gofmt
-      go: 1.15.x
+    - stage: other
+      go: 1.16.x
      os: linux
      arch: amd64
      script:
        - diff <(gofmt -d .) <(printf "")
        - diff <(gofmt -d ./private) <(printf "")
-        - go install github.com/klauspost/asmfmt/cmd/asmfmt
+        - curl -sfL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- check # check goreleaser config for deprecations
+        - curl -sL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- --snapshot --skip-publish --rm-dist
+        - go get github.com/klauspost/asmfmt&&go install github.com/klauspost/asmfmt/cmd/asmfmt
        - diff <(asmfmt -d .) <(printf "")
-    - stage: i386
-      go: 1.15.x
-      os: linux
-      arch: amd64
-      script:
        - GOOS=linux GOARCH=386 go test .
-    - stage: buildotherprev
+        - ./test-architectures.sh
+    - stage: other
      go: 1.15.x
      os: linux
      arch: amd64
      script:
        - ./test-architectures.sh
-    - stage: buildother
+
+deploy:
+  - provider: script
+    skip_cleanup: true
+    script: curl -sL https://git.io/goreleaser | VERSION=v0.157.0 bash || true
+    on:
+      tags: true
+      condition: ($TRAVIS_OS_NAME = linux) && ($TRAVIS_CPU_ARCH = amd64)
      go: 1.16.x
-      os: linux
-      arch: amd64
-      script:
-        - ./test-architectures.sh
+branches:
+  only:
+    - master
+    - /^v\d+\.\d+(\.\d+)?(-\S*)?$/
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@ -15,6 +15,7 @@ import (
 	"fmt"
 	"math"
 	"os"
+	"runtime"
 	"strings"
 )

@ -209,6 +210,7 @@ var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
 var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
 var xgetbv func(index uint32) (eax, edx uint32)
 var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
+var darwinHasAVX512 = func() bool { return false }

 // CPU contains information about the CPU as detected on startup,
 // or when Detect last was called.
@ -922,7 +924,11 @@ func support() flagSet {
 			// Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
 			// ZMM16-ZMM31 state are enabled by OS)
 			/// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
-			if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
+			hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
+			if runtime.GOOS == "darwin" {
+				hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
+			}
+			if hasAVX512 {
 				fs.setIf(ebx&(1<<16) != 0, AVX512F)
 				fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
 				fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_386.s
@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// func asmDarwinHasAVX512() bool
+// Stub for 386: always reports false. The result is a single byte, so it is
+// stored with MOVB into ret+0(FP) and the frame declares a 1-byte result,
+// matching the amd64 implementation of the same function.
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP)
+	RET
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid_amd64.s
@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
 	MOVL CX, ecx+8(FP)
 	MOVL DX, edx+12(FP)
 	RET
+
+// From https://go-review.googlesource.com/c/sys/+/285572/
+// func asmDarwinHasAVX512() bool
+TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
+	MOVB $0, ret+0(FP) // default to false
+
+#ifdef GOOS_darwin // return if not darwin
+#ifdef GOARCH_amd64 // return if not amd64
+// These values from:
+// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
+#define commpage64_base_address         0x00007fffffe00000
+#define commpage64_cpu_capabilities64   (commpage64_base_address+0x010)
+#define commpage64_version              (commpage64_base_address+0x01E)
+#define hasAVX512F                      0x0000004000000000
+	MOVQ $commpage64_version, BX // address of the commpage version field
+	MOVW (BX), AX // load the 16-bit version value
+	CMPW AX, $13                            // versions < 13 do not support AVX512
+	JL   no_avx512 // too old: keep the default false result
+	MOVQ $commpage64_cpu_capabilities64, BX // address of the 64-bit capabilities word
+	MOVQ (BX), AX // load the capability bits
+	MOVQ $hasAVX512F, CX
+	ANDQ CX, AX // isolate the hasAVX512F bit; sets ZF when it is clear
+	JZ   no_avx512 // bit clear: keep the default false result
+	MOVB $1, ret+0(FP) // version and capability checks passed: report true
+
+no_avx512:
+#endif
+#endif
+	RET
+
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@ -8,12 +8,14 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
 func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
 func asmXgetbv(index uint32) (eax, edx uint32)
 func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
+func asmDarwinHasAVX512() bool

 // initCPU installs the assembly-backed implementations into the package-level
 // function variables used for CPU probing.
 func initCPU() {
 	cpuid = asmCpuid
 	cpuidex = asmCpuidex
 	xgetbv = asmXgetbv
 	rdtscpAsm = asmRdtscpAsm
+	darwinHasAVX512 = asmDarwinHasAVX512 // replaces the default stub, which always returns false
 }

 func addInfo(c *CPUInfo, safe bool) {
--- a/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_darwin_arm64.go
@ -11,5 +11,9 @@ func detectOS(c *CPUInfo) bool {
 	// to all Go programs running on darwin/arm64.
 	// TODO: Add more if we know them.
 	c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
+	c.PhysicalCores = runtime.NumCPU()
+	// For now assuming 1 thread per core...
+	c.ThreadsPerCore = 1
+	c.LogicalCores = c.PhysicalCores
 	return true
 }
--- a/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_linux_arm64.go
@ -11,7 +11,6 @@ import (
 	"encoding/binary"
 	"io/ioutil"
 	"runtime"
-	"unsafe"
 )

 // HWCAP bits.
@ -42,12 +41,9 @@ const (
 	hwcap_ASIMDFHM = 1 << 23
 )

-//go:linkname hwcap internal/cpu.HWCap
-var hwcap uint
-
 func detectOS(c *CPUInfo) bool {
 	// For now assuming no hyperthreading is reasonable.
-	c.LogicalCores = int(getproccount())
+	c.LogicalCores = runtime.NumCPU()
 	c.PhysicalCores = c.LogicalCores
 	c.ThreadsPerCore = 1
 	if hwcap == 0 {
@ -132,30 +128,3 @@ func detectOS(c *CPUInfo) bool {
 // isSet reports whether hwc has at least one of the bits in value set.
 func isSet(hwc uint, value uint) bool {
 	masked := hwc & value
 	return masked != 0
 }
-
-//go:noescape
-//go:linkname sched_getaffinity runtime.sched_getaffinity
-func sched_getaffinity(pid, len uintptr, buf *byte) int32
-
-func getproccount() int32 {
-	// This buffer is huge (8 kB) but we are on the system stack
-	// and there should be plenty of space (64 kB).
-	// Also this is a leaf, so we're not holding up the memory for long.
-	const maxCPUs = 64 * 1024
-	var buf [maxCPUs / 8]byte
-	r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
-	if r < 0 {
-		return 0
-	}
-	n := int32(0)
-	for _, v := range buf[:r] {
-		for v != 0 {
-			n += int32(v & 1)
-			v >>= 1
-		}
-	}
-	if n == 0 {
-		n = 1
-	}
-	return n
-}
--- a/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_other_arm64.go
@ -6,6 +6,12 @@

 package cpuid

+import "runtime"
+
 // detectOS fills in the core counts of c from runtime.NumCPU, assuming one
 // thread per core, and returns false.
 func detectOS(c *CPUInfo) bool {
+	c.PhysicalCores = runtime.NumCPU()
+	// For now assuming 1 thread per core...
+	c.ThreadsPerCore = 1
+	c.LogicalCores = c.PhysicalCores
 	return false // NOTE(review): presumably signals that no OS-specific feature detection ran — confirm against the caller
 }
--- a/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_safe_linux_arm64.go
@ -0,0 +1,7 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build nounsafe
+
+package cpuid
+
+var hwcap uint
--- a/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
+++ b/vendor/github.com/klauspost/cpuid/v2/os_unsafe_linux_arm64.go
@ -0,0 +1,10 @@
+// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
+
+//+build !nounsafe
+
+package cpuid
+
+import _ "unsafe" // needed for go:linkname
+
+//go:linkname hwcap internal/cpu.HWCap
+var hwcap uint
--- a/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
+++ b/vendor/github.com/klauspost/cpuid/v2/test-architectures.sh
@ -5,11 +5,11 @@ set -e
 go tool dist list | while IFS=/ read os arch; do
    echo "Checking $os/$arch..."
    echo " normal"
-    GOARCH=$arch GOOS=$os go build -o /dev/null ./...
+    GOARCH=$arch GOOS=$os go build -o /dev/null .
    echo " noasm"
-    GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./...
+    GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
    echo " appengine"
-    GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./...
+    GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
    echo " noasm,appengine"
-    GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./...
+    GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
 done