1
0
Fork 0
forked from forgejo/forgejo

Vendor Update (#16121)

* update github.com/PuerkitoBio/goquery

* update github.com/alecthomas/chroma

* update github.com/blevesearch/bleve/v2

* update github.com/caddyserver/certmagic

* update github.com/go-enry/go-enry/v2

* update github.com/go-git/go-billy/v5

* update github.com/go-git/go-git/v5

* update github.com/go-redis/redis/v8

* update github.com/go-testfixtures/testfixtures/v3

* update github.com/jaytaylor/html2text

* update github.com/json-iterator/go

* update github.com/klauspost/compress

* update github.com/markbates/goth

* update github.com/mattn/go-isatty

* update github.com/mholt/archiver/v3

* update github.com/microcosm-cc/bluemonday

* update github.com/minio/minio-go/v7

* update github.com/prometheus/client_golang

* update github.com/unrolled/render

* update github.com/xanzy/go-gitlab

* update github.com/yuin/goldmark

* update github.com/yuin/goldmark-highlighting

Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
6543 2021-06-10 16:44:25 +02:00 committed by GitHub
parent f088dc4ea1
commit 86e2789960
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
819 changed files with 38072 additions and 34969 deletions

View file

@ -644,7 +644,7 @@ func (d *compressor) init(w io.Writer, level int) (err error) {
d.fill = (*compressor).fillBlock
d.step = (*compressor).store
case level == ConstantCompression:
d.w.logNewTablePenalty = 8
d.w.logNewTablePenalty = 10
d.window = make([]byte, 32<<10)
d.fill = (*compressor).fillBlock
d.step = (*compressor).storeHuff

View file

@ -45,7 +45,7 @@ const (
bTableBits = 17 // Bits used in the big tables
bTableSize = 1 << bTableBits // Size of the table
allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history.
allocHistory = maxStoreBlockSize * 5 // Size to preallocate for history.
bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this.
)

View file

@ -6,6 +6,7 @@ package flate
import (
"encoding/binary"
"fmt"
"io"
)
@ -27,7 +28,7 @@ const (
// after which bytes are flushed to the writer.
// Should preferably be a multiple of 6, since
// we accumulate 6 bytes between writes to the buffer.
bufferFlushSize = 240
bufferFlushSize = 246
// bufferSize is the actual output byte buffer size.
// It must have additional headroom for a flush
@ -59,19 +60,31 @@ var offsetExtraBits = [64]int8{
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20,
}
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
var offsetCombined = [32]uint32{}
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
func init() {
var offsetBase = [64]uint32{
/* normal deflate */
0x000000, 0x000001, 0x000002, 0x000003, 0x000004,
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018,
0x000020, 0x000030, 0x000040, 0x000060, 0x000080,
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300,
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000,
0x001800, 0x002000, 0x003000, 0x004000, 0x006000,
/* extended window */
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000,
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000,
0x100000, 0x180000, 0x200000, 0x300000,
}
for i := range offsetCombined[:] {
// Don't use extended window values...
if offsetBase[i] > 0x006000 {
continue
}
offsetCombined[i] = uint32(offsetExtraBits[i])<<16 | (offsetBase[i])
}
}
// The odd order in which the codegen code sizes are written.
@ -88,15 +101,16 @@ type huffmanBitWriter struct {
bits uint64
nbits uint16
nbytes uint8
lastHuffMan bool
literalEncoding *huffmanEncoder
tmpLitEncoding *huffmanEncoder
offsetEncoding *huffmanEncoder
codegenEncoding *huffmanEncoder
err error
lastHeader int
// Set between 0 (reused block can be up to 2x the size)
logNewTablePenalty uint
lastHuffMan bool
bytes [256]byte
bytes [256 + 8]byte
literalFreq [lengthCodesStart + 32]uint16
offsetFreq [32]uint16
codegenFreq [codegenCodeCount]uint16
@ -128,6 +142,7 @@ func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter {
return &huffmanBitWriter{
writer: w,
literalEncoding: newHuffmanEncoder(literalCount),
tmpLitEncoding: newHuffmanEncoder(literalCount),
codegenEncoding: newHuffmanEncoder(codegenCodeCount),
offsetEncoding: newHuffmanEncoder(offsetCodeCount),
}
@ -745,9 +760,31 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
offs := oeCodes[:32]
lengths := leCodes[lengthCodesStart:]
lengths = lengths[:32]
// Go 1.16 LOVES having these on stack.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
for _, t := range tokens {
if t < matchType {
w.writeCode(lits[t.literal()])
//w.writeCode(lits[t.literal()])
c := lits[t.literal()]
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
continue
}
@ -759,38 +796,99 @@ func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode)
} else {
// inlined
c := lengths[lengthCode&31]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
extraLengthBits := uint16(lengthExtraBits[lengthCode&31])
if extraLengthBits > 0 {
//w.writeBits(extraLength, extraLengthBits)
extraLength := int32(length - lengthBase[lengthCode&31])
w.writeBits(extraLength, extraLengthBits)
bits |= uint64(extraLength) << nbits
nbits += extraLengthBits
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
// Write the offset
offset := t.offset()
offsetCode := offsetCode(offset)
offsetCode := offset >> 16
offset &= matchOffsetOnlyMask
if false {
w.writeCode(offs[offsetCode&31])
} else {
// inlined
c := offs[offsetCode&31]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
w.writeOutBits()
c := offs[offsetCode]
bits |= uint64(c.code) << nbits
nbits += c.len
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
extraOffsetBits := uint16(offsetExtraBits[offsetCode&63])
if extraOffsetBits > 0 {
extraOffset := int32(offset - offsetBase[offsetCode&63])
w.writeBits(extraOffset, extraOffsetBits)
offsetComb := offsetCombined[offsetCode]
if offsetComb > 1<<16 {
//w.writeBits(extraOffset, extraOffsetBits)
bits |= uint64(offset&matchOffsetOnlyMask-(offsetComb&0xffff)) << nbits
nbits += uint16(offsetComb >> 16)
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
nbytes = 0
return
}
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
}
}
}
// Restore...
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
if deferEOB {
w.writeCode(leCodes[endBlockMarker])
}
@ -825,13 +923,28 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
}
}
// Fill is rarely better...
const fill = false
const numLiterals = endBlockMarker + 1
const numOffsets = 1
// Add everything as literals
// We have to estimate the header size.
// Assume header is around 70 bytes:
// https://stackoverflow.com/a/25454430
const guessHeaderSizeBits = 70 * 8
estBits := histogramSize(input, w.literalFreq[:], !eof && !sync)
estBits += w.lastHeader + len(input)/32
histogram(input, w.literalFreq[:numLiterals], fill)
w.literalFreq[endBlockMarker] = 1
w.tmpLitEncoding.generate(w.literalFreq[:numLiterals], 15)
if fill {
// Clear fill...
for i := range w.literalFreq[:numLiterals] {
w.literalFreq[i] = 0
}
histogram(input, w.literalFreq[:numLiterals], false)
}
estBits := w.tmpLitEncoding.canReuseBits(w.literalFreq[:numLiterals])
estBits += w.lastHeader
if w.lastHeader == 0 {
estBits += guessHeaderSizeBits
}
@ -839,33 +952,31 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
// Store bytes, if we don't get a reasonable improvement.
ssize, storable := w.storedSize(input)
if storable && ssize < estBits {
if storable && ssize <= estBits {
w.writeStoredHeader(len(input), eof)
w.writeBytes(input)
return
}
reuseSize := 0
if w.lastHeader > 0 {
reuseSize = w.literalEncoding.bitLength(w.literalFreq[:256])
reuseSize := w.literalEncoding.canReuseBits(w.literalFreq[:256])
if estBits < reuseSize {
if debugDeflate {
//fmt.Println("not reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
}
// We owe an EOB
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
} else if debugDeflate {
fmt.Println("reusing, reuse:", reuseSize/8, "> new:", estBits/8, "- header est:", w.lastHeader/8)
}
}
const numLiterals = endBlockMarker + 1
const numOffsets = 1
count := 0
if w.lastHeader == 0 {
if !eof && !sync {
// Generate a slightly suboptimal tree that can be used for all.
fillHist(w.literalFreq[:numLiterals])
}
w.literalFreq[endBlockMarker] = 1
w.literalEncoding.generate(w.literalFreq[:numLiterals], 15)
// Use the temp encoding, so swap.
w.literalEncoding, w.tmpLitEncoding = w.tmpLitEncoding, w.literalEncoding
// Generate codegen and codegenFrequencies, which indicates how to encode
// the literalEncoding and the offsetEncoding.
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset)
@ -876,34 +987,47 @@ func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) {
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof)
w.lastHuffMan = true
w.lastHeader, _ = w.headerSize()
if debugDeflate {
count += w.lastHeader
fmt.Println("header:", count/8)
}
}
encoding := w.literalEncoding.codes[:257]
encoding := w.literalEncoding.codes[:256]
// Go 1.16 LOVES having these on stack. At least 1.5x the speed.
bits, nbits, nbytes := w.bits, w.nbits, w.nbytes
for _, t := range input {
// Bitwriting inlined, ~30% speedup
c := encoding[t]
w.bits |= uint64(c.code) << w.nbits
w.nbits += c.len
if w.nbits >= 48 {
bits := w.bits
w.bits >>= 48
w.nbits -= 48
n := w.nbytes
binary.LittleEndian.PutUint64(w.bytes[n:], bits)
n += 6
if n >= bufferFlushSize {
bits |= uint64(c.code) << nbits
nbits += c.len
if debugDeflate {
count += int(c.len)
}
if nbits >= 48 {
binary.LittleEndian.PutUint64(w.bytes[nbytes:], bits)
//*(*uint64)(unsafe.Pointer(&w.bytes[nbytes])) = bits
bits >>= 48
nbits -= 48
nbytes += 6
if nbytes >= bufferFlushSize {
if w.err != nil {
n = 0
nbytes = 0
return
}
w.write(w.bytes[:n])
n = 0
_, w.err = w.writer.Write(w.bytes[:nbytes])
nbytes = 0
}
w.nbytes = n
}
}
// Restore...
w.bits, w.nbits, w.nbytes = bits, nbits, nbytes
if debugDeflate {
fmt.Println("wrote", count/8, "bytes")
}
if eof || sync {
w.writeCode(encoding[endBlockMarker])
w.writeCode(w.literalEncoding.codes[endBlockMarker])
w.lastHeader = 0
w.lastHuffMan = false
}

View file

@ -21,9 +21,13 @@ type hcode struct {
}
type huffmanEncoder struct {
codes []hcode
freqcache []literalNode
bitCount [17]int32
codes []hcode
bitCount [17]int32
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
freqcache [literalCount + 1]literalNode
}
type literalNode struct {
@ -132,6 +136,21 @@ func (h *huffmanEncoder) bitLengthRaw(b []byte) int {
return total
}
// canReuseBits returns the number of bits or math.MaxInt32 if the encoder cannot be reused.
func (h *huffmanEncoder) canReuseBits(freq []uint16) int {
var total int
for i, f := range freq {
if f != 0 {
code := h.codes[i]
if code.len == 0 {
return math.MaxInt32
}
total += int(f) * int(code.len)
}
}
return total
}
// Return the number of literals assigned to each bit size in the Huffman encoding
//
// This method is only called when list.length >= 3
@ -291,12 +310,6 @@ func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalN
// freq An array of frequencies, in which frequency[i] gives the frequency of literal i.
// maxBits The maximum number of bits to use for any literal.
func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
if h.freqcache == nil {
// Allocate a reusable buffer with the longest possible frequency table.
// Possible lengths are codegenCodeCount, offsetCodeCount and literalCount.
// The largest of these is literalCount, so we allocate for that case.
h.freqcache = make([]literalNode, literalCount+1)
}
list := h.freqcache[:len(freq)+1]
// Number of non-zero literals
count := 0
@ -330,10 +343,14 @@ func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) {
h.assignEncodingAndSize(bitCount, list)
}
// atLeastOne clamps the result between 1 and 15.
func atLeastOne(v float32) float32 {
if v < 1 {
return 1
}
if v > 15 {
return 15
}
return v
}
@ -346,31 +363,12 @@ func fillHist(b []uint16) {
}
}
// histogramSize accumulates a histogram of b in h.
// An estimated size in bits is returned.
// len(h) must be >= 256, and h's elements must be all zeroes.
func histogramSize(b []byte, h []uint16, fill bool) (bits int) {
func histogram(b []byte, h []uint16, fill bool) {
h = h[:256]
for _, t := range b {
h[t]++
}
total := len(b)
if fill {
for _, v := range h {
if v == 0 {
total++
}
}
fillHist(h)
}
invTotal := 1.0 / float32(total)
shannon := float32(0.0)
for _, v := range h {
if v > 0 {
n := float32(v)
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
return int(shannon + 0.99)
}

View file

@ -182,12 +182,27 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
// Extend the 4-byte match as long as possible.
if l == 0 {
// Extend the 4-byte match as long as possible.
l = e.matchlenLong(s+4, t+4, src) + 4
} else if l == maxMatchLength {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end of best match...
if sAt := s + l; l < 30 && sAt < sLimit {
eLong := e.bTable[hash7(load6432(src, sAt), tableBits)].Cur.offset
// Test current
t2 := eLong - e.cur - l
off := s - t2
if t2 >= 0 && off < maxMatchOffset && off > 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--

View file

@ -211,6 +211,31 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
l += e.matchlenLong(s+l, t+l, src)
}
// Try to locate a better match by checking the end-of-match...
if sAt := s + l; sAt < sLimit {
eLong := &e.bTable[hash7(load6432(src, sAt), tableBits)]
// Test current
t2 := eLong.Cur.offset - e.cur - l
off := s - t2
if off < maxMatchOffset {
if off > 0 && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
// Test next:
t2 = eLong.Prev.offset - e.cur - l
off := s - t2
if off > 0 && off < maxMatchOffset && t2 >= 0 {
if l2 := e.matchlenLong(s, t2, src); l2 > l {
t = t2
l = l2
}
}
}
}
// Extend backwards
for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
s--

View file

@ -13,14 +13,17 @@ import (
)
const (
// From top
// 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused
// 8 bits: xlength = length - MIN_MATCH_LENGTH
// 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
// 5 bits offsetcode
// 16 bits xoffset = offset - MIN_OFFSET_SIZE, or literal
lengthShift = 22
offsetMask = 1<<lengthShift - 1
typeMask = 3 << 30
literalType = 0 << 30
matchType = 1 << 30
matchOffsetOnlyMask = 0xffff
)
// The length code for length X (MIN_MATCH_LENGTH <= X <= MAX_MATCH_LENGTH)
@ -187,7 +190,7 @@ func (t *tokens) indexTokens(in []token) {
t.AddLiteral(tok.literal())
continue
}
t.AddMatch(uint32(tok.length()), tok.offset())
t.AddMatch(uint32(tok.length()), tok.offset()&matchOffsetOnlyMask)
}
}
@ -232,7 +235,7 @@ func (t *tokens) EstimatedBits() int {
for _, v := range t.litHist[:] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
}
}
// Just add 15 for EOB
@ -240,7 +243,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.extraHist[1 : literalCount-256] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(lengthExtraBits[i&31]) * int(v)
nMatches += int(v)
}
@ -251,7 +254,7 @@ func (t *tokens) EstimatedBits() int {
for i, v := range t.offHist[:offsetCodeCount] {
if v > 0 {
n := float32(v)
shannon += -mFastLog2(n*invTotal) * n
shannon += atLeastOne(-mFastLog2(n*invTotal)) * n
bits += int(offsetExtraBits[i&31]) * int(v)
}
}
@ -270,11 +273,13 @@ func (t *tokens) AddMatch(xlength uint32, xoffset uint32) {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oCode := offsetCode(xoffset)
xoffset |= oCode << 16
t.nLits++
lengthCode := lengthCodes1[uint8(xlength)] & 31
t.extraHist[lengthCodes1[uint8(xlength)]]++
t.offHist[oCode]++
t.tokens[t.n] = token(matchType | xlength<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.offHist[offsetCode(xoffset)&31]++
t.n++
}
@ -286,7 +291,8 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
panic(fmt.Errorf("invalid offset: %v", xoffset))
}
}
oc := offsetCode(xoffset) & 31
oc := offsetCode(xoffset)
xoffset |= oc << 16
for xlength > 0 {
xl := xlength
if xl > 258 {
@ -294,12 +300,11 @@ func (t *tokens) AddMatchLong(xlength int32, xoffset uint32) {
xl = 258 - baseMatchLength
}
xlength -= xl
xl -= 3
xl -= baseMatchLength
t.nLits++
lengthCode := lengthCodes1[uint8(xl)] & 31
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.extraHist[lengthCode]++
t.extraHist[lengthCodes1[uint8(xl)]]++
t.offHist[oc]++
t.tokens[t.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
t.n++
}
}

View file

@ -75,6 +75,7 @@ type Header struct {
type Reader struct {
Header // valid after NewReader or Reader.Reset
r flate.Reader
br *bufio.Reader
decompressor io.ReadCloser
digest uint32 // CRC-32, IEEE polynomial (section 8)
size uint32 // Uncompressed size (section 2.3.1)
@ -109,7 +110,13 @@ func (z *Reader) Reset(r io.Reader) error {
if rr, ok := r.(flate.Reader); ok {
z.r = rr
} else {
z.r = bufio.NewReader(r)
// Reuse if we can.
if z.br != nil {
z.br.Reset(r)
} else {
z.br = bufio.NewReader(r)
}
z.r = z.br
}
z.Header, z.err = z.readHeader()
return z.err

View file

@ -16,8 +16,7 @@ Currently the package is heavily optimized for 64 bit processors and will be sig
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd)
## Compressor
@ -152,8 +151,8 @@ This package:
file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 65177448 1899 106.44
silesia.tar zskp 4 211947520 61381950 8115 24.91
silesia.tar zskp 3 211947520 64595893 2007 100.68
silesia.tar zskp 4 211947520 60995370 7691 26.28
cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56
@ -171,8 +170,8 @@ https://files.klauspost.com/compress/gob-stream.7z
file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 185792019 9324 195.48
gob-stream zskp 4 1911399616 171537212 32113 56.76
gob-stream zskp 3 1911399616 175034659 9636 189.17
gob-stream zskp 4 1911399616 167273881 29337 62.13
gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56
@ -187,8 +186,8 @@ http://mattmahoney.net/dc/textdata.html
file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 294540704 11725 81.34
enwik9 zskp 4 1000000000 276609671 44029 21.66
enwik9 zskp 3 1000000000 292243069 12162 78.41
enwik9 zskp 4 1000000000 275241169 36430 26.18
enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68
@ -202,8 +201,8 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst
file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54
github-june-2days-2019.json zskp 4 6273951764 512796117 97791 61.18
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
@ -217,8 +216,8 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z
file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75
rawstudio-mint14.tar zskp 4 8558382592 3027332295 486243 16.79
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
@ -232,8 +231,8 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53
nyc-taxi-data-10M.csv zskp 4 3325605752 495986829 89368 35.49
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
@ -405,13 +404,28 @@ BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938
This reflects the performance around May 2020, but this may be out of date.
## Zstd inside ZIP files
It is possible to use zstandard to compress individual files inside zip archives.
While this isn't widely supported it can be useful for internal files.
To support the compression and decompression of these files you must register a compressor and decompressor.
It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT
use the global registration functions. The main reason for this is that 2 registrations from
different packages will result in a panic.
It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip
files concurrently, and using a single instance will allow reusing some resources.
See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for
how to compress and decompress files inside zip archives.
# Contributions
Contributions are always welcome.
For new features/fixes, remember to add tests and for performance enhancements include benchmarks.
For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).
This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.

View file

@ -123,12 +123,10 @@ func newBlockDec(lowMem bool) *blockDec {
// Input must be a start of a block and will be at the end of the block when returned.
func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.WindowSize = windowSize
tmp := br.readSmall(3)
if tmp == nil {
if debug {
println("Reading block header:", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
tmp, err := br.readSmall(3)
if err != nil {
println("Reading block header:", err)
return err
}
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
b.Last = bh&1 != 0
@ -146,7 +144,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
}
cSize = 1
case blockTypeCompressed:
if debug {
if debugDecoder {
println("Data size on stream:", cSize)
}
b.RLESize = 0
@ -155,7 +153,7 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
maxSize = int(windowSize)
}
if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize {
if debug {
if debugDecoder {
printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b)
}
return ErrCompressedSizeTooBig
@ -179,10 +177,9 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
if cap(b.dst) <= maxSize {
b.dst = make([]byte, 0, maxSize+1)
}
var err error
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debug {
if debugDecoder {
println("Reading block:", err, "(", cSize, ")", len(b.data))
printf("%T", br)
}
@ -252,7 +249,7 @@ func (b *blockDec) startDecoder() {
b: b.dst,
err: err,
}
if debug {
if debugDecoder {
println("Decompressed to", len(b.dst), "bytes, error:", err)
}
b.result <- o
@ -267,7 +264,7 @@ func (b *blockDec) startDecoder() {
default:
panic("Invalid block type")
}
if debug {
if debugDecoder {
println("blockDec: Finished block")
}
}
@ -300,7 +297,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
b.dst = hist.b
hist.b = nil
err := b.decodeCompressed(hist)
if debug {
if debugDecoder {
println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err)
}
hist.b = b.dst
@ -393,7 +390,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
in = in[5:]
}
}
if debug {
if debugDecoder {
println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams)
}
var literals []byte
@ -431,7 +428,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
literals[i] = v
}
in = in[1:]
if debug {
if debugDecoder {
printf("Found %d RLE compressed literals\n", litRegenSize)
}
case literalsBlockTreeless:
@ -442,7 +439,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Store compressed literals, so we defer decoding until we get history.
literals = in[:litCompSize]
in = in[litCompSize:]
if debug {
if debugDecoder {
printf("Found %d compressed literals\n", litCompSize)
}
case literalsBlockCompressed:
@ -484,7 +481,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if len(literals) != litRegenSize {
return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals))
}
if debug {
if debugDecoder {
printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize)
}
}
@ -535,12 +532,12 @@ func (b *blockDec) decodeCompressed(hist *history) error {
br := byteReader{b: in, off: 0}
compMode := br.Uint8()
br.advance(1)
if debug {
if debugDecoder {
printf("Compression modes: 0b%b", compMode)
}
for i := uint(0); i < 3; i++ {
mode := seqCompMode((compMode >> (6 - i*2)) & 3)
if debug {
if debugDecoder {
println("Table", tableIndex(i), "is", mode)
}
var seq *sequenceDec
@ -571,7 +568,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
dec.setRLE(symb)
seq.fse = dec
if debug {
if debugDecoder {
printf("RLE set to %+v, code: %v", symb, v)
}
case compModeFSE:
@ -587,7 +584,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
println("Transform table error:", err)
return err
}
if debug {
if debugDecoder {
println("Read table ok", "symbolLen:", dec.symbolLen)
}
seq.fse = dec
@ -655,7 +652,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if huff != nil {
hist.huffTree = huff
}
if debug {
if debugDecoder {
println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.")
}
@ -672,7 +669,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
if err != nil {
return err
}
if debug {
if debugDecoder {
println("History merged ok")
}
br := &bitReader{}
@ -731,7 +728,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
}
hist.append(b.dst)
hist.recentOffsets = seqs.prevOffset
if debug {
if debugDecoder {
println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.")
}

View file

@ -156,7 +156,7 @@ func (h *literalsHeader) setSize(regenLen int) {
switch {
case inBits < 5:
lh |= (uint64(regenLen) << 3) | (1 << 60)
if debug {
if debugEncoder {
got := int(lh>>3) & 0xff
if got != regenLen {
panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)"))
@ -184,7 +184,7 @@ func (h *literalsHeader) setSizes(compLen, inLen int, single bool) {
lh |= 1 << 2
}
lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60)
if debug {
if debugEncoder {
const mmask = (1 << 24) - 1
n := (lh >> 4) & mmask
if int(n&1023) != inLen {
@ -312,7 +312,7 @@ func (b *blockEnc) encodeRaw(a []byte) {
bh.setType(blockTypeRaw)
b.output = bh.appendTo(b.output[:0])
b.output = append(b.output, a...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(a), "last:", b.last)
}
}
@ -325,7 +325,7 @@ func (b *blockEnc) encodeRawTo(dst, src []byte) []byte {
bh.setType(blockTypeRaw)
dst = bh.appendTo(dst)
dst = append(dst, src...)
if debug {
if debugEncoder {
println("Adding RAW block, length", len(src), "last:", b.last)
}
return dst
@ -339,7 +339,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
// Don't compress extremely small blocks
if len(lits) < 8 || (len(lits) < 32 && b.dictLitEnc == nil) || raw {
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@ -371,7 +371,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
switch err {
case huff0.ErrIncompressible:
if debug {
if debugEncoder {
println("Adding RAW block, length", len(lits), "last:", b.last)
}
bh.setType(blockTypeRaw)
@ -379,7 +379,7 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
b.output = append(b.output, lits...)
return nil
case huff0.ErrUseRLE:
if debug {
if debugEncoder {
println("Adding RLE block, length", len(lits))
}
bh.setType(blockTypeRLE)
@ -396,12 +396,12 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
bh.setType(blockTypeCompressed)
var lh literalsHeader
if reUsed {
if debug {
if debugEncoder {
println("Reused tree, compressed to", len(out))
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
@ -517,7 +517,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals...)
if debug {
if debugEncoder {
println("Adding literals RAW, length", len(b.literals))
}
case huff0.ErrUseRLE:
@ -525,22 +525,22 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
lh.setSize(len(b.literals))
b.output = lh.appendTo(b.output)
b.output = append(b.output, b.literals[0])
if debug {
if debugEncoder {
println("Adding literals RLE")
}
case nil:
// Compressed litLen...
if reUsed {
if debug {
if debugEncoder {
println("reused tree")
}
lh.setType(literalsBlockTreeless)
} else {
if debug {
if debugEncoder {
println("new tree, size:", len(b.litEnc.OutTable))
}
lh.setType(literalsBlockCompressed)
if debug {
if debugEncoder {
_, _, err := huff0.ReadTable(out, nil)
if err != nil {
panic(err)
@ -548,18 +548,18 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
}
}
lh.setSizes(len(out), len(b.literals), single)
if debug {
if debugEncoder {
printf("Compressed %d literals to %d bytes", len(b.literals), len(out))
println("Adding literal header:", lh)
}
b.output = lh.appendTo(b.output)
b.output = append(b.output, out...)
b.litEnc.Reuse = huff0.ReusePolicyAllow
if debug {
if debugEncoder {
println("Adding literals compressed")
}
default:
if debug {
if debugEncoder {
println("Adding literals ERROR:", err)
}
return err
@ -577,7 +577,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
n := len(b.sequences) - 0x7f00
b.output = append(b.output, 255, uint8(n), uint8(n>>8))
}
if debug {
if debugEncoder {
println("Encoding", len(b.sequences), "sequences")
}
b.genCodes()
@ -611,17 +611,17 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
nSize = nSize + (nSize+2*8*16)>>4
switch {
case predefSize <= prevSize && predefSize <= nSize || forcePreDef:
if debug {
if debugEncoder {
println("Using predefined", predefSize>>3, "<=", nSize>>3)
}
return preDef, compModePredefined
case prevSize <= nSize:
if debug {
if debugEncoder {
println("Using previous", prevSize>>3, "<=", nSize>>3)
}
return prev, compModeRepeat
default:
if debug {
if debugEncoder {
println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes")
println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen])
}
@ -634,7 +634,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if llEnc.useRLE {
mode |= uint8(compModeRLE) << 6
llEnc.setRLE(b.sequences[0].llCode)
if debug {
if debugEncoder {
println("llEnc.useRLE")
}
} else {
@ -645,7 +645,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if ofEnc.useRLE {
mode |= uint8(compModeRLE) << 4
ofEnc.setRLE(b.sequences[0].ofCode)
if debug {
if debugEncoder {
println("ofEnc.useRLE")
}
} else {
@ -657,7 +657,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if mlEnc.useRLE {
mode |= uint8(compModeRLE) << 2
mlEnc.setRLE(b.sequences[0].mlCode)
if debug {
if debugEncoder {
println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen)
}
} else {
@ -666,7 +666,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
mode |= uint8(m) << 2
}
b.output = append(b.output, mode)
if debug {
if debugEncoder {
printf("Compression modes: 0b%b", mode)
}
b.output, err = llEnc.writeCount(b.output)
@ -786,7 +786,7 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
// Size is output minus block header.
bh.setSize(uint32(len(b.output)-bhOffset) - 3)
if debug {
if debugEncoder {
println("Rewriting block header", bh)
}
_ = bh.appendTo(b.output[bhOffset:bhOffset])

View file

@ -12,8 +12,8 @@ import (
type byteBuffer interface {
// Read up to 8 bytes.
// Returns nil if no more input is available.
readSmall(n int) []byte
// Returns io.ErrUnexpectedEOF if this cannot be satisfied.
readSmall(n int) ([]byte, error)
// Read >8 bytes.
// MAY use the destination slice.
@ -29,17 +29,17 @@ type byteBuffer interface {
// in-memory buffer
type byteBuf []byte
func (b *byteBuf) readSmall(n int) []byte {
func (b *byteBuf) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
bb := *b
if len(bb) < n {
return nil
return nil, io.ErrUnexpectedEOF
}
r := bb[:n]
*b = bb[n:]
return r
return r, nil
}
func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
@ -81,19 +81,22 @@ type readerWrapper struct {
tmp [8]byte
}
func (r *readerWrapper) readSmall(n int) []byte {
func (r *readerWrapper) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
n2, err := io.ReadFull(r.r, r.tmp[:n])
// We only really care about the actual bytes read.
if n2 != n {
if debug {
if err != nil {
if err == io.EOF {
return nil, io.ErrUnexpectedEOF
}
if debugDecoder {
println("readSmall: got", n2, "want", n, "err", err)
}
return nil
return nil, err
}
return r.tmp[:n]
return r.tmp[:n], nil
}
func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {

View file

@ -113,9 +113,6 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// Returns the number of bytes written and any error that occurred.
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int
for {
if len(d.current.b) > 0 {
@ -138,7 +135,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
}
}
if len(d.current.b) > 0 {
if debug {
if debugDecoder {
println("returning", n, "still bytes left:", len(d.current.b))
}
// Only return error at end of block
@ -147,7 +144,7 @@ func (d *Decoder) Read(p []byte) (int, error) {
if d.current.err != nil {
d.drainOutput()
}
if debug {
if debugDecoder {
println("returning", n, d.current.err, len(d.decoders))
}
return n, d.current.err
@ -167,20 +164,17 @@ func (d *Decoder) Reset(r io.Reader) error {
if r == nil {
d.current.err = ErrDecoderNilInput
if len(d.current.b) > 0 {
d.current.b = d.current.b[:0]
}
d.current.flushed = true
return nil
}
if d.stream == nil {
d.stream = make(chan decodeStream, 1)
d.streamWg.Add(1)
go d.startStreamDecoder(d.stream)
}
// If bytes buffer and < 1MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
// If bytes buffer and < 5MB, do sync decoding anyway.
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
bb2 := bb
if debug {
if debugDecoder {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb2.Bytes()
@ -196,12 +190,18 @@ func (d *Decoder) Reset(r io.Reader) error {
d.current.b = dst
d.current.err = err
d.current.flushed = true
if debug {
if debugDecoder {
println("sync decode to", len(dst), "bytes, err:", err)
}
return nil
}
if d.stream == nil {
d.stream = make(chan decodeStream, 1)
d.streamWg.Add(1)
go d.startStreamDecoder(d.stream)
}
// Remove current block.
d.current.decodeOutput = decodeOutput{}
d.current.err = nil
@ -225,7 +225,7 @@ func (d *Decoder) drainOutput() {
d.current.cancel = nil
}
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders))
}
d.decoders <- d.current.d
@ -238,7 +238,7 @@ func (d *Decoder) drainOutput() {
}
for v := range d.current.output {
if v.d != nil {
if debug {
if debugDecoder {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
@ -255,9 +255,6 @@ func (d *Decoder) drainOutput() {
// The return value n is the number of bytes written.
// Any error encountered during the write is also returned.
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.stream == nil {
return 0, ErrDecoderNilInput
}
var n int64
for {
if len(d.current.b) > 0 {
@ -297,7 +294,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
block := <-d.decoders
frame := block.localFrame
defer func() {
if debug {
if debugDecoder {
printf("re-adding decoder: %p", block)
}
frame.rawInput = nil
@ -310,7 +307,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
frame.history.reset()
err := frame.reset(&frame.bBuf)
if err == io.EOF {
if debug {
if debugDecoder {
println("frame reset return EOF")
}
return dst, nil
@ -355,7 +352,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
return dst, err
}
if len(frame.bBuf) == 0 {
if debug {
if debugDecoder {
println("frame dbuf empty")
}
break
@ -371,7 +368,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
// if no data was available without blocking.
func (d *Decoder) nextBlock(blocking bool) (ok bool) {
if d.current.d != nil {
if debug {
if debugDecoder {
printf("re-adding current decoder %p", d.current.d)
}
d.decoders <- d.current.d
@ -391,7 +388,7 @@ func (d *Decoder) nextBlock(blocking bool) (ok bool) {
return false
}
}
if debug {
if debugDecoder {
println("got", len(d.current.b), "bytes, error:", d.current.err)
}
return true
@ -485,7 +482,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
defer d.streamWg.Done()
frame := newFrameDec(d.o)
for stream := range inStream {
if debug {
if debugDecoder {
println("got new stream")
}
br := readerWrapper{r: stream.r}
@ -493,7 +490,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
for {
frame.history.reset()
err := frame.reset(&br)
if debug && err != nil {
if debugDecoder && err != nil {
println("Frame decoder returned", err)
}
if err == nil && frame.DictionaryID != nil {
@ -510,7 +507,7 @@ func (d *Decoder) startStreamDecoder(inStream chan decodeStream) {
}
break
}
if debug {
if debugDecoder {
println("starting frame decoder")
}

View file

@ -82,7 +82,7 @@ func loadDict(b []byte) (*dict, error) {
println("Transform table error:", err)
return err
}
if debug {
if debugDecoder || debugEncoder {
println("Read table ok", "symbolLen:", dec.symbolLen)
}
// Set decoders as predefined so they aren't reused.

View file

@ -132,7 +132,7 @@ func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
}
_ = addLiterals
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -220,6 +220,20 @@ encodeLoop:
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
candidateEnd := e.longTable[nextHashL]
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
}
best = bestEnd
}
}
}
// We have a match, we can store the forward value
@ -260,7 +274,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, best.length)
}
@ -398,7 +412,7 @@ encodeLoop:
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
blk.recentOffsets[2] = uint32(offset3)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}

View file

@ -138,7 +138,7 @@ func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -204,7 +204,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -264,7 +264,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -412,8 +412,41 @@ encodeLoop:
cv = load6432(src, s)
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Try to find a better match by searching for a long match at the end of the current best match
if true && s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
@ -520,7 +553,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -623,7 +656,7 @@ func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -691,7 +724,7 @@ encodeLoop:
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -754,7 +787,7 @@ encodeLoop:
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -905,9 +938,41 @@ encodeLoop:
}
cv = load6432(src, s)
}
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
@ -1019,7 +1084,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}

View file

@ -109,7 +109,7 @@ func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -170,7 +170,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -368,7 +368,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -427,7 +427,7 @@ func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -483,7 +483,7 @@ encodeLoop:
s += length + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -677,7 +677,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
@ -767,7 +767,7 @@ func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -830,7 +830,7 @@ encodeLoop:
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, lenght)
}
@ -1039,7 +1039,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// If we encoded more than 64K mark all dirty.

View file

@ -103,7 +103,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -178,7 +178,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -330,7 +330,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
@ -343,7 +343,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
inputMargin = 8
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if debug {
if debugEncoder {
if len(src) > maxBlockSize {
panic("src too big")
}
@ -391,7 +391,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -462,7 +462,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -616,7 +616,7 @@ encodeLoop:
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// We do not store history, so we must offset e.cur to avoid false matches for next user.
@ -696,7 +696,7 @@ func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
if debugEncoder {
println("recent offsets:", blk.recentOffsets)
}
@ -773,7 +773,7 @@ encodeLoop:
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
if debugEncoder {
println("repeat ended", s, length)
}
@ -926,7 +926,7 @@ encodeLoop:
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
if debugEncoder {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}

View file

@ -245,7 +245,7 @@ func (e *Encoder) nextBlock(final bool) error {
s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
s.wg.Add(1)
go func(src []byte) {
if debug {
if debugEncoder {
println("Adding block,", len(src), "bytes, final:", final)
}
defer func() {
@ -290,7 +290,7 @@ func (e *Encoder) nextBlock(final bool) error {
}
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
blk.encodeRaw(src)
@ -313,7 +313,7 @@ func (e *Encoder) nextBlock(final bool) error {
//
// The Copy function uses ReaderFrom if available.
func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
if debug {
if debugEncoder {
println("Using ReadFrom")
}
@ -336,20 +336,20 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
switch err {
case io.EOF:
e.state.filling = e.state.filling[:len(e.state.filling)-len(src)]
if debug {
if debugEncoder {
println("ReadFrom: got EOF final block:", len(e.state.filling))
}
return n, nil
case nil:
default:
if debug {
if debugEncoder {
println("ReadFrom: got error:", err)
}
e.state.err = err
return n, err
}
if len(src) > 0 {
if debug {
if debugEncoder {
println("ReadFrom: got space left in source:", len(src))
}
continue
@ -512,7 +512,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, src)
@ -548,7 +548,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
switch err {
case errIncompressible:
if debug {
if debugEncoder {
println("Storing incompressible block as raw")
}
dst = blk.encodeRawTo(dst, todo)

View file

@ -78,44 +78,68 @@ func newFrameDec(o decoderOptions) *frameDec {
func (d *frameDec) reset(br byteBuffer) error {
d.HasCheckSum = false
d.WindowSize = 0
var b []byte
var signature [4]byte
for {
b = br.readSmall(4)
if b == nil {
var err error
// Check if we can read more...
b, err := br.readSmall(1)
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
default:
return err
case nil:
signature[0] = b[0]
}
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
if debug {
println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic))
// Read the rest, don't allow io.ErrUnexpectedEOF
b, err = br.readSmall(3)
switch err {
case io.EOF:
return io.EOF
default:
return err
case nil:
copy(signature[1:], b)
}
if !bytes.Equal(signature[1:4], skippableFrameMagic) || signature[0]&0xf0 != 0x50 {
if debugDecoder {
println("Not skippable", hex.EncodeToString(signature[:]), hex.EncodeToString(skippableFrameMagic))
}
// Break if not skippable frame.
break
}
// Read size to skip
b = br.readSmall(4)
if b == nil {
println("Reading Frame Size EOF")
return io.ErrUnexpectedEOF
b, err = br.readSmall(4)
if err != nil {
if debugDecoder {
println("Reading Frame Size", err)
}
return err
}
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
println("Skipping frame with", n, "bytes.")
err := br.skipN(int(n))
err = br.skipN(int(n))
if err != nil {
if debug {
if debugDecoder {
println("Reading discarded frame", err)
}
return err
}
}
if !bytes.Equal(b, frameMagic) {
println("Got magic numbers: ", b, "want:", frameMagic)
if !bytes.Equal(signature[:], frameMagic) {
if debugDecoder {
println("Got magic numbers: ", signature, "want:", frameMagic)
}
return ErrMagicMismatch
}
// Read Frame_Header_Descriptor
fhd, err := br.readByte()
if err != nil {
println("Reading Frame_Header_Descriptor", err)
if debugDecoder {
println("Reading Frame_Header_Descriptor", err)
}
return err
}
d.SingleSegment = fhd&(1<<5) != 0
@ -130,7 +154,9 @@ func (d *frameDec) reset(br byteBuffer) error {
if !d.SingleSegment {
wd, err := br.readByte()
if err != nil {
println("Reading Window_Descriptor", err)
if debugDecoder {
println("Reading Window_Descriptor", err)
}
return err
}
printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
@ -147,12 +173,11 @@ func (d *frameDec) reset(br byteBuffer) error {
if size == 3 {
size = 4
}
b = br.readSmall(int(size))
if b == nil {
if debug {
println("Reading Dictionary_ID", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
b, err := br.readSmall(int(size))
if err != nil {
println("Reading Dictionary_ID", err)
return err
}
var id uint32
switch size {
@ -163,7 +188,7 @@ func (d *frameDec) reset(br byteBuffer) error {
case 4:
id = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
if debug {
if debugDecoder {
println("Dict size", size, "ID:", id)
}
if id > 0 {
@ -187,10 +212,10 @@ func (d *frameDec) reset(br byteBuffer) error {
}
d.FrameContentSize = 0
if fcsSize > 0 {
b := br.readSmall(fcsSize)
if b == nil {
println("Reading Frame content", io.ErrUnexpectedEOF)
return io.ErrUnexpectedEOF
b, err := br.readSmall(fcsSize)
if err != nil {
println("Reading Frame content", err)
return err
}
switch fcsSize {
case 1:
@ -205,7 +230,7 @@ func (d *frameDec) reset(br byteBuffer) error {
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
d.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
if debug {
if debugDecoder {
println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize)
}
}
@ -248,7 +273,7 @@ func (d *frameDec) reset(br byteBuffer) error {
// next will start decoding the next block from stream.
func (d *frameDec) next(block *blockDec) error {
if debug {
if debugDecoder {
printf("decoding new block %p:%p", block, block.data)
}
err := block.reset(d.rawInput, d.WindowSize)
@ -259,7 +284,7 @@ func (d *frameDec) next(block *blockDec) error {
return err
}
block.input <- struct{}{}
if debug {
if debugDecoder {
println("next block:", block)
}
d.asyncRunningMu.Lock()
@ -307,19 +332,19 @@ func (d *frameDec) checkCRC() error {
tmp[3] = byte(got >> 24)
// We can overwrite upper tmp now
want := d.rawInput.readSmall(4)
if want == nil {
println("CRC missing?")
return io.ErrUnexpectedEOF
want, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
if !bytes.Equal(tmp[:], want) {
if debug {
if debugDecoder {
println("CRC Check Failed:", tmp[:], "!=", want)
}
return ErrCRCMismatch
}
if debug {
if debugDecoder {
println("CRC ok", tmp[:])
}
return nil
@ -340,7 +365,7 @@ func (d *frameDec) initAsync() {
if cap(d.decoding) < d.o.concurrent {
d.decoding = make(chan *blockDec, d.o.concurrent)
}
if debug {
if debugDecoder {
h := d.history
printf("history init. len: %d, cap: %d", len(h.b), cap(h.b))
}
@ -388,7 +413,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
output <- r
return
}
if debug {
if debugDecoder {
println("got result, from ", d.offset, "to", d.offset+int64(len(r.b)))
d.offset += int64(len(r.b))
}
@ -396,7 +421,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
// Send history to next block
select {
case next = <-d.decoding:
if debug {
if debugDecoder {
println("Sending ", len(d.history.b), "bytes as history")
}
next.history <- &d.history
@ -434,7 +459,7 @@ func (d *frameDec) startDecoder(output chan decodeOutput) {
output <- r
if next == nil {
// There was no decoder available, we wait for one now that we have sent to the writer.
if debug {
if debugDecoder {
println("Sending ", len(d.history.b), " bytes as history")
}
next = <-d.decoding
@ -458,7 +483,7 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
if err != nil {
break
}
if debug {
if debugDecoder {
println("next block:", dec)
}
err = dec.decodeBuf(&d.history)

View file

@ -229,7 +229,7 @@ func (s *fseEncoder) setRLE(val byte) {
deltaFindState: 0,
deltaNbBits: 0,
}
if debug {
if debugEncoder {
println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
}
s.rleVal = val

View file

@ -203,7 +203,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
written += int64(n)
continue
case chunkTypeUncompressedData:
if debug {
if debugEncoder {
println("Uncompressed, chunklen", chunkLen)
}
// Section 4.3. Uncompressed data (chunk type 0x01).
@ -246,7 +246,7 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
continue
case chunkTypeStreamIdentifier:
if debug {
if debugEncoder {
println("stream id", chunkLen, len(snappyMagicBody))
}
// Section 4.1. Stream identifier (chunk type 0xff).

121
vendor/github.com/klauspost/compress/zstd/zip.go generated vendored Normal file
View file

@ -0,0 +1,121 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"errors"
"io"
"sync"
)
// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
// See https://www.winzip.com/win/en/comp_info.html
const ZipMethodWinZip = 93
// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression.
// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression.
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
const ZipMethodPKWare = 20
var zipReaderPool sync.Pool
// newZipReader cannot be used since we would leak goroutines...
func newZipReader(r io.Reader) io.ReadCloser {
dec, ok := zipReaderPool.Get().(*Decoder)
if ok {
dec.Reset(r)
} else {
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
panic(err)
}
dec = d
}
return &pooledZipReader{dec: dec}
}
type pooledZipReader struct {
mu sync.Mutex // guards Close and Read
dec *Decoder
}
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
r.mu.Lock()
defer r.mu.Unlock()
if r.dec == nil {
return 0, errors.New("Read after Close")
}
dec, err := r.dec.Read(p)
return dec, err
}
func (r *pooledZipReader) Close() error {
r.mu.Lock()
defer r.mu.Unlock()
var err error
if r.dec != nil {
err = r.dec.Reset(nil)
zipReaderPool.Put(r.dec)
r.dec = nil
}
return err
}
type pooledZipWriter struct {
mu sync.Mutex // guards Close and Read
enc *Encoder
}
func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
w.mu.Lock()
defer w.mu.Unlock()
if w.enc == nil {
return 0, errors.New("Write after Close")
}
return w.enc.Write(p)
}
func (w *pooledZipWriter) Close() error {
w.mu.Lock()
defer w.mu.Unlock()
var err error
if w.enc != nil {
err = w.enc.Close()
zipReaderPool.Put(w.enc)
w.enc = nil
}
return err
}
// ZipCompressor returns a compressor that can be registered with zip libraries.
// The provided encoder options will be used on all encodes.
func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
var pool sync.Pool
return func(w io.Writer) (io.WriteCloser, error) {
enc, ok := pool.Get().(*Encoder)
if ok {
enc.Reset(w)
} else {
var err error
enc, err = NewWriter(w, opts...)
if err != nil {
return nil, err
}
}
return &pooledZipWriter{enc: enc}, nil
}
}
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
// See ZipCompressor for example.
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
return func(r io.Reader) io.ReadCloser {
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
panic(err)
}
return d.IOReadCloser()
}
}

View file

@ -15,6 +15,12 @@ import (
// enable debug printing
const debug = false
// enable encoding debug printing
const debugEncoder = debug
// enable decoding debug printing
const debugDecoder = debug
// Enable extra assertions.
const debugAsserts = debug || false
@ -82,13 +88,13 @@ var (
)
func println(a ...interface{}) {
if debug {
if debug || debugDecoder || debugEncoder {
log.Println(a...)
}
}
func printf(format string, a ...interface{}) {
if debug {
if debug || debugDecoder || debugEncoder {
log.Printf(format, a...)
}
}

View file

@ -1,24 +0,0 @@
# Compiled Object files, Static and Dynamic libs (Shared Objects)
*.o
*.a
*.so
# Folders
_obj
_test
# Architecture specific extensions/prefixes
*.[568vq]
[568vq].out
*.cgo1.go
*.cgo2.c
_cgo_defun.c
_cgo_gotypes.go
_cgo_export.*
_testmain.go
*.exe
*.test
*.prof

View file

@ -1,46 +0,0 @@
language: go
os:
- linux
- osx
- windows
arch:
- amd64
- arm64
go:
- 1.12.x
- 1.13.x
- 1.14.x
- master
script:
- go vet ./...
- go test -race ./...
- go test -tags=noasm ./...
stages:
- gofmt
- test
matrix:
allow_failures:
- go: 'master'
fast_finish: true
include:
- stage: gofmt
go: 1.14.x
os: linux
arch: amd64
script:
- diff <(gofmt -d .) <(printf "")
- diff <(gofmt -d ./private) <(printf "")
- go install github.com/klauspost/asmfmt/cmd/asmfmt
- diff <(asmfmt -d .) <(printf "")
- stage: i386
go: 1.14.x
os: linux
arch: amd64
script:
- GOOS=linux GOARCH=386 go test .

View file

@ -1,35 +0,0 @@
Developer Certificate of Origin
Version 1.1
Copyright (C) 2015- Klaus Post & Contributors.
Email: klauspost@gmail.com
Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.
Developer's Certificate of Origin 1.1
By making a contribution to this project, I certify that:
(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or
(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or
(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.
(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.

View file

@ -1,22 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@ -1,191 +0,0 @@
# cpuid
Package cpuid provides information about the CPU running the current program.
CPU features are detected on startup, and kept for fast access through the life of the application.
Currently x86 / x64 (AMD64/i386) and ARM (ARM64) is supported, and no external C (cgo) code is used, which should make the library very easy to use.
You can access the CPU information by accessing the shared CPU variable of the cpuid library.
Package home: https://github.com/klauspost/cpuid
[![GoDoc][1]][2] [![Build Status][3]][4]
[1]: https://godoc.org/github.com/klauspost/cpuid?status.svg
[2]: https://godoc.org/github.com/klauspost/cpuid
[3]: https://travis-ci.org/klauspost/cpuid.svg?branch=master
[4]: https://travis-ci.org/klauspost/cpuid
# features
## x86 CPU Instructions
* **CMOV** (i686 CMOV)
* **NX** (NX (No-Execute) bit)
* **AMD3DNOW** (AMD 3DNOW)
* **AMD3DNOWEXT** (AMD 3DNowExt)
* **MMX** (standard MMX)
* **MMXEXT** (SSE integer functions or AMD MMX ext)
* **SSE** (SSE functions)
* **SSE2** (P4 SSE functions)
* **SSE3** (Prescott SSE3 functions)
* **SSSE3** (Conroe SSSE3 functions)
* **SSE4** (Penryn SSE4.1 functions)
* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
* **SSE42** (Nehalem SSE4.2 functions)
* **AVX** (AVX functions)
* **AVX2** (AVX2 functions)
* **FMA3** (Intel FMA 3)
* **FMA4** (Bulldozer FMA4 functions)
* **XOP** (Bulldozer XOP functions)
* **F16C** (Half-precision floating-point conversion)
* **BMI1** (Bit Manipulation Instruction Set 1)
* **BMI2** (Bit Manipulation Instruction Set 2)
* **TBM** (AMD Trailing Bit Manipulation)
* **LZCNT** (LZCNT instruction)
* **POPCNT** (POPCNT instruction)
* **AESNI** (Advanced Encryption Standard New Instructions)
* **CLMUL** (Carry-less Multiplication)
* **HTT** (Hyperthreading (enabled))
* **HLE** (Hardware Lock Elision)
* **RTM** (Restricted Transactional Memory)
* **RDRAND** (RDRAND instruction is available)
* **RDSEED** (RDSEED instruction is available)
* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
* **SHA** (Intel SHA Extensions)
* **AVX512F** (AVX-512 Foundation)
* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
* **AVX512PF** (AVX-512 Prefetch Instructions)
* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
* **AVX512CD** (AVX-512 Conflict Detection Instructions)
* **AVX512BW** (AVX-512 Byte and Word Instructions)
* **AVX512VL** (AVX-512 Vector Length Extensions)
* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
* **AVX512VBMI2** (AVX-512 Vector Bit Manipulation Instructions, Version 2)
* **AVX512VNNI** (AVX-512 Vector Neural Network Instructions)
* **AVX512VPOPCNTDQ** (AVX-512 Vector Population Count Doubleword and Quadword)
* **GFNI** (Galois Field New Instructions)
* **VAES** (Vector AES)
* **AVX512BITALG** (AVX-512 Bit Algorithms)
* **VPCLMULQDQ** (Carry-Less Multiplication Quadword)
* **AVX512BF16** (AVX-512 BFLOAT16 Instructions)
* **AVX512VP2INTERSECT** (AVX-512 Intersect for D/Q)
* **MPX** (Intel MPX (Memory Protection Extensions))
* **ERMS** (Enhanced REP MOVSB/STOSB)
* **RDTSCP** (RDTSCP Instruction)
* **CX16** (CMPXCHG16B Instruction)
* **SGX** (Software Guard Extensions, with activation details)
* **VMX** (Virtual Machine Extensions)
## Performance
* **RDTSCP()** Returns current cycle count. Can be used for benchmarking.
* **SSE2SLOW** (SSE2 is supported, but usually not faster)
* **SSE3SLOW** (SSE3 is supported, but usually not faster)
* **ATOM** (Atom processor, some SSSE3 instructions are slower)
* **Cache line** (Probable size of a cache line).
* **L1, L2, L3 Cache size** on newer Intel/AMD CPUs.
## ARM CPU features
# ARM FEATURE DETECTION DISABLED!
See [#52](https://github.com/klauspost/cpuid/issues/52).
Currently only `arm64` platforms are implemented.
* **FP** Single-precision and double-precision floating point
* **ASIMD** Advanced SIMD
* **EVTSTRM** Generic timer
* **AES** AES instructions
* **PMULL** Polynomial Multiply instructions (PMULL/PMULL2)
* **SHA1** SHA-1 instructions (SHA1C, etc)
* **SHA2** SHA-2 instructions (SHA256H, etc)
* **CRC32** CRC32/CRC32C instructions
* **ATOMICS** Large System Extensions (LSE)
* **FPHP** Half-precision floating point
* **ASIMDHP** Advanced SIMD half-precision floating point
* **ARMCPUID** Some CPU ID registers readable at user-level
* **ASIMDRDM** Rounding Double Multiply Accumulate/Subtract (SQRDMLAH/SQRDMLSH)
* **JSCVT** Javascript-style double->int convert (FJCVTZS)
* **FCMA** Floating point complex number addition and multiplication
* **LRCPC** Weaker release consistency (LDAPR, etc)
* **DCPOP** Data cache clean to Point of Persistence (DC CVAP)
* **SHA3** SHA-3 instructions (EOR3, RAXI, XAR, BCAX)
* **SM3** SM3 instructions
* **SM4** SM4 instructions
* **ASIMDDP** SIMD Dot Product
* **SHA512** SHA512 instructions
* **SVE** Scalable Vector Extension
* **GPA** Generic Pointer Authentication
## Cpu Vendor/VM
* **Intel**
* **AMD**
* **VIA**
* **Transmeta**
* **NSC**
* **KVM** (Kernel-based Virtual Machine)
* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
* **VMware**
* **XenHVM**
* **Bhyve**
* **Hygon**
# installing
```go get github.com/klauspost/cpuid```
# example
```Go
package main
import (
"fmt"
"github.com/klauspost/cpuid"
)
func main() {
// Print basic CPU information:
fmt.Println("Name:", cpuid.CPU.BrandName)
fmt.Println("PhysicalCores:", cpuid.CPU.PhysicalCores)
fmt.Println("ThreadsPerCore:", cpuid.CPU.ThreadsPerCore)
fmt.Println("LogicalCores:", cpuid.CPU.LogicalCores)
fmt.Println("Family", cpuid.CPU.Family, "Model:", cpuid.CPU.Model)
fmt.Println("Features:", cpuid.CPU.Features)
fmt.Println("Cacheline bytes:", cpuid.CPU.CacheLine)
fmt.Println("L1 Data Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L1 Instruction Cache:", cpuid.CPU.Cache.L1D, "bytes")
fmt.Println("L2 Cache:", cpuid.CPU.Cache.L2, "bytes")
fmt.Println("L3 Cache:", cpuid.CPU.Cache.L3, "bytes")
// Test if we have a specific feature:
if cpuid.CPU.SSE() {
fmt.Println("We have Streaming SIMD Extensions")
}
}
```
Sample output:
```
>go run main.go
Name: Intel(R) Core(TM) i5-2540M CPU @ 2.60GHz
PhysicalCores: 2
ThreadsPerCore: 2
LogicalCores: 4
Family 6 Model: 42
Features: CMOV,MMX,MMXEXT,SSE,SSE2,SSE3,SSSE3,SSE4.1,SSE4.2,AVX,AESNI,CLMUL
Cacheline bytes: 64
We have Streaming SIMD Extensions
```
# private package
In the "private" folder you can find an autogenerated version of the library you can include in your own packages.
For this purpose all exports are removed, and functions and constants are lowercased.
This is not a recommended way of using the library, but provided for convenience, if it is difficult for you to use external packages.
# license
This code is published under an MIT license. See LICENSE file for more information.

File diff suppressed because it is too large Load diff

View file

@ -1,42 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build 386,!gccgo,!noasm,!appengine
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORL CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+4(FP)
MOVL BX, ebx+8(FP)
MOVL CX, ecx+12(FP)
MOVL DX, edx+16(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func xgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+4(FP)
MOVL DX, edx+8(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET

View file

@ -1,42 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build amd64,!gccgo,!noasm,!appengine
// func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuid(SB), 7, $0
XORQ CX, CX
MOVL op+0(FP), AX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
TEXT ·asmCpuidex(SB), 7, $0
MOVL op+0(FP), AX
MOVL op2+4(FP), CX
CPUID
MOVL AX, eax+8(FP)
MOVL BX, ebx+12(FP)
MOVL CX, ecx+16(FP)
MOVL DX, edx+20(FP)
RET
// func asmXgetbv(index uint32) (eax, edx uint32)
TEXT ·asmXgetbv(SB), 7, $0
MOVL index+0(FP), CX
BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV
MOVL AX, eax+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
TEXT ·asmRdtscpAsm(SB), 7, $0
BYTE $0x0F; BYTE $0x01; BYTE $0xF9 // RDTSCP
MOVL AX, eax+0(FP)
MOVL BX, ebx+4(FP)
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET

View file

@ -1,26 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build arm64,!gccgo
// See https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt
// func getMidr
TEXT ·getMidr(SB), 7, $0
WORD $0xd5380000 // mrs x0, midr_el1 /* Main ID Register */
MOVD R0, midr+0(FP)
RET
// func getProcFeatures
TEXT ·getProcFeatures(SB), 7, $0
WORD $0xd5380400 // mrs x0, id_aa64pfr0_el1 /* Processor Feature Register 0 */
MOVD R0, procFeatures+0(FP)
RET
// func getInstAttributes
TEXT ·getInstAttributes(SB), 7, $0
WORD $0xd5380600 // mrs x0, id_aa64isar0_el1 /* Instruction Set Attribute Register 0 */
WORD $0xd5380621 // mrs x1, id_aa64isar1_el1 /* Instruction Set Attribute Register 1 */
MOVD R0, instAttrReg0+0(FP)
MOVD R1, instAttrReg1+8(FP)
RET

View file

@ -1,219 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build arm64,!gccgo,!noasm,!appengine
package cpuid
func getMidr() (midr uint64)
func getProcFeatures() (procFeatures uint64)
func getInstAttributes() (instAttrReg0, instAttrReg1 uint64)
func initCPU() {
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(c *CPUInfo) {
// ARM64 disabled for now.
if true {
return
}
// midr := getMidr()
// MIDR_EL1 - Main ID Register
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | Implementer | [31-24] | y |
// |--------------------------------------------------|
// | Variant | [23-20] | y |
// |--------------------------------------------------|
// | Architecture | [19-16] | y |
// |--------------------------------------------------|
// | PartNum | [15-4] | y |
// |--------------------------------------------------|
// | Revision | [3-0] | y |
// x--------------------------------------------------x
// fmt.Printf(" implementer: 0x%02x\n", (midr>>24)&0xff)
// fmt.Printf(" variant: 0x%01x\n", (midr>>20)&0xf)
// fmt.Printf("architecture: 0x%01x\n", (midr>>16)&0xf)
// fmt.Printf(" part num: 0x%03x\n", (midr>>4)&0xfff)
// fmt.Printf(" revision: 0x%01x\n", (midr>>0)&0xf)
procFeatures := getProcFeatures()
// ID_AA64PFR0_EL1 - Processor Feature Register 0
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | DIT | [51-48] | y |
// |--------------------------------------------------|
// | SVE | [35-32] | y |
// |--------------------------------------------------|
// | GIC | [27-24] | n |
// |--------------------------------------------------|
// | AdvSIMD | [23-20] | y |
// |--------------------------------------------------|
// | FP | [19-16] | y |
// |--------------------------------------------------|
// | EL3 | [15-12] | n |
// |--------------------------------------------------|
// | EL2 | [11-8] | n |
// |--------------------------------------------------|
// | EL1 | [7-4] | n |
// |--------------------------------------------------|
// | EL0 | [3-0] | n |
// x--------------------------------------------------x
var f ArmFlags
// if procFeatures&(0xf<<48) != 0 {
// fmt.Println("DIT")
// }
if procFeatures&(0xf<<32) != 0 {
f |= SVE
}
if procFeatures&(0xf<<20) != 15<<20 {
f |= ASIMD
if procFeatures&(0xf<<20) == 1<<20 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64pfr0_el1
// 0b0001 --> As for 0b0000, and also includes support for half-precision floating-point arithmetic.
f |= FPHP
f |= ASIMDHP
}
}
if procFeatures&(0xf<<16) != 0 {
f |= FP
}
instAttrReg0, instAttrReg1 := getInstAttributes()
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
//
// ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | TS | [55-52] | y |
// |--------------------------------------------------|
// | FHM | [51-48] | y |
// |--------------------------------------------------|
// | DP | [47-44] | y |
// |--------------------------------------------------|
// | SM4 | [43-40] | y |
// |--------------------------------------------------|
// | SM3 | [39-36] | y |
// |--------------------------------------------------|
// | SHA3 | [35-32] | y |
// |--------------------------------------------------|
// | RDM | [31-28] | y |
// |--------------------------------------------------|
// | ATOMICS | [23-20] | y |
// |--------------------------------------------------|
// | CRC32 | [19-16] | y |
// |--------------------------------------------------|
// | SHA2 | [15-12] | y |
// |--------------------------------------------------|
// | SHA1 | [11-8] | y |
// |--------------------------------------------------|
// | AES | [7-4] | y |
// x--------------------------------------------------x
// if instAttrReg0&(0xf<<52) != 0 {
// fmt.Println("TS")
// }
// if instAttrReg0&(0xf<<48) != 0 {
// fmt.Println("FHM")
// }
if instAttrReg0&(0xf<<44) != 0 {
f |= ASIMDDP
}
if instAttrReg0&(0xf<<40) != 0 {
f |= SM4
}
if instAttrReg0&(0xf<<36) != 0 {
f |= SM3
}
if instAttrReg0&(0xf<<32) != 0 {
f |= SHA3
}
if instAttrReg0&(0xf<<28) != 0 {
f |= ASIMDRDM
}
if instAttrReg0&(0xf<<20) != 0 {
f |= ATOMICS
}
if instAttrReg0&(0xf<<16) != 0 {
f |= CRC32
}
if instAttrReg0&(0xf<<12) != 0 {
f |= SHA2
}
if instAttrReg0&(0xf<<12) == 2<<12 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As 0b0001, plus SHA512H, SHA512H2, SHA512SU0, and SHA512SU1 instructions implemented.
f |= SHA512
}
if instAttrReg0&(0xf<<8) != 0 {
f |= SHA1
}
if instAttrReg0&(0xf<<4) != 0 {
f |= AES
}
if instAttrReg0&(0xf<<4) == 2<<4 {
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar0_el1
// 0b0010 --> As for 0b0001, plus PMULL/PMULL2 instructions operating on 64-bit data quantities.
f |= PMULL
}
// https://developer.arm.com/docs/ddi0595/b/aarch64-system-registers/id_aa64isar1_el1
//
// ID_AA64ISAR1_EL1 - Instruction set attribute register 1
// x--------------------------------------------------x
// | Name | bits | visible |
// |--------------------------------------------------|
// | GPI | [31-28] | y |
// |--------------------------------------------------|
// | GPA | [27-24] | y |
// |--------------------------------------------------|
// | LRCPC | [23-20] | y |
// |--------------------------------------------------|
// | FCMA | [19-16] | y |
// |--------------------------------------------------|
// | JSCVT | [15-12] | y |
// |--------------------------------------------------|
// | API | [11-8] | y |
// |--------------------------------------------------|
// | APA | [7-4] | y |
// |--------------------------------------------------|
// | DPB | [3-0] | y |
// x--------------------------------------------------x
// if instAttrReg1&(0xf<<28) != 0 {
// fmt.Println("GPI")
// }
if instAttrReg1&(0xf<<28) != 24 {
f |= GPA
}
if instAttrReg1&(0xf<<20) != 0 {
f |= LRCPC
}
if instAttrReg1&(0xf<<16) != 0 {
f |= FCMA
}
if instAttrReg1&(0xf<<12) != 0 {
f |= JSCVT
}
// if instAttrReg1&(0xf<<8) != 0 {
// fmt.Println("API")
// }
// if instAttrReg1&(0xf<<4) != 0 {
// fmt.Println("APA")
// }
if instAttrReg1&(0xf<<0) != 0 {
f |= DCPOP
}
c.Arm = f
}

View file

@ -1,33 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build 386,!gccgo,!noasm amd64,!gccgo,!noasm,!appengine
package cpuid
func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func asmXgetbv(index uint32) (eax, edx uint32)
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
func initCPU() {
cpuid = asmCpuid
cpuidex = asmCpuidex
xgetbv = asmXgetbv
rdtscpAsm = asmRdtscpAsm
}
func addInfo(c *CPUInfo) {
c.maxFunc = maxFunctionID()
c.maxExFunc = maxExtendedFunction()
c.BrandName = brandName()
c.CacheLine = cacheLine()
c.Family, c.Model = familyModel()
c.Features = support()
c.SGX = hasSGX(c.Features&SGX != 0, c.Features&SGXLC != 0)
c.ThreadsPerCore = threadsPerCore()
c.LogicalCores = logicalCores()
c.PhysicalCores = physicalCores()
c.VendorID, c.VendorString = vendorID()
c.Hz = hertz(c.BrandName)
c.cacheSize()
}

View file

@ -1,14 +0,0 @@
// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
//+build !amd64,!386,!arm64 gccgo noasm appengine
package cpuid
func initCPU() {
cpuid = func(uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
cpuidex = func(x, y uint32) (a, b, c, d uint32) { return 0, 0, 0, 0 }
xgetbv = func(uint32) (a, b uint32) { return 0, 0 }
rdtscpAsm = func() (a, b, c, d uint32) { return 0, 0, 0, 0 }
}
func addInfo(info *CPUInfo) {}

View file

@ -1,3 +0,0 @@
module github.com/klauspost/cpuid
go 1.12

74
vendor/github.com/klauspost/cpuid/v2/.goreleaser.yml generated vendored Normal file
View file

@ -0,0 +1,74 @@
# This is an example goreleaser.yaml file with some sane defaults.
# Make sure to check the documentation at http://goreleaser.com
builds:
-
id: "cpuid"
binary: cpuid
main: ./cmd/cpuid/main.go
env:
- CGO_ENABLED=0
flags:
- -ldflags=-s -w
goos:
- aix
- linux
- freebsd
- netbsd
- windows
- darwin
goarch:
- 386
- amd64
- arm64
goarm:
- 7
archives:
-
id: cpuid
name_template: "cpuid-{{ .Os }}_{{ .Arch }}_{{ .Version }}"
replacements:
aix: AIX
darwin: OSX
linux: Linux
windows: Windows
386: i386
amd64: x86_64
freebsd: FreeBSD
netbsd: NetBSD
format_overrides:
- goos: windows
format: zip
files:
- LICENSE
checksum:
name_template: 'checksums.txt'
snapshot:
name_template: "{{ .Tag }}-next"
changelog:
sort: asc
filters:
exclude:
- '^doc:'
- '^docs:'
- '^test:'
- '^tests:'
- '^Update\sREADME.md'
nfpms:
-
file_name_template: "cpuid_package_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
vendor: Klaus Post
homepage: https://github.com/klauspost/cpuid
maintainer: Klaus Post <klauspost@gmail.com>
description: CPUID Tool
license: BSD 3-Clause
formats:
- deb
- rpm
replacements:
darwin: Darwin
linux: Linux
freebsd: FreeBSD
amd64: x86_64

View file

@ -16,41 +16,52 @@ go:
- 1.16.x
- master
env:
- CGO_ENABLED=0
script:
- go vet ./...
- go test -test.v -test.run ^TestCPUID$
- go test -race ./...
- CGO_ENABLED=1 go test -race ./...
- go test -tags=nounsafe -test.v -test.run ^TestCPUID$
- go test -tags=noasm ./...
- go run ./cmd/cpuid/main.go
- go run ./cmd/cpuid/main.go -json
matrix:
allow_failures:
- go: 'master'
fast_finish: true
include:
- stage: gofmt
go: 1.15.x
- stage: other
go: 1.16.x
os: linux
arch: amd64
script:
- diff <(gofmt -d .) <(printf "")
- diff <(gofmt -d ./private) <(printf "")
- go install github.com/klauspost/asmfmt/cmd/asmfmt
- curl -sfL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- check # check goreleaser config for deprecations
- curl -sL https://git.io/goreleaser | VERSION=v0.157.0 sh -s -- --snapshot --skip-publish --rm-dist
- go get github.com/klauspost/asmfmt&&go install github.com/klauspost/asmfmt/cmd/asmfmt
- diff <(asmfmt -d .) <(printf "")
- stage: i386
go: 1.15.x
os: linux
arch: amd64
script:
- GOOS=linux GOARCH=386 go test .
- stage: buildotherprev
- ./test-architectures.sh
- stage: other
go: 1.15.x
os: linux
arch: amd64
script:
- ./test-architectures.sh
- stage: buildother
deploy:
- provider: script
skip_cleanup: true
script: curl -sL https://git.io/goreleaser | VERSION=v0.157.0 bash || true
on:
tags: true
condition: ($TRAVIS_OS_NAME = linux) && ($TRAVIS_CPU_ARCH = amd64)
go: 1.16.x
os: linux
arch: amd64
script:
- ./test-architectures.sh
branches:
only:
- master
- /^v\d+\.\d+(\.\d+)?(-\S*)?$/

View file

@ -15,6 +15,7 @@ import (
"fmt"
"math"
"os"
"runtime"
"strings"
)
@ -209,6 +210,7 @@ var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)
var xgetbv func(index uint32) (eax, edx uint32)
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
var darwinHasAVX512 = func() bool { return false }
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
@ -922,7 +924,11 @@ func support() flagSet {
// Verify that XCR0[7:5] = 111b (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
// ZMM16-ZMM31 state are enabled by OS)
/// and that XCR0[2:1] = 11b (XMM state and YMM state are enabled by OS).
if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
hasAVX512 := (eax>>5)&7 == 7 && (eax>>1)&3 == 3
if runtime.GOOS == "darwin" {
hasAVX512 = fs.inSet(AVX) && darwinHasAVX512()
}
if hasAVX512 {
fs.setIf(ebx&(1<<16) != 0, AVX512F)
fs.setIf(ebx&(1<<17) != 0, AVX512DQ)
fs.setIf(ebx&(1<<21) != 0, AVX512IFMA)

View file

@ -40,3 +40,8 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// func asmDarwinHasAVX512() bool
TEXT ·asmDarwinHasAVX512(SB), 7, $0
MOVL $0, eax+0(FP)
RET

View file

@ -40,3 +40,33 @@ TEXT ·asmRdtscpAsm(SB), 7, $0
MOVL CX, ecx+8(FP)
MOVL DX, edx+12(FP)
RET
// From https://go-review.googlesource.com/c/sys/+/285572/
// func asmDarwinHasAVX512() bool
TEXT ·asmDarwinHasAVX512(SB), 7, $0-1
MOVB $0, ret+0(FP) // default to false
#ifdef GOOS_darwin // return if not darwin
#ifdef GOARCH_amd64 // return if not amd64
// These values from:
// https://github.com/apple/darwin-xnu/blob/xnu-4570.1.46/osfmk/i386/cpu_capabilities.h
#define commpage64_base_address 0x00007fffffe00000
#define commpage64_cpu_capabilities64 (commpage64_base_address+0x010)
#define commpage64_version (commpage64_base_address+0x01E)
#define hasAVX512F 0x0000004000000000
MOVQ $commpage64_version, BX
MOVW (BX), AX
CMPW AX, $13 // versions < 13 do not support AVX512
JL no_avx512
MOVQ $commpage64_cpu_capabilities64, BX
MOVQ (BX), AX
MOVQ $hasAVX512F, CX
ANDQ CX, AX
JZ no_avx512
MOVB $1, ret+0(FP)
no_avx512:
#endif
#endif
RET

View file

@ -8,12 +8,14 @@ func asmCpuid(op uint32) (eax, ebx, ecx, edx uint32)
func asmCpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32)
func asmXgetbv(index uint32) (eax, edx uint32)
func asmRdtscpAsm() (eax, ebx, ecx, edx uint32)
func asmDarwinHasAVX512() bool
func initCPU() {
cpuid = asmCpuid
cpuidex = asmCpuidex
xgetbv = asmXgetbv
rdtscpAsm = asmRdtscpAsm
darwinHasAVX512 = asmDarwinHasAVX512
}
func addInfo(c *CPUInfo, safe bool) {

View file

@ -11,5 +11,9 @@ func detectOS(c *CPUInfo) bool {
// to all Go programs running on darwin/arm64.
// TODO: Add more if we know them.
c.featureSet.setIf(runtime.GOOS != "ios", AESARM, PMULL, SHA1, SHA2)
c.PhysicalCores = runtime.NumCPU()
// For now assuming 1 thread per core...
c.ThreadsPerCore = 1
c.LogicalCores = c.PhysicalCores
return true
}

View file

@ -11,7 +11,6 @@ import (
"encoding/binary"
"io/ioutil"
"runtime"
"unsafe"
)
// HWCAP bits.
@ -42,12 +41,9 @@ const (
hwcap_ASIMDFHM = 1 << 23
)
//go:linkname hwcap internal/cpu.HWCap
var hwcap uint
func detectOS(c *CPUInfo) bool {
// For now assuming no hyperthreading is reasonable.
c.LogicalCores = int(getproccount())
c.LogicalCores = runtime.NumCPU()
c.PhysicalCores = c.LogicalCores
c.ThreadsPerCore = 1
if hwcap == 0 {
@ -132,30 +128,3 @@ func detectOS(c *CPUInfo) bool {
func isSet(hwc uint, value uint) bool {
return hwc&value != 0
}
//go:noescape
//go:linkname sched_getaffinity runtime.sched_getaffinity
func sched_getaffinity(pid, len uintptr, buf *byte) int32
func getproccount() int32 {
// This buffer is huge (8 kB) but we are on the system stack
// and there should be plenty of space (64 kB).
// Also this is a leaf, so we're not holding up the memory for long.
const maxCPUs = 64 * 1024
var buf [maxCPUs / 8]byte
r := sched_getaffinity(0, unsafe.Sizeof(buf), &buf[0])
if r < 0 {
return 0
}
n := int32(0)
for _, v := range buf[:r] {
for v != 0 {
n += int32(v & 1)
v >>= 1
}
}
if n == 0 {
n = 1
}
return n
}

View file

@ -6,6 +6,12 @@
package cpuid
import "runtime"
func detectOS(c *CPUInfo) bool {
c.PhysicalCores = runtime.NumCPU()
// For now assuming 1 thread per core...
c.ThreadsPerCore = 1
c.LogicalCores = c.PhysicalCores
return false
}

View file

@ -0,0 +1,7 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//+build nounsafe
package cpuid
var hwcap uint

View file

@ -0,0 +1,10 @@
// Copyright (c) 2021 Klaus Post, released under MIT License. See LICENSE file.
//+build !nounsafe
package cpuid
import _ "unsafe" // needed for go:linkname
//go:linkname hwcap internal/cpu.HWCap
var hwcap uint

View file

@ -5,11 +5,11 @@ set -e
go tool dist list | while IFS=/ read os arch; do
echo "Checking $os/$arch..."
echo " normal"
GOARCH=$arch GOOS=$os go build -o /dev/null ./...
GOARCH=$arch GOOS=$os go build -o /dev/null .
echo " noasm"
GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./...
GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null .
echo " appengine"
GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./...
GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null .
echo " noasm,appengine"
GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./...
GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null .
done