
Dump: add output format tar and output to stdout (#10376)

* Dump: Use mholt/archiver/v3 to support tar including many compressions

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Allow dump output to stdout

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file"

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: also dump LFS (fixes #10058)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365; see the containment-check sketch after this list)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Use log.Info instead of fmt.Fprintf

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* import ordering

* make fmt
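
The path rules described above (skipping RepoRootPath, LFS.ContentPath, LogRootPath, and CustomPath when they sit below AppDataPath) boil down to a directory-containment test. The following is a hypothetical sketch of such a check, not the literal Gitea implementation; the function name isSubdirOrEqual is invented for illustration.

package main

import (
	"fmt"
	"path/filepath"
	"strings"
)

// isSubdirOrEqual reports whether child equals parent or lies somewhere below it.
func isSubdirOrEqual(parent, child string) bool {
	rel, err := filepath.Rel(parent, child)
	if err != nil {
		return false
	}
	return rel == "." ||
		(rel != ".." && !strings.HasPrefix(rel, ".."+string(filepath.Separator)))
}

func main() {
	fmt.Println(isSubdirOrEqual("/data/gitea", "/data/gitea/lfs")) // true: already inside the dump
	fmt.Println(isSubdirOrEqual("/data/gitea", "/var/log/gitea"))  // false: must be packed separately
}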

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Matti R <matti@mdranta.net>
PhilippHomann committed 2020-06-05 22:47:39 +02:00 (via GitHub)
parent 209b17c4e2, commit 684b7a999f
303 changed files with 301317 additions and 1183 deletions

vendor/github.com/dsnet/compress/internal/common.go generated vendored Normal file
@@ -0,0 +1,107 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package internal is a collection of common compression algorithms.
//
// For performance reasons, these packages lack strong error checking and
// require the caller to ensure that strict invariants are kept.
package internal
var (
// IdentityLUT returns the input key itself.
IdentityLUT = func() (lut [256]byte) {
for i := range lut {
lut[i] = uint8(i)
}
return lut
}()
// ReverseLUT returns the input key with its bits reversed.
ReverseLUT = func() (lut [256]byte) {
for i := range lut {
b := uint8(i)
b = (b&0xaa)>>1 | (b&0x55)<<1
b = (b&0xcc)>>2 | (b&0x33)<<2
b = (b&0xf0)>>4 | (b&0x0f)<<4
lut[i] = b
}
return lut
}()
)
// ReverseUint32 reverses all bits of v.
func ReverseUint32(v uint32) (x uint32) {
x |= uint32(ReverseLUT[byte(v>>0)]) << 24
x |= uint32(ReverseLUT[byte(v>>8)]) << 16
x |= uint32(ReverseLUT[byte(v>>16)]) << 8
x |= uint32(ReverseLUT[byte(v>>24)]) << 0
return x
}
// ReverseUint32N reverses the lower n bits of v.
func ReverseUint32N(v uint32, n uint) (x uint32) {
return ReverseUint32(v << (32 - n))
}
// ReverseUint64 reverses all bits of v.
func ReverseUint64(v uint64) (x uint64) {
x |= uint64(ReverseLUT[byte(v>>0)]) << 56
x |= uint64(ReverseLUT[byte(v>>8)]) << 48
x |= uint64(ReverseLUT[byte(v>>16)]) << 40
x |= uint64(ReverseLUT[byte(v>>24)]) << 32
x |= uint64(ReverseLUT[byte(v>>32)]) << 24
x |= uint64(ReverseLUT[byte(v>>40)]) << 16
x |= uint64(ReverseLUT[byte(v>>48)]) << 8
x |= uint64(ReverseLUT[byte(v>>56)]) << 0
return x
}
// ReverseUint64N reverses the lower n bits of v.
func ReverseUint64N(v uint64, n uint) (x uint64) {
return ReverseUint64(v << (64 - n))
}
// MoveToFront is a data structure that allows for more efficient move-to-front
// transformations. This specific implementation assumes that the alphabet is
// densely packed within 0..255.
type MoveToFront struct {
dict [256]uint8 // Mapping from indexes to values
tail int // Number of tail bytes that are already ordered
}
func (m *MoveToFront) Encode(vals []uint8) {
copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity
var max int
for i, val := range vals {
var idx uint8 // Reverse lookup idx in dict
for di, dv := range m.dict {
if dv == val {
idx = uint8(di)
break
}
}
vals[i] = idx
max |= int(idx)
copy(m.dict[1:], m.dict[:idx])
m.dict[0] = val
}
m.tail = 256 - max - 1
}
func (m *MoveToFront) Decode(idxs []uint8) {
copy(m.dict[:], IdentityLUT[:256-m.tail]) // Reset dict to be identity
var max int
for i, idx := range idxs {
val := m.dict[idx] // Forward lookup val in dict
idxs[i] = val
max |= int(idx)
copy(m.dict[1:], m.dict[:idx])
m.dict[0] = val
}
m.tail = 256 - max - 1
}
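
What follows is not part of the vendored file: a short, hedged usage sketch of the helpers above. Note that internal packages are only importable from code living inside the dsnet/compress module.

package main

import (
	"fmt"

	"github.com/dsnet/compress/internal"
)

func main() {
	// Bit reversal: mirror the lower 3 bits of 0b110 to get 0b011.
	fmt.Println(internal.ReverseUint32N(6, 3)) // 3

	// Move-to-front round trip: Encode rewrites values in place as MTF
	// indexes, and Decode restores the original values.
	var m internal.MoveToFront
	vals := []uint8{1, 1, 0, 2}
	m.Encode(vals)
	fmt.Println(vals) // [1 0 1 2]
	m.Decode(vals)
	fmt.Println(vals) // [1 1 0 2]
}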

vendor/github.com/dsnet/compress/internal/debug.go generated vendored Normal file
@@ -0,0 +1,12 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build debug,!gofuzz
package internal
const (
Debug = true
GoFuzz = false
)

vendor/github.com/dsnet/compress/internal/errors/errors.go generated vendored Normal file
@@ -0,0 +1,120 @@
// Copyright 2016, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package errors implements functions to manipulate compression errors.
//
// In idiomatic Go, it is an anti-pattern to use panics as a form of error
// reporting in the API. Instead, the expected way to transmit errors is by
// returning an error value. Unfortunately, the checking of "err != nil" in
// tight loops commonly found in compression causes non-negligible performance
// degradation. While this may not be idiomatic, the internal packages of this
// repository rely on panics as a normal means to convey errors. In order to
// ensure that these panics do not leak across the public API, the public
// packages must recover from these panics and present an error value.
//
// The Panic and Recover functions in this package provide a safe way to
// recover from errors only generated from within this repository.
//
// Example usage:
// func Foo() (err error) {
// defer errors.Recover(&err)
//
// if rand.Intn(2) == 0 {
// // Unexpected panics will not be caught by Recover.
// io.Closer(nil).Close()
// } else {
// // Errors thrown by Panic will be caught by Recover.
// errors.Panic(errors.New("whoopsie"))
// }
// }
//
package errors
import "strings"
const (
// Unknown indicates that there is no classification for this error.
Unknown = iota
// Internal indicates that this error is due to an internal bug.
// Users should file an issue report if this type of error is encountered.
Internal
// Invalid indicates that this error is due to the user misusing the API
// and is indicative of a bug on the user's part.
Invalid
// Deprecated indicates the use of a deprecated and unsupported feature.
Deprecated
// Corrupted indicates that the input stream is corrupted.
Corrupted
// Closed indicates that the handlers are closed.
Closed
)
var codeMap = map[int]string{
Unknown: "unknown error",
Internal: "internal error",
Invalid: "invalid argument",
Deprecated: "deprecated format",
Corrupted: "corrupted input",
Closed: "closed handler",
}
type Error struct {
Code int // The error type
Pkg string // Name of the package where the error originated
Msg string // Descriptive message about the error (optional)
}
func (e Error) Error() string {
var ss []string
for _, s := range []string{e.Pkg, codeMap[e.Code], e.Msg} {
if s != "" {
ss = append(ss, s)
}
}
return strings.Join(ss, ": ")
}
func (e Error) CompressError() {}
func (e Error) IsInternal() bool { return e.Code == Internal }
func (e Error) IsInvalid() bool { return e.Code == Invalid }
func (e Error) IsDeprecated() bool { return e.Code == Deprecated }
func (e Error) IsCorrupted() bool { return e.Code == Corrupted }
func (e Error) IsClosed() bool { return e.Code == Closed }
func IsInternal(err error) bool { return isCode(err, Internal) }
func IsInvalid(err error) bool { return isCode(err, Invalid) }
func IsDeprecated(err error) bool { return isCode(err, Deprecated) }
func IsCorrupted(err error) bool { return isCode(err, Corrupted) }
func IsClosed(err error) bool { return isCode(err, Closed) }
func isCode(err error, code int) bool {
if cerr, ok := err.(Error); ok && cerr.Code == code {
return true
}
return false
}
// errWrap is used by Panic and Recover to ensure that only errors raised by
// Panic are recovered by Recover.
type errWrap struct{ e *error }
func Recover(err *error) {
switch ex := recover().(type) {
case nil:
// Do nothing.
case errWrap:
*err = *ex.e
default:
panic(ex)
}
}
func Panic(err error) {
panic(errWrap{&err})
}

vendor/github.com/dsnet/compress/internal/gofuzz.go generated vendored Normal file
@@ -0,0 +1,12 @@
// Copyright 2016, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build gofuzz
package internal
const (
Debug = true
GoFuzz = true
)

vendor/github.com/dsnet/compress/internal/prefix/debug.go generated vendored Normal file
@@ -0,0 +1,159 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build debug
package prefix
import (
"fmt"
"math"
"strings"
)
func max(a, b int) int {
if a > b {
return a
}
return b
}
func lenBase2(n uint) int {
return int(math.Ceil(math.Log2(float64(n + 1))))
}
func padBase2(v, n uint, m int) string {
s := fmt.Sprintf("%b", 1<<n|v)[1:]
if pad := m - len(s); pad > 0 {
return strings.Repeat(" ", pad) + s
}
return s
}
func lenBase10(n int) int {
return int(math.Ceil(math.Log10(float64(n + 1))))
}
func padBase10(n, m int) string {
s := fmt.Sprintf("%d", n)
if pad := m - len(s); pad > 0 {
return strings.Repeat(" ", pad) + s
}
return s
}
func (rc RangeCodes) String() string {
var maxLen, maxBase int
for _, c := range rc {
maxLen = max(maxLen, int(c.Len))
maxBase = max(maxBase, int(c.Base))
}
var ss []string
ss = append(ss, "{")
for i, c := range rc {
base := padBase10(int(c.Base), lenBase10(maxBase))
if c.Len > 0 {
base += fmt.Sprintf("-%d", c.End()-1)
}
ss = append(ss, fmt.Sprintf("\t%s: {len: %s, range: %s},",
padBase10(int(i), lenBase10(len(rc)-1)),
padBase10(int(c.Len), lenBase10(maxLen)),
base,
))
}
ss = append(ss, "}")
return strings.Join(ss, "\n")
}
func (pc PrefixCodes) String() string {
var maxSym, maxLen, maxCnt int
for _, c := range pc {
maxSym = max(maxSym, int(c.Sym))
maxLen = max(maxLen, int(c.Len))
maxCnt = max(maxCnt, int(c.Cnt))
}
var ss []string
ss = append(ss, "{")
for _, c := range pc {
var cntStr string
if maxCnt > 0 {
cnt := int(32*float32(c.Cnt)/float32(maxCnt) + 0.5)
cntStr = fmt.Sprintf("%s |%s",
padBase10(int(c.Cnt), lenBase10(maxCnt)),
strings.Repeat("#", cnt),
)
}
ss = append(ss, fmt.Sprintf("\t%s: %s, %s",
padBase10(int(c.Sym), lenBase10(maxSym)),
padBase2(uint(c.Val), uint(c.Len), maxLen),
cntStr,
))
}
ss = append(ss, "}")
return strings.Join(ss, "\n")
}
func (pd Decoder) String() string {
var ss []string
ss = append(ss, "{")
if len(pd.chunks) > 0 {
ss = append(ss, "\tchunks: {")
for i, c := range pd.chunks {
label := "sym"
if uint(c&countMask) > uint(pd.chunkBits) {
label = "idx"
}
ss = append(ss, fmt.Sprintf("\t\t%s: {%s: %s, len: %s}",
padBase2(uint(i), uint(pd.chunkBits), int(pd.chunkBits)),
label, padBase10(int(c>>countBits), 3),
padBase10(int(c&countMask), 2),
))
}
ss = append(ss, "\t},")
for j, links := range pd.links {
ss = append(ss, fmt.Sprintf("\tlinks[%d]: {", j))
linkBits := lenBase2(uint(pd.linkMask))
for i, c := range links {
ss = append(ss, fmt.Sprintf("\t\t%s: {sym: %s, len: %s},",
padBase2(uint(i), uint(linkBits), int(linkBits)),
padBase10(int(c>>countBits), 3),
padBase10(int(c&countMask), 2),
))
}
ss = append(ss, "\t},")
}
}
ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pd.chunkMask))
ss = append(ss, fmt.Sprintf("\tlinkMask: %b,", pd.linkMask))
ss = append(ss, fmt.Sprintf("\tchunkBits: %d,", pd.chunkBits))
ss = append(ss, fmt.Sprintf("\tMinBits: %d,", pd.MinBits))
ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pd.NumSyms))
ss = append(ss, "}")
return strings.Join(ss, "\n")
}
func (pe Encoder) String() string {
var maxLen int
for _, c := range pe.chunks {
maxLen = max(maxLen, int(c&countMask))
}
var ss []string
ss = append(ss, "{")
if len(pe.chunks) > 0 {
ss = append(ss, "\tchunks: {")
for i, c := range pe.chunks {
ss = append(ss, fmt.Sprintf("\t\t%s: %s,",
padBase10(i, 3),
padBase2(uint(c>>countBits), uint(c&countMask), maxLen),
))
}
ss = append(ss, "\t},")
}
ss = append(ss, fmt.Sprintf("\tchunkMask: %b,", pe.chunkMask))
ss = append(ss, fmt.Sprintf("\tNumSyms: %d,", pe.NumSyms))
ss = append(ss, "}")
return strings.Join(ss, "\n")
}

vendor/github.com/dsnet/compress/internal/prefix/decoder.go generated vendored Normal file
@@ -0,0 +1,136 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"sort"
"github.com/dsnet/compress/internal"
)
// The algorithm used to decode variable length codes is based on the lookup
// method in zlib. If the code is less-than-or-equal to maxChunkBits,
// then the symbol can be decoded using a single lookup into the chunks table.
// Otherwise, the links table will be used for a second level lookup.
//
// The chunks slice is keyed by the contents of the bit buffer ANDed with
// the chunkMask to avoid an out-of-bounds lookup. The value of chunks is a tuple
// that is decoded as follows:
//
// var length = chunks[bitBuffer&chunkMask] & countMask
// var symbol = chunks[bitBuffer&chunkMask] >> countBits
//
// If the decoded length is larger than chunkBits, then an overflow link table
// must be used for further decoding. In this case, the symbol is actually the
// index into the links tables. The second-level links table returned is
// processed in the same way as the chunks table.
//
// if length > chunkBits {
// var index = symbol // Previous symbol is index into links tables
// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
// }
//
// See the following:
// http://www.gzip.org/algorithm.txt
type Decoder struct {
chunks []uint32 // First-level lookup map
links [][]uint32 // Second-level lookup map
chunkMask uint32 // Mask the length of the chunks table
linkMask uint32 // Mask the length of the link table
chunkBits uint32 // Bit-length of the chunks table
MinBits uint32 // The minimum number of bits to safely make progress
NumSyms uint32 // Number of symbols
}
// Init initializes Decoder according to the codes provided.
func (pd *Decoder) Init(codes PrefixCodes) {
// Handle special case trees.
if len(codes) <= 1 {
switch {
case len(codes) == 0: // Empty tree (should error if used later)
*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
default:
panic("invalid codes")
}
return
}
if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
panic("input codes is not sorted")
}
if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
panic("detected incomplete or overlapping codes")
}
var minBits, maxBits uint32 = valueBits, 0
for _, c := range codes {
if minBits > c.Len {
minBits = c.Len
}
if maxBits < c.Len {
maxBits = c.Len
}
}
// Allocate chunks table as needed.
const maxChunkBits = 9 // This can be tuned for better performance
pd.NumSyms = uint32(len(codes))
pd.MinBits = minBits
pd.chunkBits = maxBits
if pd.chunkBits > maxChunkBits {
pd.chunkBits = maxChunkBits
}
numChunks := 1 << pd.chunkBits
pd.chunks = allocUint32s(pd.chunks, numChunks)
pd.chunkMask = uint32(numChunks - 1)
// Allocate links tables as needed.
pd.links = pd.links[:0]
pd.linkMask = 0
if pd.chunkBits < maxBits {
numLinks := 1 << (maxBits - pd.chunkBits)
pd.linkMask = uint32(numLinks - 1)
var linkIdx uint32
for i := range pd.chunks {
pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
}
for _, c := range codes {
if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {
pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
linkIdx++
}
}
pd.links = extendSliceUint32s(pd.links, int(linkIdx))
linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))
for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
pd.links[i] = linksFlat[j : j+numLinks]
}
}
// Fill out chunks and links tables with values.
for _, c := range codes {
chunk := c.Sym<<countBits | c.Len
if c.Len <= pd.chunkBits {
skip := 1 << uint(c.Len)
for j := int(c.Val); j < len(pd.chunks); j += skip {
pd.chunks[j] = chunk
}
} else {
linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits
links := pd.links[linkIdx]
skip := 1 << uint(c.Len-pd.chunkBits)
for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
links[j] = chunk
}
}
}
}
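
To make Init concrete, here is a hedged, package-internal sketch (not part of the vendored file; it would only compile inside package prefix). GeneratePrefixes, which appears later in this diff, fills in the Val fields from the bit-lengths.

// Lengths {1, 2, 3, 3} form a complete tree: 1/2 + 1/4 + 1/8 + 1/8 = 1.
codes := PrefixCodes{
	{Sym: 0, Len: 1},
	{Sym: 1, Len: 2},
	{Sym: 2, Len: 3},
	{Sym: 3, Len: 3},
}
if err := GeneratePrefixes(codes); err != nil {
	panic(err) // cannot happen for a complete, symbol-sorted set
}
var pd Decoder
pd.Init(codes) // one masked lookup into pd.chunks now decodes any of these codes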

vendor/github.com/dsnet/compress/internal/prefix/encoder.go generated vendored Normal file
@@ -0,0 +1,66 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"sort"
"github.com/dsnet/compress/internal"
)
type Encoder struct {
chunks []uint32 // First-level lookup map
chunkMask uint32 // Mask the length of the chunks table
NumSyms uint32 // Number of symbols
}
// Init initializes Encoder according to the codes provided.
func (pe *Encoder) Init(codes PrefixCodes) {
// Handle special case trees.
if len(codes) <= 1 {
switch {
case len(codes) == 0: // Empty tree (should error if used later)
*pe = Encoder{chunks: pe.chunks[:0], NumSyms: 0}
case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
pe.chunks = append(pe.chunks[:0], codes[0].Val<<countBits|0)
*pe = Encoder{chunks: pe.chunks[:1], NumSyms: 1}
default:
panic("invalid codes")
}
return
}
if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
panic("input codes is not sorted")
}
if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
panic("detected incomplete or overlapping codes")
}
// Enough chunks to contain all the symbols.
numChunks := 1
for n := len(codes) - 1; n > 0; n >>= 1 {
numChunks <<= 1
}
pe.NumSyms = uint32(len(codes))
retry:
// Allocate and reset chunks.
pe.chunks = allocUint32s(pe.chunks, numChunks)
pe.chunkMask = uint32(numChunks - 1)
for i := range pe.chunks {
pe.chunks[i] = 0 // Logic below relies on zero value as uninitialized
}
// Insert each symbol, checking that there are no conflicts.
for _, c := range codes {
if pe.chunks[c.Sym&pe.chunkMask] > 0 {
// Collision found in our "hash" table, so grow and try again.
numChunks <<= 1
goto retry
}
pe.chunks[c.Sym&pe.chunkMask] = c.Val<<countBits | c.Len
}
}
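
The Encoder side mirrors this; below is a hedged, package-internal sketch reusing the codes slice from the Decoder example above. After Init, one masked lookup by symbol yields the code value and its bit-length.

var pe Encoder
pe.Init(codes) // codes: sorted by symbol, with Len and Val populated
chunk := pe.chunks[uint32(2)&pe.chunkMask]     // look up symbol 2
fmt.Println(chunk&countMask, chunk>>countBits) // its bit-length and code value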

vendor/github.com/dsnet/compress/internal/prefix/prefix.go generated vendored Normal file
@@ -0,0 +1,400 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package prefix implements bit readers and writers that use prefix encoding.
package prefix
import (
"fmt"
"sort"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
)
func errorf(c int, f string, a ...interface{}) error {
return errors.Error{Code: c, Pkg: "prefix", Msg: fmt.Sprintf(f, a...)}
}
func panicf(c int, f string, a ...interface{}) {
errors.Panic(errorf(c, f, a...))
}
const (
countBits = 5 // Number of bits to store the bit-length of the code
valueBits = 27 // Number of bits to store the code value
countMask = (1 << countBits) - 1
)
// PrefixCode is a representation of a prefix code, which is conceptually a
// mapping from some arbitrary symbol to some bit-string.
//
// The Sym and Cnt fields are typically provided by the user,
// while the Len and Val fields are generated by this package.
type PrefixCode struct {
Sym uint32 // The symbol being mapped
Cnt uint32 // The number of times this symbol is used
Len uint32 // Bit-length of the prefix code
Val uint32 // Value of the prefix code (must be in 0..(1<<Len)-1)
}
type PrefixCodes []PrefixCode
type prefixCodesBySymbol []PrefixCode
func (c prefixCodesBySymbol) Len() int { return len(c) }
func (c prefixCodesBySymbol) Less(i, j int) bool { return c[i].Sym < c[j].Sym }
func (c prefixCodesBySymbol) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
type prefixCodesByCount []PrefixCode
func (c prefixCodesByCount) Len() int { return len(c) }
func (c prefixCodesByCount) Less(i, j int) bool {
return c[i].Cnt < c[j].Cnt || (c[i].Cnt == c[j].Cnt && c[i].Sym < c[j].Sym)
}
func (c prefixCodesByCount) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
func (pc PrefixCodes) SortBySymbol() { sort.Sort(prefixCodesBySymbol(pc)) }
func (pc PrefixCodes) SortByCount() { sort.Sort(prefixCodesByCount(pc)) }
// Length computes the total bit-length using the Len and Cnt fields.
func (pc PrefixCodes) Length() (nb uint) {
for _, c := range pc {
nb += uint(c.Len * c.Cnt)
}
return nb
}
// checkLengths reports whether the codes form a complete prefix tree.
func (pc PrefixCodes) checkLengths() bool {
sum := 1 << valueBits
for _, c := range pc {
sum -= (1 << valueBits) >> uint(c.Len)
}
return sum == 0 || len(pc) == 0
}
// checkPrefixes reports whether all codes have non-overlapping prefixes.
func (pc PrefixCodes) checkPrefixes() bool {
for i, c1 := range pc {
for j, c2 := range pc {
mask := uint32(1)<<c1.Len - 1
if i != j && c1.Len <= c2.Len && c1.Val&mask == c2.Val&mask {
return false
}
}
}
return true
}
// checkCanonical reports whether all codes are canonical.
// That is, they have the following properties:
//
// 1. All codes of a given bit-length are consecutive values.
// 2. Shorter codes lexicographically precede longer codes.
//
// The codes must have unique symbols and be sorted by the symbol.
// The Len and Val fields in each code must be populated.
func (pc PrefixCodes) checkCanonical() bool {
// Rule 1.
var vals [valueBits + 1]PrefixCode
for _, c := range pc {
if c.Len > 0 {
c.Val = internal.ReverseUint32N(c.Val, uint(c.Len))
if vals[c.Len].Cnt > 0 && vals[c.Len].Val+1 != c.Val {
return false
}
vals[c.Len].Val = c.Val
vals[c.Len].Cnt++
}
}
// Rule 2.
var last PrefixCode
for _, v := range vals {
if v.Cnt > 0 {
curVal := v.Val - v.Cnt + 1
if last.Cnt != 0 && last.Val >= curVal {
return false
}
last = v
}
}
return true
}
// GenerateLengths assigns non-zero bit-lengths to all codes. Codes with high
// frequency counts will be assigned shorter codes to reduce bit entropy.
// This function is used primarily by compressors.
//
// The input codes must have the Cnt field populated and be sorted by count.
// Even if a code has a count of 0, a non-zero bit-length will be assigned.
//
// The result will have the Len field populated. The algorithm used guarantees
// that Len <= maxBits and that it is a complete prefix tree. The resulting
// codes will remain sorted by count.
func GenerateLengths(codes PrefixCodes, maxBits uint) error {
if len(codes) <= 1 {
if len(codes) == 1 {
codes[0].Len = 0
}
return nil
}
// Verify that the codes are in ascending order by count.
cntLast := codes[0].Cnt
for _, c := range codes[1:] {
if c.Cnt < cntLast {
return errorf(errors.Invalid, "non-monotonically increasing symbol counts")
}
cntLast = c.Cnt
}
// Construct a Huffman tree used to generate the bit-lengths.
//
// The Huffman tree is a binary tree where each symbol lies as a leaf node
// on this tree. The length of the prefix code to assign is the depth of
// that leaf from the root. The Huffman algorithm, which runs in O(n),
// is used to generate the tree. It assumes that codes are sorted in
// increasing order of frequency.
//
// The algorithm is as follows:
// 1. Start with two queues, F and Q, where F contains all of the starting
// symbols sorted such that symbols with lowest counts come first.
// 2. While len(F)+len(Q) > 1:
// 2a. Dequeue the node from F or Q that has the lowest weight as N0.
// 2b. Dequeue the node from F or Q that has the lowest weight as N1.
// 2c. Create a new node N that has N0 and N1 as its children.
// 2d. Enqueue N into the back of Q.
// 3. The tree's root node is Q[0].
type node struct {
cnt uint32
// n0 or c0 represent the left child of this node.
// Since Go does not have unions, only one of these will be set.
// Similarly, n1 or c1 represent the right child of this node.
//
// If n0 or n1 is set, then it represents a "pointer" to another
// node in the Huffman tree. Since Go's pointer analysis cannot reason
// that these node pointers do not escape (golang.org/issue/13493),
// we use an index to a node in the nodes slice as a pseudo-pointer.
//
// If c0 or c1 is set, then it represents a leaf "node" in the
// Huffman tree. The leaves are the PrefixCode values themselves.
n0, n1 int // Index to child nodes
c0, c1 *PrefixCode
}
var nodeIdx int
var nodeArr [1024]node // Large enough to handle most cases on the stack
nodes := nodeArr[:]
if len(nodes) < len(codes) {
nodes = make([]node, len(codes)) // Number of internal nodes < number of leaves
}
freqs, queue := codes, nodes[:0]
for len(freqs)+len(queue) > 1 {
// These are the two smallest nodes at the front of freqs and queue.
var n node
if len(queue) == 0 || (len(freqs) > 0 && freqs[0].Cnt <= queue[0].cnt) {
n.c0, freqs = &freqs[0], freqs[1:]
n.cnt += n.c0.Cnt
} else {
n.cnt += queue[0].cnt
n.n0 = nodeIdx // nodeIdx is same as &queue[0] - &nodes[0]
nodeIdx++
queue = queue[1:]
}
if len(queue) == 0 || (len(freqs) > 0 && freqs[0].Cnt <= queue[0].cnt) {
n.c1, freqs = &freqs[0], freqs[1:]
n.cnt += n.c1.Cnt
} else {
n.cnt += queue[0].cnt
n.n1 = nodeIdx // nodeIdx is same as &queue[0] - &nodes[0]
nodeIdx++
queue = queue[1:]
}
queue = append(queue, n)
}
rootIdx := nodeIdx
// Search the whole binary tree, noting when we hit each leaf node.
// We do not care about the exact Huffman tree structure, but rather we only
// care about depth of each of the leaf nodes. That is, the depth determines
// how long each symbol is in bits.
//
// Since the number of leaves is n, there are at most n internal nodes.
// Thus, this algorithm runs in O(n).
var fixBits bool
var explore func(int, uint)
explore = func(rootIdx int, level uint) {
root := &nodes[rootIdx]
// Explore left branch.
if root.c0 == nil {
explore(root.n0, level+1)
} else {
fixBits = fixBits || (level > maxBits)
root.c0.Len = uint32(level)
}
// Explore right branch.
if root.c1 == nil {
explore(root.n1, level+1)
} else {
fixBits = fixBits || (level > maxBits)
root.c1.Len = uint32(level)
}
}
explore(rootIdx, 1)
// Fix the bit-lengths if we violate the maxBits requirement.
if fixBits {
// Create histogram for number of symbols with each bit-length.
var symBitsArr [valueBits + 1]uint32
symBits := symBitsArr[:] // symBits[nb] indicates number of symbols using nb bits
for _, c := range codes {
for int(c.Len) >= len(symBits) {
symBits = append(symBits, 0)
}
symBits[c.Len]++
}
// Fudge the tree such that the largest bit-length is <= maxBits.
// This is accomplished by effectively doing a tree rotation. That is, we
// increase the bit-length of some higher frequency code, so that the
// bit-lengths of lower frequency codes can be decreased.
//
// Visually, this looks like the following transform:
//
// Level Before After
// __ ___
// / \ / \
// n-1 X / \ /\ /\
// n X /\ X X X X
// n+1 X X
//
var treeRotate func(uint)
treeRotate = func(nb uint) {
if symBits[nb-1] == 0 {
treeRotate(nb - 1)
}
symBits[nb-1] -= 1 // Push this node to the level below
symBits[nb] += 3 // This level gets one node from above, two from below
symBits[nb+1] -= 2 // Push two nodes to the level above
}
for i := uint(len(symBits)) - 1; i > maxBits; i-- {
for symBits[i] > 0 {
treeRotate(i - 1)
}
}
// Assign bit-lengths to each code. Since codes is sorted in increasing
// order of frequency, that means that the most frequently used symbols
// should have the shortest bit-lengths. Thus, we copy symbols to codes
// from the back of codes first.
cs := codes
for nb, cnt := range symBits {
if cnt > 0 {
pos := len(cs) - int(cnt)
cs2 := cs[pos:]
for i := range cs2 {
cs2[i].Len = uint32(nb)
}
cs = cs[:pos]
}
}
if len(cs) != 0 {
panic("not all codes were used up")
}
}
if internal.Debug && !codes.checkLengths() {
panic("incomplete prefix tree detected")
}
return nil
}
// GeneratePrefixes assigns a prefix value to all codes according to the
// bit-lengths. This function is used by both compressors and decompressors.
//
// The input codes must have the Sym and Len fields populated and be
// sorted by symbol. The bit-lengths of each code must be properly allocated,
// such that it forms a complete tree.
//
// The result will have the Val field populated and will produce a canonical
// prefix tree. The resulting codes will remain sorted by symbol.
func GeneratePrefixes(codes PrefixCodes) error {
if len(codes) <= 1 {
if len(codes) == 1 {
if codes[0].Len != 0 {
return errorf(errors.Invalid, "degenerate prefix tree with one node")
}
codes[0].Val = 0
}
return nil
}
// Compute basic statistics on the symbols.
var bitCnts [valueBits + 1]uint
c0 := codes[0]
bitCnts[c0.Len]++
minBits, maxBits, symLast := c0.Len, c0.Len, c0.Sym
for _, c := range codes[1:] {
if c.Sym <= symLast {
return errorf(errors.Invalid, "non-unique or non-monotonically increasing symbols")
}
if minBits > c.Len {
minBits = c.Len
}
if maxBits < c.Len {
maxBits = c.Len
}
bitCnts[c.Len]++ // Histogram of bit counts
symLast = c.Sym // Keep track of last symbol
}
if minBits == 0 {
return errorf(errors.Invalid, "invalid prefix bit-length")
}
// Compute the next code for a symbol of a given bit length.
var nextCodes [valueBits + 1]uint
var code uint
for i := minBits; i <= maxBits; i++ {
code <<= 1
nextCodes[i] = code
code += bitCnts[i]
}
if code != 1<<maxBits {
return errorf(errors.Invalid, "degenerate prefix tree")
}
// Assign the code to each symbol.
for i, c := range codes {
codes[i].Val = internal.ReverseUint32N(uint32(nextCodes[c.Len]), uint(c.Len))
nextCodes[c.Len]++
}
if internal.Debug && !codes.checkPrefixes() {
panic("overlapping prefixes detected")
}
if internal.Debug && !codes.checkCanonical() {
panic("non-canonical prefixes detected")
}
return nil
}
func allocUint32s(s []uint32, n int) []uint32 {
if cap(s) >= n {
return s[:n]
}
return make([]uint32, n, n*3/2)
}
func extendSliceUint32s(s [][]uint32, n int) [][]uint32 {
if cap(s) >= n {
return s[:n]
}
ss := make([][]uint32, n, n*3/2)
copy(ss, s[:cap(s)])
return ss
}
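
Putting the two generators together, a compressor would typically run the following pipeline (a hedged, package-internal sketch; the counts are illustrative):

// Start from raw symbol frequencies.
codes := PrefixCodes{
	{Sym: 0, Cnt: 10},
	{Sym: 1, Cnt: 15},
	{Sym: 2, Cnt: 30},
	{Sym: 3, Cnt: 45},
}
codes.SortByCount() // GenerateLengths expects ascending counts
if err := GenerateLengths(codes, 15); err != nil {
	panic(err) // assigns Len fields, bounded by 15 bits
}
codes.SortBySymbol() // GeneratePrefixes expects symbol order
if err := GeneratePrefixes(codes); err != nil {
	panic(err) // assigns canonical, bit-reversed Val fields
}
// codes can now initialize an Encoder (compression) or a Decoder (decompression).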

vendor/github.com/dsnet/compress/internal/prefix/range.go generated vendored Normal file
@@ -0,0 +1,93 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
type RangeCode struct {
Base uint32 // Starting base offset of the range
Len uint32 // Bit-length of a subsequent integer to add to base offset
}
type RangeCodes []RangeCode
type RangeEncoder struct {
rcs RangeCodes
lut [1024]uint32
minBase uint
}
// End reports the non-inclusive ending range.
func (rc RangeCode) End() uint32 { return rc.Base + (1 << rc.Len) }
// MakeRangeCodes creates a RangeCodes, where each region is assumed to be
// contiguously stacked, without any gaps, with bit-lengths taken from bits.
func MakeRangeCodes(minBase uint, bits []uint) (rc RangeCodes) {
for _, nb := range bits {
rc = append(rc, RangeCode{Base: uint32(minBase), Len: uint32(nb)})
minBase += 1 << nb
}
return rc
}
// Base reports the inclusive starting range for all ranges.
func (rcs RangeCodes) Base() uint32 { return rcs[0].Base }
// End reports the non-inclusive ending range for all ranges.
func (rcs RangeCodes) End() uint32 { return rcs[len(rcs)-1].End() }
// checkValid reports whether the RangeCodes is valid. In order to be valid,
// the following must hold true:
// rcs[i-1].Base <= rcs[i].Base
// rcs[i-1].End <= rcs[i].End
// rcs[i-1].End >= rcs[i].Base
//
// Practically speaking, each range must be increasing and must not have any
// gaps in between. It is okay for ranges to overlap.
func (rcs RangeCodes) checkValid() bool {
if len(rcs) == 0 {
return false
}
pre := rcs[0]
for _, cur := range rcs[1:] {
preBase, preEnd := pre.Base, pre.End()
curBase, curEnd := cur.Base, cur.End()
if preBase > curBase || preEnd > curEnd || preEnd < curBase {
return false
}
pre = cur
}
return true
}
func (re *RangeEncoder) Init(rcs RangeCodes) {
if !rcs.checkValid() {
panic("invalid range codes")
}
*re = RangeEncoder{rcs: rcs, minBase: uint(rcs.Base())}
for sym, rc := range rcs {
base := int(rc.Base) - int(re.minBase)
end := int(rc.End()) - int(re.minBase)
if base >= len(re.lut) {
break
}
if end > len(re.lut) {
end = len(re.lut)
}
for i := base; i < end; i++ {
re.lut[i] = uint32(sym)
}
}
}
func (re *RangeEncoder) Encode(offset uint) (sym uint) {
if idx := int(offset - re.minBase); idx < len(re.lut) {
return uint(re.lut[idx])
}
sym = uint(re.lut[len(re.lut)-1])
retry:
if int(sym) >= len(re.rcs) || re.rcs[sym].Base > uint32(offset) {
return sym - 1
}
sym++
goto retry // Avoid for-loop so that this function can be inlined
}

vendor/github.com/dsnet/compress/internal/prefix/reader.go generated vendored Normal file
@@ -0,0 +1,335 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"bufio"
"bytes"
"encoding/binary"
"io"
"strings"
"github.com/dsnet/compress"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
)
// Reader implements a prefix decoder. If the input io.Reader satisfies the
// compress.ByteReader or compress.BufferedReader interface, then it also
// guarantees that it will never read more bytes than is necessary.
//
// For high performance, provide an io.Reader that satisfies the
// compress.BufferedReader interface. If the input does not satisfy either
// compress.ByteReader or compress.BufferedReader, then it will be internally
// wrapped with a bufio.Reader.
type Reader struct {
Offset int64 // Number of bytes read from the underlying io.Reader
rd io.Reader
byteRd compress.ByteReader // Set if rd is a ByteReader
bufRd compress.BufferedReader // Set if rd is a BufferedReader
bufBits uint64 // Buffer to hold some bits
numBits uint // Number of valid bits in bufBits
bigEndian bool // Do we treat input bytes as big endian?
// These fields are only used if rd is a compress.BufferedReader.
bufPeek []byte // Buffer for the Peek data
discardBits int // Number of bits to discard from reader
fedBits uint // Number of bits fed in last call to PullBits
// These fields are used to reduce allocations.
bb *buffer
br *bytesReader
sr *stringReader
bu *bufio.Reader
}
// Init initializes the bit Reader to read from r. If bigEndian is true, then
// bits will be read starting from the most-significant bits of a byte
// (as done in bzip2), otherwise it will read starting from the
// least-significant bits of a byte (such as for deflate and brotli).
func (pr *Reader) Init(r io.Reader, bigEndian bool) {
*pr = Reader{
rd: r,
bigEndian: bigEndian,
bb: pr.bb,
br: pr.br,
sr: pr.sr,
bu: pr.bu,
}
switch rr := r.(type) {
case *bytes.Buffer:
if pr.bb == nil {
pr.bb = new(buffer)
}
*pr.bb = buffer{Buffer: rr}
pr.bufRd = pr.bb
case *bytes.Reader:
if pr.br == nil {
pr.br = new(bytesReader)
}
*pr.br = bytesReader{Reader: rr}
pr.bufRd = pr.br
case *strings.Reader:
if pr.sr == nil {
pr.sr = new(stringReader)
}
*pr.sr = stringReader{Reader: rr}
pr.bufRd = pr.sr
case compress.BufferedReader:
pr.bufRd = rr
case compress.ByteReader:
pr.byteRd = rr
default:
if pr.bu == nil {
pr.bu = bufio.NewReader(nil)
}
pr.bu.Reset(r)
pr.rd, pr.bufRd = pr.bu, pr.bu
}
}
// BitsRead reports the total number of bits emitted from any Read method.
func (pr *Reader) BitsRead() int64 {
offset := 8*pr.Offset - int64(pr.numBits)
if pr.bufRd != nil {
discardBits := pr.discardBits + int(pr.fedBits-pr.numBits)
offset = 8*pr.Offset + int64(discardBits)
}
return offset
}
// IsBufferedReader reports whether the underlying io.Reader is also a
// compress.BufferedReader.
func (pr *Reader) IsBufferedReader() bool {
return pr.bufRd != nil
}
// ReadPads reads 0-7 bits from the bit buffer to achieve byte-alignment.
func (pr *Reader) ReadPads() uint {
nb := pr.numBits % 8
val := uint(pr.bufBits & uint64(1<<nb-1))
pr.bufBits >>= nb
pr.numBits -= nb
return val
}
// Read reads bytes into buf.
// The bit-ordering mode does not affect this method.
func (pr *Reader) Read(buf []byte) (cnt int, err error) {
if pr.numBits > 0 {
if pr.numBits%8 != 0 {
return 0, errorf(errors.Invalid, "non-aligned bit buffer")
}
for cnt = 0; len(buf) > cnt && pr.numBits > 0; cnt++ {
if pr.bigEndian {
buf[cnt] = internal.ReverseLUT[byte(pr.bufBits)]
} else {
buf[cnt] = byte(pr.bufBits)
}
pr.bufBits >>= 8
pr.numBits -= 8
}
return cnt, nil
}
if _, err := pr.Flush(); err != nil {
return 0, err
}
cnt, err = pr.rd.Read(buf)
pr.Offset += int64(cnt)
return cnt, err
}
// ReadOffset reads an offset value using the provided RangeCodes indexed by
// the symbol read.
func (pr *Reader) ReadOffset(pd *Decoder, rcs RangeCodes) uint {
rc := rcs[pr.ReadSymbol(pd)]
return uint(rc.Base) + pr.ReadBits(uint(rc.Len))
}
// TryReadBits attempts to read nb bits using the contents of the bit buffer
// alone. It returns the value and whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadBits(nb uint) (uint, bool) {
if pr.numBits < nb {
return 0, false
}
val := uint(pr.bufBits & uint64(1<<nb-1))
pr.bufBits >>= nb
pr.numBits -= nb
return val, true
}
// ReadBits reads nb bits in from the underlying reader.
func (pr *Reader) ReadBits(nb uint) uint {
if err := pr.PullBits(nb); err != nil {
errors.Panic(err)
}
val := uint(pr.bufBits & uint64(1<<nb-1))
pr.bufBits >>= nb
pr.numBits -= nb
return val
}
// TryReadSymbol attempts to decode the next symbol using the contents of the
// bit buffer alone. It returns the decoded symbol and whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pr *Reader) TryReadSymbol(pd *Decoder) (uint, bool) {
if pr.numBits < uint(pd.MinBits) || len(pd.chunks) == 0 {
return 0, false
}
chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
nb := uint(chunk & countMask)
if nb > pr.numBits || nb > uint(pd.chunkBits) {
return 0, false
}
pr.bufBits >>= nb
pr.numBits -= nb
return uint(chunk >> countBits), true
}
// ReadSymbol reads the next symbol using the provided prefix Decoder.
func (pr *Reader) ReadSymbol(pd *Decoder) uint {
if len(pd.chunks) == 0 {
panicf(errors.Invalid, "decode with empty prefix tree")
}
nb := uint(pd.MinBits)
for {
if err := pr.PullBits(nb); err != nil {
errors.Panic(err)
}
chunk := pd.chunks[uint32(pr.bufBits)&pd.chunkMask]
nb = uint(chunk & countMask)
if nb > uint(pd.chunkBits) {
linkIdx := chunk >> countBits
chunk = pd.links[linkIdx][uint32(pr.bufBits>>pd.chunkBits)&pd.linkMask]
nb = uint(chunk & countMask)
}
if nb <= pr.numBits {
pr.bufBits >>= nb
pr.numBits -= nb
return uint(chunk >> countBits)
}
}
}
// Flush updates the read offset of the underlying ByteReader.
// If reader is a compress.BufferedReader, then this calls Discard to update
// the read offset.
func (pr *Reader) Flush() (int64, error) {
if pr.bufRd == nil {
return pr.Offset, nil
}
// Update the number of total bits to discard.
pr.discardBits += int(pr.fedBits - pr.numBits)
pr.fedBits = pr.numBits
// Discard some bytes to update read offset.
var err error
nd := (pr.discardBits + 7) / 8 // Round up to nearest byte
nd, err = pr.bufRd.Discard(nd)
pr.discardBits -= nd * 8 // -7..0
pr.Offset += int64(nd)
// These are invalid after Discard.
pr.bufPeek = nil
return pr.Offset, err
}
// PullBits ensures that at least nb bits exist in the bit buffer.
// If the underlying reader is a compress.BufferedReader, then this will fill
// the bit buffer with as many bits as possible, relying on Peek and Discard to
// properly advance the read offset. Otherwise, it will use ReadByte to fill the
// buffer with just the right number of bits.
func (pr *Reader) PullBits(nb uint) error {
if pr.bufRd != nil {
pr.discardBits += int(pr.fedBits - pr.numBits)
for {
if len(pr.bufPeek) == 0 {
pr.fedBits = pr.numBits // Don't discard bits just added
if _, err := pr.Flush(); err != nil {
return err
}
// Peek no more bytes than necessary.
// The computation for cntPeek computes the minimum number of
// bytes to Peek to fill nb bits.
var err error
cntPeek := int(nb+(-nb&7)) / 8
if cntPeek < pr.bufRd.Buffered() {
cntPeek = pr.bufRd.Buffered()
}
pr.bufPeek, err = pr.bufRd.Peek(cntPeek)
pr.bufPeek = pr.bufPeek[int(pr.numBits/8):] // Skip buffered bits
if len(pr.bufPeek) == 0 {
if pr.numBits >= nb {
break
}
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
}
n := int(64-pr.numBits) / 8 // Number of bytes to copy to bit buffer
if len(pr.bufPeek) >= 8 {
// Starting with Go 1.7, the compiler should use a wide integer
// load here if the architecture supports it.
u := binary.LittleEndian.Uint64(pr.bufPeek)
if pr.bigEndian {
// Swap all the bits within each byte.
u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
}
pr.bufBits |= u << pr.numBits
pr.numBits += uint(n * 8)
pr.bufPeek = pr.bufPeek[n:]
break
} else {
if n > len(pr.bufPeek) {
n = len(pr.bufPeek)
}
for _, c := range pr.bufPeek[:n] {
if pr.bigEndian {
c = internal.ReverseLUT[c]
}
pr.bufBits |= uint64(c) << pr.numBits
pr.numBits += 8
}
pr.bufPeek = pr.bufPeek[n:]
if pr.numBits > 56 {
break
}
}
}
pr.fedBits = pr.numBits
} else {
for pr.numBits < nb {
c, err := pr.byteRd.ReadByte()
if err != nil {
if err == io.EOF {
err = io.ErrUnexpectedEOF
}
return err
}
if pr.bigEndian {
c = internal.ReverseLUT[c]
}
pr.bufBits |= uint64(c) << pr.numBits
pr.numBits += 8
pr.Offset++
}
}
return nil
}
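
A usage sketch for the bit Reader (hedged; like everything under internal/, it is only importable from inside the dsnet/compress module):

package main

import (
	"bytes"
	"fmt"

	"github.com/dsnet/compress/internal/prefix"
)

func main() {
	var pr prefix.Reader
	pr.Init(bytes.NewReader([]byte{0x2d}), false) // false: LSB-first, deflate-style
	fmt.Println(pr.ReadBits(3))                   // low 3 bits of 0x2d (0b00101101) -> 0b101 = 5
	fmt.Println(pr.ReadBits(5))                   // remaining bits 0b00101 -> 5
}

Note that ReadBits raises failures through errors.Panic; real callers recover them with errors.Recover, as in the errors package sketch earlier.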

vendor/github.com/dsnet/compress/internal/prefix/wrap.go generated vendored Normal file
@@ -0,0 +1,146 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"bytes"
"io"
"strings"
)
// For some of the common Readers, we wrap and extend them to satisfy the
// compress.BufferedReader interface to improve performance.
type buffer struct {
*bytes.Buffer
}
type bytesReader struct {
*bytes.Reader
pos int64
buf []byte
arr [512]byte
}
type stringReader struct {
*strings.Reader
pos int64
buf []byte
arr [512]byte
}
func (r *buffer) Buffered() int {
return r.Len()
}
func (r *buffer) Peek(n int) ([]byte, error) {
b := r.Bytes()
if len(b) < n {
return b, io.EOF
}
return b[:n], nil
}
func (r *buffer) Discard(n int) (int, error) {
b := r.Next(n)
if len(b) < n {
return len(b), io.EOF
}
return n, nil
}
func (r *bytesReader) Buffered() int {
r.update()
if r.Len() > len(r.buf) {
return len(r.buf)
}
return r.Len()
}
func (r *bytesReader) Peek(n int) ([]byte, error) {
if n > len(r.arr) {
return nil, io.ErrShortBuffer
}
// Return sub-slice of local buffer if possible.
r.update()
if len(r.buf) >= n {
return r.buf[:n], nil
}
// Fill entire local buffer, and return appropriate sub-slice.
cnt, err := r.ReadAt(r.arr[:], r.pos)
r.buf = r.arr[:cnt]
if cnt < n {
return r.arr[:cnt], err
}
return r.arr[:n], nil
}
func (r *bytesReader) Discard(n int) (int, error) {
var err error
if n > r.Len() {
n, err = r.Len(), io.EOF
}
r.Seek(int64(n), io.SeekCurrent)
return n, err
}
// update reslices the internal buffer to be consistent with the read offset.
func (r *bytesReader) update() {
pos, _ := r.Seek(0, io.SeekCurrent)
if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) {
r.buf, r.pos = r.buf[off:], pos
} else {
r.buf, r.pos = nil, pos
}
}
func (r *stringReader) Buffered() int {
r.update()
if r.Len() > len(r.buf) {
return len(r.buf)
}
return r.Len()
}
func (r *stringReader) Peek(n int) ([]byte, error) {
if n > len(r.arr) {
return nil, io.ErrShortBuffer
}
// Return sub-slice of local buffer if possible.
r.update()
if len(r.buf) >= n {
return r.buf[:n], nil
}
// Fill entire local buffer, and return appropriate sub-slice.
cnt, err := r.ReadAt(r.arr[:], r.pos)
r.buf = r.arr[:cnt]
if cnt < n {
return r.arr[:cnt], err
}
return r.arr[:n], nil
}
func (r *stringReader) Discard(n int) (int, error) {
var err error
if n > r.Len() {
n, err = r.Len(), io.EOF
}
r.Seek(int64(n), io.SeekCurrent)
return n, err
}
// update reslices the internal buffer to be consistent with the read offset.
func (r *stringReader) update() {
pos, _ := r.Seek(0, io.SeekCurrent)
if off := pos - r.pos; off >= 0 && off < int64(len(r.buf)) {
r.buf, r.pos = r.buf[off:], pos
} else {
r.buf, r.pos = nil, pos
}
}
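
As a package-internal sketch of what these wrappers provide (hypothetical usage; buffer is unexported, so this only compiles inside package prefix):

var b buffer
b.Buffer = bytes.NewBufferString("hello")
p, _ := b.Peek(2)                    // "he", without consuming it
b.Discard(2)                         // now actually advance past "he"
fmt.Println(string(p), b.Buffered()) // he 3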

vendor/github.com/dsnet/compress/internal/prefix/writer.go generated vendored Normal file
@@ -0,0 +1,166 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"encoding/binary"
"io"
"github.com/dsnet/compress/internal/errors"
)
// Writer implements a prefix encoder. For performance reasons, Writer will not
// write bytes immediately to the underlying stream.
type Writer struct {
Offset int64 // Number of bytes written to the underlying io.Writer
wr io.Writer
bufBits uint64 // Buffer to hold some bits
numBits uint // Number of valid bits in bufBits
bigEndian bool // Are bits written in big-endian order?
buf [512]byte
cntBuf int
}
// Init initializes the bit Writer to write to w. If bigEndian is true, then
// bits will be written starting from the most-significant bits of a byte
// (as done in bzip2), otherwise it will write starting from the
// least-significant bits of a byte (such as for deflate and brotli).
func (pw *Writer) Init(w io.Writer, bigEndian bool) {
*pw = Writer{wr: w, bigEndian: bigEndian}
}
// BitsWritten reports the total number of bits issued to any Write method.
func (pw *Writer) BitsWritten() int64 {
return 8*pw.Offset + 8*int64(pw.cntBuf) + int64(pw.numBits)
}
// WritePads writes 0-7 bits to the bit buffer to achieve byte-alignment.
func (pw *Writer) WritePads(v uint) {
nb := -pw.numBits & 7
pw.bufBits |= uint64(v) << pw.numBits
pw.numBits += nb
}
// Write writes bytes from buf.
// The bit-ordering mode does not affect this method.
func (pw *Writer) Write(buf []byte) (cnt int, err error) {
if pw.numBits > 0 || pw.cntBuf > 0 {
if pw.numBits%8 != 0 {
return 0, errorf(errors.Invalid, "non-aligned bit buffer")
}
if _, err := pw.Flush(); err != nil {
return 0, err
}
}
cnt, err = pw.wr.Write(buf)
pw.Offset += int64(cnt)
return cnt, err
}
// WriteOffset writes ofs in a (sym, extra) fashion using the provided prefix
// Encoder and RangeEncoder.
func (pw *Writer) WriteOffset(ofs uint, pe *Encoder, re *RangeEncoder) {
sym := re.Encode(ofs)
pw.WriteSymbol(sym, pe)
rc := re.rcs[sym]
pw.WriteBits(ofs-uint(rc.Base), uint(rc.Len))
}
// TryWriteBits attempts to write nb bits using the contents of the bit buffer
// alone. It reports whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pw *Writer) TryWriteBits(v, nb uint) bool {
if 64-pw.numBits < nb {
return false
}
pw.bufBits |= uint64(v) << pw.numBits
pw.numBits += nb
return true
}
// WriteBits writes nb bits of v to the underlying writer.
func (pw *Writer) WriteBits(v, nb uint) {
if _, err := pw.PushBits(); err != nil {
errors.Panic(err)
}
pw.bufBits |= uint64(v) << pw.numBits
pw.numBits += nb
}
// TryWriteSymbol attempts to encode the next symbol using the contents of the
// bit buffer alone. It reports whether it succeeded.
//
// This method is designed to be inlined for performance reasons.
func (pw *Writer) TryWriteSymbol(sym uint, pe *Encoder) bool {
chunk := pe.chunks[uint32(sym)&pe.chunkMask]
nb := uint(chunk & countMask)
if 64-pw.numBits < nb {
return false
}
pw.bufBits |= uint64(chunk>>countBits) << pw.numBits
pw.numBits += nb
return true
}
// WriteSymbol writes the symbol using the provided prefix Encoder.
func (pw *Writer) WriteSymbol(sym uint, pe *Encoder) {
if _, err := pw.PushBits(); err != nil {
errors.Panic(err)
}
chunk := pe.chunks[uint32(sym)&pe.chunkMask]
nb := uint(chunk & countMask)
pw.bufBits |= uint64(chunk>>countBits) << pw.numBits
pw.numBits += nb
}
// Flush flushes all complete bytes from the bit buffer to the byte buffer, and
// then flushes all bytes in the byte buffer to the underlying writer.
// After this call, the bit Writer will withhold at most 7 bits.
func (pw *Writer) Flush() (int64, error) {
if pw.numBits < 8 && pw.cntBuf == 0 {
return pw.Offset, nil
}
if _, err := pw.PushBits(); err != nil {
return pw.Offset, err
}
cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
pw.cntBuf -= cnt
pw.Offset += int64(cnt)
return pw.Offset, err
}
// PushBits pushes as many bytes as possible from the bit buffer to the byte
// buffer, reporting the number of bits pushed.
func (pw *Writer) PushBits() (uint, error) {
if pw.cntBuf >= len(pw.buf)-8 {
cnt, err := pw.wr.Write(pw.buf[:pw.cntBuf])
pw.cntBuf -= cnt
pw.Offset += int64(cnt)
if err != nil {
return 0, err
}
}
u := pw.bufBits
if pw.bigEndian {
// Swap all the bits within each byte.
u = (u&0xaaaaaaaaaaaaaaaa)>>1 | (u&0x5555555555555555)<<1
u = (u&0xcccccccccccccccc)>>2 | (u&0x3333333333333333)<<2
u = (u&0xf0f0f0f0f0f0f0f0)>>4 | (u&0x0f0f0f0f0f0f0f0f)<<4
}
// Starting with Go 1.7, the compiler should use a wide integer
// store here if the architecture supports it.
binary.LittleEndian.PutUint64(pw.buf[pw.cntBuf:], u)
nb := pw.numBits / 8 // Number of bytes to copy from bit buffer
pw.cntBuf += int(nb)
pw.bufBits >>= 8 * nb
pw.numBits -= 8 * nb
return 8 * nb, nil
}
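
And the matching Writer sketch (same internal-module caveat): bits accumulate in a 64-bit buffer and reach the underlying writer only on Flush.

package main

import (
	"bytes"
	"fmt"

	"github.com/dsnet/compress/internal/prefix"
)

func main() {
	var buf bytes.Buffer
	var pw prefix.Writer
	pw.Init(&buf, false) // LSB-first bit order
	pw.WriteBits(5, 3)   // queue 0b101
	pw.WritePads(0)      // zero-pad to the next byte boundary
	pw.Flush()           // push the completed byte to buf
	fmt.Printf("%#x\n", buf.Bytes()) // 0x05
}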

vendor/github.com/dsnet/compress/internal/release.go generated vendored Normal file
@@ -0,0 +1,21 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !debug,!gofuzz
package internal
// Debug indicates whether the debug build tag was set.
//
// If set, programs may choose to print with more human-readable
// debug information and also perform sanity checks that would otherwise be too
// expensive to run in a release build.
const Debug = false
// GoFuzz indicates whether the gofuzz build tag was set.
//
// If set, programs may choose to disable certain checks (like checksums) that
// would be nearly impossible for gofuzz to properly get right.
// If GoFuzz is set, it implies that Debug is set as well.
const GoFuzz = false