
Dump: add output format tar and output to stdout (#10376)

* Dump: Use mholt/archive/v3 to support tar, including many compression formats

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Allow dump output to stdout

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file"

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: also dump LFS (fixes #10058)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365)

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* Use log.Info instead of fmt.Fprintf

Signed-off-by: Philipp Homann <homann.philipp@googlemail.com>

* import ordering

* make fmt
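
For illustration (assuming the --type option and the "--file -" stdout convention this change introduces), a dump can now be streamed as a compressed tarball without touching disk:

    gitea dump --type tar.gz --file -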

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Matti R <matti@mdranta.net>
PhilippHomann 2020-06-05 22:47:39 +02:00 committed by GitHub
parent 209b17c4e2
commit 684b7a999f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
303 changed files with 301317 additions and 1183 deletions

vendor/github.com/dsnet/compress/bzip2/bwt.go generated vendored Normal file
@@ -0,0 +1,110 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import "github.com/dsnet/compress/bzip2/internal/sais"
// The Burrows-Wheeler Transform implementation used here is based on the
// Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan.
// This implementation uses the sais algorithm originally written by Yuta Mori.
//
// The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a
// mathematical relationship between Suffix Arrays and the Burrows-Wheeler
// Transform, such that a SA can be converted to a BWT in O(n) time.
//
// References:
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
// https://github.com/cscott/compressjs/blob/master/lib/BWT.js
// https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space
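// Worked example (added for illustration, not from the original source):
// the bzip2 BWT of "banana" sorts all six cyclic rotations of the input:
//
//	abanan, anaban, ananab, banana, nabana, nanaba
//
// and emits the last column "nnbaaa" with ptr=3, the row containing the
// original input. Decode([]byte("nnbaaa"), 3) recovers "banana".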
type burrowsWheelerTransform struct {
buf []byte
sa []int
perm []uint32
}
func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) {
if len(buf) == 0 {
return -1
}
// TODO(dsnet): Find a way to avoid the duplicate input string method.
// We only need to do this because suffix arrays (by definition) only
// operate on non-wrapped suffixes of a string. On the other hand,
// the BWT specifically used in bzip2 operates on strings that wrap around
// when being sorted.
// Step 1: Concatenate the input string to itself so that we can use the
// suffix array algorithm for bzip2's variant of BWT.
n := len(buf)
bwt.buf = append(append(bwt.buf[:0], buf...), buf...)
if cap(bwt.sa) < 2*n {
bwt.sa = make([]int, 2*n)
}
t := bwt.buf[:2*n]
sa := bwt.sa[:2*n]
// Step 2: Compute the suffix array (SA). The input string, t, will not be
// modified, while the results will be written to the output, sa.
sais.ComputeSA(t, sa)
// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the
// input, we have two copies of the input: in buf and buf2. Thus, we write
// the transformation to buf, while using buf2.
var j int
buf2 := t[n:]
for _, i := range sa {
if i < n {
if i == 0 {
ptr = j
i = n
}
buf[j] = buf2[i-1]
j++
}
}
return ptr
}
func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) {
if len(buf) == 0 {
return
}
// Step 1: Compute cumm, where cumm[ch] reports the total number of
// characters that precede the character ch in the alphabet.
var cumm [256]int
for _, v := range buf {
cumm[v]++
}
var sum int
for i, v := range cumm {
cumm[i] = sum
sum += v
}
// Step 2: Compute perm, where perm[ptr] contains a pointer to the next
// byte in buf and the next pointer in perm itself.
if cap(bwt.perm) < len(buf) {
bwt.perm = make([]uint32, len(buf))
}
perm := bwt.perm[:len(buf)]
for i, b := range buf {
perm[cumm[b]] = uint32(i)
cumm[b]++
}
// Step 3: Follow each pointer in perm to the next byte, starting with the
// origin pointer.
if cap(bwt.buf) < len(buf) {
bwt.buf = make([]byte, len(buf))
}
buf2 := bwt.buf[:len(buf)]
i := perm[ptr]
for j := range buf2 {
buf2[j] = buf[i]
i = perm[i]
}
copy(buf, buf2)
}

vendor/github.com/dsnet/compress/bzip2/common.go generated vendored Normal file
@@ -0,0 +1,110 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package bzip2 implements the BZip2 compressed data format.
//
// Canonical C implementation:
// http://bzip.org
//
// Unofficial format specification:
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
package bzip2
import (
"fmt"
"hash/crc32"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
)
// There does not exist a formal specification of the BZip2 format. As such,
// much of this work is derived by either reverse engineering the original C
// source code or using secondary sources.
//
// Significant amounts of fuzz testing are done to ensure that outputs from
// this package are properly decoded by the C library. Furthermore, we test that
// both this package and the C library agree about what inputs are invalid.
//
// Compression stack:
// Run-length encoding 1 (RLE1)
// Burrows-Wheeler transform (BWT)
// Move-to-front transform (MTF)
// Run-length encoding 2 (RLE2)
// Prefix encoding (PE)
//
// References:
// http://bzip.org/
// https://en.wikipedia.org/wiki/Bzip2
// https://code.google.com/p/jbzip2/
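// A minimal round-trip sketch using this package's exported API (as
// declared in reader.go and writer.go in this commit); illustrative only:
//
//	var buf bytes.Buffer
//	zw, _ := bzip2.NewWriter(&buf, &bzip2.WriterConfig{Level: bzip2.BestCompression})
//	zw.Write([]byte("hello"))
//	zw.Close()
//	zr, _ := bzip2.NewReader(&buf, nil)
//	out, _ := ioutil.ReadAll(zr)
//	zr.Close()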
const (
BestSpeed = 1
BestCompression = 9
DefaultCompression = 6
)
const (
hdrMagic = 0x425a // Hex of "BZ"
blkMagic = 0x314159265359 // BCD of PI
endMagic = 0x177245385090 // BCD of sqrt(PI)
blockSize = 100000
)
func errorf(c int, f string, a ...interface{}) error {
return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
}
func panicf(c int, f string, a ...interface{}) {
errors.Panic(errorf(c, f, a...))
}
// errWrap converts a lower-level errors.Error to be one from this package.
// The replaceCode passed in will be used to replace the code for any errors
// with the errors.Invalid code.
//
// For the Reader, set this to errors.Corrupted.
// For the Writer, set this to errors.Internal.
func errWrap(err error, replaceCode int) error {
if cerr, ok := err.(errors.Error); ok {
if errors.IsInvalid(cerr) {
cerr.Code = replaceCode
}
err = errorf(cerr.Code, "%s", cerr.Msg)
}
return err
}
var errClosed = errorf(errors.Closed, "")
// crc computes the CRC-32 used by BZip2.
//
// The CRC-32 computation in bzip2 treats bytes as having bits in big-endian
// order. That is, the MSB is read before the LSB. Thus, we can use the
// standard library version of CRC-32 IEEE with some minor adjustments.
//
// The byte array is used as an intermediate buffer to swap the bits of every
// byte of the input.
type crc struct {
val uint32
buf [256]byte
}
// update computes the CRC-32 of appending buf to c.
func (c *crc) update(buf []byte) {
cval := internal.ReverseUint32(c.val)
for len(buf) > 0 {
n := len(buf)
if n > len(c.buf) {
n = len(c.buf)
}
for i, b := range buf[:n] {
c.buf[i] = internal.ReverseLUT[b]
}
cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
buf = buf[n:]
}
c.val = internal.ReverseUint32(cval)
}

vendor/github.com/dsnet/compress/bzip2/fuzz_off.go generated vendored Normal file
@@ -0,0 +1,13 @@
// Copyright 2016, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build !gofuzz
// This file exists to suppress fuzzing details from release builds.
package bzip2
type fuzzReader struct{}
func (*fuzzReader) updateChecksum(int64, uint32) {}

vendor/github.com/dsnet/compress/bzip2/fuzz_on.go generated vendored Normal file
@@ -0,0 +1,77 @@
// Copyright 2016, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// +build gofuzz
// This file exists to export internal implementation details for fuzz testing.
package bzip2
func ForwardBWT(buf []byte) (ptr int) {
var bwt burrowsWheelerTransform
return bwt.Encode(buf)
}
func ReverseBWT(buf []byte, ptr int) {
var bwt burrowsWheelerTransform
bwt.Decode(buf, ptr)
}
type fuzzReader struct {
Checksums Checksums
}
// updateChecksum updates Checksums.
//
// If a valid pos is provided, it appends the (pos, val) pair to the slice.
// Otherwise, it will update the last record with the new value.
func (fr *fuzzReader) updateChecksum(pos int64, val uint32) {
if pos >= 0 {
fr.Checksums = append(fr.Checksums, Checksum{pos, val})
} else {
fr.Checksums[len(fr.Checksums)-1].Value = val
}
}
type Checksum struct {
Offset int64 // Bit offset of the checksum
Value uint32 // Checksum value
}
type Checksums []Checksum
// Apply overwrites all checksum fields in d with the ones in cs.
func (cs Checksums) Apply(d []byte) []byte {
d = append([]byte(nil), d...)
for _, c := range cs {
setU32(d, c.Offset, c.Value)
}
return d
}
func setU32(d []byte, pos int64, val uint32) {
for i := uint(0); i < 32; i++ {
bpos := uint64(pos) + uint64(i)
d[bpos/8] &= ^byte(1 << (7 - bpos%8))
d[bpos/8] |= byte(val>>(31-i)) << (7 - bpos%8)
}
}
// Verify checks that all checksum fields in d match those in cs.
func (cs Checksums) Verify(d []byte) bool {
for _, c := range cs {
if getU32(d, c.Offset) != c.Value {
return false
}
}
return true
}
func getU32(d []byte, pos int64) (val uint32) {
for i := uint(0); i < 32; i++ {
bpos := uint64(pos) + uint64(i)
val |= (uint32(d[bpos/8] >> (7 - bpos%8))) << (31 - i)
}
return val
}

vendor/github.com/dsnet/compress/bzip2/internal/sais/sais.go generated vendored Normal file
@@ -0,0 +1,28 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Package sais implements a linear time suffix array algorithm.
package sais
//go:generate go run sais_gen.go byte sais_byte.go
//go:generate go run sais_gen.go int sais_int.go
// This package ports the C sais implementation by Yuta Mori. The ports are
// located in sais_byte.go and sais_int.go, which are identical to each other
// except for the types. Since Go does not support generics, we use generators to
// create the two files.
//
// References:
// https://sites.google.com/site/yuta256/sais
// https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings
// https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction
// ComputeSA computes the suffix array of t and places the result in sa.
// Both t and sa must be the same length.
func ComputeSA(t []byte, sa []int) {
if len(sa) != len(t) {
panic("mismatching sizes")
}
computeSA_byte(t, sa, 0, len(t), 256)
}
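
// Worked example (added for illustration): for t = []byte("banana"),
// ComputeSA yields sa = [5 3 1 0 4 2], the start offsets of the suffixes
// in sorted order: "a" < "ana" < "anana" < "banana" < "na" < "nana".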

vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go generated vendored Normal file
@@ -0,0 +1,661 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Code generated by sais_gen.go. DO NOT EDIT.
// ====================================================
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// ====================================================
package sais
func getCounts_byte(T []byte, C []int, n, k int) {
var i int
for i = 0; i < k; i++ {
C[i] = 0
}
for i = 0; i < n; i++ {
C[T[i]]++
}
}
func getBuckets_byte(C, B []int, k int, end bool) {
var i, sum int
if end {
for i = 0; i < k; i++ {
sum += C[i]
B[i] = sum
}
} else {
for i = 0; i < k; i++ {
sum += C[i]
B[i] = sum - C[i]
}
}
}
func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) {
var b, i, j int
var c0, c1 int
// Compute SAl.
if &C[0] == &B[0] {
getCounts_byte(T, C, n, k)
}
getBuckets_byte(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
j--
if int(T[j]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i = 0; i < n; i++ {
if j = SA[i]; j > 0 {
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
if int(T[j]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
SA[i] = 0
} else if j < 0 {
SA[i] = ^j
}
}
// Compute SAs.
if &C[0] == &B[0] {
getCounts_byte(T, C, n, k)
}
getBuckets_byte(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i = n - 1; i >= 0; i-- {
if j = SA[i]; j > 0 {
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
b--
if int(T[j]) > c1 {
SA[b] = ^(j + 1)
} else {
SA[b] = j
}
SA[i] = 0
}
}
}
func postProcLMS1_byte(T []byte, SA []int, n, m int) int {
var i, j, p, q, plen, qlen, name int
var c0, c1 int
var diff bool
// Compact all the sorted substrings into the first m items of SA.
// 2*m must be not larger than n (provable).
for i = 0; SA[i] < 0; i++ {
SA[i] = ^SA[i]
}
if i < m {
for j, i = i, i+1; ; i++ {
if p = SA[i]; p < 0 {
SA[j] = ^p
j++
SA[i] = 0
if j == m {
break
}
}
}
}
// Store the length of all substrings.
i = n - 1
j = n - 1
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
SA[m+((i+1)>>1)] = j - i
j = i + 1
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
// Find the lexicographic names of all substrings.
name = 0
qlen = 0
for i, q = 0, n; i < m; i++ {
p = SA[i]
plen = SA[m+(p>>1)]
diff = true
if (plen == qlen) && ((q + plen) < n) {
for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
}
if j == plen {
diff = false
}
}
if diff {
name++
q = p
qlen = plen
}
SA[m+(p>>1)] = name
}
return name
}
func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) {
var b, i, j, t, d int
var c0, c1 int
// Compute SAl.
getBuckets_byte(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
j--
if int(T[j]) < c1 {
t = 1
} else {
t = 0
}
j += n
if t&1 > 0 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i, d = 0, 0; i < n; i++ {
if j = SA[i]; j > 0 {
if n <= j {
d += 1
j -= n
}
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
t = int(c0) << 1
if int(T[j]) < c1 {
t |= 1
}
if D[t] != d {
j += n
D[t] = d
}
if t&1 > 0 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
SA[i] = 0
} else if j < 0 {
SA[i] = ^j
}
}
for i = n - 1; 0 <= i; i-- {
if SA[i] > 0 {
if SA[i] < n {
SA[i] += n
for j = i - 1; SA[j] < n; j-- {
}
SA[j] -= n
i = j
}
}
}
// Compute SAs.
getBuckets_byte(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i, d = n-1, d+1; i >= 0; i-- {
if j = SA[i]; j > 0 {
if n <= j {
d += 1
j -= n
}
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
t = int(c0) << 1
if int(T[j]) > c1 {
t |= 1
}
if D[t] != d {
j += n
D[t] = d
}
b--
if t&1 > 0 {
SA[b] = ^(j + 1)
} else {
SA[b] = j
}
SA[i] = 0
}
}
}
func postProcLMS2_byte(SA []int, n, m int) int {
var i, j, d, name int
// Compact all the sorted LMS substrings into the first m items of SA.
name = 0
for i = 0; SA[i] < 0; i++ {
j = ^SA[i]
if n <= j {
name += 1
}
SA[i] = j
}
if i < m {
for d, i = i, i+1; ; i++ {
if j = SA[i]; j < 0 {
j = ^j
if n <= j {
name += 1
}
SA[d] = j
d++
SA[i] = 0
if d == m {
break
}
}
}
}
if name < m {
// Store the lexicographic names.
for i, d = m-1, name+1; 0 <= i; i-- {
if j = SA[i]; n <= j {
j -= n
d--
}
SA[m+(j>>1)] = d
}
} else {
// Unset flags.
for i = 0; i < m; i++ {
if j = SA[i]; n <= j {
j -= n
SA[i] = j
}
}
}
return name
}
func induceSA_byte(T []byte, SA, C, B []int, n, k int) {
var b, i, j int
var c0, c1 int
// Compute SAl.
if &C[0] == &B[0] {
getCounts_byte(T, C, n, k)
}
getBuckets_byte(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
if j > 0 && int(T[j-1]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i = 0; i < n; i++ {
j = SA[i]
SA[i] = ^j
if j > 0 {
j--
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
if j > 0 && int(T[j-1]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
}
}
// Compute SAs.
if &C[0] == &B[0] {
getCounts_byte(T, C, n, k)
}
getBuckets_byte(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i = n - 1; i >= 0; i-- {
if j = SA[i]; j > 0 {
j--
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
b--
if (j == 0) || (int(T[j-1]) > c1) {
SA[b] = ^j
} else {
SA[b] = j
}
} else {
SA[i] = ^j
}
}
}
func computeSA_byte(T []byte, SA []int, fs, n, k int) {
const (
minBucketSize = 512
sortLMS2Limit = 0x3fffffff
)
var C, B, D, RA []int
var bo int // Offset of B relative to SA
var b, i, j, m, p, q, name, newfs int
var c0, c1 int
var flags uint
if k <= minBucketSize {
C = make([]int, k)
if k <= fs {
bo = n + fs - k
B = SA[bo:]
flags = 1
} else {
B = make([]int, k)
flags = 3
}
} else if k <= fs {
C = SA[n+fs-k:]
if k <= fs-k {
bo = n + fs - 2*k
B = SA[bo:]
flags = 0
} else if k <= 4*minBucketSize {
B = make([]int, k)
flags = 2
} else {
B = C
flags = 8
}
} else {
C = make([]int, k)
B = C
flags = 4 | 8
}
if n <= sortLMS2Limit && 2 <= (n/k) {
if flags&1 > 0 {
if 2*k <= fs-k {
flags |= 32
} else {
flags |= 16
}
} else if flags == 0 && 2*k <= (fs-2*k) {
flags |= 32
}
}
// Stage 1: Reduce the problem by at least 1/2.
// Sort all the LMS-substrings.
getCounts_byte(T, C, n, k)
getBuckets_byte(C, B, k, true) // Find ends of buckets
for i = 0; i < n; i++ {
SA[i] = 0
}
b = -1
i = n - 1
j = n
m = 0
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
if b >= 0 {
SA[b] = j
}
B[c1]--
b = B[c1]
j = i
m++
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
if m > 1 {
if flags&(16|32) > 0 {
if flags&16 > 0 {
D = make([]int, 2*k)
} else {
D = SA[bo-2*k:]
}
B[T[j+1]]++
for i, j = 0, 0; i < k; i++ {
j += C[i]
if B[i] != j {
SA[B[i]] += n
}
D[i] = 0
D[i+k] = 0
}
sortLMS2_byte(T, SA, C, B, D, n, k)
name = postProcLMS2_byte(SA, n, m)
} else {
sortLMS1_byte(T, SA, C, B, n, k)
name = postProcLMS1_byte(T, SA, n, m)
}
} else if m == 1 {
SA[b] = j + 1
name = 1
} else {
name = 0
}
// Stage 2: Solve the reduced problem.
// Recurse if names are not yet unique.
if name < m {
newfs = n + fs - 2*m
if flags&(1|4|8) == 0 {
if k+name <= newfs {
newfs -= k
} else {
flags |= 8
}
}
RA = SA[m+newfs:]
for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
if SA[i] != 0 {
RA[j] = SA[i] - 1
j--
}
}
computeSA_int(RA, SA, newfs, m, name)
i = n - 1
j = m - 1
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
RA[j] = i + 1
j--
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
for i = 0; i < m; i++ {
SA[i] = RA[SA[i]]
}
if flags&4 > 0 {
B = make([]int, k)
C = B
}
if flags&2 > 0 {
B = make([]int, k)
}
}
// Stage 3: Induce the result for the original problem.
if flags&8 > 0 {
getCounts_byte(T, C, n, k)
}
// Put all left-most S characters into their buckets.
if m > 1 {
getBuckets_byte(C, B, k, true) // Find ends of buckets
i = m - 1
j = n
p = SA[m-1]
c1 = int(T[p])
for {
c0 = c1
q = B[c0]
for q < j {
j--
SA[j] = 0
}
for {
j--
SA[j] = p
if i--; i < 0 {
break
}
p = SA[i]
if c1 = int(T[p]); c1 != c0 {
break
}
}
if i < 0 {
break
}
}
for j > 0 {
j--
SA[j] = 0
}
}
induceSA_byte(T, SA, C, B, n, k)
}

vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_int.go generated vendored Normal file
@@ -0,0 +1,661 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
// Code generated by sais_gen.go. DO NOT EDIT.
// ====================================================
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
// ====================================================
package sais
func getCounts_int(T []int, C []int, n, k int) {
var i int
for i = 0; i < k; i++ {
C[i] = 0
}
for i = 0; i < n; i++ {
C[T[i]]++
}
}
func getBuckets_int(C, B []int, k int, end bool) {
var i, sum int
if end {
for i = 0; i < k; i++ {
sum += C[i]
B[i] = sum
}
} else {
for i = 0; i < k; i++ {
sum += C[i]
B[i] = sum - C[i]
}
}
}
func sortLMS1_int(T []int, SA, C, B []int, n, k int) {
var b, i, j int
var c0, c1 int
// Compute SAl.
if &C[0] == &B[0] {
getCounts_int(T, C, n, k)
}
getBuckets_int(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
j--
if int(T[j]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i = 0; i < n; i++ {
if j = SA[i]; j > 0 {
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
if int(T[j]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
SA[i] = 0
} else if j < 0 {
SA[i] = ^j
}
}
// Compute SAs.
if &C[0] == &B[0] {
getCounts_int(T, C, n, k)
}
getBuckets_int(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i = n - 1; i >= 0; i-- {
if j = SA[i]; j > 0 {
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
b--
if int(T[j]) > c1 {
SA[b] = ^(j + 1)
} else {
SA[b] = j
}
SA[i] = 0
}
}
}
func postProcLMS1_int(T []int, SA []int, n, m int) int {
var i, j, p, q, plen, qlen, name int
var c0, c1 int
var diff bool
// Compact all the sorted substrings into the first m items of SA.
// 2*m must be not larger than n (provable).
for i = 0; SA[i] < 0; i++ {
SA[i] = ^SA[i]
}
if i < m {
for j, i = i, i+1; ; i++ {
if p = SA[i]; p < 0 {
SA[j] = ^p
j++
SA[i] = 0
if j == m {
break
}
}
}
}
// Store the length of all substrings.
i = n - 1
j = n - 1
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
SA[m+((i+1)>>1)] = j - i
j = i + 1
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
// Find the lexicographic names of all substrings.
name = 0
qlen = 0
for i, q = 0, n; i < m; i++ {
p = SA[i]
plen = SA[m+(p>>1)]
diff = true
if (plen == qlen) && ((q + plen) < n) {
for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
}
if j == plen {
diff = false
}
}
if diff {
name++
q = p
qlen = plen
}
SA[m+(p>>1)] = name
}
return name
}
func sortLMS2_int(T []int, SA, C, B, D []int, n, k int) {
var b, i, j, t, d int
var c0, c1 int
// Compute SAl.
getBuckets_int(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
j--
if int(T[j]) < c1 {
t = 1
} else {
t = 0
}
j += n
if t&1 > 0 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i, d = 0, 0; i < n; i++ {
if j = SA[i]; j > 0 {
if n <= j {
d += 1
j -= n
}
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
t = int(c0) << 1
if int(T[j]) < c1 {
t |= 1
}
if D[t] != d {
j += n
D[t] = d
}
if t&1 > 0 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
SA[i] = 0
} else if j < 0 {
SA[i] = ^j
}
}
for i = n - 1; 0 <= i; i-- {
if SA[i] > 0 {
if SA[i] < n {
SA[i] += n
for j = i - 1; SA[j] < n; j-- {
}
SA[j] -= n
i = j
}
}
}
// Compute SAs.
getBuckets_int(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i, d = n-1, d+1; i >= 0; i-- {
if j = SA[i]; j > 0 {
if n <= j {
d += 1
j -= n
}
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
j--
t = int(c0) << 1
if int(T[j]) > c1 {
t |= 1
}
if D[t] != d {
j += n
D[t] = d
}
b--
if t&1 > 0 {
SA[b] = ^(j + 1)
} else {
SA[b] = j
}
SA[i] = 0
}
}
}
func postProcLMS2_int(SA []int, n, m int) int {
var i, j, d, name int
// Compact all the sorted LMS substrings into the first m items of SA.
name = 0
for i = 0; SA[i] < 0; i++ {
j = ^SA[i]
if n <= j {
name += 1
}
SA[i] = j
}
if i < m {
for d, i = i, i+1; ; i++ {
if j = SA[i]; j < 0 {
j = ^j
if n <= j {
name += 1
}
SA[d] = j
d++
SA[i] = 0
if d == m {
break
}
}
}
}
if name < m {
// Store the lexicographic names.
for i, d = m-1, name+1; 0 <= i; i-- {
if j = SA[i]; n <= j {
j -= n
d--
}
SA[m+(j>>1)] = d
}
} else {
// Unset flags.
for i = 0; i < m; i++ {
if j = SA[i]; n <= j {
j -= n
SA[i] = j
}
}
}
return name
}
func induceSA_int(T []int, SA, C, B []int, n, k int) {
var b, i, j int
var c0, c1 int
// Compute SAl.
if &C[0] == &B[0] {
getCounts_int(T, C, n, k)
}
getBuckets_int(C, B, k, false) // Find starts of buckets
j = n - 1
c1 = int(T[j])
b = B[c1]
if j > 0 && int(T[j-1]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
for i = 0; i < n; i++ {
j = SA[i]
SA[i] = ^j
if j > 0 {
j--
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
if j > 0 && int(T[j-1]) < c1 {
SA[b] = ^j
} else {
SA[b] = j
}
b++
}
}
// Compute SAs.
if &C[0] == &B[0] {
getCounts_int(T, C, n, k)
}
getBuckets_int(C, B, k, true) // Find ends of buckets
c1 = 0
b = B[c1]
for i = n - 1; i >= 0; i-- {
if j = SA[i]; j > 0 {
j--
if c0 = int(T[j]); c0 != c1 {
B[c1] = b
c1 = c0
b = B[c1]
}
b--
if (j == 0) || (int(T[j-1]) > c1) {
SA[b] = ^j
} else {
SA[b] = j
}
} else {
SA[i] = ^j
}
}
}
func computeSA_int(T []int, SA []int, fs, n, k int) {
const (
minBucketSize = 512
sortLMS2Limit = 0x3fffffff
)
var C, B, D, RA []int
var bo int // Offset of B relative to SA
var b, i, j, m, p, q, name, newfs int
var c0, c1 int
var flags uint
if k <= minBucketSize {
C = make([]int, k)
if k <= fs {
bo = n + fs - k
B = SA[bo:]
flags = 1
} else {
B = make([]int, k)
flags = 3
}
} else if k <= fs {
C = SA[n+fs-k:]
if k <= fs-k {
bo = n + fs - 2*k
B = SA[bo:]
flags = 0
} else if k <= 4*minBucketSize {
B = make([]int, k)
flags = 2
} else {
B = C
flags = 8
}
} else {
C = make([]int, k)
B = C
flags = 4 | 8
}
if n <= sortLMS2Limit && 2 <= (n/k) {
if flags&1 > 0 {
if 2*k <= fs-k {
flags |= 32
} else {
flags |= 16
}
} else if flags == 0 && 2*k <= (fs-2*k) {
flags |= 32
}
}
// Stage 1: Reduce the problem by at least 1/2.
// Sort all the LMS-substrings.
getCounts_int(T, C, n, k)
getBuckets_int(C, B, k, true) // Find ends of buckets
for i = 0; i < n; i++ {
SA[i] = 0
}
b = -1
i = n - 1
j = n
m = 0
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
if b >= 0 {
SA[b] = j
}
B[c1]--
b = B[c1]
j = i
m++
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
if m > 1 {
if flags&(16|32) > 0 {
if flags&16 > 0 {
D = make([]int, 2*k)
} else {
D = SA[bo-2*k:]
}
B[T[j+1]]++
for i, j = 0, 0; i < k; i++ {
j += C[i]
if B[i] != j {
SA[B[i]] += n
}
D[i] = 0
D[i+k] = 0
}
sortLMS2_int(T, SA, C, B, D, n, k)
name = postProcLMS2_int(SA, n, m)
} else {
sortLMS1_int(T, SA, C, B, n, k)
name = postProcLMS1_int(T, SA, n, m)
}
} else if m == 1 {
SA[b] = j + 1
name = 1
} else {
name = 0
}
// Stage 2: Solve the reduced problem.
// Recurse if names are not yet unique.
if name < m {
newfs = n + fs - 2*m
if flags&(1|4|8) == 0 {
if k+name <= newfs {
newfs -= k
} else {
flags |= 8
}
}
RA = SA[m+newfs:]
for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
if SA[i] != 0 {
RA[j] = SA[i] - 1
j--
}
}
computeSA_int(RA, SA, newfs, m, name)
i = n - 1
j = m - 1
c0 = int(T[n-1])
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
for i >= 0 {
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 > c1 {
break
}
}
if i >= 0 {
RA[j] = i + 1
j--
for {
c1 = c0
if i--; i < 0 {
break
}
if c0 = int(T[i]); c0 < c1 {
break
}
}
}
}
for i = 0; i < m; i++ {
SA[i] = RA[SA[i]]
}
if flags&4 > 0 {
B = make([]int, k)
C = B
}
if flags&2 > 0 {
B = make([]int, k)
}
}
// Stage 3: Induce the result for the original problem.
if flags&8 > 0 {
getCounts_int(T, C, n, k)
}
// Put all left-most S characters into their buckets.
if m > 1 {
getBuckets_int(C, B, k, true) // Find ends of buckets
i = m - 1
j = n
p = SA[m-1]
c1 = int(T[p])
for {
c0 = c1
q = B[c0]
for q < j {
j--
SA[j] = 0
}
for {
j--
SA[j] = p
if i--; i < 0 {
break
}
p = SA[i]
if c1 = int(T[p]); c1 != c0 {
break
}
}
if i < 0 {
break
}
}
for j > 0 {
j--
SA[j] = 0
}
}
induceSA_int(T, SA, C, B, n, k)
}

vendor/github.com/dsnet/compress/bzip2/mtf_rle2.go generated vendored Normal file
@@ -0,0 +1,131 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import "github.com/dsnet/compress/internal/errors"
// moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
// Any runs of zeros in the encoded output will be replaced by a sequence of
// RUNA and RUNB symbols that encode the length of the run.
//
// The run lengths can actually be encoded and decoded using normal
// two's complement arithmetic. The methodology for doing so is below.
//
// Assuming the following:
// num: The value being encoded by RLE encoding.
// run: A sequence of RUNA and RUNB symbols represented as a binary integer,
// where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
// symbols are at the least-significant bit positions.
// cnt: The number of RUNA and RUNB symbols.
//
// Then the RLE encoding used by bzip2 has this mathematical property:
// num+1 == (1<<cnt) | run
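//
// Worked example (added for illustration): a run of num=5 zeros encodes as
// num+1 = 6 = 0b110 = (1<<2)|0b10, so cnt=2 and run=0b10; reading run from
// its least-significant bit upward gives the symbol sequence RUNA, RUNB.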
type moveToFront struct {
dictBuf [256]uint8
dictLen int
vals []byte
syms []uint16
blkSize int
}
func (mtf *moveToFront) Init(dict []uint8, blkSize int) {
if len(dict) > len(mtf.dictBuf) {
panicf(errors.Internal, "alphabet too large")
}
copy(mtf.dictBuf[:], dict)
mtf.dictLen = len(dict)
mtf.blkSize = blkSize
}
func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) {
dict := mtf.dictBuf[:mtf.dictLen]
syms = mtf.syms[:0]
if len(vals) > mtf.blkSize {
panicf(errors.Internal, "exceeded block size")
}
var lastNum uint32
for _, val := range vals {
// Normal move-to-front transform.
var idx uint8 // Reverse lookup idx in dict
for di, dv := range dict {
if dv == val {
idx = uint8(di)
break
}
}
copy(dict[1:], dict[:idx])
dict[0] = val
// Run-length encoding augmentation.
if idx == 0 {
lastNum++
continue
}
if lastNum > 0 {
for rc := lastNum + 1; rc != 1; rc >>= 1 {
syms = append(syms, uint16(rc&1))
}
lastNum = 0
}
syms = append(syms, uint16(idx)+1)
}
if lastNum > 0 {
for rc := lastNum + 1; rc != 1; rc >>= 1 {
syms = append(syms, uint16(rc&1))
}
}
mtf.syms = syms
return syms
}
func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) {
dict := mtf.dictBuf[:mtf.dictLen]
vals = mtf.vals[:0]
var lastCnt uint
var lastRun uint32
for _, sym := range syms {
// Run-length encoding augmentation.
if sym < 2 {
lastRun |= uint32(sym) << lastCnt
lastCnt++
continue
}
if lastCnt > 0 {
cnt := int((1<<lastCnt)|lastRun) - 1
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
for i := cnt; i > 0; i-- {
vals = append(vals, dict[0])
}
lastCnt, lastRun = 0, 0
}
// Normal move-to-front transform.
val := dict[sym-1] // Forward lookup val in dict
copy(dict[1:], dict[:sym-1])
dict[0] = val
if len(vals) >= mtf.blkSize {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
vals = append(vals, val)
}
if lastCnt > 0 {
cnt := int((1<<lastCnt)|lastRun) - 1
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
for i := cnt; i > 0; i-- {
vals = append(vals, dict[0])
}
}
mtf.vals = vals
return vals
}

vendor/github.com/dsnet/compress/bzip2/prefix.go generated vendored Normal file
@@ -0,0 +1,374 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import (
"io"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
"github.com/dsnet/compress/internal/prefix"
)
const (
minNumTrees = 2
maxNumTrees = 6
maxPrefixBits = 20 // Maximum bit-width of a prefix code
maxNumSyms = 256 + 2 // Maximum number of symbols in the alphabet
numBlockSyms = 50 // Number of symbols covered by each tree selector
)
// encSel and decSel are used to handle the prefix encoding for tree selectors.
// The prefix encoding is as follows:
//
// Code TreeIdx
// 0 <=> 0
// 10 <=> 1
// 110 <=> 2
// 1110 <=> 3
// 11110 <=> 4
// 111110 <=> 5
// 111111 <=> 6 Invalid tree index, so should fail
//
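// For example, the selector bit string "110" decodes to tree index 2, and
// "111110" decodes to tree index 5; "111111" maps to the invalid index 6.
//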
var encSel, decSel = func() (e prefix.Encoder, d prefix.Decoder) {
var selCodes [maxNumTrees + 1]prefix.PrefixCode
for i := range selCodes {
selCodes[i] = prefix.PrefixCode{Sym: uint32(i), Len: uint32(i + 1)}
}
selCodes[maxNumTrees] = prefix.PrefixCode{Sym: maxNumTrees, Len: maxNumTrees}
prefix.GeneratePrefixes(selCodes[:])
e.Init(selCodes[:])
d.Init(selCodes[:])
return
}()
type prefixReader struct{ prefix.Reader }
func (pr *prefixReader) Init(r io.Reader) {
pr.Reader.Init(r, true)
}
func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 {
if nb <= 32 {
v := uint32(pr.ReadBits(nb))
return uint64(internal.ReverseUint32N(v, nb))
}
v0 := internal.ReverseUint32(uint32(pr.ReadBits(32)))
v1 := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32)))
v := uint64(v0)<<32 | uint64(v1)
return v >> (64 - nb)
}
func (pr *prefixReader) ReadPrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Decoder) {
for i, pc := range codes {
clen := int(pr.ReadBitsBE64(5))
sum := 1 << maxPrefixBits
for sym := range pc {
for {
if clen < 1 || clen > maxPrefixBits {
panicf(errors.Corrupted, "invalid prefix bit-length: %d", clen)
}
b, ok := pr.TryReadBits(1)
if !ok {
b = pr.ReadBits(1)
}
if b == 0 {
break
}
b, ok = pr.TryReadBits(1)
if !ok {
b = pr.ReadBits(1)
}
clen -= int(b*2) - 1 // +1 or -1
}
pc[sym] = prefix.PrefixCode{Sym: uint32(sym), Len: uint32(clen)}
sum -= (1 << maxPrefixBits) >> uint(clen)
}
if sum == 0 {
// Fast path, but only handles complete trees.
if err := prefix.GeneratePrefixes(pc); err != nil {
errors.Panic(err) // Using complete trees; should never fail
}
} else {
// Slow path, but handles anything.
pc = handleDegenerateCodes(pc) // Never fails, but may fail later
codes[i] = pc
}
trees[i].Init(pc)
}
}
type prefixWriter struct{ prefix.Writer }
func (pw *prefixWriter) Init(w io.Writer) {
pw.Writer.Init(w, true)
}
func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) {
if nb <= 32 {
v := internal.ReverseUint32N(uint32(v), nb)
pw.WriteBits(uint(v), nb)
return
}
v <<= (64 - nb)
v0 := internal.ReverseUint32(uint32(v >> 32))
v1 := internal.ReverseUint32(uint32(v))
pw.WriteBits(uint(v0), 32)
pw.WriteBits(uint(v1), nb-32)
return
}
func (pw *prefixWriter) WritePrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Encoder) {
for i, pc := range codes {
if err := prefix.GeneratePrefixes(pc); err != nil {
errors.Panic(err) // Using complete trees; should never fail
}
trees[i].Init(pc)
clen := int(pc[0].Len)
pw.WriteBitsBE64(uint64(clen), 5)
for _, c := range pc {
for int(c.Len) < clen {
pw.WriteBits(3, 2) // 11
clen--
}
for int(c.Len) > clen {
pw.WriteBits(1, 2) // 10
clen++
}
pw.WriteBits(0, 1)
}
}
}
// handleDegenerateCodes converts a degenerate tree into a canonical tree.
//
// For example, when the input is an under-subscribed tree:
// input: []PrefixCode{
// {Sym: 0, Len: 3},
// {Sym: 1, Len: 4},
// {Sym: 2, Len: 3},
// }
// output: []PrefixCode{
// {Sym: 0, Len: 3, Val: 0}, // 000
// {Sym: 1, Len: 4, Val: 2}, // 0010
// {Sym: 2, Len: 3, Val: 4}, // 100
// {Sym: 258, Len: 4, Val: 10}, // 1010
// {Sym: 259, Len: 3, Val: 6}, // 110
// {Sym: 260, Len: 1, Val: 1}, // 1
// }
//
// For example, when the input is an over-subscribed tree:
// input: []PrefixCode{
// {Sym: 0, Len: 1},
// {Sym: 1, Len: 3},
// {Sym: 2, Len: 4},
// {Sym: 3, Len: 3},
// {Sym: 4, Len: 2},
// }
// output: []PrefixCode{
// {Sym: 0, Len: 1, Val: 0}, // 0
// {Sym: 1, Len: 3, Val: 3}, // 011
// {Sym: 3, Len: 3, Val: 7}, // 111
// {Sym: 4, Len: 2, Val: 1}, // 01
// }
func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes {
// Since there is no formal definition for the BZip2 format, there is no
// specification that says that the code lengths must form a complete
// prefix tree (i.e., it is neither over-subscribed nor under-subscribed).
// Thus, the original C implementation becomes the reference for how prefix
// decoding is done in these edge cases. Unfortunately, the C version does
// not error when an invalid tree is used, but rather allows decoding to
// continue and only errors if some bit pattern happens to cause an error.
// Thus, it is possible for an invalid tree to end up decoding an input
// "properly" so long as invalid bit patterns are not present. In order to
// replicate this non-specified behavior, we use a ported version of the
// C code to generate the codes as a valid canonical tree by substituting
// invalid nodes with invalid symbols.
//
// ====================================================
// This program, "bzip2", the associated library "libbzip2", and all
// documentation, are copyright (C) 1996-2010 Julian R Seward. All
// rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 3. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 4. The name of the author may not be used to endorse or promote
// products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Julian Seward, jseward@bzip.org
// bzip2/libbzip2 version 1.0.6 of 6 September 2010
// ====================================================
var (
limits [maxPrefixBits + 2]int32
bases [maxPrefixBits + 2]int32
perms [maxNumSyms]int32
minLen = uint32(maxPrefixBits)
maxLen = uint32(0)
)
const (
statusOkay = iota
statusInvalid
statusNeedBits
statusMaxBits
)
// createTables is the BZ2_hbCreateDecodeTables function from the C code.
createTables := func(codes []prefix.PrefixCode) {
for _, c := range codes {
if c.Len > maxLen {
maxLen = c.Len
}
if c.Len < minLen {
minLen = c.Len
}
}
var pp int
for i := minLen; i <= maxLen; i++ {
for j, c := range codes {
if c.Len == i {
perms[pp] = int32(j)
pp++
}
}
}
var vec int32
for _, c := range codes {
bases[c.Len+1]++
}
for i := 1; i < len(bases); i++ {
bases[i] += bases[i-1]
}
for i := minLen; i <= maxLen; i++ {
vec += bases[i+1] - bases[i]
limits[i] = vec - 1
vec <<= 1
}
for i := minLen + 1; i <= maxLen; i++ {
bases[i] = ((limits[i-1] + 1) << 1) - bases[i]
}
}
// getSymbol is the GET_MTF_VAL macro from the C code.
getSymbol := func(c prefix.PrefixCode) (uint32, int) {
v := internal.ReverseUint32(c.Val)
n := c.Len
zn := minLen
if zn > n {
return 0, statusNeedBits
}
zvec := int32(v >> (32 - zn))
v <<= zn
for {
if zn > maxLen {
return 0, statusMaxBits
}
if zvec <= limits[zn] {
break
}
zn++
if zn > n {
return 0, statusNeedBits
}
zvec = (zvec << 1) | int32(v>>31)
v <<= 1
}
if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms {
return 0, statusInvalid
}
return uint32(perms[zvec-bases[zn]]), statusOkay
}
// Step 1: Create the prefix trees using the C algorithm.
createTables(codes)
// Step 2: Starting with the shortest bit pattern, explore the whole tree.
// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen).
// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms).
var pcodesArr [2 * maxNumSyms]prefix.PrefixCode
pcodes := pcodesArr[:maxNumSyms]
var exploreCode func(prefix.PrefixCode) bool
exploreCode = func(c prefix.PrefixCode) (term bool) {
sym, status := getSymbol(c)
switch status {
case statusOkay:
// This code is valid, so insert it.
c.Sym = sym
pcodes[sym] = c
term = true
case statusInvalid:
// This code is invalid, so insert an invalid symbol.
c.Sym = uint32(len(pcodes))
pcodes = append(pcodes, c)
term = true
case statusNeedBits:
// This code is too short, so explore both children.
c.Len++
c0, c1 := c, c
c1.Val |= 1 << (c.Len - 1)
b0 := exploreCode(c0)
b1 := exploreCode(c1)
switch {
case !b0 && b1:
c0.Sym = uint32(len(pcodes))
pcodes = append(pcodes, c0)
case !b1 && b0:
c1.Sym = uint32(len(pcodes))
pcodes = append(pcodes, c1)
}
term = b0 || b1
case statusMaxBits:
// This code is too long, so report it upstream.
term = false
}
return term // Did this code terminate?
}
exploreCode(prefix.PrefixCode{})
// Step 3: Copy new sparse codes to old output codes.
codes = codes[:0]
for _, c := range pcodes {
if c.Len > 0 {
codes = append(codes, c)
}
}
return codes
}

vendor/github.com/dsnet/compress/bzip2/reader.go generated vendored Normal file
@@ -0,0 +1,274 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import (
"io"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
"github.com/dsnet/compress/internal/prefix"
)
type Reader struct {
InputOffset int64 // Total number of bytes read from underlying io.Reader
OutputOffset int64 // Total number of bytes emitted from Read
rd prefixReader
err error
level int // The current compression level
rdHdrFtr int // Number of times we read the stream header and footer
blkCRC uint32 // CRC-32 IEEE of each block (as stored)
endCRC uint32 // Checksum of all blocks using bzip2's custom method
crc crc
mtf moveToFront
bwt burrowsWheelerTransform
rle runLengthEncoding
// These fields are allocated with Reader and re-used later.
treeSels []uint8
codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
codes1D [maxNumTrees]prefix.PrefixCodes
trees1D [maxNumTrees]prefix.Decoder
syms []uint16
fuzzReader // Exported functionality when fuzz testing
}
type ReaderConfig struct {
_ struct{} // Blank field to prevent unkeyed struct literals
}
func NewReader(r io.Reader, conf *ReaderConfig) (*Reader, error) {
zr := new(Reader)
zr.Reset(r)
return zr, nil
}
func (zr *Reader) Reset(r io.Reader) error {
*zr = Reader{
rd: zr.rd,
mtf: zr.mtf,
bwt: zr.bwt,
rle: zr.rle,
treeSels: zr.treeSels,
trees1D: zr.trees1D,
syms: zr.syms,
}
zr.rd.Init(r)
return nil
}
func (zr *Reader) Read(buf []byte) (int, error) {
for {
cnt, err := zr.rle.Read(buf)
if err != rleDone && zr.err == nil {
zr.err = err
}
if cnt > 0 {
zr.crc.update(buf[:cnt])
zr.OutputOffset += int64(cnt)
return cnt, nil
}
if zr.err != nil || len(buf) == 0 {
return 0, zr.err
}
// Read the next chunk.
zr.rd.Offset = zr.InputOffset
func() {
defer errors.Recover(&zr.err)
if zr.rdHdrFtr%2 == 0 {
// Check if we are already at EOF.
if err := zr.rd.PullBits(1); err != nil {
if err == io.ErrUnexpectedEOF && zr.rdHdrFtr > 0 {
err = io.EOF // EOF is okay if we read at least one stream
}
errors.Panic(err)
}
// Read stream header.
if zr.rd.ReadBitsBE64(16) != hdrMagic {
panicf(errors.Corrupted, "invalid stream magic")
}
if ver := zr.rd.ReadBitsBE64(8); ver != 'h' {
if ver == '0' {
panicf(errors.Deprecated, "bzip1 format is not supported")
}
panicf(errors.Corrupted, "invalid version: %q", ver)
}
lvl := int(zr.rd.ReadBitsBE64(8)) - '0'
if lvl < BestSpeed || lvl > BestCompression {
panicf(errors.Corrupted, "invalid block size: %d", lvl*blockSize)
}
zr.level = lvl
zr.rdHdrFtr++
} else {
// Check and update the CRC.
if internal.GoFuzz {
zr.updateChecksum(-1, zr.crc.val) // Update with value
zr.blkCRC = zr.crc.val // Suppress CRC failures
}
if zr.blkCRC != zr.crc.val {
panicf(errors.Corrupted, "mismatching block checksum")
}
zr.endCRC = (zr.endCRC<<1 | zr.endCRC>>31) ^ zr.blkCRC
}
buf := zr.decodeBlock()
zr.rle.Init(buf)
}()
if zr.InputOffset, err = zr.rd.Flush(); zr.err == nil {
zr.err = err
}
if zr.err != nil {
zr.err = errWrap(zr.err, errors.Corrupted)
return 0, zr.err
}
}
}
func (zr *Reader) Close() error {
if zr.err == io.EOF || zr.err == errClosed {
zr.rle.Init(nil) // Make sure future reads fail
zr.err = errClosed
return nil
}
return zr.err // Return the persistent error
}
func (zr *Reader) decodeBlock() []byte {
if magic := zr.rd.ReadBitsBE64(48); magic != blkMagic {
if magic == endMagic {
endCRC := uint32(zr.rd.ReadBitsBE64(32))
if internal.GoFuzz {
zr.updateChecksum(zr.rd.BitsRead()-32, zr.endCRC)
endCRC = zr.endCRC // Suppress CRC failures
}
if zr.endCRC != endCRC {
panicf(errors.Corrupted, "mismatching stream checksum")
}
zr.endCRC = 0
zr.rd.ReadPads()
zr.rdHdrFtr++
return nil
}
panicf(errors.Corrupted, "invalid block or footer magic")
}
zr.crc.val = 0
zr.blkCRC = uint32(zr.rd.ReadBitsBE64(32))
if internal.GoFuzz {
zr.updateChecksum(zr.rd.BitsRead()-32, 0) // Record offset only
}
if zr.rd.ReadBitsBE64(1) != 0 {
panicf(errors.Deprecated, "block randomization is not supported")
}
// Read BWT related fields.
ptr := int(zr.rd.ReadBitsBE64(24)) // BWT origin pointer
// Read MTF related fields.
var dictArr [256]uint8
dict := dictArr[:0]
bmapHi := uint16(zr.rd.ReadBits(16))
for i := 0; i < 256; i, bmapHi = i+16, bmapHi>>1 {
if bmapHi&1 > 0 {
bmapLo := uint16(zr.rd.ReadBits(16))
for j := 0; j < 16; j, bmapLo = j+1, bmapLo>>1 {
if bmapLo&1 > 0 {
dict = append(dict, uint8(i+j))
}
}
}
}
// Step 1: Prefix encoding.
syms := zr.decodePrefix(len(dict))
// Step 2: Move-to-front transform and run-length encoding.
zr.mtf.Init(dict, zr.level*blockSize)
buf := zr.mtf.Decode(syms)
// Step 3: Burrows-Wheeler transformation.
if ptr >= len(buf) {
panicf(errors.Corrupted, "origin pointer (0x%06x) exceeds block size: %d", ptr, len(buf))
}
zr.bwt.Decode(buf, ptr)
return buf
}
func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) {
numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols
if numSyms < 3 {
panicf(errors.Corrupted, "not enough prefix symbols: %d", numSyms)
}
// Read information about the trees and tree selectors.
var mtf internal.MoveToFront
numTrees := int(zr.rd.ReadBitsBE64(3))
if numTrees < minNumTrees || numTrees > maxNumTrees {
panicf(errors.Corrupted, "invalid number of prefix trees: %d", numTrees)
}
numSels := int(zr.rd.ReadBitsBE64(15))
if cap(zr.treeSels) < numSels {
zr.treeSels = make([]uint8, numSels)
}
treeSels := zr.treeSels[:numSels]
for i := range treeSels {
sym, ok := zr.rd.TryReadSymbol(&decSel)
if !ok {
sym = zr.rd.ReadSymbol(&decSel)
}
if int(sym) >= numTrees {
panicf(errors.Corrupted, "invalid prefix tree selector: %d", sym)
}
treeSels[i] = uint8(sym)
}
mtf.Decode(treeSels)
zr.treeSels = treeSels
// Initialize prefix codes.
for i := range zr.codes2D[:numTrees] {
zr.codes1D[i] = zr.codes2D[i][:numSyms]
}
zr.rd.ReadPrefixCodes(zr.codes1D[:numTrees], zr.trees1D[:numTrees])
// Read prefix encoded symbols of compressed data.
var tree *prefix.Decoder
var blkLen, selIdx int
syms = zr.syms[:0]
for {
if blkLen == 0 {
blkLen = numBlockSyms
if selIdx >= len(treeSels) {
panicf(errors.Corrupted, "not enough prefix tree selectors")
}
tree = &zr.trees1D[treeSels[selIdx]]
selIdx++
}
blkLen--
sym, ok := zr.rd.TryReadSymbol(tree)
if !ok {
sym = zr.rd.ReadSymbol(tree)
}
if int(sym) == numSyms-1 {
break // EOF marker
}
if int(sym) >= numSyms {
panicf(errors.Corrupted, "invalid prefix symbol: %d", sym)
}
if len(syms) >= zr.level*blockSize {
panicf(errors.Corrupted, "number of prefix symbols exceeds block size")
}
syms = append(syms, uint16(sym))
}
zr.syms = syms
return syms
}

vendor/github.com/dsnet/compress/bzip2/rle1.go generated vendored Normal file
@@ -0,0 +1,101 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import "github.com/dsnet/compress/internal/errors"
// rleDone is a special "error" to indicate that the RLE stage is done.
var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")
// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
// single byte representing the repeat length. Similar to the C bzip2
// implementation, the encoder will always terminate repeat sequences with a
// count (even if it is the end of the buffer), and it will also never produce
// run lengths of 256..259. The decoder can handle the latter case.
//
// For example, if the input was:
// input: "AAAAAAABBBBCCCD"
//
// Then the output will be:
// output: "AAAA\x03BBBB\x00CCCD"
type runLengthEncoding struct {
buf []byte
idx int
lastVal byte
lastCnt int
}
func (rle *runLengthEncoding) Init(buf []byte) {
*rle = runLengthEncoding{buf: buf}
}
func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
for i, b := range buf {
if rle.lastVal != b {
rle.lastCnt = 0
}
rle.lastCnt++
switch {
case rle.lastCnt < 4:
if rle.idx >= len(rle.buf) {
return i, rleDone
}
rle.buf[rle.idx] = b
rle.idx++
case rle.lastCnt == 4:
if rle.idx+1 >= len(rle.buf) {
return i, rleDone
}
rle.buf[rle.idx] = b
rle.idx++
rle.buf[rle.idx] = 0
rle.idx++
case rle.lastCnt < 256:
rle.buf[rle.idx-1]++
default:
if rle.idx >= len(rle.buf) {
return i, rleDone
}
rle.lastCnt = 1
rle.buf[rle.idx] = b
rle.idx++
}
rle.lastVal = b
}
return len(buf), nil
}
func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
for i := range buf {
switch {
case rle.lastCnt == -4:
if rle.idx >= len(rle.buf) {
return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
}
rle.lastCnt = int(rle.buf[rle.idx])
rle.idx++
if rle.lastCnt > 0 {
break // Break the switch
}
fallthrough // Count was zero, continue the work
case rle.lastCnt <= 0:
if rle.idx >= len(rle.buf) {
return i, rleDone
}
b := rle.buf[rle.idx]
rle.idx++
if b != rle.lastVal {
rle.lastCnt = 0
rle.lastVal = b
}
}
buf[i] = rle.lastVal
rle.lastCnt--
}
return len(buf), nil
}
func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] }

vendor/github.com/dsnet/compress/bzip2/writer.go generated vendored Normal file
@@ -0,0 +1,307 @@
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import (
"io"
"github.com/dsnet/compress/internal"
"github.com/dsnet/compress/internal/errors"
"github.com/dsnet/compress/internal/prefix"
)
type Writer struct {
InputOffset int64 // Total number of bytes issued to Write
OutputOffset int64 // Total number of bytes written to underlying io.Writer
wr prefixWriter
err error
level int // The current compression level
wrHdr bool // Have we written the stream header?
blkCRC uint32 // CRC-32 IEEE of each block
endCRC uint32 // Checksum of all blocks using bzip2's custom method
crc crc
rle runLengthEncoding
bwt burrowsWheelerTransform
mtf moveToFront
// These fields are allocated with Writer and re-used later.
buf []byte
treeSels []uint8
treeSelsMTF []uint8
codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
codes1D [maxNumTrees]prefix.PrefixCodes
trees1D [maxNumTrees]prefix.Encoder
}
type WriterConfig struct {
Level int
_ struct{} // Blank field to prevent unkeyed struct literals
}
func NewWriter(w io.Writer, conf *WriterConfig) (*Writer, error) {
var lvl int
if conf != nil {
lvl = conf.Level
}
if lvl == 0 {
lvl = DefaultCompression
}
if lvl < BestSpeed || lvl > BestCompression {
return nil, errorf(errors.Invalid, "compression level: %d", lvl)
}
zw := new(Writer)
zw.level = lvl
zw.Reset(w)
return zw, nil
}
func (zw *Writer) Reset(w io.Writer) error {
*zw = Writer{
wr: zw.wr,
level: zw.level,
rle: zw.rle,
bwt: zw.bwt,
mtf: zw.mtf,
buf: zw.buf,
treeSels: zw.treeSels,
treeSelsMTF: zw.treeSelsMTF,
trees1D: zw.trees1D,
}
zw.wr.Init(w)
if len(zw.buf) != zw.level*blockSize {
zw.buf = make([]byte, zw.level*blockSize)
}
zw.rle.Init(zw.buf)
return nil
}
func (zw *Writer) Write(buf []byte) (int, error) {
if zw.err != nil {
return 0, zw.err
}
cnt := len(buf)
for {
wrCnt, err := zw.rle.Write(buf)
if err != rleDone && zw.err == nil {
zw.err = err
}
zw.crc.update(buf[:wrCnt])
buf = buf[wrCnt:]
if len(buf) == 0 {
zw.InputOffset += int64(cnt)
return cnt, nil
}
if zw.err = zw.flush(); zw.err != nil {
return 0, zw.err
}
}
}
func (zw *Writer) flush() error {
vals := zw.rle.Bytes()
if len(vals) == 0 {
return nil
}
zw.wr.Offset = zw.OutputOffset
func() {
defer errors.Recover(&zw.err)
if !zw.wrHdr {
// Write stream header.
zw.wr.WriteBitsBE64(hdrMagic, 16)
zw.wr.WriteBitsBE64('h', 8)
zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
zw.wrHdr = true
}
zw.encodeBlock(vals)
}()
var err error
if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
zw.err = err
}
if zw.err != nil {
zw.err = errWrap(zw.err, errors.Internal)
return zw.err
}
zw.endCRC = (zw.endCRC<<1 | zw.endCRC>>31) ^ zw.blkCRC
zw.blkCRC = 0
zw.rle.Init(zw.buf)
return nil
}
func (zw *Writer) Close() error {
if zw.err == errClosed {
return nil
}
// Flush RLE buffer if there is left-over data.
if zw.err = zw.flush(); zw.err != nil {
return zw.err
}
// Write stream footer.
zw.wr.Offset = zw.OutputOffset
func() {
defer errors.Recover(&zw.err)
if !zw.wrHdr {
// Write stream header.
zw.wr.WriteBitsBE64(hdrMagic, 16)
zw.wr.WriteBitsBE64('h', 8)
zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
zw.wrHdr = true
}
zw.wr.WriteBitsBE64(endMagic, 48)
zw.wr.WriteBitsBE64(uint64(zw.endCRC), 32)
zw.wr.WritePads(0)
}()
var err error
if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
zw.err = err
}
if zw.err != nil {
zw.err = errWrap(zw.err, errors.Internal)
return zw.err
}
zw.err = errClosed
return nil
}
func (zw *Writer) encodeBlock(buf []byte) {
zw.blkCRC = zw.crc.val
zw.wr.WriteBitsBE64(blkMagic, 48)
zw.wr.WriteBitsBE64(uint64(zw.blkCRC), 32)
zw.wr.WriteBitsBE64(0, 1)
zw.crc.val = 0
// Step 1: Burrows-Wheeler transformation.
ptr := zw.bwt.Encode(buf)
zw.wr.WriteBitsBE64(uint64(ptr), 24)
// Step 2: Move-to-front transform and run-length encoding.
var dictMap [256]bool
for _, c := range buf {
dictMap[c] = true
}
var dictArr [256]uint8
var bmapLo [16]uint16
dict := dictArr[:0]
bmapHi := uint16(0)
for i, b := range dictMap {
if b {
c := uint8(i)
dict = append(dict, c)
bmapHi |= 1 << (c >> 4)
bmapLo[c>>4] |= 1 << (c & 0xf)
}
}
zw.wr.WriteBits(uint(bmapHi), 16)
for _, m := range bmapLo {
if m > 0 {
zw.wr.WriteBits(uint(m), 16)
}
}
zw.mtf.Init(dict, len(buf))
syms := zw.mtf.Encode(buf)
// Step 3: Prefix encoding.
zw.encodePrefix(syms, len(dict))
}
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) {
numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOB symbols
if numSyms < 3 {
panicf(errors.Internal, "unable to encode EOB marker")
}
syms = append(syms, uint16(numSyms-1)) // EOB marker
// Compute number of prefix trees needed.
numTrees := maxNumTrees
for i, lim := range []int{200, 600, 1200, 2400} {
if len(syms) < lim {
numTrees = minNumTrees + i
break
}
}
// Compute number of block selectors.
numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms
if cap(zw.treeSels) < numSels {
zw.treeSels = make([]uint8, numSels)
}
treeSels := zw.treeSels[:numSels]
for i := range treeSels {
treeSels[i] = uint8(i % numTrees)
}
// Initialize prefix codes.
for i := range zw.codes2D[:numTrees] {
pc := zw.codes2D[i][:numSyms]
for j := range pc {
pc[j] = prefix.PrefixCode{Sym: uint32(j)}
}
zw.codes1D[i] = pc
}
// First cut at assigning prefix trees to each group.
var codes prefix.PrefixCodes
var blkLen, selIdx int
for _, sym := range syms {
if blkLen == 0 {
blkLen = numBlockSyms
codes = zw.codes2D[treeSels[selIdx]][:numSyms]
selIdx++
}
blkLen--
codes[sym].Cnt++
}
// TODO(dsnet): Use K-means to cluster groups to each prefix tree.
// Generate lengths and prefixes based on symbol frequencies.
for i := range zw.trees1D[:numTrees] {
pc := prefix.PrefixCodes(zw.codes2D[i][:numSyms])
pc.SortByCount()
if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil {
errors.Panic(err)
}
pc.SortBySymbol()
}
// Write out information about the trees and tree selectors.
var mtf internal.MoveToFront
zw.wr.WriteBitsBE64(uint64(numTrees), 3)
zw.wr.WriteBitsBE64(uint64(numSels), 15)
zw.treeSelsMTF = append(zw.treeSelsMTF[:0], treeSels...)
mtf.Encode(zw.treeSelsMTF)
for _, sym := range zw.treeSelsMTF {
zw.wr.WriteSymbol(uint(sym), &encSel)
}
zw.wr.WritePrefixCodes(zw.codes1D[:numTrees], zw.trees1D[:numTrees])
// Write out prefix encoded symbols of compressed data.
var tree *prefix.Encoder
blkLen, selIdx = 0, 0
for _, sym := range syms {
if blkLen == 0 {
blkLen = numBlockSyms
tree = &zw.trees1D[treeSels[selIdx]]
selIdx++
}
blkLen--
ok := zw.wr.TryWriteSymbol(uint(sym), tree)
if !ok {
zw.wr.WriteSymbol(uint(sym), tree)
}
}
}