forked from forgejo/forgejo
Dump: add output format tar and output to stdout (#10376)
* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
This commit is contained in:
parent
209b17c4e2
commit
684b7a999f
303 changed files with 301317 additions and 1183 deletions
110
vendor/github.com/dsnet/compress/bzip2/bwt.go
generated
vendored
Normal file
110
vendor/github.com/dsnet/compress/bzip2/bwt.go
generated
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/bzip2/internal/sais"
|
||||
|
||||
// The Burrows-Wheeler Transform implementation used here is based on the
|
||||
// Suffix Array by Induced Sorting (SA-IS) methodology by Nong, Zhang, and Chan.
|
||||
// This implementation uses the sais algorithm originally written by Yuta Mori.
|
||||
//
|
||||
// The SA-IS algorithm runs in O(n) and outputs a Suffix Array. There is a
|
||||
// mathematical relationship between Suffix Arrays and the Burrows-Wheeler
|
||||
// Transform, such that a SA can be converted to a BWT in O(n) time.
|
||||
//
|
||||
// References:
|
||||
// http://www.hpl.hp.com/techreports/Compaq-DEC/SRC-RR-124.pdf
|
||||
// https://github.com/cscott/compressjs/blob/master/lib/BWT.js
|
||||
// https://www.quora.com/How-can-I-optimize-burrows-wheeler-transform-and-inverse-transform-to-work-in-O-n-time-O-n-space
|
||||
type burrowsWheelerTransform struct {
|
||||
buf []byte
|
||||
sa []int
|
||||
perm []uint32
|
||||
}
|
||||
|
||||
func (bwt *burrowsWheelerTransform) Encode(buf []byte) (ptr int) {
|
||||
if len(buf) == 0 {
|
||||
return -1
|
||||
}
|
||||
|
||||
// TODO(dsnet): Find a way to avoid the duplicate input string method.
|
||||
// We only need to do this because suffix arrays (by definition) only
|
||||
// operate non-wrapped suffixes of a string. On the other hand,
|
||||
// the BWT specifically used in bzip2 operate on a strings that wrap-around
|
||||
// when being sorted.
|
||||
|
||||
// Step 1: Concatenate the input string to itself so that we can use the
|
||||
// suffix array algorithm for bzip2's variant of BWT.
|
||||
n := len(buf)
|
||||
bwt.buf = append(append(bwt.buf[:0], buf...), buf...)
|
||||
if cap(bwt.sa) < 2*n {
|
||||
bwt.sa = make([]int, 2*n)
|
||||
}
|
||||
t := bwt.buf[:2*n]
|
||||
sa := bwt.sa[:2*n]
|
||||
|
||||
// Step 2: Compute the suffix array (SA). The input string, t, will not be
|
||||
// modified, while the results will be written to the output, sa.
|
||||
sais.ComputeSA(t, sa)
|
||||
|
||||
// Step 3: Convert the SA to a BWT. Since ComputeSA does not mutate the
|
||||
// input, we have two copies of the input; in buf and buf2. Thus, we write
|
||||
// the transformation to buf, while using buf2.
|
||||
var j int
|
||||
buf2 := t[n:]
|
||||
for _, i := range sa {
|
||||
if i < n {
|
||||
if i == 0 {
|
||||
ptr = j
|
||||
i = n
|
||||
}
|
||||
buf[j] = buf2[i-1]
|
||||
j++
|
||||
}
|
||||
}
|
||||
return ptr
|
||||
}
|
||||
|
||||
func (bwt *burrowsWheelerTransform) Decode(buf []byte, ptr int) {
|
||||
if len(buf) == 0 {
|
||||
return
|
||||
}
|
||||
|
||||
// Step 1: Compute cumm, where cumm[ch] reports the total number of
|
||||
// characters that precede the character ch in the alphabet.
|
||||
var cumm [256]int
|
||||
for _, v := range buf {
|
||||
cumm[v]++
|
||||
}
|
||||
var sum int
|
||||
for i, v := range cumm {
|
||||
cumm[i] = sum
|
||||
sum += v
|
||||
}
|
||||
|
||||
// Step 2: Compute perm, where perm[ptr] contains a pointer to the next
|
||||
// byte in buf and the next pointer in perm itself.
|
||||
if cap(bwt.perm) < len(buf) {
|
||||
bwt.perm = make([]uint32, len(buf))
|
||||
}
|
||||
perm := bwt.perm[:len(buf)]
|
||||
for i, b := range buf {
|
||||
perm[cumm[b]] = uint32(i)
|
||||
cumm[b]++
|
||||
}
|
||||
|
||||
// Step 3: Follow each pointer in perm to the next byte, starting with the
|
||||
// origin pointer.
|
||||
if cap(bwt.buf) < len(buf) {
|
||||
bwt.buf = make([]byte, len(buf))
|
||||
}
|
||||
buf2 := bwt.buf[:len(buf)]
|
||||
i := perm[ptr]
|
||||
for j := range buf2 {
|
||||
buf2[j] = buf[i]
|
||||
i = perm[i]
|
||||
}
|
||||
copy(buf, buf2)
|
||||
}
|
110
vendor/github.com/dsnet/compress/bzip2/common.go
generated
vendored
Normal file
110
vendor/github.com/dsnet/compress/bzip2/common.go
generated
vendored
Normal file
|
@ -0,0 +1,110 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package bzip2 implements the BZip2 compressed data format.
|
||||
//
|
||||
// Canonical C implementation:
|
||||
// http://bzip.org
|
||||
//
|
||||
// Unofficial format specification:
|
||||
// https://github.com/dsnet/compress/blob/master/doc/bzip2-format.pdf
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
)
|
||||
|
||||
// There does not exist a formal specification of the BZip2 format. As such,
|
||||
// much of this work is derived by either reverse engineering the original C
|
||||
// source code or using secondary sources.
|
||||
//
|
||||
// Significant amounts of fuzz testing is done to ensure that outputs from
|
||||
// this package is properly decoded by the C library. Furthermore, we test that
|
||||
// both this package and the C library agree about what inputs are invalid.
|
||||
//
|
||||
// Compression stack:
|
||||
// Run-length encoding 1 (RLE1)
|
||||
// Burrows-Wheeler transform (BWT)
|
||||
// Move-to-front transform (MTF)
|
||||
// Run-length encoding 2 (RLE2)
|
||||
// Prefix encoding (PE)
|
||||
//
|
||||
// References:
|
||||
// http://bzip.org/
|
||||
// https://en.wikipedia.org/wiki/Bzip2
|
||||
// https://code.google.com/p/jbzip2/
|
||||
|
||||
// Compression levels accepted by this package: 1 is fastest, 9 produces the
// smallest output, and 6 is the default.
const (
	BestSpeed          = 1
	BestCompression    = 9
	DefaultCompression = 6
)

// Magic values and sizing constants of the BZip2 stream format.
const (
	hdrMagic = 0x425a         // Hex of "BZ"
	blkMagic = 0x314159265359 // BCD of PI
	endMagic = 0x177245385090 // BCD of sqrt(PI)

	// blockSize is the base block size in bytes; presumably scaled by the
	// compression level when sizing a block — confirm against the Writer.
	blockSize = 100000
)
|
||||
|
||||
func errorf(c int, f string, a ...interface{}) error {
|
||||
return errors.Error{Code: c, Pkg: "bzip2", Msg: fmt.Sprintf(f, a...)}
|
||||
}
|
||||
|
||||
// panicf raises a package-tagged error (built by errorf) as a panic via
// errors.Panic; presumably recovered at the package's public API boundary —
// confirm in the reader/writer code.
func panicf(c int, f string, a ...interface{}) {
	errors.Panic(errorf(c, f, a...))
}
|
||||
|
||||
// errWrap converts a lower-level errors.Error to be one from this package.
|
||||
// The replaceCode passed in will be used to replace the code for any errors
|
||||
// with the errors.Invalid code.
|
||||
//
|
||||
// For the Reader, set this to errors.Corrupted.
|
||||
// For the Writer, set this to errors.Internal.
|
||||
func errWrap(err error, replaceCode int) error {
|
||||
if cerr, ok := err.(errors.Error); ok {
|
||||
if errors.IsInvalid(cerr) {
|
||||
cerr.Code = replaceCode
|
||||
}
|
||||
err = errorf(cerr.Code, "%s", cerr.Msg)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
var errClosed = errorf(errors.Closed, "")
|
||||
|
||||
// crc computes the CRC-32 used by BZip2.
|
||||
//
|
||||
// The CRC-32 computation in bzip2 treats bytes as having bits in big-endian
|
||||
// order. That is, the MSB is read before the LSB. Thus, we can use the
|
||||
// standard library version of CRC-32 IEEE with some minor adjustments.
|
||||
//
|
||||
// The byte array is used as an intermediate buffer to swap the bits of every
|
||||
// byte of the input.
|
||||
type crc struct {
|
||||
val uint32
|
||||
buf [256]byte
|
||||
}
|
||||
|
||||
// update computes the CRC-32 of appending buf to c.
|
||||
func (c *crc) update(buf []byte) {
|
||||
cval := internal.ReverseUint32(c.val)
|
||||
for len(buf) > 0 {
|
||||
n := len(buf)
|
||||
if n > len(c.buf) {
|
||||
n = len(c.buf)
|
||||
}
|
||||
for i, b := range buf[:n] {
|
||||
c.buf[i] = internal.ReverseLUT[b]
|
||||
}
|
||||
cval = crc32.Update(cval, crc32.IEEETable, c.buf[:n])
|
||||
buf = buf[n:]
|
||||
}
|
||||
c.val = internal.ReverseUint32(cval)
|
||||
}
|
13
vendor/github.com/dsnet/compress/bzip2/fuzz_off.go
generated
vendored
Normal file
13
vendor/github.com/dsnet/compress/bzip2/fuzz_off.go
generated
vendored
Normal file
|
@ -0,0 +1,13 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build !gofuzz
|
||||
|
||||
// This file exists to suppress fuzzing details from release builds.
|
||||
|
||||
package bzip2
|
||||
|
||||
// fuzzReader is the release-build stand-in for the fuzz-only checksum
// recorder defined in fuzz_on.go; it carries no state.
type fuzzReader struct{}

// updateChecksum is a no-op in non-fuzz builds.
func (*fuzzReader) updateChecksum(int64, uint32) {}
77
vendor/github.com/dsnet/compress/bzip2/fuzz_on.go
generated
vendored
Normal file
77
vendor/github.com/dsnet/compress/bzip2/fuzz_on.go
generated
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
// Copyright 2016, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// +build gofuzz
|
||||
|
||||
// This file exists to export internal implementation details for fuzz testing.
|
||||
|
||||
package bzip2
|
||||
|
||||
func ForwardBWT(buf []byte) (ptr int) {
|
||||
var bwt burrowsWheelerTransform
|
||||
return bwt.Encode(buf)
|
||||
}
|
||||
|
||||
func ReverseBWT(buf []byte, ptr int) {
|
||||
var bwt burrowsWheelerTransform
|
||||
bwt.Decode(buf, ptr)
|
||||
}
|
||||
|
||||
// fuzzReader records the checksum fields observed while decoding, so fuzz
// harnesses can rewrite them coherently in mutated inputs.
type fuzzReader struct {
	Checksums Checksums
}

// updateChecksum records one checksum observation.
//
// A non-negative pos appends a new (pos, val) record; a negative pos
// overwrites the value of the most recent record instead.
func (fr *fuzzReader) updateChecksum(pos int64, val uint32) {
	if pos < 0 {
		fr.Checksums[len(fr.Checksums)-1].Value = val
		return
	}
	fr.Checksums = append(fr.Checksums, Checksum{pos, val})
}

// Checksum identifies a single checksum field inside an encoded stream.
type Checksum struct {
	Offset int64  // Bit offset of the checksum
	Value  uint32 // Checksum value
}

// Checksums is a list of checksum fields in stream order.
type Checksums []Checksum
|
||||
|
||||
// Apply overwrites all checksum fields in d with the ones in cs.
|
||||
func (cs Checksums) Apply(d []byte) []byte {
|
||||
d = append([]byte(nil), d...)
|
||||
for _, c := range cs {
|
||||
setU32(d, c.Offset, c.Value)
|
||||
}
|
||||
return d
|
||||
}
|
||||
|
||||
// setU32 writes the 32-bit value val into d at bit offset pos, MSB first.
// Bits are addressed big-endian within each byte (bit offset 0 is the top
// bit of d[0]).
func setU32(d []byte, pos int64, val uint32) {
	for i := uint(0); i < 32; i++ {
		bpos := uint64(pos) + uint64(i)
		mask := byte(1) << (7 - bpos%8)
		if val&(1<<(31-i)) != 0 {
			d[bpos/8] |= mask
		} else {
			d[bpos/8] &^= mask
		}
	}
}
|
||||
|
||||
// Verify checks that all checksum fields in d matches those in cs.
|
||||
func (cs Checksums) Verify(d []byte) bool {
|
||||
for _, c := range cs {
|
||||
if getU32(d, c.Offset) != c.Value {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// getU32 reads a 32-bit value from d at bit offset pos, MSB first.
// Bits are addressed big-endian within each byte.
func getU32(d []byte, pos int64) (val uint32) {
	for i := uint(0); i < 32; i++ {
		bpos := uint64(pos) + uint64(i)
		bit := (d[bpos/8] >> (7 - bpos%8)) & 1
		val = val<<1 | uint32(bit)
	}
	return val
}
|
28
vendor/github.com/dsnet/compress/bzip2/internal/sais/common.go
generated
vendored
Normal file
28
vendor/github.com/dsnet/compress/bzip2/internal/sais/common.go
generated
vendored
Normal file
|
@ -0,0 +1,28 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Package sais implements a linear time suffix array algorithm.
|
||||
package sais
|
||||
|
||||
//go:generate go run sais_gen.go byte sais_byte.go
|
||||
//go:generate go run sais_gen.go int sais_int.go
|
||||
|
||||
// This package ports the C sais implementation by Yuta Mori. The ports are
|
||||
// located in sais_byte.go and sais_int.go, which are identical to each other
|
||||
// except for the types. Since Go does not support generics, we use generators to
|
||||
// create the two files.
|
||||
//
|
||||
// References:
|
||||
// https://sites.google.com/site/yuta256/sais
|
||||
// https://www.researchgate.net/publication/221313676_Linear_Time_Suffix_Array_Construction_Using_D-Critical_Substrings
|
||||
// https://www.researchgate.net/publication/224176324_Two_Efficient_Algorithms_for_Linear_Time_Suffix_Array_Construction
|
||||
|
||||
// ComputeSA computes the suffix array of t and places the result in sa.
|
||||
// Both t and sa must be the same length.
|
||||
func ComputeSA(t []byte, sa []int) {
|
||||
if len(sa) != len(t) {
|
||||
panic("mismatching sizes")
|
||||
}
|
||||
computeSA_byte(t, sa, 0, len(t), 256)
|
||||
}
|
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go
generated
vendored
Normal file
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_byte.go
generated
vendored
Normal file
|
@ -0,0 +1,661 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Code generated by sais_gen.go. DO NOT EDIT.
|
||||
|
||||
// ====================================================
|
||||
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ====================================================
|
||||
|
||||
package sais
|
||||
|
||||
// getCounts_byte fills C[0:k] with the number of occurrences of each symbol
// in T[0:n].
func getCounts_byte(T []byte, C []int, n, k int) {
	for i := 0; i < k; i++ {
		C[i] = 0
	}
	for _, c := range T[:n] {
		C[c]++
	}
}
|
||||
|
||||
// getBuckets_byte derives bucket boundaries from the symbol counts in
// C[0:k]: bucket end positions when end is true, start positions otherwise.
func getBuckets_byte(C, B []int, k int, end bool) {
	sum := 0
	for i := 0; i < k; i++ {
		sum += C[i]
		if end {
			B[i] = sum
		} else {
			B[i] = sum - C[i]
		}
	}
}
|
||||
|
||||
// sortLMS1_byte induce-sorts the LMS suffixes of T into SA (the variant
// without the D repetition-tracking array; see sortLMS2_byte). C holds
// symbol counts and B bucket pointers; they may alias each other, in which
// case the counts are recomputed before each bucket pass.
func sortLMS1_byte(T []byte, SA, C, B []int, n, k int) {
	var b, i, j int
	var c0, c1 int

	// Compute SAl.
	if &C[0] == &B[0] {
		getCounts_byte(T, C, n, k)
	}
	getBuckets_byte(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	j--
	// Negative (bit-complemented) entries flag S-type neighbors.
	if int(T[j]) < c1 {
		SA[b] = ^j
	} else {
		SA[b] = j
	}
	b++
	for i = 0; i < n; i++ {
		if j = SA[i]; j > 0 {
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			if int(T[j]) < c1 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
			SA[i] = 0
		} else if j < 0 {
			SA[i] = ^j
		}
	}

	// Compute SAs.
	if &C[0] == &B[0] {
		getCounts_byte(T, C, n, k)
	}
	getBuckets_byte(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i = n - 1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			b--
			if int(T[j]) > c1 {
				SA[b] = ^(j + 1)
			} else {
				SA[b] = j
			}
			SA[i] = 0
		}
	}
}
|
||||
|
||||
// postProcLMS1_byte compacts the sorted LMS substrings into SA[:m], assigns
// each distinct substring a lexicographic name (stored at SA[m+(p>>1)]), and
// returns the number of distinct names.
func postProcLMS1_byte(T []byte, SA []int, n, m int) int {
	var i, j, p, q, plen, qlen, name int
	var c0, c1 int
	var diff bool

	// Compact all the sorted substrings into the first m items of SA.
	// 2*m must be not larger than n (provable).
	for i = 0; SA[i] < 0; i++ {
		SA[i] = ^SA[i]
	}
	if i < m {
		for j, i = i, i+1; ; i++ {
			if p = SA[i]; p < 0 {
				SA[j] = ^p
				j++
				SA[i] = 0
				if j == m {
					break
				}
			}
		}
	}

	// Store the length of all substrings.
	// The scan walks T right-to-left, alternating between runs that locate
	// the next L/S type change and recording each LMS substring's length.
	i = n - 1
	j = n - 1
	c0 = int(T[n-1])
	for {
		c1 = c0
		if i--; i < 0 {
			break
		}
		if c0 = int(T[i]); c0 < c1 {
			break
		}
	}
	for i >= 0 {
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 > c1 {
				break
			}
		}
		if i >= 0 {
			SA[m+((i+1)>>1)] = j - i
			j = i + 1
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 < c1 {
					break
				}
			}
		}
	}

	// Find the lexicographic names of all substrings.
	// Adjacent substrings that compare equal share a name.
	name = 0
	qlen = 0
	for i, q = 0, n; i < m; i++ {
		p = SA[i]
		plen = SA[m+(p>>1)]
		diff = true
		if (plen == qlen) && ((q + plen) < n) {
			for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
			}
			if j == plen {
				diff = false
			}
		}
		if diff {
			name++
			q = p
			qlen = plen
		}
		SA[m+(p>>1)] = name
	}
	return name
}
|
||||
|
||||
// sortLMS2_byte is the variant of sortLMS1_byte that additionally tracks a
// pass counter per (symbol, type) slot in D (length 2*k), flagging entries
// with +n — apparently so equal substrings can be detected without full
// re-comparison in postProcLMS2_byte; confirm against the upstream sais
// sources.
func sortLMS2_byte(T []byte, SA, C, B, D []int, n, k int) {
	var b, i, j, t, d int
	var c0, c1 int

	// Compute SAl.
	getBuckets_byte(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	j--
	if int(T[j]) < c1 {
		t = 1
	} else {
		t = 0
	}
	j += n
	if t&1 > 0 {
		SA[b] = ^j
	} else {
		SA[b] = j
	}
	b++
	for i, d = 0, 0; i < n; i++ {
		if j = SA[i]; j > 0 {
			if n <= j {
				// An entry offset by +n carries the repetition flag.
				d += 1
				j -= n
			}
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			t = int(c0) << 1
			if int(T[j]) < c1 {
				t |= 1
			}
			if D[t] != d {
				j += n
				D[t] = d
			}
			if t&1 > 0 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
			SA[i] = 0
		} else if j < 0 {
			SA[i] = ^j
		}
	}
	// Move the flags to the head of each run of equal entries.
	for i = n - 1; 0 <= i; i-- {
		if SA[i] > 0 {
			if SA[i] < n {
				SA[i] += n
				for j = i - 1; SA[j] < n; j-- {
				}
				SA[j] -= n
				i = j
			}
		}
	}

	// Compute SAs.
	getBuckets_byte(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i, d = n-1, d+1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			if n <= j {
				d += 1
				j -= n
			}
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			t = int(c0) << 1
			if int(T[j]) > c1 {
				t |= 1
			}
			if D[t] != d {
				j += n
				D[t] = d
			}
			b--
			if t&1 > 0 {
				SA[b] = ^(j + 1)
			} else {
				SA[b] = j
			}
			SA[i] = 0
		}
	}
}
|
||||
|
||||
// postProcLMS2_byte compacts the LMS substrings sorted by sortLMS2_byte into
// SA[:m] (entries offset by +n act as new-name flags), assigns lexicographic
// names, and returns the number of distinct names.
func postProcLMS2_byte(SA []int, n, m int) int {
	var i, j, d, name int

	// Compact all the sorted LMS substrings into the first m items of SA.
	name = 0
	for i = 0; SA[i] < 0; i++ {
		j = ^SA[i]
		if n <= j {
			name += 1
		}
		SA[i] = j
	}
	if i < m {
		for d, i = i, i+1; ; i++ {
			if j = SA[i]; j < 0 {
				j = ^j
				if n <= j {
					name += 1
				}
				SA[d] = j
				d++
				SA[i] = 0
				if d == m {
					break
				}
			}
		}
	}
	if name < m {
		// Store the lexicographic names.
		for i, d = m-1, name+1; 0 <= i; i-- {
			if j = SA[i]; n <= j {
				j -= n
				d--
			}
			SA[m+(j>>1)] = d
		}
	} else {
		// Unset flags.
		for i = 0; i < m; i++ {
			if j = SA[i]; n <= j {
				j -= n
				SA[i] = j
			}
		}
	}
	return name
}
|
||||
|
||||
// induceSA_byte performs the final induced-sorting pass, deriving the full
// suffix array in SA from the already-placed LMS suffixes. C and B may alias
// each other, in which case the counts are recomputed before each pass.
func induceSA_byte(T []byte, SA, C, B []int, n, k int) {
	var b, i, j int
	var c0, c1 int

	// Compute SAl.
	if &C[0] == &B[0] {
		getCounts_byte(T, C, n, k)
	}
	getBuckets_byte(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	if j > 0 && int(T[j-1]) < c1 {
		SA[b] = ^j
	} else {
		SA[b] = j
	}
	b++
	for i = 0; i < n; i++ {
		j = SA[i]
		SA[i] = ^j
		if j > 0 {
			j--
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			if j > 0 && int(T[j-1]) < c1 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
		}
	}

	// Compute SAs.
	if &C[0] == &B[0] {
		getCounts_byte(T, C, n, k)
	}
	getBuckets_byte(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i = n - 1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			j--
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			b--
			if (j == 0) || (int(T[j-1]) > c1) {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
		} else {
			SA[i] = ^j
		}
	}
}
|
||||
|
||||
// computeSA_byte is the main SA-IS driver for byte alphabets. It sorts the
// LMS substrings (stage 1), recurses on the reduced problem while names are
// not yet unique (stage 2), and induces the final suffix array (stage 3).
//
// fs is the amount of free working space available at the tail of SA; the
// flags bitmask records which of C, B, and D borrow that space rather than
// being freshly allocated.
func computeSA_byte(T []byte, SA []int, fs, n, k int) {
	const (
		minBucketSize = 512
		sortLMS2Limit = 0x3fffffff
	)

	var C, B, D, RA []int
	var bo int // Offset of B relative to SA
	var b, i, j, m, p, q, name, newfs int
	var c0, c1 int
	var flags uint

	// Carve C and B out of SA's free tail when it is large enough;
	// otherwise allocate (or share one array for both, flag 8).
	if k <= minBucketSize {
		C = make([]int, k)
		if k <= fs {
			bo = n + fs - k
			B = SA[bo:]
			flags = 1
		} else {
			B = make([]int, k)
			flags = 3
		}
	} else if k <= fs {
		C = SA[n+fs-k:]
		if k <= fs-k {
			bo = n + fs - 2*k
			B = SA[bo:]
			flags = 0
		} else if k <= 4*minBucketSize {
			B = make([]int, k)
			flags = 2
		} else {
			B = C
			flags = 8
		}
	} else {
		C = make([]int, k)
		B = C
		flags = 4 | 8
	}
	// Flags 16/32 select the sortLMS2 path and where D lives.
	if n <= sortLMS2Limit && 2 <= (n/k) {
		if flags&1 > 0 {
			if 2*k <= fs-k {
				flags |= 32
			} else {
				flags |= 16
			}
		} else if flags == 0 && 2*k <= (fs-2*k) {
			flags |= 32
		}
	}

	// Stage 1: Reduce the problem by at least 1/2.
	// Sort all the LMS-substrings.
	getCounts_byte(T, C, n, k)
	getBuckets_byte(C, B, k, true) // Find ends of buckets
	for i = 0; i < n; i++ {
		SA[i] = 0
	}
	b = -1
	i = n - 1
	j = n
	m = 0
	c0 = int(T[n-1])
	for {
		c1 = c0
		if i--; i < 0 {
			break
		}
		if c0 = int(T[i]); c0 < c1 {
			break
		}
	}
	// Right-to-left scan placing each LMS position at the end of its bucket.
	for i >= 0 {
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 > c1 {
				break
			}
		}
		if i >= 0 {
			if b >= 0 {
				SA[b] = j
			}
			B[c1]--
			b = B[c1]
			j = i
			m++
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 < c1 {
					break
				}
			}
		}
	}

	if m > 1 {
		if flags&(16|32) > 0 {
			if flags&16 > 0 {
				D = make([]int, 2*k)
			} else {
				D = SA[bo-2*k:]
			}
			B[T[j+1]]++
			for i, j = 0, 0; i < k; i++ {
				j += C[i]
				if B[i] != j {
					SA[B[i]] += n
				}
				D[i] = 0
				D[i+k] = 0
			}
			sortLMS2_byte(T, SA, C, B, D, n, k)
			name = postProcLMS2_byte(SA, n, m)
		} else {
			sortLMS1_byte(T, SA, C, B, n, k)
			name = postProcLMS1_byte(T, SA, n, m)
		}
	} else if m == 1 {
		SA[b] = j + 1
		name = 1
	} else {
		name = 0
	}

	// Stage 2: Solve the reduced problem.
	// Recurse if names are not yet unique.
	if name < m {
		newfs = n + fs - 2*m
		if flags&(1|4|8) == 0 {
			if k+name <= newfs {
				newfs -= k
			} else {
				flags |= 8
			}
		}
		// RA holds the reduced (renamed) string at the tail of SA.
		RA = SA[m+newfs:]
		for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
			if SA[i] != 0 {
				RA[j] = SA[i] - 1
				j--
			}
		}
		computeSA_int(RA, SA, newfs, m, name)

		// Rebuild RA as the list of LMS positions, then translate the
		// reduced suffix array back to positions in T.
		i = n - 1
		j = m - 1
		c0 = int(T[n-1])
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 < c1 {
				break
			}
		}
		for i >= 0 {
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 > c1 {
					break
				}
			}
			if i >= 0 {
				RA[j] = i + 1
				j--
				for {
					c1 = c0
					if i--; i < 0 {
						break
					}
					if c0 = int(T[i]); c0 < c1 {
						break
					}
				}
			}
		}
		for i = 0; i < m; i++ {
			SA[i] = RA[SA[i]]
		}
		if flags&4 > 0 {
			B = make([]int, k)
			C = B
		}
		if flags&2 > 0 {
			B = make([]int, k)
		}
	}

	// Stage 3: Induce the result for the original problem.
	if flags&8 > 0 {
		getCounts_byte(T, C, n, k)
	}
	// Put all left-most S characters into their buckets.
	if m > 1 {
		getBuckets_byte(C, B, k, true) // Find ends of buckets
		i = m - 1
		j = n
		p = SA[m-1]
		c1 = int(T[p])
		for {
			c0 = c1
			q = B[c0]
			for q < j {
				j--
				SA[j] = 0
			}
			for {
				j--
				SA[j] = p
				if i--; i < 0 {
					break
				}
				p = SA[i]
				if c1 = int(T[p]); c1 != c0 {
					break
				}
			}
			if i < 0 {
				break
			}
		}
		for j > 0 {
			j--
			SA[j] = 0
		}
	}
	induceSA_byte(T, SA, C, B, n, k)
}
|
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_int.go
generated
vendored
Normal file
661
vendor/github.com/dsnet/compress/bzip2/internal/sais/sais_int.go
generated
vendored
Normal file
|
@ -0,0 +1,661 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
// Code generated by sais_gen.go. DO NOT EDIT.
|
||||
|
||||
// ====================================================
|
||||
// Copyright (c) 2008-2010 Yuta Mori All Rights Reserved.
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person
|
||||
// obtaining a copy of this software and associated documentation
|
||||
// files (the "Software"), to deal in the Software without
|
||||
// restriction, including without limitation the rights to use,
|
||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
// copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following
|
||||
// conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be
|
||||
// included in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
// OTHER DEALINGS IN THE SOFTWARE.
|
||||
// ====================================================
|
||||
|
||||
package sais
|
||||
|
||||
// getCounts_int fills C[0:k] with the number of occurrences of each symbol
// in T[0:n].
func getCounts_int(T []int, C []int, n, k int) {
	for i := 0; i < k; i++ {
		C[i] = 0
	}
	for _, c := range T[:n] {
		C[c]++
	}
}
|
||||
|
||||
// getBuckets_int derives bucket boundaries from the symbol counts in
// C[0:k]: bucket end positions when end is true, start positions otherwise.
func getBuckets_int(C, B []int, k int, end bool) {
	sum := 0
	for i := 0; i < k; i++ {
		sum += C[i]
		if end {
			B[i] = sum
		} else {
			B[i] = sum - C[i]
		}
	}
}
|
||||
|
||||
// sortLMS1_int is the int-alphabet twin of sortLMS1_byte (generated from the
// same template): it induce-sorts the LMS suffixes of T into SA. C and B may
// alias each other, in which case the counts are recomputed per pass.
func sortLMS1_int(T []int, SA, C, B []int, n, k int) {
	var b, i, j int
	var c0, c1 int

	// Compute SAl.
	if &C[0] == &B[0] {
		getCounts_int(T, C, n, k)
	}
	getBuckets_int(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	j--
	// Negative (bit-complemented) entries flag S-type neighbors.
	if int(T[j]) < c1 {
		SA[b] = ^j
	} else {
		SA[b] = j
	}
	b++
	for i = 0; i < n; i++ {
		if j = SA[i]; j > 0 {
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			if int(T[j]) < c1 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
			SA[i] = 0
		} else if j < 0 {
			SA[i] = ^j
		}
	}

	// Compute SAs.
	if &C[0] == &B[0] {
		getCounts_int(T, C, n, k)
	}
	getBuckets_int(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i = n - 1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			b--
			if int(T[j]) > c1 {
				SA[b] = ^(j + 1)
			} else {
				SA[b] = j
			}
			SA[i] = 0
		}
	}
}
|
||||
|
||||
// postProcLMS1_int is the int-alphabet twin of postProcLMS1_byte: it compacts
// the sorted LMS substrings into SA[:m], assigns each distinct substring a
// lexicographic name, and returns the number of distinct names.
func postProcLMS1_int(T []int, SA []int, n, m int) int {
	var i, j, p, q, plen, qlen, name int
	var c0, c1 int
	var diff bool

	// Compact all the sorted substrings into the first m items of SA.
	// 2*m must be not larger than n (provable).
	for i = 0; SA[i] < 0; i++ {
		SA[i] = ^SA[i]
	}
	if i < m {
		for j, i = i, i+1; ; i++ {
			if p = SA[i]; p < 0 {
				SA[j] = ^p
				j++
				SA[i] = 0
				if j == m {
					break
				}
			}
		}
	}

	// Store the length of all substrings.
	i = n - 1
	j = n - 1
	c0 = int(T[n-1])
	for {
		c1 = c0
		if i--; i < 0 {
			break
		}
		if c0 = int(T[i]); c0 < c1 {
			break
		}
	}
	for i >= 0 {
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 > c1 {
				break
			}
		}
		if i >= 0 {
			SA[m+((i+1)>>1)] = j - i
			j = i + 1
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 < c1 {
					break
				}
			}
		}
	}

	// Find the lexicographic names of all substrings.
	// Adjacent substrings that compare equal share a name.
	name = 0
	qlen = 0
	for i, q = 0, n; i < m; i++ {
		p = SA[i]
		plen = SA[m+(p>>1)]
		diff = true
		if (plen == qlen) && ((q + plen) < n) {
			for j = 0; (j < plen) && (T[p+j] == T[q+j]); j++ {
			}
			if j == plen {
				diff = false
			}
		}
		if diff {
			name++
			q = p
			qlen = plen
		}
		SA[m+(p>>1)] = name
	}
	return name
}
|
||||
|
||||
// sortLMS2_int is a variant of sortLMS1_int that additionally names the LMS
// substrings during the induction itself, using D (size 2*k, indexed by
// symbol and L/S type) to track the current "phase" d per (symbol, type)
// pair. Positions are offset by +n to flag phase boundaries; this lets
// postProcLMS2_int derive names without re-comparing substrings. Only used
// when n fits within sortLMS2Limit (see computeSA_int).
func sortLMS2_int(T []int, SA, C, B, D []int, n, k int) {
	var b, i, j, t, d int
	var c0, c1 int

	// Compute SAl: left-to-right induction of L-type positions.
	getBuckets_int(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	j--
	if int(T[j]) < c1 {
		t = 1 // preceding position is S-type
	} else {
		t = 0
	}
	j += n // +n flags the entry as starting a new phase
	if t&1 > 0 {
		SA[b] = ^j
	} else {
		SA[b] = j
	}
	b++
	for i, d = 0, 0; i < n; i++ {
		if j = SA[i]; j > 0 {
			if n <= j {
				// Entry carries the phase flag: bump the phase counter.
				d += 1
				j -= n
			}
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			// t encodes (symbol << 1) | type for indexing into D.
			t = int(c0) << 1
			if int(T[j]) < c1 {
				t |= 1
			}
			if D[t] != d {
				// First entry of this (symbol, type) in phase d: flag it.
				j += n
				D[t] = d
			}
			if t&1 > 0 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
			SA[i] = 0
		} else if j < 0 {
			SA[i] = ^j
		}
	}
	// Normalize the phase flags: keep only the first flagged entry of each
	// run, clearing the flag from the run's predecessor boundary.
	for i = n - 1; 0 <= i; i-- {
		if SA[i] > 0 {
			if SA[i] < n {
				SA[i] += n
				for j = i - 1; SA[j] < n; j-- {
				}
				SA[j] -= n
				i = j
			}
		}
	}

	// Compute SAs: right-to-left induction of S-type positions.
	getBuckets_int(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i, d = n-1, d+1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			if n <= j {
				d += 1
				j -= n
			}
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			j--
			t = int(c0) << 1
			if int(T[j]) > c1 {
				t |= 1
			}
			if D[t] != d {
				j += n
				D[t] = d
			}
			b--
			if t&1 > 0 {
				SA[b] = ^(j + 1)
			} else {
				SA[b] = j
			}
			SA[i] = 0
		}
	}
}
|
||||
|
||||
// postProcLMS2_int post-processes the result of sortLMS2_int: it compacts
// the m sorted LMS substrings to the front of SA and derives their
// lexicographic names from the phase flags (+n offsets) planted during
// induction, rather than by comparing substrings. Returns the number of
// distinct names.
func postProcLMS2_int(SA []int, n, m int) int {
	var i, j, d, name int

	// Compact all the sorted LMS substrings into the first m items of SA.
	// Each +n flag encountered marks the start of a new name class.
	name = 0
	for i = 0; SA[i] < 0; i++ {
		j = ^SA[i]
		if n <= j {
			name += 1
		}
		SA[i] = j
	}
	if i < m {
		for d, i = i, i+1; ; i++ {
			if j = SA[i]; j < 0 {
				j = ^j
				if n <= j {
					name += 1
				}
				SA[d] = j
				d++
				SA[i] = 0
				if d == m {
					break
				}
			}
		}
	}
	if name < m {
		// Store the lexicographic names.
		// Scan right-to-left, decrementing the name each time a flagged
		// (+n) entry — the first member of a class — is crossed.
		for i, d = m-1, name+1; 0 <= i; i-- {
			if j = SA[i]; n <= j {
				j -= n
				d--
			}
			SA[m+(j>>1)] = d
		}
	} else {
		// Unset flags.
		// All names are unique; just strip the +n offsets in place.
		for i = 0; i < m; i++ {
			if j = SA[i]; n <= j {
				j -= n
				SA[i] = j
			}
		}
	}
	return name
}
|
||||
|
||||
// induceSA_int performs the final induced-sorting pass of SA-IS: given SA
// seeded with the fully sorted LMS suffixes at their bucket ends, it induces
// the order of all remaining L-type and S-type suffixes, producing the
// complete suffix array of T[:n] in SA. C and B (size k) are the count and
// bucket-boundary arrays; they may alias the same storage.
func induceSA_int(T []int, SA, C, B []int, n, k int) {
	var b, i, j int
	var c0, c1 int

	// Compute SAl: left-to-right scan inducing L-type suffixes at the
	// current starts of their buckets.
	if &C[0] == &B[0] {
		// Aliased storage: recompute counts before deriving boundaries.
		getCounts_int(T, C, n, k)
	}
	getBuckets_int(C, B, k, false) // Find starts of buckets
	j = n - 1
	c1 = int(T[j])
	b = B[c1]
	if j > 0 && int(T[j-1]) < c1 {
		SA[b] = ^j // Complement marks positions whose predecessor is S-type
	} else {
		SA[b] = j
	}
	b++
	for i = 0; i < n; i++ {
		j = SA[i]
		SA[i] = ^j // Flip in place; the S-pass below flips valid entries back
		if j > 0 {
			j--
			if c0 = int(T[j]); c0 != c1 {
				// Bucket switch: persist old cursor, load new one.
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			if j > 0 && int(T[j-1]) < c1 {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
			b++
		}
	}

	// Compute SAs: right-to-left scan inducing S-type suffixes at the
	// current ends of their buckets.
	if &C[0] == &B[0] {
		getCounts_int(T, C, n, k)
	}
	getBuckets_int(C, B, k, true) // Find ends of buckets
	c1 = 0
	b = B[c1]
	for i = n - 1; i >= 0; i-- {
		if j = SA[i]; j > 0 {
			j--
			if c0 = int(T[j]); c0 != c1 {
				B[c1] = b
				c1 = c0
				b = B[c1]
			}
			b--
			if (j == 0) || (int(T[j-1]) > c1) {
				SA[b] = ^j
			} else {
				SA[b] = j
			}
		} else {
			SA[i] = ^j // Restore entries flipped during the L-pass
		}
	}
}
|
||||
|
||||
// computeSA_int computes the suffix array of T[:n] into SA[:n] using the
// SA-IS algorithm. fs is the amount of free working space available in SA
// beyond index n, and k is the alphabet size (every T[i] is in [0, k)).
// The function recurses on a reduced problem whenever LMS-substring names
// are not yet unique; the recursion depth is O(log n) since each level at
// least halves the problem size.
//
// The flags bitmask records how the C/B/D working arrays were obtained so
// that later stages know what to rebuild or reallocate:
//   1:  B was carved out of SA's free space
//   2:  B was heap-allocated (small k)
//   4:  C was heap-allocated and aliased to B
//   8:  C and B alias (counts must be recomputed before reuse)
//   16: D for sortLMS2 must be heap-allocated
//   32: D for sortLMS2 fits in SA's free space
func computeSA_int(T []int, SA []int, fs, n, k int) {
	const (
		minBucketSize = 512
		sortLMS2Limit = 0x3fffffff // Max n for the flag-based (+n) LMS naming
	)

	var C, B, D, RA []int
	var bo int // Offset of B relative to SA
	var b, i, j, m, p, q, name, newfs int
	var c0, c1 int
	var flags uint

	// Decide where C and B live: reuse SA's free tail when possible,
	// otherwise heap-allocate; alias them when space is tight.
	if k <= minBucketSize {
		C = make([]int, k)
		if k <= fs {
			bo = n + fs - k
			B = SA[bo:]
			flags = 1
		} else {
			B = make([]int, k)
			flags = 3
		}
	} else if k <= fs {
		C = SA[n+fs-k:]
		if k <= fs-k {
			bo = n + fs - 2*k
			B = SA[bo:]
			flags = 0
		} else if k <= 4*minBucketSize {
			B = make([]int, k)
			flags = 2
		} else {
			B = C
			flags = 8
		}
	} else {
		C = make([]int, k)
		B = C
		flags = 4 | 8
	}
	// Prefer the sortLMS2 path (naming during induction) when n is small
	// enough and the alphabet is dense enough (average run >= 2).
	if n <= sortLMS2Limit && 2 <= (n/k) {
		if flags&1 > 0 {
			if 2*k <= fs-k {
				flags |= 32
			} else {
				flags |= 16
			}
		} else if flags == 0 && 2*k <= (fs-2*k) {
			flags |= 32
		}
	}

	// Stage 1: Reduce the problem by at least 1/2.
	// Sort all the LMS-substrings.
	getCounts_int(T, C, n, k)
	getBuckets_int(C, B, k, true) // Find ends of buckets
	for i = 0; i < n; i++ {
		SA[i] = 0
	}
	b = -1
	i = n - 1
	j = n
	m = 0
	c0 = int(T[n-1])
	// Skip the trailing L-type run.
	for {
		c1 = c0
		if i--; i < 0 {
			break
		}
		if c0 = int(T[i]); c0 < c1 {
			break
		}
	}
	// Scan left, dropping each LMS position into the end of its bucket.
	for i >= 0 {
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 > c1 {
				break
			}
		}
		if i >= 0 {
			if b >= 0 {
				SA[b] = j
			}
			B[c1]--
			b = B[c1]
			j = i
			m++
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 < c1 {
					break
				}
			}
		}
	}

	if m > 1 {
		if flags&(16|32) > 0 {
			// Flag-based naming path: prepare D (2*k phase slots) and
			// pre-flag the first entry of each nonempty bucket with +n.
			if flags&16 > 0 {
				D = make([]int, 2*k)
			} else {
				D = SA[bo-2*k:]
			}
			B[T[j+1]]++
			for i, j = 0, 0; i < k; i++ {
				j += C[i]
				if B[i] != j {
					SA[B[i]] += n
				}
				D[i] = 0
				D[i+k] = 0
			}
			sortLMS2_int(T, SA, C, B, D, n, k)
			name = postProcLMS2_int(SA, n, m)
		} else {
			// Comparison-based naming path.
			sortLMS1_int(T, SA, C, B, n, k)
			name = postProcLMS1_int(T, SA, n, m)
		}
	} else if m == 1 {
		SA[b] = j + 1
		name = 1
	} else {
		name = 0
	}

	// Stage 2: Solve the reduced problem.
	// Recurse if names are not yet unique.
	if name < m {
		newfs = n + fs - 2*m
		if flags&(1|4|8) == 0 {
			// Try to keep k words of free space reserved for B.
			if k+name <= newfs {
				newfs -= k
			} else {
				flags |= 8
			}
		}
		// RA holds the reduced string of LMS names, packed at the tail
		// of SA's storage.
		RA = SA[m+newfs:]
		for i, j = m+(n>>1)-1, m-1; m <= i; i-- {
			if SA[i] != 0 {
				RA[j] = SA[i] - 1
				j--
			}
		}
		computeSA_int(RA, SA, newfs, m, name)

		// Re-enumerate the LMS positions (same scan as Stage 1) so RA can
		// map reduced-problem indices back to positions in T.
		i = n - 1
		j = m - 1
		c0 = int(T[n-1])
		for {
			c1 = c0
			if i--; i < 0 {
				break
			}
			if c0 = int(T[i]); c0 < c1 {
				break
			}
		}
		for i >= 0 {
			for {
				c1 = c0
				if i--; i < 0 {
					break
				}
				if c0 = int(T[i]); c0 > c1 {
					break
				}
			}
			if i >= 0 {
				RA[j] = i + 1
				j--
				for {
					c1 = c0
					if i--; i < 0 {
						break
					}
					if c0 = int(T[i]); c0 < c1 {
						break
					}
				}
			}
		}
		// Translate the reduced suffix array into LMS positions of T.
		for i = 0; i < m; i++ {
			SA[i] = RA[SA[i]]
		}
		// The recursion may have clobbered heap/tail storage; reallocate
		// C/B as recorded by flags.
		if flags&4 > 0 {
			B = make([]int, k)
			C = B
		}
		if flags&2 > 0 {
			B = make([]int, k)
		}
	}

	// Stage 3: Induce the result for the original problem.
	if flags&8 > 0 {
		getCounts_int(T, C, n, k)
	}
	// Put all left-most S characters into their buckets.
	if m > 1 {
		getBuckets_int(C, B, k, true) // Find ends of buckets
		i = m - 1
		j = n
		p = SA[m-1]
		c1 = int(T[p])
		for {
			c0 = c1
			q = B[c0]
			// Zero the gap between buckets before placing LMS entries.
			for q < j {
				j--
				SA[j] = 0
			}
			for {
				j--
				SA[j] = p
				if i--; i < 0 {
					break
				}
				p = SA[i]
				if c1 = int(T[p]); c1 != c0 {
					break
				}
			}
			if i < 0 {
				break
			}
		}
		for j > 0 {
			j--
			SA[j] = 0
		}
	}
	induceSA_int(T, SA, C, B, n, k)
}
|
131
vendor/github.com/dsnet/compress/bzip2/mtf_rle2.go
generated
vendored
Normal file
131
vendor/github.com/dsnet/compress/bzip2/mtf_rle2.go
generated
vendored
Normal file
|
@ -0,0 +1,131 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/internal/errors"
|
||||
|
||||
// moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
|
||||
// Any runs of zeros in the encoded output will be replaced by a sequence of
|
||||
// RUNA and RUNB symbols are encode the length of the run.
|
||||
//
|
||||
// The RLE encoding used can actually be encoded to and decoded from using
|
||||
// normal two's complement arithmetic. The methodology for doing so is below.
|
||||
//
|
||||
// Assuming the following:
|
||||
// num: The value being encoded by RLE encoding.
|
||||
// run: A sequence of RUNA and RUNB symbols represented as a binary integer,
|
||||
// where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
|
||||
// symbols are at the least-significant bit positions.
|
||||
// cnt: The number of RUNA and RUNB symbols.
|
||||
//
|
||||
// Then the RLE encoding used by bzip2 has this mathematical property:
|
||||
// num+1 == (1<<cnt) | run
|
||||
type moveToFront struct {
	dictBuf [256]uint8 // Backing storage for the MTF dictionary
	dictLen int        // Number of valid symbols in dictBuf

	vals    []byte   // Reusable output buffer for Decode
	syms    []uint16 // Reusable output buffer for Encode
	blkSize int      // Maximum decoded block size; exceeding it is an error
}
|
||||
|
||||
// Init resets the transform with the given symbol dictionary (the distinct
// byte values present in the block, at most 256) and the maximum block size.
// It panics via panicf if the dictionary exceeds the backing buffer.
func (mtf *moveToFront) Init(dict []uint8, blkSize int) {
	if len(dict) > len(mtf.dictBuf) {
		panicf(errors.Internal, "alphabet too large")
	}
	copy(mtf.dictBuf[:], dict)
	mtf.dictLen = len(dict)
	mtf.blkSize = blkSize
}
|
||||
|
||||
// Encode applies the move-to-front transform followed by bzip2's RLE of
// zero-runs: MTF index 0 is never emitted directly; instead a run of zeros
// of length num is emitted as the bits of num+1 (excluding the top bit) as
// RUNA(0)/RUNB(1) symbols, least-significant first. Non-zero MTF indices are
// emitted as idx+1. The returned slice aliases mtf.syms and is only valid
// until the next call.
func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) {
	dict := mtf.dictBuf[:mtf.dictLen]
	syms = mtf.syms[:0]

	if len(vals) > mtf.blkSize {
		panicf(errors.Internal, "exceeded block size")
	}

	var lastNum uint32 // Length of the current run of MTF-index-0 symbols
	for _, val := range vals {
		// Normal move-to-front transform.
		var idx uint8 // Reverse lookup idx in dict
		for di, dv := range dict {
			if dv == val {
				idx = uint8(di)
				break
			}
		}
		copy(dict[1:], dict[:idx])
		dict[0] = val

		// Run-length encoding augmentation.
		if idx == 0 {
			lastNum++
			continue
		}
		if lastNum > 0 {
			// Flush the pending zero-run: emit the bits of lastNum+1
			// below its top bit, least-significant first (see num+1 ==
			// (1<<cnt)|run in the type's doc comment).
			for rc := lastNum + 1; rc != 1; rc >>= 1 {
				syms = append(syms, uint16(rc&1))
			}
			lastNum = 0
		}
		syms = append(syms, uint16(idx)+1)
	}
	// Flush any zero-run that reaches the end of the input.
	if lastNum > 0 {
		for rc := lastNum + 1; rc != 1; rc >>= 1 {
			syms = append(syms, uint16(rc&1))
		}
	}
	mtf.syms = syms
	return syms
}
|
||||
|
||||
// Decode inverts Encode: RUNA/RUNB symbols (0 and 1) accumulate into a
// zero-run length, which expands to repeated copies of the current dict
// head; any other symbol sym selects dict[sym-1] via the inverse MTF step.
// It panics via panicf when the decoded output would exceed the configured
// block size (lastCnt > 24 bounds the run so 1<<lastCnt cannot overflow the
// block-size check). The returned slice aliases mtf.vals and is only valid
// until the next call.
func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) {
	dict := mtf.dictBuf[:mtf.dictLen]
	vals = mtf.vals[:0]

	var lastCnt uint   // Number of RUNA/RUNB symbols accumulated
	var lastRun uint32 // Their bits, least-significant first
	for _, sym := range syms {
		// Run-length encoding augmentation.
		if sym < 2 {
			lastRun |= uint32(sym) << lastCnt
			lastCnt++
			continue
		}
		if lastCnt > 0 {
			// Expand the pending run: num == ((1<<cnt)|run) - 1.
			cnt := int((1<<lastCnt)|lastRun) - 1
			if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
				panicf(errors.Corrupted, "run-length decoding exceeded block size")
			}
			for i := cnt; i > 0; i-- {
				vals = append(vals, dict[0])
			}
			lastCnt, lastRun = 0, 0
		}

		// Normal move-to-front transform.
		val := dict[sym-1] // Forward lookup val in dict
		copy(dict[1:], dict[:sym-1])
		dict[0] = val

		if len(vals) >= mtf.blkSize {
			panicf(errors.Corrupted, "run-length decoding exceeded block size")
		}
		vals = append(vals, val)
	}
	// Expand any run that reaches the end of the symbol stream.
	if lastCnt > 0 {
		cnt := int((1<<lastCnt)|lastRun) - 1
		if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
			panicf(errors.Corrupted, "run-length decoding exceeded block size")
		}
		for i := cnt; i > 0; i-- {
			vals = append(vals, dict[0])
		}
	}
	mtf.vals = vals
	return vals
}
|
374
vendor/github.com/dsnet/compress/bzip2/prefix.go
generated
vendored
Normal file
374
vendor/github.com/dsnet/compress/bzip2/prefix.go
generated
vendored
Normal file
|
@ -0,0 +1,374 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
const (
|
||||
minNumTrees = 2
|
||||
maxNumTrees = 6
|
||||
|
||||
maxPrefixBits = 20 // Maximum bit-width of a prefix code
|
||||
maxNumSyms = 256 + 2 // Maximum number of symbols in the alphabet
|
||||
numBlockSyms = 50 // Number of bytes in a block
|
||||
)
|
||||
|
||||
// encSel and decSel are used to handle the prefix encoding for tree selectors.
|
||||
// The prefix encoding is as follows:
|
||||
//
|
||||
// Code TreeIdx
|
||||
// 0 <=> 0
|
||||
// 10 <=> 1
|
||||
// 110 <=> 2
|
||||
// 1110 <=> 3
|
||||
// 11110 <=> 4
|
||||
// 111110 <=> 5
|
||||
// 111111 <=> 6 Invalid tree index, so should fail
|
||||
//
|
||||
// encSel and decSel are built once at package init: a unary prefix code with
// maxNumTrees+1 entries, where tree index i maps to i ones followed by a
// zero (the last entry, all ones, is an intentionally invalid index so that
// corrupt input fails later). See the code table in the comment above.
var encSel, decSel = func() (e prefix.Encoder, d prefix.Decoder) {
	var selCodes [maxNumTrees + 1]prefix.PrefixCode
	for i := range selCodes {
		selCodes[i] = prefix.PrefixCode{Sym: uint32(i), Len: uint32(i + 1)}
	}
	// The final entry shares the previous length, completing the tree.
	selCodes[maxNumTrees] = prefix.PrefixCode{Sym: maxNumTrees, Len: maxNumTrees}
	prefix.GeneratePrefixes(selCodes[:])
	e.Init(selCodes[:])
	d.Init(selCodes[:])
	return
}()
|
||||
|
||||
// prefixReader wraps prefix.Reader with bzip2-specific bit-reading helpers.
type prefixReader struct{ prefix.Reader }

// Init initializes the reader over r in big-endian bit order (the true
// argument selects bit packing order — presumably MSB-first, as bzip2
// requires; confirm against prefix.Reader.Init).
func (pr *prefixReader) Init(r io.Reader) {
	pr.Reader.Init(r, true)
}
|
||||
|
||||
// ReadBitsBE64 reads nb bits (nb <= 64) in big-endian bit order and returns
// them right-aligned. Reads wider than 32 bits are split into two 32-bit
// reads and recombined, since the underlying reader yields at most 32 bits.
func (pr *prefixReader) ReadBitsBE64(nb uint) uint64 {
	if nb <= 32 {
		v := uint32(pr.ReadBits(nb))
		return uint64(internal.ReverseUint32N(v, nb))
	}
	v0 := internal.ReverseUint32(uint32(pr.ReadBits(32)))
	v1 := internal.ReverseUint32(uint32(pr.ReadBits(nb - 32)))
	v := uint64(v0)<<32 | uint64(v1)
	return v >> (64 - nb)
}
|
||||
|
||||
// ReadPrefixCodes reads the delta-encoded code lengths for each prefix tree
// from the stream and initializes the corresponding decoders. Each tree's
// encoding is a 5-bit starting length followed, per symbol, by a sequence of
// {1,b} pairs adjusting the length by ±1 and a terminating 0 bit. Trees that
// are not complete (over- or under-subscribed) are repaired through
// handleDegenerateCodes to mirror the C implementation's behavior.
func (pr *prefixReader) ReadPrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Decoder) {
	for i, pc := range codes {
		clen := int(pr.ReadBitsBE64(5))
		// sum tracks Kraft's inequality in fixed point; it hits exactly
		// zero for a complete tree.
		sum := 1 << maxPrefixBits
		for sym := range pc {
			for {
				if clen < 1 || clen > maxPrefixBits {
					panicf(errors.Corrupted, "invalid prefix bit-length: %d", clen)
				}

				// TryReadBits is the non-refilling fast path; fall back
				// to the blocking ReadBits when the bit buffer is empty.
				b, ok := pr.TryReadBits(1)
				if !ok {
					b = pr.ReadBits(1)
				}
				if b == 0 {
					break // 0 terminates this symbol's adjustments
				}

				b, ok = pr.TryReadBits(1)
				if !ok {
					b = pr.ReadBits(1)
				}
				clen -= int(b*2) - 1 // +1 or -1
			}
			pc[sym] = prefix.PrefixCode{Sym: uint32(sym), Len: uint32(clen)}
			sum -= (1 << maxPrefixBits) >> uint(clen)
		}

		if sum == 0 {
			// Fast path, but only handles complete trees.
			if err := prefix.GeneratePrefixes(pc); err != nil {
				errors.Panic(err) // Using complete trees; should never fail
			}
		} else {
			// Slow path, but handles anything.
			pc = handleDegenerateCodes(pc) // Never fails, but may fail later
			codes[i] = pc
		}
		trees[i].Init(pc)
	}
}
|
||||
|
||||
// prefixWriter wraps prefix.Writer with bzip2-specific bit-writing helpers,
// mirroring prefixReader.
type prefixWriter struct{ prefix.Writer }

// Init initializes the writer over w with the same bit order as
// prefixReader.Init.
func (pw *prefixWriter) Init(w io.Writer) {
	pw.Writer.Init(w, true)
}
|
||||
|
||||
func (pw *prefixWriter) WriteBitsBE64(v uint64, nb uint) {
|
||||
if nb <= 32 {
|
||||
v := internal.ReverseUint32N(uint32(v), nb)
|
||||
pw.WriteBits(uint(v), nb)
|
||||
return
|
||||
}
|
||||
v <<= (64 - nb)
|
||||
v0 := internal.ReverseUint32(uint32(v >> 32))
|
||||
v1 := internal.ReverseUint32(uint32(v))
|
||||
pw.WriteBits(uint(v0), 32)
|
||||
pw.WriteBits(uint(v1), nb-32)
|
||||
return
|
||||
}
|
||||
|
||||
// WritePrefixCodes emits the delta-encoded code lengths for each prefix
// tree, the exact inverse of prefixReader.ReadPrefixCodes: a 5-bit starting
// length, then per symbol a run of "10" (increment) or "11" (decrement)
// pairs followed by a terminating 0 bit. It also initializes the encoders.
func (pw *prefixWriter) WritePrefixCodes(codes []prefix.PrefixCodes, trees []prefix.Encoder) {
	for i, pc := range codes {
		if err := prefix.GeneratePrefixes(pc); err != nil {
			errors.Panic(err) // Using complete trees; should never fail
		}
		trees[i].Init(pc)

		clen := int(pc[0].Len)
		pw.WriteBitsBE64(uint64(clen), 5)
		for _, c := range pc {
			// NOTE(review): "11" decrements and "10" increments the running
			// length here; the reader applies clen -= b*2 - 1, which matches.
			for int(c.Len) < clen {
				pw.WriteBits(3, 2) // 11
				clen--
			}
			for int(c.Len) > clen {
				pw.WriteBits(1, 2) // 10
				clen++
			}
			pw.WriteBits(0, 1) // Terminate this symbol's adjustments
		}
	}
}
|
||||
|
||||
// handleDegenerateCodes converts a degenerate tree into a canonical tree.
|
||||
//
|
||||
// For example, when the input is an under-subscribed tree:
|
||||
// input: []PrefixCode{
|
||||
// {Sym: 0, Len: 3},
|
||||
// {Sym: 1, Len: 4},
|
||||
// {Sym: 2, Len: 3},
|
||||
// }
|
||||
// output: []PrefixCode{
|
||||
// {Sym: 0, Len: 3, Val: 0}, // 000
|
||||
// {Sym: 1, Len: 4, Val: 2}, // 0010
|
||||
// {Sym: 2, Len: 3, Val: 4}, // 100
|
||||
// {Sym: 258, Len: 4, Val: 10}, // 1010
|
||||
// {Sym: 259, Len: 3, Val: 6}, // 110
|
||||
// {Sym: 260, Len: 1, Val: 1}, // 1
|
||||
// }
|
||||
//
|
||||
// For example, when the input is an over-subscribed tree:
|
||||
// input: []PrefixCode{
|
||||
// {Sym: 0, Len: 1},
|
||||
// {Sym: 1, Len: 3},
|
||||
// {Sym: 2, Len: 4},
|
||||
// {Sym: 3, Len: 3},
|
||||
// {Sym: 4, Len: 2},
|
||||
// }
|
||||
// output: []PrefixCode{
|
||||
// {Sym: 0, Len: 1, Val: 0}, // 0
|
||||
// {Sym: 1, Len: 3, Val: 3}, // 011
|
||||
// {Sym: 3, Len: 3, Val: 7}, // 111
|
||||
// {Sym: 4, Len: 2, Val: 1}, // 01
|
||||
// }
|
||||
// handleDegenerateCodes converts an over- or under-subscribed set of code
// lengths into a complete canonical tree by replaying the original C
// decoder's table construction and padding unreachable/ambiguous leaves with
// invalid symbols (Sym >= maxNumSyms), so that decoding matches libbzip2's
// behavior bit-for-bit. See the worked examples in the comment above.
func handleDegenerateCodes(codes prefix.PrefixCodes) prefix.PrefixCodes {
	// Since there is no formal definition for the BZip2 format, there is no
	// specification that says that the code lengths must form a complete
	// prefix tree (IE: it is neither over-subscribed nor under-subscribed).
	// Thus, the original C implementation becomes the reference for how prefix
	// decoding is done in these edge cases. Unfortunately, the C version does
	// not error when an invalid tree is used, but rather allows decoding to
	// continue and only errors if some bit pattern happens to cause an error.
	// Thus, it is possible for an invalid tree to end up decoding an input
	// "properly" so long as invalid bit patterns are not present. In order to
	// replicate this non-specified behavior, we use a ported version of the
	// C code to generate the codes as a valid canonical tree by substituting
	// invalid nodes with invalid symbols.
	//
	// ====================================================
	// This program, "bzip2", the associated library "libbzip2", and all
	// documentation, are copyright (C) 1996-2010 Julian R Seward. All
	// rights reserved.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions
	// are met:
	//
	// 1. Redistributions of source code must retain the above copyright
	// notice, this list of conditions and the following disclaimer.
	//
	// 2. The origin of this software must not be misrepresented; you must
	// not claim that you wrote the original software. If you use this
	// software in a product, an acknowledgment in the product
	// documentation would be appreciated but is not required.
	//
	// 3. Altered source versions must be plainly marked as such, and must
	// not be misrepresented as being the original software.
	//
	// 4. The name of the author may not be used to endorse or promote
	// products derived from this software without specific prior written
	// permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
	// OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
	// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
	// DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
	// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
	// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
	// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
	// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
	// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
	//
	// Julian Seward, jseward@bzip.org
	// bzip2/libbzip2 version 1.0.6 of 6 September 2010
	// ====================================================
	var (
		limits [maxPrefixBits + 2]int32
		bases  [maxPrefixBits + 2]int32
		perms  [maxNumSyms]int32

		minLen = uint32(maxPrefixBits)
		maxLen = uint32(0)
	)

	// Status results from attempting to decode one candidate bit pattern.
	const (
		statusOkay = iota
		statusInvalid
		statusNeedBits
		statusMaxBits
	)

	// createTables is the BZ2_hbCreateDecodeTables function from the C code.
	createTables := func(codes []prefix.PrefixCode) {
		for _, c := range codes {
			if c.Len > maxLen {
				maxLen = c.Len
			}
			if c.Len < minLen {
				minLen = c.Len
			}
		}

		var pp int
		for i := minLen; i <= maxLen; i++ {
			for j, c := range codes {
				if c.Len == i {
					perms[pp] = int32(j)
					pp++
				}
			}
		}

		var vec int32
		for _, c := range codes {
			bases[c.Len+1]++
		}
		for i := 1; i < len(bases); i++ {
			bases[i] += bases[i-1]
		}
		for i := minLen; i <= maxLen; i++ {
			vec += bases[i+1] - bases[i]
			limits[i] = vec - 1
			vec <<= 1
		}
		for i := minLen + 1; i <= maxLen; i++ {
			bases[i] = ((limits[i-1] + 1) << 1) - bases[i]
		}
	}

	// getSymbol is the GET_MTF_VAL macro from the C code.
	getSymbol := func(c prefix.PrefixCode) (uint32, int) {
		v := internal.ReverseUint32(c.Val)
		n := c.Len

		zn := minLen
		if zn > n {
			return 0, statusNeedBits
		}
		zvec := int32(v >> (32 - zn))
		v <<= zn
		for {
			if zn > maxLen {
				return 0, statusMaxBits
			}
			if zvec <= limits[zn] {
				break
			}
			zn++
			if zn > n {
				return 0, statusNeedBits
			}
			zvec = (zvec << 1) | int32(v>>31)
			v <<= 1
		}
		if zvec-bases[zn] < 0 || zvec-bases[zn] >= maxNumSyms {
			return 0, statusInvalid
		}
		return uint32(perms[zvec-bases[zn]]), statusOkay
	}

	// Step 1: Create the prefix trees using the C algorithm.
	createTables(codes)

	// Step 2: Starting with the shortest bit pattern, explore the whole tree.
	// If tree is under-subscribed, the worst-case runtime is O(1<<maxLen).
	// If tree is over-subscribed, the worst-case runtime is O(maxNumSyms).
	var pcodesArr [2 * maxNumSyms]prefix.PrefixCode
	pcodes := pcodesArr[:maxNumSyms]
	var exploreCode func(prefix.PrefixCode) bool
	exploreCode = func(c prefix.PrefixCode) (term bool) {
		sym, status := getSymbol(c)
		switch status {
		case statusOkay:
			// This code is valid, so insert it.
			c.Sym = sym
			pcodes[sym] = c
			term = true
		case statusInvalid:
			// This code is invalid, so insert an invalid symbol.
			c.Sym = uint32(len(pcodes))
			pcodes = append(pcodes, c)
			term = true
		case statusNeedBits:
			// This code is too short, so explore both children.
			c.Len++
			c0, c1 := c, c
			c1.Val |= 1 << (c.Len - 1)

			b0 := exploreCode(c0)
			b1 := exploreCode(c1)
			switch {
			case !b0 && b1:
				// Only one child terminated: pad the other with an
				// invalid symbol so the canonical tree stays complete.
				c0.Sym = uint32(len(pcodes))
				pcodes = append(pcodes, c0)
			case !b1 && b0:
				c1.Sym = uint32(len(pcodes))
				pcodes = append(pcodes, c1)
			}
			term = b0 || b1
		case statusMaxBits:
			// This code is too long, so report it upstream.
			term = false
		}
		return term // Did this code terminate?
	}
	exploreCode(prefix.PrefixCode{})

	// Step 3: Copy new sparse codes to old output codes.
	codes = codes[:0]
	for _, c := range pcodes {
		if c.Len > 0 {
			codes = append(codes, c)
		}
	}
	return codes
}
|
274
vendor/github.com/dsnet/compress/bzip2/reader.go
generated
vendored
Normal file
274
vendor/github.com/dsnet/compress/bzip2/reader.go
generated
vendored
Normal file
|
@ -0,0 +1,274 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
// Reader decompresses a bzip2 stream. It is created by NewReader and
// implements io.Reader; Close reports any persistent decoding error.
type Reader struct {
	InputOffset  int64 // Total number of bytes read from underlying io.Reader
	OutputOffset int64 // Total number of bytes emitted from Read

	rd       prefixReader
	err      error  // Persistent error; sticky across Read calls
	level    int    // The current compression level
	rdHdrFtr int    // Number of times we read the stream header and footer
	blkCRC   uint32 // CRC-32 IEEE of each block (as stored)
	endCRC   uint32 // Checksum of all blocks using bzip2's custom method

	crc crc
	mtf moveToFront
	bwt burrowsWheelerTransform
	rle runLengthEncoding

	// These fields are allocated with Reader and re-used later.
	treeSels []uint8
	codes2D  [maxNumTrees][maxNumSyms]prefix.PrefixCode
	codes1D  [maxNumTrees]prefix.PrefixCodes
	trees1D  [maxNumTrees]prefix.Decoder
	syms     []uint16

	fuzzReader // Exported functionality when fuzz testing
}
|
||||
|
||||
// ReaderConfig configures NewReader. It currently carries no options; the
// blank field forces callers to use keyed literals so fields can be added
// compatibly later.
type ReaderConfig struct {
	_ struct{} // Blank field to prevent unkeyed struct literals
}
|
||||
|
||||
// NewReader returns a Reader that decompresses the bzip2 stream from r.
// conf is accepted for forward compatibility but is currently unused; the
// returned error is always nil in this implementation.
func NewReader(r io.Reader, conf *ReaderConfig) (*Reader, error) {
	zr := new(Reader)
	zr.Reset(r)
	return zr, nil
}
|
||||
|
||||
// Reset reinitializes the Reader to decompress from r, discarding all
// stream state while retaining reusable allocations (buffers, trees, and
// the transform scratch space) to avoid reallocating per stream. It always
// returns nil.
func (zr *Reader) Reset(r io.Reader) error {
	*zr = Reader{
		rd: zr.rd,

		mtf: zr.mtf,
		bwt: zr.bwt,
		rle: zr.rle,

		treeSels: zr.treeSels,
		trees1D:  zr.trees1D,
		syms:     zr.syms,
	}
	zr.rd.Init(r)
	return nil
}
|
||||
|
||||
// Read implements io.Reader. It drains the current run-length decoder
// first; when that is exhausted, it reads the next unit from the stream —
// alternating (tracked by rdHdrFtr parity) between a stream header and a
// block/footer — inside a panic/recover region, since the decoding helpers
// report corruption via panicf. Errors are sticky in zr.err.
func (zr *Reader) Read(buf []byte) (int, error) {
	for {
		// Fast path: emit bytes already decoded by the RLE stage.
		cnt, err := zr.rle.Read(buf)
		if err != rleDone && zr.err == nil {
			zr.err = err
		}
		if cnt > 0 {
			zr.crc.update(buf[:cnt])
			zr.OutputOffset += int64(cnt)
			return cnt, nil
		}
		if zr.err != nil || len(buf) == 0 {
			return 0, zr.err
		}

		// Read the next chunk.
		zr.rd.Offset = zr.InputOffset
		func() {
			defer errors.Recover(&zr.err) // Converts panicf panics into zr.err
			if zr.rdHdrFtr%2 == 0 {
				// Check if we are already at EOF.
				if err := zr.rd.PullBits(1); err != nil {
					if err == io.ErrUnexpectedEOF && zr.rdHdrFtr > 0 {
						err = io.EOF // EOF is okay if we read at least one stream
					}
					errors.Panic(err)
				}

				// Read stream header.
				if zr.rd.ReadBitsBE64(16) != hdrMagic {
					panicf(errors.Corrupted, "invalid stream magic")
				}
				if ver := zr.rd.ReadBitsBE64(8); ver != 'h' {
					if ver == '0' {
						panicf(errors.Deprecated, "bzip1 format is not supported")
					}
					panicf(errors.Corrupted, "invalid version: %q", ver)
				}
				lvl := int(zr.rd.ReadBitsBE64(8)) - '0'
				if lvl < BestSpeed || lvl > BestCompression {
					panicf(errors.Corrupted, "invalid block size: %d", lvl*blockSize)
				}
				zr.level = lvl
				zr.rdHdrFtr++
			} else {
				// Check and update the CRC.
				if internal.GoFuzz {
					zr.updateChecksum(-1, zr.crc.val) // Update with value
					zr.blkCRC = zr.crc.val            // Suppress CRC failures
				}
				if zr.blkCRC != zr.crc.val {
					panicf(errors.Corrupted, "mismatching block checksum")
				}
				// Fold the block CRC into the stream CRC (rotate-left-1 XOR).
				zr.endCRC = (zr.endCRC<<1 | zr.endCRC>>31) ^ zr.blkCRC
			}
			buf := zr.decodeBlock()
			zr.rle.Init(buf)
		}()
		if zr.InputOffset, err = zr.rd.Flush(); zr.err == nil {
			zr.err = err
		}
		if zr.err != nil {
			zr.err = errWrap(zr.err, errors.Corrupted)
			return 0, zr.err
		}
	}
}
|
||||
|
||||
// Close marks the Reader closed. If decoding ended cleanly (EOF) or the
// Reader was already closed, it poisons future Reads and returns nil;
// otherwise it returns the persistent decoding error.
func (zr *Reader) Close() error {
	if zr.err == io.EOF || zr.err == errClosed {
		zr.rle.Init(nil) // Make sure future reads fail
		zr.err = errClosed
		return nil
	}
	return zr.err // Return the persistent error
}
|
||||
|
||||
func (zr *Reader) decodeBlock() []byte {
|
||||
if magic := zr.rd.ReadBitsBE64(48); magic != blkMagic {
|
||||
if magic == endMagic {
|
||||
endCRC := uint32(zr.rd.ReadBitsBE64(32))
|
||||
if internal.GoFuzz {
|
||||
zr.updateChecksum(zr.rd.BitsRead()-32, zr.endCRC)
|
||||
endCRC = zr.endCRC // Suppress CRC failures
|
||||
}
|
||||
if zr.endCRC != endCRC {
|
||||
panicf(errors.Corrupted, "mismatching stream checksum")
|
||||
}
|
||||
zr.endCRC = 0
|
||||
zr.rd.ReadPads()
|
||||
zr.rdHdrFtr++
|
||||
return nil
|
||||
}
|
||||
panicf(errors.Corrupted, "invalid block or footer magic")
|
||||
}
|
||||
|
||||
zr.crc.val = 0
|
||||
zr.blkCRC = uint32(zr.rd.ReadBitsBE64(32))
|
||||
if internal.GoFuzz {
|
||||
zr.updateChecksum(zr.rd.BitsRead()-32, 0) // Record offset only
|
||||
}
|
||||
if zr.rd.ReadBitsBE64(1) != 0 {
|
||||
panicf(errors.Deprecated, "block randomization is not supported")
|
||||
}
|
||||
|
||||
// Read BWT related fields.
|
||||
ptr := int(zr.rd.ReadBitsBE64(24)) // BWT origin pointer
|
||||
|
||||
// Read MTF related fields.
|
||||
var dictArr [256]uint8
|
||||
dict := dictArr[:0]
|
||||
bmapHi := uint16(zr.rd.ReadBits(16))
|
||||
for i := 0; i < 256; i, bmapHi = i+16, bmapHi>>1 {
|
||||
if bmapHi&1 > 0 {
|
||||
bmapLo := uint16(zr.rd.ReadBits(16))
|
||||
for j := 0; j < 16; j, bmapLo = j+1, bmapLo>>1 {
|
||||
if bmapLo&1 > 0 {
|
||||
dict = append(dict, uint8(i+j))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Step 1: Prefix encoding.
|
||||
syms := zr.decodePrefix(len(dict))
|
||||
|
||||
// Step 2: Move-to-front transform and run-length encoding.
|
||||
zr.mtf.Init(dict, zr.level*blockSize)
|
||||
buf := zr.mtf.Decode(syms)
|
||||
|
||||
// Step 3: Burrows-Wheeler transformation.
|
||||
if ptr >= len(buf) {
|
||||
panicf(errors.Corrupted, "origin pointer (0x%06x) exceeds block size: %d", ptr, len(buf))
|
||||
}
|
||||
zr.bwt.Decode(buf, ptr)
|
||||
|
||||
return buf
|
||||
}
|
||||
|
||||
func (zr *Reader) decodePrefix(numSyms int) (syms []uint16) {
|
||||
numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOF symbols
|
||||
if numSyms < 3 {
|
||||
panicf(errors.Corrupted, "not enough prefix symbols: %d", numSyms)
|
||||
}
|
||||
|
||||
// Read information about the trees and tree selectors.
|
||||
var mtf internal.MoveToFront
|
||||
numTrees := int(zr.rd.ReadBitsBE64(3))
|
||||
if numTrees < minNumTrees || numTrees > maxNumTrees {
|
||||
panicf(errors.Corrupted, "invalid number of prefix trees: %d", numTrees)
|
||||
}
|
||||
numSels := int(zr.rd.ReadBitsBE64(15))
|
||||
if cap(zr.treeSels) < numSels {
|
||||
zr.treeSels = make([]uint8, numSels)
|
||||
}
|
||||
treeSels := zr.treeSels[:numSels]
|
||||
for i := range treeSels {
|
||||
sym, ok := zr.rd.TryReadSymbol(&decSel)
|
||||
if !ok {
|
||||
sym = zr.rd.ReadSymbol(&decSel)
|
||||
}
|
||||
if int(sym) >= numTrees {
|
||||
panicf(errors.Corrupted, "invalid prefix tree selector: %d", sym)
|
||||
}
|
||||
treeSels[i] = uint8(sym)
|
||||
}
|
||||
mtf.Decode(treeSels)
|
||||
zr.treeSels = treeSels
|
||||
|
||||
// Initialize prefix codes.
|
||||
for i := range zr.codes2D[:numTrees] {
|
||||
zr.codes1D[i] = zr.codes2D[i][:numSyms]
|
||||
}
|
||||
zr.rd.ReadPrefixCodes(zr.codes1D[:numTrees], zr.trees1D[:numTrees])
|
||||
|
||||
// Read prefix encoded symbols of compressed data.
|
||||
var tree *prefix.Decoder
|
||||
var blkLen, selIdx int
|
||||
syms = zr.syms[:0]
|
||||
for {
|
||||
if blkLen == 0 {
|
||||
blkLen = numBlockSyms
|
||||
if selIdx >= len(treeSels) {
|
||||
panicf(errors.Corrupted, "not enough prefix tree selectors")
|
||||
}
|
||||
tree = &zr.trees1D[treeSels[selIdx]]
|
||||
selIdx++
|
||||
}
|
||||
blkLen--
|
||||
sym, ok := zr.rd.TryReadSymbol(tree)
|
||||
if !ok {
|
||||
sym = zr.rd.ReadSymbol(tree)
|
||||
}
|
||||
|
||||
if int(sym) == numSyms-1 {
|
||||
break // EOF marker
|
||||
}
|
||||
if int(sym) >= numSyms {
|
||||
panicf(errors.Corrupted, "invalid prefix symbol: %d", sym)
|
||||
}
|
||||
if len(syms) >= zr.level*blockSize {
|
||||
panicf(errors.Corrupted, "number of prefix symbols exceeds block size")
|
||||
}
|
||||
syms = append(syms, uint16(sym))
|
||||
}
|
||||
zr.syms = syms
|
||||
return syms
|
||||
}
|
101
vendor/github.com/dsnet/compress/bzip2/rle1.go
generated
vendored
Normal file
101
vendor/github.com/dsnet/compress/bzip2/rle1.go
generated
vendored
Normal file
|
@ -0,0 +1,101 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import "github.com/dsnet/compress/internal/errors"
|
||||
|
||||
// rleDone is a special "error" to indicate that the RLE stage is done.
|
||||
var rleDone = errorf(errors.Unknown, "RLE1 stage is completed")
|
||||
|
||||
// runLengthEncoding implements the first RLE stage of bzip2. Every sequence
|
||||
// of 4..255 duplicated bytes is replaced by only the first 4 bytes, and a
|
||||
// single byte representing the repeat length. Similar to the C bzip2
|
||||
// implementation, the encoder will always terminate repeat sequences with a
|
||||
// count (even if it is the end of the buffer), and it will also never produce
|
||||
// run lengths of 256..259. The decoder can handle the latter case.
|
||||
//
|
||||
// For example, if the input was:
|
||||
// input: "AAAAAAABBBBCCCD"
|
||||
//
|
||||
// Then the output will be:
|
||||
// output: "AAAA\x03BBBB\x00CCCD"
|
||||
type runLengthEncoding struct {
|
||||
buf []byte
|
||||
idx int
|
||||
lastVal byte
|
||||
lastCnt int
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Init(buf []byte) {
|
||||
*rle = runLengthEncoding{buf: buf}
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Write(buf []byte) (int, error) {
|
||||
for i, b := range buf {
|
||||
if rle.lastVal != b {
|
||||
rle.lastCnt = 0
|
||||
}
|
||||
rle.lastCnt++
|
||||
switch {
|
||||
case rle.lastCnt < 4:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
case rle.lastCnt == 4:
|
||||
if rle.idx+1 >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
rle.buf[rle.idx] = 0
|
||||
rle.idx++
|
||||
case rle.lastCnt < 256:
|
||||
rle.buf[rle.idx-1]++
|
||||
default:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
rle.lastCnt = 1
|
||||
rle.buf[rle.idx] = b
|
||||
rle.idx++
|
||||
}
|
||||
rle.lastVal = b
|
||||
}
|
||||
return len(buf), nil
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Read(buf []byte) (int, error) {
|
||||
for i := range buf {
|
||||
switch {
|
||||
case rle.lastCnt == -4:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, errorf(errors.Corrupted, "missing terminating run-length repeater")
|
||||
}
|
||||
rle.lastCnt = int(rle.buf[rle.idx])
|
||||
rle.idx++
|
||||
if rle.lastCnt > 0 {
|
||||
break // Break the switch
|
||||
}
|
||||
fallthrough // Count was zero, continue the work
|
||||
case rle.lastCnt <= 0:
|
||||
if rle.idx >= len(rle.buf) {
|
||||
return i, rleDone
|
||||
}
|
||||
b := rle.buf[rle.idx]
|
||||
rle.idx++
|
||||
if b != rle.lastVal {
|
||||
rle.lastCnt = 0
|
||||
rle.lastVal = b
|
||||
}
|
||||
}
|
||||
buf[i] = rle.lastVal
|
||||
rle.lastCnt--
|
||||
}
|
||||
return len(buf), nil
|
||||
}
|
||||
|
||||
func (rle *runLengthEncoding) Bytes() []byte { return rle.buf[:rle.idx] }
|
307
vendor/github.com/dsnet/compress/bzip2/writer.go
generated
vendored
Normal file
307
vendor/github.com/dsnet/compress/bzip2/writer.go
generated
vendored
Normal file
|
@ -0,0 +1,307 @@
|
|||
// Copyright 2015, Joe Tsai. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE.md file.
|
||||
|
||||
package bzip2
|
||||
|
||||
import (
|
||||
"io"
|
||||
|
||||
"github.com/dsnet/compress/internal"
|
||||
"github.com/dsnet/compress/internal/errors"
|
||||
"github.com/dsnet/compress/internal/prefix"
|
||||
)
|
||||
|
||||
type Writer struct {
|
||||
InputOffset int64 // Total number of bytes issued to Write
|
||||
OutputOffset int64 // Total number of bytes written to underlying io.Writer
|
||||
|
||||
wr prefixWriter
|
||||
err error
|
||||
level int // The current compression level
|
||||
wrHdr bool // Have we written the stream header?
|
||||
blkCRC uint32 // CRC-32 IEEE of each block
|
||||
endCRC uint32 // Checksum of all blocks using bzip2's custom method
|
||||
|
||||
crc crc
|
||||
rle runLengthEncoding
|
||||
bwt burrowsWheelerTransform
|
||||
mtf moveToFront
|
||||
|
||||
// These fields are allocated with Writer and re-used later.
|
||||
buf []byte
|
||||
treeSels []uint8
|
||||
treeSelsMTF []uint8
|
||||
codes2D [maxNumTrees][maxNumSyms]prefix.PrefixCode
|
||||
codes1D [maxNumTrees]prefix.PrefixCodes
|
||||
trees1D [maxNumTrees]prefix.Encoder
|
||||
}
|
||||
|
||||
type WriterConfig struct {
|
||||
Level int
|
||||
|
||||
_ struct{} // Blank field to prevent unkeyed struct literals
|
||||
}
|
||||
|
||||
func NewWriter(w io.Writer, conf *WriterConfig) (*Writer, error) {
|
||||
var lvl int
|
||||
if conf != nil {
|
||||
lvl = conf.Level
|
||||
}
|
||||
if lvl == 0 {
|
||||
lvl = DefaultCompression
|
||||
}
|
||||
if lvl < BestSpeed || lvl > BestCompression {
|
||||
return nil, errorf(errors.Invalid, "compression level: %d", lvl)
|
||||
}
|
||||
zw := new(Writer)
|
||||
zw.level = lvl
|
||||
zw.Reset(w)
|
||||
return zw, nil
|
||||
}
|
||||
|
||||
func (zw *Writer) Reset(w io.Writer) error {
|
||||
*zw = Writer{
|
||||
wr: zw.wr,
|
||||
level: zw.level,
|
||||
|
||||
rle: zw.rle,
|
||||
bwt: zw.bwt,
|
||||
mtf: zw.mtf,
|
||||
|
||||
buf: zw.buf,
|
||||
treeSels: zw.treeSels,
|
||||
treeSelsMTF: zw.treeSelsMTF,
|
||||
trees1D: zw.trees1D,
|
||||
}
|
||||
zw.wr.Init(w)
|
||||
if len(zw.buf) != zw.level*blockSize {
|
||||
zw.buf = make([]byte, zw.level*blockSize)
|
||||
}
|
||||
zw.rle.Init(zw.buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (zw *Writer) Write(buf []byte) (int, error) {
|
||||
if zw.err != nil {
|
||||
return 0, zw.err
|
||||
}
|
||||
|
||||
cnt := len(buf)
|
||||
for {
|
||||
wrCnt, err := zw.rle.Write(buf)
|
||||
if err != rleDone && zw.err == nil {
|
||||
zw.err = err
|
||||
}
|
||||
zw.crc.update(buf[:wrCnt])
|
||||
buf = buf[wrCnt:]
|
||||
if len(buf) == 0 {
|
||||
zw.InputOffset += int64(cnt)
|
||||
return cnt, nil
|
||||
}
|
||||
if zw.err = zw.flush(); zw.err != nil {
|
||||
return 0, zw.err
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (zw *Writer) flush() error {
|
||||
vals := zw.rle.Bytes()
|
||||
if len(vals) == 0 {
|
||||
return nil
|
||||
}
|
||||
zw.wr.Offset = zw.OutputOffset
|
||||
func() {
|
||||
defer errors.Recover(&zw.err)
|
||||
if !zw.wrHdr {
|
||||
// Write stream header.
|
||||
zw.wr.WriteBitsBE64(hdrMagic, 16)
|
||||
zw.wr.WriteBitsBE64('h', 8)
|
||||
zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
|
||||
zw.wrHdr = true
|
||||
}
|
||||
zw.encodeBlock(vals)
|
||||
}()
|
||||
var err error
|
||||
if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
|
||||
zw.err = err
|
||||
}
|
||||
if zw.err != nil {
|
||||
zw.err = errWrap(zw.err, errors.Internal)
|
||||
return zw.err
|
||||
}
|
||||
zw.endCRC = (zw.endCRC<<1 | zw.endCRC>>31) ^ zw.blkCRC
|
||||
zw.blkCRC = 0
|
||||
zw.rle.Init(zw.buf)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (zw *Writer) Close() error {
|
||||
if zw.err == errClosed {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush RLE buffer if there is left-over data.
|
||||
if zw.err = zw.flush(); zw.err != nil {
|
||||
return zw.err
|
||||
}
|
||||
|
||||
// Write stream footer.
|
||||
zw.wr.Offset = zw.OutputOffset
|
||||
func() {
|
||||
defer errors.Recover(&zw.err)
|
||||
if !zw.wrHdr {
|
||||
// Write stream header.
|
||||
zw.wr.WriteBitsBE64(hdrMagic, 16)
|
||||
zw.wr.WriteBitsBE64('h', 8)
|
||||
zw.wr.WriteBitsBE64(uint64('0'+zw.level), 8)
|
||||
zw.wrHdr = true
|
||||
}
|
||||
zw.wr.WriteBitsBE64(endMagic, 48)
|
||||
zw.wr.WriteBitsBE64(uint64(zw.endCRC), 32)
|
||||
zw.wr.WritePads(0)
|
||||
}()
|
||||
var err error
|
||||
if zw.OutputOffset, err = zw.wr.Flush(); zw.err == nil {
|
||||
zw.err = err
|
||||
}
|
||||
if zw.err != nil {
|
||||
zw.err = errWrap(zw.err, errors.Internal)
|
||||
return zw.err
|
||||
}
|
||||
|
||||
zw.err = errClosed
|
||||
return nil
|
||||
}
|
||||
|
||||
func (zw *Writer) encodeBlock(buf []byte) {
|
||||
zw.blkCRC = zw.crc.val
|
||||
zw.wr.WriteBitsBE64(blkMagic, 48)
|
||||
zw.wr.WriteBitsBE64(uint64(zw.blkCRC), 32)
|
||||
zw.wr.WriteBitsBE64(0, 1)
|
||||
zw.crc.val = 0
|
||||
|
||||
// Step 1: Burrows-Wheeler transformation.
|
||||
ptr := zw.bwt.Encode(buf)
|
||||
zw.wr.WriteBitsBE64(uint64(ptr), 24)
|
||||
|
||||
// Step 2: Move-to-front transform and run-length encoding.
|
||||
var dictMap [256]bool
|
||||
for _, c := range buf {
|
||||
dictMap[c] = true
|
||||
}
|
||||
|
||||
var dictArr [256]uint8
|
||||
var bmapLo [16]uint16
|
||||
dict := dictArr[:0]
|
||||
bmapHi := uint16(0)
|
||||
for i, b := range dictMap {
|
||||
if b {
|
||||
c := uint8(i)
|
||||
dict = append(dict, c)
|
||||
bmapHi |= 1 << (c >> 4)
|
||||
bmapLo[c>>4] |= 1 << (c & 0xf)
|
||||
}
|
||||
}
|
||||
|
||||
zw.wr.WriteBits(uint(bmapHi), 16)
|
||||
for _, m := range bmapLo {
|
||||
if m > 0 {
|
||||
zw.wr.WriteBits(uint(m), 16)
|
||||
}
|
||||
}
|
||||
|
||||
zw.mtf.Init(dict, len(buf))
|
||||
syms := zw.mtf.Encode(buf)
|
||||
|
||||
// Step 3: Prefix encoding.
|
||||
zw.encodePrefix(syms, len(dict))
|
||||
}
|
||||
|
||||
func (zw *Writer) encodePrefix(syms []uint16, numSyms int) {
|
||||
numSyms += 2 // Remove 0 symbol, add RUNA, RUNB, and EOB symbols
|
||||
if numSyms < 3 {
|
||||
panicf(errors.Internal, "unable to encode EOB marker")
|
||||
}
|
||||
syms = append(syms, uint16(numSyms-1)) // EOB marker
|
||||
|
||||
// Compute number of prefix trees needed.
|
||||
numTrees := maxNumTrees
|
||||
for i, lim := range []int{200, 600, 1200, 2400} {
|
||||
if len(syms) < lim {
|
||||
numTrees = minNumTrees + i
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Compute number of block selectors.
|
||||
numSels := (len(syms) + numBlockSyms - 1) / numBlockSyms
|
||||
if cap(zw.treeSels) < numSels {
|
||||
zw.treeSels = make([]uint8, numSels)
|
||||
}
|
||||
treeSels := zw.treeSels[:numSels]
|
||||
for i := range treeSels {
|
||||
treeSels[i] = uint8(i % numTrees)
|
||||
}
|
||||
|
||||
// Initialize prefix codes.
|
||||
for i := range zw.codes2D[:numTrees] {
|
||||
pc := zw.codes2D[i][:numSyms]
|
||||
for j := range pc {
|
||||
pc[j] = prefix.PrefixCode{Sym: uint32(j)}
|
||||
}
|
||||
zw.codes1D[i] = pc
|
||||
}
|
||||
|
||||
// First cut at assigning prefix trees to each group.
|
||||
var codes prefix.PrefixCodes
|
||||
var blkLen, selIdx int
|
||||
for _, sym := range syms {
|
||||
if blkLen == 0 {
|
||||
blkLen = numBlockSyms
|
||||
codes = zw.codes2D[treeSels[selIdx]][:numSyms]
|
||||
selIdx++
|
||||
}
|
||||
blkLen--
|
||||
codes[sym].Cnt++
|
||||
}
|
||||
|
||||
// TODO(dsnet): Use K-means to cluster groups to each prefix tree.
|
||||
|
||||
// Generate lengths and prefixes based on symbol frequencies.
|
||||
for i := range zw.trees1D[:numTrees] {
|
||||
pc := prefix.PrefixCodes(zw.codes2D[i][:numSyms])
|
||||
pc.SortByCount()
|
||||
if err := prefix.GenerateLengths(pc, maxPrefixBits); err != nil {
|
||||
errors.Panic(err)
|
||||
}
|
||||
pc.SortBySymbol()
|
||||
}
|
||||
|
||||
// Write out information about the trees and tree selectors.
|
||||
var mtf internal.MoveToFront
|
||||
zw.wr.WriteBitsBE64(uint64(numTrees), 3)
|
||||
zw.wr.WriteBitsBE64(uint64(numSels), 15)
|
||||
zw.treeSelsMTF = append(zw.treeSelsMTF[:0], treeSels...)
|
||||
mtf.Encode(zw.treeSelsMTF)
|
||||
for _, sym := range zw.treeSelsMTF {
|
||||
zw.wr.WriteSymbol(uint(sym), &encSel)
|
||||
}
|
||||
zw.wr.WritePrefixCodes(zw.codes1D[:numTrees], zw.trees1D[:numTrees])
|
||||
|
||||
// Write out prefix encoded symbols of compressed data.
|
||||
var tree *prefix.Encoder
|
||||
blkLen, selIdx = 0, 0
|
||||
for _, sym := range syms {
|
||||
if blkLen == 0 {
|
||||
blkLen = numBlockSyms
|
||||
tree = &zw.trees1D[treeSels[selIdx]]
|
||||
selIdx++
|
||||
}
|
||||
blkLen--
|
||||
ok := zw.wr.TryWriteSymbol(uint(sym), tree)
|
||||
if !ok {
|
||||
zw.wr.WriteSymbol(uint(sym), tree)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue