forked from forgejo/forgejo
Dump: add output format tar and output to stdout (#10376)
* Dump: Use mholt/archive/v3 to support tar including many compressions Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Allow dump output to stdout Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: Fixed bug present since #6677 where SessionConfig.Provider is never "file" Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never pack RepoRootPath, LFS.ContentPath and LogRootPath when they are below AppDataPath Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: also dump LFS (fixes #10058) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Dump: never dump CustomPath if CustomPath is a subdir of or equal to AppDataPath (fixes #10365) Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * Use log.Info instead of fmt.Fprintf Signed-off-by: Philipp Homann <homann.philipp@googlemail.com> * import ordering * make fmt Co-authored-by: zeripath <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io> Co-authored-by: Matti R <matti@mdranta.net>
This commit is contained in:
parent
209b17c4e2
commit
684b7a999f
303 changed files with 301317 additions and 1183 deletions
726
vendor/github.com/klauspost/compress/zstd/fse_encoder.go
generated
vendored
Normal file
726
vendor/github.com/klauspost/compress/zstd/fse_encoder.go
generated
vendored
Normal file
|
@ -0,0 +1,726 @@
|
|||
// Copyright 2019+ Klaus Post. All rights reserved.
|
||||
// License information can be found in the LICENSE file.
|
||||
// Based on work by Yann Collet, released under BSD License.
|
||||
|
||||
package zstd
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
const (
|
||||
// For encoding we only support up to
|
||||
maxEncTableLog = 8
|
||||
maxEncTablesize = 1 << maxTableLog
|
||||
maxEncTableMask = (1 << maxTableLog) - 1
|
||||
minEncTablelog = 5
|
||||
maxEncSymbolValue = maxMatchLengthSymbol
|
||||
)
|
||||
|
||||
// Scratch provides temporary storage for compression and decompression.
|
||||
type fseEncoder struct {
|
||||
symbolLen uint16 // Length of active part of the symbol table.
|
||||
actualTableLog uint8 // Selected tablelog.
|
||||
ct cTable // Compression tables.
|
||||
maxCount int // count of the most probable symbol
|
||||
zeroBits bool // no bits has prob > 50%.
|
||||
clearCount bool // clear count
|
||||
useRLE bool // This encoder is for RLE
|
||||
preDefined bool // This encoder is predefined.
|
||||
reUsed bool // Set to know when the encoder has been reused.
|
||||
rleVal uint8 // RLE Symbol
|
||||
maxBits uint8 // Maximum output bits after transform.
|
||||
|
||||
// TODO: Technically zstd should be fine with 64 bytes.
|
||||
count [256]uint32
|
||||
norm [256]int16
|
||||
}
|
||||
|
||||
// cTable contains tables used for compression.
|
||||
type cTable struct {
|
||||
tableSymbol []byte
|
||||
stateTable []uint16
|
||||
symbolTT []symbolTransform
|
||||
}
|
||||
|
||||
// symbolTransform contains the state transform for a symbol.
|
||||
type symbolTransform struct {
|
||||
deltaNbBits uint32
|
||||
deltaFindState int16
|
||||
outBits uint8
|
||||
}
|
||||
|
||||
// String prints values as a human readable string.
|
||||
func (s symbolTransform) String() string {
|
||||
return fmt.Sprintf("{deltabits: %08x, findstate:%d outbits:%d}", s.deltaNbBits, s.deltaFindState, s.outBits)
|
||||
}
|
||||
|
||||
// Histogram allows to populate the histogram and skip that step in the compression,
|
||||
// It otherwise allows to inspect the histogram when compression is done.
|
||||
// To indicate that you have populated the histogram call HistogramFinished
|
||||
// with the value of the highest populated symbol, as well as the number of entries
|
||||
// in the most populated entry. These are accepted at face value.
|
||||
// The returned slice will always be length 256.
|
||||
func (s *fseEncoder) Histogram() []uint32 {
|
||||
return s.count[:]
|
||||
}
|
||||
|
||||
// HistogramFinished can be called to indicate that the histogram has been populated.
|
||||
// maxSymbol is the index of the highest set symbol of the next data segment.
|
||||
// maxCount is the number of entries in the most populated entry.
|
||||
// These are accepted at face value.
|
||||
func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) {
|
||||
s.maxCount = maxCount
|
||||
s.symbolLen = uint16(maxSymbol) + 1
|
||||
s.clearCount = maxCount != 0
|
||||
}
|
||||
|
||||
// prepare will prepare and allocate scratch tables used for both compression and decompression.
|
||||
func (s *fseEncoder) prepare() (*fseEncoder, error) {
|
||||
if s == nil {
|
||||
s = &fseEncoder{}
|
||||
}
|
||||
s.useRLE = false
|
||||
if s.clearCount && s.maxCount == 0 {
|
||||
for i := range s.count {
|
||||
s.count[i] = 0
|
||||
}
|
||||
s.clearCount = false
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// allocCtable will allocate tables needed for compression.
|
||||
// If existing tables a re big enough, they are simply re-used.
|
||||
func (s *fseEncoder) allocCtable() {
|
||||
tableSize := 1 << s.actualTableLog
|
||||
// get tableSymbol that is big enough.
|
||||
if cap(s.ct.tableSymbol) < int(tableSize) {
|
||||
s.ct.tableSymbol = make([]byte, tableSize)
|
||||
}
|
||||
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
|
||||
|
||||
ctSize := tableSize
|
||||
if cap(s.ct.stateTable) < ctSize {
|
||||
s.ct.stateTable = make([]uint16, ctSize)
|
||||
}
|
||||
s.ct.stateTable = s.ct.stateTable[:ctSize]
|
||||
|
||||
if cap(s.ct.symbolTT) < 256 {
|
||||
s.ct.symbolTT = make([]symbolTransform, 256)
|
||||
}
|
||||
s.ct.symbolTT = s.ct.symbolTT[:256]
|
||||
}
|
||||
|
||||
// buildCTable will populate the compression table so it is ready to be used.
|
||||
func (s *fseEncoder) buildCTable() error {
|
||||
tableSize := uint32(1 << s.actualTableLog)
|
||||
highThreshold := tableSize - 1
|
||||
var cumul [256]int16
|
||||
|
||||
s.allocCtable()
|
||||
tableSymbol := s.ct.tableSymbol[:tableSize]
|
||||
// symbol start positions
|
||||
{
|
||||
cumul[0] = 0
|
||||
for ui, v := range s.norm[:s.symbolLen-1] {
|
||||
u := byte(ui) // one less than reference
|
||||
if v == -1 {
|
||||
// Low proba symbol
|
||||
cumul[u+1] = cumul[u] + 1
|
||||
tableSymbol[highThreshold] = u
|
||||
highThreshold--
|
||||
} else {
|
||||
cumul[u+1] = cumul[u] + v
|
||||
}
|
||||
}
|
||||
// Encode last symbol separately to avoid overflowing u
|
||||
u := int(s.symbolLen - 1)
|
||||
v := s.norm[s.symbolLen-1]
|
||||
if v == -1 {
|
||||
// Low proba symbol
|
||||
cumul[u+1] = cumul[u] + 1
|
||||
tableSymbol[highThreshold] = byte(u)
|
||||
highThreshold--
|
||||
} else {
|
||||
cumul[u+1] = cumul[u] + v
|
||||
}
|
||||
if uint32(cumul[s.symbolLen]) != tableSize {
|
||||
return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize)
|
||||
}
|
||||
cumul[s.symbolLen] = int16(tableSize) + 1
|
||||
}
|
||||
// Spread symbols
|
||||
s.zeroBits = false
|
||||
{
|
||||
step := tableStep(tableSize)
|
||||
tableMask := tableSize - 1
|
||||
var position uint32
|
||||
// if any symbol > largeLimit, we may have 0 bits output.
|
||||
largeLimit := int16(1 << (s.actualTableLog - 1))
|
||||
for ui, v := range s.norm[:s.symbolLen] {
|
||||
symbol := byte(ui)
|
||||
if v > largeLimit {
|
||||
s.zeroBits = true
|
||||
}
|
||||
for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ {
|
||||
tableSymbol[position] = symbol
|
||||
position = (position + step) & tableMask
|
||||
for position > highThreshold {
|
||||
position = (position + step) & tableMask
|
||||
} /* Low proba area */
|
||||
}
|
||||
}
|
||||
|
||||
// Check if we have gone through all positions
|
||||
if position != 0 {
|
||||
return errors.New("position!=0")
|
||||
}
|
||||
}
|
||||
|
||||
// Build table
|
||||
table := s.ct.stateTable
|
||||
{
|
||||
tsi := int(tableSize)
|
||||
for u, v := range tableSymbol {
|
||||
// TableU16 : sorted by symbol order; gives next state value
|
||||
table[cumul[v]] = uint16(tsi + u)
|
||||
cumul[v]++
|
||||
}
|
||||
}
|
||||
|
||||
// Build Symbol Transformation Table
|
||||
{
|
||||
total := int16(0)
|
||||
symbolTT := s.ct.symbolTT[:s.symbolLen]
|
||||
tableLog := s.actualTableLog
|
||||
tl := (uint32(tableLog) << 16) - (1 << tableLog)
|
||||
for i, v := range s.norm[:s.symbolLen] {
|
||||
switch v {
|
||||
case 0:
|
||||
case -1, 1:
|
||||
symbolTT[i].deltaNbBits = tl
|
||||
symbolTT[i].deltaFindState = int16(total - 1)
|
||||
total++
|
||||
default:
|
||||
maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
|
||||
minStatePlus := uint32(v) << maxBitsOut
|
||||
symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
|
||||
symbolTT[i].deltaFindState = int16(total - v)
|
||||
total += v
|
||||
}
|
||||
}
|
||||
if total != int16(tableSize) {
|
||||
return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000}
|
||||
|
||||
func (s *fseEncoder) setRLE(val byte) {
|
||||
s.allocCtable()
|
||||
s.actualTableLog = 0
|
||||
s.ct.stateTable = s.ct.stateTable[:1]
|
||||
s.ct.symbolTT[val] = symbolTransform{
|
||||
deltaFindState: 0,
|
||||
deltaNbBits: 0,
|
||||
}
|
||||
if debug {
|
||||
println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val])
|
||||
}
|
||||
s.rleVal = val
|
||||
s.useRLE = true
|
||||
}
|
||||
|
||||
// setBits will set output bits for the transform.
|
||||
// if nil is provided, the number of bits is equal to the index.
|
||||
func (s *fseEncoder) setBits(transform []byte) {
|
||||
if s.reUsed || s.preDefined {
|
||||
return
|
||||
}
|
||||
if s.useRLE {
|
||||
if transform == nil {
|
||||
s.ct.symbolTT[s.rleVal].outBits = s.rleVal
|
||||
s.maxBits = s.rleVal
|
||||
return
|
||||
}
|
||||
s.maxBits = transform[s.rleVal]
|
||||
s.ct.symbolTT[s.rleVal].outBits = s.maxBits
|
||||
return
|
||||
}
|
||||
if transform == nil {
|
||||
for i := range s.ct.symbolTT[:s.symbolLen] {
|
||||
s.ct.symbolTT[i].outBits = uint8(i)
|
||||
}
|
||||
s.maxBits = uint8(s.symbolLen - 1)
|
||||
return
|
||||
}
|
||||
s.maxBits = 0
|
||||
for i, v := range transform[:s.symbolLen] {
|
||||
s.ct.symbolTT[i].outBits = v
|
||||
if v > s.maxBits {
|
||||
// We could assume bits always going up, but we play safe.
|
||||
s.maxBits = v
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// normalizeCount will normalize the count of the symbols so
|
||||
// the total is equal to the table size.
|
||||
// If successful, compression tables will also be made ready.
|
||||
func (s *fseEncoder) normalizeCount(length int) error {
|
||||
if s.reUsed {
|
||||
return nil
|
||||
}
|
||||
s.optimalTableLog(length)
|
||||
var (
|
||||
tableLog = s.actualTableLog
|
||||
scale = 62 - uint64(tableLog)
|
||||
step = (1 << 62) / uint64(length)
|
||||
vStep = uint64(1) << (scale - 20)
|
||||
stillToDistribute = int16(1 << tableLog)
|
||||
largest int
|
||||
largestP int16
|
||||
lowThreshold = (uint32)(length >> tableLog)
|
||||
)
|
||||
if s.maxCount == length {
|
||||
s.useRLE = true
|
||||
return nil
|
||||
}
|
||||
s.useRLE = false
|
||||
for i, cnt := range s.count[:s.symbolLen] {
|
||||
// already handled
|
||||
// if (count[s] == s.length) return 0; /* rle special case */
|
||||
|
||||
if cnt == 0 {
|
||||
s.norm[i] = 0
|
||||
continue
|
||||
}
|
||||
if cnt <= lowThreshold {
|
||||
s.norm[i] = -1
|
||||
stillToDistribute--
|
||||
} else {
|
||||
proba := (int16)((uint64(cnt) * step) >> scale)
|
||||
if proba < 8 {
|
||||
restToBeat := vStep * uint64(rtbTable[proba])
|
||||
v := uint64(cnt)*step - (uint64(proba) << scale)
|
||||
if v > restToBeat {
|
||||
proba++
|
||||
}
|
||||
}
|
||||
if proba > largestP {
|
||||
largestP = proba
|
||||
largest = i
|
||||
}
|
||||
s.norm[i] = proba
|
||||
stillToDistribute -= proba
|
||||
}
|
||||
}
|
||||
|
||||
if -stillToDistribute >= (s.norm[largest] >> 1) {
|
||||
// corner case, need another normalization method
|
||||
err := s.normalizeCount2(length)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if debugAsserts {
|
||||
err = s.validateNorm()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return s.buildCTable()
|
||||
}
|
||||
s.norm[largest] += stillToDistribute
|
||||
if debugAsserts {
|
||||
err := s.validateNorm()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return s.buildCTable()
|
||||
}
|
||||
|
||||
// Secondary normalization method.
|
||||
// To be used when primary method fails.
|
||||
func (s *fseEncoder) normalizeCount2(length int) error {
|
||||
const notYetAssigned = -2
|
||||
var (
|
||||
distributed uint32
|
||||
total = uint32(length)
|
||||
tableLog = s.actualTableLog
|
||||
lowThreshold = uint32(total >> tableLog)
|
||||
lowOne = uint32((total * 3) >> (tableLog + 1))
|
||||
)
|
||||
for i, cnt := range s.count[:s.symbolLen] {
|
||||
if cnt == 0 {
|
||||
s.norm[i] = 0
|
||||
continue
|
||||
}
|
||||
if cnt <= lowThreshold {
|
||||
s.norm[i] = -1
|
||||
distributed++
|
||||
total -= cnt
|
||||
continue
|
||||
}
|
||||
if cnt <= lowOne {
|
||||
s.norm[i] = 1
|
||||
distributed++
|
||||
total -= cnt
|
||||
continue
|
||||
}
|
||||
s.norm[i] = notYetAssigned
|
||||
}
|
||||
toDistribute := (1 << tableLog) - distributed
|
||||
|
||||
if (total / toDistribute) > lowOne {
|
||||
// risk of rounding to zero
|
||||
lowOne = uint32((total * 3) / (toDistribute * 2))
|
||||
for i, cnt := range s.count[:s.symbolLen] {
|
||||
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
|
||||
s.norm[i] = 1
|
||||
distributed++
|
||||
total -= cnt
|
||||
continue
|
||||
}
|
||||
}
|
||||
toDistribute = (1 << tableLog) - distributed
|
||||
}
|
||||
if distributed == uint32(s.symbolLen)+1 {
|
||||
// all values are pretty poor;
|
||||
// probably incompressible data (should have already been detected);
|
||||
// find max, then give all remaining points to max
|
||||
var maxV int
|
||||
var maxC uint32
|
||||
for i, cnt := range s.count[:s.symbolLen] {
|
||||
if cnt > maxC {
|
||||
maxV = i
|
||||
maxC = cnt
|
||||
}
|
||||
}
|
||||
s.norm[maxV] += int16(toDistribute)
|
||||
return nil
|
||||
}
|
||||
|
||||
if total == 0 {
|
||||
// all of the symbols were low enough for the lowOne or lowThreshold
|
||||
for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) {
|
||||
if s.norm[i] > 0 {
|
||||
toDistribute--
|
||||
s.norm[i]++
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
var (
|
||||
vStepLog = 62 - uint64(tableLog)
|
||||
mid = uint64((1 << (vStepLog - 1)) - 1)
|
||||
rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining
|
||||
tmpTotal = mid
|
||||
)
|
||||
for i, cnt := range s.count[:s.symbolLen] {
|
||||
if s.norm[i] == notYetAssigned {
|
||||
var (
|
||||
end = tmpTotal + uint64(cnt)*rStep
|
||||
sStart = uint32(tmpTotal >> vStepLog)
|
||||
sEnd = uint32(end >> vStepLog)
|
||||
weight = sEnd - sStart
|
||||
)
|
||||
if weight < 1 {
|
||||
return errors.New("weight < 1")
|
||||
}
|
||||
s.norm[i] = int16(weight)
|
||||
tmpTotal = end
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog
|
||||
func (s *fseEncoder) optimalTableLog(length int) {
|
||||
tableLog := uint8(maxEncTableLog)
|
||||
minBitsSrc := highBit(uint32(length)) + 1
|
||||
minBitsSymbols := highBit(uint32(s.symbolLen-1)) + 2
|
||||
minBits := uint8(minBitsSymbols)
|
||||
if minBitsSrc < minBitsSymbols {
|
||||
minBits = uint8(minBitsSrc)
|
||||
}
|
||||
|
||||
maxBitsSrc := uint8(highBit(uint32(length-1))) - 2
|
||||
if maxBitsSrc < tableLog {
|
||||
// Accuracy can be reduced
|
||||
tableLog = maxBitsSrc
|
||||
}
|
||||
if minBits > tableLog {
|
||||
tableLog = minBits
|
||||
}
|
||||
// Need a minimum to safely represent all symbol values
|
||||
if tableLog < minEncTablelog {
|
||||
tableLog = minEncTablelog
|
||||
}
|
||||
if tableLog > maxEncTableLog {
|
||||
tableLog = maxEncTableLog
|
||||
}
|
||||
s.actualTableLog = tableLog
|
||||
}
|
||||
|
||||
// validateNorm validates the normalized histogram table.
|
||||
func (s *fseEncoder) validateNorm() (err error) {
|
||||
var total int
|
||||
for _, v := range s.norm[:s.symbolLen] {
|
||||
if v >= 0 {
|
||||
total += int(v)
|
||||
} else {
|
||||
total -= int(v)
|
||||
}
|
||||
}
|
||||
defer func() {
|
||||
if err == nil {
|
||||
return
|
||||
}
|
||||
fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen)
|
||||
for i, v := range s.norm[:s.symbolLen] {
|
||||
fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v)
|
||||
}
|
||||
}()
|
||||
if total != (1 << s.actualTableLog) {
|
||||
return fmt.Errorf("warning: Total == %d != %d", total, 1<<s.actualTableLog)
|
||||
}
|
||||
for i, v := range s.count[s.symbolLen:] {
|
||||
if v != 0 {
|
||||
return fmt.Errorf("warning: Found symbol out of range, %d after cut", i)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// writeCount will write the normalized histogram count to header.
|
||||
// This is read back by readNCount.
|
||||
func (s *fseEncoder) writeCount(out []byte) ([]byte, error) {
|
||||
if s.useRLE {
|
||||
return append(out, s.rleVal), nil
|
||||
}
|
||||
if s.preDefined || s.reUsed {
|
||||
// Never write predefined.
|
||||
return out, nil
|
||||
}
|
||||
|
||||
var (
|
||||
tableLog = s.actualTableLog
|
||||
tableSize = 1 << tableLog
|
||||
previous0 bool
|
||||
charnum uint16
|
||||
|
||||
// maximum header size plus 2 extra bytes for final output if bitCount == 0.
|
||||
maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + 2
|
||||
|
||||
// Write Table Size
|
||||
bitStream = uint32(tableLog - minEncTablelog)
|
||||
bitCount = uint(4)
|
||||
remaining = int16(tableSize + 1) /* +1 for extra accuracy */
|
||||
threshold = int16(tableSize)
|
||||
nbBits = uint(tableLog + 1)
|
||||
outP = len(out)
|
||||
)
|
||||
if cap(out) < outP+maxHeaderSize {
|
||||
out = append(out, make([]byte, maxHeaderSize*3)...)
|
||||
out = out[:len(out)-maxHeaderSize*3]
|
||||
}
|
||||
out = out[:outP+maxHeaderSize]
|
||||
|
||||
// stops at 1
|
||||
for remaining > 1 {
|
||||
if previous0 {
|
||||
start := charnum
|
||||
for s.norm[charnum] == 0 {
|
||||
charnum++
|
||||
}
|
||||
for charnum >= start+24 {
|
||||
start += 24
|
||||
bitStream += uint32(0xFFFF) << bitCount
|
||||
out[outP] = byte(bitStream)
|
||||
out[outP+1] = byte(bitStream >> 8)
|
||||
outP += 2
|
||||
bitStream >>= 16
|
||||
}
|
||||
for charnum >= start+3 {
|
||||
start += 3
|
||||
bitStream += 3 << bitCount
|
||||
bitCount += 2
|
||||
}
|
||||
bitStream += uint32(charnum-start) << bitCount
|
||||
bitCount += 2
|
||||
if bitCount > 16 {
|
||||
out[outP] = byte(bitStream)
|
||||
out[outP+1] = byte(bitStream >> 8)
|
||||
outP += 2
|
||||
bitStream >>= 16
|
||||
bitCount -= 16
|
||||
}
|
||||
}
|
||||
|
||||
count := s.norm[charnum]
|
||||
charnum++
|
||||
max := (2*threshold - 1) - remaining
|
||||
if count < 0 {
|
||||
remaining += count
|
||||
} else {
|
||||
remaining -= count
|
||||
}
|
||||
count++ // +1 for extra accuracy
|
||||
if count >= threshold {
|
||||
count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[
|
||||
}
|
||||
bitStream += uint32(count) << bitCount
|
||||
bitCount += nbBits
|
||||
if count < max {
|
||||
bitCount--
|
||||
}
|
||||
|
||||
previous0 = count == 1
|
||||
if remaining < 1 {
|
||||
return nil, errors.New("internal error: remaining < 1")
|
||||
}
|
||||
for remaining < threshold {
|
||||
nbBits--
|
||||
threshold >>= 1
|
||||
}
|
||||
|
||||
if bitCount > 16 {
|
||||
out[outP] = byte(bitStream)
|
||||
out[outP+1] = byte(bitStream >> 8)
|
||||
outP += 2
|
||||
bitStream >>= 16
|
||||
bitCount -= 16
|
||||
}
|
||||
}
|
||||
|
||||
if outP+2 > len(out) {
|
||||
return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen])
|
||||
}
|
||||
out[outP] = byte(bitStream)
|
||||
out[outP+1] = byte(bitStream >> 8)
|
||||
outP += int((bitCount + 7) / 8)
|
||||
|
||||
if charnum > s.symbolLen {
|
||||
return nil, errors.New("internal error: charnum > s.symbolLen")
|
||||
}
|
||||
return out[:outP], nil
|
||||
}
|
||||
|
||||
// Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
|
||||
// note 1 : assume symbolValue is valid (<= maxSymbolValue)
|
||||
// note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits *
|
||||
func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 {
|
||||
minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16
|
||||
threshold := (minNbBits + 1) << 16
|
||||
if debugAsserts {
|
||||
if !(s.actualTableLog < 16) {
|
||||
panic("!s.actualTableLog < 16")
|
||||
}
|
||||
// ensure enough room for renormalization double shift
|
||||
if !(uint8(accuracyLog) < 31-s.actualTableLog) {
|
||||
panic("!uint8(accuracyLog) < 31-s.actualTableLog")
|
||||
}
|
||||
}
|
||||
tableSize := uint32(1) << s.actualTableLog
|
||||
deltaFromThreshold := threshold - (s.ct.symbolTT[symbolValue].deltaNbBits + tableSize)
|
||||
// linear interpolation (very approximate)
|
||||
normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog
|
||||
bitMultiplier := uint32(1) << accuracyLog
|
||||
if debugAsserts {
|
||||
if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold {
|
||||
panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold")
|
||||
}
|
||||
if normalizedDeltaFromThreshold > bitMultiplier {
|
||||
panic("normalizedDeltaFromThreshold > bitMultiplier")
|
||||
}
|
||||
}
|
||||
return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold
|
||||
}
|
||||
|
||||
// Returns the cost in bits of encoding the distribution in count using ctable.
|
||||
// Histogram should only be up to the last non-zero symbol.
|
||||
// Returns an -1 if ctable cannot represent all the symbols in count.
|
||||
func (s *fseEncoder) approxSize(hist []uint32) uint32 {
|
||||
if int(s.symbolLen) < len(hist) {
|
||||
// More symbols than we have.
|
||||
return math.MaxUint32
|
||||
}
|
||||
if s.useRLE {
|
||||
// We will never reuse RLE encoders.
|
||||
return math.MaxUint32
|
||||
}
|
||||
const kAccuracyLog = 8
|
||||
badCost := (uint32(s.actualTableLog) + 1) << kAccuracyLog
|
||||
var cost uint32
|
||||
for i, v := range hist {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if s.norm[i] == 0 {
|
||||
return math.MaxUint32
|
||||
}
|
||||
bitCost := s.bitCost(uint8(i), kAccuracyLog)
|
||||
if bitCost > badCost {
|
||||
return math.MaxUint32
|
||||
}
|
||||
cost += v * bitCost
|
||||
}
|
||||
return cost >> kAccuracyLog
|
||||
}
|
||||
|
||||
// maxHeaderSize returns the maximum header size in bits.
|
||||
// This is not exact size, but we want a penalty for new tables anyway.
|
||||
func (s *fseEncoder) maxHeaderSize() uint32 {
|
||||
if s.preDefined {
|
||||
return 0
|
||||
}
|
||||
if s.useRLE {
|
||||
return 8
|
||||
}
|
||||
return (((uint32(s.symbolLen) * uint32(s.actualTableLog)) >> 3) + 3) * 8
|
||||
}
|
||||
|
||||
// cState contains the compression state of a stream.
|
||||
type cState struct {
|
||||
bw *bitWriter
|
||||
stateTable []uint16
|
||||
state uint16
|
||||
}
|
||||
|
||||
// init will initialize the compression state to the first symbol of the stream.
|
||||
func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
|
||||
c.bw = bw
|
||||
c.stateTable = ct.stateTable
|
||||
if len(c.stateTable) == 1 {
|
||||
// RLE
|
||||
c.stateTable[0] = uint16(0)
|
||||
c.state = 0
|
||||
return
|
||||
}
|
||||
nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16
|
||||
im := int32((nbBitsOut << 16) - first.deltaNbBits)
|
||||
lu := (im >> nbBitsOut) + int32(first.deltaFindState)
|
||||
c.state = c.stateTable[lu]
|
||||
return
|
||||
}
|
||||
|
||||
// encode the output symbol provided and write it to the bitstream.
|
||||
func (c *cState) encode(symbolTT symbolTransform) {
|
||||
nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16
|
||||
dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState)
|
||||
c.bw.addBits16NC(c.state, uint8(nbBitsOut))
|
||||
c.state = c.stateTable[dstState]
|
||||
}
|
||||
|
||||
// flush will write the tablelog to the output and flush the remaining full bytes.
|
||||
func (c *cState) flush(tableLog uint8) {
|
||||
c.bw.flush32()
|
||||
c.bw.addBits16NC(c.state, tableLog)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue