forked from forgejo/forgejo
Update to last common bleve (#3986)
This commit is contained in:
parent
1b7cd3d0b0
commit
917b9641ec
184 changed files with 39576 additions and 121 deletions
18
vendor/github.com/glycerine/go-unsnap-stream/LICENSE
generated
vendored
Normal file
18
vendor/github.com/glycerine/go-unsnap-stream/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,18 @@
|
|||
Copyright (c) 2014 the go-unsnap-stream authors.
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal in
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
||||
the Software, and to permit persons to whom the Software is furnished to do so,
|
||||
subject to the following conditions:
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
||||
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
||||
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
Permission is explicitly granted to relicense this material under new terms of
|
||||
your choice when integrating this library with another library or project.
|
20
vendor/github.com/glycerine/go-unsnap-stream/README.md
generated
vendored
Normal file
20
vendor/github.com/glycerine/go-unsnap-stream/README.md
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
go-unsnap-stream
|
||||
================
|
||||
|
||||
This is a small golang library for decoding and encoding the snappy *streaming* format, specified here: https://github.com/google/snappy/blob/master/framing_format.txt
|
||||
|
||||
Note that the *streaming or framing format* for snappy is different from snappy itself. Think of it as a train of boxcars: the streaming format breaks your data in chunks, applies snappy to each chunk alone, then puts a thin wrapper around the chunk, and sends it along in turn. You can begin decoding before receiving everything. And memory requirements for decoding are sane.
|
||||
|
||||
Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4].
|
||||
|
||||
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly; using github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C.
|
||||
|
||||
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead.
|
||||
|
||||
[1] https://groups.google.com/forum/#!msg/snappy-compression/qvLNe2cSH9s/R19oBC-p7g4J
|
||||
|
||||
[2] https://codereview.appspot.com/5167058
|
||||
|
||||
[3] https://github.com/kubo/snzip
|
||||
|
||||
[4] https://pypi.python.org/pypi/python-snappy
|
BIN
vendor/github.com/glycerine/go-unsnap-stream/binary.dat
generated
vendored
Normal file
BIN
vendor/github.com/glycerine/go-unsnap-stream/binary.dat
generated
vendored
Normal file
Binary file not shown.
BIN
vendor/github.com/glycerine/go-unsnap-stream/binary.dat.snappy
generated
vendored
Normal file
BIN
vendor/github.com/glycerine/go-unsnap-stream/binary.dat.snappy
generated
vendored
Normal file
Binary file not shown.
375
vendor/github.com/glycerine/go-unsnap-stream/rbuf.go
generated
vendored
Normal file
375
vendor/github.com/glycerine/go-unsnap-stream/rbuf.go
generated
vendored
Normal file
|
@ -0,0 +1,375 @@
|
|||
package unsnap
|
||||
|
||||
// copyright (c) 2014, Jason E. Aten
|
||||
// license: MIT
|
||||
|
||||
// Some text from the Golang standard library doc is adapted and
|
||||
// reproduced in fragments below to document the expected behaviors
|
||||
// of the interface functions Read()/Write()/ReadFrom()/WriteTo() that
|
||||
// are implemented here. Those descriptions (see
|
||||
// http://golang.org/pkg/io/#Reader for example) are
|
||||
// copyright 2010 The Go Authors.
|
||||
|
||||
import "io"
|
||||
|
||||
// FixedSizeRingBuf:
|
||||
//
|
||||
// a fixed-size circular ring buffer. Yes, just what is says.
|
||||
//
|
||||
// We keep a pair of ping/pong buffers so that we can linearize
|
||||
// the circular buffer into a contiguous slice if need be.
|
||||
//
|
||||
// For efficiency, a FixedSizeRingBuf may be vastly preferred to
|
||||
// a bytes.Buffer. The ReadWithoutAdvance(), Advance(), and Adopt()
|
||||
// methods are all non-standard methods written for speed.
|
||||
//
|
||||
// For an I/O heavy application, I have replaced bytes.Buffer with
|
||||
// FixedSizeRingBuf and seen memory consumption go from 8GB to 25MB.
|
||||
// Yes, that is a 300x reduction in memory footprint. Everything ran
|
||||
// faster too.
|
||||
//
|
||||
// Note that Bytes(), while inescapable at times, is expensive: avoid
|
||||
// it if possible. Instead it is better to use the FixedSizeRingBuf.Readable
|
||||
// member to get the number of bytes available. Bytes() is expensive because
|
||||
// it may copy the back and then the front of a wrapped buffer A[Use]
|
||||
// into A[1-Use] in order to get a contiguous slice. If possible use ContigLen()
|
||||
// first to get the size that can be read without copying, Read() that
|
||||
// amount, and then Read() a second time -- to avoid the copy.
|
||||
|
||||
// FixedSizeRingBuf is a fixed-size circular ring buffer backed by a
// ping/pong pair of byte arrays, so a wrapped view can be linearized
// into a contiguous slice (see Bytes) without growing.
type FixedSizeRingBuf struct {
	A        [2][]byte // a pair of ping/pong buffers. Only one is active.
	Use      int       // which A buffer is in active use, 0 or 1
	N        int       // MaxViewInBytes, the size of A[0] and A[1] in bytes.
	Beg      int       // start of data in A[Use]
	Readable int       // number of bytes available to read in A[Use]

	OneMade bool // lazily instantiate the [1] buffer. If we never call Bytes(),
	// we may never need it. If OneMade is false, the Use must be = 0.
}
|
||||
|
||||
func (b *FixedSizeRingBuf) Make2ndBuffer() {
|
||||
if b.OneMade {
|
||||
return
|
||||
}
|
||||
b.A[1] = make([]byte, b.N, b.N)
|
||||
b.OneMade = true
|
||||
}
|
||||
|
||||
// get the length of the largest read that we can provide to a contiguous slice
|
||||
// without an extra linearizing copy of all bytes internally.
|
||||
func (b *FixedSizeRingBuf) ContigLen() int {
|
||||
extent := b.Beg + b.Readable
|
||||
firstContigLen := intMin(extent, b.N) - b.Beg
|
||||
return firstContigLen
|
||||
}
|
||||
|
||||
func NewFixedSizeRingBuf(maxViewInBytes int) *FixedSizeRingBuf {
|
||||
n := maxViewInBytes
|
||||
r := &FixedSizeRingBuf{
|
||||
Use: 0, // 0 or 1, whichever is actually in use at the moment.
|
||||
// If we are asked for Bytes() and we wrap, linearize into the other.
|
||||
|
||||
N: n,
|
||||
Beg: 0,
|
||||
Readable: 0,
|
||||
OneMade: false,
|
||||
}
|
||||
r.A[0] = make([]byte, n, n)
|
||||
|
||||
// r.A[1] initialized lazily now.
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// from the standard library description of Bytes():
|
||||
// Bytes() returns a slice of the contents of the unread portion of the buffer.
|
||||
// If the caller changes the contents of the
|
||||
// returned slice, the contents of the buffer will change provided there
|
||||
// are no intervening method calls on the Buffer.
|
||||
//
|
||||
func (b *FixedSizeRingBuf) Bytes() []byte {
|
||||
|
||||
extent := b.Beg + b.Readable
|
||||
if extent <= b.N {
|
||||
// we fit contiguously in this buffer without wrapping to the other
|
||||
return b.A[b.Use][b.Beg:(b.Beg + b.Readable)]
|
||||
}
|
||||
|
||||
// wrap into the other buffer
|
||||
b.Make2ndBuffer()
|
||||
|
||||
src := b.Use
|
||||
dest := 1 - b.Use
|
||||
|
||||
n := copy(b.A[dest], b.A[src][b.Beg:])
|
||||
n += copy(b.A[dest][n:], b.A[src][0:(extent%b.N)])
|
||||
|
||||
b.Use = dest
|
||||
b.Beg = 0
|
||||
|
||||
return b.A[b.Use][:n]
|
||||
}
|
||||
|
||||
// Read():
|
||||
//
|
||||
// from bytes.Buffer.Read(): Read reads the next len(p) bytes
|
||||
// from the buffer or until the buffer is drained. The return
|
||||
// value n is the number of bytes read. If the buffer has no data
|
||||
// to return, err is io.EOF (unless len(p) is zero); otherwise it is nil.
|
||||
//
|
||||
// from the description of the Reader interface,
|
||||
// http://golang.org/pkg/io/#Reader
|
||||
//
|
||||
/*
|
||||
Reader is the interface that wraps the basic Read method.
|
||||
|
||||
Read reads up to len(p) bytes into p. It returns the number
|
||||
of bytes read (0 <= n <= len(p)) and any error encountered.
|
||||
Even if Read returns n < len(p), it may use all of p as scratch
|
||||
space during the call. If some data is available but not
|
||||
len(p) bytes, Read conventionally returns what is available
|
||||
instead of waiting for more.
|
||||
|
||||
When Read encounters an error or end-of-file condition after
|
||||
successfully reading n > 0 bytes, it returns the number of bytes
|
||||
read. It may return the (non-nil) error from the same call or
|
||||
return the error (and n == 0) from a subsequent call. An instance
|
||||
of this general case is that a Reader returning a non-zero number
|
||||
of bytes at the end of the input stream may return
|
||||
either err == EOF or err == nil. The next Read should
|
||||
return 0, EOF regardless.
|
||||
|
||||
Callers should always process the n > 0 bytes returned before
|
||||
considering the error err. Doing so correctly handles I/O errors
|
||||
that happen after reading some bytes and also both of the
|
||||
allowed EOF behaviors.
|
||||
|
||||
Implementations of Read are discouraged from returning a zero
|
||||
byte count with a nil error, and callers should treat that
|
||||
situation as a no-op.
|
||||
*/
|
||||
//
|
||||
|
||||
// Read copies up to len(p) readable bytes into p and consumes them,
// following bytes.Buffer.Read semantics: it returns io.EOF when the
// buffer is empty (unless len(p) is zero). It delegates to
// ReadAndMaybeAdvance with doAdvance=true.
func (b *FixedSizeRingBuf) Read(p []byte) (n int, err error) {
	return b.ReadAndMaybeAdvance(p, true)
}
|
||||
|
||||
// ReadWithoutAdvance copies like Read but leaves the data in the
// buffer, allowing callers to peek ahead. Pair with Advance(n) to
// consume the peeked bytes later.
func (b *FixedSizeRingBuf) ReadWithoutAdvance(p []byte) (n int, err error) {
	return b.ReadAndMaybeAdvance(p, false)
}
|
||||
|
||||
func (b *FixedSizeRingBuf) ReadAndMaybeAdvance(p []byte, doAdvance bool) (n int, err error) {
|
||||
if len(p) == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
if b.Readable == 0 {
|
||||
return 0, io.EOF
|
||||
}
|
||||
extent := b.Beg + b.Readable
|
||||
if extent <= b.N {
|
||||
n += copy(p, b.A[b.Use][b.Beg:extent])
|
||||
} else {
|
||||
n += copy(p, b.A[b.Use][b.Beg:b.N])
|
||||
if n < len(p) {
|
||||
n += copy(p[n:], b.A[b.Use][0:(extent%b.N)])
|
||||
}
|
||||
}
|
||||
if doAdvance {
|
||||
b.Advance(n)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
//
|
||||
// Write writes len(p) bytes from p to the underlying data stream.
|
||||
// It returns the number of bytes written from p (0 <= n <= len(p))
|
||||
// and any error encountered that caused the write to stop early.
|
||||
// Write must return a non-nil error if it returns n < len(p).
|
||||
//
|
||||
func (b *FixedSizeRingBuf) Write(p []byte) (n int, err error) {
|
||||
for {
|
||||
if len(p) == 0 {
|
||||
// nothing (left) to copy in; notice we shorten our
|
||||
// local copy p (below) as we read from it.
|
||||
return
|
||||
}
|
||||
|
||||
writeCapacity := b.N - b.Readable
|
||||
if writeCapacity <= 0 {
|
||||
// we are all full up already.
|
||||
return n, io.ErrShortWrite
|
||||
}
|
||||
if len(p) > writeCapacity {
|
||||
err = io.ErrShortWrite
|
||||
// leave err set and
|
||||
// keep going, write what we can.
|
||||
}
|
||||
|
||||
writeStart := (b.Beg + b.Readable) % b.N
|
||||
|
||||
upperLim := intMin(writeStart+writeCapacity, b.N)
|
||||
|
||||
k := copy(b.A[b.Use][writeStart:upperLim], p)
|
||||
|
||||
n += k
|
||||
b.Readable += k
|
||||
p = p[k:]
|
||||
|
||||
// we can fill from b.A[b.Use][0:something] from
|
||||
// p's remainder, so loop
|
||||
}
|
||||
}
|
||||
|
||||
// WriteTo and ReadFrom avoid intermediate allocation and copies.

// WriteTo writes the buffer's readable bytes to w until drained or an
// error occurs, advancing past everything actually written (so a short
// write does not lose data). The return value n is the number of bytes
// written; io.EOF is returned when there was nothing to write. Together
// with ReadFrom this lets io.Copy bypass its own staging buffer.
func (b *FixedSizeRingBuf) WriteTo(w io.Writer) (n int64, err error) {

	if b.Readable == 0 {
		return 0, io.EOF
	}

	extent := b.Beg + b.Readable
	// first segment: contiguous bytes from Beg to end of array (or extent)
	firstWriteLen := intMin(extent, b.N) - b.Beg
	// second segment: wrapped bytes at the front of the array, if any
	secondWriteLen := b.Readable - firstWriteLen
	if firstWriteLen > 0 {
		m, e := w.Write(b.A[b.Use][b.Beg:(b.Beg + firstWriteLen)])
		n += int64(m)
		b.Advance(m)

		if e != nil {
			return n, e
		}
		// all bytes should have been written, by definition of
		// Write method in io.Writer
		if m != firstWriteLen {
			return n, io.ErrShortWrite
		}
	}
	if secondWriteLen > 0 {
		m, e := w.Write(b.A[b.Use][0:secondWriteLen])
		n += int64(m)
		b.Advance(m)

		if e != nil {
			return n, e
		}
		// all bytes should have been written, by definition of
		// Write method in io.Writer
		if m != secondWriteLen {
			return n, io.ErrShortWrite
		}
	}

	return n, nil
}
|
||||
|
||||
// ReadFrom() reads data from r until EOF or error. The return value n
|
||||
// is the number of bytes read. Any error except io.EOF encountered
|
||||
// during the read is also returned.
|
||||
func (b *FixedSizeRingBuf) ReadFrom(r io.Reader) (n int64, err error) {
|
||||
for {
|
||||
writeCapacity := b.N - b.Readable
|
||||
if writeCapacity <= 0 {
|
||||
// we are all full
|
||||
return n, nil
|
||||
}
|
||||
writeStart := (b.Beg + b.Readable) % b.N
|
||||
upperLim := intMin(writeStart+writeCapacity, b.N)
|
||||
|
||||
m, e := r.Read(b.A[b.Use][writeStart:upperLim])
|
||||
n += int64(m)
|
||||
b.Readable += m
|
||||
if e == io.EOF {
|
||||
return n, nil
|
||||
}
|
||||
if e != nil {
|
||||
return n, e
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reset empties the buffer, discarding any readable bytes and making
// A[0] the active array again. Backing storage is retained for reuse.
func (b *FixedSizeRingBuf) Reset() {
	b.Beg = 0
	b.Readable = 0
	b.Use = 0
}
|
||||
|
||||
// Advance(): non-standard, but better than Next(),
|
||||
// because we don't have to unwrap our buffer and pay the cpu time
|
||||
// for the copy that unwrapping may need.
|
||||
// Useful in conjuction/after ReadWithoutAdvance() above.
|
||||
func (b *FixedSizeRingBuf) Advance(n int) {
|
||||
if n <= 0 {
|
||||
return
|
||||
}
|
||||
if n > b.Readable {
|
||||
n = b.Readable
|
||||
}
|
||||
b.Readable -= n
|
||||
b.Beg = (b.Beg + n) % b.N
|
||||
}
|
||||
|
||||
// Adopt(): non-standard.
|
||||
//
|
||||
// For efficiency's sake, (possibly) take ownership of
|
||||
// already allocated slice offered in me.
|
||||
//
|
||||
// If me is large we will adopt it, and we will potentially then
|
||||
// write to the me buffer.
|
||||
// If we already have a bigger buffer, copy me into the existing
|
||||
// buffer instead.
|
||||
func (b *FixedSizeRingBuf) Adopt(me []byte) {
|
||||
n := len(me)
|
||||
if n > b.N {
|
||||
b.A[0] = me
|
||||
b.OneMade = false
|
||||
b.N = n
|
||||
b.Use = 0
|
||||
b.Beg = 0
|
||||
b.Readable = n
|
||||
} else {
|
||||
// we already have a larger buffer, reuse it.
|
||||
copy(b.A[0], me)
|
||||
b.Use = 0
|
||||
b.Beg = 0
|
||||
b.Readable = n
|
||||
}
|
||||
}
|
||||
|
||||
// intMax returns the larger of a and b.
// (Idiom fix: drop the redundant else after a terminating return,
// per gofmt/golint convention; behavior unchanged.)
func intMax(a, b int) int {
	if a > b {
		return a
	}
	return b
}
|
||||
|
||||
// intMin returns the smaller of a and b.
// (Idiom fix: drop the redundant else after a terminating return,
// per gofmt/golint convention; behavior unchanged.)
func intMin(a, b int) int {
	if a < b {
		return a
	}
	return b
}
|
||||
|
||||
// Get the (beg, end] indices of the tailing empty buffer of bytes slice that from that is free for writing.
|
||||
// Note: not guaranteed to be zeroed. At all.
|
||||
func (b *FixedSizeRingBuf) GetEndmostWritable() (beg int, end int) {
|
||||
extent := b.Beg + b.Readable
|
||||
if extent < b.N {
|
||||
return extent, b.N
|
||||
}
|
||||
|
||||
return extent % b.N, b.Beg
|
||||
}
|
||||
|
||||
// Note: not guaranteed to be zeroed.
|
||||
func (b *FixedSizeRingBuf) GetEndmostWritableSlice() []byte {
|
||||
beg, e := b.GetEndmostWritable()
|
||||
return b.A[b.Use][beg:e]
|
||||
}
|
100
vendor/github.com/glycerine/go-unsnap-stream/snap.go
generated
vendored
Normal file
100
vendor/github.com/glycerine/go-unsnap-stream/snap.go
generated
vendored
Normal file
|
@ -0,0 +1,100 @@
|
|||
package unsnap
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
|
||||
// no c lib dependency
|
||||
snappy "github.com/golang/snappy"
|
||||
// or, use the C wrapper for speed
|
||||
//snappy "github.com/dgryski/go-csnappy"
|
||||
)
|
||||
|
||||
// add Write() method for SnappyFile (see unsnap.go)
|
||||
|
||||
// reference for snappy framing/streaming format:
|
||||
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
|
||||
// ?spec=svn68&r=71
|
||||
|
||||
// Write snappy-frames p onto the underlying Writer: p is split into
// chunks of at most CHUNK_MAX bytes, each chunk is snappy-compressed
// (or sent raw when compression does not shrink it past the
// _COMPRESSION_THRESHOLD), prefixed with a 4-byte chunk-type/length tag
// plus a masked CRC32C, and emitted. The stream-identifier magic is
// written once, before the first chunk. Satisfies io.Writer: n counts
// bytes of p consumed, and n < len(p) implies a non-nil err.
// Panics if called on a read-mode SnappyFile.
func (sf *SnappyFile) Write(p []byte) (n int, err error) {

	if sf.SnappyEncodeDecodeOff {
		// pass-through mode: no framing, no compression
		return sf.Writer.Write(p)
	}

	if !sf.Writing {
		panic("Writing on a read-only SnappyFile")
	}

	// encoding in snappy can apparently go beyond the original size, beware.
	// so our buffers must be sized 2*max snappy chunk => 2 * CHUNK_MAX(65536)

	sf.DecBuf.Reset()
	sf.EncBuf.Reset()

	if !sf.HeaderChunkWritten {
		sf.HeaderChunkWritten = true
		_, err = sf.Writer.Write(SnappyStreamHeaderMagic)
		if err != nil {
			return
		}
	}
	var chunk []byte
	var chunk_type byte
	var crc uint32

	for len(p) > 0 {

		// chunk points to input p by default, unencoded input.
		chunk = p[:IntMin(len(p), CHUNK_MAX)]
		crc = masked_crc32c(chunk)

		writeme := chunk[:]

		// first write to EncBuf, as a temp, in case we want
		// to discard and send uncompressed instead.
		compressed_chunk := snappy.Encode(sf.EncBuf.GetEndmostWritableSlice(), chunk)

		// only ship the compressed form if it actually saved enough space
		if len(compressed_chunk) <= int((1-_COMPRESSION_THRESHOLD)*float64(len(chunk))) {
			writeme = compressed_chunk
			chunk_type = _COMPRESSED_CHUNK
		} else {
			// keep writeme pointing at original chunk (uncompressed)
			chunk_type = _UNCOMPRESSED_CHUNK
		}

		// tag layout per the framing spec: low byte = chunk type,
		// upper three bytes = little-endian payload length (crc included)
		const crc32Sz = 4
		var tag32 uint32 = uint32(chunk_type) + (uint32(len(writeme)+crc32Sz) << 8)

		err = binary.Write(sf.Writer, binary.LittleEndian, tag32)
		if err != nil {
			return
		}

		err = binary.Write(sf.Writer, binary.LittleEndian, crc)
		if err != nil {
			return
		}

		_, err = sf.Writer.Write(writeme)
		if err != nil {
			return
		}

		// count consumed input bytes (not output bytes), per io.Writer
		n += len(chunk)
		p = p[len(chunk):]
	}
	return n, nil
}
|
||||
|
||||
// IntMin returns the smaller of a and b.
func IntMin(a int, b int) int {
	if b < a {
		return b
	}
	return a
}
|
1
vendor/github.com/glycerine/go-unsnap-stream/unenc.txt
generated
vendored
Normal file
1
vendor/github.com/glycerine/go-unsnap-stream/unenc.txt
generated
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
hello_snappy
|
BIN
vendor/github.com/glycerine/go-unsnap-stream/unenc.txt.snappy
generated
vendored
Normal file
BIN
vendor/github.com/glycerine/go-unsnap-stream/unenc.txt.snappy
generated
vendored
Normal file
Binary file not shown.
513
vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
generated
vendored
Normal file
513
vendor/github.com/glycerine/go-unsnap-stream/unsnap.go
generated
vendored
Normal file
|
@ -0,0 +1,513 @@
|
|||
package unsnap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
"hash/crc32"
|
||||
|
||||
snappy "github.com/golang/snappy"
|
||||
// The C library can be used, but this makes the binary dependent
|
||||
// lots of extraneous c-libraries; it is no longer stand-alone. Yuck.
|
||||
//
|
||||
// Therefore we comment out the "dgryski/go-csnappy" path and use the
|
||||
// "github.com/golang/snappy/snappy" above instead. If you are
|
||||
// performance limited and can deal with distributing more libraries,
|
||||
// then this is easy to swap.
|
||||
//
|
||||
// If you swap, note that some of the tests won't pass
|
||||
// because snappy-go produces slightly different (but still
|
||||
// conformant) encodings on some data. Here are bindings
|
||||
// to the C-snappy:
|
||||
// snappy "github.com/dgryski/go-csnappy"
|
||||
)
|
||||
|
||||
// SnappyFile: a drop-in replacement/wrapper for an *os.File (or any
// Reader/Writer) that handles snappy stream-framing online: decoding
// as more is read, encoding as more is written.

type SnappyFile struct {
	Fname string // name used in diagnostics; set by Open/Create

	Reader io.Reader
	Writer io.Writer

	// allow clients to substitute us for an os.File and just switch
	// off compression if they don't want it.
	SnappyEncodeDecodeOff bool // if true, we bypass straight to Filep

	EncBuf FixedSizeRingBuf // holds any extra that isn't yet returned, encoded
	DecBuf FixedSizeRingBuf // holds any extra that isn't yet returned, decoded

	// for writing to stream-framed snappy
	HeaderChunkWritten bool

	// Sanity check: we can only read, or only write, to one SnappyFile.
	// EncBuf and DecBuf are used differently in each mode. Verify
	// that we are consistent with this flag.
	Writing bool
}
|
||||
|
||||
var total int // running count of decoded bytes handed out by Read; only incremented in this file's visible code (presumably a debug aid -- confirm)
|
||||
|
||||
// for debugging, show state of buffers
|
||||
func (f *SnappyFile) Dump() {
|
||||
fmt.Printf("EncBuf has length %d and contents:\n%s\n", len(f.EncBuf.Bytes()), string(f.EncBuf.Bytes()))
|
||||
fmt.Printf("DecBuf has length %d and contents:\n%s\n", len(f.DecBuf.Bytes()), string(f.DecBuf.Bytes()))
|
||||
}
|
||||
|
||||
func (f *SnappyFile) Read(p []byte) (n int, err error) {
|
||||
|
||||
if f.SnappyEncodeDecodeOff {
|
||||
return f.Reader.Read(p)
|
||||
}
|
||||
|
||||
if f.Writing {
|
||||
panic("Reading on a write-only SnappyFile")
|
||||
}
|
||||
|
||||
// before we unencrypt more, try to drain the DecBuf first
|
||||
n, _ = f.DecBuf.Read(p)
|
||||
if n > 0 {
|
||||
total += n
|
||||
return n, nil
|
||||
}
|
||||
|
||||
//nEncRead, nDecAdded, err := UnsnapOneFrame(f.Filep, &f.EncBuf, &f.DecBuf, f.Fname)
|
||||
_, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname)
|
||||
if err != nil && err != io.EOF {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
n, _ = f.DecBuf.Read(p)
|
||||
|
||||
if n > 0 {
|
||||
total += n
|
||||
return n, nil
|
||||
}
|
||||
if f.DecBuf.Readable == 0 {
|
||||
if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 {
|
||||
// only now (when EncBuf is empty) can we give io.EOF.
|
||||
// Any earlier, and we leave stuff un-decoded!
|
||||
return 0, io.EOF
|
||||
}
|
||||
}
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
// Open opens the named snappy-framed file and wraps it in a decoding
// SnappyFile. The caller should Close() the returned file when done.
func Open(name string) (file *SnappyFile, err error) {
	fp, err := os.Open(name)
	if err != nil {
		return nil, err
	}
	// encoding in snappy can apparently go beyond the original size, so
	// we make our buffers big enough, 2*max snappy chunk => 2 * CHUNK_MAX(65536)

	snap := NewReader(fp)
	snap.Fname = name
	return snap, nil
}
|
||||
|
||||
// NewReader wraps r in a SnappyFile configured for reading (decoding).
// Both ring buffers are sized 2*CHUNK_MAX because snappy encoding can
// exceed the original size.
func NewReader(r io.Reader) *SnappyFile {
	return &SnappyFile{
		Reader:  r,
		EncBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy encoded bytes
		DecBuf:  *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy decoded bytes
		Writing: false,
	}
}
|
||||
|
||||
// NewWriter wraps w in a SnappyFile configured for writing (encoding).
func NewWriter(w io.Writer) *SnappyFile {
	return &SnappyFile{
		Writer:  w,
		EncBuf:  *NewFixedSizeRingBuf(65536),     // on writing: temp for testing compression
		DecBuf:  *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes
		Writing: true,
	}
}
|
||||
|
||||
// Create creates (or truncates) the named file and wraps it in an
// encoding SnappyFile. The caller should Close() the returned file.
func Create(name string) (file *SnappyFile, err error) {
	fp, err := os.Create(name)
	if err != nil {
		return nil, err
	}
	snap := NewWriter(fp)
	snap.Fname = name
	return snap, nil
}
|
||||
|
||||
func (f *SnappyFile) Close() error {
|
||||
if f.Writing {
|
||||
wc, ok := f.Writer.(io.WriteCloser)
|
||||
if ok {
|
||||
return wc.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
rc, ok := f.Reader.(io.ReadCloser)
|
||||
if ok {
|
||||
return rc.Close()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (f *SnappyFile) Sync() error {
|
||||
file, ok := f.Writer.(*os.File)
|
||||
if ok {
|
||||
return file.Sync()
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// UnsnapOneFrame advances the decode pipeline by one increment: it
// reads up to 64KiB from r into encBuf (still encoded, hence the name)
// and then parses as many complete snappy-framed chunks as encBuf
// holds, appending their decoded payloads to outDecodedBuf (stopping
// once >= 64KiB has been decoded in this call). fname is used only in
// panic messages. Returns nEnc = encoded bytes consumed, nDec = decoded
// bytes produced, and io.EOF only when r is exhausted AND encBuf is
// empty. Malformed input, CRC mismatches, and unexpected I/O errors
// panic (upstream vendored behavior).
func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) {
	// b, err := ioutil.ReadAll(r)
	// if err != nil {
	//	panic(err)
	// }

	nEnc = 0
	nDec = 0

	// read up to 65536 bytes from r into encBuf, at least a snappy frame
	nread, err := io.CopyN(encBuf, r, 65536) // returns nwrotebytes, err
	nEnc += nread
	if err != nil {
		if err == io.EOF {
			if nread == 0 {
				if encBuf.Readable == 0 {
					// truly done: source drained and nothing buffered
					return nEnc, nDec, io.EOF
				}
				// else we have bytes in encBuf, so decode them!
				err = nil
			} else {
				// continue below, processing the nread bytes
				err = nil
			}
		} else {
			// may be an odd error, like truncation; we do not handle it
			panic(err)
		}
	}

	// flag for printing chunk size alignment messages
	verbose := false

	const snappyStreamHeaderSz = 10
	const headerSz = 4
	const crc32Sz = 4
	// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt

	// NOTE: Bytes() may linearize (copy) a wrapped encBuf
	chunk := (*encBuf).Bytes()

	// however we exit, advance as
	// defer func() { (*encBuf).Next(N) }()

	// 65536 is the max size of a snappy framed chunk. See
	// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
	// buf := make([]byte, 65536)

	// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
	// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)

	//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])

	// scratch for the 3-byte little-endian chunk length (4th byte zeroed)
	fourbytes := make([]byte, 4)
	chunkCount := 0

	for nDec < 65536 {
		if len(chunk) == 0 {
			break
		}
		chunkCount++
		fourbytes[3] = 0
		copy(fourbytes, chunk[1:4])
		chunksz := binary.LittleEndian.Uint32(fourbytes)
		chunk_type := chunk[0]

		switch true {
		case chunk_type == 0xff:
			{ // stream identifier

				streamHeader := chunk[:snappyStreamHeaderSz]
				if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
					panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
				} else {
					//fmt.Printf("got streaming snappy magic header just fine.\n")
				}
				chunk = chunk[snappyStreamHeaderSz:]
				(*encBuf).Advance(snappyStreamHeaderSz)
				nEnc += snappyStreamHeaderSz
				continue
			}
		case chunk_type == 0x00:
			{ // compressed data
				if verbose {
					fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
				}

				crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
				section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]

				dec, ok := snappy.Decode(nil, section)
				if ok != nil {
					// we've probably truncated a snappy frame at this point
					// ok=snappy: corrupt input
					// len(dec) == 0
					//
					panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), ok))

					// NOTE(review): unreachable after the panic above --
					// get back to caller with what we've got so far
					return nEnc, nDec, nil
				}
				// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)

				// spit out decoded text
				// n, err := w.Write(dec)
				//fmt.Printf("len(dec) = %d, outDecodedBuf.Readable=%d\n", len(dec), outDecodedBuf.Readable)
				bnb := bytes.NewBuffer(dec)
				n, err := io.Copy(outDecodedBuf, bnb)
				if err != nil {
					//fmt.Printf("got n=%d, err= %s ; when trying to io.Copy(outDecodedBuf: N=%d, Readable=%d)\n", n, err, outDecodedBuf.N, outDecodedBuf.Readable)
					panic(err)
				}
				if n != int64(len(dec)) {
					panic("could not write all bytes to outDecodedBuf")
				}
				nDec += n

				// verify the crc32 rotated checksum
				m32 := masked_crc32c(dec)
				if m32 != crc {
					panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", crc, m32))
				} else {
					//fmt.Printf("\nchecksums match: %v == %v\n", crc, m32)
				}

				// move to next header
				inc := (headerSz + int(chunksz))
				chunk = chunk[inc:]
				(*encBuf).Advance(inc)
				nEnc += int64(inc)
				continue
			}
		case chunk_type == 0x01:
			{ // uncompressed data

				//n, err := w.Write(chunk[(headerSz+crc32Sz):(headerSz + int(chunksz))])
				n, err := io.Copy(outDecodedBuf, bytes.NewBuffer(chunk[(headerSz+crc32Sz):(headerSz+int(chunksz))]))
				if verbose {
					//fmt.Printf("debug: n=%d err=%v chunksz=%d outDecodedBuf='%v'\n", n, err, chunksz, outDecodedBuf)
				}
				if err != nil {
					panic(err)
				}
				if n != int64(chunksz-crc32Sz) {
					panic("could not write all bytes to stdout")
				}
				nDec += n

				inc := (headerSz + int(chunksz))
				chunk = chunk[inc:]
				(*encBuf).Advance(inc)
				nEnc += int64(inc)
				continue
			}
		case chunk_type == 0xfe:
			fallthrough // padding, just skip it
		case chunk_type >= 0x80 && chunk_type <= 0xfd:
			{ // Reserved skippable chunks
				//fmt.Printf("\nin reserved skippable chunks, at nEnc=%v\n", nEnc)
				inc := (headerSz + int(chunksz))
				chunk = chunk[inc:]
				nEnc += int64(inc)
				(*encBuf).Advance(inc)
				continue
			}

		default:
			panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
		}

	} // end for{}

	return nEnc, nDec, err
	//return int64(N), nil
}
|
||||
|
||||
// for whole file at once:
|
||||
//
|
||||
// receive on stdin a stream of bytes in the snappy-streaming framed
|
||||
// format, defined here: http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
|
||||
// Grab each frame, run it through the snappy decoder, and spit out
|
||||
// each frame all joined back-to-back on stdout.
|
||||
//
|
||||
func Unsnappy(r io.Reader, w io.Writer) (err error) {
|
||||
b, err := ioutil.ReadAll(r)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
// flag for printing chunk size alignment messages
|
||||
verbose := false
|
||||
|
||||
const snappyStreamHeaderSz = 10
|
||||
const headerSz = 4
|
||||
const crc32Sz = 4
|
||||
// the magic 18 bytes accounts for the snappy streaming header and the first chunks size and checksum
|
||||
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
|
||||
|
||||
chunk := b[:]
|
||||
|
||||
// 65536 is the max size of a snappy framed chunk. See
|
||||
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
|
||||
//buf := make([]byte, 65536)
|
||||
|
||||
// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
|
||||
// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)
|
||||
|
||||
//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])
|
||||
|
||||
fourbytes := make([]byte, 4)
|
||||
chunkCount := 0
|
||||
|
||||
for {
|
||||
if len(chunk) == 0 {
|
||||
break
|
||||
}
|
||||
chunkCount++
|
||||
fourbytes[3] = 0
|
||||
copy(fourbytes, chunk[1:4])
|
||||
chunksz := binary.LittleEndian.Uint32(fourbytes)
|
||||
chunk_type := chunk[0]
|
||||
|
||||
switch true {
|
||||
case chunk_type == 0xff:
|
||||
{ // stream identifier
|
||||
|
||||
streamHeader := chunk[:snappyStreamHeaderSz]
|
||||
if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
|
||||
panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
|
||||
} else {
|
||||
//fmt.Printf("got streaming snappy magic header just fine.\n")
|
||||
}
|
||||
chunk = chunk[snappyStreamHeaderSz:]
|
||||
continue
|
||||
}
|
||||
case chunk_type == 0x00:
|
||||
{ // compressed data
|
||||
if verbose {
|
||||
fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
|
||||
}
|
||||
|
||||
//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
|
||||
section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
|
||||
|
||||
dec, ok := snappy.Decode(nil, section)
|
||||
if ok != nil {
|
||||
panic("could not decode snappy stream")
|
||||
}
|
||||
// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)
|
||||
|
||||
// spit out decoded text
|
||||
n, err := w.Write(dec)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if n != len(dec) {
|
||||
panic("could not write all bytes to stdout")
|
||||
}
|
||||
|
||||
// TODO: verify the crc32 rotated checksum?
|
||||
|
||||
// move to next header
|
||||
chunk = chunk[(headerSz + int(chunksz)):]
|
||||
continue
|
||||
}
|
||||
case chunk_type == 0x01:
|
||||
{ // uncompressed data
|
||||
|
||||
//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
|
||||
section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
|
||||
|
||||
n, err := w.Write(section)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
if n != int(chunksz-crc32Sz) {
|
||||
panic("could not write all bytes to stdout")
|
||||
}
|
||||
|
||||
chunk = chunk[(headerSz + int(chunksz)):]
|
||||
continue
|
||||
}
|
||||
case chunk_type == 0xfe:
|
||||
fallthrough // padding, just skip it
|
||||
case chunk_type >= 0x80 && chunk_type <= 0xfd:
|
||||
{ // Reserved skippable chunks
|
||||
chunk = chunk[(headerSz + int(chunksz)):]
|
||||
continue
|
||||
}
|
||||
|
||||
default:
|
||||
panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
|
||||
}
|
||||
|
||||
} // end for{}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SnappyStreamHeaderMagic is the 10-byte stream identifier frame:
// 0xff 0x06 0x00 0x00 sNaPpY
var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}

// Framing-format constants; see
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
const (
	CHUNK_MAX                    = 65536
	_STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAX
	_STREAM_IDENTIFIER           = `sNaPpY`
	_COMPRESSED_CHUNK            = 0x00
	_UNCOMPRESSED_CHUNK          = 0x01
	_IDENTIFIER_CHUNK            = 0xff
	_RESERVED_UNSKIPPABLE0       = 0x02 // chunk ranges are [inclusive, exclusive)
	_RESERVED_UNSKIPPABLE1       = 0x80
	_RESERVED_SKIPPABLE0         = 0x80
	_RESERVED_SKIPPABLE1         = 0xff

	// the minimum percent of bytes compression must save to be enabled in
	// automatic mode
	_COMPRESSION_THRESHOLD = .125
)
|
||||
|
||||
// crctab is the Castagnoli-polynomial table used for the framing format's
// masked crc32c; this is the correct table, matching the crc32c.c code used
// by python. Initialized directly at package level rather than via init(),
// since crc32.MakeTable has no side effects or ordering requirements.
var crctab = crc32.MakeTable(crc32.Castagnoli)
|
||||
|
||||
func masked_crc32c(data []byte) uint32 {
|
||||
|
||||
// see the framing format specification, http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
|
||||
var crc uint32 = crc32.Checksum(data, crctab)
|
||||
return (uint32((crc>>15)|(crc<<17)) + 0xa282ead8)
|
||||
}
|
||||
|
||||
func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) {
|
||||
|
||||
snappyFile, err := Open(filename)
|
||||
if err != nil {
|
||||
return []byte{}, err
|
||||
}
|
||||
|
||||
var bb bytes.Buffer
|
||||
_, err = bb.ReadFrom(snappyFile)
|
||||
if err == io.EOF {
|
||||
err = nil
|
||||
}
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
return bb.Bytes(), err
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue