
Update to last common bleve (#3986)

Antoine GIRARD 2018-05-19 14:49:46 +02:00 committed by Lunny Xiao
parent 1b7cd3d0b0
commit 917b9641ec
184 changed files with 39576 additions and 121 deletions

18
vendor/github.com/glycerine/go-unsnap-stream/LICENSE generated vendored Normal file

@@ -0,0 +1,18 @@
Copyright (c) 2014 the go-unsnap-stream authors.
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so,
subject to the following conditions:
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Permission is explicitly granted to relicense this material under new terms of
your choice when integrating this library with another library or project.

20
vendor/github.com/glycerine/go-unsnap-stream/README.md generated vendored Normal file

@@ -0,0 +1,20 @@
go-unsnap-stream
================
This is a small golang library for decoding and encoding the snappy *streaming* format, specified here: https://github.com/google/snappy/blob/master/framing_format.txt
Note that the *streaming or framing format* for snappy is different from snappy itself. Think of it as a train of boxcars: the streaming format breaks your data in chunks, applies snappy to each chunk alone, then puts a thin wrapper around the chunk, and sends it along in turn. You can begin decoding before receiving everything. And memory requirements for decoding are sane.
Strangely, though the streaming format was first proposed in Go[1][2], it was never updated, and I could not locate any other library for Go that would handle the streaming/framed snappy format. Hence this implementation of the spec. There is a command line tool[3] that has a C implementation, but this is the only Go implementation that I am aware of. The reference for the framing/streaming spec seems to be the python implementation[4].
For binary compatibility with the python implementation, one could use the C-snappy compressor/decompressor code directly, via github.com/dgryski/go-csnappy. In fact we did this for a while to verify byte-for-byte compatibility, as the native Go implementation produces slightly different binary compression (still conformant with the standard of course), which made test-diffs harder, and some have complained about it being slower than the C version.
However, while the c-snappy was useful for checking compatibility, it introduced dependencies on external C libraries (both the c-snappy library and the C standard library). Our go binary executable that used the go-unsnap-stream library was no longer standalone, and deployment was painful if not impossible if the target had a different C standard library. So we've gone back to using the snappy-go implementation (entirely in Go) for ease of deployment. See the comments at the top of unsnap.go if you wish to use c-snappy instead.
[1] https://groups.google.com/forum/#!msg/snappy-compression/qvLNe2cSH9s/R19oBC-p7g4J
[2] https://codereview.appspot.com/5167058
[3] https://github.com/kubo/snzip
[4] https://pypi.python.org/pypi/python-snappy
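
A minimal usage sketch (illustrative; it relies on the exported Create/Write/Close and ReadSnappyStreamCompressedFile helpers defined in snap.go and unsnap.go, with error handling omitted):

    package main

    import (
        "fmt"

        unsnap "github.com/glycerine/go-unsnap-stream"
    )

    func main() {
        // write a framed-snappy file, then read it back
        w, _ := unsnap.Create("out.snappy")
        w.Write([]byte("hello_snappy"))
        w.Close()

        plain, _ := unsnap.ReadSnappyStreamCompressedFile("out.snappy")
        fmt.Println(string(plain)) // prints: hello_snappy
    }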

BIN
vendor/github.com/glycerine/go-unsnap-stream/binary.dat generated vendored Normal file

Binary file not shown.

Binary file not shown.

375
vendor/github.com/glycerine/go-unsnap-stream/rbuf.go generated vendored Normal file

@@ -0,0 +1,375 @@
package unsnap
// copyright (c) 2014, Jason E. Aten
// license: MIT
// Some text from the Golang standard library doc is adapted and
// reproduced in fragments below to document the expected behaviors
// of the interface functions Read()/Write()/ReadFrom()/WriteTo() that
// are implemented here. Those descriptions (see
// http://golang.org/pkg/io/#Reader for example) are
// copyright 2010 The Go Authors.
import "io"
// FixedSizeRingBuf:
//
// a fixed-size circular ring buffer. Yes, just what it says.
//
// We keep a pair of ping/pong buffers so that we can linearize
// the circular buffer into a contiguous slice if need be.
//
// For efficiency, a FixedSizeRingBuf may be vastly preferred to
// a bytes.Buffer. The ReadWithoutAdvance(), Advance(), and Adopt()
// methods are all non-standard methods written for speed.
//
// For an I/O heavy application, I have replaced bytes.Buffer with
// FixedSizeRingBuf and seen memory consumption go from 8GB to 25MB.
// Yes, that is a 300x reduction in memory footprint. Everything ran
// faster too.
//
// Note that Bytes(), while inescapable at times, is expensive: avoid
// it if possible. Instead it is better to use the FixedSizeRingBuf.Readable
// member to get the number of bytes available. Bytes() is expensive because
// it may copy the back and then the front of a wrapped buffer A[Use]
// into A[1-Use] in order to get a contiguous slice. If possible use ContigLen()
// first to get the size that can be read without copying, Read() that
// amount, and then Read() a second time -- to avoid the copy.
type FixedSizeRingBuf struct {
A [2][]byte // a pair of ping/pong buffers. Only one is active.
Use int // which A buffer is in active use, 0 or 1
N int // MaxViewInBytes, the size of A[0] and A[1] in bytes.
Beg int // start of data in A[Use]
Readable int // number of bytes available to read in A[Use]
OneMade bool // lazily instantiate the [1] buffer. If we never call Bytes(),
// we may never need it. If OneMade is false, then Use must be 0.
}
func (b *FixedSizeRingBuf) Make2ndBuffer() {
if b.OneMade {
return
}
b.A[1] = make([]byte, b.N, b.N)
b.OneMade = true
}
// get the length of the largest read that we can provide to a contiguous slice
// without an extra linearizing copy of all bytes internally.
func (b *FixedSizeRingBuf) ContigLen() int {
extent := b.Beg + b.Readable
firstContigLen := intMin(extent, b.N) - b.Beg
return firstContigLen
}
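// Illustrative usage sketch (not part of the upstream library): draining
// the buffer with at most two Read() calls, avoiding the linearizing copy
// that Bytes() may perform. Assumes dst has room for b.Readable bytes.
//
//	want := b.Readable
//	k := b.ContigLen()               // bytes readable without wrapping
//	n, _ := b.Read(dst[:k])          // first contiguous segment
//	if n < want {
//	    n2, _ := b.Read(dst[n:want]) // wrapped remainder, still no copy
//	    n += n2
//	}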
func NewFixedSizeRingBuf(maxViewInBytes int) *FixedSizeRingBuf {
n := maxViewInBytes
r := &FixedSizeRingBuf{
Use: 0, // 0 or 1, whichever is actually in use at the moment.
// If we are asked for Bytes() and we wrap, linearize into the other.
N: n,
Beg: 0,
Readable: 0,
OneMade: false,
}
r.A[0] = make([]byte, n, n)
// r.A[1] initialized lazily now.
return r
}
// from the standard library description of Bytes():
// Bytes() returns a slice of the contents of the unread portion of the buffer.
// If the caller changes the contents of the
// returned slice, the contents of the buffer will change provided there
// are no intervening method calls on the Buffer.
//
func (b *FixedSizeRingBuf) Bytes() []byte {
extent := b.Beg + b.Readable
if extent <= b.N {
// we fit contiguously in this buffer without wrapping to the other
return b.A[b.Use][b.Beg:(b.Beg + b.Readable)]
}
// wrap into the other buffer
b.Make2ndBuffer()
src := b.Use
dest := 1 - b.Use
n := copy(b.A[dest], b.A[src][b.Beg:])
n += copy(b.A[dest][n:], b.A[src][0:(extent%b.N)])
b.Use = dest
b.Beg = 0
return b.A[b.Use][:n]
}
// Read():
//
// from bytes.Buffer.Read(): Read reads the next len(p) bytes
// from the buffer or until the buffer is drained. The return
// value n is the number of bytes read. If the buffer has no data
// to return, err is io.EOF (unless len(p) is zero); otherwise it is nil.
//
// from the description of the Reader interface,
// http://golang.org/pkg/io/#Reader
//
/*
Reader is the interface that wraps the basic Read method.
Read reads up to len(p) bytes into p. It returns the number
of bytes read (0 <= n <= len(p)) and any error encountered.
Even if Read returns n < len(p), it may use all of p as scratch
space during the call. If some data is available but not
len(p) bytes, Read conventionally returns what is available
instead of waiting for more.
When Read encounters an error or end-of-file condition after
successfully reading n > 0 bytes, it returns the number of bytes
read. It may return the (non-nil) error from the same call or
return the error (and n == 0) from a subsequent call. An instance
of this general case is that a Reader returning a non-zero number
of bytes at the end of the input stream may return
either err == EOF or err == nil. The next Read should
return 0, EOF regardless.
Callers should always process the n > 0 bytes returned before
considering the error err. Doing so correctly handles I/O errors
that happen after reading some bytes and also both of the
allowed EOF behaviors.
Implementations of Read are discouraged from returning a zero
byte count with a nil error, and callers should treat that
situation as a no-op.
*/
//
func (b *FixedSizeRingBuf) Read(p []byte) (n int, err error) {
return b.ReadAndMaybeAdvance(p, true)
}
// if you want to Read the data and leave it in the buffer, so as
// to peek ahead for example.
func (b *FixedSizeRingBuf) ReadWithoutAdvance(p []byte) (n int, err error) {
return b.ReadAndMaybeAdvance(p, false)
}
func (b *FixedSizeRingBuf) ReadAndMaybeAdvance(p []byte, doAdvance bool) (n int, err error) {
if len(p) == 0 {
return 0, nil
}
if b.Readable == 0 {
return 0, io.EOF
}
extent := b.Beg + b.Readable
if extent <= b.N {
n += copy(p, b.A[b.Use][b.Beg:extent])
} else {
n += copy(p, b.A[b.Use][b.Beg:b.N])
if n < len(p) {
n += copy(p[n:], b.A[b.Use][0:(extent%b.N)])
}
}
if doAdvance {
b.Advance(n)
}
return
}
//
// Write writes len(p) bytes from p to the underlying data stream.
// It returns the number of bytes written from p (0 <= n <= len(p))
// and any error encountered that caused the write to stop early.
// Write must return a non-nil error if it returns n < len(p).
//
func (b *FixedSizeRingBuf) Write(p []byte) (n int, err error) {
for {
if len(p) == 0 {
// nothing (left) to copy in; notice we shorten our
// local copy p (below) as we read from it.
return
}
writeCapacity := b.N - b.Readable
if writeCapacity <= 0 {
// we are all full up already.
return n, io.ErrShortWrite
}
if len(p) > writeCapacity {
err = io.ErrShortWrite
// leave err set and
// keep going, write what we can.
}
writeStart := (b.Beg + b.Readable) % b.N
upperLim := intMin(writeStart+writeCapacity, b.N)
k := copy(b.A[b.Use][writeStart:upperLim], p)
n += k
b.Readable += k
p = p[k:]
// we can fill from b.A[b.Use][0:something] from
// p's remainder, so loop
}
}
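// Illustrative note (not in the upstream source): when the ring buffer
// fills, Write keeps whatever it could copy and reports io.ErrShortWrite,
// so a caller that must not drop data should check n:
//
//	n, err := b.Write(p)
//	if err == io.ErrShortWrite {
//	    // only p[:n] was buffered; drain b (e.g. via WriteTo) and retry p[n:]
//	}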
// WriteTo and ReadFrom avoid intermediate allocation and copies.
// WriteTo writes data to w until there's no more data to write
// or when an error occurs. The return value n is the number of
// bytes written. Any error encountered during the write is also returned.
func (b *FixedSizeRingBuf) WriteTo(w io.Writer) (n int64, err error) {
if b.Readable == 0 {
return 0, io.EOF
}
extent := b.Beg + b.Readable
firstWriteLen := intMin(extent, b.N) - b.Beg
secondWriteLen := b.Readable - firstWriteLen
if firstWriteLen > 0 {
m, e := w.Write(b.A[b.Use][b.Beg:(b.Beg + firstWriteLen)])
n += int64(m)
b.Advance(m)
if e != nil {
return n, e
}
// all bytes should have been written, by definition of
// Write method in io.Writer
if m != firstWriteLen {
return n, io.ErrShortWrite
}
}
if secondWriteLen > 0 {
m, e := w.Write(b.A[b.Use][0:secondWriteLen])
n += int64(m)
b.Advance(m)
if e != nil {
return n, e
}
// all bytes should have been written, by definition of
// Write method in io.Writer
if m != secondWriteLen {
return n, io.ErrShortWrite
}
}
return n, nil
}
// ReadFrom() reads data from r until EOF or error. The return value n
// is the number of bytes read. Any error except io.EOF encountered
// during the read is also returned.
func (b *FixedSizeRingBuf) ReadFrom(r io.Reader) (n int64, err error) {
for {
writeCapacity := b.N - b.Readable
if writeCapacity <= 0 {
// we are all full
return n, nil
}
writeStart := (b.Beg + b.Readable) % b.N
upperLim := intMin(writeStart+writeCapacity, b.N)
m, e := r.Read(b.A[b.Use][writeStart:upperLim])
n += int64(m)
b.Readable += m
if e == io.EOF {
return n, nil
}
if e != nil {
return n, e
}
}
}
func (b *FixedSizeRingBuf) Reset() {
b.Beg = 0
b.Readable = 0
b.Use = 0
}
// Advance(): non-standard, but better than Next(),
// because we don't have to unwrap our buffer and pay the cpu time
// for the copy that unwrapping may need.
// Useful in conjunction with/after ReadWithoutAdvance() above.
func (b *FixedSizeRingBuf) Advance(n int) {
if n <= 0 {
return
}
if n > b.Readable {
n = b.Readable
}
b.Readable -= n
b.Beg = (b.Beg + n) % b.N
}
// Adopt(): non-standard.
//
// For efficiency's sake, (possibly) take ownership of
// already allocated slice offered in me.
//
// If me is large we will adopt it, and we will potentially then
// write to the me buffer.
// If we already have a bigger buffer, copy me into the existing
// buffer instead.
func (b *FixedSizeRingBuf) Adopt(me []byte) {
n := len(me)
if n > b.N {
b.A[0] = me
b.OneMade = false
b.N = n
b.Use = 0
b.Beg = 0
b.Readable = n
} else {
// we already have a larger buffer, reuse it.
copy(b.A[0], me)
b.Use = 0
b.Beg = 0
b.Readable = n
}
}
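// Illustrative sketch (not in the upstream source): handing an already
// allocated slice to the ring buffer instead of copying it in with Write().
//
//	rb := NewFixedSizeRingBuf(64)
//	big := make([]byte, 128) // larger than rb.N, so ownership is adopted
//	rb.Adopt(big)            // rb now reads directly from big; rb.Readable == 128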
func intMax(a, b int) int {
if a > b {
return a
} else {
return b
}
}
func intMin(a, b int) int {
if a < b {
return a
} else {
return b
}
}
// Get the [beg, end) indices of the trailing empty region of the active byte slice that is free for writing.
// Note: not guaranteed to be zeroed. At all.
func (b *FixedSizeRingBuf) GetEndmostWritable() (beg int, end int) {
extent := b.Beg + b.Readable
if extent < b.N {
return extent, b.N
}
return extent % b.N, b.Beg
}
// Note: not guaranteed to be zeroed.
func (b *FixedSizeRingBuf) GetEndmostWritableSlice() []byte {
beg, e := b.GetEndmostWritable()
return b.A[b.Use][beg:e]
}

100
vendor/github.com/glycerine/go-unsnap-stream/snap.go generated vendored Normal file

@@ -0,0 +1,100 @@
package unsnap
import (
"encoding/binary"
// no c lib dependency
snappy "github.com/golang/snappy"
// or, use the C wrapper for speed
//snappy "github.com/dgryski/go-csnappy"
)
// add Write() method for SnappyFile (see unsnap.go)
// reference for snappy framing/streaming format:
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
// ?spec=svn68&r=71
//
// Write writes len(p) bytes from p to the underlying data stream.
// It returns the number of bytes written from p (0 <= n <= len(p)) and
// any error encountered that caused the write to stop early. Write
// must return a non-nil error if it returns n < len(p).
//
func (sf *SnappyFile) Write(p []byte) (n int, err error) {
if sf.SnappyEncodeDecodeOff {
return sf.Writer.Write(p)
}
if !sf.Writing {
panic("Writing on a read-only SnappyFile")
}
// encoding in snappy can apparently go beyond the original size, beware.
// so our buffers must be sized 2*max snappy chunk => 2 * CHUNK_MAX(65536)
sf.DecBuf.Reset()
sf.EncBuf.Reset()
if !sf.HeaderChunkWritten {
sf.HeaderChunkWritten = true
_, err = sf.Writer.Write(SnappyStreamHeaderMagic)
if err != nil {
return
}
}
var chunk []byte
var chunk_type byte
var crc uint32
for len(p) > 0 {
// chunk points to input p by default, unencoded input.
chunk = p[:IntMin(len(p), CHUNK_MAX)]
crc = masked_crc32c(chunk)
writeme := chunk[:]
// first write to EncBuf, as a temp, in case we want
// to discard and send uncompressed instead.
compressed_chunk := snappy.Encode(sf.EncBuf.GetEndmostWritableSlice(), chunk)
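// Use the compressed form only if it saves at least _COMPRESSION_THRESHOLD
// (12.5%) of the chunk's bytes; otherwise the chunk is framed uncompressed.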
if len(compressed_chunk) <= int((1-_COMPRESSION_THRESHOLD)*float64(len(chunk))) {
writeme = compressed_chunk
chunk_type = _COMPRESSED_CHUNK
} else {
// keep writeme pointing at original chunk (uncompressed)
chunk_type = _UNCOMPRESSED_CHUNK
}
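// Frame the chunk: byte 0 of the 4-byte header is the chunk type and
// bytes 1-3 are the little-endian length of the body (4-byte masked CRC
// plus data). Packing the type into the low byte of tag32 and the length
// shifted left by 8, then writing tag32 little-endian, yields exactly
// that header layout.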
const crc32Sz = 4
var tag32 uint32 = uint32(chunk_type) + (uint32(len(writeme)+crc32Sz) << 8)
err = binary.Write(sf.Writer, binary.LittleEndian, tag32)
if err != nil {
return
}
err = binary.Write(sf.Writer, binary.LittleEndian, crc)
if err != nil {
return
}
_, err = sf.Writer.Write(writeme)
if err != nil {
return
}
n += len(chunk)
p = p[len(chunk):]
}
return n, nil
}
func IntMin(a int, b int) int {
if a < b {
return a
}
return b
}


@@ -0,0 +1 @@
hello_snappy

Binary file not shown.

513
vendor/github.com/glycerine/go-unsnap-stream/unsnap.go generated vendored Normal file

@@ -0,0 +1,513 @@
package unsnap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"io/ioutil"
"os"
"hash/crc32"
snappy "github.com/golang/snappy"
// The C library can be used, but this makes the binary dependent
// lots of extraneous c-libraries; it is no longer stand-alone. Yuck.
//
// Therefore we comment out the "dgryski/go-csnappy" path and use the
// "github.com/golang/snappy/snappy" above instead. If you are
// performance limited and can deal with distributing more libraries,
// then this is easy to swap.
//
// If you swap, note that some of the tests won't pass
// because snappy-go produces slightly different (but still
// conformant) encodings on some data. Here are bindings
// to the C-snappy:
// snappy "github.com/dgryski/go-csnappy"
)
// SnappyFile: a drop-in replacement/wrapper for an *os.File that handles the unsnappification on the fly, as more is read from it
type SnappyFile struct {
Fname string
Reader io.Reader
Writer io.Writer
// allow clients to substitute us for an os.File and just switch
// off compression if they don't want it.
SnappyEncodeDecodeOff bool // if true, we bypass straight to Filep
EncBuf FixedSizeRingBuf // holds any extra that isn't yet returned, encoded
DecBuf FixedSizeRingBuf // holds any extra that isn't yet returned, decoded
// for writing to stream-framed snappy
HeaderChunkWritten bool
// Sanity check: we can only read, or only write, to one SnappyFile.
// EncBuf and DecBuf are used differently in each mode. Verify
// that we are consistent with this flag.
Writing bool
}
var total int
// for debugging, show state of buffers
func (f *SnappyFile) Dump() {
fmt.Printf("EncBuf has length %d and contents:\n%s\n", len(f.EncBuf.Bytes()), string(f.EncBuf.Bytes()))
fmt.Printf("DecBuf has length %d and contents:\n%s\n", len(f.DecBuf.Bytes()), string(f.DecBuf.Bytes()))
}
func (f *SnappyFile) Read(p []byte) (n int, err error) {
if f.SnappyEncodeDecodeOff {
return f.Reader.Read(p)
}
if f.Writing {
panic("Reading on a write-only SnappyFile")
}
// before we decode more, try to drain the DecBuf first
n, _ = f.DecBuf.Read(p)
if n > 0 {
total += n
return n, nil
}
//nEncRead, nDecAdded, err := UnsnapOneFrame(f.Filep, &f.EncBuf, &f.DecBuf, f.Fname)
_, _, err = UnsnapOneFrame(f.Reader, &f.EncBuf, &f.DecBuf, f.Fname)
if err != nil && err != io.EOF {
panic(err)
}
n, _ = f.DecBuf.Read(p)
if n > 0 {
total += n
return n, nil
}
if f.DecBuf.Readable == 0 {
if f.DecBuf.Readable == 0 && f.EncBuf.Readable == 0 {
// only now (when EncBuf is empty) can we give io.EOF.
// Any earlier, and we leave stuff un-decoded!
return 0, io.EOF
}
}
return 0, nil
}
func Open(name string) (file *SnappyFile, err error) {
fp, err := os.Open(name)
if err != nil {
return nil, err
}
// encoding in snappy can apparently go beyond the original size, so
// we make our buffers big enough, 2*max snappy chunk => 2 * CHUNK_MAX(65536)
snap := NewReader(fp)
snap.Fname = name
return snap, nil
}
func NewReader(r io.Reader) *SnappyFile {
return &SnappyFile{
Reader: r,
EncBuf: *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy encoded bytes
DecBuf: *NewFixedSizeRingBuf(CHUNK_MAX * 2), // buffer of snappy decoded bytes
Writing: false,
}
}
func NewWriter(w io.Writer) *SnappyFile {
return &SnappyFile{
Writer: w,
EncBuf: *NewFixedSizeRingBuf(65536), // on writing: temp for testing compression
DecBuf: *NewFixedSizeRingBuf(65536 * 2), // on writing: final buffer of snappy framed and encoded bytes
Writing: true,
}
}
func Create(name string) (file *SnappyFile, err error) {
fp, err := os.Create(name)
if err != nil {
return nil, err
}
snap := NewWriter(fp)
snap.Fname = name
return snap, nil
}
func (f *SnappyFile) Close() error {
if f.Writing {
wc, ok := f.Writer.(io.WriteCloser)
if ok {
return wc.Close()
}
return nil
}
rc, ok := f.Reader.(io.ReadCloser)
if ok {
return rc.Close()
}
return nil
}
func (f *SnappyFile) Sync() error {
file, ok := f.Writer.(*os.File)
if ok {
return file.Sync()
}
return nil
}
// for an increment of a frame at a time:
// read from r into encBuf (encBuf is still encoded, thus the name), and write unsnappified frames into outDecodedBuf
// the returned n: number of bytes read from the encoded encBuf
func UnsnapOneFrame(r io.Reader, encBuf *FixedSizeRingBuf, outDecodedBuf *FixedSizeRingBuf, fname string) (nEnc int64, nDec int64, err error) {
// b, err := ioutil.ReadAll(r)
// if err != nil {
// panic(err)
// }
nEnc = 0
nDec = 0
// read up to 65536 bytes from r into encBuf, at least a snappy frame
nread, err := io.CopyN(encBuf, r, 65536) // returns nwrotebytes, err
nEnc += nread
if err != nil {
if err == io.EOF {
if nread == 0 {
if encBuf.Readable == 0 {
return nEnc, nDec, io.EOF
}
// else we have bytes in encBuf, so decode them!
err = nil
} else {
// continue below, processing the nread bytes
err = nil
}
} else {
panic(err)
}
}
// flag for printing chunk size alignment messages
verbose := false
const snappyStreamHeaderSz = 10
const headerSz = 4
const crc32Sz = 4
// the magic 18 bytes accounts for the snappy streaming header and the first chunk's size and checksum
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
chunk := (*encBuf).Bytes()
// however we exit, advance as
// defer func() { (*encBuf).Next(N) }()
// 65536 is the max size of a snappy framed chunk. See
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
// buf := make([]byte, 65536)
// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)
//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])
fourbytes := make([]byte, 4)
chunkCount := 0
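// Each pass of the loop below peels one chunk off the front of `chunk`:
// byte 0 is the chunk type, bytes 1-3 are the little-endian body length,
// and for data chunks the body is a 4-byte masked CRC followed by the
// (possibly compressed) payload.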
for nDec < 65536 {
if len(chunk) == 0 {
break
}
chunkCount++
fourbytes[3] = 0
copy(fourbytes, chunk[1:4])
chunksz := binary.LittleEndian.Uint32(fourbytes)
chunk_type := chunk[0]
switch true {
case chunk_type == 0xff:
{ // stream identifier
streamHeader := chunk[:snappyStreamHeaderSz]
if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
} else {
//fmt.Printf("got streaming snappy magic header just fine.\n")
}
chunk = chunk[snappyStreamHeaderSz:]
(*encBuf).Advance(snappyStreamHeaderSz)
nEnc += snappyStreamHeaderSz
continue
}
case chunk_type == 0x00:
{ // compressed data
if verbose {
fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
}
crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
dec, ok := snappy.Decode(nil, section)
if ok != nil {
// we've probably truncated a snappy frame at this point
// ok=snappy: corrupt input
// len(dec) == 0
//
panic(fmt.Sprintf("could not decode snappy stream: '%s' and len dec=%d and ok=%v\n", fname, len(dec), ok))
// get back to caller with what we've got so far
return nEnc, nDec, nil
}
// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)
// spit out decoded text
// n, err := w.Write(dec)
//fmt.Printf("len(dec) = %d, outDecodedBuf.Readable=%d\n", len(dec), outDecodedBuf.Readable)
bnb := bytes.NewBuffer(dec)
n, err := io.Copy(outDecodedBuf, bnb)
if err != nil {
//fmt.Printf("got n=%d, err= %s ; when trying to io.Copy(outDecodedBuf: N=%d, Readable=%d)\n", n, err, outDecodedBuf.N, outDecodedBuf.Readable)
panic(err)
}
if n != int64(len(dec)) {
panic("could not write all bytes to outDecodedBuf")
}
nDec += n
// verify the crc32 rotated checksum
m32 := masked_crc32c(dec)
if m32 != crc {
panic(fmt.Sprintf("crc32 masked failiure. expected: %v but got: %v", crc, m32))
} else {
//fmt.Printf("\nchecksums match: %v == %v\n", crc, m32)
}
// move to next header
inc := (headerSz + int(chunksz))
chunk = chunk[inc:]
(*encBuf).Advance(inc)
nEnc += int64(inc)
continue
}
case chunk_type == 0x01:
{ // uncompressed data
//n, err := w.Write(chunk[(headerSz+crc32Sz):(headerSz + int(chunksz))])
n, err := io.Copy(outDecodedBuf, bytes.NewBuffer(chunk[(headerSz+crc32Sz):(headerSz+int(chunksz))]))
if verbose {
//fmt.Printf("debug: n=%d err=%v chunksz=%d outDecodedBuf='%v'\n", n, err, chunksz, outDecodedBuf)
}
if err != nil {
panic(err)
}
if n != int64(chunksz-crc32Sz) {
panic("could not write all bytes to stdout")
}
nDec += n
inc := (headerSz + int(chunksz))
chunk = chunk[inc:]
(*encBuf).Advance(inc)
nEnc += int64(inc)
continue
}
case chunk_type == 0xfe:
fallthrough // padding, just skip it
case chunk_type >= 0x80 && chunk_type <= 0xfd:
{ // Reserved skippable chunks
//fmt.Printf("\nin reserved skippable chunks, at nEnc=%v\n", nEnc)
inc := (headerSz + int(chunksz))
chunk = chunk[inc:]
nEnc += int64(inc)
(*encBuf).Advance(inc)
continue
}
default:
panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
}
} // end for{}
return nEnc, nDec, err
//return int64(N), nil
}
// for whole file at once:
//
// receive on stdin a stream of bytes in the snappy-streaming framed
// format, defined here: http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
// Grab each frame, run it through the snappy decoder, and spit out
// each frame all joined back-to-back on stdout.
//
func Unsnappy(r io.Reader, w io.Writer) (err error) {
b, err := ioutil.ReadAll(r)
if err != nil {
panic(err)
}
// flag for printing chunk size alignment messages
verbose := false
const snappyStreamHeaderSz = 10
const headerSz = 4
const crc32Sz = 4
// the magic 18 bytes accounts for the snappy streaming header and the first chunk's size and checksum
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
chunk := b[:]
// 65536 is the max size of a snappy framed chunk. See
// http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt:91
//buf := make([]byte, 65536)
// fmt.Printf("read from file, b is len:%d with value: %#v\n", len(b), b)
// fmt.Printf("read from file, bcut is len:%d with value: %#v\n", len(bcut), bcut)
//fmt.Printf("raw bytes of chunksz are: %v\n", b[11:14])
fourbytes := make([]byte, 4)
chunkCount := 0
for {
if len(chunk) == 0 {
break
}
chunkCount++
fourbytes[3] = 0
copy(fourbytes, chunk[1:4])
chunksz := binary.LittleEndian.Uint32(fourbytes)
chunk_type := chunk[0]
switch true {
case chunk_type == 0xff:
{ // stream identifier
streamHeader := chunk[:snappyStreamHeaderSz]
if 0 != bytes.Compare(streamHeader, []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}) {
panic("file had chunk starting with 0xff but then no magic snappy streaming protocol bytes, aborting.")
} else {
//fmt.Printf("got streaming snappy magic header just fine.\n")
}
chunk = chunk[snappyStreamHeaderSz:]
continue
}
case chunk_type == 0x00:
{ // compressed data
if verbose {
fmt.Fprintf(os.Stderr, "chunksz is %d while total bytes avail are: %d\n", int(chunksz), len(chunk)-4)
}
//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
dec, ok := snappy.Decode(nil, section)
if ok != nil {
panic("could not decode snappy stream")
}
// fmt.Printf("ok, b is %#v , %#v\n", ok, dec)
// spit out decoded text
n, err := w.Write(dec)
if err != nil {
panic(err)
}
if n != len(dec) {
panic("could not write all bytes to stdout")
}
// TODO: verify the crc32 rotated checksum?
// move to next header
chunk = chunk[(headerSz + int(chunksz)):]
continue
}
case chunk_type == 0x01:
{ // uncompressed data
//crc := binary.LittleEndian.Uint32(chunk[headerSz:(headerSz + crc32Sz)])
section := chunk[(headerSz + crc32Sz):(headerSz + chunksz)]
n, err := w.Write(section)
if err != nil {
panic(err)
}
if n != int(chunksz-crc32Sz) {
panic("could not write all bytes to stdout")
}
chunk = chunk[(headerSz + int(chunksz)):]
continue
}
case chunk_type == 0xfe:
fallthrough // padding, just skip it
case chunk_type >= 0x80 && chunk_type <= 0xfd:
{ // Reserved skippable chunks
chunk = chunk[(headerSz + int(chunksz)):]
continue
}
default:
panic(fmt.Sprintf("unrecognized/unsupported chunk type %#v", chunk_type))
}
} // end for{}
return nil
}
// 0xff 0x06 0x00 0x00 sNaPpY
var SnappyStreamHeaderMagic = []byte{0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59}
const CHUNK_MAX = 65536
const _STREAM_TO_STREAM_BLOCK_SIZE = CHUNK_MAX
const _STREAM_IDENTIFIER = `sNaPpY`
const _COMPRESSED_CHUNK = 0x00
const _UNCOMPRESSED_CHUNK = 0x01
const _IDENTIFIER_CHUNK = 0xff
const _RESERVED_UNSKIPPABLE0 = 0x02 // chunk ranges are [inclusive, exclusive)
const _RESERVED_UNSKIPPABLE1 = 0x80
const _RESERVED_SKIPPABLE0 = 0x80
const _RESERVED_SKIPPABLE1 = 0xff
// the minimum percent of bytes compression must save to be enabled in automatic
// mode
const _COMPRESSION_THRESHOLD = .125
var crctab *crc32.Table
func init() {
crctab = crc32.MakeTable(crc32.Castagnoli) // this is correct table, matches the crc32c.c code used by python
}
func masked_crc32c(data []byte) uint32 {
// see the framing format specification, http://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
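// The CRC-32C value is "masked" as the framing spec requires: rotated
// right by 15 bits and offset by 0xa282ead8, so that data which itself
// contains embedded CRCs does not defeat the checksum.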
var crc uint32 = crc32.Checksum(data, crctab)
return (uint32((crc>>15)|(crc<<17)) + 0xa282ead8)
}
func ReadSnappyStreamCompressedFile(filename string) ([]byte, error) {
snappyFile, err := Open(filename)
if err != nil {
return []byte{}, err
}
var bb bytes.Buffer
_, err = bb.ReadFrom(snappyFile)
if err == io.EOF {
err = nil
}
if err != nil {
panic(err)
}
return bb.Bytes(), err
}