forked from forgejo/forgejo
Update to last common bleve (#3986)
This commit is contained in:
parent
1b7cd3d0b0
commit
917b9641ec
184 changed files with 39576 additions and 121 deletions
22
vendor/github.com/Smerity/govarint/LICENSE
generated
vendored
Normal file
22
vendor/github.com/Smerity/govarint/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,22 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Stephen Merity
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
67
vendor/github.com/Smerity/govarint/README.md
generated
vendored
Normal file
67
vendor/github.com/Smerity/govarint/README.md
generated
vendored
Normal file
|
@ -0,0 +1,67 @@
|
|||
# Govarint
|
||||
|
||||
This project aims to provide a simple API for the performant encoding and decoding of 32 and 64 bit integers using a variety of algorithms.
|
||||
|
||||
[](https://www.flickr.com/photos/tsevis/8648521649/)
|
||||
|
||||
## Usage
|
||||
|
||||
Each integer encoding algorithm conforms to an encoding and decoding interface.
|
||||
The interfaces also specify the size of the unsigned integer, either 32 or 64 bits, and will be referred to as XX below.
|
||||
To create an encoder:
|
||||
|
||||
NewU32Base128Encoder(w io.Writer)
|
||||
NewU64Base128Encoder(w io.Writer)
|
||||
NewU32GroupVarintEncoder(w io.Writer)
|
||||
|
||||
For encoders, the only two commands are `PutUXX` and `Close`.
|
||||
`Close` must be called as some integer encoding algorithms write in multiples.
|
||||
|
||||
var buf bytes.Buffer
|
||||
enc := NewU32Base128Encoder(&buf)
|
||||
enc.PutU32(117)
|
||||
enc.PutU32(343)
|
||||
enc.Close()
|
||||
|
||||
To create a decoder:
|
||||
|
||||
NewU32Base128Decoder(r io.ByteReader)
|
||||
NewU64Base128Decoder(r io.ByteReader)
|
||||
NewU32GroupVarintDecoder(r io.ByteReader)
|
||||
|
||||
For decoders, the only command is `GetUXX`.
|
||||
`GetUXX` returns the value and any potential errors.
|
||||
When reading is complete, `GetUXX` will return an `EOF` (End Of File).
|
||||
|
||||
dec := NewU32Base128Decoder(&buf)
|
||||
x, err := dec.GetU32()
|
||||
|
||||
## Use Cases
|
||||
|
||||
Using fixed width integers, such as uint32 and uint64, usually waste large amounts of space, especially when encoding small values.
|
||||
Optimally, smaller numbers should take less space to represent.
|
||||
|
||||
Using integer encoding algorithms is especially common in specific applications, such as storing edge lists or indexes for search engines.
|
||||
In these situations, you have a sorted list of numbers that you want to keep as compactly as possible in memory.
|
||||
Additionally, by storing only the difference between the given number and the previous (delta encoding), the numbers are quite small, and thus compress well.
|
||||
|
||||
For an explicit example, the Web Data Commons Hyperlink Graph contains 128 billion edges linking page A to page B, where each page is represented by a 32 bit integer.
|
||||
By converting all these edges to 64 bit integers (32 | 32), sorting them, and then using delta encoding, memory usage can be reduced from 64 bits per edge down to only 9 bits per edge using the Base128 integer encoding algorithm.
|
||||
This figure improves even further if compressed using conventional compression algorithms (3 bits per edge).
|
||||
|
||||
## Encodings supported
|
||||
|
||||
`govarint` supports:
|
||||
|
||||
+ Base128 [32, 64] - each byte uses 7 bits for encoding the integer and 1 bit for indicating if the integer requires another byte
|
||||
+ Group Varint [32] - integers are encoded in blocks of four - one byte encodes the size of the following four integers, then the values of the four integers follows
|
||||
|
||||
Group Varint consistently beats Base128 in decompression speed but Base128 may offer improved compression ratios depending on the distribution of the supplied integers.
|
||||
|
||||
## Tests
|
||||
|
||||
go test -v -bench=.
|
||||
|
||||
## License
|
||||
|
||||
MIT License, as per `LICENSE`
|
229
vendor/github.com/Smerity/govarint/govarint.go
generated
vendored
Normal file
229
vendor/github.com/Smerity/govarint/govarint.go
generated
vendored
Normal file
|
@ -0,0 +1,229 @@
|
|||
package govarint
|
||||
|
||||
import "encoding/binary"
|
||||
import "io"
|
||||
|
||||
type U32VarintEncoder interface {
|
||||
PutU32(x uint32) int
|
||||
Close()
|
||||
}
|
||||
|
||||
type U32VarintDecoder interface {
|
||||
GetU32() (uint32, error)
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
type U64VarintEncoder interface {
|
||||
PutU64(x uint64) int
|
||||
Close()
|
||||
}
|
||||
|
||||
type U64VarintDecoder interface {
|
||||
GetU64() (uint64, error)
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
type U32GroupVarintEncoder struct {
|
||||
w io.Writer
|
||||
index int
|
||||
store [4]uint32
|
||||
temp [17]byte
|
||||
}
|
||||
|
||||
func NewU32GroupVarintEncoder(w io.Writer) *U32GroupVarintEncoder { return &U32GroupVarintEncoder{w: w} }
|
||||
|
||||
func (b *U32GroupVarintEncoder) Flush() (int, error) {
|
||||
// TODO: Is it more efficient to have a tailored version that's called only in Close()?
|
||||
// If index is zero, there are no integers to flush
|
||||
if b.index == 0 {
|
||||
return 0, nil
|
||||
}
|
||||
// In the case we're flushing (the group isn't of size four), the non-values should be zero
|
||||
// This ensures the unused entries are all zero in the sizeByte
|
||||
for i := b.index; i < 4; i++ {
|
||||
b.store[i] = 0
|
||||
}
|
||||
length := 1
|
||||
// We need to reset the size byte to zero as we only bitwise OR into it, we don't overwrite it
|
||||
b.temp[0] = 0
|
||||
for i, x := range b.store {
|
||||
size := byte(0)
|
||||
shifts := []byte{24, 16, 8, 0}
|
||||
for _, shift := range shifts {
|
||||
// Always writes at least one byte -- the first one (shift = 0)
|
||||
// Will write more bytes until the rest of the integer is all zeroes
|
||||
if (x>>shift) != 0 || shift == 0 {
|
||||
size += 1
|
||||
b.temp[length] = byte(x >> shift)
|
||||
length += 1
|
||||
}
|
||||
}
|
||||
// We store the size in two of the eight bits in the first byte (sizeByte)
|
||||
// 0 means there is one byte in total, hence why we subtract one from size
|
||||
b.temp[0] |= (size - 1) << (uint8(3-i) * 2)
|
||||
}
|
||||
// If we're flushing without a full group of four, remove the unused bytes we computed
|
||||
// This enables us to realize it's a partial group on decoding thanks to EOF
|
||||
if b.index != 4 {
|
||||
length -= 4 - b.index
|
||||
}
|
||||
_, err := b.w.Write(b.temp[:length])
|
||||
return length, err
|
||||
}
|
||||
|
||||
func (b *U32GroupVarintEncoder) PutU32(x uint32) (int, error) {
|
||||
bytesWritten := 0
|
||||
b.store[b.index] = x
|
||||
b.index += 1
|
||||
if b.index == 4 {
|
||||
n, err := b.Flush()
|
||||
if err != nil {
|
||||
return n, err
|
||||
}
|
||||
bytesWritten += n
|
||||
b.index = 0
|
||||
}
|
||||
return bytesWritten, nil
|
||||
}
|
||||
|
||||
func (b *U32GroupVarintEncoder) Close() {
|
||||
// On Close, we flush any remaining values that might not have been in a full group
|
||||
b.Flush()
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
type U32GroupVarintDecoder struct {
|
||||
r io.ByteReader
|
||||
group [4]uint32
|
||||
pos int
|
||||
finished bool
|
||||
capacity int
|
||||
}
|
||||
|
||||
func NewU32GroupVarintDecoder(r io.ByteReader) *U32GroupVarintDecoder {
|
||||
return &U32GroupVarintDecoder{r: r, pos: 4, capacity: 4}
|
||||
}
|
||||
|
||||
func (b *U32GroupVarintDecoder) getGroup() error {
|
||||
// We should always receive a sizeByte if there are more values to read
|
||||
sizeByte, err := b.r.ReadByte()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// Calculate the size of the four incoming 32 bit integers
|
||||
// 0b00 means 1 byte to read, 0b01 = 2, etc
|
||||
b.group[0] = uint32((sizeByte >> 6) & 3)
|
||||
b.group[1] = uint32((sizeByte >> 4) & 3)
|
||||
b.group[2] = uint32((sizeByte >> 2) & 3)
|
||||
b.group[3] = uint32(sizeByte & 3)
|
||||
//
|
||||
for index, size := range b.group {
|
||||
b.group[index] = 0
|
||||
// Any error that occurs in earlier byte reads should be repeated at the end one
|
||||
// Hence we only catch and report the final ReadByte's error
|
||||
var err error
|
||||
switch size {
|
||||
case 0:
|
||||
var x byte
|
||||
x, err = b.r.ReadByte()
|
||||
b.group[index] = uint32(x)
|
||||
case 1:
|
||||
var x, y byte
|
||||
x, _ = b.r.ReadByte()
|
||||
y, err = b.r.ReadByte()
|
||||
b.group[index] = uint32(x)<<8 | uint32(y)
|
||||
case 2:
|
||||
var x, y, z byte
|
||||
x, _ = b.r.ReadByte()
|
||||
y, _ = b.r.ReadByte()
|
||||
z, err = b.r.ReadByte()
|
||||
b.group[index] = uint32(x)<<16 | uint32(y)<<8 | uint32(z)
|
||||
case 3:
|
||||
var x, y, z, zz byte
|
||||
x, _ = b.r.ReadByte()
|
||||
y, _ = b.r.ReadByte()
|
||||
z, _ = b.r.ReadByte()
|
||||
zz, err = b.r.ReadByte()
|
||||
b.group[index] = uint32(x)<<24 | uint32(y)<<16 | uint32(z)<<8 | uint32(zz)
|
||||
}
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
// If we hit EOF here, we have found a partial group
|
||||
// We've return any valid entries we have read and return EOF once we run out
|
||||
b.capacity = index
|
||||
b.finished = true
|
||||
break
|
||||
} else {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
// Reset the pos pointer to the beginning of the read values
|
||||
b.pos = 0
|
||||
return nil
|
||||
}
|
||||
|
||||
func (b *U32GroupVarintDecoder) GetU32() (uint32, error) {
|
||||
// Check if we have any more values to give out - if not, let's get them
|
||||
if b.pos == b.capacity {
|
||||
// If finished is set, there is nothing else to do
|
||||
if b.finished {
|
||||
return 0, io.EOF
|
||||
}
|
||||
err := b.getGroup()
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
// Increment pointer and return the value stored at that point
|
||||
b.pos += 1
|
||||
return b.group[b.pos-1], nil
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
type Base128Encoder struct {
|
||||
w io.Writer
|
||||
tmpBytes []byte
|
||||
}
|
||||
|
||||
func NewU32Base128Encoder(w io.Writer) *Base128Encoder {
|
||||
return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen32)}
|
||||
}
|
||||
func NewU64Base128Encoder(w io.Writer) *Base128Encoder {
|
||||
return &Base128Encoder{w: w, tmpBytes: make([]byte, binary.MaxVarintLen64)}
|
||||
}
|
||||
|
||||
func (b *Base128Encoder) PutU32(x uint32) (int, error) {
|
||||
writtenBytes := binary.PutUvarint(b.tmpBytes, uint64(x))
|
||||
return b.w.Write(b.tmpBytes[:writtenBytes])
|
||||
}
|
||||
|
||||
func (b *Base128Encoder) PutU64(x uint64) (int, error) {
|
||||
writtenBytes := binary.PutUvarint(b.tmpBytes, x)
|
||||
return b.w.Write(b.tmpBytes[:writtenBytes])
|
||||
}
|
||||
|
||||
func (b *Base128Encoder) Close() {
|
||||
}
|
||||
|
||||
///
|
||||
|
||||
type Base128Decoder struct {
|
||||
r io.ByteReader
|
||||
}
|
||||
|
||||
func NewU32Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
|
||||
func NewU64Base128Decoder(r io.ByteReader) *Base128Decoder { return &Base128Decoder{r: r} }
|
||||
|
||||
func (b *Base128Decoder) GetU32() (uint32, error) {
|
||||
v, err := binary.ReadUvarint(b.r)
|
||||
return uint32(v), err
|
||||
}
|
||||
|
||||
func (b *Base128Decoder) GetU64() (uint64, error) {
|
||||
return binary.ReadUvarint(b.r)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue