1
0
Fork 0
forked from forgejo/forgejo

[Vendor] blevesearch v0.8.1 -> v1.0.7 (#11360)

* Update blevesearch v0.8.1 -> v1.0.7

* make vendor

Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
6543 2020-05-10 07:40:54 +02:00 committed by GitHub
parent a44854c287
commit fdf750e4d4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
255 changed files with 9786 additions and 974 deletions

29
vendor/github.com/blevesearch/snowballstem/COPYING generated vendored Normal file
View file

@ -0,0 +1,29 @@
Copyright (c) 2001, Dr Martin Porter
Copyright (c) 2004,2005, Richard Boulton
Copyright (c) 2013, Yoshiki Shibukawa
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the Snowball project nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

66
vendor/github.com/blevesearch/snowballstem/README.md generated vendored Normal file
View file

@ -0,0 +1,66 @@
# snowballstem
This repository contains the Go stemmers generated by the [Snowball](https://github.com/snowballstem/snowball) project. They are maintained outside of the core bleve package so that they may be more easily reused in other contexts.
## Usage
All these stemmers export a single `Stem()` method which operates on a snowball `Env` structure. The `Env` structure maintains all state for the stemmer. A new `Env` is created to point at an initial string. After stemming, the results of the `Stem()` operation can be retrieved using the `Current()` method. The `Env` structure can be reused for subsequent calls by using the `SetCurrent()` method.
## Example
```
package main
import (
"fmt"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/english"
)
func main() {
// words to stem
words := []string{
"running",
"jumping",
}
// build new environment
env := snowballstem.NewEnv("")
for _, word := range words {
// set up environment for word
env.SetCurrent(word)
// invoke stemmer
english.Stem(env)
// print results
fmt.Printf("%s stemmed to %s\n", word, env.Current())
}
}
```
Produces Output:
```
$ ./snowtest
running stemmed to run
jumping stemmed to jump
```
## Testing
The test harness for these stemmers is hosted in the main [Snowball](https://github.com/snowballstem/snowball) repository. There are functional tests built around the separate [snowball-data](https://github.com/snowballstem/snowball-data) repository, and there is support for fuzz-testing the stemmers there as well.
## Generating the Stemmers
```
$ export SNOWBALL=/path/to/github.com/snowballstem/snowball/after/snowball/built
$ go generate
```
## Updating the Go Generate Commands
A simple tool is provided to automate these from the snowball algorithms directory:
```
$ go run gengen.go /path/to/github.com/snowballstem/snowball/algorithms
```

16
vendor/github.com/blevesearch/snowballstem/among.go generated vendored Normal file
View file

@ -0,0 +1,16 @@
package snowballstem
import "fmt"
// AmongF is the signature of the optional condition attached to an Among
// entry. FindAmong and FindAmongB call it with the environment and the
// caller-supplied ctx once the entry's Str has matched, and accept the entry
// only if it returns true.
type AmongF func(env *Env, ctx interface{}) bool
// Among is one entry of a table searched by Env.FindAmong / Env.FindAmongB.
type Among struct {
// Str is the byte string compared against the text at the cursor.
Str string
// A is the index of the next entry to try when this one is not accepted;
// a negative value ends the search.
A int32
// B is the value the search returns when this entry is accepted.
B int32
// F, if non-nil, is an extra condition that must return true for the
// entry to be accepted.
F AmongF
}
// String renders the entry for debugging: its string, the A and B values and
// the address of the condition function F.
func (a *Among) String() string {
	const layout = "str: `%s`, a: %d, b: %d, f: %p"
	return fmt.Sprintf(layout, a.Str, a.A, a.B, a.F)
}

File diff suppressed because it is too large Load diff

389
vendor/github.com/blevesearch/snowballstem/env.go generated vendored Normal file
View file

@ -0,0 +1,389 @@
package snowballstem
import (
"log"
"strings"
"unicode/utf8"
)
// Env represents the Snowball execution environment: the string being
// processed plus the positions the stemmer operates on. NewEnv and SetCurrent
// derive the positions from len() of the string, so they are byte offsets
// into current rather than rune counts.
type Env struct {
// current is the text being processed; read it with Current.
current string
// Cursor is the current position; successful forward tests advance it,
// successful backward tests move it back.
Cursor int
// Limit is the forward bound: forward tests fail once Cursor >= Limit.
Limit int
// LimitBackward is the backward bound: backward tests fail once
// Cursor <= LimitBackward.
LimitBackward int
// Bra and Ket delimit the current slice, current[Bra:Ket], which SliceTo
// returns and SliceFrom / SliceDel replace.
Bra int
Ket int
}
// NewEnv creates a new Snowball execution environment on the provided string.
// The cursor, the backward limit and bra start at 0; the forward limit and
// ket start at the byte length of val.
func NewEnv(val string) *Env {
	end := len(val)

	env := new(Env) // Cursor, LimitBackward and Bra keep their zero value.
	env.current = val
	env.Limit = end
	env.Ket = end
	return env
}
// Current returns the string held by the environment, reflecting any
// replacements made so far.
func (env *Env) Current() string {
return env.current
}
// SetCurrent points the environment at a new string and resets every
// position, exactly as NewEnv would: Cursor, LimitBackward and Bra go to 0,
// Limit and Ket go to the byte length of s. This lets one Env be reused for
// many words.
func (env *Env) SetCurrent(s string) {
	end := len(s)

	env.current = s
	env.Cursor, env.LimitBackward, env.Bra = 0, 0, 0
	env.Limit, env.Ket = end, end
}
// ReplaceS replaces the bytes of the current string between bra and ket with
// s and returns the resulting change in length (negative when the string
// shrinks).
//
// Limit is shifted by that amount. A Cursor at or beyond ket is shifted too;
// a Cursor strictly between bra and ket is pulled back to bra; a Cursor at or
// before bra is left alone.
func (env *Env) ReplaceS(bra, ket int, s string) int32 {
	delta := int32(len(s)) - (int32(ket) - int32(bra))

	head, _ := splitAt(env.current, bra)

	// The kept tail never starts before the kept head ends, even if ket < bra.
	tailStart := ket
	if tailStart < bra {
		tailStart = bra
	}
	_, tail := splitAt(env.current, tailStart)

	updated := head + s + tail

	env.Limit = int(int32(env.Limit) + delta)
	switch {
	case env.Cursor >= ket:
		env.Cursor = int(int32(env.Cursor) + delta)
	case env.Cursor > bra:
		env.Cursor = bra
	}

	env.current = updated
	return delta
}
// EqS reports whether the text at the cursor starts with s, looking no
// further than Limit. On a match the cursor is moved past s (and on to the
// next character boundary, should that position fall inside a multi-byte
// character) and true is returned; otherwise the cursor is left untouched.
//
// It returns false when the cursor is already at or beyond Limit.
func (env *Env) EqS(s string) bool {
	if env.Cursor >= env.Limit {
		return false
	}

	// Only the region [Cursor, Limit) may take part in the match, mirroring
	// EqSB, which refuses to look before LimitBackward. The bound is clamped
	// to the string length so a Limit past the end behaves as before.
	end := env.Limit
	if end > len(env.current) {
		end = len(env.current)
	}

	if !strings.HasPrefix(env.current[env.Cursor:end], s) {
		return false
	}

	env.Cursor += len(s)
	for !onCharBoundary(env.current, env.Cursor) {
		env.Cursor++
	}
	return true
}
// EqSB is the backward counterpart of EqS: it reports whether the text
// ending at the cursor is s. The match must fit between LimitBackward and
// the cursor and must begin on a character boundary. On success the cursor
// is moved back to the start of the match; on failure it is unchanged.
func (env *Env) EqSB(s string) bool {
	n := len(s)
	if int32(env.Cursor)-int32(env.LimitBackward) < int32(n) {
		return false
	}

	start := env.Cursor - n
	if !onCharBoundary(env.current, start) {
		return false
	}
	if !strings.HasPrefix(env.current[start:], s) {
		return false
	}

	env.Cursor = start
	return true
}
// SliceFrom replaces the current slice (the bytes between Bra and Ket) with
// s by delegating to ReplaceS. It always returns true.
func (env *Env) SliceFrom(s string) bool {
	env.ReplaceS(env.Bra, env.Ket, s)
	return true
}
// NextChar advances the cursor by at least one byte and then on to the next
// position that onCharBoundary accepts.
func (env *Env) NextChar() {
	pos := env.Cursor + 1
	for !onCharBoundary(env.current, pos) {
		pos++
	}
	env.Cursor = pos
}
// PrevChar moves the cursor back by at least one byte and then further back
// to the nearest position that onCharBoundary accepts.
func (env *Env) PrevChar() {
	pos := env.Cursor - 1
	for !onCharBoundary(env.current, pos) {
		pos--
	}
	env.Cursor = pos
}
// ByteIndexForHop returns the byte index reached by moving |delta| characters
// from the cursor: forward for a positive delta, backward for a negative one.
// A zero delta yields the cursor itself. The cursor is not modified, and the
// result is not range-checked here: it may lie past the end of the string or
// below zero.
func (env *Env) ByteIndexForHop(delta int32) int32 {
	pos := env.Cursor

	switch {
	case delta > 0:
		for ; delta > 0; delta-- {
			pos++
			for pos <= len(env.current) && !onCharBoundary(env.current, pos) {
				pos++
			}
		}
	case delta < 0:
		for ; delta < 0; delta++ {
			pos--
			for pos >= 0 && !onCharBoundary(env.current, pos) {
				pos--
			}
		}
	}

	return int32(pos)
}
// InGrouping tests whether the character at the cursor belongs to a grouping
// and, if so, steps the cursor over it.
//
// chars is a bitmap indexed by (rune - min): bit (rune-min)&7 of byte
// (rune-min)>>3 is set for members. Runes outside [min, max] are never
// members. The test fails without moving the cursor when the cursor has
// reached Limit or the decoded rune equals utf8.RuneError.
func (env *Env) InGrouping(chars []byte, min, max int32) bool {
	if env.Cursor >= env.Limit {
		return false
	}

	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
	if r == utf8.RuneError || r > max || r < min {
		return false
	}

	offset := r - min
	if (chars[uint(offset>>3)] & (0x1 << uint(offset&0x7))) == 0 {
		return false
	}

	env.NextChar()
	return true
}
// InGroupingB is the backward counterpart of InGrouping: it tests whether the
// character just before the cursor belongs to the grouping and, if so, moves
// the cursor back over it.
//
// chars is a bitmap indexed by (rune - min): bit (rune-min)&7 of byte
// (rune-min)>>3 is set for members. Runes outside [min, max] are never
// members.
//
// Whenever false is returned the cursor is exactly where it was on entry.
// That includes the case where the preceding character decodes to
// utf8.RuneError (for example a literal U+FFFD); without the explicit
// restore, that path would leave the cursor one character too far back.
func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
	if env.Cursor <= env.LimitBackward {
		return false
	}

	start := env.Cursor
	env.PrevChar()

	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
	if r == utf8.RuneError || r > max || r < min {
		env.Cursor = start
		return false
	}

	offset := r - min
	if (chars[uint(offset>>3)] & (0x1 << uint(offset&0x7))) == 0 {
		env.Cursor = start
		return false
	}

	// Member of the grouping: keep the cursor in front of the character.
	return true
}
// OutGrouping tests whether the character at the cursor is NOT a member of a
// grouping and, if so, steps the cursor over it.
//
// chars is a bitmap indexed by (rune - min); a rune outside [min, max], or
// one whose bit is clear, counts as outside the grouping. The test fails
// without moving the cursor when the cursor has reached Limit, when the
// decoded rune equals utf8.RuneError, or when the rune is a member.
func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
	if env.Cursor >= env.Limit {
		return false
	}

	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
	if r == utf8.RuneError {
		return false
	}

	if r >= min && r <= max {
		offset := r - min
		if (chars[uint(offset>>3)] & (0x1 << uint(offset&0x7))) != 0 {
			return false
		}
	}

	env.NextChar()
	return true
}
// OutGroupingB is the backward counterpart of OutGrouping: it tests whether
// the character just before the cursor is NOT a member of the grouping and,
// if so, moves the cursor back over it.
//
// chars is a bitmap indexed by (rune - min); a rune outside [min, max], or
// one whose bit is clear, counts as outside the grouping.
//
// Whenever false is returned the cursor is exactly where it was on entry.
// That includes the case where the preceding character decodes to
// utf8.RuneError (for example a literal U+FFFD); without the explicit
// restore, that path would leave the cursor one character too far back.
func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
	if env.Cursor <= env.LimitBackward {
		return false
	}

	start := env.Cursor
	env.PrevChar()

	r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
	if r == utf8.RuneError {
		env.Cursor = start
		return false
	}

	if r >= min && r <= max {
		offset := r - min
		if (chars[uint(offset>>3)] & (0x1 << uint(offset&0x7))) != 0 {
			// The character is a member of the grouping: no match.
			env.Cursor = start
			return false
		}
	}

	// Outside the grouping: keep the cursor in front of the character.
	return true
}
// SliceDel deletes the current slice by replacing it with the empty string
// through SliceFrom, whose result (always true) it returns.
func (env *Env) SliceDel() bool {
return env.SliceFrom("")
}
// Insert replaces the bytes between bra and ket with s (via ReplaceS) and
// then shifts the slice markers: Bra and Ket are each moved by the change in
// length when they lie at or after bra.
func (env *Env) Insert(bra, ket int, s string) {
	shift := env.ReplaceS(bra, ket, s)

	if env.Bra >= bra {
		env.Bra = int(int32(env.Bra) + shift)
	}
	if env.Ket >= bra {
		env.Ket = int(int32(env.Ket) + shift)
	}
}
// SliceTo returns the current slice, i.e. the bytes of the current string
// from Bra up to (not including) Ket.
func (env *Env) SliceTo() string {
	start, end := env.Bra, env.Ket
	return env.current[start:end]
}
// FindAmong looks for an entry of amongs whose Str matches the bytes of the
// current string starting at Cursor and not reaching past Limit.
//
// It first narrows the table with a binary search that compares raw bytes
// (the table is presumably sorted by Str - confirm against the generator),
// then walks candidates through their A links. For a candidate whose Str is
// fully matched, Cursor is set just past the match and, if the entry has a
// condition F, F(env, ctx) is called and Cursor is set again afterwards. The
// entry's B is returned when there is no F or F reports true.
//
// It returns 0 when the chain of A links ends (A < 0) without an accepted
// entry. Cursor is not restored in that case if a candidate's F was tried.
func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
// i is the lowest candidate index, j is one past the highest.
var i int32
j := int32(len(amongs))
c := env.Cursor
l := env.Limit
// commonI / commonJ: number of bytes found to match for the entries that
// last moved the i and j bounds.
var commonI, commonJ int
// firstKeyInspected allows one extra pass once the range has narrowed
// while i is still 0.
firstKeyInspected := false
for {
// Midpoint of the remaining range.
k := i + ((j - i) >> 1)
var diff int32
// Bytes already matched at both bounds are not compared again.
common := min(commonI, commonJ)
w := amongs[k]
for lvar := common; lvar < len(w.Str); lvar++ {
if c+common == l {
// Reached Limit before w.Str was exhausted: make diff negative so
// the upper bound moves down.
diff--
break
}
diff = int32(env.current[c+common]) - int32(w.Str[lvar])
if diff != 0 {
break
}
common++
}
if diff < 0 {
j = k
commonJ = common
} else {
i = k
commonI = common
}
// Stop once the range cannot shrink further.
if j-i <= 1 {
if i > 0 {
break
}
if j == i {
break
}
if firstKeyInspected {
break
}
firstKeyInspected = true
}
}
// Try the candidate at i, then follow A links to further candidates.
for {
w := amongs[i]
if commonI >= len(w.Str) {
env.Cursor = c + len(w.Str)
if w.F != nil {
res := w.F(env, ctx)
// Set the cursor again after the condition has run.
env.Cursor = c + len(w.Str)
if res {
return w.B
}
} else {
return w.B
}
}
i = w.A
if i < 0 {
return 0
}
}
}
// FindAmongB is the backward counterpart of FindAmong: it looks for an entry
// of amongs whose Str matches the bytes of the current string ending at
// Cursor and not reaching before LimitBackward.
//
// Entries are compared byte by byte from the end of Str towards its start
// (the table is presumably sorted accordingly - confirm against the
// generator). After a binary search, candidates are walked through their A
// links. For a candidate whose Str is fully matched, Cursor is set to the
// start of the match and, if the entry has a condition F, F(env, ctx) is
// called and Cursor is set again afterwards. The entry's B is returned when
// there is no F or F reports true.
//
// It returns 0 when the chain of A links ends (A < 0) without an accepted
// entry. Cursor is not restored in that case if a candidate's F was tried.
func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
// i is the lowest candidate index, j is one past the highest.
var i int32
j := int32(len(amongs))
c := env.Cursor
lb := env.LimitBackward
// commonI / commonJ: number of bytes found to match for the entries that
// last moved the i and j bounds.
var commonI, commonJ int
// firstKeyInspected allows one extra pass once the range has narrowed
// while i is still 0.
firstKeyInspected := false
for {
// Midpoint of the remaining range.
k := i + ((j - i) >> 1)
diff := int32(0)
// Bytes already matched at both bounds are not compared again.
common := min(commonI, commonJ)
w := amongs[k]
for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
if c-common == lb {
// Reached LimitBackward before w.Str was exhausted: make diff
// negative so the upper bound moves down.
diff--
break
}
diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
if diff != 0 {
break
}
// One more byte matched; common counts bytes, not characters.
common++
}
if diff < 0 {
j = k
commonJ = common
} else {
i = k
commonI = common
}
// Stop once the range cannot shrink further.
if j-i <= 1 {
if i > 0 {
break
}
if j == i {
break
}
if firstKeyInspected {
break
}
firstKeyInspected = true
}
}
// Try the candidate at i, then follow A links to further candidates.
for {
w := amongs[i]
if commonI >= len(w.Str) {
env.Cursor = c - len(w.Str)
if w.F != nil {
res := w.F(env, ctx)
// Set the cursor again after the condition has run.
env.Cursor = c - len(w.Str)
if res {
return w.B
}
} else {
return w.B
}
}
i = w.A
if i < 0 {
return 0
}
}
}
// Debug writes the given count and line number to the standard logger. It
// neither reads nor modifies the environment.
func (env *Env) Debug(count, lineNumber int) {
log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
}
// Clone returns a pointer to a copy of the environment. The copy holds the
// same string and positions, and later changes to either one do not affect
// the other.
func (env *Env) Clone() *Env {
	dup := new(Env)
	*dup = *env
	return dup
}
// AssignTo returns the whole current string, the same value Current yields.
func (env *Env) AssignTo() string {
	return env.current
}

61
vendor/github.com/blevesearch/snowballstem/gen.go generated vendored Normal file
View file

@ -0,0 +1,61 @@
package snowballstem
// to regenerate these commands, run
// go run gengen.go /path/to/snowball/algorithms/directory
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/arabic/stem_Unicode.sbl -go -o arabic/arabic_stemmer -gop arabic -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w arabic/arabic_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/danish/stem_ISO_8859_1.sbl -go -o danish/danish_stemmer -gop danish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w danish/danish_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/dutch/stem_ISO_8859_1.sbl -go -o dutch/dutch_stemmer -gop dutch -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w dutch/dutch_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/english/stem_ISO_8859_1.sbl -go -o english/english_stemmer -gop english -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w english/english_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/finnish/stem_ISO_8859_1.sbl -go -o finnish/finnish_stemmer -gop finnish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w finnish/finnish_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/french/stem_ISO_8859_1.sbl -go -o french/french_stemmer -gop french -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w french/french_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/german/stem_ISO_8859_1.sbl -go -o german/german_stemmer -gop german -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w german/german_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/hungarian/stem_Unicode.sbl -go -o hungarian/hungarian_stemmer -gop hungarian -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w hungarian/hungarian_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/irish/stem_ISO_8859_1.sbl -go -o irish/irish_stemmer -gop irish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w irish/irish_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/italian/stem_ISO_8859_1.sbl -go -o italian/italian_stemmer -gop italian -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w italian/italian_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/norwegian/stem_ISO_8859_1.sbl -go -o norwegian/norwegian_stemmer -gop norwegian -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w norwegian/norwegian_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/porter/stem_ISO_8859_1.sbl -go -o porter/porter_stemmer -gop porter -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w porter/porter_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/portuguese/stem_ISO_8859_1.sbl -go -o portuguese/portuguese_stemmer -gop portuguese -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w portuguese/portuguese_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/romanian/stem_Unicode.sbl -go -o romanian/romanian_stemmer -gop romanian -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w romanian/romanian_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/russian/stem_Unicode.sbl -go -o russian/russian_stemmer -gop russian -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w russian/russian_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/spanish/stem_ISO_8859_1.sbl -go -o spanish/spanish_stemmer -gop spanish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w spanish/spanish_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/swedish/stem_ISO_8859_1.sbl -go -o swedish/swedish_stemmer -gop swedish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w swedish/swedish_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/tamil/stem_Unicode.sbl -go -o tamil/tamil_stemmer -gop tamil -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w tamil/tamil_stemmer.go
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/turkish/stem_Unicode.sbl -go -o turkish/turkish_stemmer -gop turkish -gor github.com/blevesearch/snowballstem
//go:generate gofmt -s -w turkish/turkish_stemmer.go

3
vendor/github.com/blevesearch/snowballstem/go.mod generated vendored Normal file
View file

@ -0,0 +1,3 @@
module github.com/blevesearch/snowballstem
go 1.13

34
vendor/github.com/blevesearch/snowballstem/util.go generated vendored Normal file
View file

@ -0,0 +1,34 @@
package snowballstem
import (
"math"
"unicode/utf8"
)
// MaxInt and MinInt are the largest and smallest values of a 32-bit signed
// integer.
const (
	MaxInt = math.MaxInt32
	MinInt = math.MinInt32
)
// splitAt cuts str at byte offset mid and returns the part before the offset
// and the part from the offset onwards. mid must lie within [0, len(str)].
func splitAt(str string, mid int) (string, string) {
	head := str[:mid]
	tail := str[mid:]
	return head, tail
}
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
// onCharBoundary reports whether byte offset pos of s is a place where a
// character may start. Offsets at or before the beginning and at or past the
// end always count as boundaries; in between, the byte at pos must be the
// first byte of a UTF-8 encoded rune.
func onCharBoundary(s string, pos int) bool {
	return pos <= 0 || pos >= len(s) || utf8.RuneStart(s[pos])
}
// RuneCountInString returns the number of runes in str by delegating to
// utf8.RuneCountInString. Exposing it from this package means a stemmer can
// count runes without importing unicode/utf8 itself, so the import does not
// have to be emitted for some stemmers and omitted for others.
func RuneCountInString(str string) int {
	count := utf8.RuneCountInString(str)
	return count
}