forked from forgejo/forgejo
[Vendor] blevesearch v0.8.1 -> v1.0.7 (#11360)
* Update blevesearch v0.8.1 -> v1.0.7 * make vendor Co-authored-by: zeripath <art27@cantab.net>
This commit is contained in:
parent
a44854c287
commit
fdf750e4d4
255 changed files with 9786 additions and 974 deletions
29
vendor/github.com/blevesearch/snowballstem/COPYING
generated
vendored
Normal file
29
vendor/github.com/blevesearch/snowballstem/COPYING
generated
vendored
Normal file
|
@ -0,0 +1,29 @@
|
|||
Copyright (c) 2001, Dr Martin Porter
|
||||
Copyright (c) 2004,2005, Richard Boulton
|
||||
Copyright (c) 2013, Yoshiki Shibukawa
|
||||
Copyright (c) 2006,2007,2009,2010,2011,2014-2019, Olly Betts
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions
|
||||
are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimer in the documentation
|
||||
and/or other materials provided with the distribution.
|
||||
3. Neither the name of the Snowball project nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
66
vendor/github.com/blevesearch/snowballstem/README.md
generated
vendored
Normal file
66
vendor/github.com/blevesearch/snowballstem/README.md
generated
vendored
Normal file
|
@ -0,0 +1,66 @@
|
|||
# snowballstem
|
||||
|
||||
This repository contains the Go stemmers generated by the [Snowball](https://github.com/snowballstem/snowball) project. They are maintained outside of the core bleve package so that they may be more easily reused in other contexts.
|
||||
|
||||
## Usage
|
||||
|
||||
All these stemmers export a single `Stem()` method which operates on a snowball `Env` structure. The `Env` structure maintains all state for the stemmer. A new `Env` is created to point at an initial string. After stemming, the results of the `Stem()` operation can be retrieved using the `Current()` method. The `Env` structure can be reused for subsequent calls by using the `SetCurrent()` method.
|
||||
|
||||
## Example
|
||||
|
||||
```
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/blevesearch/snowballstem"
|
||||
"github.com/blevesearch/snowballstem/english"
|
||||
)
|
||||
|
||||
func main() {
|
||||
|
||||
// words to stem
|
||||
words := []string{
|
||||
"running",
|
||||
"jumping",
|
||||
}
|
||||
|
||||
// build new environment
|
||||
env := snowballstem.NewEnv("")
|
||||
|
||||
for _, word := range words {
|
||||
// set up environment for word
|
||||
env.SetCurrent(word)
|
||||
// invoke stemmer
|
||||
english.Stem(env)
|
||||
// print results
|
||||
fmt.Printf("%s stemmed to %s\n", word, env.Current())
|
||||
}
|
||||
}
|
||||
```
|
||||
Produces Output:
|
||||
```
|
||||
$ ./snowtest
|
||||
running stemmed to run
|
||||
jumping stemmed to jump
|
||||
```
|
||||
|
||||
## Testing
|
||||
|
||||
The test harness for these stemmers is hosted in the main [Snowball](https://github.com/snowballstem/snowball) repository. There are functional tests built around the separate [snowball-data](https://github.com/snowballstem/snowball-data) repository, and there is support for fuzz-testing the stemmers there as well.
|
||||
|
||||
## Generating the Stemmers
|
||||
|
||||
```
|
||||
$ export SNOWBALL=/path/to/github.com/snowballstem/snowball/after/snowball/built
|
||||
$ go generate
|
||||
```
|
||||
|
||||
## Updating the Go Generate Commands
|
||||
|
||||
A simple tool is provided to automate these from the snowball algorithms directory:
|
||||
|
||||
```
|
||||
$ go run gengen.go /path/to/github.com/snowballstem/snowball/algorithms
|
||||
```
|
16
vendor/github.com/blevesearch/snowballstem/among.go
generated
vendored
Normal file
16
vendor/github.com/blevesearch/snowballstem/among.go
generated
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
package snowballstem
|
||||
|
||||
import "fmt"
|
||||
|
||||
type AmongF func(env *Env, ctx interface{}) bool
|
||||
|
||||
type Among struct {
|
||||
Str string
|
||||
A int32
|
||||
B int32
|
||||
F AmongF
|
||||
}
|
||||
|
||||
func (a *Among) String() string {
|
||||
return fmt.Sprintf("str: `%s`, a: %d, b: %d, f: %p", a.Str, a.A, a.B, a.F)
|
||||
}
|
1341
vendor/github.com/blevesearch/snowballstem/english/english_stemmer.go
generated
vendored
Normal file
1341
vendor/github.com/blevesearch/snowballstem/english/english_stemmer.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
389
vendor/github.com/blevesearch/snowballstem/env.go
generated
vendored
Normal file
389
vendor/github.com/blevesearch/snowballstem/env.go
generated
vendored
Normal file
|
@ -0,0 +1,389 @@
|
|||
package snowballstem
|
||||
|
||||
import (
|
||||
"log"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Env represents the Snowball execution environment: the string being
// stemmed plus the byte offsets into it that the stemmer manipulates.
type Env struct {
	// current is the working string.
	current string
	// Cursor is the current byte position within current.
	Cursor int
	// Limit is the forward bound for Cursor (len(current) after a reset).
	Limit int
	// LimitBackward is the backward bound for Cursor (0 after a reset).
	LimitBackward int
	// Bra is the start of the slice used by SliceFrom, SliceDel and SliceTo.
	Bra int
	// Ket is the end of the slice used by SliceFrom, SliceDel and SliceTo.
	Ket int
}
|
||||
|
||||
// NewEnv creates a new Snowball execution environment on the provided string
|
||||
func NewEnv(val string) *Env {
|
||||
return &Env{
|
||||
current: val,
|
||||
Cursor: 0,
|
||||
Limit: len(val),
|
||||
LimitBackward: 0,
|
||||
Bra: 0,
|
||||
Ket: len(val),
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) Current() string {
|
||||
return env.current
|
||||
}
|
||||
|
||||
func (env *Env) SetCurrent(s string) {
|
||||
env.current = s
|
||||
env.Cursor = 0
|
||||
env.Limit = len(s)
|
||||
env.LimitBackward = 0
|
||||
env.Bra = 0
|
||||
env.Ket = len(s)
|
||||
}
|
||||
|
||||
func (env *Env) ReplaceS(bra, ket int, s string) int32 {
|
||||
adjustment := int32(len(s)) - (int32(ket) - int32(bra))
|
||||
result, _ := splitAt(env.current, bra)
|
||||
rsplit := ket
|
||||
if ket < bra {
|
||||
rsplit = bra
|
||||
}
|
||||
_, rhs := splitAt(env.current, rsplit)
|
||||
result += s
|
||||
result += rhs
|
||||
|
||||
newLim := int32(env.Limit) + adjustment
|
||||
env.Limit = int(newLim)
|
||||
|
||||
if env.Cursor >= ket {
|
||||
newCur := int32(env.Cursor) + adjustment
|
||||
env.Cursor = int(newCur)
|
||||
} else if env.Cursor > bra {
|
||||
env.Cursor = bra
|
||||
}
|
||||
|
||||
env.current = result
|
||||
return adjustment
|
||||
}
|
||||
|
||||
func (env *Env) EqS(s string) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
|
||||
if strings.HasPrefix(env.current[env.Cursor:], s) {
|
||||
env.Cursor += len(s)
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor++
|
||||
}
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) EqSB(s string) bool {
|
||||
if int32(env.Cursor)-int32(env.LimitBackward) < int32(len(s)) {
|
||||
return false
|
||||
} else if !onCharBoundary(env.current, env.Cursor-len(s)) ||
|
||||
!strings.HasPrefix(env.current[env.Cursor-len(s):], s) {
|
||||
return false
|
||||
} else {
|
||||
env.Cursor -= len(s)
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) SliceFrom(s string) bool {
|
||||
bra, ket := env.Bra, env.Ket
|
||||
env.ReplaceS(bra, ket, s)
|
||||
return true
|
||||
}
|
||||
|
||||
func (env *Env) NextChar() {
|
||||
env.Cursor++
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor++
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) PrevChar() {
|
||||
env.Cursor--
|
||||
for !onCharBoundary(env.current, env.Cursor) {
|
||||
env.Cursor--
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) ByteIndexForHop(delta int32) int32 {
|
||||
if delta > 0 {
|
||||
res := env.Cursor
|
||||
for delta > 0 {
|
||||
res++
|
||||
delta--
|
||||
for res <= len(env.current) && !onCharBoundary(env.current, res) {
|
||||
res++
|
||||
}
|
||||
}
|
||||
return int32(res)
|
||||
} else if delta < 0 {
|
||||
res := env.Cursor
|
||||
for delta < 0 {
|
||||
res--
|
||||
delta++
|
||||
for res >= 0 && !onCharBoundary(env.current, res) {
|
||||
res--
|
||||
}
|
||||
}
|
||||
return int32(res)
|
||||
} else {
|
||||
return int32(env.Cursor)
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) InGrouping(chars []byte, min, max int32) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
if r > max || r < min {
|
||||
return false
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
return false
|
||||
}
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) InGroupingB(chars []byte, min, max int32) bool {
|
||||
if env.Cursor <= env.LimitBackward {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
env.NextChar()
|
||||
if r > max || r < min {
|
||||
return false
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) OutGrouping(chars []byte, min, max int32) bool {
|
||||
if env.Cursor >= env.Limit {
|
||||
return false
|
||||
}
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
if r > max || r < min {
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
env.NextChar()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) OutGroupingB(chars []byte, min, max int32) bool {
|
||||
if env.Cursor <= env.LimitBackward {
|
||||
return false
|
||||
}
|
||||
env.PrevChar()
|
||||
r, _ := utf8.DecodeRuneInString(env.current[env.Cursor:])
|
||||
if r != utf8.RuneError {
|
||||
env.NextChar()
|
||||
if r > max || r < min {
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
r -= min
|
||||
if (chars[uint(r>>3)] & (0x1 << uint(r&0x7))) == 0 {
|
||||
env.PrevChar()
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (env *Env) SliceDel() bool {
|
||||
return env.SliceFrom("")
|
||||
}
|
||||
|
||||
func (env *Env) Insert(bra, ket int, s string) {
|
||||
adjustment := env.ReplaceS(bra, ket, s)
|
||||
if bra <= env.Bra {
|
||||
env.Bra = int(int32(env.Bra) + adjustment)
|
||||
}
|
||||
if bra <= env.Ket {
|
||||
env.Ket = int(int32(env.Ket) + adjustment)
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) SliceTo() string {
|
||||
return env.current[env.Bra:env.Ket]
|
||||
}
|
||||
|
||||
func (env *Env) FindAmong(amongs []*Among, ctx interface{}) int32 {
|
||||
var i int32
|
||||
j := int32(len(amongs))
|
||||
|
||||
c := env.Cursor
|
||||
l := env.Limit
|
||||
|
||||
var commonI, commonJ int
|
||||
|
||||
firstKeyInspected := false
|
||||
for {
|
||||
k := i + ((j - i) >> 1)
|
||||
var diff int32
|
||||
common := min(commonI, commonJ)
|
||||
w := amongs[k]
|
||||
for lvar := common; lvar < len(w.Str); lvar++ {
|
||||
if c+common == l {
|
||||
diff--
|
||||
break
|
||||
}
|
||||
diff = int32(env.current[c+common]) - int32(w.Str[lvar])
|
||||
if diff != 0 {
|
||||
break
|
||||
}
|
||||
common++
|
||||
}
|
||||
if diff < 0 {
|
||||
j = k
|
||||
commonJ = common
|
||||
} else {
|
||||
i = k
|
||||
commonI = common
|
||||
}
|
||||
if j-i <= 1 {
|
||||
if i > 0 {
|
||||
break
|
||||
}
|
||||
if j == i {
|
||||
break
|
||||
}
|
||||
if firstKeyInspected {
|
||||
break
|
||||
}
|
||||
firstKeyInspected = true
|
||||
}
|
||||
}
|
||||
|
||||
for {
|
||||
w := amongs[i]
|
||||
if commonI >= len(w.Str) {
|
||||
env.Cursor = c + len(w.Str)
|
||||
if w.F != nil {
|
||||
res := w.F(env, ctx)
|
||||
env.Cursor = c + len(w.Str)
|
||||
if res {
|
||||
return w.B
|
||||
}
|
||||
} else {
|
||||
return w.B
|
||||
}
|
||||
}
|
||||
i = w.A
|
||||
if i < 0 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) FindAmongB(amongs []*Among, ctx interface{}) int32 {
|
||||
var i int32
|
||||
j := int32(len(amongs))
|
||||
|
||||
c := env.Cursor
|
||||
lb := env.LimitBackward
|
||||
|
||||
var commonI, commonJ int
|
||||
|
||||
firstKeyInspected := false
|
||||
|
||||
for {
|
||||
k := i + ((j - i) >> 1)
|
||||
diff := int32(0)
|
||||
common := min(commonI, commonJ)
|
||||
w := amongs[k]
|
||||
for lvar := len(w.Str) - int(common) - 1; lvar >= 0; lvar-- {
|
||||
if c-common == lb {
|
||||
diff--
|
||||
break
|
||||
}
|
||||
diff = int32(env.current[c-common-1]) - int32(w.Str[lvar])
|
||||
if diff != 0 {
|
||||
break
|
||||
}
|
||||
// Count up commons. But not one character but the byte width of that char
|
||||
common++
|
||||
}
|
||||
if diff < 0 {
|
||||
j = k
|
||||
commonJ = common
|
||||
} else {
|
||||
i = k
|
||||
commonI = common
|
||||
}
|
||||
if j-i <= 1 {
|
||||
if i > 0 {
|
||||
break
|
||||
}
|
||||
if j == i {
|
||||
break
|
||||
}
|
||||
if firstKeyInspected {
|
||||
break
|
||||
}
|
||||
firstKeyInspected = true
|
||||
}
|
||||
}
|
||||
for {
|
||||
w := amongs[i]
|
||||
if commonI >= len(w.Str) {
|
||||
env.Cursor = c - len(w.Str)
|
||||
if w.F != nil {
|
||||
res := w.F(env, ctx)
|
||||
env.Cursor = c - len(w.Str)
|
||||
if res {
|
||||
return w.B
|
||||
}
|
||||
} else {
|
||||
return w.B
|
||||
}
|
||||
}
|
||||
i = w.A
|
||||
if i < 0 {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (env *Env) Debug(count, lineNumber int) {
|
||||
log.Printf("snowball debug, count: %d, line: %d", count, lineNumber)
|
||||
}
|
||||
|
||||
func (env *Env) Clone() *Env {
|
||||
clone := *env
|
||||
return &clone
|
||||
}
|
||||
|
||||
func (env *Env) AssignTo() string {
|
||||
return env.Current()
|
||||
}
|
61
vendor/github.com/blevesearch/snowballstem/gen.go
generated
vendored
Normal file
61
vendor/github.com/blevesearch/snowballstem/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,61 @@
|
|||
package snowballstem
|
||||
|
||||
// to regenerate these commands, run
|
||||
// go run gengen.go /path/to/snowball/algorithms/directory
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/arabic/stem_Unicode.sbl -go -o arabic/arabic_stemmer -gop arabic -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w arabic/arabic_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/danish/stem_ISO_8859_1.sbl -go -o danish/danish_stemmer -gop danish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w danish/danish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/dutch/stem_ISO_8859_1.sbl -go -o dutch/dutch_stemmer -gop dutch -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w dutch/dutch_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/english/stem_ISO_8859_1.sbl -go -o english/english_stemmer -gop english -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w english/english_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/finnish/stem_ISO_8859_1.sbl -go -o finnish/finnish_stemmer -gop finnish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w finnish/finnish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/french/stem_ISO_8859_1.sbl -go -o french/french_stemmer -gop french -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w french/french_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/german/stem_ISO_8859_1.sbl -go -o german/german_stemmer -gop german -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w german/german_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/hungarian/stem_Unicode.sbl -go -o hungarian/hungarian_stemmer -gop hungarian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w hungarian/hungarian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/irish/stem_ISO_8859_1.sbl -go -o irish/irish_stemmer -gop irish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w irish/irish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/italian/stem_ISO_8859_1.sbl -go -o italian/italian_stemmer -gop italian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w italian/italian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/norwegian/stem_ISO_8859_1.sbl -go -o norwegian/norwegian_stemmer -gop norwegian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w norwegian/norwegian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/porter/stem_ISO_8859_1.sbl -go -o porter/porter_stemmer -gop porter -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w porter/porter_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/portuguese/stem_ISO_8859_1.sbl -go -o portuguese/portuguese_stemmer -gop portuguese -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w portuguese/portuguese_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/romanian/stem_Unicode.sbl -go -o romanian/romanian_stemmer -gop romanian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w romanian/romanian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/russian/stem_Unicode.sbl -go -o russian/russian_stemmer -gop russian -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w russian/russian_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/spanish/stem_ISO_8859_1.sbl -go -o spanish/spanish_stemmer -gop spanish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w spanish/spanish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/swedish/stem_ISO_8859_1.sbl -go -o swedish/swedish_stemmer -gop swedish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w swedish/swedish_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/tamil/stem_Unicode.sbl -go -o tamil/tamil_stemmer -gop tamil -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w tamil/tamil_stemmer.go
|
||||
|
||||
//go:generate $SNOWBALL/snowball $SNOWBALL/algorithms/turkish/stem_Unicode.sbl -go -o turkish/turkish_stemmer -gop turkish -gor github.com/blevesearch/snowballstem
|
||||
//go:generate gofmt -s -w turkish/turkish_stemmer.go
|
3
vendor/github.com/blevesearch/snowballstem/go.mod
generated
vendored
Normal file
3
vendor/github.com/blevesearch/snowballstem/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/blevesearch/snowballstem
|
||||
|
||||
go 1.13
|
34
vendor/github.com/blevesearch/snowballstem/util.go
generated
vendored
Normal file
34
vendor/github.com/blevesearch/snowballstem/util.go
generated
vendored
Normal file
|
@ -0,0 +1,34 @@
|
|||
package snowballstem
|
||||
|
||||
import (
|
||||
"math"
|
||||
"unicode/utf8"
|
||||
)
|
||||
|
||||
// Integer bounds exported for the stemmers: the limits of a 32-bit signed
// integer.
const (
	MaxInt = math.MaxInt32
	MinInt = math.MinInt32
)
|
||||
|
||||
// splitAt cuts str at byte offset mid and returns the part before it and the
// part from it onwards. mid must lie within [0, len(str)], otherwise the slice
// expression panics.
func splitAt(str string, mid int) (string, string) {
	head, tail := str[:mid], str[mid:]
	return head, tail
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// onCharBoundary reports whether byte offset pos in s is a character
// boundary. Offsets at or outside either end of s always count as boundaries;
// inside s, the byte at pos must be one that can start a UTF-8 encoded rune.
func onCharBoundary(s string, pos int) bool {
	inside := pos > 0 && pos < len(s)
	return !inside || utf8.RuneStart(s[pos])
}
|
||||
|
||||
// RuneCountInString returns the number of runes in str by delegating to
// utf8.RuneCountInString. Exposing it here lets every generated stemmer call
// it through this package, so none of them has to import unicode/utf8
// conditionally.
func RuneCountInString(str string) int {
	count := utf8.RuneCountInString(str)
	return count
}
|
Loading…
Add table
Add a link
Reference in a new issue