forked from forgejo/forgejo
parent
b50dee5a61
commit
9591185c8f
180 changed files with 43400 additions and 41105 deletions
250
vendor/github.com/couchbase/vellum/levenshtein/dfa.go
generated
vendored
Normal file
250
vendor/github.com/couchbase/vellum/levenshtein/dfa.go
generated
vendored
Normal file
|
@ -0,0 +1,250 @@
|
|||
// Copyright (c) 2018 Couchbase, Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
package levenshtein
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math"
|
||||
)
|
||||
|
||||
const SinkState = uint32(0)
|
||||
|
||||
type DFA struct {
|
||||
transitions [][256]uint32
|
||||
distances []Distance
|
||||
initState int
|
||||
ed uint8
|
||||
}
|
||||
|
||||
/// Returns the initial state
|
||||
func (d *DFA) initialState() int {
|
||||
return d.initState
|
||||
}
|
||||
|
||||
/// Returns the Levenshtein distance associated to the
|
||||
/// current state.
|
||||
func (d *DFA) distance(stateId int) Distance {
|
||||
return d.distances[stateId]
|
||||
}
|
||||
|
||||
/// Returns the number of states in the `DFA`.
|
||||
func (d *DFA) numStates() int {
|
||||
return len(d.transitions)
|
||||
}
|
||||
|
||||
/// Returns the destination state reached after consuming a given byte.
|
||||
func (d *DFA) transition(fromState int, b uint8) int {
|
||||
return int(d.transitions[fromState][b])
|
||||
}
|
||||
|
||||
func (d *DFA) eval(bytes []uint8) Distance {
|
||||
state := d.initialState()
|
||||
|
||||
for _, b := range bytes {
|
||||
state = d.transition(state, b)
|
||||
}
|
||||
|
||||
return d.distance(state)
|
||||
}
|
||||
|
||||
func (d *DFA) Start() int {
|
||||
return int(d.initialState())
|
||||
}
|
||||
|
||||
func (d *DFA) IsMatch(state int) bool {
|
||||
if _, ok := d.distance(state).(Exact); ok {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func (d *DFA) CanMatch(state int) bool {
|
||||
return state > 0 && state < d.numStates()
|
||||
}
|
||||
|
||||
func (d *DFA) Accept(state int, b byte) int {
|
||||
return int(d.transition(state, b))
|
||||
}
|
||||
|
||||
// WillAlwaysMatch returns if the specified state will always end in a
|
||||
// matching state.
|
||||
func (d *DFA) WillAlwaysMatch(state int) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func fill(dest []uint32, val uint32) {
|
||||
for i := range dest {
|
||||
dest[i] = val
|
||||
}
|
||||
}
|
||||
|
||||
func fillTransitions(dest *[256]uint32, val uint32) {
|
||||
for i := range dest {
|
||||
dest[i] = val
|
||||
}
|
||||
}
|
||||
|
||||
type Utf8DFAStateBuilder struct {
|
||||
dfaBuilder *Utf8DFABuilder
|
||||
stateID uint32
|
||||
defaultSuccessor []uint32
|
||||
}
|
||||
|
||||
func (sb *Utf8DFAStateBuilder) addTransitionID(fromStateID uint32, b uint8,
|
||||
toStateID uint32) {
|
||||
sb.dfaBuilder.transitions[fromStateID][b] = toStateID
|
||||
}
|
||||
|
||||
func (sb *Utf8DFAStateBuilder) addTransition(in rune, toStateID uint32) {
|
||||
fromStateID := sb.stateID
|
||||
chars := []byte(string(in))
|
||||
lastByte := chars[len(chars)-1]
|
||||
|
||||
for i, ch := range chars[:len(chars)-1] {
|
||||
remNumBytes := len(chars) - i - 1
|
||||
defaultSuccessor := sb.defaultSuccessor[remNumBytes]
|
||||
intermediateStateID := sb.dfaBuilder.transitions[fromStateID][ch]
|
||||
|
||||
if intermediateStateID == defaultSuccessor {
|
||||
intermediateStateID = sb.dfaBuilder.allocate()
|
||||
fillTransitions(&sb.dfaBuilder.transitions[intermediateStateID],
|
||||
sb.defaultSuccessor[remNumBytes-1])
|
||||
}
|
||||
|
||||
sb.addTransitionID(fromStateID, ch, intermediateStateID)
|
||||
fromStateID = intermediateStateID
|
||||
}
|
||||
|
||||
toStateIDDecoded := sb.dfaBuilder.getOrAllocate(original(toStateID))
|
||||
sb.addTransitionID(fromStateID, lastByte, toStateIDDecoded)
|
||||
}
|
||||
|
||||
type Utf8StateId uint32
|
||||
|
||||
func original(stateId uint32) Utf8StateId {
|
||||
return predecessor(stateId, 0)
|
||||
}
|
||||
|
||||
func predecessor(stateId uint32, numSteps uint8) Utf8StateId {
|
||||
return Utf8StateId(stateId*4 + uint32(numSteps))
|
||||
}
|
||||
|
||||
// Utf8DFABuilder makes it possible to define a DFA
|
||||
// that takes unicode character, and build a `DFA`
|
||||
// that operates on utf-8 encoded
|
||||
type Utf8DFABuilder struct {
|
||||
index []uint32
|
||||
distances []Distance
|
||||
transitions [][256]uint32
|
||||
initialState uint32
|
||||
numStates uint32
|
||||
maxNumStates uint32
|
||||
}
|
||||
|
||||
func withMaxStates(maxStates uint32) *Utf8DFABuilder {
|
||||
rv := &Utf8DFABuilder{
|
||||
index: make([]uint32, maxStates*2+100),
|
||||
distances: make([]Distance, 0, maxStates),
|
||||
transitions: make([][256]uint32, 0, maxStates),
|
||||
maxNumStates: maxStates,
|
||||
}
|
||||
|
||||
for i := range rv.index {
|
||||
rv.index[i] = math.MaxUint32
|
||||
}
|
||||
|
||||
return rv
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) allocate() uint32 {
|
||||
newState := dfab.numStates
|
||||
dfab.numStates++
|
||||
|
||||
dfab.distances = append(dfab.distances, Atleast{d: 255})
|
||||
dfab.transitions = append(dfab.transitions, [256]uint32{})
|
||||
|
||||
return newState
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) getOrAllocate(state Utf8StateId) uint32 {
|
||||
if int(state) >= cap(dfab.index) {
|
||||
cloneIndex := make([]uint32, int(state)*2)
|
||||
copy(cloneIndex, dfab.index)
|
||||
dfab.index = cloneIndex
|
||||
}
|
||||
if dfab.index[state] != math.MaxUint32 {
|
||||
return dfab.index[state]
|
||||
}
|
||||
|
||||
nstate := dfab.allocate()
|
||||
dfab.index[state] = nstate
|
||||
|
||||
return nstate
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) setInitialState(iState uint32) {
|
||||
decodedID := dfab.getOrAllocate(original(iState))
|
||||
dfab.initialState = decodedID
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) build(ed uint8) *DFA {
|
||||
return &DFA{
|
||||
transitions: dfab.transitions,
|
||||
distances: dfab.distances,
|
||||
initState: int(dfab.initialState),
|
||||
ed: ed,
|
||||
}
|
||||
}
|
||||
|
||||
func (dfab *Utf8DFABuilder) addState(state, default_suc_orig uint32,
|
||||
distance Distance) (*Utf8DFAStateBuilder, error) {
|
||||
if state > dfab.maxNumStates {
|
||||
return nil, fmt.Errorf("State id is larger than maxNumStates")
|
||||
}
|
||||
|
||||
stateID := dfab.getOrAllocate(original(state))
|
||||
dfab.distances[stateID] = distance
|
||||
|
||||
defaultSuccID := dfab.getOrAllocate(original(default_suc_orig))
|
||||
// creates a chain of states of predecessors of `default_suc_orig`.
|
||||
// Accepting k-bytes (whatever the bytes are) from `predecessor_states[k-1]`
|
||||
// leads to the `default_suc_orig` state.
|
||||
predecessorStates := []uint32{defaultSuccID,
|
||||
defaultSuccID,
|
||||
defaultSuccID,
|
||||
defaultSuccID}
|
||||
|
||||
for numBytes := uint8(1); numBytes < 4; numBytes++ {
|
||||
predecessorState := predecessor(default_suc_orig, numBytes)
|
||||
predecessorStateID := dfab.getOrAllocate(predecessorState)
|
||||
predecessorStates[numBytes] = predecessorStateID
|
||||
succ := predecessorStates[numBytes-1]
|
||||
fillTransitions(&dfab.transitions[predecessorStateID], succ)
|
||||
}
|
||||
|
||||
// 1-byte encoded chars.
|
||||
fill(dfab.transitions[stateID][0:192], predecessorStates[0])
|
||||
// 2-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][192:224], predecessorStates[1])
|
||||
// 3-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][224:240], predecessorStates[2])
|
||||
// 4-bytes encoded chars.
|
||||
fill(dfab.transitions[stateID][240:256], predecessorStates[3])
|
||||
|
||||
return &Utf8DFAStateBuilder{
|
||||
dfaBuilder: dfab,
|
||||
stateID: stateID,
|
||||
defaultSuccessor: predecessorStates}, nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue