forked from forgejo/forgejo
Migrate to go-enry new version (#10906)
This commit is contained in:
parent
7a67bcc204
commit
4dc62dadce
65 changed files with 111849 additions and 102276 deletions
19
vendor/github.com/go-enry/go-oniguruma/LICENSE
generated
vendored
Normal file
19
vendor/github.com/go-enry/go-oniguruma/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
Copyright (C) 2011 by Zhigang Chen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
26
vendor/github.com/go-enry/go-oniguruma/README.md
generated
vendored
Normal file
26
vendor/github.com/go-enry/go-oniguruma/README.md
generated
vendored
Normal file
|
@ -0,0 +1,26 @@
|
|||
# go-oniguruma 
|
||||
|
||||
This repository is a fork of [moovweb/rubex](https://github.com/moovweb/rubex/tree/go1) - a simple regular expression library (based on [oniguruma](https://github.com/kkos/oniguruma)) that supports Ruby's regex syntax.
|
||||
|
||||
The _rubex_ was originally created by Zhigang Chen (zhigang.chen@moovweb.com or zhigangc@gmail.com). It implements all the public functions of Go's Regexp package, except LiteralPrefix.
|
||||
|
||||
According to the benchmark tests in regexp, the library is 40% to 10X faster than Regexp on all but one test. Unlike Go's regexp, this library supports named capture groups and also allows `"\\1"` and `"\\k<name>"` in replacement strings.
|
||||
The library calls the _oniguruma_ regex library for regex pattern searching. All replacement code is done in Go.
|
||||
|
||||
Install
|
||||
-------
|
||||
|
||||
```sh
|
||||
# linux (debian/ubuntu/...)
|
||||
sudo apt-get install libonig-dev
|
||||
|
||||
# osx (homebrew)
|
||||
brew install oniguruma
|
||||
|
||||
go get github.com/go-enry/go-oniguruma
|
||||
```
|
||||
|
||||
|
||||
License
|
||||
-------
|
||||
Apache License Version 2.0, see [LICENSE](LICENSE)
|
184
vendor/github.com/go-enry/go-oniguruma/chelper.c
generated
vendored
Normal file
184
vendor/github.com/go-enry/go-oniguruma/chelper.c
generated
vendored
Normal file
|
@ -0,0 +1,184 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef BENCHMARK_CHELP
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "chelper.h"
|
||||
|
||||
int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
|
||||
int ret = ONIG_NORMAL;
|
||||
int error_msg_len = 0;
|
||||
|
||||
OnigUChar *pattern_start = (OnigUChar *) pattern;
|
||||
OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length);
|
||||
|
||||
*error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo));
|
||||
memset(*error_info, 0, sizeof(OnigErrorInfo));
|
||||
|
||||
onig_initialize_encoding(*encoding);
|
||||
|
||||
*error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
|
||||
|
||||
memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
|
||||
|
||||
*region = onig_region_new();
|
||||
|
||||
ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
|
||||
|
||||
if (ret != ONIG_NORMAL) {
|
||||
error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info);
|
||||
if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
|
||||
error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
|
||||
}
|
||||
(*error_buffer)[error_msg_len] = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
|
||||
OnigUChar *str_start = (OnigUChar *) str;
|
||||
OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
|
||||
OnigUChar *search_start = (OnigUChar *)(str_start + offset);
|
||||
OnigUChar *search_end = str_end;
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
|
||||
ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
|
||||
if (ret < 0 && error_buffer != NULL) {
|
||||
error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
|
||||
if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
|
||||
error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
|
||||
}
|
||||
error_buffer[error_msg_len] = '\0';
|
||||
}
|
||||
else if (captures != NULL) {
|
||||
int i;
|
||||
int count = 0;
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
captures[2*count] = region->beg[i];
|
||||
captures[2*count+1] = region->end[i];
|
||||
count ++;
|
||||
}
|
||||
*numCaptures = count;
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int MatchOnigRegex(void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
|
||||
OnigUChar *str_start = (OnigUChar *) str;
|
||||
OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
|
||||
OnigUChar *search_start = (OnigUChar *)(str_start + offset);
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
ret = onig_match(regex, str_start, str_end, search_start, region, option);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int LookupOnigCaptureByName(char *name, int name_length,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
OnigUChar *name_start = (OnigUChar *) name;
|
||||
OnigUChar *name_end = (OnigUChar *) (name_start + name_length);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
ret = onig_name_to_backref_number(regex, name_start, name_end, region);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Accumulator handed to name_callback while iterating over group names. */
typedef struct {
    char *nameBuffer;   /* destination for the ';'-separated group names */
    int bufferOffset;   /* bytes required so far (may exceed bufferSize) */
    int bufferSize;     /* capacity of nameBuffer in bytes */
    int *numbers;       /* one group number stored per visited name */
    int numIndex;       /* next free slot in numbers */
} group_info_t;
|
||||
|
||||
int name_callback(const UChar* name, const UChar* name_end,
|
||||
int ngroup_num, int* group_nums,
|
||||
regex_t* reg, void* arg)
|
||||
{
|
||||
int nameLen, offset, newOffset;
|
||||
group_info_t *groupInfo;
|
||||
|
||||
groupInfo = (group_info_t*) arg;
|
||||
offset = groupInfo->bufferOffset;
|
||||
nameLen = name_end - name;
|
||||
newOffset = offset + nameLen;
|
||||
|
||||
//if there are already names, add a ";"
|
||||
if (offset > 0) {
|
||||
newOffset += 1;
|
||||
}
|
||||
|
||||
if (newOffset <= groupInfo->bufferSize) {
|
||||
if (offset > 0) {
|
||||
groupInfo->nameBuffer[offset] = ';';
|
||||
offset += 1;
|
||||
}
|
||||
memcpy(&groupInfo->nameBuffer[offset], name, nameLen);
|
||||
}
|
||||
groupInfo->bufferOffset = newOffset;
|
||||
if (ngroup_num > 0) {
|
||||
groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1];
|
||||
} else {
|
||||
groupInfo->numbers[groupInfo->numIndex] = -1;
|
||||
}
|
||||
groupInfo->numIndex += 1;
|
||||
return 0; /* 0: continue */
|
||||
}
|
||||
|
||||
/*
 * GetCaptureNames writes the names of all named groups of `reg` into `buffer`
 * (capacity `bufferSize` bytes) as a ';'-separated list, and stores one group
 * number per name in `groupNumbers`.
 *
 * Returns the number of bytes the name list requires; names that do not fit
 * into `buffer` are counted but not written.
 */
int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) {
    group_info_t groupInfo;
    groupInfo.nameBuffer = (char*)buffer;
    groupInfo.bufferOffset = 0;
    groupInfo.bufferSize = bufferSize;
    groupInfo.numbers = groupNumbers;
    groupInfo.numIndex = 0;
    onig_foreach_name(reg, name_callback, (void* )&groupInfo);
    return groupInfo.bufferOffset;
}
|
||||
|
14
vendor/github.com/go-enry/go-oniguruma/chelper.h
generated
vendored
Normal file
14
vendor/github.com/go-enry/go-oniguruma/chelper.h
generated
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* C helpers wrapping the Oniguruma API for use from Go via cgo. */
#ifndef GO_ONIGURUMA_CHELPER_H
#define GO_ONIGURUMA_CHELPER_H

#include <oniguruma.h>

/* Compiles a pattern and allocates the region, error info and error buffer. */
extern int NewOnigRegex( char *pattern, int pattern_length, int option,
                                  OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);

/* Searches str from offset and optionally copies the capture offsets out. */
extern int SearchOnigRegex( void *str, int str_length, int offset, int option,
                                  OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);

/* Matches regex against str at exactly offset. */
extern int MatchOnigRegex( void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region);

/* Resolves a group name to its back-reference number. */
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region);

/* Writes the ';'-separated group names and their group numbers. */
extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers);

#endif /* GO_ONIGURUMA_CHELPER_H */
|
27
vendor/github.com/go-enry/go-oniguruma/constants.go
generated
vendored
Normal file
27
vendor/github.com/go-enry/go-oniguruma/constants.go
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
package rubex
|
||||
|
||||
// Option flags and result codes declared for use with this package. The
// option values form a bit set: each flag is the previous one shifted left
// by one bit.
const (
	ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE

	/* options */
	ONIG_OPTION_NONE               = 0
	ONIG_OPTION_IGNORECASE         = 1 << 0
	ONIG_OPTION_EXTEND             = 1 << 1
	ONIG_OPTION_MULTILINE          = 1 << 2
	ONIG_OPTION_SINGLELINE         = 1 << 3
	ONIG_OPTION_FIND_LONGEST       = 1 << 4
	ONIG_OPTION_FIND_NOT_EMPTY     = 1 << 5
	ONIG_OPTION_NEGATE_SINGLELINE  = 1 << 6
	ONIG_OPTION_DONT_CAPTURE_GROUP = 1 << 7
	ONIG_OPTION_CAPTURE_GROUP      = 1 << 8

	/* options (search time) */
	ONIG_OPTION_NOTBOL       = 1 << 9
	ONIG_OPTION_NOTEOL       = 1 << 10
	ONIG_OPTION_POSIX_REGION = 1 << 11
	ONIG_OPTION_MAXBIT       = ONIG_OPTION_POSIX_REGION /* limit */

	/* result codes */
	ONIG_NORMAL   = 0
	ONIG_MISMATCH = -1

	ONIG_MISMATCH_STR                = "mismatch"
	ONIGERR_UNDEFINED_NAME_REFERENCE = -217
)
|
3
vendor/github.com/go-enry/go-oniguruma/go.mod
generated
vendored
Normal file
3
vendor/github.com/go-enry/go-oniguruma/go.mod
generated
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
module github.com/go-enry/go-oniguruma
|
||||
|
||||
go 1.14
|
36
vendor/github.com/go-enry/go-oniguruma/quotemeta.go
generated
vendored
Normal file
36
vendor/github.com/go-enry/go-oniguruma/quotemeta.go
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package regexp implements a simple regular expression library.
|
||||
|
||||
// QuoteMeta func is copied here to avoid linking the entire Regexp library.
|
||||
|
||||
package rubex
|
||||
|
||||
// special reports whether c is a regular expression metacharacter that
// QuoteMeta must escape. The braces are included because they delimit
// interval quantifiers such as `a{2}`.
func special(c int) bool {
	switch c {
	case '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '{', '}', '^', '$':
		return true
	}
	return false
}

// QuoteMeta returns a string that quotes all regular expression metacharacters
// inside the argument text; the returned string is a regular expression matching
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
func QuoteMeta(s string) string {
	// Worst case every byte needs a preceding backslash.
	b := make([]byte, 0, 2*len(s))

	// A byte loop is correct because all metacharacters are ASCII.
	for i := 0; i < len(s); i++ {
		if special(int(s[i])) {
			b = append(b, '\\')
		}
		b = append(b, s[i])
	}
	return string(b)
}
|
668
vendor/github.com/go-enry/go-oniguruma/regex.go
generated
vendored
Normal file
668
vendor/github.com/go-enry/go-oniguruma/regex.go
generated
vendored
Normal file
|
@ -0,0 +1,668 @@
|
|||
package rubex
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I/usr/local/include
|
||||
#cgo LDFLAGS: -L/usr/local/lib -lonig
|
||||
#include <stdlib.h>
|
||||
#include <oniguruma.h>
|
||||
#include "chelper.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// strRange is a slice of int offsets.
type strRange []int

const (
	// numMatchStartSize is the number of per-match index slices allocated
	// up front for each compiled Regexp.
	numMatchStartSize = 4
	// numReadBufferStartSize is the initial size, in bytes, of the buffer
	// used when draining an io.RuneReader.
	numReadBufferStartSize = 256
)

// mutex is held while compiling and while freeing the regex and region.
var mutex sync.Mutex
|
||||
|
||||
// MatchData is the scratch storage a Regexp uses to collect capture offsets.
type MatchData struct {
	// count is the number of matches recorded since the last ClearMatchData.
	count int
	// indexes holds one slice of begin/end offset pairs per match.
	indexes [][]int32
}
|
||||
|
||||
// NamedGroupInfo maps a capture group name to its group number.
type NamedGroupInfo map[string]int
|
||||
|
||||
// Regexp is a compiled Oniguruma regular expression together with the
// C-allocated state and Go-side scratch buffers used while matching.
type Regexp struct {
	pattern        string           // source text, returned by String
	regex          C.OnigRegex      // compiled regex, released by Free
	region         *C.OnigRegion    // capture region, released by Free
	encoding       C.OnigEncoding   // encoding the pattern was compiled with
	errorInfo      *C.OnigErrorInfo // C-allocated, released by Free
	errorBuf       *C.char          // C-allocated error message buffer
	matchData      *MatchData       // scratch storage for capture offsets
	namedGroupInfo NamedGroupInfo   // nil when the pattern has no named groups
}
|
||||
|
||||
// NewRegexp creates and initializes a new Regexp with the given pattern and option.
|
||||
func NewRegexp(pattern string, option int) (re *Regexp, err error) {
|
||||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option)
|
||||
}
|
||||
|
||||
// NewRegexpASCII is equivalent to NewRegexp, but with the encoding restricted to ASCII.
|
||||
func NewRegexpASCII(pattern string, option int) (re *Regexp, err error) {
|
||||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_ASCII}, option)
|
||||
}
|
||||
|
||||
func initRegexp(re *Regexp, option int) (*Regexp, error) {
|
||||
var err error
|
||||
patternCharPtr := C.CString(re.pattern)
|
||||
defer C.free(unsafe.Pointer(patternCharPtr))
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf)
|
||||
if errorCode != C.ONIG_NORMAL {
|
||||
err = errors.New(C.GoString(re.errorBuf))
|
||||
} else {
|
||||
err = nil
|
||||
numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1
|
||||
re.matchData = &MatchData{}
|
||||
re.matchData.indexes = make([][]int32, numMatchStartSize)
|
||||
for i := 0; i < numMatchStartSize; i++ {
|
||||
re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2)
|
||||
}
|
||||
re.namedGroupInfo = re.getNamedGroupInfo()
|
||||
runtime.SetFinalizer(re, (*Regexp).Free)
|
||||
}
|
||||
return re, err
|
||||
}
|
||||
|
||||
// Compile compiles str with ONIG_OPTION_DEFAULT and returns the resulting
// Regexp, or the compile error.
func Compile(str string) (*Regexp, error) {
	return NewRegexp(str, ONIG_OPTION_DEFAULT)
}
|
||||
|
||||
func MustCompile(str string) *Regexp {
|
||||
regexp, error := NewRegexp(str, ONIG_OPTION_DEFAULT)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
// CompileWithOption compiles str with the given option flags. It is
// equivalent to NewRegexp.
func CompileWithOption(str string, option int) (*Regexp, error) {
	return NewRegexp(str, option)
}
|
||||
|
||||
func MustCompileWithOption(str string, option int) *Regexp {
|
||||
regexp, error := NewRegexp(str, option)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
// MustCompileASCII is equivalent to MustCompile, but with the encoding restricted to ASCII.
|
||||
func MustCompileASCII(str string) *Regexp {
|
||||
regexp, error := NewRegexpASCII(str, ONIG_OPTION_DEFAULT)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
func (re *Regexp) Free() {
|
||||
mutex.Lock()
|
||||
if re.regex != nil {
|
||||
C.onig_free(re.regex)
|
||||
re.regex = nil
|
||||
}
|
||||
if re.region != nil {
|
||||
C.onig_region_free(re.region, 1)
|
||||
re.region = nil
|
||||
}
|
||||
mutex.Unlock()
|
||||
if re.errorInfo != nil {
|
||||
C.free(unsafe.Pointer(re.errorInfo))
|
||||
re.errorInfo = nil
|
||||
}
|
||||
if re.errorBuf != nil {
|
||||
C.free(unsafe.Pointer(re.errorBuf))
|
||||
re.errorBuf = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (re *Regexp) getNamedGroupInfo() (namedGroupInfo NamedGroupInfo) {
|
||||
numNamedGroups := int(C.onig_number_of_names(re.regex))
|
||||
//when any named capture exisits, there is no numbered capture even if there are unnamed captures
|
||||
if numNamedGroups > 0 {
|
||||
namedGroupInfo = make(map[string]int)
|
||||
//try to get the names
|
||||
bufferSize := len(re.pattern) * 2
|
||||
nameBuffer := make([]byte, bufferSize)
|
||||
groupNumbers := make([]int32, numNamedGroups)
|
||||
bufferPtr := unsafe.Pointer(&nameBuffer[0])
|
||||
numbersPtr := unsafe.Pointer(&groupNumbers[0])
|
||||
length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr)))
|
||||
if length > 0 {
|
||||
namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";"))
|
||||
if len(namesAsBytes) != numNamedGroups {
|
||||
log.Fatalf("the number of named groups (%d) does not match the number names found (%d)\n", numNamedGroups, len(namesAsBytes))
|
||||
}
|
||||
for i, nameAsBytes := range namesAsBytes {
|
||||
name := string(nameAsBytes)
|
||||
namedGroupInfo[name] = int(groupNumbers[i])
|
||||
}
|
||||
} else {
|
||||
log.Fatalf("could not get the capture group names from %q", re.String())
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) groupNameToId(name string) (id int) {
|
||||
if re.namedGroupInfo == nil {
|
||||
id = ONIGERR_UNDEFINED_NAME_REFERENCE
|
||||
} else {
|
||||
id = re.namedGroupInfo[name]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) processMatch(numCaptures int) (match []int32) {
|
||||
if numCaptures <= 0 {
|
||||
panic("cannot have 0 captures when processing a match")
|
||||
}
|
||||
matchData := re.matchData
|
||||
return matchData.indexes[matchData.count][:numCaptures*2]
|
||||
}
|
||||
|
||||
func (re *Regexp) ClearMatchData() {
|
||||
matchData := re.matchData
|
||||
matchData.count = 0
|
||||
}
|
||||
|
||||
func (re *Regexp) find(b []byte, n int, offset int) (match []int) {
|
||||
if n == 0 {
|
||||
b = []byte{0}
|
||||
}
|
||||
ptr := unsafe.Pointer(&b[0])
|
||||
matchData := re.matchData
|
||||
capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0]))
|
||||
numCaptures := int32(0)
|
||||
numCapturesPtr := unsafe.Pointer(&numCaptures)
|
||||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr)))
|
||||
if pos >= 0 {
|
||||
if numCaptures <= 0 {
|
||||
panic("cannot have 0 captures when processing a match")
|
||||
}
|
||||
match2 := matchData.indexes[matchData.count][:numCaptures*2]
|
||||
match = make([]int, len(match2))
|
||||
for i := range match2 {
|
||||
match[i] = int(match2[i])
|
||||
}
|
||||
numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1
|
||||
if numCapturesInPattern != numCaptures {
|
||||
log.Fatalf("expected %d captures but got %d\n", numCapturesInPattern, numCaptures)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getCapture returns b[beg:end], or nil when either offset is negative
// (which is how an unmatched group is reported).
func getCapture(b []byte, beg int, end int) []byte {
	if beg >= 0 && end >= 0 {
		return b[beg:end]
	}
	return nil
}
|
||||
|
||||
func (re *Regexp) match(b []byte, n int, offset int) bool {
|
||||
re.ClearMatchData()
|
||||
if n == 0 {
|
||||
b = []byte{0}
|
||||
}
|
||||
ptr := unsafe.Pointer(&b[0])
|
||||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil)))
|
||||
return pos >= 0
|
||||
}
|
||||
|
||||
func (re *Regexp) findAll(b []byte, n int) (matches [][]int) {
|
||||
re.ClearMatchData()
|
||||
|
||||
if n < 0 {
|
||||
n = len(b)
|
||||
}
|
||||
matchData := re.matchData
|
||||
offset := 0
|
||||
for offset <= n {
|
||||
if matchData.count >= len(matchData.indexes) {
|
||||
length := len(matchData.indexes[0])
|
||||
matchData.indexes = append(matchData.indexes, make([]int32, length))
|
||||
}
|
||||
if match := re.find(b, n, offset); len(match) > 0 {
|
||||
matchData.count += 1
|
||||
//move offset to the ending index of the current match and prepare to find the next non-overlapping match
|
||||
offset = match[1]
|
||||
//if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here.
|
||||
if match[0] == match[1] {
|
||||
if offset < n && offset >= 0 {
|
||||
//there are more bytes, so move offset by a word
|
||||
_, width := utf8.DecodeRune(b[offset:])
|
||||
offset += width
|
||||
} else {
|
||||
//search is over, exit loop
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
matches2 := matchData.indexes[:matchData.count]
|
||||
matches = make([][]int, len(matches2))
|
||||
for i, v := range matches2 {
|
||||
matches[i] = make([]int, len(v))
|
||||
for j, v2 := range v {
|
||||
matches[i][j] = int(v2)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) FindIndex(b []byte) []int {
|
||||
re.ClearMatchData()
|
||||
match := re.find(b, len(b), 0)
|
||||
if len(match) == 0 {
|
||||
return nil
|
||||
}
|
||||
return match[:2]
|
||||
}
|
||||
|
||||
func (re *Regexp) Find(b []byte) []byte {
|
||||
loc := re.FindIndex(b)
|
||||
if loc == nil {
|
||||
return nil
|
||||
}
|
||||
return getCapture(b, loc[0], loc[1])
|
||||
}
|
||||
|
||||
func (re *Regexp) FindString(s string) string {
|
||||
b := []byte(s)
|
||||
mb := re.Find(b)
|
||||
if mb == nil {
|
||||
return ""
|
||||
}
|
||||
return string(mb)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringIndex(s string) []int {
|
||||
b := []byte(s)
|
||||
return re.FindIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAll(b []byte, n int) [][]byte {
|
||||
matches := re.FindAllIndex(b, n)
|
||||
if matches == nil {
|
||||
return nil
|
||||
}
|
||||
matchBytes := make([][]byte, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
matchBytes = append(matchBytes, getCapture(b, match[0], match[1]))
|
||||
}
|
||||
return matchBytes
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllString(s string, n int) []string {
|
||||
b := []byte(s)
|
||||
matches := re.FindAllIndex(b, n)
|
||||
if matches == nil {
|
||||
return nil
|
||||
}
|
||||
matchStrings := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
m := getCapture(b, match[0], match[1])
|
||||
if m == nil {
|
||||
matchStrings = append(matchStrings, "")
|
||||
} else {
|
||||
matchStrings = append(matchStrings, string(m))
|
||||
}
|
||||
}
|
||||
return matchStrings
|
||||
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
|
||||
b := []byte(s)
|
||||
return re.FindAllIndex(b, n)
|
||||
}
|
||||
|
||||
func (re *Regexp) findSubmatchIndex(b []byte) (match []int) {
|
||||
re.ClearMatchData()
|
||||
match = re.find(b, len(b), 0)
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||
match := re.findSubmatchIndex(b)
|
||||
if len(match) == 0 {
|
||||
return nil
|
||||
}
|
||||
return match
|
||||
}
|
||||
|
||||
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
||||
match := re.findSubmatchIndex(b)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
length := len(match) / 2
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
results := make([][]byte, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
results = append(results, getCapture(b, match[2*i], match[2*i+1]))
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||
b := []byte(s)
|
||||
match := re.findSubmatchIndex(b)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
length := len(match) / 2
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
results := make([]string, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
cap := getCapture(b, match[2*i], match[2*i+1])
|
||||
if cap == nil {
|
||||
results = append(results, "")
|
||||
} else {
|
||||
results = append(results, string(cap))
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||
b := []byte(s)
|
||||
return re.FindSubmatchIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
allCapturedBytes := make([][][]byte, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedBytes := make([][]byte, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1]))
|
||||
}
|
||||
allCapturedBytes = append(allCapturedBytes, capturedBytes)
|
||||
}
|
||||
|
||||
return allCapturedBytes
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
|
||||
b := []byte(s)
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
allCapturedStrings := make([][]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedStrings := make([]string, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
cap := getCapture(b, match[2*i], match[2*i+1])
|
||||
if cap == nil {
|
||||
capturedStrings = append(capturedStrings, "")
|
||||
} else {
|
||||
capturedStrings = append(capturedStrings, string(cap))
|
||||
}
|
||||
}
|
||||
allCapturedStrings = append(allCapturedStrings, capturedStrings)
|
||||
}
|
||||
return allCapturedStrings
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
|
||||
b := []byte(s)
|
||||
return re.FindAllSubmatchIndex(b, n)
|
||||
}
|
||||
|
||||
// Match reports whether b contains any match of the regular expression.
func (re *Regexp) Match(b []byte) bool {
	return re.match(b, len(b), 0)
}
|
||||
|
||||
func (re *Regexp) MatchString(s string) bool {
|
||||
b := []byte(s)
|
||||
return re.Match(b)
|
||||
}
|
||||
|
||||
// NumSubexp returns the number of capture groups reported by
// onig_number_of_captures for the compiled pattern.
func (re *Regexp) NumSubexp() int {
	return (int)(C.onig_number_of_captures(re.regex))
}
|
||||
|
||||
func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte {
|
||||
nameStr := string(name)
|
||||
capNum := re.groupNameToId(nameStr)
|
||||
if capNum < 0 || capNum >= len(capturedBytes) {
|
||||
panic(fmt.Sprintf("capture group name (%q) has error\n", nameStr))
|
||||
}
|
||||
return capturedBytes[capNum]
|
||||
}
|
||||
|
||||
func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte {
|
||||
//when named capture groups exist, numbered capture groups returns ""
|
||||
if re.namedGroupInfo == nil && num <= (len(capturedBytes)-1) && num >= 0 {
|
||||
return capturedBytes[num]
|
||||
}
|
||||
return ([]byte)("")
|
||||
}
|
||||
|
||||
// fillCapturedValues expands the capture references in the replacement
// template repl and returns the expanded bytes.
//
// A backslash followed by a digit 1-9 is replaced by capturedBytes[<digit>];
// `\k<name>` is replaced by capturedBytes[name]. Any other escaped byte is
// copied together with its backslash. References to absent keys expand to
// nothing, and a trailing lone backslash is dropped. The second parameter
// (the matched text) is unused.
func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte {
	total := len(repl)
	out := make([]byte, 0, total*3)
	name := make([]byte, 0, total)
	escaped, readingName := false, false

	for i := 0; i < total; i++ {
		c := repl[i]
		switch {
		case readingName && c == '<':
			// A '<' inside a group name is ignored.
		case readingName && c == '>':
			// End of `\k<name>`: emit the named capture.
			readingName = false
			out = append(out, capturedBytes[string(name)]...)
			name = name[:0] // reset the name
		case readingName:
			name = append(name, c)
		case escaped && c >= '1' && c <= '9':
			// `\N`: emit the numbered capture.
			out = append(out, capturedBytes[string(c)]...)
		case escaped && c == 'k' && i+1 < total && repl[i+1] == '<':
			readingName = true
			escaped = false
			i++ // bypass the next char '<'
		case escaped:
			// Unknown escape: keep it verbatim.
			out = append(out, '\\', c)
		case c != '\\':
			out = append(out, c)
		}
		// A backslash starts an escape; any byte after it ends the escape.
		if c == '\\' || escaped {
			escaped = !escaped
		}
	}
	return out
}
|
||||
|
||||
func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map[string][]byte) []byte) []byte {
|
||||
srcLen := len(src)
|
||||
matches := re.findAll(src, srcLen)
|
||||
if len(matches) == 0 {
|
||||
return src
|
||||
}
|
||||
dest := make([]byte, 0, srcLen)
|
||||
for i, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedBytes := make(map[string][]byte)
|
||||
if re.namedGroupInfo == nil {
|
||||
for j := 0; j < length; j++ {
|
||||
capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1])
|
||||
}
|
||||
} else {
|
||||
for name, j := range re.namedGroupInfo {
|
||||
capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1])
|
||||
}
|
||||
}
|
||||
matchBytes := getCapture(src, match[0], match[1])
|
||||
newRepl := replFunc(repl, matchBytes, capturedBytes)
|
||||
prevEnd := 0
|
||||
if i > 0 {
|
||||
prevMatch := matches[i-1][:2]
|
||||
prevEnd = prevMatch[1]
|
||||
}
|
||||
if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen {
|
||||
dest = append(dest, src[prevEnd:match[0]]...)
|
||||
}
|
||||
dest = append(dest, newRepl...)
|
||||
}
|
||||
lastEnd := matches[len(matches)-1][1]
|
||||
if lastEnd < srcLen && lastEnd >= 0 {
|
||||
dest = append(dest, src[lastEnd:]...)
|
||||
}
|
||||
return dest
|
||||
}
|
||||
|
||||
// ReplaceAll returns src with every match replaced by repl, after expanding
// the capture references in repl with fillCapturedValues.
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
	return re.replaceAll(src, repl, fillCapturedValues)
}
|
||||
|
||||
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
||||
return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
|
||||
return repl(matchBytes)
|
||||
})
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAllString(src, repl string) string {
|
||||
return string(re.ReplaceAll([]byte(src), []byte(repl)))
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
|
||||
srcB := []byte(src)
|
||||
destB := re.replaceAll(srcB, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
|
||||
return []byte(repl(string(matchBytes)))
|
||||
})
|
||||
return string(destB)
|
||||
}
|
||||
|
||||
// String returns the source text the Regexp was created with.
func (re *Regexp) String() string {
	return re.pattern
}
|
||||
|
||||
// grow_buffer returns a buffer able to hold n more bytes after offset. When
// offset+n fits within cap(b), b is returned unchanged; otherwise a new
// buffer of length 2*cap(b)+n is returned with b[:offset] copied into it.
func grow_buffer(b []byte, offset int, n int) []byte {
	if offset+n <= cap(b) {
		return b
	}
	grown := make([]byte, 2*cap(b)+n)
	copy(grown, b[:offset])
	return grown
}
|
||||
|
||||
func fromReader(r io.RuneReader) []byte {
|
||||
b := make([]byte, numReadBufferStartSize)
|
||||
offset := 0
|
||||
var err error = nil
|
||||
for err == nil {
|
||||
rune, runeWidth, err := r.ReadRune()
|
||||
if err == nil {
|
||||
b = grow_buffer(b, offset, runeWidth)
|
||||
writeWidth := utf8.EncodeRune(b[offset:], rune)
|
||||
if runeWidth != writeWidth {
|
||||
panic("reading rune width not equal to the written rune width")
|
||||
}
|
||||
offset += writeWidth
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return b[:offset]
|
||||
}
|
||||
|
||||
func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
|
||||
b := fromReader(r)
|
||||
return re.FindIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
||||
b := fromReader(r)
|
||||
return re.FindSubmatchIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) MatchReader(r io.RuneReader) bool {
|
||||
b := fromReader(r)
|
||||
return re.Match(b)
|
||||
}
|
||||
|
||||
// LiteralPrefix is not implemented for this engine: it always returns an
// empty prefix and false.
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
	//no easy way to implement this
	return "", false
}
|
||||
|
||||
func MatchString(pattern string, s string) (matched bool, error error) {
|
||||
re, err := Compile(pattern)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return re.MatchString(s), nil
|
||||
}
|
||||
|
||||
func (re *Regexp) Gsub(src, repl string) string {
|
||||
srcBytes := ([]byte)(src)
|
||||
replBytes := ([]byte)(repl)
|
||||
replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues)
|
||||
return string(replaced)
|
||||
}
|
||||
|
||||
func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string {
|
||||
srcBytes := ([]byte)(src)
|
||||
replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte {
|
||||
capturedStrings := make(map[string]string)
|
||||
for name, capBytes := range capturedBytes {
|
||||
capturedStrings[name] = string(capBytes)
|
||||
}
|
||||
matchString := string(matchBytes)
|
||||
return ([]byte)(replFunc(matchString, capturedStrings))
|
||||
})
|
||||
return string(replaced)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue