forked from forgejo/forgejo
Language statistics bar for repositories (#8037)
* Implementation for calculating language statistics Implement saving code language statistics to database Implement rendering language stats Add primary language to show in repository list Implement repository stats indexer queue Add indexer test Refactor to use queue module * Do not timeout for queues
This commit is contained in:
parent
37892be635
commit
ad2642a8aa
89 changed files with 182950 additions and 57 deletions
20
vendor/github.com/src-d/go-oniguruma/.travis.yml
generated
vendored
Normal file
20
vendor/github.com/src-d/go-oniguruma/.travis.yml
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
dist: trusty
|
||||
language: go
|
||||
go:
|
||||
- '1.11.x'
|
||||
- '1.12.x'
|
||||
|
||||
env:
|
||||
global:
|
||||
- LD_LIBRARY_PATH="/usr/local/lib":${LD_LIBRARY_PATH}
|
||||
- GO111MODULE=on
|
||||
- ONIGURUMA_VERSION='6.9.1'
|
||||
|
||||
before_install: # install oniguruma manually as trusty has only ancient 5.x
|
||||
- sudo apt-get install -y dpkg # dpkg >= 1.17.5ubuntu5.8 fixes https://bugs.launchpad.net/ubuntu/+source/dpkg/+bug/1730627
|
||||
- wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
|
||||
- sudo dpkg -i "libonig5_${ONIGURUMA_VERSION}-1_amd64.deb"
|
||||
- wget "http://archive.ubuntu.com/ubuntu/pool/universe/libo/libonig/libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
|
||||
- sudo dpkg -i "libonig-dev_${ONIGURUMA_VERSION}-1_amd64.deb"
|
||||
script:
|
||||
- go test -v --cover -race
|
19
vendor/github.com/src-d/go-oniguruma/LICENSE
generated
vendored
Normal file
19
vendor/github.com/src-d/go-oniguruma/LICENSE
generated
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
Copyright (C) 2011 by Zhigang Chen
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
20
vendor/github.com/src-d/go-oniguruma/README.md
generated
vendored
Normal file
20
vendor/github.com/src-d/go-oniguruma/README.md
generated
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
## go-oniguruma
|
||||
<a href="https://travis-ci.org/src-d/go-oniguruma"><img alt="Build Status" src="https://travis-ci.org/src-d/go-oniguruma.svg?branch=master" /></a>
|
||||
|
||||
This repository is a fork of [moovweb/rubex](https://github.com/moovweb/rubex/tree/go1) - a simple regular expression library (based on [oniguruma](https://github.com/kkos/oniguruma)) that supports Ruby's regex syntax.
|
||||
|
||||
The _rubex_ was originally created by Zhigang Chen (zhigang.chen@moovweb.com or zhigangc@gmail.com). It implements all the public functions of Go's Regexp package, except LiteralPrefix.
|
||||
|
||||
By the benchmark tests in regexp, the library is 40% to 10X faster than Regexp on all but one test. Unlike Go's regexp, this library supports named capture groups and also allows `"\\1"` and `"\\k<name>"` in replacement strings.
|
||||
The library calls the _oniguruma_ regex library for regex pattern searching. All replacement code is done in Go.
|
||||
|
||||
### Install all (_oniguruma_ and _rubex_):
|
||||
```sh
|
||||
# linux (debian/ubuntu/...)
|
||||
sudo apt-get install libonig-dev
|
||||
|
||||
# osx (homebrew)
|
||||
brew install oniguruma
|
||||
|
||||
go install -i .
|
||||
```
|
184
vendor/github.com/src-d/go-oniguruma/chelper.c
generated
vendored
Normal file
184
vendor/github.com/src-d/go-oniguruma/chelper.c
generated
vendored
Normal file
|
@ -0,0 +1,184 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#ifdef BENCHMARK_CHELP
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "chelper.h"
|
||||
|
||||
int NewOnigRegex( char *pattern, int pattern_length, int option,
|
||||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) {
|
||||
int ret = ONIG_NORMAL;
|
||||
int error_msg_len = 0;
|
||||
|
||||
OnigUChar *pattern_start = (OnigUChar *) pattern;
|
||||
OnigUChar *pattern_end = (OnigUChar *) (pattern + pattern_length);
|
||||
|
||||
*error_info = (OnigErrorInfo *) malloc(sizeof(OnigErrorInfo));
|
||||
memset(*error_info, 0, sizeof(OnigErrorInfo));
|
||||
|
||||
onig_initialize_encoding(*encoding);
|
||||
|
||||
*error_buffer = (char*) malloc(ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
|
||||
|
||||
memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char));
|
||||
|
||||
*region = onig_region_new();
|
||||
|
||||
ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info);
|
||||
|
||||
if (ret != ONIG_NORMAL) {
|
||||
error_msg_len = onig_error_code_to_str((unsigned char*)(*error_buffer), ret, *error_info);
|
||||
if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
|
||||
error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
|
||||
}
|
||||
(*error_buffer)[error_msg_len] = '\0';
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
int SearchOnigRegex( void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
|
||||
OnigUChar *str_start = (OnigUChar *) str;
|
||||
OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
|
||||
OnigUChar *search_start = (OnigUChar *)(str_start + offset);
|
||||
OnigUChar *search_end = str_end;
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
|
||||
ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option);
|
||||
if (ret < 0 && error_buffer != NULL) {
|
||||
error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info);
|
||||
if (error_msg_len >= ONIG_MAX_ERROR_MESSAGE_LEN) {
|
||||
error_msg_len = ONIG_MAX_ERROR_MESSAGE_LEN - 1;
|
||||
}
|
||||
error_buffer[error_msg_len] = '\0';
|
||||
}
|
||||
else if (captures != NULL) {
|
||||
int i;
|
||||
int count = 0;
|
||||
for (i = 0; i < region->num_regs; i++) {
|
||||
captures[2*count] = region->beg[i];
|
||||
captures[2*count+1] = region->end[i];
|
||||
count ++;
|
||||
}
|
||||
*numCaptures = count;
|
||||
}
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int MatchOnigRegex(void *str, int str_length, int offset, int option,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
int ret = ONIG_MISMATCH;
|
||||
int error_msg_len = 0;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
|
||||
OnigUChar *str_start = (OnigUChar *) str;
|
||||
OnigUChar *str_end = (OnigUChar *) (str_start + str_length);
|
||||
OnigUChar *search_start = (OnigUChar *)(str_start + offset);
|
||||
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
ret = onig_match(regex, str_start, str_end, search_start, region, option);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
int LookupOnigCaptureByName(char *name, int name_length,
|
||||
OnigRegex regex, OnigRegion *region) {
|
||||
int ret = ONIGERR_UNDEFINED_NAME_REFERENCE;
|
||||
#ifdef BENCHMARK_CHELP
|
||||
struct timeval tim1, tim2;
|
||||
long t;
|
||||
#endif
|
||||
OnigUChar *name_start = (OnigUChar *) name;
|
||||
OnigUChar *name_end = (OnigUChar *) (name_start + name_length);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim1, NULL);
|
||||
#endif
|
||||
ret = onig_name_to_backref_number(regex, name_start, name_end, region);
|
||||
#ifdef BENCHMARK_CHELP
|
||||
gettimeofday(&tim2, NULL);
|
||||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec;
|
||||
printf("%ld microseconds elapsed\n", t);
|
||||
#endif
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Accumulator handed to name_callback while iterating over group names. */
typedef struct {
    char *nameBuffer;   /* destination for the ';'-separated group names */
    int bufferOffset;   /* bytes accounted for in nameBuffer so far */
    int bufferSize;     /* capacity of nameBuffer in bytes */
    int *numbers;       /* receives one group number per name */
    int numIndex;       /* next free slot in numbers */
} group_info_t;
|
||||
|
||||
int name_callback(const UChar* name, const UChar* name_end,
|
||||
int ngroup_num, int* group_nums,
|
||||
regex_t* reg, void* arg)
|
||||
{
|
||||
int nameLen, offset, newOffset;
|
||||
group_info_t *groupInfo;
|
||||
|
||||
groupInfo = (group_info_t*) arg;
|
||||
offset = groupInfo->bufferOffset;
|
||||
nameLen = name_end - name;
|
||||
newOffset = offset + nameLen;
|
||||
|
||||
//if there are already names, add a ";"
|
||||
if (offset > 0) {
|
||||
newOffset += 1;
|
||||
}
|
||||
|
||||
if (newOffset <= groupInfo->bufferSize) {
|
||||
if (offset > 0) {
|
||||
groupInfo->nameBuffer[offset] = ';';
|
||||
offset += 1;
|
||||
}
|
||||
memcpy(&groupInfo->nameBuffer[offset], name, nameLen);
|
||||
}
|
||||
groupInfo->bufferOffset = newOffset;
|
||||
if (ngroup_num > 0) {
|
||||
groupInfo->numbers[groupInfo->numIndex] = group_nums[ngroup_num-1];
|
||||
} else {
|
||||
groupInfo->numbers[groupInfo->numIndex] = -1;
|
||||
}
|
||||
groupInfo->numIndex += 1;
|
||||
return 0; /* 0: continue */
|
||||
}
|
||||
|
||||
/*
 * GetCaptureNames walks the named groups of `reg` with onig_foreach_name(),
 * collecting the names into `buffer` (capacity bufferSize) separated by ';'
 * and one group number per name into groupNumbers.
 * Returns the final buffer offset computed by name_callback.
 */
int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbers) {
    group_info_t info;

    info.nameBuffer = (char*)buffer;
    info.bufferOffset = 0;
    info.bufferSize = bufferSize;
    info.numbers = groupNumbers;
    info.numIndex = 0;

    onig_foreach_name(reg, name_callback, (void*)&info);
    return info.bufferOffset;
}
|
||||
|
14
vendor/github.com/src-d/go-oniguruma/chelper.h
generated
vendored
Normal file
14
vendor/github.com/src-d/go-oniguruma/chelper.h
generated
vendored
Normal file
|
@ -0,0 +1,14 @@
|
|||
/* C helpers wrapping the Oniguruma calls used by the Go binding. */
#ifndef CHELPER_H
#define CHELPER_H

#include <oniguruma.h>

/* Compiles a pattern and allocates the region, error info and error buffer. */
extern int NewOnigRegex( char *pattern, int pattern_length, int option,
                  OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer);

/* Searches a buffer from `offset`; optionally reports capture offsets. */
extern int SearchOnigRegex( void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures);

/* Attempts a match at `offset` in a buffer. */
extern int MatchOnigRegex( void *str, int str_length, int offset, int option,
                  OnigRegex regex, OnigRegion *region);

/* Resolves a group name to a back-reference number. */
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region);

/* Collects the ';'-separated group names and their group numbers. */
extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers);

#endif /* CHELPER_H */
|
27
vendor/github.com/src-d/go-oniguruma/constants.go
generated
vendored
Normal file
27
vendor/github.com/src-d/go-oniguruma/constants.go
generated
vendored
Normal file
|
@ -0,0 +1,27 @@
|
|||
package rubex
|
||||
|
||||
// Option flags and status codes used by this package.
const (
	// ONIG_OPTION_DEFAULT is the option set used by the Compile helpers.
	ONIG_OPTION_DEFAULT = ONIG_OPTION_NONE

	// Compile-time options; every flag occupies its own bit.
	ONIG_OPTION_NONE               = 0
	ONIG_OPTION_IGNORECASE         = 1 << 0
	ONIG_OPTION_EXTEND             = 1 << 1
	ONIG_OPTION_MULTILINE          = 1 << 2
	ONIG_OPTION_SINGLELINE         = 1 << 3
	ONIG_OPTION_FIND_LONGEST       = 1 << 4
	ONIG_OPTION_FIND_NOT_EMPTY     = 1 << 5
	ONIG_OPTION_NEGATE_SINGLELINE  = 1 << 6
	ONIG_OPTION_DONT_CAPTURE_GROUP = 1 << 7
	ONIG_OPTION_CAPTURE_GROUP      = 1 << 8

	// Search-time options.
	ONIG_OPTION_NOTBOL       = 1 << 9
	ONIG_OPTION_NOTEOL       = 1 << 10
	ONIG_OPTION_POSIX_REGION = 1 << 11
	ONIG_OPTION_MAXBIT       = ONIG_OPTION_POSIX_REGION // limit

	// Status codes.
	ONIG_NORMAL   = 0
	ONIG_MISMATCH = -1

	ONIG_MISMATCH_STR                = "mismatch"
	ONIGERR_UNDEFINED_NAME_REFERENCE = -217
)
|
1
vendor/github.com/src-d/go-oniguruma/go.mod
generated
vendored
Normal file
1
vendor/github.com/src-d/go-oniguruma/go.mod
generated
vendored
Normal file
|
@ -0,0 +1 @@
|
|||
module github.com/src-d/go-oniguruma
|
36
vendor/github.com/src-d/go-oniguruma/quotemeta.go
generated
vendored
Normal file
36
vendor/github.com/src-d/go-oniguruma/quotemeta.go
generated
vendored
Normal file
|
@ -0,0 +1,36 @@
|
|||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// The code below is adapted from Go's regexp package.
|
||||
|
||||
// QuoteMeta func is copied here to avoid linking the entire Regexp library.
|
||||
|
||||
package rubex
|
||||
|
||||
// special reports whether c is one of the regular expression
// metacharacters \ . + * ? ( ) | [ ] ^ $.
func special(c int) bool {
	switch c {
	case '\\', '.', '+', '*', '?', '(', ')', '|', '[', ']', '^', '$':
		return true
	}
	return false
}
|
||||
|
||||
// QuoteMeta returns a string that quotes all regular expression metacharacters
|
||||
// inside the argument text; the returned string is a regular expression matching
|
||||
// the literal text. For example, QuoteMeta(`[foo]`) returns `\[foo\]`.
|
||||
func QuoteMeta(s string) string {
|
||||
b := make([]byte, 2*len(s))
|
||||
|
||||
// A byte loop is correct because all metacharacters are ASCII.
|
||||
j := 0
|
||||
for i := 0; i < len(s); i++ {
|
||||
if special(int(s[i])) {
|
||||
b[j] = '\\'
|
||||
j++
|
||||
}
|
||||
b[j] = s[i]
|
||||
j++
|
||||
}
|
||||
return string(b[0:j])
|
||||
}
|
668
vendor/github.com/src-d/go-oniguruma/regex.go
generated
vendored
Normal file
668
vendor/github.com/src-d/go-oniguruma/regex.go
generated
vendored
Normal file
|
@ -0,0 +1,668 @@
|
|||
package rubex
|
||||
|
||||
/*
|
||||
#cgo CFLAGS: -I/usr/local/include
|
||||
#cgo LDFLAGS: -L/usr/local/lib -lonig
|
||||
#include <stdlib.h>
|
||||
#include <oniguruma.h>
|
||||
#include "chelper.h"
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"sync"
|
||||
"unicode/utf8"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
// strRange is a list of integer offsets.
type strRange []int

const (
	// numMatchStartSize is the number of match-index rows allocated up
	// front for every compiled Regexp.
	numMatchStartSize = 4

	// numReadBufferStartSize is presumably the initial size of a read
	// buffer; its use is not visible in this part of the file.
	numReadBufferStartSize = 256
)

// mutex is held while compiling a regex and while freeing the regex and
// its region.
var mutex sync.Mutex
|
||||
|
||||
// MatchData is the scratch storage a Regexp reuses between searches.
type MatchData struct {
	// count is the number of rows of indexes filled by the current search.
	count int
	// indexes holds one row per match; a row stores begin/end offset pairs,
	// one pair per capture group.
	indexes [][]int32
}
|
||||
|
||||
// NamedGroupInfo maps a capture group name to its group number.
type NamedGroupInfo map[string]int
|
||||
|
||||
type Regexp struct {
|
||||
pattern string
|
||||
regex C.OnigRegex
|
||||
region *C.OnigRegion
|
||||
encoding C.OnigEncoding
|
||||
errorInfo *C.OnigErrorInfo
|
||||
errorBuf *C.char
|
||||
matchData *MatchData
|
||||
namedGroupInfo NamedGroupInfo
|
||||
}
|
||||
|
||||
// NewRegexp creates and initializes a new Regexp with the given pattern and option.
|
||||
func NewRegexp(pattern string, option int) (re *Regexp, err error) {
|
||||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option)
|
||||
}
|
||||
|
||||
// NewRegexpASCII is equivalent to NewRegexp, but with the encoding restricted to ASCII.
|
||||
func NewRegexpASCII(pattern string, option int) (re *Regexp, err error) {
|
||||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_ASCII}, option)
|
||||
}
|
||||
|
||||
func initRegexp(re *Regexp, option int) (*Regexp, error) {
|
||||
var err error
|
||||
patternCharPtr := C.CString(re.pattern)
|
||||
defer C.free(unsafe.Pointer(patternCharPtr))
|
||||
mutex.Lock()
|
||||
defer mutex.Unlock()
|
||||
errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf)
|
||||
if errorCode != C.ONIG_NORMAL {
|
||||
err = errors.New(C.GoString(re.errorBuf))
|
||||
} else {
|
||||
err = nil
|
||||
numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1
|
||||
re.matchData = &MatchData{}
|
||||
re.matchData.indexes = make([][]int32, numMatchStartSize)
|
||||
for i := 0; i < numMatchStartSize; i++ {
|
||||
re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2)
|
||||
}
|
||||
re.namedGroupInfo = re.getNamedGroupInfo()
|
||||
runtime.SetFinalizer(re, (*Regexp).Free)
|
||||
}
|
||||
return re, err
|
||||
}
|
||||
|
||||
func Compile(str string) (*Regexp, error) {
|
||||
return NewRegexp(str, ONIG_OPTION_DEFAULT)
|
||||
}
|
||||
|
||||
func MustCompile(str string) *Regexp {
|
||||
regexp, error := NewRegexp(str, ONIG_OPTION_DEFAULT)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
func CompileWithOption(str string, option int) (*Regexp, error) {
|
||||
return NewRegexp(str, option)
|
||||
}
|
||||
|
||||
func MustCompileWithOption(str string, option int) *Regexp {
|
||||
regexp, error := NewRegexp(str, option)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
// MustCompileASCII is equivalent to MustCompile, but with the encoding restricted to ASCII.
|
||||
func MustCompileASCII(str string) *Regexp {
|
||||
regexp, error := NewRegexpASCII(str, ONIG_OPTION_DEFAULT)
|
||||
if error != nil {
|
||||
panic("regexp: compiling " + str + ": " + error.Error())
|
||||
}
|
||||
return regexp
|
||||
}
|
||||
|
||||
func (re *Regexp) Free() {
|
||||
mutex.Lock()
|
||||
if re.regex != nil {
|
||||
C.onig_free(re.regex)
|
||||
re.regex = nil
|
||||
}
|
||||
if re.region != nil {
|
||||
C.onig_region_free(re.region, 1)
|
||||
re.region = nil
|
||||
}
|
||||
mutex.Unlock()
|
||||
if re.errorInfo != nil {
|
||||
C.free(unsafe.Pointer(re.errorInfo))
|
||||
re.errorInfo = nil
|
||||
}
|
||||
if re.errorBuf != nil {
|
||||
C.free(unsafe.Pointer(re.errorBuf))
|
||||
re.errorBuf = nil
|
||||
}
|
||||
}
|
||||
|
||||
func (re *Regexp) getNamedGroupInfo() (namedGroupInfo NamedGroupInfo) {
|
||||
numNamedGroups := int(C.onig_number_of_names(re.regex))
|
||||
//when any named capture exisits, there is no numbered capture even if there are unnamed captures
|
||||
if numNamedGroups > 0 {
|
||||
namedGroupInfo = make(map[string]int)
|
||||
//try to get the names
|
||||
bufferSize := len(re.pattern) * 2
|
||||
nameBuffer := make([]byte, bufferSize)
|
||||
groupNumbers := make([]int32, numNamedGroups)
|
||||
bufferPtr := unsafe.Pointer(&nameBuffer[0])
|
||||
numbersPtr := unsafe.Pointer(&groupNumbers[0])
|
||||
length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr)))
|
||||
if length > 0 {
|
||||
namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";"))
|
||||
if len(namesAsBytes) != numNamedGroups {
|
||||
log.Fatalf("the number of named groups (%d) does not match the number names found (%d)\n", numNamedGroups, len(namesAsBytes))
|
||||
}
|
||||
for i, nameAsBytes := range namesAsBytes {
|
||||
name := string(nameAsBytes)
|
||||
namedGroupInfo[name] = int(groupNumbers[i])
|
||||
}
|
||||
} else {
|
||||
log.Fatalf("could not get the capture group names from %q", re.String())
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) groupNameToId(name string) (id int) {
|
||||
if re.namedGroupInfo == nil {
|
||||
id = ONIGERR_UNDEFINED_NAME_REFERENCE
|
||||
} else {
|
||||
id = re.namedGroupInfo[name]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) processMatch(numCaptures int) (match []int32) {
|
||||
if numCaptures <= 0 {
|
||||
panic("cannot have 0 captures when processing a match")
|
||||
}
|
||||
matchData := re.matchData
|
||||
return matchData.indexes[matchData.count][:numCaptures*2]
|
||||
}
|
||||
|
||||
func (re *Regexp) ClearMatchData() {
|
||||
matchData := re.matchData
|
||||
matchData.count = 0
|
||||
}
|
||||
|
||||
func (re *Regexp) find(b []byte, n int, offset int) (match []int) {
|
||||
if n == 0 {
|
||||
b = []byte{0}
|
||||
}
|
||||
ptr := unsafe.Pointer(&b[0])
|
||||
matchData := re.matchData
|
||||
capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0]))
|
||||
numCaptures := int32(0)
|
||||
numCapturesPtr := unsafe.Pointer(&numCaptures)
|
||||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr)))
|
||||
if pos >= 0 {
|
||||
if numCaptures <= 0 {
|
||||
panic("cannot have 0 captures when processing a match")
|
||||
}
|
||||
match2 := matchData.indexes[matchData.count][:numCaptures*2]
|
||||
match = make([]int, len(match2))
|
||||
for i := range match2 {
|
||||
match[i] = int(match2[i])
|
||||
}
|
||||
numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1
|
||||
if numCapturesInPattern != numCaptures {
|
||||
log.Fatalf("expected %d captures but got %d\n", numCapturesInPattern, numCaptures)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// getCapture returns b[beg:end], or nil when either offset is negative.
func getCapture(b []byte, beg int, end int) []byte {
	if beg >= 0 && end >= 0 {
		return b[beg:end]
	}
	return nil
}
|
||||
|
||||
func (re *Regexp) match(b []byte, n int, offset int) bool {
|
||||
re.ClearMatchData()
|
||||
if n == 0 {
|
||||
b = []byte{0}
|
||||
}
|
||||
ptr := unsafe.Pointer(&b[0])
|
||||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil)))
|
||||
return pos >= 0
|
||||
}
|
||||
|
||||
func (re *Regexp) findAll(b []byte, n int) (matches [][]int) {
|
||||
re.ClearMatchData()
|
||||
|
||||
if n < 0 {
|
||||
n = len(b)
|
||||
}
|
||||
matchData := re.matchData
|
||||
offset := 0
|
||||
for offset <= n {
|
||||
if matchData.count >= len(matchData.indexes) {
|
||||
length := len(matchData.indexes[0])
|
||||
matchData.indexes = append(matchData.indexes, make([]int32, length))
|
||||
}
|
||||
if match := re.find(b, n, offset); len(match) > 0 {
|
||||
matchData.count += 1
|
||||
//move offset to the ending index of the current match and prepare to find the next non-overlapping match
|
||||
offset = match[1]
|
||||
//if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here.
|
||||
if match[0] == match[1] {
|
||||
if offset < n && offset >= 0 {
|
||||
//there are more bytes, so move offset by a word
|
||||
_, width := utf8.DecodeRune(b[offset:])
|
||||
offset += width
|
||||
} else {
|
||||
//search is over, exit loop
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
matches2 := matchData.indexes[:matchData.count]
|
||||
matches = make([][]int, len(matches2))
|
||||
for i, v := range matches2 {
|
||||
matches[i] = make([]int, len(v))
|
||||
for j, v2 := range v {
|
||||
matches[i][j] = int(v2)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) FindIndex(b []byte) []int {
|
||||
re.ClearMatchData()
|
||||
match := re.find(b, len(b), 0)
|
||||
if len(match) == 0 {
|
||||
return nil
|
||||
}
|
||||
return match[:2]
|
||||
}
|
||||
|
||||
func (re *Regexp) Find(b []byte) []byte {
|
||||
loc := re.FindIndex(b)
|
||||
if loc == nil {
|
||||
return nil
|
||||
}
|
||||
return getCapture(b, loc[0], loc[1])
|
||||
}
|
||||
|
||||
func (re *Regexp) FindString(s string) string {
|
||||
b := []byte(s)
|
||||
mb := re.Find(b)
|
||||
if mb == nil {
|
||||
return ""
|
||||
}
|
||||
return string(mb)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringIndex(s string) []int {
|
||||
b := []byte(s)
|
||||
return re.FindIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAll(b []byte, n int) [][]byte {
|
||||
matches := re.FindAllIndex(b, n)
|
||||
if matches == nil {
|
||||
return nil
|
||||
}
|
||||
matchBytes := make([][]byte, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
matchBytes = append(matchBytes, getCapture(b, match[0], match[1]))
|
||||
}
|
||||
return matchBytes
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllString(s string, n int) []string {
|
||||
b := []byte(s)
|
||||
matches := re.FindAllIndex(b, n)
|
||||
if matches == nil {
|
||||
return nil
|
||||
}
|
||||
matchStrings := make([]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
m := getCapture(b, match[0], match[1])
|
||||
if m == nil {
|
||||
matchStrings = append(matchStrings, "")
|
||||
} else {
|
||||
matchStrings = append(matchStrings, string(m))
|
||||
}
|
||||
}
|
||||
return matchStrings
|
||||
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int {
|
||||
b := []byte(s)
|
||||
return re.FindAllIndex(b, n)
|
||||
}
|
||||
|
||||
func (re *Regexp) findSubmatchIndex(b []byte) (match []int) {
|
||||
re.ClearMatchData()
|
||||
match = re.find(b, len(b), 0)
|
||||
return
|
||||
}
|
||||
|
||||
func (re *Regexp) FindSubmatchIndex(b []byte) []int {
|
||||
match := re.findSubmatchIndex(b)
|
||||
if len(match) == 0 {
|
||||
return nil
|
||||
}
|
||||
return match
|
||||
}
|
||||
|
||||
func (re *Regexp) FindSubmatch(b []byte) [][]byte {
|
||||
match := re.findSubmatchIndex(b)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
length := len(match) / 2
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
results := make([][]byte, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
results = append(results, getCapture(b, match[2*i], match[2*i+1]))
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringSubmatch(s string) []string {
|
||||
b := []byte(s)
|
||||
match := re.findSubmatchIndex(b)
|
||||
if match == nil {
|
||||
return nil
|
||||
}
|
||||
length := len(match) / 2
|
||||
if length == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
results := make([]string, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
cap := getCapture(b, match[2*i], match[2*i+1])
|
||||
if cap == nil {
|
||||
results = append(results, "")
|
||||
} else {
|
||||
results = append(results, string(cap))
|
||||
}
|
||||
}
|
||||
return results
|
||||
}
|
||||
|
||||
func (re *Regexp) FindStringSubmatchIndex(s string) []int {
|
||||
b := []byte(s)
|
||||
return re.FindSubmatchIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
return matches
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte {
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
allCapturedBytes := make([][][]byte, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedBytes := make([][]byte, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1]))
|
||||
}
|
||||
allCapturedBytes = append(allCapturedBytes, capturedBytes)
|
||||
}
|
||||
|
||||
return allCapturedBytes
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string {
|
||||
b := []byte(s)
|
||||
matches := re.findAll(b, n)
|
||||
if len(matches) == 0 {
|
||||
return nil
|
||||
}
|
||||
allCapturedStrings := make([][]string, 0, len(matches))
|
||||
for _, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedStrings := make([]string, 0, length)
|
||||
for i := 0; i < length; i++ {
|
||||
cap := getCapture(b, match[2*i], match[2*i+1])
|
||||
if cap == nil {
|
||||
capturedStrings = append(capturedStrings, "")
|
||||
} else {
|
||||
capturedStrings = append(capturedStrings, string(cap))
|
||||
}
|
||||
}
|
||||
allCapturedStrings = append(allCapturedStrings, capturedStrings)
|
||||
}
|
||||
return allCapturedStrings
|
||||
}
|
||||
|
||||
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int {
|
||||
b := []byte(s)
|
||||
return re.FindAllSubmatchIndex(b, n)
|
||||
}
|
||||
|
||||
func (re *Regexp) Match(b []byte) bool {
|
||||
return re.match(b, len(b), 0)
|
||||
}
|
||||
|
||||
func (re *Regexp) MatchString(s string) bool {
|
||||
b := []byte(s)
|
||||
return re.Match(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) NumSubexp() int {
|
||||
return (int)(C.onig_number_of_captures(re.regex))
|
||||
}
|
||||
|
||||
func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte {
|
||||
nameStr := string(name)
|
||||
capNum := re.groupNameToId(nameStr)
|
||||
if capNum < 0 || capNum >= len(capturedBytes) {
|
||||
panic(fmt.Sprintf("capture group name (%q) has error\n", nameStr))
|
||||
}
|
||||
return capturedBytes[capNum]
|
||||
}
|
||||
|
||||
func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte {
|
||||
//when named capture groups exist, numbered capture groups returns ""
|
||||
if re.namedGroupInfo == nil && num <= (len(capturedBytes)-1) && num >= 0 {
|
||||
return capturedBytes[num]
|
||||
}
|
||||
return ([]byte)("")
|
||||
}
|
||||
|
||||
// fillCapturedValues expands the capture references in the replacement
// template repl using capturedBytes, which is keyed by group number (as a
// decimal string) or group name. The second argument is ignored.
//
//   - `\1` .. `\9` insert the capture stored under that digit.
//   - `\k<name>` inserts the capture stored under name.
//   - A backslash before any other byte is copied together with that byte.
//   - A trailing lone backslash is dropped.
//
// Keys missing from capturedBytes expand to nothing.
func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte {
	out := make([]byte, 0, len(repl)*3)
	name := make([]byte, 0, len(repl))
	escaped := false // the previous byte was an unconsumed backslash
	inName := false  // currently between "\k<" and ">"

	for i := 0; i < len(repl); i++ {
		c := repl[i]
		switch {
		case inName && c == '<':
			// A '<' inside a group name is skipped.
		case inName && c == '>':
			inName = false
			out = append(out, capturedBytes[string(name)]...)
			name = name[:0] // reset for the next reference
		case inName:
			name = append(name, c)
		case escaped && '1' <= c && c <= '9':
			out = append(out, capturedBytes[string(repl[i:i+1])]...)
		case escaped && c == 'k' && i+1 < len(repl) && repl[i+1] == '<':
			inName = true
			escaped = false
			i++ // step over the '<' that opens the name
		case escaped:
			out = append(out, '\\', c)
		case c != '\\':
			out = append(out, c)
		}
		// A backslash opens escape mode; whatever follows it closes it.
		if c == '\\' || escaped {
			escaped = !escaped
		}
	}
	return out
}
|
||||
|
||||
func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map[string][]byte) []byte) []byte {
|
||||
srcLen := len(src)
|
||||
matches := re.findAll(src, srcLen)
|
||||
if len(matches) == 0 {
|
||||
return src
|
||||
}
|
||||
dest := make([]byte, 0, srcLen)
|
||||
for i, match := range matches {
|
||||
length := len(match) / 2
|
||||
capturedBytes := make(map[string][]byte)
|
||||
if re.namedGroupInfo == nil {
|
||||
for j := 0; j < length; j++ {
|
||||
capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1])
|
||||
}
|
||||
} else {
|
||||
for name, j := range re.namedGroupInfo {
|
||||
capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1])
|
||||
}
|
||||
}
|
||||
matchBytes := getCapture(src, match[0], match[1])
|
||||
newRepl := replFunc(repl, matchBytes, capturedBytes)
|
||||
prevEnd := 0
|
||||
if i > 0 {
|
||||
prevMatch := matches[i-1][:2]
|
||||
prevEnd = prevMatch[1]
|
||||
}
|
||||
if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen {
|
||||
dest = append(dest, src[prevEnd:match[0]]...)
|
||||
}
|
||||
dest = append(dest, newRepl...)
|
||||
}
|
||||
lastEnd := matches[len(matches)-1][1]
|
||||
if lastEnd < srcLen && lastEnd >= 0 {
|
||||
dest = append(dest, src[lastEnd:]...)
|
||||
}
|
||||
return dest
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAll(src, repl []byte) []byte {
|
||||
return re.replaceAll(src, repl, fillCapturedValues)
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte {
|
||||
return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
|
||||
return repl(matchBytes)
|
||||
})
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAllString(src, repl string) string {
|
||||
return string(re.ReplaceAll([]byte(src), []byte(repl)))
|
||||
}
|
||||
|
||||
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string {
|
||||
srcB := []byte(src)
|
||||
destB := re.replaceAll(srcB, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte {
|
||||
return []byte(repl(string(matchBytes)))
|
||||
})
|
||||
return string(destB)
|
||||
}
|
||||
|
||||
func (re *Regexp) String() string {
|
||||
return re.pattern
|
||||
}
|
||||
|
||||
// grow_buffer makes sure b can hold n more bytes starting at offset.
//
// If offset+n fits within cap(b), b is returned unchanged. Otherwise a new
// zeroed slice of length 2*cap(b)+n is allocated, the first offset bytes of b
// are copied into it, and the new slice is returned.
func grow_buffer(b []byte, offset int, n int) []byte {
	if offset+n <= cap(b) {
		return b
	}
	grown := make([]byte, 2*cap(b)+n)
	copy(grown, b[:offset])
	return grown
}
|
||||
|
||||
func fromReader(r io.RuneReader) []byte {
|
||||
b := make([]byte, numReadBufferStartSize)
|
||||
offset := 0
|
||||
var err error = nil
|
||||
for err == nil {
|
||||
rune, runeWidth, err := r.ReadRune()
|
||||
if err == nil {
|
||||
b = grow_buffer(b, offset, runeWidth)
|
||||
writeWidth := utf8.EncodeRune(b[offset:], rune)
|
||||
if runeWidth != writeWidth {
|
||||
panic("reading rune width not equal to the written rune width")
|
||||
}
|
||||
offset += writeWidth
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
return b[:offset]
|
||||
}
|
||||
|
||||
func (re *Regexp) FindReaderIndex(r io.RuneReader) []int {
|
||||
b := fromReader(r)
|
||||
return re.FindIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) FindReaderSubmatchIndex(r io.RuneReader) []int {
|
||||
b := fromReader(r)
|
||||
return re.FindSubmatchIndex(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) MatchReader(r io.RuneReader) bool {
|
||||
b := fromReader(r)
|
||||
return re.Match(b)
|
||||
}
|
||||
|
||||
func (re *Regexp) LiteralPrefix() (prefix string, complete bool) {
|
||||
//no easy way to implement this
|
||||
return "", false
|
||||
}
|
||||
|
||||
func MatchString(pattern string, s string) (matched bool, error error) {
|
||||
re, err := Compile(pattern)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return re.MatchString(s), nil
|
||||
}
|
||||
|
||||
func (re *Regexp) Gsub(src, repl string) string {
|
||||
srcBytes := ([]byte)(src)
|
||||
replBytes := ([]byte)(repl)
|
||||
replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues)
|
||||
return string(replaced)
|
||||
}
|
||||
|
||||
func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string {
|
||||
srcBytes := ([]byte)(src)
|
||||
replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte {
|
||||
capturedStrings := make(map[string]string)
|
||||
for name, capBytes := range capturedBytes {
|
||||
capturedStrings[name] = string(capBytes)
|
||||
}
|
||||
matchString := string(matchBytes)
|
||||
return ([]byte)(replFunc(matchString, capturedStrings))
|
||||
})
|
||||
return string(replaced)
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue