forked from forgejo/forgejo
Restrict repository indexing by glob match (#7767)
* Restrict repository indexing by file extension
* Use REPO_EXTENSIONS_LIST_INCLUDE instead of REPO_EXTENSIONS_LIST_EXCLUDE and have a more flexible extension pattern
* Corrected to pass lint gosimple
* Add wildcard support to REPO_INDEXER_EXTENSIONS
* This reverts commit 72a650c8e4.
* Add wildcard support to REPO_INDEXER_EXTENSIONS (no make vendor)
* Simplify isIndexable() for better clarity
* Add gobwas/glob to vendors
* manually set appengine new release
* Implement better REPO_INDEXER_INCLUDE and REPO_INDEXER_EXCLUDE
* Add unit and integration tests
* Update app.ini.sample and reword config-cheat-sheet
* Add doc page and correct app.ini.sample
* Some polish on the doc
* Simplify code as suggested by @lafriks
This commit is contained in:
parent
3fd0eec900
commit
72f6d5c882
38 changed files with 920 additions and 17 deletions
|
@ -7,6 +7,11 @@ package setting
|
|||
import (
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
|
||||
"github.com/gobwas/glob"
|
||||
)
|
||||
|
||||
// enumerates all the indexer queue types
|
||||
|
@ -29,6 +34,8 @@ var (
|
|||
IssueQueueDir string
|
||||
IssueQueueConnStr string
|
||||
IssueQueueBatchNumber int
|
||||
IncludePatterns []glob.Glob
|
||||
ExcludePatterns []glob.Glob
|
||||
}{
|
||||
IssueType: "bleve",
|
||||
IssuePath: "indexers/issues.bleve",
|
||||
|
@ -51,6 +58,9 @@ func newIndexerService() {
|
|||
if !filepath.IsAbs(Indexer.RepoPath) {
|
||||
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
|
||||
}
|
||||
Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))
|
||||
Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString(""))
|
||||
|
||||
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
|
||||
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
|
||||
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
|
||||
|
@ -58,3 +68,19 @@ func newIndexerService() {
|
|||
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))
|
||||
Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
|
||||
}
|
||||
|
||||
// IndexerGlobFromString parses a comma separated list of patterns and returns a glob.Glob slice suited for repo indexing
|
||||
func IndexerGlobFromString(globstr string) []glob.Glob {
|
||||
extarr := make([]glob.Glob, 0, 10)
|
||||
for _, expr := range strings.Split(strings.ToLower(globstr), ",") {
|
||||
expr = strings.TrimSpace(expr)
|
||||
if expr != "" {
|
||||
if g, err := glob.Compile(expr, '.', '/'); err != nil {
|
||||
log.Info("Invalid glob expresion '%s' (skipped): %v", expr, err)
|
||||
} else {
|
||||
extarr = append(extarr, g)
|
||||
}
|
||||
}
|
||||
}
|
||||
return extarr
|
||||
}
|
||||
|
|
73
modules/setting/indexer_test.go
Normal file
73
modules/setting/indexer_test.go
Normal file
|
@ -0,0 +1,73 @@
|
|||
// Copyright 2019 The Gitea Authors. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package setting
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
// indexerMatchList is a single expectation for the glob tests: a candidate
// string together with the index of the pattern expected to match it first.
// Field order matters because the test tables use positional literals.
type indexerMatchList struct {
	// value is the string handed to each compiled glob.
	value string
	// position is the expected index of the first matching glob, or -1 when
	// no glob is expected to match.
	position int
}
|
||||
|
||||
func Test_newIndexerGlobSettings(t *testing.T) {
|
||||
|
||||
checkGlobMatch(t, "", []indexerMatchList{})
|
||||
checkGlobMatch(t, " ", []indexerMatchList{})
|
||||
checkGlobMatch(t, "data, */data, */data/*, **/data/*, **/data/**", []indexerMatchList{
|
||||
{"", -1},
|
||||
{"don't", -1},
|
||||
{"data", 0},
|
||||
{"/data", 1},
|
||||
{"x/data", 1},
|
||||
{"x/data/y", 2},
|
||||
{"a/b/c/data/z", 3},
|
||||
{"a/b/c/data/x/y/z", 4},
|
||||
})
|
||||
checkGlobMatch(t, "*.txt, txt, **.txt, **txt, **txt*", []indexerMatchList{
|
||||
{"my.txt", 0},
|
||||
{"don't", -1},
|
||||
{"mytxt", 3},
|
||||
{"/data/my.txt", 2},
|
||||
{"data/my.txt", 2},
|
||||
{"data/txt", 3},
|
||||
{"data/thistxtfile", 4},
|
||||
{"/data/thistxtfile", 4},
|
||||
})
|
||||
checkGlobMatch(t, "data/**/*.txt, data/**.txt", []indexerMatchList{
|
||||
{"data/a/b/c/d.txt", 0},
|
||||
{"data/a.txt", 1},
|
||||
})
|
||||
checkGlobMatch(t, "**/*.txt, data/**.txt", []indexerMatchList{
|
||||
{"data/a/b/c/d.txt", 0},
|
||||
{"data/a.txt", 0},
|
||||
{"a.txt", -1},
|
||||
})
|
||||
}
|
||||
|
||||
func checkGlobMatch(t *testing.T, globstr string, list []indexerMatchList) {
|
||||
glist := IndexerGlobFromString(globstr)
|
||||
if len(list) == 0 {
|
||||
assert.Empty(t, glist)
|
||||
return
|
||||
}
|
||||
assert.NotEmpty(t, glist)
|
||||
for _, m := range list {
|
||||
found := false
|
||||
for pos, g := range glist {
|
||||
if g.Match(m.value) {
|
||||
assert.Equal(t, m.position, pos, "Test string `%s` doesn't match `%s`@%d, but matches @%d", m.value, globstr, m.position, pos)
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
assert.Equal(t, m.position, -1, "Test string `%s` doesn't match `%s` anywhere; expected @%d", m.value, globstr, m.position)
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue