1
0
Fork 0
forked from forgejo/forgejo

Restrict repository indexing by glob match (#7767)

* Restrict repository indexing by file extension

* Use REPO_EXTENSIONS_LIST_INCLUDE instead of REPO_EXTENSIONS_LIST_EXCLUDE and have a more flexible extension pattern

* Corrected to pass lint gosimple

* Add wildcard support to REPO_INDEXER_EXTENSIONS

* This reverts commit 72a650c8e4.

* Add wildcard support to REPO_INDEXER_EXTENSIONS (no make vendor)

* Simplify isIndexable() for better clarity

* Add gobwas/glob to vendors

* manually set appengine new release

* Implement better REPO_INDEXER_INCLUDE and REPO_INDEXER_EXCLUDE

* Add unit and integration tests

* Update app.ini.sample and reword config-cheat-sheet

* Add doc page and correct app.ini.sample

* Some polish on the doc

* Simplify code as suggested by @lafriks
This commit is contained in:
guillep2k 2019-09-11 14:26:28 -03:00 committed by Lauris BH
parent 3fd0eec900
commit 72f6d5c882
38 changed files with 920 additions and 17 deletions

View file

@ -7,6 +7,11 @@ package setting
import (
"path"
"path/filepath"
"strings"
"code.gitea.io/gitea/modules/log"
"github.com/gobwas/glob"
)
// enumerates all the indexer queue types
@ -29,6 +34,8 @@ var (
IssueQueueDir string
IssueQueueConnStr string
IssueQueueBatchNumber int
IncludePatterns []glob.Glob
ExcludePatterns []glob.Glob
}{
IssueType: "bleve",
IssuePath: "indexers/issues.bleve",
@ -51,6 +58,9 @@ func newIndexerService() {
if !filepath.IsAbs(Indexer.RepoPath) {
Indexer.RepoPath = path.Join(AppWorkPath, Indexer.RepoPath)
}
Indexer.IncludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_INCLUDE").MustString(""))
Indexer.ExcludePatterns = IndexerGlobFromString(sec.Key("REPO_INDEXER_EXCLUDE").MustString(""))
Indexer.UpdateQueueLength = sec.Key("UPDATE_BUFFER_LEN").MustInt(20)
Indexer.MaxIndexerFileSize = sec.Key("MAX_FILE_SIZE").MustInt64(1024 * 1024)
Indexer.IssueQueueType = sec.Key("ISSUE_INDEXER_QUEUE_TYPE").MustString(LevelQueueType)
@ -58,3 +68,19 @@ func newIndexerService() {
Indexer.IssueQueueConnStr = sec.Key("ISSUE_INDEXER_QUEUE_CONN_STR").MustString(path.Join(AppDataPath, ""))
Indexer.IssueQueueBatchNumber = sec.Key("ISSUE_INDEXER_QUEUE_BATCH_NUMBER").MustInt(20)
}
// IndexerGlobFromString parses a comma separated list of patterns and returns
// a glob.Glob slice suited for repo indexing.
//
// The whole input is lower-cased before it is split, so every pattern is
// compiled in lower case. Each entry is trimmed of surrounding whitespace and
// empty entries are ignored. Entries that fail to compile are logged and
// skipped rather than aborting the whole list. Patterns are compiled with '.'
// and '/' passed to glob.Compile as separators.
//
// The returned slice is never nil; it is empty when no pattern could be used.
func IndexerGlobFromString(globstr string) []glob.Glob {
	entries := strings.Split(strings.ToLower(globstr), ",")
	// There can never be more compiled globs than comma separated entries.
	globs := make([]glob.Glob, 0, len(entries))
	for _, entry := range entries {
		pattern := strings.TrimSpace(entry)
		if pattern == "" {
			continue
		}
		g, err := glob.Compile(pattern, '.', '/')
		if err != nil {
			// A single bad pattern must not disable the remaining ones.
			log.Info("Invalid glob expression '%s' (skipped): %v", pattern, err)
			continue
		}
		globs = append(globs, g)
	}
	return globs
}

View file

@ -0,0 +1,73 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package setting
import (
"testing"
"github.com/stretchr/testify/assert"
)
// indexerMatchList is a single expectation used by checkGlobMatch: a test
// string together with the index of the pattern that should match it.
type indexerMatchList struct {
	// value is the string matched against each compiled glob in turn.
	value string
	// position is the expected index of the first glob that matches value,
	// or -1 when no glob is expected to match.
	position int
}
func Test_newIndexerGlobSettings(t *testing.T) {
checkGlobMatch(t, "", []indexerMatchList{})
checkGlobMatch(t, " ", []indexerMatchList{})
checkGlobMatch(t, "data, */data, */data/*, **/data/*, **/data/**", []indexerMatchList{
{"", -1},
{"don't", -1},
{"data", 0},
{"/data", 1},
{"x/data", 1},
{"x/data/y", 2},
{"a/b/c/data/z", 3},
{"a/b/c/data/x/y/z", 4},
})
checkGlobMatch(t, "*.txt, txt, **.txt, **txt, **txt*", []indexerMatchList{
{"my.txt", 0},
{"don't", -1},
{"mytxt", 3},
{"/data/my.txt", 2},
{"data/my.txt", 2},
{"data/txt", 3},
{"data/thistxtfile", 4},
{"/data/thistxtfile", 4},
})
checkGlobMatch(t, "data/**/*.txt, data/**.txt", []indexerMatchList{
{"data/a/b/c/d.txt", 0},
{"data/a.txt", 1},
})
checkGlobMatch(t, "**/*.txt, data/**.txt", []indexerMatchList{
{"data/a/b/c/d.txt", 0},
{"data/a.txt", 0},
{"a.txt", -1},
})
}
// checkGlobMatch compiles globstr with IndexerGlobFromString and verifies each
// expectation in list against the resulting globs.
//
// With an empty list it only asserts that no globs were produced. Otherwise it
// asserts that at least one glob was produced and, for every entry, that the
// index of the first matching glob equals the entry's position (-1 meaning
// that no glob may match).
func checkGlobMatch(t *testing.T, globstr string, list []indexerMatchList) {
	compiled := IndexerGlobFromString(globstr)
	if len(list) == 0 {
		assert.Empty(t, compiled)
		return
	}
	assert.NotEmpty(t, compiled)

	for _, want := range list {
		// Find the index of the first glob that accepts the test string.
		first := -1
		for idx := range compiled {
			if compiled[idx].Match(want.value) {
				first = idx
				break
			}
		}

		if first < 0 {
			assert.Equal(t, want.position, -1, "Test string `%s` doesn't match `%s` anywhere; expected @%d", want.value, globstr, want.position)
			continue
		}
		assert.Equal(t, want.position, first, "Test string `%s` doesn't match `%s`@%d, but matches @%d", want.value, globstr, want.position, first)
	}
}