1
0
Fork 0
forked from forgejo/forgejo

Add .gitattribute assisted language detection to blame, diff and render (#17590)

Use check attribute code to check the assigned language of a file and send that in to
chroma as a hint for the language of the file.

Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
zeripath 2021-11-17 20:37:00 +00:00 committed by GitHub
parent 81a4fc7528
commit 3c4724d70e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 223 additions and 97 deletions

View file

@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
AllAttributes bool
Attributes []string
Filenames []string
IndexFile string
WorkTree string
}
// CheckAttribute return the Blame object of file
@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
return nil, fmt.Errorf("git version missing: %v", err)
}
env := []string{}
if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
}
if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
}
if len(env) > 0 {
env = append(os.Environ(), env...)
}
stdOut := new(bytes.Buffer)
stdErr := new(bytes.Buffer)
@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
cmd := NewCommand(cmdArgs...)
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
}

View file

@ -8,6 +8,7 @@ import (
"bytes"
"context"
"os"
"path/filepath"
"strings"
"code.gitea.io/gitea/modules/log"
@ -45,14 +46,15 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
}
// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
tmpIndex, err := os.CreateTemp("", "index")
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
tmpDir, err = os.MkdirTemp("", "index")
if err != nil {
return
}
filename = tmpIndex.Name()
filename = filepath.Join(tmpDir, ".tmp-index")
cancel = func() {
err := util.Remove(filename)
err := util.RemoveAll(tmpDir)
if err != nil {
log.Error("failed to remove tmp index file: %v", err)
}
@ -60,7 +62,7 @@ func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename strin
err = repo.ReadTreeToIndex(treeish, filename)
if err != nil {
defer cancel()
return "", func() {}, err
return "", "", func() {}, err
}
return
}

View file

@ -11,11 +11,10 @@ import (
"bytes"
"context"
"io"
"os"
"strings"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
@ -48,35 +47,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
var checker *CheckAttributeReader
if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
if err == nil {
defer func() {
_ = util.RemoveAll(tmpWorkTree)
}()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: tmpWorkTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: workTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
}
@ -114,6 +106,21 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
sizes[language] += f.Size
return nil
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
sizes[language] += f.Size
return nil
}
}
}
}

View file

@ -13,11 +13,10 @@ import (
"context"
"io"
"math"
"os"
"strings"
"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/util"
"github.com/go-enry/go-enry/v2"
)
@ -68,35 +67,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
var checker *CheckAttributeReader
if CheckGitVersionAtLeast("1.7.8") == nil {
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
if err == nil {
defer deleteTemporaryFile()
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
if err == nil {
defer func() {
_ = util.RemoveAll(tmpWorkTree)
}()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: tmpWorkTree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
checker = &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
}
ctx, cancel := context.WithCancel(DefaultContext)
if err := checker.Init(ctx); err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
} else {
go func() {
err = checker.Run()
if err != nil {
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
cancel()
}
}()
}
defer cancel()
}
}
@ -138,7 +130,23 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
sizes[language] += f.Size()
continue
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}
sizes[language] += f.Size()
continue
}
}
}
}