forked from forgejo/forgejo
Add .gitattribute assisted language detection to blame, diff and render (#17590)
Use the check-attr code to look up the language assigned to a file and pass it to chroma as a hint for the language of the file. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
81a4fc7528
commit
3c4724d70e
13 changed files with 223 additions and 97 deletions
|
@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
|
|||
AllAttributes bool
|
||||
Attributes []string
|
||||
Filenames []string
|
||||
IndexFile string
|
||||
WorkTree string
|
||||
}
|
||||
|
||||
// CheckAttribute returns the requested git attributes for the given files
|
||||
|
@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
|
|||
return nil, fmt.Errorf("git version missing: %v", err)
|
||||
}
|
||||
|
||||
env := []string{}
|
||||
|
||||
if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
|
||||
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
|
||||
}
|
||||
if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
|
||||
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
|
||||
}
|
||||
|
||||
if len(env) > 0 {
|
||||
env = append(os.Environ(), env...)
|
||||
}
|
||||
|
||||
stdOut := new(bytes.Buffer)
|
||||
stdErr := new(bytes.Buffer)
|
||||
|
||||
|
@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
|
|||
|
||||
cmd := NewCommand(cmdArgs...)
|
||||
|
||||
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
|
||||
if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
|
||||
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
|
||||
}
|
||||
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
|
@ -45,14 +46,15 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
|
|||
}
|
||||
|
||||
// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
|
||||
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
|
||||
tmpIndex, err := os.CreateTemp("", "index")
|
||||
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
|
||||
tmpDir, err = os.MkdirTemp("", "index")
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
filename = tmpIndex.Name()
|
||||
|
||||
filename = filepath.Join(tmpDir, ".tmp-index")
|
||||
cancel = func() {
|
||||
err := util.Remove(filename)
|
||||
err := util.RemoveAll(tmpDir)
|
||||
if err != nil {
|
||||
log.Error("failed to remove tmp index file: %v", err)
|
||||
}
|
||||
|
@ -60,7 +62,7 @@ func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename strin
|
|||
err = repo.ReadTreeToIndex(treeish, filename)
|
||||
if err != nil {
|
||||
defer cancel()
|
||||
return "", func() {}, err
|
||||
return "", "", func() {}, err
|
||||
}
|
||||
return
|
||||
}
|
||||
|
|
|
@ -11,11 +11,10 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/analyze"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
"github.com/go-git/go-git/v5"
|
||||
|
@ -48,35 +47,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||
var checker *CheckAttributeReader
|
||||
|
||||
if CheckGitVersionAtLeast("1.7.8") == nil {
|
||||
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
|
||||
indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
|
||||
if err == nil {
|
||||
defer deleteTemporaryFile()
|
||||
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
|
||||
if err == nil {
|
||||
defer func() {
|
||||
_ = util.RemoveAll(tmpWorkTree)
|
||||
}()
|
||||
|
||||
checker = &CheckAttributeReader{
|
||||
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
|
||||
Repo: repo,
|
||||
IndexFile: indexFilename,
|
||||
WorkTree: tmpWorkTree,
|
||||
}
|
||||
ctx, cancel := context.WithCancel(DefaultContext)
|
||||
if err := checker.Init(ctx); err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
} else {
|
||||
go func() {
|
||||
err = checker.Run()
|
||||
if err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
}
|
||||
defer cancel()
|
||||
checker = &CheckAttributeReader{
|
||||
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
|
||||
Repo: repo,
|
||||
IndexFile: indexFilename,
|
||||
WorkTree: workTree,
|
||||
}
|
||||
ctx, cancel := context.WithCancel(DefaultContext)
|
||||
if err := checker.Init(ctx); err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
} else {
|
||||
go func() {
|
||||
err = checker.Run()
|
||||
if err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
}
|
||||
defer cancel()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -114,6 +106,21 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||
sizes[language] += f.Size
|
||||
|
||||
return nil
|
||||
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
|
||||
// strip off a ? if present
|
||||
if idx := strings.IndexByte(language, '?'); idx >= 0 {
|
||||
language = language[:idx]
|
||||
}
|
||||
if len(language) != 0 {
|
||||
// group languages, such as Pug -> HTML; SCSS -> CSS
|
||||
group := enry.GetLanguageGroup(language)
|
||||
if len(group) != 0 {
|
||||
language = group
|
||||
}
|
||||
|
||||
sizes[language] += f.Size
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,11 +13,10 @@ import (
|
|||
"context"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/analyze"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/go-enry/go-enry/v2"
|
||||
)
|
||||
|
@ -68,35 +67,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||
var checker *CheckAttributeReader
|
||||
|
||||
if CheckGitVersionAtLeast("1.7.8") == nil {
|
||||
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
|
||||
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
|
||||
if err == nil {
|
||||
defer deleteTemporaryFile()
|
||||
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
|
||||
if err == nil {
|
||||
defer func() {
|
||||
_ = util.RemoveAll(tmpWorkTree)
|
||||
}()
|
||||
|
||||
checker = &CheckAttributeReader{
|
||||
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
|
||||
Repo: repo,
|
||||
IndexFile: indexFilename,
|
||||
WorkTree: tmpWorkTree,
|
||||
}
|
||||
ctx, cancel := context.WithCancel(DefaultContext)
|
||||
if err := checker.Init(ctx); err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
} else {
|
||||
go func() {
|
||||
err = checker.Run()
|
||||
if err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
}
|
||||
defer cancel()
|
||||
checker = &CheckAttributeReader{
|
||||
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
|
||||
Repo: repo,
|
||||
IndexFile: indexFilename,
|
||||
WorkTree: worktree,
|
||||
}
|
||||
ctx, cancel := context.WithCancel(DefaultContext)
|
||||
if err := checker.Init(ctx); err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
} else {
|
||||
go func() {
|
||||
err = checker.Run()
|
||||
if err != nil {
|
||||
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
|
||||
cancel()
|
||||
}
|
||||
}()
|
||||
}
|
||||
defer cancel()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -138,7 +130,23 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
|
|||
|
||||
sizes[language] += f.Size()
|
||||
continue
|
||||
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
|
||||
// strip off a ? if present
|
||||
if idx := strings.IndexByte(language, '?'); idx >= 0 {
|
||||
language = language[:idx]
|
||||
}
|
||||
if len(language) != 0 {
|
||||
// group languages, such as Pug -> HTML; SCSS -> CSS
|
||||
group := enry.GetLanguageGroup(language)
|
||||
if len(group) != 0 {
|
||||
language = group
|
||||
}
|
||||
|
||||
sizes[language] += f.Size()
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -55,7 +55,7 @@ func NewContext() {
|
|||
}
|
||||
|
||||
// Code returns a HTML version of code string with chroma syntax highlighting classes
|
||||
func Code(fileName, code string) string {
|
||||
func Code(fileName, language, code string) string {
|
||||
NewContext()
|
||||
|
||||
// diff view newline will be passed as empty, change to literal \n so it can be copied
|
||||
|
@ -69,9 +69,23 @@ func Code(fileName, code string) string {
|
|||
}
|
||||
|
||||
var lexer chroma.Lexer
|
||||
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
|
||||
//use mapped value to find lexer
|
||||
lexer = lexers.Get(val)
|
||||
|
||||
if len(language) > 0 {
|
||||
lexer = lexers.Get(language)
|
||||
|
||||
if lexer == nil {
|
||||
// Attempt stripping off the '?'
|
||||
if idx := strings.IndexByte(language, '?'); idx > 0 {
|
||||
lexer = lexers.Get(language[:idx])
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
|
||||
//use mapped value to find lexer
|
||||
lexer = lexers.Get(val)
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
|
@ -119,7 +133,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
|
|||
}
|
||||
|
||||
// File returns a slice of chroma syntax highlighted lines of code
|
||||
func File(numLines int, fileName string, code []byte) []string {
|
||||
func File(numLines int, fileName, language string, code []byte) []string {
|
||||
NewContext()
|
||||
|
||||
if len(code) > sizeLimit {
|
||||
|
@ -139,8 +153,16 @@ func File(numLines int, fileName string, code []byte) []string {
|
|||
htmlw := bufio.NewWriter(&htmlbuf)
|
||||
|
||||
var lexer chroma.Lexer
|
||||
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
|
||||
lexer = lexers.Get(val)
|
||||
|
||||
// provided language overrides everything
|
||||
if len(language) > 0 {
|
||||
lexer = lexers.Get(language)
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
|
||||
lexer = lexers.Get(val)
|
||||
}
|
||||
}
|
||||
|
||||
if lexer == nil {
|
||||
|
|
|
@ -96,7 +96,7 @@ steps:
|
|||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
if got := File(tt.numLines, tt.fileName, []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
|
||||
if got := File(tt.numLines, tt.fileName, "", []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
|
||||
t.Errorf("File() = %v, want %v", got, tt.want)
|
||||
}
|
||||
})
|
||||
|
|
|
@ -101,7 +101,7 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
|
|||
Language: result.Language,
|
||||
Color: result.Color,
|
||||
LineNumbers: lineNumbers,
|
||||
FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()),
|
||||
FormattedLines: highlight.Code(result.Filename, "", formattedLinesBuffer.String()),
|
||||
}, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ import (
|
|||
|
||||
"code.gitea.io/gitea/models"
|
||||
"code.gitea.io/gitea/models/unittest"
|
||||
"code.gitea.io/gitea/modules/json"
|
||||
"code.gitea.io/gitea/modules/test"
|
||||
"code.gitea.io/gitea/services/gitdiff"
|
||||
|
||||
|
@ -118,13 +119,21 @@ func TestGetDiffPreview(t *testing.T) {
|
|||
t.Run("with given branch", func(t *testing.T) {
|
||||
diff, err := GetDiffPreview(ctx.Repo.Repository, branch, treePath, content)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, expectedDiff, diff)
|
||||
expectedBs, err := json.Marshal(expectedDiff)
|
||||
assert.NoError(t, err)
|
||||
bs, err := json.Marshal(diff)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, expectedBs, bs)
|
||||
})
|
||||
|
||||
t.Run("empty branch, same results", func(t *testing.T) {
|
||||
diff, err := GetDiffPreview(ctx.Repo.Repository, "", treePath, content)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, expectedDiff, diff)
|
||||
expectedBs, err := json.Marshal(expectedDiff)
|
||||
assert.NoError(t, err)
|
||||
bs, err := json.Marshal(diff)
|
||||
assert.NoError(t, err)
|
||||
assert.EqualValues(t, expectedBs, bs)
|
||||
})
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue