forked from forgejo/forgejo
Improve code diff highlight, fix incorrect rendered diff result (#19958)
Use Unicode placeholders to replace HTML tags and HTML entities first, then do diff, then recover the HTML tags and HTML entities. Now the code diff with highlight has stable behavior, and won't emit broken tags.
This commit is contained in:
parent
14178c56bb
commit
3310dd1d19
5 changed files with 379 additions and 378 deletions
|
@ -15,7 +15,6 @@ import (
|
|||
"io"
|
||||
"net/url"
|
||||
"os"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -40,7 +39,7 @@ import (
|
|||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// DiffLineType represents the type of a DiffLine.
|
||||
// DiffLineType represents the type of DiffLine.
|
||||
type DiffLineType uint8
|
||||
|
||||
// DiffLineType possible values.
|
||||
|
@ -51,7 +50,7 @@ const (
|
|||
DiffLineSection
|
||||
)
|
||||
|
||||
// DiffFileType represents the type of a DiffFile.
|
||||
// DiffFileType represents the type of DiffFile.
|
||||
type DiffFileType uint8
|
||||
|
||||
// DiffFileType possible values.
|
||||
|
@ -100,12 +99,12 @@ type DiffLineSectionInfo struct {
|
|||
// BlobExcerptChunkSize represent max lines of excerpt
|
||||
const BlobExcerptChunkSize = 20
|
||||
|
||||
// GetType returns the type of a DiffLine.
|
||||
// GetType returns the type of DiffLine.
|
||||
func (d *DiffLine) GetType() int {
|
||||
return int(d.Type)
|
||||
}
|
||||
|
||||
// CanComment returns whether or not a line can get commented
|
||||
// CanComment returns whether a line can get commented
|
||||
func (d *DiffLine) CanComment() bool {
|
||||
return len(d.Comments) == 0 && d.Type != DiffLineSection
|
||||
}
|
||||
|
@ -191,287 +190,13 @@ var (
|
|||
codeTagSuffix = []byte(`</span>`)
|
||||
)
|
||||
|
||||
var (
|
||||
unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
|
||||
trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
|
||||
entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
|
||||
)
|
||||
|
||||
// shouldWriteInline represents combinations where we manually write inline changes
|
||||
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
|
||||
if true &&
|
||||
diff.Type == diffmatchpatch.DiffEqual ||
|
||||
diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
|
||||
diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
|
||||
// Create a new array to store our fixed up blocks
|
||||
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
|
||||
|
||||
// semantically label some numbers
|
||||
const insert, delete, equal = 0, 1, 2
|
||||
|
||||
// record the positions of the last type of each block in the fixedup blocks
|
||||
last := []int{-1, -1, -1}
|
||||
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
|
||||
|
||||
// create a writer for insert and deletes
|
||||
toWrite := []strings.Builder{
|
||||
{},
|
||||
{},
|
||||
}
|
||||
|
||||
// make some flags for insert and delete
|
||||
unfinishedTag := []bool{false, false}
|
||||
unfinishedEnt := []bool{false, false}
|
||||
|
||||
// store stores the provided text in the writer for the typ
|
||||
store := func(text string, typ int) {
|
||||
(&(toWrite[typ])).WriteString(text)
|
||||
}
|
||||
|
||||
// hasStored returns true if there is stored content
|
||||
hasStored := func(typ int) bool {
|
||||
return (&toWrite[typ]).Len() > 0
|
||||
}
|
||||
|
||||
// stored will return that content
|
||||
stored := func(typ int) string {
|
||||
return (&toWrite[typ]).String()
|
||||
}
|
||||
|
||||
// empty will empty the stored content
|
||||
empty := func(typ int) {
|
||||
(&toWrite[typ]).Reset()
|
||||
}
|
||||
|
||||
// pop will remove the stored content appending to a diff block for that typ
|
||||
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
|
||||
if hasStored(typ) {
|
||||
if last[typ] > last[equal] {
|
||||
fixedup[last[typ]].Text += stored(typ)
|
||||
} else {
|
||||
fixedup = append(fixedup, diffmatchpatch.Diff{
|
||||
Type: operation[typ],
|
||||
Text: stored(typ),
|
||||
})
|
||||
}
|
||||
empty(typ)
|
||||
}
|
||||
return fixedup
|
||||
}
|
||||
|
||||
// Now we walk the provided diffs and check the type of each block in turn
|
||||
for _, diff := range diffs {
|
||||
|
||||
typ := delete // flag for handling insert or delete typs
|
||||
switch diff.Type {
|
||||
case diffmatchpatch.DiffEqual:
|
||||
// First check if there is anything stored
|
||||
if hasStored(insert) || hasStored(delete) {
|
||||
// There are two reasons for storing content:
|
||||
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
|
||||
if unfinishedEnt[insert] || unfinishedEnt[delete] {
|
||||
// we look for a ';' to finish an entity
|
||||
idx := strings.IndexRune(diff.Text, ';')
|
||||
if idx >= 0 {
|
||||
// if we find a ';' store the preceding content to both insert and delete
|
||||
store(diff.Text[:idx+1], insert)
|
||||
store(diff.Text[:idx+1], delete)
|
||||
|
||||
// and remove it from this block
|
||||
diff.Text = diff.Text[idx+1:]
|
||||
|
||||
// reset the ent flags
|
||||
unfinishedEnt[insert] = false
|
||||
unfinishedEnt[delete] = false
|
||||
} else {
|
||||
// otherwise store it all on insert and delete
|
||||
store(diff.Text, insert)
|
||||
store(diff.Text, delete)
|
||||
// and empty this block
|
||||
diff.Text = ""
|
||||
}
|
||||
}
|
||||
// 2. Unfinished Tag
|
||||
if unfinishedTag[insert] || unfinishedTag[delete] {
|
||||
// we look for a '>' to finish a tag
|
||||
idx := strings.IndexRune(diff.Text, '>')
|
||||
if idx >= 0 {
|
||||
store(diff.Text[:idx+1], insert)
|
||||
store(diff.Text[:idx+1], delete)
|
||||
diff.Text = diff.Text[idx+1:]
|
||||
unfinishedTag[insert] = false
|
||||
unfinishedTag[delete] = false
|
||||
} else {
|
||||
store(diff.Text, insert)
|
||||
store(diff.Text, delete)
|
||||
diff.Text = ""
|
||||
}
|
||||
}
|
||||
|
||||
// If we've completed the required tag/entities
|
||||
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
|
||||
// pop off the stack
|
||||
fixedup = pop(insert, fixedup)
|
||||
fixedup = pop(delete, fixedup)
|
||||
}
|
||||
|
||||
// If that has left this diff block empty then shortcut
|
||||
if len(diff.Text) == 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// check if this block ends in an unfinished tag?
|
||||
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
|
||||
if idx != nil {
|
||||
unfinishedTag[insert] = true
|
||||
unfinishedTag[delete] = true
|
||||
} else {
|
||||
// otherwise does it end in an unfinished entity?
|
||||
idx = entityRegex.FindStringIndex(diff.Text)
|
||||
if idx != nil {
|
||||
unfinishedEnt[insert] = true
|
||||
unfinishedEnt[delete] = true
|
||||
}
|
||||
}
|
||||
|
||||
// If there is an unfinished component
|
||||
if idx != nil {
|
||||
// Store the fragment
|
||||
store(diff.Text[idx[0]:], insert)
|
||||
store(diff.Text[idx[0]:], delete)
|
||||
// and remove it from this block
|
||||
diff.Text = diff.Text[:idx[0]]
|
||||
}
|
||||
|
||||
// If that hasn't left the block empty
|
||||
if len(diff.Text) > 0 {
|
||||
// store the position of the last equal block and store it in our diffs
|
||||
last[equal] = len(fixedup)
|
||||
fixedup = append(fixedup, diff)
|
||||
}
|
||||
continue
|
||||
case diffmatchpatch.DiffInsert:
|
||||
typ = insert
|
||||
fallthrough
|
||||
case diffmatchpatch.DiffDelete:
|
||||
// First check if there is anything stored for this type
|
||||
if hasStored(typ) {
|
||||
// if there is prepend it to this block, empty the storage and reset our flags
|
||||
diff.Text = stored(typ) + diff.Text
|
||||
empty(typ)
|
||||
unfinishedEnt[typ] = false
|
||||
unfinishedTag[typ] = false
|
||||
}
|
||||
|
||||
// check if this block ends in an unfinished tag
|
||||
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
|
||||
if idx != nil {
|
||||
unfinishedTag[typ] = true
|
||||
} else {
|
||||
// otherwise does it end in an unfinished entity
|
||||
idx = entityRegex.FindStringIndex(diff.Text)
|
||||
if idx != nil {
|
||||
unfinishedEnt[typ] = true
|
||||
}
|
||||
}
|
||||
|
||||
// If there is an unfinished component
|
||||
if idx != nil {
|
||||
// Store the fragment
|
||||
store(diff.Text[idx[0]:], typ)
|
||||
// and remove it from this block
|
||||
diff.Text = diff.Text[:idx[0]]
|
||||
}
|
||||
|
||||
// If that hasn't left the block empty
|
||||
if len(diff.Text) > 0 {
|
||||
// if the last block of this type was after the last equal block
|
||||
if last[typ] > last[equal] {
|
||||
// store this blocks content on that block
|
||||
fixedup[last[typ]].Text += diff.Text
|
||||
} else {
|
||||
// otherwise store the position of the last block of this type and store the block
|
||||
last[typ] = len(fixedup)
|
||||
fixedup = append(fixedup, diff)
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// pop off any remaining stored content
|
||||
fixedup = pop(insert, fixedup)
|
||||
fixedup = pop(delete, fixedup)
|
||||
|
||||
return fixedup
|
||||
}
|
||||
|
||||
func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline {
|
||||
func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
|
||||
buf := bytes.NewBuffer(nil)
|
||||
match := ""
|
||||
|
||||
diffs = fixupBrokenSpans(diffs)
|
||||
|
||||
// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
|
||||
for _, tag := range lineWrapperTags {
|
||||
buf.WriteString(tag)
|
||||
}
|
||||
for _, diff := range diffs {
|
||||
if shouldWriteInline(diff, lineType) {
|
||||
if len(match) > 0 {
|
||||
diff.Text = match + diff.Text
|
||||
match = ""
|
||||
}
|
||||
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
|
||||
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
|
||||
// before writing so we don't try insert added/removed code spans in the middle of one of those
|
||||
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
|
||||
// a line ending with an incomplete HTML entity or partial/opening <span>.
|
||||
|
||||
// EX:
|
||||
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
|
||||
// {Type: dmp.DiffEqual, Text: "c"},
|
||||
// {Type: dmp.DiffDelete, Text: "lass="p">}]
|
||||
|
||||
// After first iteration
|
||||
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
|
||||
// {Type: dmp.DiffEqual, Text: "<span c"},
|
||||
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]
|
||||
|
||||
// After second iteration
|
||||
// {Type: dmp.DiffEqual, Text: ""}, // write out
|
||||
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
|
||||
|
||||
// Final
|
||||
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
|
||||
// end up writing <span class="removed-code"><span class="p">,</span></span>
|
||||
// Instead of <span class="removed-code">lass="p",</span></span>
|
||||
|
||||
m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
|
||||
if m != nil {
|
||||
match = diff.Text[m[0]:m[1]]
|
||||
diff.Text = strings.TrimSuffix(diff.Text, match)
|
||||
}
|
||||
m = entityRegex.FindStringSubmatchIndex(diff.Text)
|
||||
if m != nil {
|
||||
match = diff.Text[m[0]:m[1]]
|
||||
diff.Text = strings.TrimSuffix(diff.Text, match)
|
||||
}
|
||||
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
|
||||
if strings.HasPrefix(diff.Text, "</span>") {
|
||||
buf.WriteString("</span>")
|
||||
diff.Text = strings.TrimPrefix(diff.Text, "</span>")
|
||||
}
|
||||
// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
|
||||
// The previous/next diff section will contain the rest of the tag that is missing here
|
||||
if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
|
||||
buf.WriteString(diff.Text)
|
||||
continue
|
||||
}
|
||||
}
|
||||
switch {
|
||||
case diff.Type == diffmatchpatch.DiffEqual:
|
||||
buf.WriteString(diff.Text)
|
||||
|
@ -485,7 +210,10 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT
|
|||
buf.Write(codeTagSuffix)
|
||||
}
|
||||
}
|
||||
return DiffInlineWithUnicodeEscape(template.HTML(buf.String()))
|
||||
for range lineWrapperTags {
|
||||
buf.WriteString("</span>")
|
||||
}
|
||||
return buf.String()
|
||||
}
|
||||
|
||||
// GetLine gets a specific line by type (add or del) and file line number
|
||||
|
@ -597,10 +325,12 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) Dif
|
|||
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
|
||||
}
|
||||
|
||||
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
|
||||
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
|
||||
|
||||
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
|
||||
hcd := newHighlightCodeDiff()
|
||||
diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
|
||||
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
|
||||
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
|
||||
diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
|
||||
return DiffInlineWithUnicodeEscape(template.HTML(diffHTML))
|
||||
}
|
||||
|
||||
// DiffFile represents a file diff.
|
||||
|
@ -1289,7 +1019,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
|
|||
if char == '"' {
|
||||
fmt.Fscanf(rd, "%q ", &name)
|
||||
if len(name) == 0 {
|
||||
log.Error("Reader has no file name: %v", rd)
|
||||
log.Error("Reader has no file name: reader=%+v", rd)
|
||||
return "", true
|
||||
}
|
||||
|
||||
|
@ -1311,7 +1041,7 @@ func readFileName(rd *strings.Reader) (string, bool) {
|
|||
}
|
||||
}
|
||||
if len(name) < 2 {
|
||||
log.Error("Unable to determine name from reader: %v", rd)
|
||||
log.Error("Unable to determine name from reader: reader=%+v", rd)
|
||||
return "", true
|
||||
}
|
||||
return name[2:], ambiguity
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue