diff --git a/services/gitdiff/gitdiff.go b/services/gitdiff/gitdiff.go index 50e2f8cf1ef..4f223fdc031 100644 --- a/services/gitdiff/gitdiff.go +++ b/services/gitdiff/gitdiff.go @@ -182,6 +182,7 @@ var ( codeTagSuffix = []byte(``) ) var trailingSpanRegex = regexp.MustCompile(`]?$`) +var entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`) // shouldWriteInline represents combinations where we manually write inline changes func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool { @@ -205,14 +206,40 @@ func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineT match = "" } // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency. - // Since inline changes might split in the middle of a chroma span tag, make we manually put it back together - // before writing so we don't try insert added/removed code spans in the middle of an existing chroma span - // and create broken HTML. + // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together + // before writing so we don't try insert added/removed code spans in the middle of one of those + // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of + // a line ending with an incomplete HTML entity or partial/opening . + + // EX: + // diffs[{Type: dmp.DiffDelete, Text: "language}] + + // After first iteration + // diffs[{Type: dmp.DiffDelete, Text: "language"}, //write out + // {Type: dmp.DiffEqual, Text: ",}] + + // After second iteration + // {Type: dmp.DiffEqual, Text: ""}, // write out + // {Type: dmp.DiffDelete, Text: ",}] + + // Final + // {Type: dmp.DiffDelete, Text: ",}] + // end up writing , + // Instead of lass="p", + m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text) if m != nil { match = diff.Text[m[0]:m[1]] diff.Text = strings.TrimSuffix(diff.Text, match) } + m = entityRegex.FindStringSubmatchIndex(diff.Text) + if m != nil { + match = diff.Text[m[0]:m[1]] + diff.Text = strings.TrimSuffix(diff.Text, match) + } // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it if strings.HasPrefix(diff.Text, "") { buf.WriteString("") @@ -290,9 +317,6 @@ func init() { diffMatchPatch.DiffEditCost = 100 } -var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`) -var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`) - // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML { if setting.Git.DisableDiffHighlight { @@ -333,89 +357,11 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - // Now we need to clean up the split entities - diffRecord = unsplitEntities(diffRecord) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) } -// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html -func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff { - // Unsplitting entities is simple... - // - // Iterate through all be the last records because if we're the last record then there's nothing we can do - for i := 0; i+1 < len(records); i++ { - record := &records[i] - - // Look for an unterminated entity at the end of the line - unterminated := unterminatedEntityRE.FindString(record.Text) - if len(unterminated) == 0 { - continue - } - - switch record.Type { - case diffmatchpatch.DiffEqual: - // If we're an diff equal we want to give this unterminated entity to our next delete and insert - record.Text = record.Text[0 : len(record.Text)-len(unterminated)] - records[i+1].Text = unterminated + records[i+1].Text - - nextType := records[i+1].Type - - if nextType == diffmatchpatch.DiffEqual { - continue - } - - // if the next in line is a delete then we will want the thing after that to be an insert and so on. - oneAfterType := diffmatchpatch.DiffInsert - if nextType == diffmatchpatch.DiffInsert { - oneAfterType = diffmatchpatch.DiffDelete - } - - if i+2 < len(records) && records[i+2].Type == oneAfterType { - records[i+2].Text = unterminated + records[i+2].Text - } else { - records = append(records[:i+2], append([]diffmatchpatch.Diff{ - { - Type: oneAfterType, - Text: unterminated, - }}, records[i+2:]...)...) - } - case diffmatchpatch.DiffDelete: - fallthrough - case diffmatchpatch.DiffInsert: - // if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line - targetType := diffmatchpatch.DiffInsert - if record.Type == diffmatchpatch.DiffInsert { - targetType = diffmatchpatch.DiffDelete - } - next := &records[i+1] - if next.Type == diffmatchpatch.DiffEqual { - // if the next is an equal we need to snaffle the entity end off the start and add an delete/insert - if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 { - record.Text += terminal - next.Text = next.Text[len(terminal):] - records = append(records[:i+2], append([]diffmatchpatch.Diff{ - { - Type: targetType, - Text: unterminated, - }}, records[i+2:]...)...) - } - } else if next.Type == targetType { - // if the next is an insert we need to snaffle the entity end off the one after that and add it to both. - if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual { - if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 { - record.Text += terminal - next.Text += terminal - records[i+2].Text = records[i+2].Text[len(terminal):] - } - } - } - } - } - return records -} - // DiffFile represents a file diff. type DiffFile struct { Name string diff --git a/services/gitdiff/gitdiff_test.go b/services/gitdiff/gitdiff_test.go index 0683997dab8..6e3b5b09946 100644 --- a/services/gitdiff/gitdiff_test.go +++ b/services/gitdiff/gitdiff_test.go @@ -15,7 +15,6 @@ import ( "code.gitea.io/gitea/models" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" - "github.com/sergi/go-diff/diffmatchpatch" dmp "github.com/sergi/go-diff/diffmatchpatch" "github.com/stretchr/testify/assert" "gopkg.in/ini.v1" @@ -27,35 +26,6 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) { } } -func TestUnsplitEntities(t *testing.T) { - left := "sh "useradd -u 111 jenkins"" - right := "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'" - diffRecord := diffMatchPatch.DiffMain(left, right, true) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - // Now we need to clean up the split entities - diffRecord = unsplitEntities(diffRecord) - diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) - - leftRecombined := "" - rightRecombined := "" - for _, record := range diffRecord { - assert.False(t, unterminatedEntityRE.MatchString(record.Text), "") - switch record.Type { - case diffmatchpatch.DiffDelete: - leftRecombined += record.Text - case diffmatchpatch.DiffInsert: - rightRecombined += record.Text - default: - leftRecombined += record.Text - rightRecombined += record.Text - } - } - - assert.EqualValues(t, left, leftRecombined) - assert.EqualValues(t, right, rightRecombined) -} - func TestDiffToHTML(t *testing.T) { setting.Cfg = ini.Empty() assertEqual(t, "foo bar biz", diffToHTML("", []dmp.Diff{ @@ -113,6 +83,21 @@ func TestDiffToHTML(t *testing.T) { {Type: dmp.DiffEqual, Text: ""// ", sys.argv"}, {Type: dmp.DiffInsert, Text: ")"}, }, DiffLineAdd)) + + assertEqual(t, "sh 'useradd -u $(stat -c "%u" .gitignore) jenkins'", diffToHTML("", []dmp.Diff{ + {Type: dmp.DiffEqual, Text: "sh "}, + {Type: dmp.DiffDelete, Text: "4;useradd -u 111 jenkins""}, + {Type: dmp.DiffInsert, Text: "9;useradd -u $(stat -c "%u" .gitignore) jenkins'"}, + {Type: dmp.DiffEqual, Text: ";"}, + }, DiffLineAdd)) + + assertEqual(t, " <h4 class="release-list-title df ac">", diffToHTML("", []dmp.Diff{ + {Type: dmp.DiffEqual, Text: " <h"}, + {Type: dmp.DiffInsert, Text: "4 class=&#"}, + {Type: dmp.DiffEqual, Text: "3"}, + {Type: dmp.DiffInsert, Text: "4;release-list-title df ac""}, + {Type: dmp.DiffEqual, Text: ">"}, + }, DiffLineAdd)) } func TestParsePatch_singlefile(t *testing.T) {