Skip to content

Commit 43b1a6b

Browse files
When creating line diffs do not split within an html entity (go-gitea#13357)
Backport go-gitea#13357 * When creating line diffs do not split within an html entity Fix go-gitea#13342 Signed-off-by: Andrew Thornton <art27@cantab.net> * Add test case Signed-off-by: Andrew Thornton <art27@cantab.net> * improve test Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: techknowlogick <techknowlogick@gitea.io>
1 parent 52b4b98 commit 43b1a6b

File tree

2 files changed

+114
-0
lines changed

2 files changed

+114
-0
lines changed

services/gitdiff/gitdiff.go

+84
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,9 @@ func init() {
290290
diffMatchPatch.DiffEditCost = 100
291291
}
292292

293+
var unterminatedEntityRE = regexp.MustCompile(`&[^ ;]*$`)
294+
var unstartedEntiyRE = regexp.MustCompile(`^[^ ;]*;`)
295+
293296
// GetComputedInlineDiffFor computes inline diff for the given line.
294297
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
295298
if setting.Git.DisableDiffHighlight {
@@ -329,9 +332,90 @@ func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) tem
329332

330333
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, diff1[1:]), highlight.Code(diffSection.FileName, diff2[1:]), true)
331334
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
335+
336+
// Now we need to clean up the split entities
337+
diffRecord = unsplitEntities(diffRecord)
338+
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
339+
332340
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
333341
}
334342

343+
// unsplitEntities looks for broken up html entities. It relies on records being presimplified and the data being passed in being valid html
344+
func unsplitEntities(records []diffmatchpatch.Diff) []diffmatchpatch.Diff {
345+
// Unsplitting entities is simple...
346+
//
347+
// Iterate through all be the last records because if we're the last record then there's nothing we can do
348+
for i := 0; i+1 < len(records); i++ {
349+
record := &records[i]
350+
351+
// Look for an unterminated entity at the end of the line
352+
unterminated := unterminatedEntityRE.FindString(record.Text)
353+
if len(unterminated) == 0 {
354+
continue
355+
}
356+
357+
switch record.Type {
358+
case diffmatchpatch.DiffEqual:
359+
// If we're an diff equal we want to give this unterminated entity to our next delete and insert
360+
record.Text = record.Text[0 : len(record.Text)-len(unterminated)]
361+
records[i+1].Text = unterminated + records[i+1].Text
362+
363+
nextType := records[i+1].Type
364+
365+
if nextType == diffmatchpatch.DiffEqual {
366+
continue
367+
}
368+
369+
// if the next in line is a delete then we will want the thing after that to be an insert and so on.
370+
oneAfterType := diffmatchpatch.DiffInsert
371+
if nextType == diffmatchpatch.DiffInsert {
372+
oneAfterType = diffmatchpatch.DiffDelete
373+
}
374+
375+
if i+2 < len(records) && records[i+2].Type == oneAfterType {
376+
records[i+2].Text = unterminated + records[i+2].Text
377+
} else {
378+
records = append(records[:i+2], append([]diffmatchpatch.Diff{
379+
{
380+
Type: oneAfterType,
381+
Text: unterminated,
382+
}}, records[i+2:]...)...)
383+
}
384+
case diffmatchpatch.DiffDelete:
385+
fallthrough
386+
case diffmatchpatch.DiffInsert:
387+
// if we're an insert or delete we want to claim the terminal bit of the entity from the next equal in line
388+
targetType := diffmatchpatch.DiffInsert
389+
if record.Type == diffmatchpatch.DiffInsert {
390+
targetType = diffmatchpatch.DiffDelete
391+
}
392+
next := &records[i+1]
393+
if next.Type == diffmatchpatch.DiffEqual {
394+
// if the next is an equal we need to snaffle the entity end off the start and add an delete/insert
395+
if terminal := unstartedEntiyRE.FindString(next.Text); len(terminal) > 0 {
396+
record.Text += terminal
397+
next.Text = next.Text[len(terminal):]
398+
records = append(records[:i+2], append([]diffmatchpatch.Diff{
399+
{
400+
Type: targetType,
401+
Text: unterminated,
402+
}}, records[i+2:]...)...)
403+
}
404+
} else if next.Type == targetType {
405+
// if the next is an insert we need to snaffle the entity end off the one after that and add it to both.
406+
if i+2 < len(records) && records[i+2].Type == diffmatchpatch.DiffEqual {
407+
if terminal := unstartedEntiyRE.FindString(records[i+2].Text); len(terminal) > 0 {
408+
record.Text += terminal
409+
next.Text += terminal
410+
records[i+2].Text = records[i+2].Text[len(terminal):]
411+
}
412+
}
413+
}
414+
}
415+
}
416+
return records
417+
}
418+
335419
// DiffFile represents a file diff.
336420
type DiffFile struct {
337421
Name string

services/gitdiff/gitdiff_test.go

+30
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"code.gitea.io/gitea/models"
1616
"code.gitea.io/gitea/modules/git"
1717
"code.gitea.io/gitea/modules/setting"
18+
"github.com/sergi/go-diff/diffmatchpatch"
1819
dmp "github.com/sergi/go-diff/diffmatchpatch"
1920
"github.com/stretchr/testify/assert"
2021
"gopkg.in/ini.v1"
@@ -26,6 +27,35 @@ func assertEqual(t *testing.T, s1 string, s2 template.HTML) {
2627
}
2728
}
2829

30+
func TestUnsplitEntities(t *testing.T) {
31+
left := "sh &#34;useradd -u 111 jenkins&#34;"
32+
right := "sh &#39;useradd -u $(stat -c &#34;%u&#34; .gitignore) jenkins&#39;"
33+
diffRecord := diffMatchPatch.DiffMain(left, right, true)
34+
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
35+
36+
// Now we need to clean up the split entities
37+
diffRecord = unsplitEntities(diffRecord)
38+
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
39+
40+
leftRecombined := ""
41+
rightRecombined := ""
42+
for _, record := range diffRecord {
43+
assert.False(t, unterminatedEntityRE.MatchString(record.Text), "")
44+
switch record.Type {
45+
case diffmatchpatch.DiffDelete:
46+
leftRecombined += record.Text
47+
case diffmatchpatch.DiffInsert:
48+
rightRecombined += record.Text
49+
default:
50+
leftRecombined += record.Text
51+
rightRecombined += record.Text
52+
}
53+
}
54+
55+
assert.EqualValues(t, left, leftRecombined)
56+
assert.EqualValues(t, right, rightRecombined)
57+
}
58+
2959
func TestDiffToHTML(t *testing.T) {
3060
setting.Cfg = ini.Empty()
3161
assertEqual(t, "foo <span class=\"added-code\">bar</span> biz", diffToHTML("", []dmp.Diff{

0 commit comments

Comments
 (0)