Skip to content

Commit 32acd22

Browse files
KN4CK3R, silverwind, lunny, lafriks
authored and committed
Add Image Diff for SVG files (go-gitea#14867)
* Added type sniffer.
* Switched content detection from base to typesniffer.
* Added GuessContentType to Blob.
* Moved image info logic to client. Added support for SVG images in diff.
* Restore old blocked svg behaviour.
* Added missing image formats.
* Execute image diff only when container is visible.
* add margin to spinner
* improve BIN tag on image diffs
* Default to render view.
* Show image diff on incomplete diff.

Co-authored-by: silverwind <me@silverwind.io>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Lauris BH <lauris@nix.lv>
1 parent 0eb0330 commit 32acd22

File tree

19 files changed

+444
-436
lines changed

19 files changed

+444
-436
lines changed

modules/avatar/avatar.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,9 @@ import (
1010
"image"
1111
"image/color/palette"
1212

13-
// Enable PNG support:
14-
_ "image/png"
13+
_ "image/gif" // for processing gif images
14+
_ "image/jpeg" // for processing jpeg images
15+
_ "image/png" // for processing png images
1516

1617
"code.gitea.io/gitea/modules/setting"
1718
"code.gitea.io/gitea/modules/util"

modules/base/tool.go

-68
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@ import (
1212
"encoding/hex"
1313
"errors"
1414
"fmt"
15-
"net/http"
1615
"os"
1716
"path/filepath"
18-
"regexp"
1917
"runtime"
2018
"strconv"
2119
"strings"
@@ -30,15 +28,6 @@ import (
3028
"github.com/dustin/go-humanize"
3129
)
3230

33-
// Use at most this many bytes to determine Content Type.
34-
const sniffLen = 512
35-
36-
// SVGMimeType MIME type of SVG images.
37-
const SVGMimeType = "image/svg+xml"
38-
39-
var svgTagRegex = regexp.MustCompile(`(?si)\A\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
40-
var svgTagInXMLRegex = regexp.MustCompile(`(?si)\A<\?xml\b.*?\?>\s*(?:(<!--.*?-->|<!DOCTYPE\s+svg([\s:]+.*?>|>))\s*)*<svg[\s>\/]`)
41-
4231
// EncodeMD5 encodes string to md5 hex value.
4332
func EncodeMD5(str string) string {
4433
m := md5.New()
@@ -276,63 +265,6 @@ func IsLetter(ch rune) bool {
276265
return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' || ch >= 0x80 && unicode.IsLetter(ch)
277266
}
278267

279-
// DetectContentType extends http.DetectContentType with more content types.
280-
func DetectContentType(data []byte) string {
281-
ct := http.DetectContentType(data)
282-
283-
if len(data) > sniffLen {
284-
data = data[:sniffLen]
285-
}
286-
287-
if setting.UI.SVG.Enabled &&
288-
((strings.Contains(ct, "text/plain") || strings.Contains(ct, "text/html")) && svgTagRegex.Match(data) ||
289-
strings.Contains(ct, "text/xml") && svgTagInXMLRegex.Match(data)) {
290-
291-
// SVG is unsupported. https://github.com/golang/go/issues/15888
292-
return SVGMimeType
293-
}
294-
return ct
295-
}
296-
297-
// IsRepresentableAsText returns true if file content can be represented as
298-
// plain text or is empty.
299-
func IsRepresentableAsText(data []byte) bool {
300-
return IsTextFile(data) || IsSVGImageFile(data)
301-
}
302-
303-
// IsTextFile returns true if file content format is plain text or empty.
304-
func IsTextFile(data []byte) bool {
305-
if len(data) == 0 {
306-
return true
307-
}
308-
return strings.Contains(DetectContentType(data), "text/")
309-
}
310-
311-
// IsImageFile detects if data is an image format
312-
func IsImageFile(data []byte) bool {
313-
return strings.Contains(DetectContentType(data), "image/")
314-
}
315-
316-
// IsSVGImageFile detects if data is an SVG image format
317-
func IsSVGImageFile(data []byte) bool {
318-
return strings.Contains(DetectContentType(data), SVGMimeType)
319-
}
320-
321-
// IsPDFFile detects if data is a pdf format
322-
func IsPDFFile(data []byte) bool {
323-
return strings.Contains(DetectContentType(data), "application/pdf")
324-
}
325-
326-
// IsVideoFile detects if data is an video format
327-
func IsVideoFile(data []byte) bool {
328-
return strings.Contains(DetectContentType(data), "video/")
329-
}
330-
331-
// IsAudioFile detects if data is an video format
332-
func IsAudioFile(data []byte) bool {
333-
return strings.Contains(DetectContentType(data), "audio/")
334-
}
335-
336268
// EntryIcon returns the octicon class for displaying files/directories
337269
func EntryIcon(entry *git.TreeEntry) string {
338270
switch {

modules/base/tool_test.go

-92
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
package base
66

77
import (
8-
"encoding/base64"
98
"os"
109
"testing"
1110
"time"
@@ -246,97 +245,6 @@ func TestIsLetter(t *testing.T) {
246245
assert.False(t, IsLetter(0x93))
247246
}
248247

249-
func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
250-
// Pre-condition: Shorter than sniffLen detects SVG.
251-
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)))
252-
// Longer than sniffLen detects something else.
253-
assert.Equal(t, "text/plain; charset=utf-8", DetectContentType([]byte(`<!--
254-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
255-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
256-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
257-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
258-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
259-
Comment Comment Comment Comment Comment Comment Comment Comment Comment Comment
260-
Comment Comment Comment --><svg></svg>`)))
261-
}
262-
263-
// IsRepresentableAsText
264-
265-
func TestIsTextFile(t *testing.T) {
266-
assert.True(t, IsTextFile([]byte{}))
267-
assert.True(t, IsTextFile([]byte("lorem ipsum")))
268-
}
269-
270-
func TestIsImageFile(t *testing.T) {
271-
png, _ := base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAG0lEQVQYlWN4+vTpf3SMDTAMBYXYBLFpHgoKAeiOf0SGE9kbAAAAAElFTkSuQmCC")
272-
assert.True(t, IsImageFile(png))
273-
assert.False(t, IsImageFile([]byte("plain text")))
274-
}
275-
276-
func TestIsSVGImageFile(t *testing.T) {
277-
assert.True(t, IsSVGImageFile([]byte("<svg></svg>")))
278-
assert.True(t, IsSVGImageFile([]byte(" <svg></svg>")))
279-
assert.True(t, IsSVGImageFile([]byte(`<svg width="100"></svg>`)))
280-
assert.True(t, IsSVGImageFile([]byte("<svg/>")))
281-
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?><svg></svg>`)))
282-
assert.True(t, IsSVGImageFile([]byte(`<!-- Comment -->
283-
<svg></svg>`)))
284-
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiple -->
285-
<!-- Comments -->
286-
<svg></svg>`)))
287-
assert.True(t, IsSVGImageFile([]byte(`<!-- Multiline
288-
Comment -->
289-
<svg></svg>`)))
290-
assert.True(t, IsSVGImageFile([]byte(`<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1 Basic//EN"
291-
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11-basic.dtd">
292-
<svg></svg>`)))
293-
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
294-
<!-- Comment -->
295-
<svg></svg>`)))
296-
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
297-
<!-- Multiple -->
298-
<!-- Comments -->
299-
<svg></svg>`)))
300-
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
301-
<!-- Multline
302-
Comment -->
303-
<svg></svg>`)))
304-
assert.True(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
305-
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
306-
<!-- Multline
307-
Comment -->
308-
<svg></svg>`)))
309-
assert.False(t, IsSVGImageFile([]byte{}))
310-
assert.False(t, IsSVGImageFile([]byte("svg")))
311-
assert.False(t, IsSVGImageFile([]byte("<svgfoo></svgfoo>")))
312-
assert.False(t, IsSVGImageFile([]byte("text<svg></svg>")))
313-
assert.False(t, IsSVGImageFile([]byte("<html><body><svg></svg></body></html>")))
314-
assert.False(t, IsSVGImageFile([]byte(`<script>"<svg></svg>"</script>`)))
315-
assert.False(t, IsSVGImageFile([]byte(`<!-- <svg></svg> inside comment -->
316-
<foo></foo>`)))
317-
assert.False(t, IsSVGImageFile([]byte(`<?xml version="1.0" encoding="UTF-8"?>
318-
<!-- <svg></svg> inside comment -->
319-
<foo></foo>`)))
320-
}
321-
322-
func TestIsPDFFile(t *testing.T) {
323-
pdf, _ := base64.StdEncoding.DecodeString("JVBERi0xLjYKJcOkw7zDtsOfCjIgMCBvYmoKPDwvTGVuZ3RoIDMgMCBSL0ZpbHRlci9GbGF0ZURlY29kZT4+CnN0cmVhbQp4nF3NPwsCMQwF8D2f4s2CNYk1baF0EHRwOwg4iJt/NsFb/PpevUE4Mjwe")
324-
assert.True(t, IsPDFFile(pdf))
325-
assert.False(t, IsPDFFile([]byte("plain text")))
326-
}
327-
328-
func TestIsVideoFile(t *testing.T) {
329-
mp4, _ := base64.StdEncoding.DecodeString("AAAAGGZ0eXBtcDQyAAAAAGlzb21tcDQyAAEI721vb3YAAABsbXZoZAAAAADaBlwX2gZcFwAAA+gA")
330-
assert.True(t, IsVideoFile(mp4))
331-
assert.False(t, IsVideoFile([]byte("plain text")))
332-
}
333-
334-
func TestIsAudioFile(t *testing.T) {
335-
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
336-
assert.True(t, IsAudioFile(mp3))
337-
assert.False(t, IsAudioFile([]byte("plain text")))
338-
}
339-
340248
// TODO: Test EntryIcon
341249

342250
func TestSetupGiteaRoot(t *testing.T) {

modules/git/blob.go

+13
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ import (
1010
"encoding/base64"
1111
"io"
1212
"io/ioutil"
13+
14+
"code.gitea.io/gitea/modules/typesniffer"
1315
)
1416

1517
// This file contains common functions between the gogit and !gogit variants for git Blobs
@@ -82,3 +84,14 @@ func (b *Blob) GetBlobContentBase64() (string, error) {
8284
}
8385
return string(out), nil
8486
}
87+
88+
// GuessContentType guesses the content type of the blob.
89+
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
90+
r, err := b.DataAsync()
91+
if err != nil {
92+
return typesniffer.SniffedType{}, err
93+
}
94+
defer r.Close()
95+
96+
return typesniffer.DetectContentTypeFromReader(r)
97+
}

modules/git/commit.go

-70
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,7 @@ import (
1111
"container/list"
1212
"errors"
1313
"fmt"
14-
"image"
15-
"image/color"
16-
_ "image/gif" // for processing gif images
17-
_ "image/jpeg" // for processing jpeg images
18-
_ "image/png" // for processing png images
1914
"io"
20-
"net/http"
2115
"os/exec"
2216
"strconv"
2317
"strings"
@@ -81,70 +75,6 @@ func (c *Commit) ParentCount() int {
8175
return len(c.Parents)
8276
}
8377

84-
func isImageFile(data []byte) (string, bool) {
85-
contentType := http.DetectContentType(data)
86-
if strings.Contains(contentType, "image/") {
87-
return contentType, true
88-
}
89-
return contentType, false
90-
}
91-
92-
// IsImageFile is a file image type
93-
func (c *Commit) IsImageFile(name string) bool {
94-
blob, err := c.GetBlobByPath(name)
95-
if err != nil {
96-
return false
97-
}
98-
99-
dataRc, err := blob.DataAsync()
100-
if err != nil {
101-
return false
102-
}
103-
defer dataRc.Close()
104-
buf := make([]byte, 1024)
105-
n, _ := dataRc.Read(buf)
106-
buf = buf[:n]
107-
_, isImage := isImageFile(buf)
108-
return isImage
109-
}
110-
111-
// ImageMetaData represents metadata of an image file
112-
type ImageMetaData struct {
113-
ColorModel color.Model
114-
Width int
115-
Height int
116-
ByteSize int64
117-
}
118-
119-
// ImageInfo returns information about the dimensions of an image
120-
func (c *Commit) ImageInfo(name string) (*ImageMetaData, error) {
121-
if !c.IsImageFile(name) {
122-
return nil, nil
123-
}
124-
125-
blob, err := c.GetBlobByPath(name)
126-
if err != nil {
127-
return nil, err
128-
}
129-
reader, err := blob.DataAsync()
130-
if err != nil {
131-
return nil, err
132-
}
133-
defer reader.Close()
134-
config, _, err := image.DecodeConfig(reader)
135-
if err != nil {
136-
return nil, err
137-
}
138-
139-
metadata := ImageMetaData{
140-
ColorModel: config.ColorModel,
141-
Width: config.Width,
142-
Height: config.Height,
143-
ByteSize: blob.Size(),
144-
}
145-
return &metadata, nil
146-
}
147-
14878
// GetCommitByPath return the commit of relative path object.
14979
func (c *Commit) GetCommitByPath(relpath string) (*Commit, error) {
15080
return c.repo.getCommitByPathWithID(c.ID, relpath)

modules/indexer/code/bleve.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ import (
1616

1717
"code.gitea.io/gitea/models"
1818
"code.gitea.io/gitea/modules/analyze"
19-
"code.gitea.io/gitea/modules/base"
2019
"code.gitea.io/gitea/modules/charset"
2120
"code.gitea.io/gitea/modules/git"
2221
"code.gitea.io/gitea/modules/log"
2322
"code.gitea.io/gitea/modules/setting"
2423
"code.gitea.io/gitea/modules/timeutil"
24+
"code.gitea.io/gitea/modules/typesniffer"
2525
"code.gitea.io/gitea/modules/util"
2626

2727
"github.com/blevesearch/bleve/v2"
@@ -211,7 +211,7 @@ func (b *BleveIndexer) addUpdate(batchWriter git.WriteCloserError, batchReader *
211211
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
212212
if err != nil {
213213
return err
214-
} else if !base.IsTextFile(fileContents) {
214+
} else if !typesniffer.DetectContentType(fileContents).IsText() {
215215
// FIXME: UTF-16 files will probably fail here
216216
return nil
217217
}

modules/indexer/code/elastic_search.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ import (
1616

1717
"code.gitea.io/gitea/models"
1818
"code.gitea.io/gitea/modules/analyze"
19-
"code.gitea.io/gitea/modules/base"
2019
"code.gitea.io/gitea/modules/charset"
2120
"code.gitea.io/gitea/modules/git"
2221
"code.gitea.io/gitea/modules/log"
2322
"code.gitea.io/gitea/modules/setting"
2423
"code.gitea.io/gitea/modules/timeutil"
24+
"code.gitea.io/gitea/modules/typesniffer"
2525

2626
"github.com/go-enry/go-enry/v2"
2727
jsoniter "github.com/json-iterator/go"
@@ -210,7 +210,7 @@ func (b *ElasticSearchIndexer) addUpdate(batchWriter git.WriteCloserError, batch
210210
fileContents, err := ioutil.ReadAll(io.LimitReader(batchReader, size))
211211
if err != nil {
212212
return nil, err
213-
} else if !base.IsTextFile(fileContents) {
213+
} else if !typesniffer.DetectContentType(fileContents).IsText() {
214214
// FIXME: UTF-16 files will probably fail here
215215
return nil, nil
216216
}

0 commit comments

Comments
 (0)