Skip to content

Commit cf80f82

Browse files
GiteaBotwxiaoguangdelvhtechknowlogicklunny
authored
Do not recognize text files as audio (go-gitea#23355) (go-gitea#23368)
Backport go-gitea#23355 Close go-gitea#17108 This PR uses a trick (removing the ID3 tag) to detect the content again to see whether the content is text type. Co-authored-by: wxiaoguang <[email protected]> Co-authored-by: delvh <[email protected]> Co-authored-by: techknowlogick <[email protected]> Co-authored-by: Lunny Xiao <[email protected]>
1 parent ed25e09 commit cf80f82

File tree

2 files changed

+14
-0
lines changed

2 files changed

+14
-0
lines changed

modules/typesniffer/typesniffer.go

+10
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ func DetectContentType(data []byte) SniffedType {
106106
}
107107
}
108108

109+
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
110+
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
111+
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
112+
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
113+
ct2 := http.DetectContentType(data[3:])
114+
if strings.HasPrefix(ct2, "text/") {
115+
ct = ct2
116+
}
117+
}
118+
109119
return SniffedType{ct}
110120
}
111121

modules/typesniffer/typesniffer_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,10 @@ func TestIsAudio(t *testing.T) {
109109
mp3, _ := base64.StdEncoding.DecodeString("SUQzBAAAAAABAFRYWFgAAAASAAADbWFqb3JfYnJhbmQAbXA0MgBUWFhYAAAAEQAAA21pbm9yX3Zl")
110110
assert.True(t, DetectContentType(mp3).IsAudio())
111111
assert.False(t, DetectContentType([]byte("plain text")).IsAudio())
112+
113+
assert.True(t, DetectContentType([]byte("ID3Toy\000")).IsAudio())
114+
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ...")).IsText()) // test ID3 tag for plain text
115+
assert.True(t, DetectContentType([]byte("ID3Toy\n====\t* hi 🌞, ..."+"🌛"[0:2])).IsText()) // test ID3 tag with incomplete UTF8 char
112116
}
113117

114118
func TestDetectContentTypeFromReader(t *testing.T) {

0 commit comments

Comments
 (0)