Skip to content

Commit 4bc5517

Browse files
wxiaoguang and pull[bot]
authored and committed
Do not "guess" the file encoding/BOM when using API to upload files (go-gitea#25828)
Related issue: go-gitea#18368. It doesn't seem right to "guess" the file encoding/BOM when using the API to upload files. The API should save the uploaded content as-is.
1 parent 959d7c7 commit 4bc5517

File tree

1 file changed

+3
-102
lines changed

1 file changed

+3
-102
lines changed

services/repository/files/update.go

Lines changed: 3 additions & 102 deletions
Original file line number | Diff line number | Diff line change
@@ -4,29 +4,22 @@
44
package files
55

66
import (
7-
"bytes"
87
"context"
98
"fmt"
109
"path"
1110
"strings"
1211
"time"
1312

1413
"code.gitea.io/gitea/models"
15-
"code.gitea.io/gitea/models/db"
1614
git_model "code.gitea.io/gitea/models/git"
1715
repo_model "code.gitea.io/gitea/models/repo"
1816
user_model "code.gitea.io/gitea/models/user"
19-
"code.gitea.io/gitea/modules/charset"
2017
"code.gitea.io/gitea/modules/git"
2118
"code.gitea.io/gitea/modules/lfs"
2219
"code.gitea.io/gitea/modules/log"
2320
"code.gitea.io/gitea/modules/setting"
2421
"code.gitea.io/gitea/modules/structs"
25-
"code.gitea.io/gitea/modules/util"
2622
asymkey_service "code.gitea.io/gitea/services/asymkey"
27-
28-
stdcharset "golang.org/x/net/html/charset"
29-
"golang.org/x/text/transform"
3023
)
3124

3225
// IdentityOptions for a person's identity like an author or committer
@@ -66,78 +59,9 @@ type ChangeRepoFilesOptions struct {
6659
type RepoFileOptions struct {
6760
treePath string
6861
fromTreePath string
69-
encoding string
70-
bom bool
7162
executable bool
7263
}
7364

74-
func detectEncodingAndBOM(entry *git.TreeEntry, repo *repo_model.Repository) (string, bool) {
75-
reader, err := entry.Blob().DataAsync()
76-
if err != nil {
77-
// return default
78-
return "UTF-8", false
79-
}
80-
defer reader.Close()
81-
buf := make([]byte, 1024)
82-
n, err := util.ReadAtMost(reader, buf)
83-
if err != nil {
84-
// return default
85-
return "UTF-8", false
86-
}
87-
buf = buf[:n]
88-
89-
if setting.LFS.StartServer {
90-
pointer, _ := lfs.ReadPointerFromBuffer(buf)
91-
if pointer.IsValid() {
92-
meta, err := git_model.GetLFSMetaObjectByOid(db.DefaultContext, repo.ID, pointer.Oid)
93-
if err != nil && err != git_model.ErrLFSObjectNotExist {
94-
// return default
95-
return "UTF-8", false
96-
}
97-
if meta != nil {
98-
dataRc, err := lfs.ReadMetaObject(pointer)
99-
if err != nil {
100-
// return default
101-
return "UTF-8", false
102-
}
103-
defer dataRc.Close()
104-
buf = make([]byte, 1024)
105-
n, err = util.ReadAtMost(dataRc, buf)
106-
if err != nil {
107-
// return default
108-
return "UTF-8", false
109-
}
110-
buf = buf[:n]
111-
}
112-
}
113-
}
114-
115-
encoding, err := charset.DetectEncoding(buf)
116-
if err != nil {
117-
// just default to utf-8 and no bom
118-
return "UTF-8", false
119-
}
120-
if encoding == "UTF-8" {
121-
return encoding, bytes.Equal(buf[0:3], charset.UTF8BOM)
122-
}
123-
charsetEncoding, _ := stdcharset.Lookup(encoding)
124-
if charsetEncoding == nil {
125-
return "UTF-8", false
126-
}
127-
128-
result, n, err := transform.String(charsetEncoding.NewDecoder(), string(buf))
129-
if err != nil {
130-
// return default
131-
return "UTF-8", false
132-
}
133-
134-
if n > 2 {
135-
return encoding, bytes.Equal([]byte(result)[0:3], charset.UTF8BOM)
136-
}
137-
138-
return encoding, false
139-
}
140-
14165
// ChangeRepoFiles adds, updates or removes multiple files in the given repository
14266
func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *user_model.User, opts *ChangeRepoFilesOptions) (*structs.FilesResponse, error) {
14367
// If no branch name is set, assume default branch
@@ -184,8 +108,6 @@ func ChangeRepoFiles(ctx context.Context, repo *repo_model.Repository, doer *use
184108
file.Options = &RepoFileOptions{
185109
treePath: treePath,
186110
fromTreePath: fromTreePath,
187-
encoding: "UTF-8",
188-
bom: false,
189111
executable: false,
190112
}
191113
treePaths = append(treePaths, treePath)
@@ -381,7 +303,6 @@ func handleCheckErrors(file *ChangeRepoFile, commit *git.Commit, opts *ChangeRep
381303
// haven't been made. We throw an error if one wasn't provided.
382304
return models.ErrSHAOrCommitIDNotProvided{}
383305
}
384-
file.Options.encoding, file.Options.bom = detectEncodingAndBOM(fromEntry, repo)
385306
file.Options.executable = fromEntry.IsExecutable()
386307
}
387308
if file.Operation == "create" || file.Operation == "update" {
@@ -466,28 +387,8 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
466387
}
467388
}
468389

469-
content := file.Content
470-
if file.Options.bom {
471-
content = string(charset.UTF8BOM) + content
472-
}
473-
if file.Options.encoding != "UTF-8" {
474-
charsetEncoding, _ := stdcharset.Lookup(file.Options.encoding)
475-
if charsetEncoding != nil {
476-
result, _, err := transform.String(charsetEncoding.NewEncoder(), content)
477-
if err != nil {
478-
// Look if we can't encode back in to the original we should just stick with utf-8
479-
log.Error("Error re-encoding %s (%s) as %s - will stay as UTF-8: %v", file.TreePath, file.FromTreePath, file.Options.encoding, err)
480-
result = content
481-
}
482-
content = result
483-
} else {
484-
log.Error("Unknown encoding: %s", file.Options.encoding)
485-
}
486-
}
487-
// Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
488-
file.Content = content
390+
treeObjectContent := file.Content
489391
var lfsMetaObject *git_model.LFSMetaObject
490-
491392
if setting.LFS.StartServer && hasOldBranch {
492393
// Check there is no way this can return multiple infos
493394
filename2attribute2info, err := t.gitRepo.CheckAttribute(git.CheckAttributeOpts{
@@ -506,12 +407,12 @@ func CreateOrUpdateFile(ctx context.Context, t *TemporaryUploadRepository, file
506407
return err
507408
}
508409
lfsMetaObject = &git_model.LFSMetaObject{Pointer: pointer, RepositoryID: repoID}
509-
content = pointer.StringContent()
410+
treeObjectContent = pointer.StringContent()
510411
}
511412
}
512413

513414
// Add the object to the database
514-
objectHash, err := t.HashObject(strings.NewReader(content))
415+
objectHash, err := t.HashObject(strings.NewReader(treeObjectContent))
515416
if err != nil {
516417
return err
517418
}

0 commit comments

Comments
 (0)