@ -5,13 +5,19 @@
package repofiles
import (
"bytes"
"fmt"
"path"
"strings"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
"code.gitea.io/gitea/models"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/lfs"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/sdk/gitea"
)
@ -37,6 +43,70 @@ type UpdateRepoFileOptions struct {
Committer * IdentityOptions
}
// detectEncodingAndBOM inspects the beginning of the blob behind entry and
// returns the name of the detected character encoding together with a flag
// telling whether the content starts with a byte order mark (BOM).
//
// Only the bytes delivered by a single Read of up to 1024 bytes are examined.
// When the LFS server is enabled and lfs.IsPointerFile recognises those bytes,
// the beginning of the referenced LFS object (looked up through repo) is
// examined instead. Every failure on the way - reading the blob, the LFS
// lookup, the detection itself or an unknown charset - results in the default
// answer of "UTF-8" without BOM.
func detectEncodingAndBOM(entry *git.TreeEntry, repo *models.Repository) (string, bool) {
	reader, err := entry.Blob().DataAsync()
	if err != nil {
		// return default
		return "UTF-8", false
	}
	defer reader.Close()

	buf := make([]byte, 1024)
	n, err := reader.Read(buf)
	if err != nil {
		// return default
		return "UTF-8", false
	}
	buf = buf[:n]

	if setting.LFS.StartServer {
		meta := lfs.IsPointerFile(&buf)
		if meta != nil {
			// A pointer without a stored meta object is tolerated: only other
			// lookup errors abort the detection.
			meta, err = repo.GetLFSMetaObjectByOid(meta.Oid)
			if err != nil && err != models.ErrLFSObjectNotExist {
				// return default
				return "UTF-8", false
			}
		}
		if meta != nil {
			dataRc, err := lfs.ReadMetaObject(meta)
			if err != nil {
				// return default
				return "UTF-8", false
			}
			defer dataRc.Close()

			// Sniff the real LFS content rather than the pointer text.
			buf = make([]byte, 1024)
			n, err = dataRc.Read(buf)
			if err != nil {
				// return default
				return "UTF-8", false
			}
			buf = buf[:n]
		}
	}

	encoding, err := base.DetectEncoding(buf)
	if err != nil {
		// just default to utf-8 and no bom
		return "UTF-8", false
	}
	if encoding == "UTF-8" {
		// A prefix check is safe for inputs shorter than the BOM, whereas
		// slicing buf[0:3] would reach past len(buf) into the spare capacity.
		return encoding, bytes.HasPrefix(buf, base.UTF8BOM)
	}

	charsetEncoding, _ := charset.Lookup(encoding)
	if charsetEncoding == nil {
		return "UTF-8", false
	}

	// Decode the sample to UTF-8 so that a BOM in the source encoding shows up
	// as the UTF-8 BOM at the start of the result. The decode error is
	// deliberately ignored: detection is best effort and the sample may stop
	// in the middle of a multi-byte sequence; whatever was decoded before the
	// error is enough for a prefix check.
	result, n, _ := transform.String(charsetEncoding.NewDecoder(), string(buf))

	// n counts the consumed source bytes, not the length of result, so the
	// decoded text may be shorter than the BOM even when n > 2. Use a prefix
	// check instead of slicing result[0:3], which could panic in that case.
	return encoding, n > 2 && strings.HasPrefix(result, string(base.UTF8BOM))
}
// CreateOrUpdateRepoFile adds or updates a file in the given repository
func CreateOrUpdateRepoFile ( repo * models . Repository , doer * models . User , opts * UpdateRepoFileOptions ) ( * gitea . FileResponse , error ) {
// If no branch name is set, assume master
@ -118,6 +188,9 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
opts . LastCommitID = commit . ID . String ( )
}
encoding := "UTF-8"
bom := false
if ! opts . IsNewFile {
fromEntry , err := commit . GetTreeEntryByPath ( fromTreePath )
if err != nil {
@ -151,6 +224,7 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
// haven't been made. We throw an error if one wasn't provided.
return nil , models . ErrSHAOrCommitIDNotProvided { }
}
encoding , bom = detectEncodingAndBOM ( fromEntry , repo )
}
// For the path where this file will be created/updated, we need to make
@ -235,9 +309,28 @@ func CreateOrUpdateRepoFile(repo *models.Repository, doer *models.User, opts *Up
}
content := opts . Content
if bom {
content = string ( base . UTF8BOM ) + content
}
if encoding != "UTF-8" {
charsetEncoding , _ := charset . Lookup ( encoding )
if charsetEncoding != nil {
result , _ , err := transform . String ( charsetEncoding . NewEncoder ( ) , string ( content ) )
if err != nil {
// If we can't encode back into the original encoding, we should just stick with UTF-8
log . Error ( "Error re-encoding %s (%s) as %s - will stay as UTF-8: %v" , opts . TreePath , opts . FromTreePath , encoding , err )
result = content
}
content = result
} else {
log . Error ( "Unknown encoding: %s" , encoding )
}
}
// Reset the opts.Content to our adjusted content to ensure that LFS gets the correct content
opts . Content = content
var lfsMetaObject * models . LFSMetaObject
if filename2attribute2info [ treePath ] != nil && filename2attribute2info [ treePath ] [ "filter" ] == "lfs" {
if setting . LFS . StartServer && filename2attribute2info [ treePath ] != nil && filename2attribute2info [ treePath ] [ "filter" ] == "lfs" {
// OK so we are supposed to LFS this data!
oid , err := models . GenerateLFSOid ( strings . NewReader ( opts . Content ) )
if err != nil {