// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package git
import (
"bytes"
"context"
"fmt"
"io"
"os"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
)
// CheckAttributeOpts represents the possible options to CheckAttribute
type CheckAttributeOpts struct {
CachedOnly bool
AllAttributes bool
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2 years ago
Attributes [ ] string
Filenames [ ] string
IndexFile string
WorkTree string
}
// CheckAttribute return the Blame object of file
func ( repo * Repository ) CheckAttribute ( opts CheckAttributeOpts ) ( map [ string ] map [ string ] string , error ) {
env := [ ] string { }
if len ( opts . IndexFile ) > 0 {
env = append ( env , "GIT_INDEX_FILE=" + opts . IndexFile )
}
if len ( opts . WorkTree ) > 0 {
env = append ( env , "GIT_WORK_TREE=" + opts . WorkTree )
}
if len ( env ) > 0 {
env = append ( os . Environ ( ) , env ... )
}
stdOut := new ( bytes . Buffer )
stdErr := new ( bytes . Buffer )
cmd := NewCommand ( repo . Ctx , "check-attr" , "-z" )
if opts . AllAttributes {
cmd . AddArguments ( "-a" )
} else {
for _ , attribute := range opts . Attributes {
if attribute != "" {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2 years ago
cmd . AddDynamicArguments ( attribute )
}
}
}
if opts . CachedOnly {
cmd . AddArguments ( "--cached" )
}
cmd . AddDashesAndList ( opts . Filenames ... )
if err := cmd . Run ( & RunOpts {
Env : env ,
Dir : repo . Path ,
Stdout : stdOut ,
Stderr : stdErr ,
} ) ; err != nil {
return nil , fmt . Errorf ( "failed to run check-attr: %w\n%s\n%s" , err , stdOut . String ( ) , stdErr . String ( ) )
}
// FIXME: This is incorrect on versions < 1.8.5
fields := bytes . Split ( stdOut . Bytes ( ) , [ ] byte { '\000' } )
if len ( fields ) % 3 != 1 {
return nil , fmt . Errorf ( "wrong number of fields in return from check-attr" )
}
name2attribute2info := make ( map [ string ] map [ string ] string )
for i := 0 ; i < ( len ( fields ) / 3 ) ; i ++ {
filename := string ( fields [ 3 * i ] )
attribute := string ( fields [ 3 * i + 1 ] )
info := string ( fields [ 3 * i + 2 ] )
attribute2info := name2attribute2info [ filename ]
if attribute2info == nil {
attribute2info = make ( map [ string ] string )
}
attribute2info [ attribute ] = info
name2attribute2info [ filename ] = attribute2info
}
return name2attribute2info , nil
}
// CheckAttributeReader provides a reader for check-attribute content that can be long running
type CheckAttributeReader struct {
// params
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2 years ago
Attributes [ ] string
Repo * Repository
IndexFile string
WorkTree string
stdinReader io . ReadCloser
stdinWriter * os . File
stdOut attributeWriter
cmd * Command
env [ ] string
ctx context . Context
cancel context . CancelFunc
}
// Init initializes the CheckAttributeReader
func ( c * CheckAttributeReader ) Init ( ctx context . Context ) error {
if len ( c . Attributes ) == 0 {
lw := new ( nulSeparatedAttributeWriter )
lw . attributes = make ( chan attributeTriple )
lw . closed = make ( chan struct { } )
c . stdOut = lw
c . stdOut . Close ( )
return fmt . Errorf ( "no provided Attributes to check" )
}
c . ctx , c . cancel = context . WithCancel ( ctx )
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2 years ago
c . cmd = NewCommand ( c . ctx , "check-attr" , "--stdin" , "-z" )
if len ( c . IndexFile ) > 0 {
c . cmd . AddArguments ( "--cached" )
c . env = append ( c . env , "GIT_INDEX_FILE=" + c . IndexFile )
}
if len ( c . WorkTree ) > 0 {
c . env = append ( c . env , "GIT_WORK_TREE=" + c . WorkTree )
}
c . env = append ( c . env , "GIT_FLUSH=1" )
c . cmd . AddDynamicArguments ( c . Attributes ... )
var err error
c . stdinReader , c . stdinWriter , err = os . Pipe ( )
if err != nil {
c . cancel ( )
return err
}
lw := new ( nulSeparatedAttributeWriter )
lw . attributes = make ( chan attributeTriple , 5 )
lw . closed = make ( chan struct { } )
c . stdOut = lw
return nil
}
// Run run cmd
func ( c * CheckAttributeReader ) Run ( ) error {
defer func ( ) {
_ = c . stdinReader . Close ( )
_ = c . stdOut . Close ( )
} ( )
stdErr := new ( bytes . Buffer )
err := c . cmd . Run ( & RunOpts {
Env : c . env ,
Dir : c . Repo . Path ,
Stdin : c . stdinReader ,
Stdout : c . stdOut ,
Stderr : stdErr ,
} )
if err != nil && // If there is an error we need to return but:
c . ctx . Err ( ) != err && // 1. Ignore the context error if the context is cancelled or exceeds the deadline (RunWithContext could return c.ctx.Err() which is Canceled or DeadlineExceeded)
err . Error ( ) != "signal: killed" { // 2. We should not pass up errors due to the program being killed
return fmt . Errorf ( "failed to run attr-check. Error: %w\nStderr: %s" , err , stdErr . String ( ) )
}
return nil
}
// CheckPath check attr for given path
func ( c * CheckAttributeReader ) CheckPath ( path string ) ( rs map [ string ] string , err error ) {
defer func ( ) {
if err != nil && err != c . ctx . Err ( ) {
log . Error ( "Unexpected error when checking path %s in %s. Error: %v" , path , c . Repo . Path , err )
}
} ( )
select {
case <- c . ctx . Done ( ) :
return nil , c . ctx . Err ( )
default :
}
if _ , err = c . stdinWriter . Write ( [ ] byte ( path + "\x00" ) ) ; err != nil {
defer c . Close ( )
return nil , err
}
rs = make ( map [ string ] string )
for range c . Attributes {
select {
case attr , ok := <- c . stdOut . ReadAttribute ( ) :
if ! ok {
return nil , c . ctx . Err ( )
}
rs [ attr . Attribute ] = attr . Value
case <- c . ctx . Done ( ) :
return nil , c . ctx . Err ( )
}
}
return rs , nil
}
// Close close pip after use
func ( c * CheckAttributeReader ) Close ( ) error {
c . cancel ( )
err := c . stdinWriter . Close ( )
return err
}
type attributeWriter interface {
io . WriteCloser
ReadAttribute ( ) <- chan attributeTriple
}
type attributeTriple struct {
Filename string
Attribute string
Value string
}
type nulSeparatedAttributeWriter struct {
tmp [ ] byte
attributes chan attributeTriple
closed chan struct { }
working attributeTriple
pos int
}
func ( wr * nulSeparatedAttributeWriter ) Write ( p [ ] byte ) ( n int , err error ) {
l , read := len ( p ) , 0
nulIdx := bytes . IndexByte ( p , '\x00' )
for nulIdx >= 0 {
wr . tmp = append ( wr . tmp , p [ : nulIdx ] ... )
switch wr . pos {
case 0 :
wr . working = attributeTriple {
Filename : string ( wr . tmp ) ,
}
case 1 :
wr . working . Attribute = string ( wr . tmp )
case 2 :
wr . working . Value = string ( wr . tmp )
}
wr . tmp = wr . tmp [ : 0 ]
wr . pos ++
if wr . pos > 2 {
wr . attributes <- wr . working
wr . pos = 0
}
read += nulIdx + 1
if l > read {
p = p [ nulIdx + 1 : ]
nulIdx = bytes . IndexByte ( p , '\x00' )
} else {
return l , nil
}
}
wr . tmp = append ( wr . tmp , p ... )
return len ( p ) , nil
}
func ( wr * nulSeparatedAttributeWriter ) ReadAttribute ( ) <- chan attributeTriple {
return wr . attributes
}
func ( wr * nulSeparatedAttributeWriter ) Close ( ) error {
select {
case <- wr . closed :
return nil
default :
}
close ( wr . attributes )
close ( wr . closed )
return nil
}
// Create a check attribute reader for the current repository and provided commit ID
func ( repo * Repository ) CheckAttributeReader ( commitID string ) ( * CheckAttributeReader , context . CancelFunc ) {
indexFilename , worktree , deleteTemporaryFile , err := repo . ReadTreeToTemporaryIndex ( commitID )
if err != nil {
return nil , func ( ) { }
}
checker := & CheckAttributeReader {
Attributes : [ ] string { "linguist-vendored" , "linguist-generated" , "linguist-language" , "gitlab-language" , "linguist-documentation" , "linguist-detectable" } ,
Repo : repo ,
IndexFile : indexFilename ,
WorkTree : worktree ,
}
ctx , cancel := context . WithCancel ( repo . Ctx )
if err := checker . Init ( ctx ) ; err != nil {
log . Error ( "Unable to open checker for %s. Error: %v" , commitID , err )
} else {
go func ( ) {
err := checker . Run ( )
if err != nil && err != ctx . Err ( ) {
log . Error ( "Unable to open checker for %s. Error: %v" , commitID , err )
}
cancel ( )
} ( )
}
deferable := func ( ) {
_ = checker . Close ( )
cancel ( )
deleteTemporaryFile ( )
}
return checker , deferable
}
// true if "set"/"true", false if "unset"/"false", none otherwise
func attributeToBool ( attr map [ string ] string , name string ) optional . Option [ bool ] {
if value , has := attr [ name ] ; has && value != "unspecified" {
switch value {
case "set" , "true" :
return optional . Some ( true )
case "unset" , "false" :
return optional . Some ( false )
}
}
return optional . None [ bool ] ( )
}
func attributeToString ( attr map [ string ] string , name string ) optional . Option [ string ] {
if value , has := attr [ name ] ; has && value != "unspecified" {
return optional . Some ( value )
}
return optional . None [ string ] ( )
}