// Copyright 2015 The Gogs Authors. All rights reserved. // Copyright 2016 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT package gitcmd import ( "bytes" "context" "errors" "fmt" "io" "os" "os/exec" "path/filepath" "strings" "time" "code.gitea.io/gitea/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions "code.gitea.io/gitea/modules/gtprof" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/util" ) // TrustedCmdArgs returns the trusted arguments for git command. // It's mainly for passing user-provided and trusted arguments to git command // In most cases, it shouldn't be used. Use AddXxx function instead type TrustedCmdArgs []internal.CmdArg // defaultCommandExecutionTimeout default command execution timeout duration var defaultCommandExecutionTimeout = 360 * time.Second func SetDefaultCommandExecutionTimeout(timeout time.Duration) { defaultCommandExecutionTimeout = timeout } // DefaultLocale is the default LC_ALL to run git commands in. const DefaultLocale = "C" // Command represents a command with its subcommands or arguments. type Command struct { callerInfo string prog string args []string preErrors []error cmd *exec.Cmd // for debug purpose only configArgs []string opts runOpts cmdCtx context.Context cmdCancel context.CancelFunc cmdFinished context.CancelFunc cmdStartTime time.Time cmdStdinWriter *io.WriteCloser cmdStdoutReader *io.ReadCloser cmdStderrReader *io.ReadCloser cmdManagedStderr *bytes.Buffer } func logArgSanitize(arg string) string { if strings.Contains(arg, "://") && strings.Contains(arg, "@") { return util.SanitizeCredentialURLs(arg) } else if filepath.IsAbs(arg) { base := filepath.Base(arg) dir := filepath.Dir(arg) return ".../" + filepath.Join(filepath.Base(dir), base) } return arg } func (c *Command) LogString() string { // WARNING: this function is for debugging purposes only. 
It's much better than old code (which only joins args with space), // It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here. debugQuote := func(s string) string { if strings.ContainsAny(s, " `'\"\t\r\n") { return fmt.Sprintf("%q", s) } return s } a := make([]string, 0, len(c.args)+1) a = append(a, debugQuote(c.prog)) for i := 0; i < len(c.args); i++ { a = append(a, debugQuote(logArgSanitize(c.args[i]))) } return strings.Join(a, " ") } func (c *Command) ProcessState() string { if c.cmd == nil { return "" } return c.cmd.ProcessState.String() } // NewCommand creates and returns a new Git Command based on given command and arguments. // Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead. func NewCommand(args ...internal.CmdArg) *Command { cargs := make([]string, 0, len(args)) for _, arg := range args { cargs = append(cargs, string(arg)) } return &Command{ prog: GitExecutable, args: cargs, } } func (c *Command) handlePreErrorBrokenCommand(arg string) { c.preErrors = append(c.preErrors, util.ErrorWrap(ErrBrokenCommand, `broken git command argument %q`, arg)) } // isSafeArgumentValue checks if the argument is safe to be used as a value (not an option) func isSafeArgumentValue(s string) bool { return s == "" || s[0] != '-' } // isValidArgumentOption checks if the argument is a valid option (starting with '-'). // It doesn't check whether the option is supported or not func isValidArgumentOption(s string) bool { return s != "" && s[0] == '-' } // AddArguments adds new git arguments (option/value) to the command. It only accepts string literals, or trusted CmdArg. // Type CmdArg is in the internal package, so it can not be used outside of this package directly, // it makes sure that user-provided arguments won't cause RCE risks. 
// User-provided arguments should be passed by other AddXxx functions func (c *Command) AddArguments(args ...internal.CmdArg) *Command { for _, arg := range args { c.args = append(c.args, string(arg)) } return c } // AddOptionValues adds a new option with a list of non-option values // For example: AddOptionValues("--opt", val) means 2 arguments: {"--opt", val}. // The values are treated as dynamic argument values. It equals to: AddArguments("--opt") then AddDynamicArguments(val). func (c *Command) AddOptionValues(opt internal.CmdArg, args ...string) *Command { if !isValidArgumentOption(string(opt)) { c.handlePreErrorBrokenCommand(string(opt)) return c } c.args = append(c.args, string(opt)) c.AddDynamicArguments(args...) return c } // AddOptionFormat adds a new option with a format string and arguments // For example: AddOptionFormat("--opt=%s %s", val1, val2) means 1 argument: {"--opt=val1 val2"}. func (c *Command) AddOptionFormat(opt string, args ...any) *Command { if !isValidArgumentOption(opt) { c.handlePreErrorBrokenCommand(opt) return c } // a quick check to make sure the format string matches the number of arguments, to find low-level mistakes ASAP if strings.Count(strings.ReplaceAll(opt, "%%", ""), "%") != len(args) { c.handlePreErrorBrokenCommand(opt) return c } s := fmt.Sprintf(opt, args...) c.args = append(c.args, s) return c } // AddDynamicArguments adds new dynamic argument values to the command. // The arguments may come from user input and can not be trusted, so no leading '-' is allowed to avoid passing options. // TODO: in the future, this function can be renamed to AddArgumentValues func (c *Command) AddDynamicArguments(args ...string) *Command { for _, arg := range args { if !isSafeArgumentValue(arg) { c.handlePreErrorBrokenCommand(arg) } } if len(c.preErrors) != 0 { return c } c.args = append(c.args, args...) 
return c } // AddDashesAndList adds the "--" and then add the list as arguments, it's usually for adding file list // At the moment, this function can be only called once, maybe in future it can be refactored to support multiple calls (if necessary) func (c *Command) AddDashesAndList(list ...string) *Command { c.args = append(c.args, "--") // Some old code also checks `arg != ""`, IMO it's not necessary. // If the check is needed, the list should be prepared before the call to this function c.args = append(c.args, list...) return c } func (c *Command) AddConfig(key, value string) *Command { kv := key + "=" + value if !isSafeArgumentValue(kv) { c.handlePreErrorBrokenCommand(kv) } else { c.configArgs = append(c.configArgs, "-c", kv) } return c } // ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs // In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead func ToTrustedCmdArgs(args []string) TrustedCmdArgs { ret := make(TrustedCmdArgs, len(args)) for i, arg := range args { ret[i] = internal.CmdArg(arg) } return ret } // runOpts represents parameters to run the command. If UseContextTimeout is specified, then Timeout is ignored. type runOpts struct { Env []string Timeout time.Duration UseContextTimeout bool // Dir is the working dir for the git command, however: // FIXME: this could be incorrect in many cases, for example: // * /some/path/.git // * /some/path/.git/gitea-data/data/repositories/user/repo.git // If "user/repo.git" is invalid/broken, then running git command in it will use "/some/path/.git", and produce unexpected results // The correct approach is to use `--git-dir" global argument Dir string Stdout io.Writer // Stdin is used for passing input to the command // The caller must make sure the Stdin writer is closed properly to finish the Run function. // Otherwise, the Run function may hang for long time or forever, especially when the Git's context deadline is not the same as the caller's. 
// Some common mistakes: // * `defer stdinWriter.Close()` then call `cmd.Run()`: the Run() would never return if the command is killed by timeout // * `go { case <- parentContext.Done(): stdinWriter.Close() }` with `cmd.Run(DefaultTimeout)`: the command would have been killed by timeout but the Run doesn't return until stdinWriter.Close() // * `go { if stdoutReader.Read() err != nil: stdinWriter.Close() }` with `cmd.Run()`: the stdoutReader may never return error if the command is killed by timeout // In the future, ideally the git module itself should have full control of the stdin, to avoid such problems and make it easier to refactor to a better architecture. // Use new functions like WithStdinWriter to avoid such problems. Stdin io.Reader PipelineFunc func(context.Context, context.CancelFunc) error } func commonBaseEnvs() []string { envs := []string{ // Make Gitea use internal git config only, to prevent conflicts with user's git config // It's better to use GIT_CONFIG_GLOBAL, but it requires git >= 2.32, so we still use HOME at the moment. "HOME=" + HomeDir(), // Avoid using system git config, it would cause problems (eg: use macOS osxkeychain to show a modal dialog, auto installing lfs hooks) // This might be a breaking change in 1.24, because some users said that they have put some configs like "receive.certNonceSeed" in "/etc/gitconfig" // For these users, they need to migrate the necessary configs to Gitea's git config file manually. "GIT_CONFIG_NOSYSTEM=1", // Ignore replace references (https://git-scm.com/docs/git-replace) "GIT_NO_REPLACE_OBJECTS=1", } // some environment variables should be passed to git command passThroughEnvKeys := []string{ "GNUPGHOME", // git may call gnupg to do commit signing } for _, key := range passThroughEnvKeys { if val, ok := os.LookupEnv(key); ok { envs = append(envs, key+"="+val) } } return envs } // CommonGitCmdEnvs returns the common environment variables for a "git" command. 
func CommonGitCmdEnvs() []string { return append(commonBaseEnvs(), []string{ "LC_ALL=" + DefaultLocale, "GIT_TERMINAL_PROMPT=0", // avoid prompting for credentials interactively, supported since git v2.3 }...) } // CommonCmdServEnvs is like CommonGitCmdEnvs, but it only returns minimal required environment variables for the "gitea serv" command func CommonCmdServEnvs() []string { return commonBaseEnvs() } var ErrBrokenCommand = errors.New("git command is broken") func (c *Command) WithDir(dir string) *Command { c.opts.Dir = dir return c } func (c *Command) WithEnv(env []string) *Command { c.opts.Env = env return c } func (c *Command) WithTimeout(timeout time.Duration) *Command { c.opts.Timeout = timeout return c } func (c *Command) WithStdoutReader(r *io.ReadCloser) *Command { c.cmdStdoutReader = r return c } // WithStdout is deprecated, use WithStdoutReader instead func (c *Command) WithStdout(stdout io.Writer) *Command { c.opts.Stdout = stdout return c } func (c *Command) WithStderrReader(r *io.ReadCloser) *Command { c.cmdStderrReader = r return c } func (c *Command) WithStdinWriter(w *io.WriteCloser) *Command { c.cmdStdinWriter = w return c } // WithStdin is deprecated, use WithStdinWriter instead func (c *Command) WithStdin(stdin io.Reader) *Command { c.opts.Stdin = stdin return c } func (c *Command) WithPipelineFunc(f func(context.Context, context.CancelFunc) error) *Command { c.opts.PipelineFunc = f return c } func (c *Command) WithUseContextTimeout(useContextTimeout bool) *Command { c.opts.UseContextTimeout = useContextTimeout return c } // WithParentCallerInfo can be used to set the caller info (usually function name) of the parent function of the caller. // For most cases, "Run" family functions can get its caller info automatically // But if you need to call "Run" family functions in a wrapper function: "FeatureFunc -> GeneralWrapperFunc -> RunXxx", // then you can to call this function in GeneralWrapperFunc to set the caller info of FeatureFunc. 
// The caller info can only be set once. func (c *Command) WithParentCallerInfo(optInfo ...string) *Command { if c.callerInfo != "" { return c } if len(optInfo) > 0 { c.callerInfo = optInfo[0] return c } skip := 1 /*parent "wrap/run" functions*/ + 1 /*this function*/ callerFuncName := util.CallerFuncName(skip) callerInfo := callerFuncName if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 { callerInfo = callerInfo[pos+1:] } c.callerInfo = callerInfo return c } func (c *Command) Start(ctx context.Context) (retErr error) { if c.cmd != nil { // this is a programming error, it will cause serious deadlock problems, so it must be fixed. panic("git command has already been started") } defer func() { if retErr != nil { // release the pipes to avoid resource leak safeClosePtrCloser(c.cmdStdoutReader) safeClosePtrCloser(c.cmdStderrReader) safeClosePtrCloser(c.cmdStdinWriter) // if error occurs, we must also finish the task, otherwise, cmdFinished will be called in "Wait" function if c.cmdFinished != nil { c.cmdFinished() } } }() if len(c.preErrors) != 0 { // In most cases, such error shouldn't happen. If it happens, it must be a programming error, so we log it as error level with more details err := errors.Join(c.preErrors...) 
log.Error("git command: %s, error: %s", c.LogString(), err) return err } // We must not change the provided options timeout := c.opts.Timeout if timeout <= 0 { timeout = defaultCommandExecutionTimeout } cmdLogString := c.LogString() if c.callerInfo == "" { c.WithParentCallerInfo() } // these logs are for debugging purposes only, so no guarantee of correctness or stability desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", c.callerInfo, logArgSanitize(c.opts.Dir), cmdLogString) log.Debug("git.Command: %s", desc) _, span := gtprof.GetTracer().Start(ctx, gtprof.TraceSpanGitRun) defer span.End() span.SetAttributeString(gtprof.TraceAttrFuncCaller, c.callerInfo) span.SetAttributeString(gtprof.TraceAttrGitCommand, cmdLogString) if c.opts.UseContextTimeout { c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContext(ctx, desc) } else { c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContextTimeout(ctx, timeout, desc) } c.cmdStartTime = time.Now() cmd := exec.CommandContext(ctx, c.prog, append(c.configArgs, c.args...)...) c.cmd = cmd // for debug purpose only if c.opts.Env == nil { cmd.Env = os.Environ() } else { cmd.Env = c.opts.Env } process.SetSysProcAttribute(cmd) cmd.Env = append(cmd.Env, CommonGitCmdEnvs()...) 
cmd.Dir = c.opts.Dir cmd.Stdout = c.opts.Stdout cmd.Stdin = c.opts.Stdin if _, err := safeAssignPipe(c.cmdStdinWriter, cmd.StdinPipe); err != nil { return err } if _, err := safeAssignPipe(c.cmdStdoutReader, cmd.StdoutPipe); err != nil { return err } if _, err := safeAssignPipe(c.cmdStderrReader, cmd.StderrPipe); err != nil { return err } if c.cmdManagedStderr != nil { if cmd.Stderr != nil { panic("CombineStderr needs managed (but not caller-provided) stderr pipe") } cmd.Stderr = c.cmdManagedStderr } return cmd.Start() } func (c *Command) Wait() error { defer func() { safeClosePtrCloser(c.cmdStdoutReader) safeClosePtrCloser(c.cmdStderrReader) safeClosePtrCloser(c.cmdStdinWriter) c.cmdFinished() }() cmd, ctx, cancel := c.cmd, c.cmdCtx, c.cmdCancel if c.opts.PipelineFunc != nil { err := c.opts.PipelineFunc(ctx, cancel) if err != nil { cancel() errWait := cmd.Wait() return errors.Join(err, errWait) } } errWait := cmd.Wait() elapsed := time.Since(c.cmdStartTime) if elapsed > time.Second { log.Debug("slow git.Command.Run: %s (%s)", c, elapsed) } errCause := context.Cause(c.cmdCtx) if errors.Is(errCause, context.Canceled) { // if the ctx is canceled without other error, it must be caused by normal cancellation return errCause } if errWait != nil { // no matter whether there is other cause error, if "Wait" also has error, // it's likely the error is caused by Wait error (from git command) return errWait } return errCause } func (c *Command) StartWithStderr(ctx context.Context) RunStdError { c.cmdManagedStderr = &bytes.Buffer{} err := c.Start(ctx) if err != nil { return &runStdError{err: err} } return nil } func (c *Command) WaitWithStderr() RunStdError { if c.cmdManagedStderr == nil { panic("CombineStderr needs managed (but not caller-provided) stderr pipe") } errWait := c.Wait() if errWait == nil { // if no exec error but only stderr output, the stderr output is still saved in "c.cmdManagedStderr" and can be read later return nil } return &runStdError{err: errWait, 
stderr: util.UnsafeBytesToString(c.cmdManagedStderr.Bytes())} } func (c *Command) RunWithStderr(ctx context.Context) RunStdError { if err := c.StartWithStderr(ctx); err != nil { return &runStdError{err: err} } return c.WaitWithStderr() } func (c *Command) Run(ctx context.Context) (err error) { if err = c.Start(ctx); err != nil { return err } return c.Wait() } type RunStdError interface { error Unwrap() error Stderr() string } type runStdError struct { err error // usually the low-level error like `*exec.ExitError` stderr string // git command's stderr output errMsg string // the cached error message for Error() method } func (r *runStdError) Error() string { // FIXME: GIT-CMD-STDERR: it is a bad design, the stderr should not be put in the error message // But a lot of code only checks `strings.Contains(err.Error(), "git error")` if r.errMsg == "" { r.errMsg = fmt.Sprintf("%s - %s", r.err.Error(), strings.TrimSpace(r.stderr)) } return r.errMsg } func (r *runStdError) Unwrap() error { return r.err } func (r *runStdError) Stderr() string { return r.stderr } func ErrorAsStderr(err error) (string, bool) { var runErr RunStdError if errors.As(err, &runErr) { return runErr.Stderr(), true } return "", false } func StderrHasPrefix(err error, prefix string) bool { stderr, ok := ErrorAsStderr(err) if !ok { return false } return strings.HasPrefix(stderr, prefix) } func IsErrorExitCode(err error, code int) bool { var exitError *exec.ExitError if errors.As(err, &exitError) { return exitError.ExitCode() == code } return false } // RunStdString runs the command and returns stdout/stderr as string. and store stderr to returned error (err combined with stderr). 
func (c *Command) RunStdString(ctx context.Context) (stdout, stderr string, runErr RunStdError) {
	// the caller info is captured here (not in runStdBytes), the number of skipped stack frames depends on it
	stdoutBytes, stderrBytes, runErr := c.WithParentCallerInfo().runStdBytes(ctx)
	return util.UnsafeBytesToString(stdoutBytes), util.UnsafeBytesToString(stderrBytes), runErr
}

// RunStdBytes runs the command and returns stdout/stderr as bytes. and store stderr to returned error (err combined with stderr).
func (c *Command) RunStdBytes(ctx context.Context) (stdout, stderr []byte, runErr RunStdError) {
	return c.WithParentCallerInfo().runStdBytes(ctx)
}

// runStdBytes is the shared implementation of RunStdString and RunStdBytes:
// it collects stdout into a new buffer, runs the command with managed stderr, and returns both outputs.
// It panics if a stdout writer, a stdout reader or a stderr reader has already been set on the command.
func (c *Command) runStdBytes(ctx context.Context) ([]byte, []byte, RunStdError) {
	if c.opts.Stdout != nil || c.cmdStdoutReader != nil || c.cmdStderrReader != nil {
		// we must panic here, otherwise there would be bugs if developers set Stdout/Stderr by mistake, and it would be very difficult to debug
		panic("stdout and stderr field must be nil when using RunStdBytes")
	}
	stdoutBuf := &bytes.Buffer{}
	// WithParentCallerInfo does nothing if the caller info has been set, the caller info can only be set once
	err := c.WithParentCallerInfo().
		WithStdout(stdoutBuf).
		RunWithStderr(ctx)
	return stdoutBuf.Bytes(), c.cmdManagedStderr.Bytes(), err
}

// DebugKill kills the underlying process and ignores the kill error.
// NOTE(review): it dereferences c.cmd.Process without a nil check, so it presumably must only be called
// after a successful start - confirm against the callers.
func (c *Command) DebugKill() {
	_ = c.cmd.Process.Kill()
}