Files
gitea/modules/git/gitcmd/command.go
Lunny Xiao 1463426a27 Use merge tree to detect conflicts when possible (#36400)
In Git 2.38, the `merge-tree` command introduced the `--write-tree`
option, which works directly on bare repositories. In Git 2.40, a new parameter `--merge-base` introduced so we require Git 2.40 to use the merge tree feature.

This option produces the merged tree object ID, allowing us to perform
diffs between commits without creating a temporary repository. By
avoiding the overhead of setting up and tearing down temporary repos,
this approach delivers a notable performance improvement.

It also fixes a possible situation that conflict files might be empty
but it's a conflict status according to
https://git-scm.com/docs/git-merge-tree#_mistakes_to_avoid

Replace #35542

---------

Signed-off-by: Lunny Xiao <xiaolunwen@gmail.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
2026-01-27 11:57:20 -08:00

577 lines
20 KiB
Go

// Copyright 2015 The Gogs Authors. All rights reserved.
// Copyright 2016 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package gitcmd
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"code.gitea.io/gitea/modules/git/internal" //nolint:depguard // only this file can use the internal type CmdArg, other files and packages should use AddXxx functions
"code.gitea.io/gitea/modules/gtprof"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/util"
)
// TrustedCmdArgs returns the trusted arguments for git command.
// It's mainly for passing user-provided and trusted arguments to git command
// In most cases, it shouldn't be used. Use AddXxx function instead
type TrustedCmdArgs []internal.CmdArg
// Command represents a command with its subcommands or arguments.
type Command struct {
callerInfo string
prog string
args []string
preErrors []error
configArgs []string
opts runOpts
cmd *exec.Cmd
cmdCtx context.Context
cmdCancel process.CancelCauseFunc
cmdFinished process.FinishedFunc
cmdStartTime time.Time
parentPipeFiles []*os.File
parentPipeReaders []*os.File
childrenPipeFiles []*os.File
// only os.Pipe and in-memory buffers can work with Stdin safely, see https://github.com/golang/go/issues/77227 if the command would exit unexpectedly
cmdStdin io.Reader
cmdStdout io.Writer
cmdStderr io.Writer
cmdManagedStderr *bytes.Buffer
}
func logArgSanitize(arg string) string {
if strings.Contains(arg, "://") && strings.Contains(arg, "@") {
return util.SanitizeCredentialURLs(arg)
} else if filepath.IsAbs(arg) {
base := filepath.Base(arg)
dir := filepath.Dir(arg)
return ".../" + filepath.Join(filepath.Base(dir), base)
}
return arg
}
func (c *Command) LogString() string {
// WARNING: this function is for debugging purposes only. It's much better than old code (which only joins args with space),
// It's impossible to make a simple and 100% correct implementation of argument quoting for different platforms here.
debugQuote := func(s string) string {
if strings.ContainsAny(s, " `'\"\t\r\n") {
return fmt.Sprintf("%q", s)
}
return s
}
a := make([]string, 0, len(c.args)+1)
a = append(a, debugQuote(c.prog))
for i := 0; i < len(c.args); i++ {
a = append(a, debugQuote(logArgSanitize(c.args[i])))
}
return strings.Join(a, " ")
}
func (c *Command) ProcessState() string {
if c.cmd == nil {
return ""
}
return c.cmd.ProcessState.String()
}
// NewCommand creates and returns a new Git Command based on given command and arguments.
// Each argument should be safe to be trusted. User-provided arguments should be passed to AddDynamicArguments instead.
func NewCommand(args ...internal.CmdArg) *Command {
cargs := make([]string, 0, len(args))
for _, arg := range args {
cargs = append(cargs, string(arg))
}
return &Command{
prog: GitExecutable,
args: cargs,
}
}
func (c *Command) handlePreErrorBrokenCommand(arg string) {
c.preErrors = append(c.preErrors, util.ErrorWrap(ErrBrokenCommand, `broken git command argument %q`, arg))
}
// isSafeArgumentValue checks if the argument is safe to be used as a value (not an option)
func isSafeArgumentValue(s string) bool {
return s == "" || s[0] != '-'
}
// isValidArgumentOption checks if the argument is a valid option (starting with '-').
// It doesn't check whether the option is supported or not
func isValidArgumentOption(s string) bool {
return s != "" && s[0] == '-'
}
// AddArguments adds new git arguments (option/value) to the command. It only accepts string literals, or trusted CmdArg.
// Type CmdArg is in the internal package, so it can not be used outside of this package directly,
// it makes sure that user-provided arguments won't cause RCE risks.
// User-provided arguments should be passed by other AddXxx functions
func (c *Command) AddArguments(args ...internal.CmdArg) *Command {
for _, arg := range args {
c.args = append(c.args, string(arg))
}
return c
}
// AddOptionValues adds a new option with a list of non-option values
// For example: AddOptionValues("--opt", val) means 2 arguments: {"--opt", val}.
// The values are treated as dynamic argument values. It equals to: AddArguments("--opt") then AddDynamicArguments(val).
func (c *Command) AddOptionValues(opt internal.CmdArg, args ...string) *Command {
if !isValidArgumentOption(string(opt)) {
c.handlePreErrorBrokenCommand(string(opt))
return c
}
c.args = append(c.args, string(opt))
c.AddDynamicArguments(args...)
return c
}
// AddOptionFormat adds a new option with a format string and arguments
// For example: AddOptionFormat("--opt=%s %s", val1, val2) means 1 argument: {"--opt=val1 val2"}.
func (c *Command) AddOptionFormat(opt string, args ...any) *Command {
if !isValidArgumentOption(opt) {
c.handlePreErrorBrokenCommand(opt)
return c
}
// a quick check to make sure the format string matches the number of arguments, to find low-level mistakes ASAP
if strings.Count(strings.ReplaceAll(opt, "%%", ""), "%") != len(args) {
c.handlePreErrorBrokenCommand(opt)
return c
}
s := fmt.Sprintf(opt, args...)
c.args = append(c.args, s)
return c
}
// AddDynamicArguments adds new dynamic argument values to the command.
// The arguments may come from user input and can not be trusted, so no leading '-' is allowed to avoid passing options.
// TODO: in the future, this function can be renamed to AddArgumentValues
func (c *Command) AddDynamicArguments(args ...string) *Command {
for _, arg := range args {
if !isSafeArgumentValue(arg) {
c.handlePreErrorBrokenCommand(arg)
}
}
if len(c.preErrors) != 0 {
return c
}
c.args = append(c.args, args...)
return c
}
// AddDashesAndList adds the "--" and then add the list as arguments, it's usually for adding file list
// At the moment, this function can be only called once, maybe in future it can be refactored to support multiple calls (if necessary)
func (c *Command) AddDashesAndList(list ...string) *Command {
c.args = append(c.args, "--")
// Some old code also checks `arg != ""`, IMO it's not necessary.
// If the check is needed, the list should be prepared before the call to this function
c.args = append(c.args, list...)
return c
}
func (c *Command) AddConfig(key, value string) *Command {
kv := key + "=" + value
if !isSafeArgumentValue(kv) {
c.handlePreErrorBrokenCommand(kv)
} else {
c.configArgs = append(c.configArgs, "-c", kv)
}
return c
}
// ToTrustedCmdArgs converts a list of strings (trusted as argument) to TrustedCmdArgs
// In most cases, it shouldn't be used. Use NewCommand().AddXxx() function instead
func ToTrustedCmdArgs(args []string) TrustedCmdArgs {
ret := make(TrustedCmdArgs, len(args))
for i, arg := range args {
ret[i] = internal.CmdArg(arg)
}
return ret
}
type runOpts struct {
Env []string
Timeout time.Duration
// Dir is the working dir for the git command, however:
// FIXME: this could be incorrect in many cases, for example:
// * /some/path/.git
// * /some/path/.git/gitea-data/data/repositories/user/repo.git
// If "user/repo.git" is invalid/broken, then running git command in it will use "/some/path/.git", and produce unexpected results
// The correct approach is to use `--git-dir" global argument
Dir string
PipelineFunc func(Context) error
}
func commonBaseEnvs() []string {
envs := []string{
// Make Gitea use internal git config only, to prevent conflicts with user's git config
// It's better to use GIT_CONFIG_GLOBAL, but it requires git >= 2.32, so we still use HOME at the moment.
"HOME=" + HomeDir(),
// Avoid using system git config, it would cause problems (eg: use macOS osxkeychain to show a modal dialog, auto installing lfs hooks)
// This might be a breaking change in 1.24, because some users said that they have put some configs like "receive.certNonceSeed" in "/etc/gitconfig"
// For these users, they need to migrate the necessary configs to Gitea's git config file manually.
"GIT_CONFIG_NOSYSTEM=1",
// Ignore replace references (https://git-scm.com/docs/git-replace)
"GIT_NO_REPLACE_OBJECTS=1",
}
// some environment variables should be passed to git command
passThroughEnvKeys := []string{
"GNUPGHOME", // git may call gnupg to do commit signing
}
for _, key := range passThroughEnvKeys {
if val, ok := os.LookupEnv(key); ok {
envs = append(envs, key+"="+val)
}
}
return envs
}
// CommonGitCmdEnvs returns the common environment variables for a "git" command.
func CommonGitCmdEnvs() []string {
return append(commonBaseEnvs(), []string{
"LC_ALL=C", // ensure git output is in English, error messages are parsed in English
"GIT_TERMINAL_PROMPT=0", // avoid prompting for credentials interactively, supported since git v2.3
}...)
}
// CommonCmdServEnvs is like CommonGitCmdEnvs, but it only returns minimal required environment variables for the "gitea serv" command
func CommonCmdServEnvs() []string {
return commonBaseEnvs()
}
var ErrBrokenCommand = errors.New("git command is broken")
func (c *Command) WithDir(dir string) *Command {
c.opts.Dir = dir
return c
}
func (c *Command) WithEnv(env []string) *Command {
c.opts.Env = env
return c
}
func (c *Command) WithTimeout(timeout time.Duration) *Command {
c.opts.Timeout = timeout
return c
}
func (c *Command) makeStdoutStderr(w *io.Writer) (PipeReader, func()) {
pr, pw, err := os.Pipe()
if err != nil {
c.preErrors = append(c.preErrors, err)
return &pipeNull{err}, func() {}
}
c.childrenPipeFiles = append(c.childrenPipeFiles, pw)
c.parentPipeFiles = append(c.parentPipeFiles, pr)
c.parentPipeReaders = append(c.parentPipeReaders, pr)
*w /* stdout, stderr */ = pw
return &pipeReader{f: pr}, func() { pr.Close() }
}
// MakeStdinPipe creates a writer for the command's stdin.
// The returned closer function must be called by the caller to close the pipe.
func (c *Command) MakeStdinPipe() (writer PipeWriter, closer func()) {
pr, pw, err := os.Pipe()
if err != nil {
c.preErrors = append(c.preErrors, err)
return &pipeNull{err}, func() {}
}
c.childrenPipeFiles = append(c.childrenPipeFiles, pr)
c.parentPipeFiles = append(c.parentPipeFiles, pw)
c.cmdStdin = pr
return &pipeWriter{pw}, func() { pw.Close() }
}
// MakeStdoutPipe creates a reader for the command's stdout.
// The returned closer function must be called by the caller to close the pipe.
// After the pipe reader is closed, the unread data will be discarded.
func (c *Command) MakeStdoutPipe() (reader PipeReader, closer func()) {
return c.makeStdoutStderr(&c.cmdStdout)
}
// MakeStderrPipe is like MakeStdoutPipe, but for stderr.
func (c *Command) MakeStderrPipe() (reader PipeReader, closer func()) {
return c.makeStdoutStderr(&c.cmdStderr)
}
func (c *Command) MakeStdinStdoutPipe() (stdin PipeWriter, stdout PipeReader, closer func()) {
stdin, stdinClose := c.MakeStdinPipe()
stdout, stdoutClose := c.MakeStdoutPipe()
return stdin, stdout, func() {
stdinClose()
stdoutClose()
}
}
func (c *Command) WithStdinBytes(stdin []byte) *Command {
c.cmdStdin = bytes.NewReader(stdin)
return c
}
func (c *Command) WithStdoutBuffer(w PipeBufferWriter) *Command {
c.cmdStdout = w
return c
}
// WithStdinCopy and WithStdoutCopy are general functions that accept any io.Reader / io.Writer.
// In this case, Golang exec.Cmd will start new internal goroutines to do io.Copy between pipes and provided Reader/Writer.
// If the reader or writer is blocked and never returns, then the io.Copy won't finish, then exec.Cmd.Wait won't return, which may cause deadlocks.
// A typical deadlock example is:
// * `r,w:=io.Pipe(); cmd.Stdin=r; defer w.Close(); cmd.Run()`: the Run() will never return because stdin reader is blocked forever and w.Close() will never be called.
// If the reader/writer won't block forever (for example: read from a file or buffer), then these functions are safe to use.
func (c *Command) WithStdinCopy(w io.Reader) *Command {
c.cmdStdin = w
return c
}
func (c *Command) WithStdoutCopy(w io.Writer) *Command {
c.cmdStdout = w
return c
}
// WithPipelineFunc sets the pipeline function for the command.
// The pipeline function will be called in the Run / Wait function after the command is started successfully.
// The function can read/write from/to the command's stdio pipes (if any).
// The pipeline function can cancel (kill) the command by calling ctx.CancelPipeline before the command finishes.
// The returned error of Run / Wait can be joined errors from the pipeline function, context cause, and command exit error.
// Caller can get the pipeline function's error (if any) by UnwrapPipelineError.
func (c *Command) WithPipelineFunc(f func(ctx Context) error) *Command {
c.opts.PipelineFunc = f
return c
}
// WithParentCallerInfo can be used to set the caller info (usually function name) of the parent function of the caller.
// For most cases, "Run" family functions can get its caller info automatically
// But if you need to call "Run" family functions in a wrapper function: "FeatureFunc -> GeneralWrapperFunc -> RunXxx",
// then you can to call this function in GeneralWrapperFunc to set the caller info of FeatureFunc.
// The caller info can only be set once.
func (c *Command) WithParentCallerInfo(optInfo ...string) *Command {
if c.callerInfo != "" {
return c
}
if len(optInfo) > 0 {
c.callerInfo = optInfo[0]
return c
}
skip := 1 /*parent "wrap/run" functions*/ + 1 /*this function*/
callerFuncName := util.CallerFuncName(skip)
callerInfo := callerFuncName
if pos := strings.LastIndex(callerInfo, "/"); pos >= 0 {
callerInfo = callerInfo[pos+1:]
}
c.callerInfo = callerInfo
return c
}
func (c *Command) Start(ctx context.Context) (retErr error) {
if c.cmd != nil {
// this is a programming error, it will cause serious deadlock problems, so it must be fixed.
panic("git command has already been started")
}
defer func() {
c.closePipeFiles(c.childrenPipeFiles)
if retErr != nil {
// release the pipes to avoid resource leak since the command failed to start
c.closePipeFiles(c.parentPipeFiles)
// if error occurs, we must also finish the task, otherwise, cmdFinished will be called in "Wait" function
if c.cmdFinished != nil {
c.cmdFinished()
}
}
}()
if len(c.preErrors) != 0 {
// In most cases, such error shouldn't happen. If it happens, log it as error level with more details
err := errors.Join(c.preErrors...)
log.Error("git command: %s, error: %s", c.LogString(), err)
return err
}
cmdLogString := c.LogString()
if c.callerInfo == "" {
c.WithParentCallerInfo()
}
// these logs are for debugging purposes only, so no guarantee of correctness or stability
desc := fmt.Sprintf("git.Run(by:%s, repo:%s): %s", c.callerInfo, logArgSanitize(c.opts.Dir), cmdLogString)
log.Debug("git.Command: %s", desc)
_, span := gtprof.GetTracer().Start(ctx, gtprof.TraceSpanGitRun)
defer span.End()
span.SetAttributeString(gtprof.TraceAttrFuncCaller, c.callerInfo)
span.SetAttributeString(gtprof.TraceAttrGitCommand, cmdLogString)
if c.opts.Timeout <= 0 {
c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContext(ctx, desc)
} else {
c.cmdCtx, c.cmdCancel, c.cmdFinished = process.GetManager().AddContextTimeout(ctx, c.opts.Timeout, desc)
}
c.cmdStartTime = time.Now()
c.cmd = exec.CommandContext(c.cmdCtx, c.prog, append(c.configArgs, c.args...)...)
if c.opts.Env == nil {
c.cmd.Env = os.Environ()
} else {
c.cmd.Env = c.opts.Env
}
process.SetSysProcAttribute(c.cmd)
c.cmd.Env = append(c.cmd.Env, CommonGitCmdEnvs()...)
c.cmd.Dir = c.opts.Dir
c.cmd.Stdout = c.cmdStdout
c.cmd.Stdin = c.cmdStdin
c.cmd.Stderr = c.cmdStderr
return c.cmd.Start()
}
func (c *Command) closePipeFiles(files []*os.File) {
for _, f := range files {
_ = f.Close()
}
}
func (c *Command) discardPipeReaders(files []*os.File) {
for _, f := range files {
_, _ = io.Copy(io.Discard, f)
}
}
func (c *Command) Wait() error {
defer func() {
// The reader in another goroutine might be still reading the stdout, so we shouldn't close the pipes here
// MakeStdoutPipe returns a closer function to force callers to close the pipe correctly
// Here we only need to mark the command as finished
c.cmdFinished()
}()
if c.opts.PipelineFunc != nil {
errPipeline := c.opts.PipelineFunc(&cmdContext{Context: c.cmdCtx, cmd: c})
if context.Cause(c.cmdCtx) == nil {
// if the context is not canceled explicitly, we need to discard the unread data,
// and wait for the command to exit normally, and then get its exit code
c.discardPipeReaders(c.parentPipeReaders)
} // else: canceled command will be killed, and the exit code is caused by kill
// after the pipeline function returns, we can safely close the pipes, then wait for the command to exit
c.closePipeFiles(c.parentPipeFiles)
errWait := c.cmd.Wait()
errCause := context.Cause(c.cmdCtx) // in case the cause is set during Wait(), get the final cancel cause
if unwrapped, ok := UnwrapPipelineError(errCause); ok {
if unwrapped != errPipeline {
panic("unwrapped context pipeline error should be the same one returned by pipeline function")
}
if unwrapped == nil {
// the pipeline function declares that there is no error, and it cancels (kills) the command ahead,
// so we should ignore the errors from "wait" and "cause"
errWait, errCause = nil, nil
}
}
// some legacy code still need to access the error returned by pipeline function by "==" but not "errors.Is"
// so we need to make sure the original error is able to be unwrapped by UnwrapPipelineError
return errors.Join(wrapPipelineError(errPipeline), errCause, errWait)
}
// there might be other goroutines using the context or pipes, so we just wait for the command to finish
errWait := c.cmd.Wait()
elapsed := time.Since(c.cmdStartTime)
if elapsed > time.Second {
log.Debug("slow git.Command.Run: %s (%s)", c, elapsed) // TODO: no need to log this for long-running commands
}
// Here the logic is different from "PipelineFunc" case,
// because PipelineFunc can return error if it fails, it knows whether it succeeds or fails.
// But in normal case, the caller just runs the git command, the command's exit code is the source of truth.
// If the caller need to know whether the command error is caused by cancellation, it should check the "err" by itself.
errCause := context.Cause(c.cmdCtx)
return errors.Join(errCause, errWait)
}
func (c *Command) StartWithStderr(ctx context.Context) RunStdError {
if c.cmdStderr != nil {
panic("caller-provided stderr receiver doesn't work with managed stderr buffer")
}
c.cmdManagedStderr = &bytes.Buffer{}
c.cmdStderr = c.cmdManagedStderr
err := c.Start(ctx)
if err != nil {
return &runStdError{err: err}
}
return nil
}
func (c *Command) WaitWithStderr() RunStdError {
if c.cmdManagedStderr == nil {
panic("managed stderr buffer is not initialized")
}
errWait := c.Wait()
if errWait == nil {
// if no exec error but only stderr output, the stderr output is still saved in "c.cmdManagedStderr" and can be read later
return nil
}
return &runStdError{err: errWait, stderr: util.UnsafeBytesToString(c.cmdManagedStderr.Bytes())}
}
func (c *Command) RunWithStderr(ctx context.Context) RunStdError {
if err := c.StartWithStderr(ctx); err != nil {
return &runStdError{err: err}
}
return c.WaitWithStderr()
}
func (c *Command) Run(ctx context.Context) (err error) {
if err = c.Start(ctx); err != nil {
return err
}
return c.Wait()
}
// RunStdString runs the command and returns stdout/stderr as string. and store stderr to returned error (err combined with stderr).
func (c *Command) RunStdString(ctx context.Context) (stdout, stderr string, runErr RunStdError) {
stdoutBytes, stderrBytes, runErr := c.WithParentCallerInfo().runStdBytes(ctx)
return util.UnsafeBytesToString(stdoutBytes), util.UnsafeBytesToString(stderrBytes), runErr
}
// RunStdBytes runs the command and returns stdout/stderr as bytes. and store stderr to returned error (err combined with stderr).
func (c *Command) RunStdBytes(ctx context.Context) (stdout, stderr []byte, runErr RunStdError) {
return c.WithParentCallerInfo().runStdBytes(ctx)
}
func (c *Command) runStdBytes(ctx context.Context) ([]byte, []byte, RunStdError) {
if c.cmdStdout != nil || c.cmdStderr != nil {
// it must panic here, otherwise there would be bugs if developers set other Stdin/Stderr by mistake, and it would be very difficult to debug
panic("stdout and stderr field must be nil when using RunStdBytes")
}
stdoutBuf := &bytes.Buffer{}
err := c.WithParentCallerInfo().WithStdoutBuffer(stdoutBuf).RunWithStderr(ctx)
return stdoutBuf.Bytes(), c.cmdManagedStderr.Bytes(), err
}
func (c *Command) DebugKill() {
_ = c.cmd.Process.Kill()
}