From f1e85622da0492fa01d8449eb938197cd4fa65bf Mon Sep 17 00:00:00 2001 From: zeripath Date: Sun, 19 Dec 2021 04:19:25 +0000 Subject: [PATCH] Improve TestPatch to use git read-tree -m and implement git-merge-one-file functionality (#18004) The current TestPatch conflict code uses a plain git apply which does not properly account for 3-way merging. However, we can improve things using `git read-tree -m` to do a three-way merge then follow the algorithm used in merge-one-file. We can also use `--patience` and/or `--histogram` to generate a nicer diff for applying patches too. Fix #13679 Fix #6417 Signed-off-by: Andrew Thornton --- modules/git/repo_compare.go | 6 +- services/pull/patch.go | 197 +++++++++++++++++++++++++++++++- services/pull/patch_unmerged.go | 180 +++++++++++++++++++++++++++++ 3 files changed, 377 insertions(+), 6 deletions(-) create mode 100644 services/pull/patch_unmerged.go diff --git a/modules/git/repo_compare.go b/modules/git/repo_compare.go index 4342eb4b2fe..5fe37aed7b6 100644 --- a/modules/git/repo_compare.go +++ b/modules/git/repo_compare.go @@ -237,7 +237,11 @@ func (repo *Repository) GetDiff(base, head string, w io.Writer) error { // GetDiffBinary generates and returns patch data between given revisions, including binary diffs. func (repo *Repository) GetDiffBinary(base, head string, w io.Writer) error { - return NewCommandContext(repo.Ctx, "diff", "-p", "--binary", base, head). + if CheckGitVersionAtLeast("1.7.7") == nil { + return NewCommandContext(repo.Ctx, "diff", "-p", "--binary", "--histogram", base, head). + RunInDirPipeline(repo.Path, w, nil) + } + return NewCommandContext(repo.Ctx, "diff", "-p", "--binary", "--patience", base, head). RunInDirPipeline(repo.Path, w, nil) } diff --git a/services/pull/patch.go b/services/pull/patch.go index b971eb4bf30..4bc875f6304 100644 --- a/services/pull/patch.go +++ b/services/pull/patch.go @@ -11,12 +11,15 @@ import ( "fmt" "io" "os" + "path/filepath" "strings" "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/unit" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/util" "github.com/gobwas/glob" @@ -98,12 +101,193 @@ func TestPatch(pr *models.PullRequest) error { return nil } +type errMergeConflict struct { + filename string +} + +func (e *errMergeConflict) Error() string { + return fmt.Sprintf("conflict detected at: %s", e.filename) +} + +func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error { + switch { + case file.stage1 != nil && (file.stage2 == nil || file.stage3 == nil): + // 1. Deleted in one or both: + // + // Conflict <==> the stage1 !SameAs to the undeleted one + if (file.stage2 != nil && !file.stage1.SameAs(file.stage2)) || (file.stage3 != nil && !file.stage1.SameAs(file.stage3)) { + // Conflict! + return &errMergeConflict{file.stage1.path} + } + + // Not a genuine conflict and we can simply remove the file from the index + return gitRepo.RemoveFilesFromIndex(file.stage1.path) + case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)): + // 2. Added in ours but not in theirs or identical in both + // + // Not a genuine conflict just add to the index + if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil { + return err + } + return nil + case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode: + // 3. Added in both with the same sha but the modes are different + // + // Conflict! (Not sure that this can actually happen but we should handle) + return &errMergeConflict{file.stage2.path} + case file.stage1 == nil && file.stage2 == nil && file.stage3 != nil: + // 4. Added in theirs but not ours: + // + // Not a genuine conflict just add to the index + return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path) + case file.stage1 == nil: + // 5. Created by new in both + // + // Conflict! + return &errMergeConflict{file.stage2.path} + case file.stage2 != nil && file.stage3 != nil: + // 5. Modified in both - we should try to merge in the changes but first: + // + if file.stage2.mode == "120000" || file.stage3.mode == "120000" { + // 5a. Conflicting symbolic link change + return &errMergeConflict{file.stage2.path} + } + if file.stage2.mode == "160000" || file.stage3.mode == "160000" { + // 5b. Conflicting submodule change + return &errMergeConflict{file.stage2.path} + } + if file.stage2.mode != file.stage3.mode { + // 5c. Conflicting mode change + return &errMergeConflict{file.stage2.path} + } + + // Need to get the objects from the object db to attempt to merge + root, err := git.NewCommandContext(ctx, "unpack-file", file.stage1.sha).RunInDir(tmpBasePath) + if err != nil { + return fmt.Errorf("unable to get root object: %s at path: %s for merging. Error: %w", file.stage1.sha, file.stage1.path, err) + } + root = strings.TrimSpace(root) + defer func() { + _ = util.Remove(filepath.Join(tmpBasePath, root)) + }() + + base, err := git.NewCommandContext(ctx, "unpack-file", file.stage2.sha).RunInDir(tmpBasePath) + if err != nil { + return fmt.Errorf("unable to get base object: %s at path: %s for merging. Error: %w", file.stage2.sha, file.stage2.path, err) + } + base = strings.TrimSpace(filepath.Join(tmpBasePath, base)) + defer func() { + _ = util.Remove(base) + }() + head, err := git.NewCommandContext(ctx, "unpack-file", file.stage3.sha).RunInDir(tmpBasePath) + if err != nil { + return fmt.Errorf("unable to get head object:%s at path: %s for merging. Error: %w", file.stage3.sha, file.stage3.path, err) + } + head = strings.TrimSpace(head) + defer func() { + _ = util.Remove(filepath.Join(tmpBasePath, head)) + }() + + // now git merge-file annoyingly takes a different order to the merge-tree ... + _, conflictErr := git.NewCommandContext(ctx, "merge-file", base, root, head).RunInDir(tmpBasePath) + if conflictErr != nil { + return &errMergeConflict{file.stage2.path} + } + + // base now contains the merged data + hash, err := git.NewCommandContext(ctx, "hash-object", "-w", "--path", file.stage2.path, base).RunInDir(tmpBasePath) + if err != nil { + return err + } + hash = strings.TrimSpace(hash) + return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path) + default: + if file.stage1 != nil { + return &errMergeConflict{file.stage1.path} + } else if file.stage2 != nil { + return &errMergeConflict{file.stage2.path} + } else if file.stage3 != nil { + return &errMergeConflict{file.stage3.path} + } + } + return nil +} + func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath string) (bool, error) { + ctx, cancel, finished := process.GetManager().AddContext(graceful.GetManager().HammerContext(), fmt.Sprintf("checkConflicts: pr[%d] %s/%s#%d", pr.ID, pr.BaseRepo.OwnerName, pr.BaseRepo.Name, pr.Index)) + defer finished() + + // First we use read-tree to do a simple three-way merge + if _, err := git.NewCommandContext(ctx, "read-tree", "-m", pr.MergeBase, "base", "tracking").RunInDir(tmpBasePath); err != nil { + log.Error("Unable to run read-tree -m! Error: %v", err) + return false, fmt.Errorf("unable to run read-tree -m! Error: %v", err) + } + + // Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles + unmerged := make(chan *unmergedFile) + go unmergedFiles(ctx, tmpBasePath, unmerged) + + defer func() { + cancel() + for range unmerged { + // empty the unmerged channel + } + }() + + numberOfConflicts := 0 + conflict := false + + for file := range unmerged { + if file == nil { + break + } + if file.err != nil { + cancel() + return false, file.err + } + + // OK now we have the unmerged file triplet attempt to merge it + if err := attemptMerge(ctx, file, tmpBasePath, gitRepo); err != nil { + if conflictErr, ok := err.(*errMergeConflict); ok { + log.Trace("Conflict: %s in PR[%d] %s/%s#%d", conflictErr.filename, pr.ID, pr.BaseRepo.OwnerName, pr.BaseRepo.Name, pr.Index) + conflict = true + if numberOfConflicts < 10 { + pr.ConflictedFiles = append(pr.ConflictedFiles, conflictErr.filename) + } + numberOfConflicts++ + continue + } + return false, err + } + } + + if !conflict { + treeHash, err := git.NewCommandContext(ctx, "write-tree").RunInDir(tmpBasePath) + if err != nil { + return false, err + } + treeHash = strings.TrimSpace(treeHash) + baseTree, err := gitRepo.GetTree("base") + if err != nil { + return false, err + } + if treeHash == baseTree.ID.String() { + log.Debug("PullRequest[%d]: Patch is empty - ignoring", pr.ID) + pr.Status = models.PullRequestStatusEmpty + pr.ConflictedFiles = []string{} + pr.ChangedProtectedFiles = []string{} + } + + return false, nil + } + + // OK read-tree has failed so we need to try a different thing - this might actually succeed where the above fails due to whitespace handling. + // 1. Create a plain patch from head to base tmpPatchFile, err := os.CreateTemp("", "patch") if err != nil { log.Error("Unable to create temporary patch file! Error: %v", err) - return false, fmt.Errorf("Unable to create temporary patch file! Error: %v", err) + return false, fmt.Errorf("unable to create temporary patch file! Error: %v", err) } defer func() { _ = util.Remove(tmpPatchFile.Name()) @@ -112,12 +296,12 @@ func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath if err := gitRepo.GetDiffBinary(pr.MergeBase, "tracking", tmpPatchFile); err != nil { tmpPatchFile.Close() log.Error("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err) - return false, fmt.Errorf("Unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err) + return false, fmt.Errorf("unable to get patch file from %s to %s in %s Error: %v", pr.MergeBase, pr.HeadBranch, pr.BaseRepo.FullName(), err) } stat, err := tmpPatchFile.Stat() if err != nil { tmpPatchFile.Close() - return false, fmt.Errorf("Unable to stat patch file: %v", err) + return false, fmt.Errorf("unable to stat patch file: %v", err) } patchPath := tmpPatchFile.Name() tmpPatchFile.Close() @@ -154,6 +338,9 @@ func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath if prConfig.IgnoreWhitespaceConflicts { args = append(args, "--ignore-whitespace") } + if git.CheckGitVersionAtLeast("2.32.0") == nil { + args = append(args, "--3way") + } args = append(args, patchPath) pr.ConflictedFiles = make([]string, 0, 5) @@ -168,7 +355,7 @@ func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath stderrReader, stderrWriter, err := os.Pipe() if err != nil { log.Error("Unable to open stderr pipe: %v", err) - return false, fmt.Errorf("Unable to open stderr pipe: %v", err) + return false, fmt.Errorf("unable to open stderr pipe: %v", err) } defer func() { _ = stderrReader.Close() @@ -176,7 +363,7 @@ func checkConflicts(pr *models.PullRequest, gitRepo *git.Repository, tmpBasePath }() // 7. Run the check command - conflict := false + conflict = false err = git.NewCommand(args...). RunInDirTimeoutEnvFullPipelineFunc( nil, -1, tmpBasePath, diff --git a/services/pull/patch_unmerged.go b/services/pull/patch_unmerged.go new file mode 100644 index 00000000000..e15b299350c --- /dev/null +++ b/services/pull/patch_unmerged.go @@ -0,0 +1,180 @@ +// Copyright 2021 The Gitea Authors. +// All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package pull + +import ( + "bufio" + "context" + "fmt" + "io" + "os" + "strconv" + "strings" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/log" +) + +// lsFileLine is a Quadruplet struct (+error) representing a partially parsed line from ls-files +type lsFileLine struct { + mode string + sha string + stage int + path string + err error +} + +// SameAs checks if two lsFileLines are referring to the same path, sha and mode (ignoring stage) +func (line *lsFileLine) SameAs(other *lsFileLine) bool { + if line == nil || other == nil { + return false + } + + if line.err != nil || other.err != nil { + return false + } + + return line.mode == other.mode && + line.sha == other.sha && + line.path == other.path +} + +// readUnmergedLsFileLines calls git ls-files -u -z and parses the lines into mode-sha-stage-path quadruplets +// it will push these to the provided channel closing it at the end +func readUnmergedLsFileLines(ctx context.Context, tmpBasePath string, outputChan chan *lsFileLine) { + defer func() { + // Always close the outputChan at the end of this function + close(outputChan) + }() + + lsFilesReader, lsFilesWriter, err := os.Pipe() + if err != nil { + log.Error("Unable to open stderr pipe: %v", err) + outputChan <- &lsFileLine{err: fmt.Errorf("unable to open stderr pipe: %v", err)} + return + } + defer func() { + _ = lsFilesWriter.Close() + _ = lsFilesReader.Close() + }() + + stderr := &strings.Builder{} + err = git.NewCommandContext(ctx, "ls-files", "-u", "-z"). + RunInDirTimeoutEnvFullPipelineFunc( + nil, -1, tmpBasePath, + lsFilesWriter, stderr, nil, + func(_ context.Context, _ context.CancelFunc) error { + _ = lsFilesWriter.Close() + defer func() { + _ = lsFilesReader.Close() + }() + bufferedReader := bufio.NewReader(lsFilesReader) + + for { + line, err := bufferedReader.ReadString('\000') + if err != nil { + if err == io.EOF { + return nil + } + return err + } + toemit := &lsFileLine{} + + split := strings.SplitN(line, " ", 3) + if len(split) < 3 { + return fmt.Errorf("malformed line: %s", line) + } + toemit.mode = split[0] + toemit.sha = split[1] + + if len(split[2]) < 4 { + return fmt.Errorf("malformed line: %s", line) + } + + toemit.stage, err = strconv.Atoi(split[2][0:1]) + if err != nil { + return fmt.Errorf("malformed line: %s", line) + } + + toemit.path = split[2][2 : len(split[2])-1] + outputChan <- toemit + } + }) + + if err != nil { + outputChan <- &lsFileLine{err: fmt.Errorf("git ls-files -u -z: %v", git.ConcatenateError(err, stderr.String()))} + } +} + +// unmergedFile is triple (+error) of lsFileLines split into stages 1,2 & 3. +type unmergedFile struct { + stage1 *lsFileLine + stage2 *lsFileLine + stage3 *lsFileLine + err error +} + +// unmergedFiles will collate the output from readUnstagedLsFileLines in to file triplets and send them +// to the provided channel, closing at the end. +func unmergedFiles(ctx context.Context, tmpBasePath string, unmerged chan *unmergedFile) { + defer func() { + // Always close the channel + close(unmerged) + }() + + ctx, cancel := context.WithCancel(ctx) + lsFileLineChan := make(chan *lsFileLine, 10) // give lsFileLineChan a buffer + go readUnmergedLsFileLines(ctx, tmpBasePath, lsFileLineChan) + defer func() { + cancel() + for range lsFileLineChan { + // empty channel + } + }() + + next := &unmergedFile{} + for line := range lsFileLineChan { + if line.err != nil { + log.Error("Unable to run ls-files -u -z! Error: %v", line.err) + unmerged <- &unmergedFile{err: fmt.Errorf("unable to run ls-files -u -z! Error: %v", line.err)} + return + } + + // stages are always emitted 1,2,3 but sometimes 1, 2 or 3 are dropped + switch line.stage { + case 0: + // Should not happen as this represents successfully merged file - we will tolerate and ignore though + case 1: + if next.stage1 != nil { + // We need to handle the unstaged file stage1,stage2,stage3 + unmerged <- next + } + next = &unmergedFile{stage1: line} + case 2: + if next.stage3 != nil || next.stage2 != nil || (next.stage1 != nil && next.stage1.path != line.path) { + // We need to handle the unstaged file stage1,stage2,stage3 + unmerged <- next + next = &unmergedFile{} + } + next.stage2 = line + case 3: + if next.stage3 != nil || (next.stage1 != nil && next.stage1.path != line.path) || (next.stage2 != nil && next.stage2.path != line.path) { + // We need to handle the unstaged file stage1,stage2,stage3 + unmerged <- next + next = &unmergedFile{} + } + next.stage3 = line + default: + log.Error("Unexpected stage %d for path %s in run ls-files -u -z!", line.stage, line.path) + unmerged <- &unmergedFile{err: fmt.Errorf("unexpected stage %d for path %s in git ls-files -u -z", line.stage, line.path)} + return + } + } + // We need to handle the unstaged file stage1,stage2,stage3 + if next.stage1 != nil || next.stage2 != nil || next.stage3 != nil { + unmerged <- next + } +}