From 4dc0112862aa469dc93ddedaf4115ee9ff53eda4 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 15:11:20 +0000 Subject: [PATCH 01/20] feat: apply mode --- apply.go | 480 ++++++++++ apply_test.go | 877 ++++++++++++++++++ parser.go | 3 + testdata/apply/t4101/diff.0-1 | 6 + testdata/apply/t4101/diff.0-2 | 7 + testdata/apply/t4101/diff.0-3 | 8 + testdata/apply/t4101/diff.1-0 | 6 + testdata/apply/t4101/diff.1-2 | 8 + testdata/apply/t4101/diff.1-3 | 8 + testdata/apply/t4101/diff.2-0 | 7 + testdata/apply/t4101/diff.2-1 | 8 + testdata/apply/t4101/diff.2-3 | 7 + testdata/apply/t4101/diff.3-0 | 8 + testdata/apply/t4101/diff.3-1 | 8 + testdata/apply/t4101/diff.3-2 | 7 + testdata/apply/text_fragment_add_end.out | 5 + testdata/apply/text_fragment_add_end.patch | 9 + testdata/apply/text_fragment_add_end.src | 3 + .../apply/text_fragment_add_end_noeol.out | 5 + .../apply/text_fragment_add_end_noeol.patch | 11 + .../apply/text_fragment_add_end_noeol.src | 3 + testdata/apply/text_fragment_add_middle.out | 5 + testdata/apply/text_fragment_add_middle.patch | 9 + testdata/apply/text_fragment_add_middle.src | 3 + testdata/apply/text_fragment_add_start.out | 4 + testdata/apply/text_fragment_add_start.patch | 8 + testdata/apply/text_fragment_add_start.src | 3 + testdata/apply/text_fragment_change_end.out | 10 + testdata/apply/text_fragment_change_end.patch | 9 + testdata/apply/text_fragment_change_end.src | 10 + .../apply/text_fragment_change_end_eol.out | 3 + .../apply/text_fragment_change_end_eol.patch | 10 + .../apply/text_fragment_change_end_eol.src | 3 + testdata/apply/text_fragment_change_exact.out | 19 + .../apply/text_fragment_change_exact.patch | 12 + testdata/apply/text_fragment_change_exact.src | 30 + .../apply/text_fragment_change_middle.out | 9 + .../apply/text_fragment_change_middle.patch | 12 + .../apply/text_fragment_change_middle.src | 10 + .../text_fragment_change_single_noeol.out | 1 + .../text_fragment_change_single_noeol.patch | 8 + 
.../text_fragment_change_single_noeol.src | 1 + testdata/apply/text_fragment_change_start.out | 4 + .../apply/text_fragment_change_start.patch | 9 + testdata/apply/text_fragment_change_start.src | 10 + testdata/apply/text_fragment_delete_all.out | 0 testdata/apply/text_fragment_delete_all.patch | 8 + testdata/apply/text_fragment_delete_all.src | 4 + testdata/apply/text_fragment_error.src | 13 + ...text_fragment_error_context_conflict.patch | 12 + .../text_fragment_error_delete_conflict.patch | 12 + .../apply/text_fragment_error_new_file.patch | 7 + .../apply/text_fragment_error_short_src.patch | 12 + ...text_fragment_error_short_src_before.patch | 12 + testdata/apply/text_fragment_new.out | 3 + testdata/apply/text_fragment_new.patch | 7 + testdata/apply/text_fragment_new.src | 0 57 files changed, 1776 insertions(+) create mode 100644 apply.go create mode 100644 apply_test.go create mode 100644 testdata/apply/t4101/diff.0-1 create mode 100644 testdata/apply/t4101/diff.0-2 create mode 100644 testdata/apply/t4101/diff.0-3 create mode 100644 testdata/apply/t4101/diff.1-0 create mode 100644 testdata/apply/t4101/diff.1-2 create mode 100644 testdata/apply/t4101/diff.1-3 create mode 100644 testdata/apply/t4101/diff.2-0 create mode 100644 testdata/apply/t4101/diff.2-1 create mode 100644 testdata/apply/t4101/diff.2-3 create mode 100644 testdata/apply/t4101/diff.3-0 create mode 100644 testdata/apply/t4101/diff.3-1 create mode 100644 testdata/apply/t4101/diff.3-2 create mode 100644 testdata/apply/text_fragment_add_end.out create mode 100644 testdata/apply/text_fragment_add_end.patch create mode 100644 testdata/apply/text_fragment_add_end.src create mode 100644 testdata/apply/text_fragment_add_end_noeol.out create mode 100644 testdata/apply/text_fragment_add_end_noeol.patch create mode 100644 testdata/apply/text_fragment_add_end_noeol.src create mode 100644 testdata/apply/text_fragment_add_middle.out create mode 100644 testdata/apply/text_fragment_add_middle.patch create mode 
100644 testdata/apply/text_fragment_add_middle.src create mode 100644 testdata/apply/text_fragment_add_start.out create mode 100644 testdata/apply/text_fragment_add_start.patch create mode 100644 testdata/apply/text_fragment_add_start.src create mode 100644 testdata/apply/text_fragment_change_end.out create mode 100644 testdata/apply/text_fragment_change_end.patch create mode 100644 testdata/apply/text_fragment_change_end.src create mode 100644 testdata/apply/text_fragment_change_end_eol.out create mode 100644 testdata/apply/text_fragment_change_end_eol.patch create mode 100644 testdata/apply/text_fragment_change_end_eol.src create mode 100644 testdata/apply/text_fragment_change_exact.out create mode 100644 testdata/apply/text_fragment_change_exact.patch create mode 100644 testdata/apply/text_fragment_change_exact.src create mode 100644 testdata/apply/text_fragment_change_middle.out create mode 100644 testdata/apply/text_fragment_change_middle.patch create mode 100644 testdata/apply/text_fragment_change_middle.src create mode 100644 testdata/apply/text_fragment_change_single_noeol.out create mode 100644 testdata/apply/text_fragment_change_single_noeol.patch create mode 100644 testdata/apply/text_fragment_change_single_noeol.src create mode 100644 testdata/apply/text_fragment_change_start.out create mode 100644 testdata/apply/text_fragment_change_start.patch create mode 100644 testdata/apply/text_fragment_change_start.src create mode 100644 testdata/apply/text_fragment_delete_all.out create mode 100644 testdata/apply/text_fragment_delete_all.patch create mode 100644 testdata/apply/text_fragment_delete_all.src create mode 100644 testdata/apply/text_fragment_error.src create mode 100644 testdata/apply/text_fragment_error_context_conflict.patch create mode 100644 testdata/apply/text_fragment_error_delete_conflict.patch create mode 100644 testdata/apply/text_fragment_error_new_file.patch create mode 100644 testdata/apply/text_fragment_error_short_src.patch create mode 
100644 testdata/apply/text_fragment_error_short_src_before.patch create mode 100644 testdata/apply/text_fragment_new.out create mode 100644 testdata/apply/text_fragment_new.patch create mode 100644 testdata/apply/text_fragment_new.src diff --git a/apply.go b/apply.go new file mode 100644 index 0000000..b710eaa --- /dev/null +++ b/apply.go @@ -0,0 +1,480 @@ +package git_diff_parser + +import ( + "bytes" + "errors" + "fmt" + "regexp" + "strings" +) + +var ErrPatchConflict = errors.New("patch conflict") + +type ConflictError struct { + ConflictingHunks int +} + +func (e *ConflictError) Error() string { + if e.ConflictingHunks == 1 { + return "patch conflict in 1 hunk" + } + return fmt.Sprintf("patch conflict in %d hunks", e.ConflictingHunks) +} + +func (e *ConflictError) Is(target error) bool { + return target == ErrPatchConflict +} + +type patchHunk struct { + oldStart int + oldCount int + newCount int + lines []patchLine +} + +type patchLine struct { + kind byte + text string + hasNewline bool + oldEOF bool + newEOF bool +} + +type fileLine struct { + text string + hasNewline bool + eofMarker bool +} + +var hunkHeaderPattern = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@`) + +func ApplyFile(pristine, patchData []byte) ([]byte, error) { + normalizedPatch := normalizePatchForValidation(patchData) + if err := validateSingleFilePatch(normalizedPatch); err != nil { + return nil, err + } + + lines := splitLinesPreserveNewline(string(normalizedPatch)) + hunks, err := parseHunks(skipToHunks(lines)) + if err != nil { + return nil, err + } + if !hunksContainChanges(hunks) { + return nil, fmt.Errorf("patch contains no effective changes") + } + + sourceLines := splitFileLines(pristine) + cursor := 0 + outLines := make([]fileLine, 0, len(sourceLines)) + conflicts := 0 + + for _, hunk := range hunks { + matchIndex, matched := locateHunk(sourceLines, cursor, hunk) + if !matched { + conflicts++ + + conflictStart := hunk.oldStart - 1 + if conflictStart < cursor { + conflictStart = cursor + } + if conflictStart > len(sourceLines) { + conflictStart = len(sourceLines) + } + + conflictEnd := conflictStart + hunk.oldCount + if conflictEnd > len(sourceLines) { + conflictEnd = len(sourceLines) + } + + outLines = appendSourceLines(outLines, sourceLines[cursor:conflictStart]...) + outLines = appendConflict(outLines, sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) + cursor = conflictEnd + continue + } + + outLines = appendSourceLines(outLines, sourceLines[cursor:matchIndex]...) + cursor = matchIndex + + for _, hunkLine := range hunk.lines { + switch hunkLine.kind { + case ' ': + outLines = append(outLines, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) + cursor++ + case '-': + cursor++ + case '+': + outLines = append(outLines, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) + } + } + } + + outLines = appendSourceLines(outLines, sourceLines[cursor:]...) + result := joinFileLines(outLines) + if conflicts > 0 { + return result, &ConflictError{ConflictingHunks: conflicts} + } + return result, nil +} + +// ApplyPatch is kept as a compatibility alias. 
+func ApplyPatch(pristine, patchData []byte) ([]byte, error) { + return ApplyFile(pristine, patchData) +} + +func validateSingleFilePatch(patchData []byte) error { + parsed, errs := Parse(string(patchData)) + if len(errs) > 0 { + return fmt.Errorf("unsupported patch syntax: %w", errs[0]) + } + + if len(parsed.FileDiff) != 1 { + return fmt.Errorf("expected exactly 1 file diff, found %d", len(parsed.FileDiff)) + } + + fileDiff := parsed.FileDiff[0] + if fileDiff.IsBinary { + return fmt.Errorf("binary patches are not supported") + } + if fileDiff.NewMode != "" { + return fmt.Errorf("file mode changes are not supported") + } + if fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted { + return fmt.Errorf("patches may only modify existing files") + } + if len(fileDiff.Hunks) == 0 { + return fmt.Errorf("patch contains no hunks") + } + + return nil +} + +func fileDiffHasChanges(fileDiff FileDiff) bool { + for _, hunk := range fileDiff.Hunks { + for _, change := range hunk.ChangeList { + if change.Type != ContentChangeTypeNOOP { + return true + } + } + } + return false +} + +func hunksContainChanges(hunks []patchHunk) bool { + for _, hunk := range hunks { + for _, line := range hunk.lines { + if line.kind == '+' || line.kind == '-' { + return true + } + } + } + return false +} + +func skipToHunks(lines []string) []string { + for i, line := range lines { + if strings.HasPrefix(strings.TrimRight(line, "\n"), "@@ ") { + return lines[i:] + } + } + return nil +} + +func parseHunks(lines []string) ([]patchHunk, error) { + hunks := make([]patchHunk, 0) + for i := 0; i < len(lines); { + line := strings.TrimRight(lines[i], "\n") + if line == "" { + i++ + continue + } + if !strings.HasPrefix(line, "@@ ") { + return nil, fmt.Errorf("unexpected patch line %q", line) + } + + oldStart, oldCount, newCount, err := parseHunkHeader(line) + if err != nil { + return nil, err + } + + i++ + hunkLines := make([]patchLine, 0) + for i < len(lines) && 
!strings.HasPrefix(strings.TrimRight(lines[i], "\n"), "@@ ") { + raw := lines[i] + if strings.HasPrefix(raw, `\ No newline at end of file`) { + if len(hunkLines) == 0 { + return nil, fmt.Errorf("unexpected no-newline marker without a preceding patch line") + } + hunkLines[len(hunkLines)-1].hasNewline = false + i++ + continue + } + + line, skip, err := parsePatchLine(raw) + if err != nil { + return nil, err + } + if !skip { + hunkLines = append(hunkLines, line) + } + i++ + } + markEOFMarkers(hunkLines, oldCount, newCount) + + hunks = append(hunks, patchHunk{ + oldStart: oldStart, + oldCount: oldCount, + newCount: newCount, + lines: hunkLines, + }) + } + + return hunks, nil +} + +func locateHunk(sourceLines []fileLine, cursor int, hunk patchHunk) (int, bool) { + preferred := hunk.oldStart - 1 + if hunk.oldCount == 0 { + preferred = hunk.oldStart + } + if preferred < cursor { + preferred = cursor + } + + if hunk.newCount >= hunk.oldCount && preferred <= len(sourceLines) && postimageMatchesAt(sourceLines, preferred, desiredLines(hunk)) { + return 0, false + } + + for offset := 0; ; offset++ { + candidate := preferred - offset + if candidate >= cursor && candidate <= len(sourceLines) && hunkMatchesAt(sourceLines, candidate, hunk) { + return candidate, true + } + + candidate = preferred + offset + if offset > 0 && candidate >= cursor && candidate <= len(sourceLines) && hunkMatchesAt(sourceLines, candidate, hunk) { + return candidate, true + } + + if preferred-offset < cursor && preferred+offset > len(sourceLines) { + break + } + } + + return 0, false +} + +func hunkMatchesAt(sourceLines []fileLine, start int, hunk patchHunk) bool { + if hunk.newCount >= hunk.oldCount && postimageMatchesAt(sourceLines, start, desiredLines(hunk)) { + return false + } + + cursor := start + for _, hunkLine := range hunk.lines { + switch hunkLine.kind { + case ' ', '-': + if cursor >= len(sourceLines) { + return false + } + if sourceLines[cursor].text != hunkLine.text || + 
sourceLines[cursor].hasNewline != hunkLine.hasNewline || + sourceLines[cursor].eofMarker != hunkLine.oldEOF { + return false + } + cursor++ + case '+': + continue + default: + return false + } + } + + return true +} + +func postimageMatchesAt(sourceLines []fileLine, start int, desired []fileLine) bool { + if len(desired) == 0 { + return false + } + if start < 0 || start+len(desired) > len(sourceLines) { + return false + } + + for i := range desired { + if sourceLines[start+i].text != desired[i].text || + sourceLines[start+i].hasNewline != desired[i].hasNewline || + sourceLines[start+i].eofMarker != desired[i].eofMarker { + return false + } + } + + return true +} + +func desiredLines(hunk patchHunk) []fileLine { + lines := make([]fileLine, 0, len(hunk.lines)) + for _, line := range hunk.lines { + if line.kind == ' ' || line.kind == '+' { + lines = append(lines, fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.newEOF}) + } + } + return lines +} + +func appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { + out = append(out, fileLine{text: "<<<<<<< Current (Your changes)", hasNewline: true}) + out = appendSourceLines(out, ours...) + out = ensureTrailingNewline(out) + out = append(out, fileLine{text: "=======", hasNewline: true}) + out = appendSourceLines(out, theirs...) + out = ensureTrailingNewline(out) + out = append(out, fileLine{text: ">>>>>>> New (Generated by Speakeasy)", hasNewline: true}) + return out +} + +func appendSourceLines(dst []fileLine, src ...fileLine) []fileLine { + return append(dst, src...) 
+} + +func ensureTrailingNewline(lines []fileLine) []fileLine { + if len(lines) == 0 { + return lines + } + lines[len(lines)-1].hasNewline = true + return lines +} + +func parseHunkHeader(header string) (int, int, int, error) { + matches := hunkHeaderPattern.FindStringSubmatch(header) + if len(matches) == 0 { + return 0, 0, 0, fmt.Errorf("invalid hunk header %q", header) + } + + oldStart, err := parseNumber(matches[1]) + if err != nil { + return 0, 0, 0, err + } + + oldCount := 1 + if matches[2] != "" { + oldCount, err = parseNumber(matches[2]) + if err != nil { + return 0, 0, 0, err + } + } + + newCount := 1 + if matches[4] != "" { + newCount, err = parseNumber(matches[4]) + if err != nil { + return 0, 0, 0, err + } + } + + return oldStart, oldCount, newCount, nil +} + +func markEOFMarkers(lines []patchLine, oldCount, newCount int) { + oldSeen := 0 + newSeen := 0 + + for i := range lines { + line := lines[i] + if line.kind == ' ' || line.kind == '-' { + oldSeen++ + } + if line.kind == ' ' || line.kind == '+' { + newSeen++ + } + if !isEOFMarkerCandidate(line) { + continue + } + + lines[i].oldEOF = (line.kind == ' ' || line.kind == '-') && oldSeen == oldCount + lines[i].newEOF = (line.kind == ' ' || line.kind == '+') && newSeen == newCount + } +} + +func parsePatchLine(raw string) (patchLine, bool, error) { + if raw == "" { + return patchLine{}, true, nil + } + + switch raw[0] { + case ' ', '-', '+': + return patchLine{ + kind: raw[0], + text: trimSingleLineEnding(raw[1:]), + hasNewline: strings.HasSuffix(raw, "\n"), + }, false, nil + default: + return patchLine{}, false, fmt.Errorf("unexpected hunk line %q", strings.TrimRight(raw, "\n")) + } +} + +func splitFileLines(content []byte) []fileLine { + rawLines := splitLinesPreserveNewline(string(content)) + lines := make([]fileLine, 0, len(rawLines)) + for _, raw := range rawLines { + lines = append(lines, fileLine{ + text: trimSingleLineEnding(raw), + hasNewline: strings.HasSuffix(raw, "\n"), + }) + } + if 
len(content) > 0 && content[len(content)-1] == '\n' { + lines = append(lines, fileLine{text: "", hasNewline: true, eofMarker: true}) + } + return lines +} + +func joinFileLines(lines []fileLine) []byte { + var buf bytes.Buffer + for _, line := range lines { + if line.eofMarker { + continue + } + buf.WriteString(line.text) + if line.hasNewline { + buf.WriteByte('\n') + } + } + return buf.Bytes() +} + +func trimSingleLineEnding(s string) string { + s = strings.TrimSuffix(s, "\n") + return s +} + +func isEOFMarkerCandidate(line patchLine) bool { + if !line.hasNewline { + return false + } + return strings.TrimSuffix(line.text, "\r") == "" +} + +func splitLinesPreserveNewline(s string) []string { + if s == "" { + return nil + } + lines := strings.SplitAfter(s, "\n") + if lines[len(lines)-1] == "" { + lines = lines[:len(lines)-1] + } + return lines +} + +func normalizePatchForValidation(patchData []byte) []byte { + trimmed := bytes.TrimSpace(patchData) + if bytes.HasPrefix(trimmed, []byte("diff --git ")) { + return patchData + } + return []byte("diff --git a/__patch__ b/__patch__\n" + string(patchData)) +} + +func parseNumber(raw string) (int, error) { + var value int + if _, err := fmt.Sscanf(raw, "%d", &value); err != nil { + return 0, err + } + return value, nil +} diff --git a/apply_test.go b/apply_test.go new file mode 100644 index 0000000..3824c11 --- /dev/null +++ b/apply_test.go @@ -0,0 +1,877 @@ +package git_diff_parser_test + +import ( + "bytes" + "errors" + "os" + "path/filepath" + "testing" + + "github.com/pmezard/go-difflib/difflib" + git_diff_parser "github.com/speakeasy-api/git-diff-parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type applyFixtureFiles struct { + src []byte + patch []byte + out []byte +} + +func TestApplyFile_TextFixtures(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + fixture string + wantErr string + conflict bool + }{ + {name: "new file", fixture: "text_fragment_new"}, 
+ {name: "add start", fixture: "text_fragment_add_start"}, + {name: "add middle", fixture: "text_fragment_add_middle"}, + {name: "add end", fixture: "text_fragment_add_end"}, + {name: "add end no eof", fixture: "text_fragment_add_end_noeol"}, + {name: "change start", fixture: "text_fragment_change_start"}, + {name: "change middle", fixture: "text_fragment_change_middle"}, + {name: "change end", fixture: "text_fragment_change_end"}, + {name: "change end eol", fixture: "text_fragment_change_end_eol"}, + {name: "change exact", fixture: "text_fragment_change_exact"}, + {name: "change single no eof", fixture: "text_fragment_change_single_noeol"}, + {name: "delete all", fixture: "text_fragment_delete_all"}, + {name: "short src before", fixture: "text_fragment_error_short_src_before", wantErr: "patch conflict", conflict: true}, + {name: "short src", fixture: "text_fragment_error_short_src", wantErr: "patch conflict", conflict: true}, + {name: "context conflict", fixture: "text_fragment_error_context_conflict", wantErr: "patch conflict", conflict: true}, + {name: "delete conflict", fixture: "text_fragment_error_delete_conflict", wantErr: "patch conflict", conflict: true}, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + files := loadApplyFixture(t, test.fixture) + applied, err := git_diff_parser.ApplyFile(files.src, files.patch) + + if test.wantErr != "" { + require.Error(t, err) + assert.Contains(t, err.Error(), test.wantErr) + if test.conflict { + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") + } + return + } + + require.NoError(t, err) + assert.True(t, bytes.Equal(expectedApplyFixtureOutput(t, files), applied)) + }) + } +} + +func 
TestApplyFile_RejectsUnsupportedFixtures(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + patch []byte + wantErr string + }{ + { + name: "rename patch", + patch: []byte(`diff --git a/sdk.go b/custom/sdk.go +similarity index 100% +rename from sdk.go +rename to custom/sdk.go +`), + wantErr: "patch contains no hunks", + }, + { + name: "mode only patch", + patch: []byte(`diff --git a/sdk.go b/sdk.go +old mode 100644 +new mode 100755 +`), + wantErr: "file mode changes are not supported", + }, + { + name: "binary patch", + patch: []byte(`diff --git a/sdk.go b/sdk.go +GIT binary patch +literal 3 +abc +`), + wantErr: "binary patches are not supported", + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n"), test.patch) + require.Error(t, err) + assert.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestApplyFile_RejectsHeaderOnlyAndNoOpPatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + patch []byte + wantErr string + }{ + { + name: "header only", + patch: []byte(`diff --git a/sdk.go b/sdk.go +--- a/sdk.go ++++ b/sdk.go +`), + wantErr: "patch contains no hunks", + }, + { + name: "no op hunk", + patch: []byte(`diff --git a/sdk.go b/sdk.go +--- a/sdk.go ++++ b/sdk.go +@@ -1,1 +1,1 @@ + package testsdk +`), + wantErr: "patch contains no effective changes", + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n"), test.patch) + require.Error(t, err) + assert.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestApplyFile_NoNewlineMatrix(t *testing.T) { + t.Parallel() + + files := []struct { + name string + content []byte + }{ + {name: "0", content: []byte("a\nb\n")}, + {name: "1", content: []byte("a\nb\nc\n")}, + {name: "2", content: []byte("a\nb")}, + {name: "3", 
content: []byte("a\nc\nb")}, + } + + for i := range files { + for j := range files { + if i == j { + continue + } + + from := files[i] + to := files[j] + name := from.name + " to " + to.name + + t.Run(name, func(t *testing.T) { + t.Parallel() + + patch := mustReadFile(t, filepath.Join("testdata", "apply", "t4101", "diff."+from.name+"-"+to.name)) + applied, err := git_diff_parser.ApplyFile(from.content, patch) + require.NoError(t, err) + assert.Equal(t, to.content, applied) + }) + } + } +} + +func TestApplyFile_BoundaryCases(t *testing.T) { + t.Parallel() + + original := []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n") + tests := []struct { + name string + want []byte + }{ + {name: "add head", want: []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, + {name: "insert second", want: []byte("b\na\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, + {name: "modify head", want: []byte("a\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, + {name: "delete head", want: []byte("c\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, + {name: "add tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n")}, + {name: "modify tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\nz\n")}, + {name: "delete tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\n")}, + } + + for _, context := range []int{3, 0} { + context := context + for _, test := range tests { + test := test + t.Run(test.name+" context "+contextLabel(context), func(t *testing.T) { + t.Parallel() + + patch := buildPatchWithContext(t, "victim", original, test.want, context) + applied, err := git_diff_parser.ApplyFile(original, patch) + require.NoError(t, err) + assert.Equal(t, test.want, applied) + }) + } + } +} + +func TestApplyFile_OffsetPatches(t *testing.T) { + 
t.Parallel() + + original := []byte("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n") + target := []byte("1\n2\n3\n4\n5\n6\n7\na\nb\nc\nd\ne\n8\n9\n10\n11\n12\n") + basePatch := buildPatchWithContext(t, "file", original, target, 3) + + tests := []struct { + name string + header string + }{ + {name: "unmodified patch", header: "@@ -5,6 +5,11 @@"}, + {name: "minus offset", header: "@@ -2,6 +2,11 @@"}, + {name: "plus offset", header: "@@ -7,6 +7,11 @@"}, + {name: "big offset", header: "@@ -19,6 +19,11 @@"}, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + patch := rewriteFirstHunkHeader(basePatch, test.header) + applied, err := git_diff_parser.ApplyFile(original, patch) + require.NoError(t, err) + assert.Equal(t, target, applied) + }) + } +} + +func TestApplyFile_DamagedContextPatchesConflictWithoutFuzz(t *testing.T) { + t.Parallel() + + original := []byte("1\n2\n3\n4\n5\n6\n7\n8\n9\n10\n11\n12\n") + target := []byte("1\n2\n3\n4\n5\n6\n7\na\nb\nc\nd\ne\n8\n9\n10\n11\n12\n") + basePatch := buildPatchWithContext(t, "file", original, target, 3) + damaged := bytes.Replace(basePatch, []byte("\n 5\n"), []byte("\n S\n"), 1) + + tests := []struct { + name string + header string + }{ + {name: "no offset", header: "@@ -5,6 +5,11 @@"}, + {name: "minus offset", header: "@@ -2,6 +2,11 @@"}, + {name: "plus offset", header: "@@ -7,6 +7,11 @@"}, + {name: "big offset", header: "@@ -19,6 +19,11 @@"}, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + patch := rewriteFirstHunkHeader(damaged, test.header) + applied, err := git_diff_parser.ApplyFile(original, patch) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + }) + } +} + +func TestApplyFile_EmptyContextPatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + 
original []byte + target []byte + }{ + { + name: "delete blank-lined middle line", + original: []byte("\n\nA\nB\nC\n\n"), + target: []byte("\n\nA\nC\n\n"), + }, + { + name: "insert middle", + original: []byte("alpha\ncharlie\n"), + target: []byte("alpha\nbravo\ncharlie\n"), + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + patch := buildPatchWithContext(t, "file", test.original, test.target, 0) + applied, err := git_diff_parser.ApplyFile(test.original, patch) + require.NoError(t, err) + assert.Equal(t, test.target, applied) + }) + } +} + +func TestApplyFile_EmptyContextNoTrailingNewlinePatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + original []byte + target []byte + patch []byte + }{ + { + name: "append no newline tail", + original: []byte("\n\nA\nC\n\n"), + target: []byte("\n\nA\nC\n\nQ"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -6,0 +7 @@ ++Q +\ No newline at end of file +`), + }, + { + name: "modify tail no newline", + original: []byte("alpha\nbravo"), + target: []byte("alpha\ncharlie"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -2 +2 @@ +-bravo +\ No newline at end of file ++charlie +\ No newline at end of file +`), + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + applied, err := git_diff_parser.ApplyFile(test.original, test.patch) + require.NoError(t, err) + assert.Equal(t, test.target, applied) + }) + } +} + +func TestApplyFile_RelocatesHunkWhenContextStillMatches(t *testing.T) { + t.Parallel() + + originalPristine := []byte("package testsdk\n\ntype Status struct{}\n") + patchData := buildPatch(t, "status.go", originalPristine, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + shiftedPristine := []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype 
Status struct{}\n") + + applied, err := git_diff_parser.ApplyFile(shiftedPristine, patchData) + require.NoError(t, err) + assert.Equal(t, []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n"), applied) +} + +func TestApplyFile_RelocatesToNearestMatchingBlock(t *testing.T) { + t.Parallel() + + original := []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nanchor\ncommon\nvalue-old\nend\n") + target := []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nanchor\ncommon\nvalue-new\nend\n") + shifted := []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nextra\nanchor\ncommon\nvalue-old\nend\n") + + patch := buildPatchWithContext(t, "dup.txt", original, target, 1) + applied, err := git_diff_parser.ApplyFile(shifted, patch) + require.NoError(t, err) + assert.Equal(t, []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nextra\nanchor\ncommon\nvalue-new\nend\n"), applied) +} + +func TestApplyFile_MultipleHunks(t *testing.T) { + t.Parallel() + + original := []byte("line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\n") + target := []byte("line 1\nline two\nline 3\nline 4\nline 5\nline six\nline 7\nline 8\n") + + patch := buildPatchWithContext(t, "multi.txt", original, target, 1) + applied, err := git_diff_parser.ApplyFile(original, patch) + require.NoError(t, err) + assert.Equal(t, target, applied) +} + +func TestApplyFile_MultipleHunksOneConflict(t *testing.T) { + t.Parallel() + + original := []byte("line 1\nline 2\nline 3\nline 4\nline 5\nline 6\nline 7\nline 8\n") + target := []byte("line 1\nline two\nline 3\nline 4\nline 5\nline six\nline 7\nline 8\n") + current := []byte("line 1\nline 2\nline 3\nline 4\nline 5\nline VI\nline 7\nline 8\n") + + patch := buildPatchWithContext(t, "multi.txt", original, target, 1) + applied, err := git_diff_parser.ApplyFile(current, patch) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + 
require.ErrorAs(t, err, &conflictErr) + assert.Contains(t, string(applied), "line two") + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), "line VI") + assert.Contains(t, string(applied), "line six") +} + +func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { + t.Parallel() + + base := []byte("package testsdk\n\ntype Status struct{}\n") + current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") + patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + + applied, err := git_diff_parser.ApplyFile(current, patchData) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") + assert.Contains(t, string(applied), "func (s *Status) String() string") +} + +func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + current []byte + patch []byte + }{ + { + name: "ending patch", + current: []byte("a\nb\nc\n"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,2 +1,3 @@ + a + b ++c +`), + }, + { + name: "beginning patch", + current: []byte("a\nb\nc\n"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,2 +1,3 @@ ++a + b + c +`), + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + applied, err := git_diff_parser.ApplyFile(test.current, test.patch) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + }) 
+ } +} + +func TestApplyFile_RejectsAlreadyAppliedMiddlePatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + current []byte + patch []byte + }{ + { + name: "middle insertion", + current: []byte("start\nmiddle\ninserted\nend\n"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,3 +1,4 @@ + start + middle ++inserted + end +`), + }, + { + name: "replacement already applied", + current: []byte("start\nnew value\nend\n"), + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,3 +1,3 @@ + start +-old value ++new value + end +`), + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + applied, err := git_diff_parser.ApplyFile(test.current, test.patch) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + }) + } +} + +func TestApplyFile_RejectsMultiFileDiff(t *testing.T) { + t.Parallel() + + patchData := []byte(`diff --git a/sdk.go b/sdk.go +--- a/sdk.go ++++ b/sdk.go +@@ -1,3 +1,4 @@ + package testsdk + ++// sdk custom + type SDK struct{} +diff --git a/models/components/pet.go b/models/components/pet.go +--- a/models/components/pet.go ++++ b/models/components/pet.go +@@ -1,3 +1,4 @@ + package components + ++// pet custom + type Pet struct{} +`) + + _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n\ntype SDK struct{}\n"), patchData) + require.Error(t, err) + assert.Contains(t, err.Error(), "expected exactly 1 file diff") +} + +func TestApplyFile_RejectsMalformedPatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + patch []byte + wantErr string + }{ + { + name: "non patch input", + patch: []byte(`I am not a patch +I look nothing like a patch +git apply must fail +`), + wantErr: "unsupported patch syntax", + }, + { + name: "invalid hunk header", + patch: []byte(`diff --git a/file 
b/file +--- a/file ++++ b/file +@@ -x,1 +1,1 @@ +-a ++b +`), + wantErr: "unsupported patch syntax", + }, + { + name: "unexpected hunk line prefix", + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,1 +1,1 @@ +!a +`), + wantErr: "unexpected hunk line", + }, + { + name: "no newline marker without preceding line", + patch: []byte(`diff --git a/file b/file +--- a/file ++++ b/file +@@ -1,1 +1,1 @@ +\ No newline at end of file +`), + wantErr: "unexpected no-newline marker without a preceding patch line", + }, + { + name: "unsupported header garbage", + patch: []byte(`diff --git a/file b/file +copy from file +copy to file-copy +--- a/file ++++ b/file-copy +@@ -1,1 +1,1 @@ +-a ++b +`), + wantErr: "unsupported patch syntax", + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyFile([]byte("a\n"), test.patch) + require.Error(t, err) + assert.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestApplyFile_RejectsAdditionalUnsupportedPatches(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + patch []byte + wantErr string + }{ + { + name: "new file mode with hunk", + patch: []byte(`diff --git a/new.go b/new.go +new file mode 100644 +--- /dev/null ++++ b/new.go +@@ -0,0 +1,2 @@ ++package test ++ +`), + wantErr: "file mode changes are not supported", + }, + { + name: "deleted file mode with hunk", + patch: []byte(`diff --git a/old.go b/old.go +deleted file mode 100644 +--- a/old.go ++++ /dev/null +@@ -1,1 +0,0 @@ +-package test +`), + wantErr: "patches may only modify existing files", + }, + { + name: "binary files differ", + patch: []byte(`diff --git a/file.bin b/file.bin +Binary files a/file.bin and b/file.bin differ +`), + wantErr: "patch contains no hunks", + }, + { + name: "create and rename", + patch: []byte(`diff --git a/1 b/2 +new file mode 100644 +rename from 1 +rename to 2 +`), + wantErr: "file mode changes are not supported", 
+ }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyFile([]byte("package test\n"), test.patch) + require.Error(t, err) + assert.Contains(t, err.Error(), test.wantErr) + }) + } +} + +func TestApplyFile_ShrinkFailures(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + original []byte + target []byte + current []byte + }{ + { + name: "preimage larger than source", + original: []byte("1\n2\n3\n4\n5\n6\n7\n8\n999999\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\n\n"), + target: []byte("11\n2\n3\n4\n5\n6\n7\n8\n9\nA\nB\nC\nD\nE\nF\nG\nHH\nI\nJ\n\n"), + current: []byte("2\n3\n4\n5\n6\n7\n8\n999999\nA\nB\nC\nD\nE\nF\nG\nH\nI\nJ\n"), + }, + { + name: "near eof overrun", + original: []byte("a\nb\nc\nd\ne\n"), + target: []byte("a\nb\nc\nd\nz\n"), + current: []byte("a\nb\nc\nd\n"), + }, + } + + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + patch := buildPatch(t, "F", test.original, test.target) + applied, err := git_diff_parser.ApplyFile(test.current, patch) + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + }) + } +} + +func TestApplyFile_CRLFPreservation(t *testing.T) { + t.Parallel() + + pristine := []byte("alpha\r\nbeta\r\n") + target := []byte("alpha\r\nbravo\r\n") + patch := buildPatch(t, "crlf.txt", pristine, target) + + applied, err := git_diff_parser.ApplyFile(pristine, patch) + require.NoError(t, err) + assert.Equal(t, target, applied) +} + +func loadApplyFixture(t *testing.T, name string) applyFixtureFiles { + t.Helper() + + load := func(ext string) []byte { + t.Helper() + + path := filepath.Join("testdata", "apply", name+"."+ext) + data, err := os.ReadFile(path) + if errors.Is(err, os.ErrNotExist) { + return nil + } + require.NoError(t, err) + return data + } + + return 
applyFixtureFiles{ + src: load("src"), + patch: load("patch"), + out: load("out"), + } +} + +func mustReadFile(t *testing.T, path string) []byte { + t.Helper() + + data, err := os.ReadFile(path) + require.NoError(t, err) + return data +} + +func expectedApplyFixtureOutput(t *testing.T, files applyFixtureFiles) []byte { + t.Helper() + + if files.out == nil { + return nil + } + + parsed, errs := git_diff_parser.Parse(string(files.patch)) + require.Empty(t, errs) + require.Len(t, parsed.FileDiff, 1) + require.Len(t, parsed.FileDiff[0].Hunks, 1) + + hunk := parsed.FileDiff[0].Hunks[0] + start := hunk.StartLineNumberOld - 1 + if start < 0 { + start = 0 + } + + sourceLines := splitBytesLines(files.src) + end := start + hunk.CountOld + if end > len(sourceLines) { + end = len(sourceLines) + } + + expected := append([]byte{}, files.out...) + for _, line := range sourceLines[end:] { + expected = append(expected, line...) + } + return expected +} + +func splitBytesLines(content []byte) [][]byte { + if len(content) == 0 { + return nil + } + + lines := bytes.SplitAfter(content, []byte("\n")) + if len(lines) > 0 && len(lines[len(lines)-1]) == 0 { + lines = lines[:len(lines)-1] + } + return lines +} + +func contextLabel(context int) string { + if context == 0 { + return "0" + } + return "3" +} + +func rewriteFirstHunkHeader(patch []byte, header string) []byte { + lines := bytes.Split(patch, []byte("\n")) + for i, line := range lines { + if bytes.HasPrefix(line, []byte("@@ ")) { + lines[i] = []byte(header) + return bytes.Join(lines, []byte("\n")) + } + } + return patch +} + +func buildPatch(t *testing.T, path string, pristine, materialized []byte) []byte { + t.Helper() + return buildPatchWithContext(t, path, pristine, materialized, 3) +} + +func buildPatchWithContext(t *testing.T, path string, pristine, materialized []byte, context int) []byte { + t.Helper() + + diff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ + A: difflib.SplitLines(string(pristine)), + B: 
difflib.SplitLines(string(materialized)), + FromFile: "a/" + path, + ToFile: "b/" + path, + Context: context, + }) + require.NoError(t, err) + require.NotEmpty(t, diff) + + return append([]byte("diff --git a/"+path+" b/"+path+"\n"), []byte(diff)...) +} + +func TestApplyFile_PreservesExactBytes(t *testing.T) { + t.Parallel() + + files := loadApplyFixture(t, "text_fragment_change_single_noeol") + applied, err := git_diff_parser.ApplyFile(files.src, files.patch) + require.NoError(t, err) + assert.True(t, bytes.Equal(files.out, applied)) +} diff --git a/parser.go b/parser.go index 70cdcdd..d16ba94 100644 --- a/parser.go +++ b/parser.go @@ -59,6 +59,9 @@ func (changes *ChangeList) IsSignificant() bool { func NewHunk(line string) (Hunk, error) { namedHunkRegex := regexp.MustCompile(`(?m)^@@ -(?P\d+),?(?P\d+)? \+(?P\d+),?(?P\d+)? @@`) match := namedHunkRegex.FindStringSubmatch(line) + if len(match) == 0 { + return Hunk{}, fmt.Errorf("invalid hunk header: %q", line) + } result := make(map[string]string) for i, name := range namedHunkRegex.SubexpNames() { if i != 0 && name != "" { diff --git a/testdata/apply/t4101/diff.0-1 b/testdata/apply/t4101/diff.0-1 new file mode 100644 index 0000000..1010a88 --- /dev/null +++ b/testdata/apply/t4101/diff.0-1 @@ -0,0 +1,6 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,3 @@ + a + b ++c diff --git a/testdata/apply/t4101/diff.0-2 b/testdata/apply/t4101/diff.0-2 new file mode 100644 index 0000000..36460a2 --- /dev/null +++ b/testdata/apply/t4101/diff.0-2 @@ -0,0 +1,7 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,2 @@ + a +-b ++b +\ No newline at end of file diff --git a/testdata/apply/t4101/diff.0-3 b/testdata/apply/t4101/diff.0-3 new file mode 100644 index 0000000..b281c43 --- /dev/null +++ b/testdata/apply/t4101/diff.0-3 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,3 @@ + a +-b ++c ++b +\ No newline at end of file diff --git a/testdata/apply/t4101/diff.1-0 b/testdata/apply/t4101/diff.1-0 new file mode 100644 index 0000000..f0a2e92 --- 
/dev/null +++ b/testdata/apply/t4101/diff.1-0 @@ -0,0 +1,6 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,2 @@ + a + b +-c diff --git a/testdata/apply/t4101/diff.1-2 b/testdata/apply/t4101/diff.1-2 new file mode 100644 index 0000000..2a440a5 --- /dev/null +++ b/testdata/apply/t4101/diff.1-2 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,2 @@ + a +-b +-c ++b +\ No newline at end of file diff --git a/testdata/apply/t4101/diff.1-3 b/testdata/apply/t4101/diff.1-3 new file mode 100644 index 0000000..61aff97 --- /dev/null +++ b/testdata/apply/t4101/diff.1-3 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,3 @@ + a +-b + c ++b +\ No newline at end of file diff --git a/testdata/apply/t4101/diff.2-0 b/testdata/apply/t4101/diff.2-0 new file mode 100644 index 0000000..c2e71ee --- /dev/null +++ b/testdata/apply/t4101/diff.2-0 @@ -0,0 +1,7 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,2 @@ + a +-b +\ No newline at end of file ++b diff --git a/testdata/apply/t4101/diff.2-1 b/testdata/apply/t4101/diff.2-1 new file mode 100644 index 0000000..a66d9fd --- /dev/null +++ b/testdata/apply/t4101/diff.2-1 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,3 @@ + a +-b +\ No newline at end of file ++b ++c diff --git a/testdata/apply/t4101/diff.2-3 b/testdata/apply/t4101/diff.2-3 new file mode 100644 index 0000000..5633c83 --- /dev/null +++ b/testdata/apply/t4101/diff.2-3 @@ -0,0 +1,7 @@ +--- a/frotz ++++ b/frotz +@@ -1,2 +1,3 @@ + a ++c + b +\ No newline at end of file diff --git a/testdata/apply/t4101/diff.3-0 b/testdata/apply/t4101/diff.3-0 new file mode 100644 index 0000000..10b1a41 --- /dev/null +++ b/testdata/apply/t4101/diff.3-0 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,2 @@ + a +-c +-b +\ No newline at end of file ++b diff --git a/testdata/apply/t4101/diff.3-1 b/testdata/apply/t4101/diff.3-1 new file mode 100644 index 0000000..c799c60 --- /dev/null +++ b/testdata/apply/t4101/diff.3-1 @@ -0,0 +1,8 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,3 @@ + a ++b + c +-b +\ No 
newline at end of file diff --git a/testdata/apply/t4101/diff.3-2 b/testdata/apply/t4101/diff.3-2 new file mode 100644 index 0000000..f8d1ba6 --- /dev/null +++ b/testdata/apply/t4101/diff.3-2 @@ -0,0 +1,7 @@ +--- a/frotz ++++ b/frotz +@@ -1,3 +1,2 @@ + a +-c + b +\ No newline at end of file diff --git a/testdata/apply/text_fragment_add_end.out b/testdata/apply/text_fragment_add_end.out new file mode 100644 index 0000000..648fd44 --- /dev/null +++ b/testdata/apply/text_fragment_add_end.out @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +new line a +new line b diff --git a/testdata/apply/text_fragment_add_end.patch b/testdata/apply/text_fragment_add_end.patch new file mode 100644 index 0000000..de708be --- /dev/null +++ b/testdata/apply/text_fragment_add_end.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_end.src b/gitdiff/testdata/apply/fragment_add_end.src +--- a/gitdiff/testdata/apply/fragment_add_end.src ++++ b/gitdiff/testdata/apply/fragment_add_end.src +@@ -1,3 +1,5 @@ + line 1 + line 2 + line 3 ++new line a ++new line b diff --git a/testdata/apply/text_fragment_add_end.src b/testdata/apply/text_fragment_add_end.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/testdata/apply/text_fragment_add_end.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_add_end_noeol.out b/testdata/apply/text_fragment_add_end_noeol.out new file mode 100644 index 0000000..94c99a3 --- /dev/null +++ b/testdata/apply/text_fragment_add_end_noeol.out @@ -0,0 +1,5 @@ +line 1 +line 2 +line 3 +line 4 +line 5 diff --git a/testdata/apply/text_fragment_add_end_noeol.patch b/testdata/apply/text_fragment_add_end_noeol.patch new file mode 100644 index 0000000..ec3cea4 --- /dev/null +++ b/testdata/apply/text_fragment_add_end_noeol.patch @@ -0,0 +1,11 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_add_end_noeol.src b/gitdiff/testdata/apply/text_fragment_add_end_noeol.src +--- 
a/gitdiff/testdata/apply/text_fragment_add_end_noeol.src ++++ b/gitdiff/testdata/apply/text_fragment_add_end_noeol.src +@@ -1,3 +1,5 @@ + line 1 + line 2 +-line 3 +\ No newline at end of file ++line 3 ++line 4 ++line 5 diff --git a/testdata/apply/text_fragment_add_end_noeol.src b/testdata/apply/text_fragment_add_end_noeol.src new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/testdata/apply/text_fragment_add_end_noeol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 \ No newline at end of file diff --git a/testdata/apply/text_fragment_add_middle.out b/testdata/apply/text_fragment_add_middle.out new file mode 100644 index 0000000..ded20d8 --- /dev/null +++ b/testdata/apply/text_fragment_add_middle.out @@ -0,0 +1,5 @@ +line 1 +line 2 +new line a +new line b +line 3 diff --git a/testdata/apply/text_fragment_add_middle.patch b/testdata/apply/text_fragment_add_middle.patch new file mode 100644 index 0000000..43aee3b --- /dev/null +++ b/testdata/apply/text_fragment_add_middle.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_middle.src b/gitdiff/testdata/apply/fragment_add_middle.src +--- a/gitdiff/testdata/apply/fragment_add_middle.src ++++ b/gitdiff/testdata/apply/fragment_add_middle.src +@@ -1,3 +1,5 @@ + line 1 + line 2 ++new line a ++new line b + line 3 diff --git a/testdata/apply/text_fragment_add_middle.src b/testdata/apply/text_fragment_add_middle.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/testdata/apply/text_fragment_add_middle.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_add_start.out b/testdata/apply/text_fragment_add_start.out new file mode 100644 index 0000000..b153f60 --- /dev/null +++ b/testdata/apply/text_fragment_add_start.out @@ -0,0 +1,4 @@ +new line a +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_add_start.patch b/testdata/apply/text_fragment_add_start.patch new file mode 100644 index 0000000..5218764 --- /dev/null +++ 
b/testdata/apply/text_fragment_add_start.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/fragment_add_start.src b/gitdiff/testdata/apply/fragment_add_start.src +--- a/gitdiff/testdata/apply/fragment_add_start.src ++++ b/gitdiff/testdata/apply/fragment_add_start.src +@@ -1,3 +1,4 @@ ++new line a + line 1 + line 2 + line 3 diff --git a/testdata/apply/text_fragment_add_start.src b/testdata/apply/text_fragment_add_start.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/testdata/apply/text_fragment_add_start.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_change_end.out b/testdata/apply/text_fragment_change_end.out new file mode 100644 index 0000000..e3cbece --- /dev/null +++ b/testdata/apply/text_fragment_change_end.out @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +new line a diff --git a/testdata/apply/text_fragment_change_end.patch b/testdata/apply/text_fragment_change_end.patch new file mode 100644 index 0000000..5655880 --- /dev/null +++ b/testdata/apply/text_fragment_change_end.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_end.src b/gitdiff/testdata/apply/text_fragment_change_end.src +--- a/gitdiff/testdata/apply/text_fragment_change_end.src ++++ b/gitdiff/testdata/apply/text_fragment_change_end.src +@@ -7,4 +7,4 @@ line 6 + line 7 + line 8 + line 9 +-line 10 ++new line a diff --git a/testdata/apply/text_fragment_change_end.src b/testdata/apply/text_fragment_change_end.src new file mode 100644 index 0000000..fa2da6e --- /dev/null +++ b/testdata/apply/text_fragment_change_end.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 diff --git a/testdata/apply/text_fragment_change_end_eol.out b/testdata/apply/text_fragment_change_end_eol.out new file mode 100644 index 0000000..8cf2f17 --- /dev/null +++ b/testdata/apply/text_fragment_change_end_eol.out @@ -0,0 +1,3 @@ +line 1 
+line 2 +line 3 \ No newline at end of file diff --git a/testdata/apply/text_fragment_change_end_eol.patch b/testdata/apply/text_fragment_change_end_eol.patch new file mode 100644 index 0000000..f1c9477 --- /dev/null +++ b/testdata/apply/text_fragment_change_end_eol.patch @@ -0,0 +1,10 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +index a92d664..8cf2f17 100644 +--- a/gitdiff/testdata/apply/text_fragment_remove_last_eol.src ++++ b/gitdiff/testdata/apply/text_fragment_remove_last_eol.src +@@ -1,3 +1,3 @@ + line 1 + line 2 +-line 3 ++line 3 +\ No newline at end of file diff --git a/testdata/apply/text_fragment_change_end_eol.src b/testdata/apply/text_fragment_change_end_eol.src new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/testdata/apply/text_fragment_change_end_eol.src @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_change_exact.out b/testdata/apply/text_fragment_change_exact.out new file mode 100644 index 0000000..4655a0a --- /dev/null +++ b/testdata/apply/text_fragment_change_exact.out @@ -0,0 +1,19 @@ +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +new line a +line +line +line diff --git a/testdata/apply/text_fragment_change_exact.patch b/testdata/apply/text_fragment_change_exact.patch new file mode 100644 index 0000000..395de4d --- /dev/null +++ b/testdata/apply/text_fragment_change_exact.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_exact.src b/gitdiff/testdata/apply/text_fragment_change_exact.src +--- a/gitdiff/testdata/apply/text_fragment_change_exact.src ++++ b/gitdiff/testdata/apply/text_fragment_change_exact.src +@@ -13,7 +13,7 @@ line + line + line + line +-line ++new line a + line + line + line diff --git a/testdata/apply/text_fragment_change_exact.src b/testdata/apply/text_fragment_change_exact.src new file mode 100644 index 
0000000..316a8f0 --- /dev/null +++ b/testdata/apply/text_fragment_change_exact.src @@ -0,0 +1,30 @@ +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line +line diff --git a/testdata/apply/text_fragment_change_middle.out b/testdata/apply/text_fragment_change_middle.out new file mode 100644 index 0000000..fd0a9ad --- /dev/null +++ b/testdata/apply/text_fragment_change_middle.out @@ -0,0 +1,9 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +new line a +line 7 +line 8 +line 9 diff --git a/testdata/apply/text_fragment_change_middle.patch b/testdata/apply/text_fragment_change_middle.patch new file mode 100644 index 0000000..139a0fe --- /dev/null +++ b/testdata/apply/text_fragment_change_middle.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_middle.src b/gitdiff/testdata/apply/text_fragment_change_middle.src +--- a/gitdiff/testdata/apply/text_fragment_change_middle.src ++++ b/gitdiff/testdata/apply/text_fragment_change_middle.src +@@ -3,7 +3,7 @@ line 2 + line 3 + line 4 + line 5 +-line 6 ++new line a + line 7 + line 8 + line 9 diff --git a/testdata/apply/text_fragment_change_middle.src b/testdata/apply/text_fragment_change_middle.src new file mode 100644 index 0000000..fa2da6e --- /dev/null +++ b/testdata/apply/text_fragment_change_middle.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 diff --git a/testdata/apply/text_fragment_change_single_noeol.out b/testdata/apply/text_fragment_change_single_noeol.out new file mode 100644 index 0000000..ed59e08 --- /dev/null +++ b/testdata/apply/text_fragment_change_single_noeol.out @@ -0,0 +1 @@ +new line a \ No newline at end of file diff --git a/testdata/apply/text_fragment_change_single_noeol.patch b/testdata/apply/text_fragment_change_single_noeol.patch new file mode 100644 index 0000000..f945234 --- /dev/null +++ 
b/testdata/apply/text_fragment_change_single_noeol.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_single_noeol.src b/gitdiff/testdata/apply/text_fragment_change_single_noeol.src +--- a/gitdiff/testdata/apply/text_fragment_change_single_noeol.src ++++ b/gitdiff/testdata/apply/text_fragment_change_single_noeol.src +@@ -1 +1 @@ +-line 1 +\ No newline at end of file ++new line a +\ No newline at end of file diff --git a/testdata/apply/text_fragment_change_single_noeol.src b/testdata/apply/text_fragment_change_single_noeol.src new file mode 100644 index 0000000..dcf168c --- /dev/null +++ b/testdata/apply/text_fragment_change_single_noeol.src @@ -0,0 +1 @@ +line 1 \ No newline at end of file diff --git a/testdata/apply/text_fragment_change_start.out b/testdata/apply/text_fragment_change_start.out new file mode 100644 index 0000000..5156941 --- /dev/null +++ b/testdata/apply/text_fragment_change_start.out @@ -0,0 +1,4 @@ +new line a +line 2 +line 3 +line 4 diff --git a/testdata/apply/text_fragment_change_start.patch b/testdata/apply/text_fragment_change_start.patch new file mode 100644 index 0000000..d0a6653 --- /dev/null +++ b/testdata/apply/text_fragment_change_start.patch @@ -0,0 +1,9 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_change_start.src b/gitdiff/testdata/apply/text_fragment_change_start.src +--- a/gitdiff/testdata/apply/text_fragment_change_start.src ++++ b/gitdiff/testdata/apply/text_fragment_change_start.src +@@ -1,4 +1,4 @@ +-line 1 ++new line a + line 2 + line 3 + line 4 diff --git a/testdata/apply/text_fragment_change_start.src b/testdata/apply/text_fragment_change_start.src new file mode 100644 index 0000000..fa2da6e --- /dev/null +++ b/testdata/apply/text_fragment_change_start.src @@ -0,0 +1,10 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 diff --git a/testdata/apply/text_fragment_delete_all.out b/testdata/apply/text_fragment_delete_all.out new file mode 100644 index 
0000000..e69de29 diff --git a/testdata/apply/text_fragment_delete_all.patch b/testdata/apply/text_fragment_delete_all.patch new file mode 100644 index 0000000..8a2fb9c --- /dev/null +++ b/testdata/apply/text_fragment_delete_all.patch @@ -0,0 +1,8 @@ +diff --git a/gitdiff/testdata/apply/fragment_delete_all.src b/gitdiff/testdata/apply/fragment_delete_all.src +--- a/gitdiff/testdata/apply/fragment_delete_all.src ++++ b/gitdiff/testdata/apply/fragment_delete_all.src +@@ -1,4 +0,0 @@ +-line a +-line b +-line c +-line d diff --git a/testdata/apply/text_fragment_delete_all.src b/testdata/apply/text_fragment_delete_all.src new file mode 100644 index 0000000..47d03ac --- /dev/null +++ b/testdata/apply/text_fragment_delete_all.src @@ -0,0 +1,4 @@ +line a +line b +line c +line d diff --git a/testdata/apply/text_fragment_error.src b/testdata/apply/text_fragment_error.src new file mode 100644 index 0000000..f8b6f0a --- /dev/null +++ b/testdata/apply/text_fragment_error.src @@ -0,0 +1,13 @@ +line 1 +line 2 +line 3 +line 4 +line 5 +line 6 +line 7 +line 8 +line 9 +line 10 +line 11 +line 12 +line 13 diff --git a/testdata/apply/text_fragment_error_context_conflict.patch b/testdata/apply/text_fragment_error_context_conflict.patch new file mode 100644 index 0000000..a262796 --- /dev/null +++ b/testdata/apply/text_fragment_error_context_conflict.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -4,7 +4,7 @@ line 3 + line 4 + line 5 + line conflict +-line 7 ++new line a + line 8 + line 9 + line 10 diff --git a/testdata/apply/text_fragment_error_delete_conflict.patch b/testdata/apply/text_fragment_error_delete_conflict.patch new file mode 100644 index 0000000..17ea166 --- /dev/null +++ b/testdata/apply/text_fragment_error_delete_conflict.patch @@ -0,0 +1,12 @@ +diff --git 
a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -4,7 +4,7 @@ line 3 + line 4 + line 5 + line 6 +-line conflict ++new line a + line 8 + line 9 + line 10 diff --git a/testdata/apply/text_fragment_error_new_file.patch b/testdata/apply/text_fragment_error_new_file.patch new file mode 100644 index 0000000..f4fbee6 --- /dev/null +++ b/testdata/apply/text_fragment_error_new_file.patch @@ -0,0 +1,7 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -0,0 +1,3 @@ ++line 1 ++line 2 ++line 3 diff --git a/testdata/apply/text_fragment_error_short_src.patch b/testdata/apply/text_fragment_error_short_src.patch new file mode 100644 index 0000000..bfe7b96 --- /dev/null +++ b/testdata/apply/text_fragment_error_short_src.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -9,7 +9,7 @@ line 8 + line 9 + line 10 + line 11 +-line 12 ++new line a + line 13 + line 14 + line 15 diff --git a/testdata/apply/text_fragment_error_short_src_before.patch b/testdata/apply/text_fragment_error_short_src_before.patch new file mode 100644 index 0000000..0a96018 --- /dev/null +++ b/testdata/apply/text_fragment_error_short_src_before.patch @@ -0,0 +1,12 @@ +diff --git a/gitdiff/testdata/apply/text_fragment_error.src b/gitdiff/testdata/apply/text_fragment_error.src +--- a/gitdiff/testdata/apply/text_fragment_error.src ++++ b/gitdiff/testdata/apply/text_fragment_error.src +@@ -15,7 +15,7 @@ line 14 + line 15 + line 16 + line 17 +-line 18 ++new line a + line 19 + line 20 + line 21 diff 
--git a/testdata/apply/text_fragment_new.out b/testdata/apply/text_fragment_new.out new file mode 100644 index 0000000..a92d664 --- /dev/null +++ b/testdata/apply/text_fragment_new.out @@ -0,0 +1,3 @@ +line 1 +line 2 +line 3 diff --git a/testdata/apply/text_fragment_new.patch b/testdata/apply/text_fragment_new.patch new file mode 100644 index 0000000..c87487b --- /dev/null +++ b/testdata/apply/text_fragment_new.patch @@ -0,0 +1,7 @@ +diff --git a/gitdiff/testdata/apply/fragment_new.src b/gitdiff/testdata/apply/fragment_new.src +--- a/gitdiff/testdata/apply/fragment_new.src ++++ b/gitdiff/testdata/apply/fragment_new.src +@@ -0,0 +1,3 @@ ++line 1 ++line 2 ++line 3 diff --git a/testdata/apply/text_fragment_new.src b/testdata/apply/text_fragment_new.src new file mode 100644 index 0000000..e69de29 From 170621dce613e07e6e01fe03b873301972846f71 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 15:44:27 +0000 Subject: [PATCH 02/20] chore: preserve feat/apply local state before integration --- apply.go | 51 +- apply_test.go | 43 +- ref/git-apply.adoc | 299 +++ ref/git-apply.c | 5232 ++++++++++++++++++++++++++++++++++++++++++++ ref/git-apply.h | 190 ++ 5 files changed, 5802 insertions(+), 13 deletions(-) create mode 100644 ref/git-apply.adoc create mode 100644 ref/git-apply.c create mode 100644 ref/git-apply.h diff --git a/apply.go b/apply.go index b710eaa..dcdd859 100644 --- a/apply.go +++ b/apply.go @@ -10,6 +10,44 @@ import ( var ErrPatchConflict = errors.New("patch conflict") +type ConflictLabels struct { + Current string + Incoming string +} + +type ApplyOptions struct { + ConflictLabels ConflictLabels +} + +// PatchApply holds apply-time configuration and mirrors Git's stateful apply design. 
+type PatchApply struct { + options ApplyOptions +} + +func DefaultApplyOptions() ApplyOptions { + return ApplyOptions{ + ConflictLabels: ConflictLabels{ + Current: "Current", + Incoming: "Incoming patch", + }, + } +} + +func NewPatchApply(options ApplyOptions) *PatchApply { + return &PatchApply{options: normalizeApplyOptions(options)} +} + +func normalizeApplyOptions(options ApplyOptions) ApplyOptions { + defaults := DefaultApplyOptions() + if options.ConflictLabels.Current == "" { + options.ConflictLabels.Current = defaults.ConflictLabels.Current + } + if options.ConflictLabels.Incoming == "" { + options.ConflictLabels.Incoming = defaults.ConflictLabels.Incoming + } + return options +} + type ConflictError struct { ConflictingHunks int } @@ -49,6 +87,10 @@ type fileLine struct { var hunkHeaderPattern = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@`) func ApplyFile(pristine, patchData []byte) ([]byte, error) { + return NewPatchApply(DefaultApplyOptions()).ApplyFile(pristine, patchData) +} + +func (p *PatchApply) ApplyFile(pristine, patchData []byte) ([]byte, error) { normalizedPatch := normalizePatchForValidation(patchData) if err := validateSingleFilePatch(normalizedPatch); err != nil { return nil, err @@ -87,7 +129,7 @@ func ApplyFile(pristine, patchData []byte) ([]byte, error) { } outLines = appendSourceLines(outLines, sourceLines[cursor:conflictStart]...) 
- outLines = appendConflict(outLines, sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) + outLines = p.appendConflict(outLines, sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) cursor = conflictEnd continue } @@ -320,14 +362,15 @@ func desiredLines(hunk patchHunk) []fileLine { return lines } -func appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { - out = append(out, fileLine{text: "<<<<<<< Current (Your changes)", hasNewline: true}) +func (p *PatchApply) appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { + labels := p.options.ConflictLabels + out = append(out, fileLine{text: "<<<<<<< " + labels.Current, hasNewline: true}) out = appendSourceLines(out, ours...) out = ensureTrailingNewline(out) out = append(out, fileLine{text: "=======", hasNewline: true}) out = appendSourceLines(out, theirs...) out = ensureTrailingNewline(out) - out = append(out, fileLine{text: ">>>>>>> New (Generated by Speakeasy)", hasNewline: true}) + out = append(out, fileLine{text: ">>>>>>> " + labels.Incoming, hasNewline: true}) return out } diff --git a/apply_test.go b/apply_test.go index 3824c11..b11fa75 100644 --- a/apply_test.go +++ b/apply_test.go @@ -19,6 +19,11 @@ type applyFixtureFiles struct { out []byte } +const ( + defaultCurrentConflictMarker = "<<<<<<< Current" + defaultIncomingConflictMarker = ">>>>>>> Incoming patch" +) + func TestApplyFile_TextFixtures(t *testing.T) { t.Parallel() @@ -61,8 +66,8 @@ func TestApplyFile_TextFixtures(t *testing.T) { var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") - assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) + assert.Contains(t, string(applied), defaultIncomingConflictMarker) } return } @@ -287,7 +292,7 @@ func 
TestApplyFile_DamagedContextPatchesConflictWithoutFuzz(t *testing.T) { require.Error(t, err) var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } } @@ -424,7 +429,7 @@ func TestApplyFile_MultipleHunksOneConflict(t *testing.T) { var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) assert.Contains(t, string(applied), "line two") - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), "line VI") assert.Contains(t, string(applied), "line six") } @@ -441,11 +446,31 @@ func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") - assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) + assert.Contains(t, string(applied), defaultIncomingConflictMarker) assert.Contains(t, string(applied), "func (s *Status) String() string") } +func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { + t.Parallel() + + base := []byte("package testsdk\n\ntype Status struct{}\n") + current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") + patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + + applier := git_diff_parser.NewPatchApply(git_diff_parser.ApplyOptions{ + ConflictLabels: git_diff_parser.ConflictLabels{ + Current: "Current (Your changes)", + Incoming: "New (Generated by Speakeasy)", + }, + }) + + applied, err := 
applier.ApplyFile(current, patchData) + require.Error(t, err) + assert.Contains(t, string(applied), defaultCurrentConflictMarker) + assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") +} + func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) { t.Parallel() @@ -489,7 +514,7 @@ func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) require.Error(t, err) var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } } @@ -539,7 +564,7 @@ func TestApplyFile_RejectsAlreadyAppliedMiddlePatches(t *testing.T) { require.Error(t, err) var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } } @@ -738,7 +763,7 @@ func TestApplyFile_ShrinkFailures(t *testing.T) { require.Error(t, err) var conflictErr *git_diff_parser.ConflictError require.ErrorAs(t, err, &conflictErr) - assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") + assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } } diff --git a/ref/git-apply.adoc b/ref/git-apply.adoc new file mode 100644 index 0000000..6c71ee6 --- /dev/null +++ b/ref/git-apply.adoc @@ -0,0 +1,299 @@ +git-apply(1) +============ + +NAME +---- +git-apply - Apply a patch to files and/or to the index + + +SYNOPSIS +-------- +[verse] +'git apply' [--stat] [--numstat] [--summary] [--check] + [--index | --intent-to-add] [--3way] [--ours | --theirs | --union] + [--apply] [--no-add] [--build-fake-ancestor=<file>] [-R | --reverse] + [--allow-binary-replacement | --binary] [--reject] [-z] + [-p<n>] [-C<n>] [--inaccurate-eof] [--recount] [--cached] + [--ignore-space-change | --ignore-whitespace] +
[--whitespace=(nowarn|warn|fix|error|error-all)] + [--exclude=<path>] [--include=<path>] [--directory=<root>] + [--verbose | --quiet] [--unsafe-paths] [--allow-empty] [<patch>...] + +DESCRIPTION +----------- +Reads the supplied diff output (i.e. "a patch") and applies it to files. +When running from a subdirectory in a repository, patched paths +outside the directory are ignored. +With the `--index` option, the patch is also applied to the index, and +with the `--cached` option, the patch is only applied to the index. +Without these options, the command applies the patch only to files, +and does not require them to be in a Git repository. + +This command applies the patch but does not create a commit. Use +linkgit:git-am[1] to create commits from patches generated by +linkgit:git-format-patch[1] and/or received by email. + +OPTIONS +------- +<patch>...:: + The files to read the patch from. '-' can be used to read + from the standard input. + +--stat:: + Instead of applying the patch, output diffstat for the + input. Turns off "apply". + +--numstat:: + Similar to `--stat`, but shows the number of added and + deleted lines in decimal notation and the pathname without + abbreviation, to make it more machine friendly. For + binary files, outputs two `-` instead of saying + `0 0`. Turns off "apply". + +--summary:: + Instead of applying the patch, output a condensed + summary of information obtained from git diff extended + headers, such as creations, renames, and mode changes. + Turns off "apply". + +--check:: + Instead of applying the patch, see if the patch is + applicable to the current working tree and/or the index + file and detects errors. Turns off "apply". + +--index:: + Apply the patch to both the index and the working tree (or + merely check that it would apply cleanly to both if `--check` is + in effect).
Note that `--index` expects index entries and + working tree copies for relevant paths to be identical (their + contents and metadata such as file mode must match), and will + raise an error if they are not, even if the patch would apply + cleanly to both the index and the working tree in isolation. + +--cached:: + Apply the patch to just the index, without touching the working + tree. If `--check` is in effect, merely check that it would + apply cleanly to the index entry. + +-N:: +--intent-to-add:: + When applying the patch only to the working tree, mark new + files to be added to the index later (see `--intent-to-add` + option in linkgit:git-add[1]). This option is ignored if + `--index` or `--cached` are used, and has no effect outside a Git + repository. Note that `--index` could be implied by other options + such as `--3way`. + +-3:: +--3way:: + Attempt 3-way merge if the patch records the identity of blobs it is supposed + to apply to and we have those blobs available locally, possibly leaving the + conflict markers in the files in the working tree for the user to + resolve. This option implies the `--index` option unless the + `--cached` option is used, and is incompatible with the `--reject` option. + When used with the `--cached` option, any conflicts are left at higher stages + in the cache. + +--ours:: +--theirs:: +--union:: + Instead of leaving conflicts in the file, resolve conflicts favouring + our (or their or both) side of the lines. Requires --3way. + +--build-fake-ancestor=<file>:: + Newer 'git diff' output has embedded 'index information' + for each blob to help identify the original version that + the patch applies to. When this flag is given, and if + the original versions of the blobs are available locally, + builds a temporary index containing those blobs. ++ +When a pure mode change is encountered (which has no index information), +the information is read from the current index instead. + +-R:: +--reverse:: + Apply the patch in reverse.
+ +--reject:: + For atomicity, 'git apply' by default fails the whole patch and + does not touch the working tree when some of the hunks + do not apply. This option makes it apply + the parts of the patch that are applicable, and leave the + rejected hunks in corresponding *.rej files. + +-z:: + When `--numstat` has been given, do not munge pathnames, + but use a NUL-terminated machine-readable format. ++ +Without this option, pathnames with "unusual" characters are quoted as +explained for the configuration variable `core.quotePath` (see +linkgit:git-config[1]). + +-p<n>:: + Remove <n> leading path components (separated by slashes) from + traditional diff paths. E.g., with `-p2`, a patch against + `a/dir/file` will be applied directly to `file`. The default is + 1. + +-C<n>:: + Ensure at least <n> lines of surrounding context match before + and after each change. When fewer lines of surrounding + context exist they all must match. By default no context is + ever ignored. + +--unidiff-zero:: + By default, 'git apply' expects that the patch being + applied is a unified diff with at least one line of context. + This provides good safety measures, but breaks down when + applying a diff generated with `--unified=0`. To bypass these + checks use `--unidiff-zero`. ++ +Note, for the reasons stated above, the usage of context-free patches is +discouraged. + +--apply:: + If you use any of the options marked "Turns off + 'apply'" above, 'git apply' reads and outputs the + requested information without actually applying the + patch. Give this flag after those flags to also apply + the patch. + +--no-add:: + When applying a patch, ignore additions made by the + patch. This can be used to extract the common part between + two files by first running 'diff' on them and applying + the result with this option, which would apply the + deletion part but not the addition part.
+ +--allow-binary-replacement:: +--binary:: + Historically we did not allow binary patch application + without an explicit permission from the user, and this + flag was the way to do so. Currently, we always allow binary + patch application, so this is a no-op. + +--exclude=<path-pattern>:: + Don't apply changes to files matching the given path pattern. This can + be useful when importing patchsets, where you want to exclude certain + files or directories. + +--include=<path-pattern>:: + Apply changes to files matching the given path pattern. This can + be useful when importing patchsets, where you want to include certain + files or directories. ++ +When `--exclude` and `--include` patterns are used, they are examined in the +order they appear on the command line, and the first match determines if a +patch to each path is used. A patch to a path that does not match any +include/exclude pattern is used by default if there is no include pattern +on the command line, and ignored if there is any include pattern. + +--ignore-space-change:: +--ignore-whitespace:: + When applying a patch, ignore changes in whitespace in context + lines if necessary. + Context lines will preserve their whitespace, and they will not + undergo whitespace fixing regardless of the value of the + `--whitespace` option. New lines will still be fixed, though. + +--whitespace=<action>:: + When applying a patch, detect a new or modified line that has + whitespace errors. What are considered whitespace errors is + controlled by `core.whitespace` configuration. By default, + trailing whitespaces (including lines that solely consist of + whitespaces) and a space character that is immediately followed + by a tab character inside the initial indent of the line are + considered whitespace errors. ++ +By default, the command outputs warning messages but applies the patch. +When `git-apply` is used for statistics and not applying a +patch, it defaults to `nowarn`.
++ +You can use different `<action>` values to control this +behavior: ++ +* `nowarn` turns off the trailing whitespace warning. +* `warn` outputs warnings for a few such errors, but applies the + patch as-is (default). +* `fix` outputs warnings for a few such errors, and applies the + patch after fixing them (`strip` is a synonym -- the tool + used to consider only trailing whitespace characters as errors, and the + fix involved 'stripping' them, but modern Gits do more). +* `error` outputs warnings for a few such errors, and refuses + to apply the patch. +* `error-all` is similar to `error` but shows all errors. + +--inaccurate-eof:: + Under certain circumstances, some versions of 'diff' do not correctly + detect a missing new-line at the end of the file. As a result, patches + created by such 'diff' programs do not record incomplete lines + correctly. This option adds support for applying such patches by + working around this bug. + +-v:: +--verbose:: + Report progress to stderr. By default, only a message about the + current patch being applied will be printed. This option will cause + additional information to be reported. + +-q:: +--quiet:: + Suppress stderr output. Messages about patch status and progress + will not be printed. + +--recount:: + Do not trust the line counts in the hunk headers, but infer them + by inspecting the patch (e.g. after editing the patch without + adjusting the hunk headers appropriately). + +--directory=<root>:: + Prepend <root> to all filenames. If a "-p" argument was also passed, + it is applied before prepending the new root. ++ +For example, a patch that talks about updating `a/git-gui.sh` to `b/git-gui.sh` +can be applied to the file in the working tree `modules/git-gui/git-gui.sh` by +running `git apply --directory=modules/git-gui`.
+ +--unsafe-paths:: + By default, a patch that affects outside the working area + (either a Git controlled working tree, or the current working + directory when "git apply" is used as a replacement of GNU + patch) is rejected as a mistake (or a mischief). ++ +When `git apply` is used as a "better GNU patch", the user can pass +the `--unsafe-paths` option to override this safety check. This option +has no effect when `--index` or `--cached` is in use. + +--allow-empty:: + Don't return an error for patches containing no diff. This includes + empty patches and patches with commit text only. + +CONFIGURATION +------------- + +include::includes/cmd-config-section-all.adoc[] + +include::config/apply.adoc[] + +SUBMODULES +---------- +If the patch contains any changes to submodules then 'git apply' +treats these changes as follows. + +If `--index` is specified (explicitly or implicitly), then the submodule +commits must match the index exactly for the patch to apply. If any +of the submodules are checked-out, then these check-outs are completely +ignored, i.e., they are not required to be up to date or clean and they +are not updated. + +If `--index` is not specified, then the submodule commits in the patch +are ignored and only the absence or presence of the corresponding +subdirectory is checked and (if possible) updated. + +SEE ALSO +-------- +linkgit:git-am[1]. + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/ref/git-apply.c b/ref/git-apply.c new file mode 100644 index 0000000..b6dd106 --- /dev/null +++ b/ref/git-apply.c @@ -0,0 +1,5232 @@ +/* + * apply.c + * + * Copyright (C) Linus Torvalds, 2005 + * + * This applies patches on top of some (arbitrary) version of the SCM. 
+ * + */ + +#define USE_THE_REPOSITORY_VARIABLE +#define DISABLE_SIGN_COMPARE_WARNINGS + +#include "git-compat-util.h" +#include "abspath.h" +#include "base85.h" +#include "config.h" +#include "odb.h" +#include "delta.h" +#include "diff.h" +#include "dir.h" +#include "environment.h" +#include "gettext.h" +#include "hex.h" +#include "xdiff-interface.h" +#include "merge-ll.h" +#include "lockfile.h" +#include "name-hash.h" +#include "object-name.h" +#include "object-file.h" +#include "parse-options.h" +#include "path.h" +#include "quote.h" +#include "read-cache.h" +#include "repository.h" +#include "rerere.h" +#include "apply.h" +#include "entry.h" +#include "setup.h" +#include "symlinks.h" +#include "wildmatch.h" +#include "ws.h" + +struct gitdiff_data { + struct strbuf *root; + int linenr; + int p_value; +}; + +static void git_apply_config(void) +{ + repo_config_get_string(the_repository, "apply.whitespace", &apply_default_whitespace); + repo_config_get_string(the_repository, "apply.ignorewhitespace", &apply_default_ignorewhitespace); + repo_config(the_repository, git_xmerge_config, NULL); +} + +static int parse_whitespace_option(struct apply_state *state, const char *option) +{ + if (!option) { + state->ws_error_action = warn_on_ws_error; + return 0; + } + if (!strcmp(option, "warn")) { + state->ws_error_action = warn_on_ws_error; + return 0; + } + if (!strcmp(option, "nowarn")) { + state->ws_error_action = nowarn_ws_error; + return 0; + } + if (!strcmp(option, "error")) { + state->ws_error_action = die_on_ws_error; + return 0; + } + if (!strcmp(option, "error-all")) { + state->ws_error_action = die_on_ws_error; + state->squelch_whitespace_errors = 0; + return 0; + } + if (!strcmp(option, "strip") || !strcmp(option, "fix")) { + state->ws_error_action = correct_ws_error; + return 0; + } + /* + * Please update $__git_whitespacelist in git-completion.bash, + * Documentation/git-apply.adoc, and Documentation/git-am.adoc + * when you add new options. 
+ */ + return error(_("unrecognized whitespace option '%s'"), option); +} + +static int parse_ignorewhitespace_option(struct apply_state *state, + const char *option) +{ + if (!option || !strcmp(option, "no") || + !strcmp(option, "false") || !strcmp(option, "never") || + !strcmp(option, "none")) { + state->ws_ignore_action = ignore_ws_none; + return 0; + } + if (!strcmp(option, "change")) { + state->ws_ignore_action = ignore_ws_change; + return 0; + } + return error(_("unrecognized whitespace ignore option '%s'"), option); +} + +int init_apply_state(struct apply_state *state, + struct repository *repo, + const char *prefix) +{ + memset(state, 0, sizeof(*state)); + state->prefix = prefix; + state->repo = repo; + state->apply = 1; + state->line_termination = '\n'; + state->p_value = 1; + state->p_context = UINT_MAX; + state->squelch_whitespace_errors = 5; + state->ws_error_action = warn_on_ws_error; + state->ws_ignore_action = ignore_ws_none; + state->linenr = 1; + string_list_init_nodup(&state->fn_table); + string_list_init_nodup(&state->limit_by_name); + strset_init(&state->removed_symlinks); + strset_init(&state->kept_symlinks); + strbuf_init(&state->root, 0); + + git_apply_config(); + if (apply_default_whitespace && parse_whitespace_option(state, apply_default_whitespace)) + return -1; + if (apply_default_ignorewhitespace && parse_ignorewhitespace_option(state, apply_default_ignorewhitespace)) + return -1; + return 0; +} + +void clear_apply_state(struct apply_state *state) +{ + string_list_clear(&state->limit_by_name, 0); + strset_clear(&state->removed_symlinks); + strset_clear(&state->kept_symlinks); + strbuf_release(&state->root); + FREE_AND_NULL(state->fake_ancestor); + + /* &state->fn_table is cleared at the end of apply_patch() */ +} + +static void mute_routine(const char *msg UNUSED, va_list params UNUSED) +{ + /* do nothing */ +} + +int check_apply_state(struct apply_state *state, int force_apply) +{ + int is_not_gitdir = !startup_info->have_repository; + 
+ if (state->apply_with_reject && state->threeway) + return error(_("options '%s' and '%s' cannot be used together"), "--reject", "--3way"); + if (state->threeway) { + if (is_not_gitdir) + return error(_("'%s' outside a repository"), "--3way"); + state->check_index = 1; + } + if (state->apply_with_reject) { + state->apply = 1; + if (state->apply_verbosity == verbosity_normal) + state->apply_verbosity = verbosity_verbose; + } + if (!force_apply && (state->diffstat || state->numstat || state->summary || state->check || state->fake_ancestor)) + state->apply = 0; + if (state->check_index && is_not_gitdir) + return error(_("'%s' outside a repository"), "--index"); + if (state->cached) { + if (is_not_gitdir) + return error(_("'%s' outside a repository"), "--cached"); + state->check_index = 1; + } + if (state->ita_only && (state->check_index || is_not_gitdir)) + state->ita_only = 0; + if (state->check_index) + state->unsafe_paths = 0; + + if (state->apply_verbosity <= verbosity_silent) { + state->saved_error_routine = get_error_routine(); + state->saved_warn_routine = get_warn_routine(); + set_error_routine(mute_routine); + set_warn_routine(mute_routine); + } + + return 0; +} + +static void set_default_whitespace_mode(struct apply_state *state) +{ + if (!state->whitespace_option && !apply_default_whitespace) + state->ws_error_action = (state->apply ? warn_on_ws_error : nowarn_ws_error); +} + +/* + * This represents one "hunk" from a patch, starting with + * "@@ -oldpos,oldlines +newpos,newlines @@" marker. The + * patch text is pointed at by patch, and its byte length + * is stored in size. leading and trailing are the number + * of context lines. + */ +struct fragment { + unsigned long leading, trailing; + unsigned long oldpos, oldlines; + unsigned long newpos, newlines; + /* + * 'patch' is usually borrowed from buf in apply_patch(), + * but some codepaths store an allocated buffer. 
+ */ + const char *patch; + unsigned free_patch:1, + rejected:1; + int size; + int linenr; + struct fragment *next; +}; + +/* + * When dealing with a binary patch, we reuse "leading" field + * to store the type of the binary hunk, either deflated "delta" + * or deflated "literal". + */ +#define binary_patch_method leading +#define BINARY_DELTA_DEFLATED 1 +#define BINARY_LITERAL_DEFLATED 2 + +static void free_fragment_list(struct fragment *list) +{ + while (list) { + struct fragment *next = list->next; + if (list->free_patch) + free((char *)list->patch); + free(list); + list = next; + } +} + +void release_patch(struct patch *patch) +{ + free_fragment_list(patch->fragments); + free(patch->def_name); + free(patch->old_name); + free(patch->new_name); + free(patch->result); +} + +static void free_patch(struct patch *patch) +{ + release_patch(patch); + free(patch); +} + +static void free_patch_list(struct patch *list) +{ + while (list) { + struct patch *next = list->next; + free_patch(list); + list = next; + } +} + +/* + * A line in a file, len-bytes long (includes the terminating LF, + * except for an incomplete line at the end if the file ends with + * one), and its contents hashes to 'hash'. + */ +struct line { + size_t len; + unsigned hash : 24; + unsigned flag : 8; +#define LINE_COMMON 1 +#define LINE_PATCHED 2 +}; + +/* + * This represents a "file", which is an array of "lines". 
+ */ +struct image { + struct strbuf buf; + struct line *line; + size_t line_nr, line_alloc; +}; +#define IMAGE_INIT { \ + .buf = STRBUF_INIT, \ +} + +static void image_init(struct image *image) +{ + struct image empty = IMAGE_INIT; + memcpy(image, &empty, sizeof(*image)); +} + +static void image_clear(struct image *image) +{ + strbuf_release(&image->buf); + free(image->line); + image_init(image); +} + +static uint32_t hash_line(const char *cp, size_t len) +{ + size_t i; + uint32_t h; + for (i = 0, h = 0; i < len; i++) { + if (!isspace(cp[i])) { + h = h * 3 + (cp[i] & 0xff); + } + } + return h; +} + +static void image_add_line(struct image *img, const char *bol, size_t len, unsigned flag) +{ + ALLOC_GROW(img->line, img->line_nr + 1, img->line_alloc); + img->line[img->line_nr].len = len; + img->line[img->line_nr].hash = hash_line(bol, len); + img->line[img->line_nr].flag = flag; + img->line_nr++; +} + +/* + * "buf" has the file contents to be patched (read from various sources). + * attach it to "image" and add line-based index to it. + * "image" now owns the "buf". 
+ */ +static void image_prepare(struct image *image, char *buf, size_t len, + int prepare_linetable) +{ + const char *cp, *ep; + + image_clear(image); + strbuf_attach(&image->buf, buf, len, len + 1); + + if (!prepare_linetable) + return; + + ep = image->buf.buf + image->buf.len; + cp = image->buf.buf; + while (cp < ep) { + const char *next; + for (next = cp; next < ep && *next != '\n'; next++) + ; + if (next < ep) + next++; + image_add_line(image, cp, next - cp, 0); + cp = next; + } +} + +static void image_remove_first_line(struct image *img) +{ + strbuf_remove(&img->buf, 0, img->line[0].len); + img->line_nr--; + if (img->line_nr) + MOVE_ARRAY(img->line, img->line + 1, img->line_nr); +} + +static void image_remove_last_line(struct image *img) +{ + size_t last_line_len = img->line[img->line_nr - 1].len; + strbuf_setlen(&img->buf, img->buf.len - last_line_len); + img->line_nr--; +} + +/* fmt must contain _one_ %s and no other substitution */ +static void say_patch_name(FILE *output, const char *fmt, struct patch *patch) +{ + struct strbuf sb = STRBUF_INIT; + + if (patch->old_name && patch->new_name && + strcmp(patch->old_name, patch->new_name)) { + quote_c_style(patch->old_name, &sb, NULL, 0); + strbuf_addstr(&sb, " => "); + quote_c_style(patch->new_name, &sb, NULL, 0); + } else { + const char *n = patch->new_name; + if (!n) + n = patch->old_name; + quote_c_style(n, &sb, NULL, 0); + } + fprintf(output, fmt, sb.buf); + fputc('\n', output); + strbuf_release(&sb); +} + +#define SLOP (16) + +/* + * apply.c isn't equipped to handle arbitrarily large patches, because + * it intermingles `unsigned long` with `int` for the type used to store + * buffer lengths. + * + * Only process patches that are just shy of 1 GiB large in order to + * avoid any truncation or overflow issues. 
+ */ +#define MAX_APPLY_SIZE (1024UL * 1024 * 1023) + +static int read_patch_file(struct strbuf *sb, int fd) +{ + if (strbuf_read(sb, fd, 0) < 0) + return error_errno(_("failed to read patch")); + else if (sb->len >= MAX_APPLY_SIZE) + return error(_("patch too large")); + /* + * Make sure that we have some slop in the buffer + * so that we can do speculative "memcmp" etc, and + * see to it that it is NUL-filled. + */ + strbuf_grow(sb, SLOP); + memset(sb->buf + sb->len, 0, SLOP); + return 0; +} + +static unsigned long linelen(const char *buffer, unsigned long size) +{ + unsigned long len = 0; + while (size--) { + len++; + if (*buffer++ == '\n') + break; + } + return len; +} + +static int is_dev_null(const char *str) +{ + return skip_prefix(str, "/dev/null", &str) && isspace(*str); +} + +#define TERM_SPACE 1 +#define TERM_TAB 2 + +static int name_terminate(int c, int terminate) +{ + if (c == ' ' && !(terminate & TERM_SPACE)) + return 0; + if (c == '\t' && !(terminate & TERM_TAB)) + return 0; + + return 1; +} + +/* remove double slashes to make --index work with such filenames */ +static char *squash_slash(char *name) +{ + int i = 0, j = 0; + + if (!name) + return NULL; + + while (name[i]) { + if ((name[j++] = name[i++]) == '/') + while (name[i] == '/') + i++; + } + name[j] = '\0'; + return name; +} + +static char *find_name_gnu(struct strbuf *root, + const char *line, + int p_value) +{ + struct strbuf name = STRBUF_INIT; + char *cp; + + /* + * Proposed "new-style" GNU patch/diff format; see + * https://lore.kernel.org/git/7vll0wvb2a.fsf@assigned-by-dhcp.cox.net/ + */ + if (unquote_c_style(&name, line, NULL)) { + strbuf_release(&name); + return NULL; + } + + for (cp = name.buf; p_value; p_value--) { + cp = strchr(cp, '/'); + if (!cp) { + strbuf_release(&name); + return NULL; + } + cp++; + } + + strbuf_remove(&name, 0, cp - name.buf); + if (root->len) + strbuf_insert(&name, 0, root->buf, root->len); + return squash_slash(strbuf_detach(&name, NULL)); +} + +static size_t 
sane_tz_len(const char *line, size_t len) +{ + const char *tz, *p; + + if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ') + return 0; + tz = line + len - strlen(" +0500"); + + if (tz[1] != '+' && tz[1] != '-') + return 0; + + for (p = tz + 2; p != line + len; p++) + if (!isdigit(*p)) + return 0; + + return line + len - tz; +} + +static size_t tz_with_colon_len(const char *line, size_t len) +{ + const char *tz, *p; + + if (len < strlen(" +08:00") || line[len - strlen(":00")] != ':') + return 0; + tz = line + len - strlen(" +08:00"); + + if (tz[0] != ' ' || (tz[1] != '+' && tz[1] != '-')) + return 0; + p = tz + 2; + if (!isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++)) + return 0; + + return line + len - tz; +} + +static size_t date_len(const char *line, size_t len) +{ + const char *date, *p; + + if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-') + return 0; + p = date = line + len - strlen("72-02-05"); + + if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */ + return 0; + + if (date - line >= strlen("19") && + isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */ + date -= strlen("19"); + + return line + len - date; +} + +static size_t short_time_len(const char *line, size_t len) +{ + const char *time, *p; + + if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':') + return 0; + p = time = line + len - strlen(" 07:01:32"); + + /* Permit 1-digit hours? */ + if (*p++ != ' ' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || + !isdigit(*p++) || !isdigit(*p++)) /* Not a time. 
*/ + return 0; + + return line + len - time; +} + +static size_t fractional_time_len(const char *line, size_t len) +{ + const char *p; + size_t n; + + /* Expected format: 19:41:17.620000023 */ + if (!len || !isdigit(line[len - 1])) + return 0; + p = line + len - 1; + + /* Fractional seconds. */ + while (p > line && isdigit(*p)) + p--; + if (*p != '.') + return 0; + + /* Hours, minutes, and whole seconds. */ + n = short_time_len(line, p - line); + if (!n) + return 0; + + return line + len - p + n; +} + +static size_t trailing_spaces_len(const char *line, size_t len) +{ + const char *p; + + /* Expected format: ' ' x (1 or more) */ + if (!len || line[len - 1] != ' ') + return 0; + + p = line + len; + while (p != line) { + p--; + if (*p != ' ') + return line + len - (p + 1); + } + + /* All spaces! */ + return len; +} + +static size_t diff_timestamp_len(const char *line, size_t len) +{ + const char *end = line + len; + size_t n; + + /* + * Posix: 2010-07-05 19:41:17 + * GNU: 2010-07-05 19:41:17.620000023 -0500 + */ + + if (!isdigit(end[-1])) + return 0; + + n = sane_tz_len(line, end - line); + if (!n) + n = tz_with_colon_len(line, end - line); + end -= n; + + n = short_time_len(line, end - line); + if (!n) + n = fractional_time_len(line, end - line); + end -= n; + + n = date_len(line, end - line); + if (!n) /* No date. Too bad. */ + return 0; + end -= n; + + if (end == line) /* No space before date. */ + return 0; + if (end[-1] == '\t') { /* Success! */ + end--; + return line + len - end; + } + if (end[-1] != ' ') /* No space before date. */ + return 0; + + /* Whitespace damage. 
*/ + end -= trailing_spaces_len(line, end - line); + return line + len - end; +} + +static char *find_name_common(struct strbuf *root, + const char *line, + const char *def, + int p_value, + const char *end, + int terminate) +{ + int len; + const char *start = NULL; + + if (p_value == 0) + start = line; + while (line != end) { + char c = *line; + + if (!end && isspace(c)) { + if (c == '\n') + break; + if (name_terminate(c, terminate)) + break; + } + line++; + if (c == '/' && !--p_value) + start = line; + } + if (!start) + return squash_slash(xstrdup_or_null(def)); + len = line - start; + if (!len) + return squash_slash(xstrdup_or_null(def)); + + /* + * Generally we prefer the shorter name, especially + * if the other one is just a variation of that with + * something else tacked on to the end (ie "file.orig" + * or "file~"). + */ + if (def) { + int deflen = strlen(def); + if (deflen < len && !strncmp(start, def, deflen)) + return squash_slash(xstrdup(def)); + } + + if (root->len) { + char *ret = xstrfmt("%s%.*s", root->buf, len, start); + return squash_slash(ret); + } + + return squash_slash(xmemdupz(start, len)); +} + +static char *find_name(struct strbuf *root, + const char *line, + char *def, + int p_value, + int terminate) +{ + if (*line == '"') { + char *name = find_name_gnu(root, line, p_value); + if (name) + return name; + } + + return find_name_common(root, line, def, p_value, NULL, terminate); +} + +static char *find_name_traditional(struct strbuf *root, + const char *line, + char *def, + int p_value) +{ + size_t len; + size_t date_len; + + if (*line == '"') { + char *name = find_name_gnu(root, line, p_value); + if (name) + return name; + } + + len = strchrnul(line, '\n') - line; + date_len = diff_timestamp_len(line, len); + if (!date_len) + return find_name_common(root, line, def, p_value, NULL, TERM_TAB); + len -= date_len; + + return find_name_common(root, line, def, p_value, line + len, 0); +} + +/* + * Given the string after "--- " or "+++ ", guess 
the appropriate + * p_value for the given patch. + */ +static int guess_p_value(struct apply_state *state, const char *nameline) +{ + char *name, *cp; + int val = -1; + + if (is_dev_null(nameline)) + return -1; + name = find_name_traditional(&state->root, nameline, NULL, 0); + if (!name) + return -1; + cp = strchr(name, '/'); + if (!cp) + val = 0; + else if (state->prefix) { + /* + * Does it begin with "a/$our-prefix" and such? Then this is + * very likely to apply to our directory. + */ + if (starts_with(name, state->prefix)) + val = count_slashes(state->prefix); + else { + cp++; + if (starts_with(cp, state->prefix)) + val = count_slashes(state->prefix) + 1; + } + } + free(name); + return val; +} + +/* + * Does the ---/+++ line have the POSIX timestamp after the last HT? + * GNU diff puts epoch there to signal a creation/deletion event. Is + * this such a timestamp? + */ +static int has_epoch_timestamp(const char *nameline) +{ + /* + * We are only interested in epoch timestamp; any non-zero + * fraction cannot be one, hence "(\.0+)?" in the regexp below. + * For the same reason, the date must be either 1969-12-31 or + * 1970-01-01, and the seconds part must be "00". + */ + const char stamp_regexp[] = + "^[0-2][0-9]:([0-5][0-9]):00(\\.0+)?" 
+ " " + "([-+][0-2][0-9]:?[0-5][0-9])\n"; + const char *timestamp = NULL, *cp, *colon; + static regex_t *stamp; + regmatch_t m[10]; + int zoneoffset, epoch_hour, hour, minute; + int status; + + for (cp = nameline; *cp != '\n'; cp++) { + if (*cp == '\t') + timestamp = cp + 1; + } + if (!timestamp) + return 0; + + /* + * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31 + * (west of GMT) or 1970-01-01 (east of GMT) + */ + if (skip_prefix(timestamp, "1969-12-31 ", &timestamp)) + epoch_hour = 24; + else if (skip_prefix(timestamp, "1970-01-01 ", &timestamp)) + epoch_hour = 0; + else + return 0; + + if (!stamp) { + stamp = xmalloc(sizeof(*stamp)); + if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) { + warning(_("Cannot prepare timestamp regexp %s"), + stamp_regexp); + return 0; + } + } + + status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0); + if (status) { + if (status != REG_NOMATCH) + warning(_("regexec returned %d for input: %s"), + status, timestamp); + return 0; + } + + hour = strtol(timestamp, NULL, 10); + minute = strtol(timestamp + m[1].rm_so, NULL, 10); + + zoneoffset = strtol(timestamp + m[3].rm_so + 1, (char **) &colon, 10); + if (*colon == ':') + zoneoffset = zoneoffset * 60 + strtol(colon + 1, NULL, 10); + else + zoneoffset = (zoneoffset / 100) * 60 + (zoneoffset % 100); + if (timestamp[m[3].rm_so] == '-') + zoneoffset = -zoneoffset; + + return hour * 60 + minute - zoneoffset == epoch_hour * 60; +} + +/* + * Get the name etc info from the ---/+++ lines of a traditional patch header + * + * FIXME! The end-of-filename heuristics are kind of screwy. For existing + * files, we can happily check the index for a match, but for creating a + * new file we should try to match whatever "patch" does. I have no idea.
+ */ +static int parse_traditional_patch(struct apply_state *state, + const char *first, + const char *second, + struct patch *patch) +{ + char *name; + + first += 4; /* skip "--- " */ + second += 4; /* skip "+++ " */ + if (!state->p_value_known) { + int p, q; + p = guess_p_value(state, first); + q = guess_p_value(state, second); + if (p < 0) p = q; + if (0 <= p && p == q) { + state->p_value = p; + state->p_value_known = 1; + } + } + if (is_dev_null(first)) { + patch->is_new = 1; + patch->is_delete = 0; + name = find_name_traditional(&state->root, second, NULL, state->p_value); + patch->new_name = name; + } else if (is_dev_null(second)) { + patch->is_new = 0; + patch->is_delete = 1; + name = find_name_traditional(&state->root, first, NULL, state->p_value); + patch->old_name = name; + } else { + char *first_name; + first_name = find_name_traditional(&state->root, first, NULL, state->p_value); + name = find_name_traditional(&state->root, second, first_name, state->p_value); + free(first_name); + if (has_epoch_timestamp(first)) { + patch->is_new = 1; + patch->is_delete = 0; + patch->new_name = name; + } else if (has_epoch_timestamp(second)) { + patch->is_new = 0; + patch->is_delete = 1; + patch->old_name = name; + } else { + patch->old_name = name; + patch->new_name = xstrdup_or_null(name); + } + } + if (!name) + return error(_("unable to find filename in patch at line %d"), state->linenr); + + return 0; +} + +static int gitdiff_hdrend(struct gitdiff_data *state UNUSED, + const char *line UNUSED, + struct patch *patch UNUSED) +{ + return 1; +} + +/* + * We're anal about diff header consistency, to make + * sure that we don't end up having strange ambiguous + * patches floating around. + * + * As a result, gitdiff_{old|new}name() will check + * their names against any previous information, just + * to make sure.. 
+ */ +#define DIFF_OLD_NAME 0 +#define DIFF_NEW_NAME 1 + +static int gitdiff_verify_name(struct gitdiff_data *state, + const char *line, + int isnull, + char **name, + int side) +{ + if (!*name && !isnull) { + *name = find_name(state->root, line, NULL, state->p_value, TERM_TAB); + return 0; + } + + if (*name) { + char *another; + if (isnull) + return error(_("git apply: bad git-diff - expected /dev/null, got %s on line %d"), + *name, state->linenr); + another = find_name(state->root, line, NULL, state->p_value, TERM_TAB); + if (!another || strcmp(another, *name)) { + free(another); + return error((side == DIFF_NEW_NAME) ? + _("git apply: bad git-diff - inconsistent new filename on line %d") : + _("git apply: bad git-diff - inconsistent old filename on line %d"), state->linenr); + } + free(another); + } else { + if (!is_dev_null(line)) + return error(_("git apply: bad git-diff - expected /dev/null on line %d"), state->linenr); + } + + return 0; +} + +static int gitdiff_oldname(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + return gitdiff_verify_name(state, line, + patch->is_new, &patch->old_name, + DIFF_OLD_NAME); +} + +static int gitdiff_newname(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + return gitdiff_verify_name(state, line, + patch->is_delete, &patch->new_name, + DIFF_NEW_NAME); +} + +static int parse_mode_line(const char *line, int linenr, unsigned int *mode) +{ + char *end; + *mode = strtoul(line, &end, 8); + if (end == line || !isspace(*end)) + return error(_("invalid mode on line %d: %s"), linenr, line); + *mode = canon_mode(*mode); + return 0; +} + +static int gitdiff_oldmode(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + return parse_mode_line(line, state->linenr, &patch->old_mode); +} + +static int gitdiff_newmode(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + return parse_mode_line(line, state->linenr, &patch->new_mode); +} + +static 
int gitdiff_delete(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_delete = 1; + free(patch->old_name); + patch->old_name = xstrdup_or_null(patch->def_name); + return gitdiff_oldmode(state, line, patch); +} + +static int gitdiff_newfile(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_new = 1; + free(patch->new_name); + patch->new_name = xstrdup_or_null(patch->def_name); + return gitdiff_newmode(state, line, patch); +} + +static int gitdiff_copysrc(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_copy = 1; + free(patch->old_name); + patch->old_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); + return 0; +} + +static int gitdiff_copydst(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_copy = 1; + free(patch->new_name); + patch->new_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); + return 0; +} + +static int gitdiff_renamesrc(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_rename = 1; + free(patch->old_name); + patch->old_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); + return 0; +} + +static int gitdiff_renamedst(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + patch->is_rename = 1; + free(patch->new_name); + patch->new_name = find_name(state->root, line, NULL, state->p_value ? 
state->p_value - 1 : 0, 0); + return 0; +} + +static int gitdiff_similarity(struct gitdiff_data *state UNUSED, + const char *line, + struct patch *patch) +{ + unsigned long val = strtoul(line, NULL, 10); + if (val <= 100) + patch->score = val; + return 0; +} + +static int gitdiff_dissimilarity(struct gitdiff_data *state UNUSED, + const char *line, + struct patch *patch) +{ + unsigned long val = strtoul(line, NULL, 10); + if (val <= 100) + patch->score = val; + return 0; +} + +static int gitdiff_index(struct gitdiff_data *state, + const char *line, + struct patch *patch) +{ + /* + * index line is N hexadecimal, "..", N hexadecimal, + * and optional space with octal mode. + */ + const char *ptr, *eol; + int len; + const unsigned hexsz = the_hash_algo->hexsz; + + ptr = strchr(line, '.'); + if (!ptr || ptr[1] != '.' || hexsz < ptr - line) + return 0; + len = ptr - line; + memcpy(patch->old_oid_prefix, line, len); + patch->old_oid_prefix[len] = 0; + + line = ptr + 2; + ptr = strchr(line, ' '); + eol = strchrnul(line, '\n'); + + if (!ptr || eol < ptr) + ptr = eol; + len = ptr - line; + + if (hexsz < len) + return 0; + memcpy(patch->new_oid_prefix, line, len); + patch->new_oid_prefix[len] = 0; + if (*ptr == ' ') + return gitdiff_oldmode(state, ptr + 1, patch); + return 0; +} + +/* + * This is normal for a diff that doesn't change anything: we'll fall through + * into the next diff. Tell the parser to break out. + */ +static int gitdiff_unrecognized(struct gitdiff_data *state UNUSED, + const char *line UNUSED, + struct patch *patch UNUSED) +{ + return 1; +} + +/* + * Skip p_value leading components from "line"; as we do not accept + * absolute paths, return NULL in that case. + */ +static const char *skip_tree_prefix(int p_value, + const char *line, + int llen) +{ + int nslash; + int i; + + if (!p_value) + return (llen && line[0] == '/') ? 
NULL : line; + + nslash = p_value; + for (i = 0; i < llen; i++) { + int ch = line[i]; + if (ch == '/' && --nslash <= 0) + return (i == 0) ? NULL : &line[i + 1]; + } + return NULL; +} + +/* + * This is to extract the same name that appears on "diff --git" + * line. We do not find and return anything if it is a rename + * patch, and it is OK because we will find the name elsewhere. + * We need to reliably find name only when it is mode-change only, + * creation or deletion of an empty file. In any of these cases, + * both sides are the same name under a/ and b/ respectively. + */ +static char *git_header_name(int p_value, + const char *line, + int llen) +{ + const char *name; + const char *second = NULL; + size_t len, line_len; + + line += strlen("diff --git "); + llen -= strlen("diff --git "); + + if (*line == '"') { + const char *cp; + struct strbuf first = STRBUF_INIT; + struct strbuf sp = STRBUF_INIT; + + if (unquote_c_style(&first, line, &second)) + goto free_and_fail1; + + /* strip the a/b prefix including trailing slash */ + cp = skip_tree_prefix(p_value, first.buf, first.len); + if (!cp) + goto free_and_fail1; + strbuf_remove(&first, 0, cp - first.buf); + + /* + * second points at one past closing dq of name. + * find the second name. 
+ */ + while ((second < line + llen) && isspace(*second)) + second++; + + if (line + llen <= second) + goto free_and_fail1; + if (*second == '"') { + if (unquote_c_style(&sp, second, NULL)) + goto free_and_fail1; + cp = skip_tree_prefix(p_value, sp.buf, sp.len); + if (!cp) + goto free_and_fail1; + /* They must match, otherwise ignore */ + if (strcmp(cp, first.buf)) + goto free_and_fail1; + strbuf_release(&sp); + return strbuf_detach(&first, NULL); + } + + /* unquoted second */ + cp = skip_tree_prefix(p_value, second, line + llen - second); + if (!cp) + goto free_and_fail1; + if (line + llen - cp != first.len || + memcmp(first.buf, cp, first.len)) + goto free_and_fail1; + return strbuf_detach(&first, NULL); + + free_and_fail1: + strbuf_release(&first); + strbuf_release(&sp); + return NULL; + } + + /* unquoted first name */ + name = skip_tree_prefix(p_value, line, llen); + if (!name) + return NULL; + + /* + * since the first name is unquoted, a dq if exists must be + * the beginning of the second name. + */ + for (second = name; second < line + llen; second++) { + if (*second == '"') { + struct strbuf sp = STRBUF_INIT; + const char *np; + + if (unquote_c_style(&sp, second, NULL)) + goto free_and_fail2; + + np = skip_tree_prefix(p_value, sp.buf, sp.len); + if (!np) + goto free_and_fail2; + + len = sp.buf + sp.len - np; + if (len < second - name && + !strncmp(np, name, len) && + isspace(name[len])) { + /* Good */ + strbuf_remove(&sp, 0, np - sp.buf); + return strbuf_detach(&sp, NULL); + } + + free_and_fail2: + strbuf_release(&sp); + return NULL; + } + } + + /* + * Accept a name only if it shows up twice, exactly the same + * form. + */ + second = strchr(name, '\n'); + if (!second) + return NULL; + line_len = second - name; + for (len = 0 ; ; len++) { + switch (name[len]) { + default: + continue; + case '\n': + return NULL; + case '\t': case ' ': + /* + * Is this the separator between the preimage + * and the postimage pathname? 
Again, we are + * only interested in the case where there is + * no rename, as this is only to set def_name + * and a rename patch has the names elsewhere + * in an unambiguous form. + */ + if (!name[len + 1]) + return NULL; /* no postimage name */ + second = skip_tree_prefix(p_value, name + len + 1, + line_len - (len + 1)); + /* + * If we are at the SP at the end of a directory, + * skip_tree_prefix() may return NULL as that makes + * it appears as if we have an absolute path. + * Keep going to find another SP. + */ + if (!second) + continue; + + /* + * Does len bytes starting at "name" and "second" + * (that are separated by one HT or SP we just + * found) exactly match? + */ + if (second[len] == '\n' && !strncmp(name, second, len)) + return xmemdupz(name, len); + } + } +} + +static int check_header_line(int linenr, struct patch *patch) +{ + int extensions = (patch->is_delete == 1) + (patch->is_new == 1) + + (patch->is_rename == 1) + (patch->is_copy == 1); + if (extensions > 1) + return error(_("inconsistent header lines %d and %d"), + patch->extension_linenr, linenr); + if (extensions && !patch->extension_linenr) + patch->extension_linenr = linenr; + return 0; +} + +int parse_git_diff_header(struct strbuf *root, + int *linenr, + int p_value, + const char *line, + int len, + unsigned int size, + struct patch *patch) +{ + unsigned long offset; + struct gitdiff_data parse_hdr_state; + + /* A git diff has explicit new/delete information, so we don't guess */ + patch->is_new = 0; + patch->is_delete = 0; + + /* + * Some things may not have the old name in the + * rest of the headers anywhere (pure mode changes, + * or removing or adding empty files), so we get + * the default name from the header. 
+ */ + patch->def_name = git_header_name(p_value, line, len); + if (patch->def_name && root->len) { + char *s = xstrfmt("%s%s", root->buf, patch->def_name); + free(patch->def_name); + patch->def_name = s; + } + + line += len; + size -= len; + (*linenr)++; + parse_hdr_state.root = root; + parse_hdr_state.linenr = *linenr; + parse_hdr_state.p_value = p_value; + + for (offset = len ; size > 0 ; offset += len, size -= len, line += len, (*linenr)++) { + static const struct opentry { + const char *str; + int (*fn)(struct gitdiff_data *, const char *, struct patch *); + } optable[] = { + { "@@ -", gitdiff_hdrend }, + { "--- ", gitdiff_oldname }, + { "+++ ", gitdiff_newname }, + { "old mode ", gitdiff_oldmode }, + { "new mode ", gitdiff_newmode }, + { "deleted file mode ", gitdiff_delete }, + { "new file mode ", gitdiff_newfile }, + { "copy from ", gitdiff_copysrc }, + { "copy to ", gitdiff_copydst }, + { "rename old ", gitdiff_renamesrc }, + { "rename new ", gitdiff_renamedst }, + { "rename from ", gitdiff_renamesrc }, + { "rename to ", gitdiff_renamedst }, + { "similarity index ", gitdiff_similarity }, + { "dissimilarity index ", gitdiff_dissimilarity }, + { "index ", gitdiff_index }, + { "", gitdiff_unrecognized }, + }; + int i; + + len = linelen(line, size); + if (!len || line[len-1] != '\n') + break; + for (i = 0; i < ARRAY_SIZE(optable); i++) { + const struct opentry *p = optable + i; + int oplen = strlen(p->str); + int res; + if (len < oplen || memcmp(p->str, line, oplen)) + continue; + res = p->fn(&parse_hdr_state, line + oplen, patch); + if (res < 0) + return -1; + if (check_header_line(*linenr, patch)) + return -1; + if (res > 0) + goto done; + break; + } + } + +done: + if (!patch->old_name && !patch->new_name) { + if (!patch->def_name) { + error(Q_("git diff header lacks filename information when removing " + "%d leading pathname component (line %d)", + "git diff header lacks filename information when removing " + "%d leading pathname components (line %d)", + 
parse_hdr_state.p_value), + parse_hdr_state.p_value, *linenr); + return -128; + } + patch->old_name = xstrdup(patch->def_name); + patch->new_name = xstrdup(patch->def_name); + } + if ((!patch->new_name && !patch->is_delete) || + (!patch->old_name && !patch->is_new)) { + error(_("git diff header lacks filename information " + "(line %d)"), *linenr); + return -128; + } + patch->is_toplevel_relative = 1; + return offset; +} + +static int parse_num(const char *line, unsigned long *p) +{ + char *ptr; + + if (!isdigit(*line)) + return 0; + errno = 0; + *p = strtoul(line, &ptr, 10); + if (errno) + return 0; + return ptr - line; +} + +static int parse_range(const char *line, int len, int offset, const char *expect, + unsigned long *p1, unsigned long *p2) +{ + int digits, ex; + + if (offset < 0 || offset >= len) + return -1; + line += offset; + len -= offset; + + digits = parse_num(line, p1); + if (!digits) + return -1; + + offset += digits; + line += digits; + len -= digits; + + *p2 = 1; + if (*line == ',') { + digits = parse_num(line+1, p2); + if (!digits) + return -1; + + offset += digits+1; + line += digits+1; + len -= digits+1; + } + + ex = strlen(expect); + if (ex > len) + return -1; + if (memcmp(line, expect, ex)) + return -1; + + return offset + ex; +} + +static void recount_diff(const char *line, int size, struct fragment *fragment) +{ + int oldlines = 0, newlines = 0, ret = 0; + + if (size < 1) { + warning("recount: ignore empty hunk"); + return; + } + + for (;;) { + int len = linelen(line, size); + size -= len; + line += len; + + if (size < 1) + break; + + switch (*line) { + case ' ': case '\n': + newlines++; + /* fall through */ + case '-': + oldlines++; + continue; + case '+': + newlines++; + continue; + case '\\': + continue; + case '@': + ret = size < 3 || !starts_with(line, "@@ "); + break; + case 'd': + ret = size < 5 || !starts_with(line, "diff "); + break; + default: + ret = -1; + break; + } + if (ret) { + warning(_("recount: unexpected line: %.*s"), + 
(int)linelen(line, size), line); + return; + } + break; + } + fragment->oldlines = oldlines; + fragment->newlines = newlines; +} + +/* + * Parse a unified diff fragment header of the + * form "@@ -a,b +c,d @@" + */ +static int parse_fragment_header(const char *line, int len, struct fragment *fragment) +{ + int offset; + + if (!len || line[len-1] != '\n') + return -1; + + /* Figure out the number of lines in a fragment */ + offset = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines); + offset = parse_range(line, len, offset, " @@", &fragment->newpos, &fragment->newlines); + + return offset; +} + +/* + * Find file diff header + * + * Returns: + * -1 if no header was found + * -128 in case of error + * the size of the header in bytes (called "offset") otherwise + */ +static int find_header(struct apply_state *state, + const char *line, + unsigned long size, + int *hdrsize, + struct patch *patch) +{ + unsigned long offset, len; + + patch->is_toplevel_relative = 0; + patch->is_rename = patch->is_copy = 0; + patch->is_new = patch->is_delete = -1; + patch->old_mode = patch->new_mode = 0; + patch->old_name = patch->new_name = NULL; + for (offset = 0; size > 0; offset += len, size -= len, line += len, state->linenr++) { + unsigned long nextlen; + + len = linelen(line, size); + if (!len) + break; + + /* Testing this early allows us to take a few shortcuts.. */ + if (len < 6) + continue; + + /* + * Make sure we don't find any unconnected patch fragments. + * That's a sign that we didn't find a header, and that a + * patch has become corrupted/broken up. + */ + if (!memcmp("@@ -", line, 4)) { + struct fragment dummy; + if (parse_fragment_header(line, len, &dummy) < 0) + continue; + error(_("patch fragment without header at line %d: %.*s"), + state->linenr, (int)len-1, line); + return -128; + } + + if (size < len + 6) + break; + + /* + * Git patch? 
It might not have a real patch, just a rename + * or mode change, so we handle that specially + */ + if (!memcmp("diff --git ", line, 11)) { + int git_hdr_len = parse_git_diff_header(&state->root, &state->linenr, + state->p_value, line, len, + size, patch); + if (git_hdr_len < 0) + return -128; + if (git_hdr_len <= len) + continue; + *hdrsize = git_hdr_len; + return offset; + } + + /* --- followed by +++ ? */ + if (memcmp("--- ", line, 4) || memcmp("+++ ", line + len, 4)) + continue; + + /* + * We only accept unified patches, so we want it to + * at least have "@@ -a,b +c,d @@\n", which is 14 chars + * minimum ("@@ -0,0 +1 @@\n" is the shortest). + */ + nextlen = linelen(line + len, size - len); + if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4)) + continue; + + /* Ok, we'll consider it a patch */ + if (parse_traditional_patch(state, line, line+len, patch)) + return -128; + *hdrsize = len + nextlen; + state->linenr += 2; + return offset; + } + return -1; +} + +static void record_ws_error(struct apply_state *state, + unsigned result, + const char *line, + int len, + int linenr) +{ + char *err; + + if (!result) + return; + + state->whitespace_error++; + if (state->squelch_whitespace_errors && + state->squelch_whitespace_errors < state->whitespace_error) + return; + + /* + * line[len] for an incomplete line points at the "\n" at the end + * of patch input line, so "%.*s" would drop the last letter on line; + * compensate for it. 
+ */ + if (result & WS_INCOMPLETE_LINE) + len++; + + err = whitespace_error_string(result); + if (state->apply_verbosity > verbosity_silent) + fprintf(stderr, "%s:%d: %s.\n%.*s\n", + state->patch_input_file, linenr, err, len, line); + free(err); +} + +static void check_whitespace(struct apply_state *state, + const char *line, + int len, + unsigned ws_rule) +{ + unsigned result = ws_check(line + 1, len - 1, ws_rule); + + record_ws_error(state, result, line + 1, len - 2, state->linenr); +} + +/* + * Check if the patch has context lines with CRLF or + * the patch wants to remove lines with CRLF. + */ +static void check_old_for_crlf(struct patch *patch, const char *line, int len) +{ + if (len >= 2 && line[len-1] == '\n' && line[len-2] == '\r') { + patch->ws_rule |= WS_CR_AT_EOL; + patch->crlf_in_old = 1; + } +} + + +/* + * Just saw a single line in a fragment. If it is a part of this hunk + * that is a context " ", an added "+", or a removed "-" line, it may + * be followed by "\\ No newline..." to signal that the last "\n" on + * this line needs to be dropped. Depending on locale settings when + * the patch was produced we don't know what this line would exactly + * say. The only thing we do know is that it begins with "\ ". + * Checking for 12 is just for sanity check; "\ No newline..." would + * be at least that long in any l10n. + * + * Return 0 if the line we saw is not followed by "\ No newline...", + * or length of that line. The caller will use it to skip over the + * "\ No newline..." line. + */ +static int adjust_incomplete(const char *line, int len, + unsigned long size) +{ + int nextlen; + + if (*line != '\n' && *line != ' ' && *line != '+' && *line != '-') + return 0; + if (size - len < 12 || memcmp(line + len, "\\ ", 2)) + return 0; + nextlen = linelen(line + len, size - len); + if (nextlen < 12) + return 0; + return nextlen; +} + +/* + * Parse a unified diff. 
Note that this really needs to parse each + * fragment separately, since the only way to know the difference + * between a "---" that is part of a patch, and a "---" that starts + * the next patch is to look at the line counts.. + */ +static int parse_fragment(struct apply_state *state, + const char *line, + unsigned long size, + struct patch *patch, + struct fragment *fragment) +{ + int added, deleted; + int len = linelen(line, size), offset; + int skip_len = 0; + unsigned long oldlines, newlines; + unsigned long leading, trailing; + + /* do not complain a symbolic link being an incomplete line */ + if (patch->ws_rule & WS_INCOMPLETE_LINE) { + /* + * We want to figure out if the postimage is a + * symbolic link when applying the patch normally, or + * if the preimage is a symbolic link when applying + * the patch in reverse. A normal patch only has + * old_mode without new_mode. If it changes the + * filemode, new_mode has value, which is different + * from old_mode. + */ + unsigned mode = (state->apply_in_reverse + ? patch->old_mode + : patch->new_mode + ? patch->new_mode + : patch->old_mode); + if (mode && S_ISLNK(mode)) + patch->ws_rule &= ~WS_INCOMPLETE_LINE; + } + + offset = parse_fragment_header(line, len, fragment); + if (offset < 0) + return -1; + if (offset > 0 && patch->recount) + recount_diff(line + offset, size - offset, fragment); + oldlines = fragment->oldlines; + newlines = fragment->newlines; + leading = 0; + trailing = 0; + + /* Parse the thing.. */ + line += len; + size -= len; + state->linenr++; + added = deleted = 0; + for (offset = len; + 0 < size; + offset += len, size -= len, line += len, state->linenr++) { + if (!oldlines && !newlines) + break; + len = linelen(line, size); + if (!len || line[len-1] != '\n') + return -1; + + /* + * For an incomplete line, skip_len counts the bytes + * on "\\ No newline..." marker line that comes next + * to the current line. 
+ * + * Reduce "len" to drop the newline at the end of + * line[], but add one to "skip_len", which will be + * added back to "len" for the next iteration, to + * compensate. + */ + skip_len = adjust_incomplete(line, len, size); + if (skip_len) { + len--; + skip_len++; + } + switch (*line) { + default: + return -1; + case '\n': /* newer GNU diff, an empty context line */ + case ' ': + oldlines--; + newlines--; + if (!deleted && !added) + leading++; + trailing++; + check_old_for_crlf(patch, line, len); + if (!state->apply_in_reverse && + state->ws_error_action == correct_ws_error) + check_whitespace(state, line, len, patch->ws_rule); + break; + case '-': + if (!state->apply_in_reverse) + check_old_for_crlf(patch, line, len); + if (state->apply_in_reverse && + state->ws_error_action != nowarn_ws_error) + check_whitespace(state, line, len, patch->ws_rule); + deleted++; + oldlines--; + trailing = 0; + break; + case '+': + if (state->apply_in_reverse) + check_old_for_crlf(patch, line, len); + if (!state->apply_in_reverse && + state->ws_error_action != nowarn_ws_error) + check_whitespace(state, line, len, patch->ws_rule); + added++; + newlines--; + trailing = 0; + break; + } + + /* eat the "\\ No newline..." as well, if exists */ + if (skip_len) { + len += skip_len; + state->linenr++; + } + } + if (oldlines || newlines) + return -1; + if (!patch->recount && !deleted && !added) + return -1; + + fragment->leading = leading; + fragment->trailing = trailing; + + patch->lines_added += added; + patch->lines_deleted += deleted; + + if (0 < patch->is_new && oldlines) + return error(_("new file depends on old contents")); + if (0 < patch->is_delete && newlines) + return error(_("deleted file still has contents")); + return offset; +} + +/* + * We have seen "diff --git a/... b/..." header (or a traditional patch + * header). Read hunks that belong to this patch into fragments and hang + * them to the given patch structure. 
+ * + * The (fragment->patch, fragment->size) pair points into the memory given + * by the caller, not a copy, when we return. + * + * Returns: + * -1 in case of error, + * the number of bytes in the patch otherwise. + */ +static int parse_single_patch(struct apply_state *state, + const char *line, + unsigned long size, + struct patch *patch) +{ + unsigned long offset = 0; + unsigned long oldlines = 0, newlines = 0, context = 0; + struct fragment **fragp = &patch->fragments; + + while (size > 4 && !memcmp(line, "@@ -", 4)) { + struct fragment *fragment; + int len; + + CALLOC_ARRAY(fragment, 1); + fragment->linenr = state->linenr; + len = parse_fragment(state, line, size, patch, fragment); + if (len <= 0) { + free(fragment); + return error(_("corrupt patch at line %d"), state->linenr); + } + fragment->patch = line; + fragment->size = len; + oldlines += fragment->oldlines; + newlines += fragment->newlines; + context += fragment->leading + fragment->trailing; + + *fragp = fragment; + fragp = &fragment->next; + + offset += len; + line += len; + size -= len; + } + + /* + * If something was removed (i.e. we have old-lines) it cannot + * be creation, and if something was added it cannot be + * deletion. However, the reverse is not true; --unified=0 + * patches that only add are not necessarily creation even + * though they do not have any old lines, and ones that only + * delete are not necessarily deletion. + * + * Unfortunately, a real creation/deletion patch do _not_ have + * any context line by definition, so we cannot safely tell it + * apart with --unified=0 insanity. At least if the patch has + * more than one hunk it is not creation or deletion. 
+ */ + if (patch->is_new < 0 && + (oldlines || (patch->fragments && patch->fragments->next))) + patch->is_new = 0; + if (patch->is_delete < 0 && + (newlines || (patch->fragments && patch->fragments->next))) + patch->is_delete = 0; + + if (0 < patch->is_new && oldlines) + return error(_("new file %s depends on old contents"), patch->new_name); + if (0 < patch->is_delete && newlines) + return error(_("deleted file %s still has contents"), patch->old_name); + if (!patch->is_delete && !newlines && context && state->apply_verbosity > verbosity_silent) + fprintf_ln(stderr, + _("** warning: " + "file %s becomes empty but is not deleted"), + patch->new_name); + + return offset; +} + +static inline int metadata_changes(struct patch *patch) +{ + return patch->is_rename > 0 || + patch->is_copy > 0 || + patch->is_new > 0 || + patch->is_delete || + (patch->old_mode && patch->new_mode && + patch->old_mode != patch->new_mode); +} + +static char *inflate_it(const void *data, unsigned long size, + unsigned long inflated_size) +{ + git_zstream stream; + void *out; + int st; + + memset(&stream, 0, sizeof(stream)); + + stream.next_in = (unsigned char *)data; + stream.avail_in = size; + stream.next_out = out = xmalloc(inflated_size); + stream.avail_out = inflated_size; + git_inflate_init(&stream); + st = git_inflate(&stream, Z_FINISH); + git_inflate_end(&stream); + if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { + free(out); + return NULL; + } + return out; +} + +/* + * Read a binary hunk and return a new fragment; fragment->patch + * points at an allocated memory that the caller must free, so + * it is marked as "->free_patch = 1". + */ +static struct fragment *parse_binary_hunk(struct apply_state *state, + char **buf_p, + unsigned long *sz_p, + int *status_p, + int *used_p) +{ + /* + * Expect a line that begins with binary patch method ("literal" + * or "delta"), followed by the length of data before deflating. 
+ * a sequence of 'length-byte' followed by base-85 encoded data + * should follow, terminated by a newline. + * + * Each 5-byte sequence of base-85 encodes up to 4 bytes, + * and we would limit the patch line to 66 characters, + * so one line can fit up to 13 groups that would decode + * to 52 bytes max. The length byte 'A'-'Z' corresponds + * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes. + */ + int llen, used; + unsigned long size = *sz_p; + char *buffer = *buf_p; + int patch_method; + unsigned long origlen; + char *data = NULL; + int hunk_size = 0; + struct fragment *frag; + + llen = linelen(buffer, size); + used = llen; + + *status_p = 0; + + if (starts_with(buffer, "delta ")) { + patch_method = BINARY_DELTA_DEFLATED; + origlen = strtoul(buffer + 6, NULL, 10); + } + else if (starts_with(buffer, "literal ")) { + patch_method = BINARY_LITERAL_DEFLATED; + origlen = strtoul(buffer + 8, NULL, 10); + } + else + return NULL; + + state->linenr++; + buffer += llen; + size -= llen; + while (1) { + int byte_length, max_byte_length, newsize; + llen = linelen(buffer, size); + used += llen; + state->linenr++; + if (llen == 1) { + /* consume the blank line */ + buffer++; + size--; + break; + } + /* + * Minimum line is "A00000\n" which is 7-byte long, + * and the line length must be multiple of 5 plus 2. 
+ */ + if ((llen < 7) || (llen-2) % 5) + goto corrupt; + max_byte_length = (llen - 2) / 5 * 4; + byte_length = *buffer; + if ('A' <= byte_length && byte_length <= 'Z') + byte_length = byte_length - 'A' + 1; + else if ('a' <= byte_length && byte_length <= 'z') + byte_length = byte_length - 'a' + 27; + else + goto corrupt; + /* if the input length was not multiple of 4, we would + * have filler at the end but the filler should never + * exceed 3 bytes + */ + if (max_byte_length < byte_length || + byte_length <= max_byte_length - 4) + goto corrupt; + newsize = hunk_size + byte_length; + data = xrealloc(data, newsize); + if (decode_85(data + hunk_size, buffer + 1, byte_length)) + goto corrupt; + hunk_size = newsize; + buffer += llen; + size -= llen; + } + + CALLOC_ARRAY(frag, 1); + frag->patch = inflate_it(data, hunk_size, origlen); + frag->free_patch = 1; + if (!frag->patch) + goto corrupt; + free(data); + frag->size = origlen; + *buf_p = buffer; + *sz_p = size; + *used_p = used; + frag->binary_patch_method = patch_method; + return frag; + + corrupt: + free(data); + *status_p = -1; + error(_("corrupt binary patch at line %d: %.*s"), + state->linenr-1, llen-1, buffer); + return NULL; +} + +/* + * Returns: + * -1 in case of error, + * the length of the parsed binary patch otherwise + */ +static int parse_binary(struct apply_state *state, + char *buffer, + unsigned long size, + struct patch *patch) +{ + /* + * We have read "GIT binary patch\n"; what follows is a line + * that says the patch method (currently, either "literal" or + * "delta") and the length of data before deflating; a + * sequence of 'length-byte' followed by base-85 encoded data + * follows. + * + * When a binary patch is reversible, there is another binary + * hunk in the same format, starting with patch method (either + * "literal" or "delta") with the length of data, and a sequence + * of length-byte + base-85 encoded data, terminated with another + * empty line. 
This data, when applied to the postimage, produces + * the preimage. + */ + struct fragment *forward; + struct fragment *reverse; + int status; + int used, used_1; + + forward = parse_binary_hunk(state, &buffer, &size, &status, &used); + if (!forward && !status) + /* there has to be one hunk (forward hunk) */ + return error(_("unrecognized binary patch at line %d"), state->linenr-1); + if (status) + /* otherwise we already gave an error message */ + return status; + + reverse = parse_binary_hunk(state, &buffer, &size, &status, &used_1); + if (reverse) + used += used_1; + else if (status) { + /* + * Not having reverse hunk is not an error, but having + * a corrupt reverse hunk is. + */ + free((void*) forward->patch); + free(forward); + return status; + } + forward->next = reverse; + patch->fragments = forward; + patch->is_binary = 1; + return used; +} + +static void prefix_one(struct apply_state *state, char **name) +{ + char *old_name = *name; + if (!old_name) + return; + *name = prefix_filename(state->prefix, *name); + free(old_name); +} + +static void prefix_patch(struct apply_state *state, struct patch *p) +{ + if (!state->prefix || p->is_toplevel_relative) + return; + prefix_one(state, &p->new_name); + prefix_one(state, &p->old_name); +} + +/* + * include/exclude + */ + +static void add_name_limit(struct apply_state *state, + const char *name, + int exclude) +{ + struct string_list_item *it; + + it = string_list_append(&state->limit_by_name, name); + it->util = exclude ? NULL : (void *) 1; +} + +static int use_patch(struct apply_state *state, struct patch *p) +{ + const char *pathname = p->new_name ? 
p->new_name : p->old_name; + int i; + + /* Paths outside are not touched regardless of "--include" */ + if (state->prefix && *state->prefix) { + const char *rest; + if (!skip_prefix(pathname, state->prefix, &rest) || !*rest) + return 0; + } + + /* + * See if it matches any exclude/include rule. Rules are tried in + * the order they were added and the first one for which wildmatch() + * returns 0 decides: add_name_limit() stores a non-NULL util for an + * include rule and NULL for an exclude rule. + */ + for (i = 0; i < state->limit_by_name.nr; i++) { + struct string_list_item *it = &state->limit_by_name.items[i]; + if (!wildmatch(it->string, pathname, 0)) + return (it->util != NULL); + } + + /* + * If we had any include, a path that does not match any rule is + * not used. Otherwise, we saw a bunch of exclude rules (or none) + * and such a path is used. + */ + return !state->has_include; +} + +/* + * Read the patch text in "buffer" that extends for "size" bytes; stop + * reading after seeing a single patch (i.e. changes to a single file). + * Create fragments (i.e. patch hunks) and hang them to the given patch. + * + * Returns: + * -1 if no header was found or parse_binary() failed, + * -128 on another error, + * the number of bytes consumed otherwise, + * so that the caller can call us again for the next patch.
+ */ +static int parse_chunk(struct apply_state *state, char *buffer, unsigned long size, struct patch *patch) +{ + int hdrsize, patchsize; + int offset = find_header(state, buffer, size, &hdrsize, patch); + + if (offset < 0) + return offset; + + prefix_patch(state, patch); + + if (!use_patch(state, patch)) + patch->ws_rule = 0; + else if (patch->new_name) + patch->ws_rule = whitespace_rule(state->repo->index, + patch->new_name); + else + patch->ws_rule = whitespace_rule(state->repo->index, + patch->old_name); + + patchsize = parse_single_patch(state, + buffer + offset + hdrsize, + size - offset - hdrsize, + patch); + + if (patchsize < 0) + return -128; + + if (!patchsize) { + static const char git_binary[] = "GIT binary patch\n"; + int hd = hdrsize + offset; + unsigned long llen = linelen(buffer + hd, size - hd); + + if (llen == sizeof(git_binary) - 1 && + !memcmp(git_binary, buffer + hd, llen)) { + int used; + state->linenr++; + used = parse_binary(state, buffer + hd + llen, + size - hd - llen, patch); + if (used < 0) + return -1; + if (used) + patchsize = used + llen; + else + patchsize = 0; + } + else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) { + static const char *binhdr[] = { + "Binary files ", + "Files ", + NULL, + }; + int i; + for (i = 0; binhdr[i]; i++) { + int len = strlen(binhdr[i]); + if (len < size - hd && + !memcmp(binhdr[i], buffer + hd, len)) { + state->linenr++; + patch->is_binary = 1; + patchsize = llen; + break; + } + } + } + + /* Empty patch cannot be applied if it is a text patch + * without metadata change. A binary patch appears + * empty to us here. 
+ */ + if ((state->apply || state->check) && + (!patch->is_binary && !metadata_changes(patch))) { + error(_("patch with only garbage at line %d"), state->linenr); + return -128; + } + } + + return offset + hdrsize + patchsize; +} + +static void reverse_patches(struct patch *p) +{ + for (; p; p = p->next) { + struct fragment *frag = p->fragments; + + SWAP(p->new_name, p->old_name); + if (p->new_mode || p->is_delete) + SWAP(p->new_mode, p->old_mode); + SWAP(p->is_new, p->is_delete); + SWAP(p->lines_added, p->lines_deleted); + SWAP(p->old_oid_prefix, p->new_oid_prefix); + + for (; frag; frag = frag->next) { + SWAP(frag->newpos, frag->oldpos); + SWAP(frag->newlines, frag->oldlines); + } + } +} + +static const char pluses[] = +"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; +static const char minuses[]= +"----------------------------------------------------------------------"; + +static void show_stats(struct apply_state *state, struct patch *patch) +{ + struct strbuf qname = STRBUF_INIT; + char *cp = patch->new_name ? patch->new_name : patch->old_name; + int max, add, del; + + quote_c_style(cp, &qname, NULL, 0); + + /* + * "scale" the filename + */ + max = state->max_len; + if (max > 50) + max = 50; + + if (qname.len > max) { + cp = strchr(qname.buf + qname.len + 3 - max, '/'); + if (!cp) + cp = qname.buf + qname.len + 3 - max; + strbuf_splice(&qname, 0, cp - qname.buf, "...", 3); + } + + if (patch->is_binary) { + printf(" %-*s | Bin\n", max, qname.buf); + strbuf_release(&qname); + return; + } + + printf(" %-*s |", max, qname.buf); + strbuf_release(&qname); + + /* + * scale the add/delete + */ + max = max + state->max_change > 70 ? 
70 - max : state->max_change; + add = patch->lines_added; + del = patch->lines_deleted; + + if (state->max_change > 0) { + int total = ((add + del) * max + state->max_change / 2) / state->max_change; + add = (add * max + state->max_change / 2) / state->max_change; + del = total - add; + } + printf("%5d %.*s%.*s\n", patch->lines_added + patch->lines_deleted, + add, pluses, del, minuses); +} + +static int read_old_data(struct stat *st, struct patch *patch, + const char *path, struct strbuf *buf) +{ + int conv_flags = patch->crlf_in_old ? + CONV_EOL_KEEP_CRLF : CONV_EOL_RENORMALIZE; + switch (st->st_mode & S_IFMT) { + case S_IFLNK: + if (strbuf_readlink(buf, path, st->st_size) < 0) + return error(_("unable to read symlink %s"), path); + return 0; + case S_IFREG: + if (strbuf_read_file(buf, path, st->st_size) != st->st_size) + return error(_("unable to open or read %s"), path); + /* + * "git apply" without "--index/--cached" should never look + * at the index; the target file may not have been added to + * the index yet, and we may not even be in any Git repository. + * Pass NULL to convert_to_git() to stress this; the function + * should never look at the index when explicit crlf option + * is given. + */ + convert_to_git(NULL, path, buf->buf, buf->len, buf, conv_flags); + return 0; + default: + return -1; + } +} + +/* + * Update the preimage, and the common lines in postimage, + * from buffer buf of length len. + */ +static void update_pre_post_images(struct image *preimage, + struct image *postimage, + char *buf, size_t len) +{ + struct image fixed_preimage = IMAGE_INIT; + size_t insert_pos = 0; + int i, ctx, reduced; + const char *fixed; + + /* + * Update the preimage with whitespace fixes. Note that we + * are not losing preimage->buf -- apply_one_fragment() will + * free "oldlines". 
+ */ + image_prepare(&fixed_preimage, buf, len, 1); + for (i = 0; i < fixed_preimage.line_nr; i++) + fixed_preimage.line[i].flag = preimage->line[i].flag; + image_clear(preimage); + *preimage = fixed_preimage; + fixed = preimage->buf.buf; + + /* + * Adjust the common context lines in postimage. + */ + for (i = reduced = ctx = 0; i < postimage->line_nr; i++) { + size_t l_len = postimage->line[i].len; + + if (!(postimage->line[i].flag & LINE_COMMON)) { + /* an added line -- no counterparts in preimage */ + insert_pos += l_len; + continue; + } + + /* and find the corresponding one in the fixed preimage */ + while (ctx < preimage->line_nr && + !(preimage->line[ctx].flag & LINE_COMMON)) { + fixed += preimage->line[ctx].len; + ctx++; + } + + /* + * preimage is expected to run out, if the caller + * fixed addition of trailing blank lines. + */ + if (preimage->line_nr <= ctx) { + reduced++; + continue; + } + + /* and copy it in, while fixing the line length */ + l_len = preimage->line[ctx].len; + strbuf_splice(&postimage->buf, insert_pos, postimage->line[i].len, + fixed, l_len); + insert_pos += l_len; + fixed += l_len; + postimage->line[i].len = l_len; + ctx++; + } + + /* Fix the length of the whole thing */ + postimage->line_nr -= reduced; +} + +/* + * Compare lines s1 of length n1 and s2 of length n2, ignoring + * whitespace difference. Returns 1 if they match, 0 otherwise + */ +static int fuzzy_matchlines(const char *s1, size_t n1, + const char *s2, size_t n2) +{ + const char *end1 = s1 + n1; + const char *end2 = s2 + n2; + + /* ignore line endings */ + while (s1 < end1 && (end1[-1] == '\r' || end1[-1] == '\n')) + end1--; + while (s2 < end2 && (end2[-1] == '\r' || end2[-1] == '\n')) + end2--; + + while (s1 < end1 && s2 < end2) { + if (isspace(*s1)) { + /* + * Skip whitespace. We check on both buffers + * because we don't want "a b" to match "ab". 
+ */ + if (!isspace(*s2)) + return 0; + while (s1 < end1 && isspace(*s1)) + s1++; + while (s2 < end2 && isspace(*s2)) + s2++; + } else if (*s1++ != *s2++) + return 0; + } + + /* If we reached the end on one side only, lines don't match. */ + return s1 == end1 && s2 == end2; +} + +static int line_by_line_fuzzy_match(struct image *img, + struct image *preimage, + struct image *postimage, + unsigned long current, + int current_lno, + int preimage_limit) +{ + int i; + size_t imgoff = 0; + size_t preoff = 0; + size_t extra_chars; + char *buf; + char *preimage_eof; + char *preimage_end; + struct strbuf fixed; + char *fixed_buf; + size_t fixed_len; + + for (i = 0; i < preimage_limit; i++) { + size_t prelen = preimage->line[i].len; + size_t imglen = img->line[current_lno+i].len; + + if (!fuzzy_matchlines(img->buf.buf + current + imgoff, imglen, + preimage->buf.buf + preoff, prelen)) + return 0; + imgoff += imglen; + preoff += prelen; + } + + /* + * Ok, the preimage matches with whitespace fuzz. + * + * imgoff now holds the true length of the target that + * matches the preimage before the end of the file. + * + * Count the number of characters in the preimage that fall + * beyond the end of the file and make sure that all of them + * are whitespace characters. (This can only happen if + * we are removing blank lines at the end of the file.) + */ + buf = preimage_eof = preimage->buf.buf + preoff; + for ( ; i < preimage->line_nr; i++) + preoff += preimage->line[i].len; + preimage_end = preimage->buf.buf + preoff; + for ( ; buf < preimage_end; buf++) + if (!isspace(*buf)) + return 0; + + /* + * Update the preimage and the common postimage context + * lines to use the same whitespace as the target. + * If whitespace is missing in the target (i.e. + * if the preimage extends beyond the end of the file), + * use the whitespace from the preimage. 
+ */ + extra_chars = preimage_end - preimage_eof; + strbuf_init(&fixed, imgoff + extra_chars); + strbuf_add(&fixed, img->buf.buf + current, imgoff); + strbuf_add(&fixed, preimage_eof, extra_chars); + fixed_buf = strbuf_detach(&fixed, &fixed_len); + update_pre_post_images(preimage, postimage, + fixed_buf, fixed_len); + return 1; +} + +static int match_fragment(struct apply_state *state, + struct image *img, + struct image *preimage, + struct image *postimage, + unsigned long current, + int current_lno, + unsigned ws_rule, + int match_beginning, int match_end) +{ + int i; + const char *orig, *target; + struct strbuf fixed = STRBUF_INIT; + char *fixed_buf; + size_t fixed_len; + int preimage_limit; + int ret; + + if (preimage->line_nr + current_lno <= img->line_nr) { + /* + * The hunk falls within the boundaries of img. + */ + preimage_limit = preimage->line_nr; + if (match_end && (preimage->line_nr + current_lno != img->line_nr)) { + ret = 0; + goto out; + } + } else if (state->ws_error_action == correct_ws_error && + (ws_rule & WS_BLANK_AT_EOF)) { + /* + * This hunk extends beyond the end of img, and we are + * removing blank lines at the end of the file. This + * many lines from the beginning of the preimage must + * match with img, and the remainder of the preimage + * must be blank. + */ + preimage_limit = img->line_nr - current_lno; + } else { + /* + * The hunk extends beyond the end of the img and + * we are not removing blanks at the end, so we + * should reject the hunk at this position. + */ + ret = 0; + goto out; + } + + if (match_beginning && current_lno) { + ret = 0; + goto out; + } + + /* Quick hash check */ + for (i = 0; i < preimage_limit; i++) { + if ((img->line[current_lno + i].flag & LINE_PATCHED) || + (preimage->line[i].hash != img->line[current_lno + i].hash)) { + ret = 0; + goto out; + } + } + + if (preimage_limit == preimage->line_nr) { + /* + * Do we have an exact match? 
If we were told to match + * at the end, size must be exactly at current+fragsize, + * otherwise current+fragsize must be still within the preimage, + * and either case, the old piece should match the preimage + * exactly. + */ + if ((match_end + ? (current + preimage->buf.len == img->buf.len) + : (current + preimage->buf.len <= img->buf.len)) && + !memcmp(img->buf.buf + current, preimage->buf.buf, preimage->buf.len)) { + ret = 1; + goto out; + } + } else { + /* + * The preimage extends beyond the end of img, so + * there cannot be an exact match. + * + * There must be one non-blank context line that match + * a line before the end of img. + */ + const char *buf, *buf_end; + + buf = preimage->buf.buf; + buf_end = buf; + for (i = 0; i < preimage_limit; i++) + buf_end += preimage->line[i].len; + + for ( ; buf < buf_end; buf++) + if (!isspace(*buf)) + break; + if (buf == buf_end) { + ret = 0; + goto out; + } + } + + /* + * No exact match. If we are ignoring whitespace, run a line-by-line + * fuzzy matching. We collect all the line length information because + * we need it to adjust whitespace if we match. + */ + if (state->ws_ignore_action == ignore_ws_change) { + ret = line_by_line_fuzzy_match(img, preimage, postimage, + current, current_lno, preimage_limit); + goto out; + } + + if (state->ws_error_action != correct_ws_error) { + ret = 0; + goto out; + } + + /* + * The hunk does not apply byte-by-byte, but the hash says + * it might with whitespace fuzz. We weren't asked to + * ignore whitespace, we were asked to correct whitespace + * errors, so let's try matching after whitespace correction. + * + * While checking the preimage against the target, whitespace + * errors in both fixed, we count how large the corresponding + * postimage needs to be. The postimage prepared by + * apply_one_fragment() has whitespace errors fixed on added + * lines already, but the common lines were propagated as-is, + * which may become longer when their whitespace errors are + * fixed. 
+ */ + + /* + * The preimage may extend beyond the end of the file, + * but in this loop we will only handle the part of the + * preimage that falls within the file. + */ + strbuf_grow(&fixed, preimage->buf.len + 1); + orig = preimage->buf.buf; + target = img->buf.buf + current; + for (i = 0; i < preimage_limit; i++) { + size_t oldlen = preimage->line[i].len; + size_t tgtlen = img->line[current_lno + i].len; + size_t fixstart = fixed.len; + struct strbuf tgtfix; + int match; + + /* Try fixing the line in the preimage */ + ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL); + + /* Try fixing the line in the target */ + strbuf_init(&tgtfix, tgtlen); + ws_fix_copy(&tgtfix, target, tgtlen, ws_rule, NULL); + + /* + * If they match, either the preimage was based on + * a version before our tree fixed whitespace breakage, + * or we are lacking a whitespace-fix patch the tree + * the preimage was based on already had (i.e. target + * has whitespace breakage, the preimage doesn't). + * In either case, we are fixing the whitespace breakages + * so we might as well take the fix together with their + * real change. + */ + match = (tgtfix.len == fixed.len - fixstart && + !memcmp(tgtfix.buf, fixed.buf + fixstart, + fixed.len - fixstart)); + + strbuf_release(&tgtfix); + if (!match) { + ret = 0; + goto out; + } + + orig += oldlen; + target += tgtlen; + } + + + /* + * Now handle the lines in the preimage that falls beyond the + * end of the file (if any). They will only match if they are + * empty or only contain whitespace (if WS_BLANK_AT_EOL is + * false). 
+ */ + for ( ; i < preimage->line_nr; i++) { + size_t fixstart = fixed.len; /* start of the fixed preimage */ + size_t oldlen = preimage->line[i].len; + int j; + + /* Try fixing the line in the preimage */ + ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL); + + for (j = fixstart; j < fixed.len; j++) { + if (!isspace(fixed.buf[j])) { + ret = 0; + goto out; + } + } + + + orig += oldlen; + } + + /* + * Yes, the preimage is based on an older version that still + * has whitespace breakages unfixed, and fixing them makes the + * hunk match. Update the context lines in the postimage. + */ + fixed_buf = strbuf_detach(&fixed, &fixed_len); + update_pre_post_images(preimage, postimage, + fixed_buf, fixed_len); + + ret = 1; + +out: + strbuf_release(&fixed); + return ret; +} + +static int find_pos(struct apply_state *state, + struct image *img, + struct image *preimage, + struct image *postimage, + int line, + unsigned ws_rule, + int match_beginning, int match_end) +{ + int i; + unsigned long backwards, forwards, current; + int backwards_lno, forwards_lno, current_lno; + + /* + * When running with --allow-overlap, it is possible that a hunk is + * seen that pretends to start at the beginning (but no longer does), + * and that *still* needs to match the end. So trust `match_end` more + * than `match_beginning`. + */ + if (state->allow_overlap && match_beginning && match_end && + img->line_nr - preimage->line_nr != 0) + match_beginning = 0; + + /* + * If match_beginning or match_end is specified, there is no + * point starting from a wrong line that will never match and + * wander around and wait for a match at the specified end. + */ + if (match_beginning) + line = 0; + else if (match_end) + line = img->line_nr - preimage->line_nr; + + /* + * Because the comparison is unsigned, the following test + * will also take care of a negative line number that can + * result when match_end and preimage is larger than the target. 
+ */ + if ((size_t) line > img->line_nr) + line = img->line_nr; + + current = 0; + for (i = 0; i < line; i++) + current += img->line[i].len; + + /* + * There's probably some smart way to do this, but I'll leave + * that to the smart and beautiful people. I'm simple and stupid. + */ + backwards = current; + backwards_lno = line; + forwards = current; + forwards_lno = line; + current_lno = line; + + for (i = 0; ; i++) { + if (match_fragment(state, img, preimage, postimage, + current, current_lno, ws_rule, + match_beginning, match_end)) + return current_lno; + + again: + if (backwards_lno == 0 && forwards_lno == img->line_nr) + break; + + if (i & 1) { + if (backwards_lno == 0) { + i++; + goto again; + } + backwards_lno--; + backwards -= img->line[backwards_lno].len; + current = backwards; + current_lno = backwards_lno; + } else { + if (forwards_lno == img->line_nr) { + i++; + goto again; + } + forwards += img->line[forwards_lno].len; + forwards_lno++; + current = forwards; + current_lno = forwards_lno; + } + + } + return -1; +} + +/* + * The change from "preimage" and "postimage" has been found to + * apply at applied_pos (counts in line numbers) in "img". + * Update "img" to remove "preimage" and replace it with "postimage". + */ +static void update_image(struct apply_state *state, + struct image *img, + int applied_pos, + struct image *preimage, + struct image *postimage) +{ + /* + * remove the copy of preimage at offset in img + * and replace it with postimage + */ + int i, nr; + size_t remove_count, insert_count, applied_at = 0; + size_t result_alloc; + char *result; + int preimage_limit; + + /* + * If we are removing blank lines at the end of img, + * the preimage may extend beyond the end. + * If that is the case, we must be careful only to + * remove the part of the preimage that falls within + * the boundaries of img. Initialize preimage_limit + * to the number of lines in the preimage that falls + * within the boundaries. 
+ */ + preimage_limit = preimage->line_nr; + if (preimage_limit > img->line_nr - applied_pos) + preimage_limit = img->line_nr - applied_pos; + + for (i = 0; i < applied_pos; i++) + applied_at += img->line[i].len; + + remove_count = 0; + for (i = 0; i < preimage_limit; i++) + remove_count += img->line[applied_pos + i].len; + insert_count = postimage->buf.len; + + /* Adjust the contents */ + result_alloc = st_add3(st_sub(img->buf.len, remove_count), insert_count, 1); + result = xmalloc(result_alloc); + memcpy(result, img->buf.buf, applied_at); + memcpy(result + applied_at, postimage->buf.buf, postimage->buf.len); + memcpy(result + applied_at + postimage->buf.len, + img->buf.buf + (applied_at + remove_count), + img->buf.len - (applied_at + remove_count)); + strbuf_attach(&img->buf, result, postimage->buf.len + img->buf.len - remove_count, + result_alloc); + + /* Adjust the line table */ + nr = img->line_nr + postimage->line_nr - preimage_limit; + if (preimage_limit < postimage->line_nr) + /* + * NOTE: this knows that we never call image_remove_first_line() + * on anything other than pre/post image. + */ + REALLOC_ARRAY(img->line, nr); + if (preimage_limit != postimage->line_nr) + MOVE_ARRAY(img->line + applied_pos + postimage->line_nr, + img->line + applied_pos + preimage_limit, + img->line_nr - (applied_pos + preimage_limit)); + COPY_ARRAY(img->line + applied_pos, postimage->line, postimage->line_nr); + if (!state->allow_overlap) + for (i = 0; i < postimage->line_nr; i++) + img->line[applied_pos + i].flag |= LINE_PATCHED; + img->line_nr = nr; +} + +/* + * Use the patch-hunk text in "frag" to prepare two images (preimage and + * postimage) for the hunk. Find lines that match "preimage" in "img" and + * replace the part of "img" with "postimage" text. 
+ */ +static int apply_one_fragment(struct apply_state *state, + struct image *img, struct fragment *frag, + int inaccurate_eof, unsigned ws_rule, + int nth_fragment) +{ + int match_beginning, match_end; + const char *patch = frag->patch; + int size = frag->size; + char *old, *oldlines; + struct strbuf newlines; + int new_blank_lines_at_end = 0; + int found_new_blank_lines_at_end = 0; + int hunk_linenr = frag->linenr; + unsigned long leading, trailing; + int pos, applied_pos; + struct image preimage = IMAGE_INIT; + struct image postimage = IMAGE_INIT; + + oldlines = xmalloc(size); + strbuf_init(&newlines, size); + + old = oldlines; + while (size > 0) { + char first; + int len = linelen(patch, size); + int plen; + int added_blank_line = 0; + int is_blank_context = 0; + size_t start; + + if (!len) + break; + + /* + * "plen" is how much of the line we should use for + * the actual patch data. Normally we just remove the + * first character on the line, but if the line is + * followed by "\ No newline", then we also remove the + * last one (which is the newline, of course). + */ + plen = len - 1; + if (len < size && patch[len] == '\\') + plen--; + first = *patch; + if (state->apply_in_reverse) { + if (first == '-') + first = '+'; + else if (first == '+') + first = '-'; + } + + switch (first) { + case '\n': + /* Newer GNU diff, empty context line */ + if (plen < 0) + /* ... followed by '\No newline'; nothing */ + break; + *old++ = '\n'; + strbuf_addch(&newlines, '\n'); + image_add_line(&preimage, "\n", 1, LINE_COMMON); + image_add_line(&postimage, "\n", 1, LINE_COMMON); + is_blank_context = 1; + break; + case ' ': + if (plen && (ws_rule & WS_BLANK_AT_EOF) && + ws_blank_line(patch + 1, plen)) + is_blank_context = 1; + /* fallthrough */ + case '-': + memcpy(old, patch + 1, plen); + image_add_line(&preimage, old, plen, + (first == ' ' ? 
LINE_COMMON : 0)); + old += plen; + if (first == '-') + break; + /* fallthrough */ + case '+': + /* --no-add does not add new lines */ + if (first == '+' && state->no_add) + break; + + start = newlines.len; + if (first != '+' || + !state->whitespace_error || + state->ws_error_action != correct_ws_error) { + strbuf_add(&newlines, patch + 1, plen); + } + else { + ws_fix_copy(&newlines, patch + 1, plen, ws_rule, &state->applied_after_fixing_ws); + } + image_add_line(&postimage, newlines.buf + start, newlines.len - start, + (first == '+' ? 0 : LINE_COMMON)); + if (first == '+' && + (ws_rule & WS_BLANK_AT_EOF) && + ws_blank_line(patch + 1, plen)) + added_blank_line = 1; + break; + case '@': case '\\': + /* Ignore it, we already handled it */ + break; + default: + if (state->apply_verbosity > verbosity_normal) + error(_("invalid start of line: '%c'"), first); + applied_pos = -1; + goto out; + } + if (added_blank_line) { + if (!new_blank_lines_at_end) + found_new_blank_lines_at_end = hunk_linenr; + new_blank_lines_at_end++; + } + else if (is_blank_context) + ; + else + new_blank_lines_at_end = 0; + patch += len; + size -= len; + hunk_linenr++; + } + if (inaccurate_eof && + old > oldlines && old[-1] == '\n' && + newlines.len > 0 && newlines.buf[newlines.len - 1] == '\n') { + old--; + strbuf_setlen(&newlines, newlines.len - 1); + preimage.line[preimage.line_nr - 1].len--; + postimage.line[postimage.line_nr - 1].len--; + } + + leading = frag->leading; + trailing = frag->trailing; + + /* + * A hunk to change lines at the beginning would begin with + * @@ -1,L +N,M @@ + * but we need to be careful. -U0 that inserts before the second + * line also has this pattern. + * + * And a hunk to add to an empty file would begin with + * @@ -0,0 +N,M @@ + * + * In other words, a hunk that is (frag->oldpos <= 1) with or + * without leading context must match at the beginning. 
+ */ + match_beginning = (!frag->oldpos || + (frag->oldpos == 1 && !state->unidiff_zero)); + + /* + * A hunk without trailing lines must match at the end. + * However, we simply cannot tell if a hunk must match end + * from the lack of trailing lines if the patch was generated + * with unidiff without any context. + */ + match_end = !state->unidiff_zero && !trailing; + + pos = frag->newpos ? (frag->newpos - 1) : 0; + strbuf_add(&preimage.buf, oldlines, old - oldlines); + strbuf_swap(&postimage.buf, &newlines); + + for (;;) { + + applied_pos = find_pos(state, img, &preimage, &postimage, pos, + ws_rule, match_beginning, match_end); + + if (applied_pos >= 0) + break; + + /* Am I at my context limits? */ + if ((leading <= state->p_context) && (trailing <= state->p_context)) + break; + if (match_beginning || match_end) { + match_beginning = match_end = 0; + continue; + } + + /* + * Reduce the number of context lines; reduce both + * leading and trailing if they are equal otherwise + * just reduce the larger context. 
+ */ + if (leading >= trailing) { + image_remove_first_line(&preimage); + image_remove_first_line(&postimage); + pos--; + leading--; + } + if (trailing > leading) { + image_remove_last_line(&preimage); + image_remove_last_line(&postimage); + trailing--; + } + } + + if (applied_pos >= 0) { + if (new_blank_lines_at_end && + preimage.line_nr + applied_pos >= img->line_nr && + (ws_rule & WS_BLANK_AT_EOF) && + state->ws_error_action != nowarn_ws_error) { + record_ws_error(state, WS_BLANK_AT_EOF, "+", 1, + found_new_blank_lines_at_end); + if (state->ws_error_action == correct_ws_error) { + while (new_blank_lines_at_end--) + image_remove_last_line(&postimage); + } + /* + * We would want to prevent write_out_results() + * from taking place in apply_patch() that follows + * the callchain led us here, which is: + * apply_patch->check_patch_list->check_patch-> + * apply_data->apply_fragments->apply_one_fragment + */ + if (state->ws_error_action == die_on_ws_error) + state->apply = 0; + } + + if (state->apply_verbosity > verbosity_normal && applied_pos != pos) { + int offset = applied_pos - pos; + if (state->apply_in_reverse) + offset = 0 - offset; + fprintf_ln(stderr, + Q_("Hunk #%d succeeded at %d (offset %d line).", + "Hunk #%d succeeded at %d (offset %d lines).", + offset), + nth_fragment, applied_pos + 1, offset); + } + + /* + * Warn if it was necessary to reduce the number + * of context lines. 
+ */ + if ((leading != frag->leading || + trailing != frag->trailing) && state->apply_verbosity > verbosity_silent) + fprintf_ln(stderr, _("Context reduced to (%ld/%ld)" + " to apply fragment at %d"), + leading, trailing, applied_pos+1); + update_image(state, img, applied_pos, &preimage, &postimage); + } else { + if (state->apply_verbosity > verbosity_normal) + error(_("while searching for:\n%.*s"), + (int)(old - oldlines), oldlines); + } + +out: + free(oldlines); + strbuf_release(&newlines); + image_clear(&preimage); + image_clear(&postimage); + + return (applied_pos < 0); +} + +static int apply_binary_fragment(struct apply_state *state, + struct image *img, + struct patch *patch) +{ + struct fragment *fragment = patch->fragments; + unsigned long len; + void *dst; + + if (!fragment) + return error(_("missing binary patch data for '%s'"), + patch->new_name ? + patch->new_name : + patch->old_name); + + /* Binary patch is irreversible without the optional second hunk */ + if (state->apply_in_reverse) { + if (!fragment->next) + return error(_("cannot reverse-apply a binary patch " + "without the reverse hunk to '%s'"), + patch->new_name + ? patch->new_name : patch->old_name); + fragment = fragment->next; + } + switch (fragment->binary_patch_method) { + case BINARY_DELTA_DEFLATED: + dst = patch_delta(img->buf.buf, img->buf.len, fragment->patch, + fragment->size, &len); + if (!dst) + return -1; + image_clear(img); + strbuf_attach(&img->buf, dst, len, len + 1); + return 0; + case BINARY_LITERAL_DEFLATED: + image_clear(img); + strbuf_add(&img->buf, fragment->patch, fragment->size); + return 0; + } + return -1; +} + +/* + * Replace "img" with the result of applying the binary patch. + * The binary patch data itself in patch->fragment is still kept + * but the preimage prepared by the caller in "img" is freed here + * or in the helper function apply_binary_fragment() this calls. 
+ */ +static int apply_binary(struct apply_state *state, + struct image *img, + struct patch *patch) +{ + const char *name = patch->old_name ? patch->old_name : patch->new_name; + struct object_id oid; + const unsigned hexsz = the_hash_algo->hexsz; + + /* + * For safety, we require patch index line to contain + * full hex textual object ID for old and new, at least for now. + */ + if (strlen(patch->old_oid_prefix) != hexsz || + strlen(patch->new_oid_prefix) != hexsz || + get_oid_hex(patch->old_oid_prefix, &oid) || + get_oid_hex(patch->new_oid_prefix, &oid)) + return error(_("cannot apply binary patch to '%s' " + "without full index line"), name); + + if (patch->old_name) { + /* + * See if the old one matches what the patch + * applies to. + */ + hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, + OBJ_BLOB, &oid); + if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix)) + return error(_("the patch applies to '%s' (%s), " + "which does not match the " + "current contents."), + name, oid_to_hex(&oid)); + } + else { + /* Otherwise, the old one must be empty. */ + if (img->buf.len) + return error(_("the patch applies to an empty " + "'%s' but it is not empty"), name); + } + + get_oid_hex(patch->new_oid_prefix, &oid); + if (is_null_oid(&oid)) { + image_clear(img); + return 0; /* deletion patch */ + } + + if (odb_has_object(the_repository->objects, &oid, 0)) { + /* We already have the postimage */ + enum object_type type; + unsigned long size; + char *result; + + result = odb_read_object(the_repository->objects, &oid, + &type, &size); + if (!result) + return error(_("the necessary postimage %s for " + "'%s' cannot be read"), + patch->new_oid_prefix, name); + image_clear(img); + strbuf_attach(&img->buf, result, size, size + 1); + } else { + /* + * We have verified buf matches the preimage; + * apply the patch data to it, which is stored + * in the patch->fragments->{patch,size}. 
+ */ + if (apply_binary_fragment(state, img, patch)) + return error(_("binary patch does not apply to '%s'"), + name); + + /* verify that the result matches */ + hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, OBJ_BLOB, + &oid); + if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix)) + return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"), + name, patch->new_oid_prefix, oid_to_hex(&oid)); + } + + return 0; +} + +static int apply_fragments(struct apply_state *state, struct image *img, struct patch *patch) +{ + struct fragment *frag = patch->fragments; + const char *name = patch->old_name ? patch->old_name : patch->new_name; + unsigned ws_rule = patch->ws_rule; + unsigned inaccurate_eof = patch->inaccurate_eof; + int nth = 0; + + if (patch->is_binary) + return apply_binary(state, img, patch); + + while (frag) { + nth++; + if (apply_one_fragment(state, img, frag, inaccurate_eof, ws_rule, nth)) { + error(_("patch failed: %s:%ld"), name, frag->oldpos); + if (!state->apply_with_reject) + return -1; + frag->rejected = 1; + } + frag = frag->next; + } + return 0; +} + +static int read_blob_object(struct strbuf *buf, const struct object_id *oid, unsigned mode) +{ + if (S_ISGITLINK(mode)) { + strbuf_grow(buf, 100); + strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid)); + } else { + enum object_type type; + unsigned long sz; + char *result; + + result = odb_read_object(the_repository->objects, oid, + &type, &sz); + if (!result) + return -1; + /* XXX read_sha1_file NUL-terminates */ + strbuf_attach(buf, result, sz, sz + 1); + } + return 0; +} + +static int read_file_or_gitlink(const struct cache_entry *ce, struct strbuf *buf) +{ + if (!ce) + return 0; + return read_blob_object(buf, &ce->oid, ce->ce_mode); +} + +static struct patch *in_fn_table(struct apply_state *state, const char *name) +{ + struct string_list_item *item; + + if (!name) + return NULL; + + item = string_list_lookup(&state->fn_table, name); + if (item) + return 
(struct patch *)item->util; + + return NULL; +} + +/* + * item->util in the filename table records the status of the path. + * Usually it points at a patch (whose result records the contents + * of it after applying it), but it could be PATH_WAS_DELETED for a + * path that a previously applied patch has already removed, or + * PATH_TO_BE_DELETED for a path that a later patch would remove. + * + * The latter is needed to deal with a case where two paths A and B + * are swapped by first renaming A to B and then renaming B to A; + * moving A to B should not be prevented due to presence of B as we + * will remove it in a later patch. + */ +#define PATH_TO_BE_DELETED ((struct patch *) -2) +#define PATH_WAS_DELETED ((struct patch *) -1) + +static int to_be_deleted(struct patch *patch) +{ + return patch == PATH_TO_BE_DELETED; +} + +static int was_deleted(struct patch *patch) +{ + return patch == PATH_WAS_DELETED; +} + +static void add_to_fn_table(struct apply_state *state, struct patch *patch) +{ + struct string_list_item *item; + + /* + * Always add new_name unless patch is a deletion + * This should cover the cases for normal diffs, + * file creations and copies + */ + if (patch->new_name) { + item = string_list_insert(&state->fn_table, patch->new_name); + item->util = patch; + } + + /* + * store a failure on rename/deletion cases because + * later chunks shouldn't patch old names + */ + if ((patch->new_name == NULL) || (patch->is_rename)) { + item = string_list_insert(&state->fn_table, patch->old_name); + item->util = PATH_WAS_DELETED; + } +} + +static void prepare_fn_table(struct apply_state *state, struct patch *patch) +{ + /* + * store information about incoming file deletion + */ + while (patch) { + if ((patch->new_name == NULL) || (patch->is_rename)) { + struct string_list_item *item; + item = string_list_insert(&state->fn_table, patch->old_name); + item->util = PATH_TO_BE_DELETED; + } + patch = patch->next; + } +} + +static int checkout_target(struct index_state 
*istate, + struct cache_entry *ce, struct stat *st) +{ + struct checkout costate = CHECKOUT_INIT; + + costate.refresh_cache = 1; + costate.istate = istate; + if (checkout_entry(ce, &costate, NULL, NULL) || + lstat(ce->name, st)) + return error(_("cannot checkout %s"), ce->name); + return 0; +} + +static struct patch *previous_patch(struct apply_state *state, + struct patch *patch, + int *gone) +{ + struct patch *previous; + + *gone = 0; + if (patch->is_copy || patch->is_rename) + return NULL; /* "git" patches do not depend on the order */ + + previous = in_fn_table(state, patch->old_name); + if (!previous) + return NULL; + + if (to_be_deleted(previous)) + return NULL; /* the deletion hasn't happened yet */ + + if (was_deleted(previous)) + *gone = 1; + + return previous; +} + +static int verify_index_match(struct apply_state *state, + const struct cache_entry *ce, + struct stat *st) +{ + if (S_ISGITLINK(ce->ce_mode)) { + if (!S_ISDIR(st->st_mode)) + return -1; + return 0; + } + return ie_match_stat(state->repo->index, ce, st, + CE_MATCH_IGNORE_VALID | CE_MATCH_IGNORE_SKIP_WORKTREE); +} + +#define SUBMODULE_PATCH_WITHOUT_INDEX 1 + +static int load_patch_target(struct apply_state *state, + struct strbuf *buf, + const struct cache_entry *ce, + struct stat *st, + struct patch *patch, + const char *name, + unsigned expected_mode) +{ + if (state->cached || state->check_index) { + if (read_file_or_gitlink(ce, buf)) + return error(_("failed to read %s"), name); + } else if (name) { + if (S_ISGITLINK(expected_mode)) { + if (ce) + return read_file_or_gitlink(ce, buf); + else + return SUBMODULE_PATCH_WITHOUT_INDEX; + } else if (has_symlink_leading_path(name, strlen(name))) { + return error(_("reading from '%s' beyond a symbolic link"), name); + } else { + if (read_old_data(st, patch, name, buf)) + return error(_("failed to read %s"), name); + } + } + return 0; +} + +/* + * We are about to apply "patch"; populate the "image" with the + * current version we have, from the 
working tree or from the index, + * depending on the situation e.g. --cached/--index. If we are + * applying a non-git patch that incrementally updates the tree, + * we read from the result of a previous diff. + */ +static int load_preimage(struct apply_state *state, + struct image *image, + struct patch *patch, struct stat *st, + const struct cache_entry *ce) +{ + struct strbuf buf = STRBUF_INIT; + size_t len; + char *img; + struct patch *previous; + int status; + + previous = previous_patch(state, patch, &status); + if (status) + return error(_("path %s has been renamed/deleted"), + patch->old_name); + if (previous) { + /* We have a patched copy in memory; use that. */ + strbuf_add(&buf, previous->result, previous->resultsize); + } else { + status = load_patch_target(state, &buf, ce, st, patch, + patch->old_name, patch->old_mode); + if (status < 0) + return status; + else if (status == SUBMODULE_PATCH_WITHOUT_INDEX) { + /* + * There is no way to apply subproject + * patch without looking at the index. + * NEEDSWORK: shouldn't this be flagged + * as an error??? 
+ */ + free_fragment_list(patch->fragments); + patch->fragments = NULL; + } else if (status) { + return error(_("failed to read %s"), patch->old_name); + } + } + + img = strbuf_detach(&buf, &len); + image_prepare(image, img, len, !patch->is_binary); + return 0; +} + +static int resolve_to(struct image *image, const struct object_id *result_id) +{ + unsigned long size; + enum object_type type; + char *data; + + image_clear(image); + + data = odb_read_object(the_repository->objects, result_id, &type, &size); + if (!data || type != OBJ_BLOB) + die("unable to read blob object %s", oid_to_hex(result_id)); + strbuf_attach(&image->buf, data, size, size + 1); + + return 0; +} + +static int three_way_merge(struct apply_state *state, + struct image *image, + char *path, + const struct object_id *base, + const struct object_id *ours, + const struct object_id *theirs) +{ + mmfile_t base_file, our_file, their_file; + struct ll_merge_options merge_opts = LL_MERGE_OPTIONS_INIT; + mmbuffer_t result = { NULL }; + enum ll_merge_result status; + + /* resolve trivial cases first */ + if (oideq(base, ours)) + return resolve_to(image, theirs); + else if (oideq(base, theirs) || oideq(ours, theirs)) + return resolve_to(image, ours); + + read_mmblob(&base_file, the_repository->objects, base); + read_mmblob(&our_file, the_repository->objects, ours); + read_mmblob(&their_file, the_repository->objects, theirs); + merge_opts.variant = state->merge_variant; + status = ll_merge(&result, path, + &base_file, "base", + &our_file, "ours", + &their_file, "theirs", + state->repo->index, + &merge_opts); + if (status == LL_MERGE_BINARY_CONFLICT) + warning("Cannot merge binary files: %s (%s vs. 
%s)", + path, "ours", "theirs"); + free(base_file.ptr); + free(our_file.ptr); + free(their_file.ptr); + if (status < 0 || !result.ptr) { + free(result.ptr); + return -1; + } + image_clear(image); + strbuf_attach(&image->buf, result.ptr, result.size, result.size); + + return status; +} + +/* + * When directly falling back to add/add three-way merge, we read from + * the current contents of the new_name. In no cases other than that + * this function will be called. + */ +static int load_current(struct apply_state *state, + struct image *image, + struct patch *patch) +{ + struct strbuf buf = STRBUF_INIT; + int status, pos; + size_t len; + char *img; + struct stat st; + struct cache_entry *ce; + char *name = patch->new_name; + unsigned mode = patch->new_mode; + + if (!patch->is_new) + BUG("patch to %s is not a creation", patch->old_name); + + pos = index_name_pos(state->repo->index, name, strlen(name)); + if (pos < 0) + return error(_("%s: does not exist in index"), name); + ce = state->repo->index->cache[pos]; + if (lstat(name, &st)) { + if (errno != ENOENT) + return error_errno("%s", name); + if (checkout_target(state->repo->index, ce, &st)) + return -1; + } + if (verify_index_match(state, ce, &st)) + return error(_("%s: does not match index"), name); + + status = load_patch_target(state, &buf, ce, &st, patch, name, mode); + if (status < 0) + return status; + else if (status) + return -1; + img = strbuf_detach(&buf, &len); + image_prepare(image, img, len, !patch->is_binary); + return 0; +} + +static int try_threeway(struct apply_state *state, + struct image *image, + struct patch *patch, + struct stat *st, + const struct cache_entry *ce) +{ + struct object_id pre_oid, post_oid, our_oid; + struct strbuf buf = STRBUF_INIT; + size_t len; + int status; + char *img; + struct image tmp_image = IMAGE_INIT; + + /* No point falling back to 3-way merge in these cases */ + if (patch->is_delete || + S_ISGITLINK(patch->old_mode) || S_ISGITLINK(patch->new_mode) || + (patch->is_new 
&& !patch->direct_to_threeway) || + (patch->is_rename && !patch->lines_added && !patch->lines_deleted)) + return -1; + + /* Preimage the patch was prepared for */ + if (patch->is_new) + odb_write_object(the_repository->objects, "", 0, OBJ_BLOB, &pre_oid); + else if (repo_get_oid(the_repository, patch->old_oid_prefix, &pre_oid) || + read_blob_object(&buf, &pre_oid, patch->old_mode)) + return error(_("repository lacks the necessary blob to perform 3-way merge.")); + + if (state->apply_verbosity > verbosity_silent && patch->direct_to_threeway) + fprintf(stderr, _("Performing three-way merge...\n")); + + img = strbuf_detach(&buf, &len); + image_prepare(&tmp_image, img, len, 1); + /* Apply the patch to get the post image */ + if (apply_fragments(state, &tmp_image, patch) < 0) { + image_clear(&tmp_image); + return -1; + } + /* post_oid is theirs */ + odb_write_object(the_repository->objects, tmp_image.buf.buf, + tmp_image.buf.len, OBJ_BLOB, &post_oid); + image_clear(&tmp_image); + + /* our_oid is ours */ + if (patch->is_new) { + if (load_current(state, &tmp_image, patch)) + return error(_("cannot read the current contents of '%s'"), + patch->new_name); + } else { + if (load_preimage(state, &tmp_image, patch, st, ce)) + return error(_("cannot read the current contents of '%s'"), + patch->old_name); + } + odb_write_object(the_repository->objects, tmp_image.buf.buf, + tmp_image.buf.len, OBJ_BLOB, &our_oid); + image_clear(&tmp_image); + + /* in-core three-way merge between post and our using pre as base */ + status = three_way_merge(state, image, patch->new_name, + &pre_oid, &our_oid, &post_oid); + if (status < 0) { + if (state->apply_verbosity > verbosity_silent) + fprintf(stderr, + _("Failed to perform three-way merge...\n")); + return status; + } + + if (status) { + patch->conflicted_threeway = 1; + if (patch->is_new) + oidclr(&patch->threeway_stage[0], the_repository->hash_algo); + else + oidcpy(&patch->threeway_stage[0], &pre_oid); + oidcpy(&patch->threeway_stage[1], 
&our_oid); + oidcpy(&patch->threeway_stage[2], &post_oid); + if (state->apply_verbosity > verbosity_silent) + fprintf(stderr, + _("Applied patch to '%s' with conflicts.\n"), + patch->new_name); + } else { + if (state->apply_verbosity > verbosity_silent) + fprintf(stderr, + _("Applied patch to '%s' cleanly.\n"), + patch->new_name); + } + return 0; +} + +static int apply_data(struct apply_state *state, struct patch *patch, + struct stat *st, const struct cache_entry *ce) +{ + struct image image = IMAGE_INIT; + + if (load_preimage(state, &image, patch, st, ce) < 0) + return -1; + + if (!state->threeway || try_threeway(state, &image, patch, st, ce) < 0) { + if (state->apply_verbosity > verbosity_silent && + state->threeway && !patch->direct_to_threeway) + fprintf(stderr, _("Falling back to direct application...\n")); + + /* Note: with --reject, apply_fragments() returns 0 */ + if (patch->direct_to_threeway || apply_fragments(state, &image, patch) < 0) { + image_clear(&image); + return -1; + } + } + patch->result = strbuf_detach(&image.buf, &patch->resultsize); + add_to_fn_table(state, patch); + free(image.line); + + if (0 < patch->is_delete && patch->resultsize) + return error(_("removal patch leaves file contents")); + + return 0; +} + +/* + * If "patch" that we are looking at modifies or deletes what we have, + * we would want it not to lose any local modification we have, either + * in the working tree or in the index. + * + * This also decides if a non-git patch is a creation patch or a + * modification to an existing empty file. We do not check the state + * of the current tree for a creation patch in this function; the caller + * check_patch() separately makes sure (and errors out otherwise) that + * the path the patch creates does not exist in the current tree. 
+ */ +static int check_preimage(struct apply_state *state, + struct patch *patch, + struct cache_entry **ce, + struct stat *st) +{ + const char *old_name = patch->old_name; + struct patch *previous = NULL; + int stat_ret = 0, status; + unsigned st_mode = 0; + + if (!old_name) + return 0; + + assert(patch->is_new <= 0); + previous = previous_patch(state, patch, &status); + + if (status) + return error(_("path %s has been renamed/deleted"), old_name); + if (previous) { + st_mode = previous->new_mode; + } else if (!state->cached) { + stat_ret = lstat(old_name, st); + if (stat_ret && errno != ENOENT) + return error_errno("%s", old_name); + } + + if (state->check_index && !previous) { + int pos = index_name_pos(state->repo->index, old_name, + strlen(old_name)); + if (pos < 0) { + if (patch->is_new < 0) + goto is_new; + return error(_("%s: does not exist in index"), old_name); + } + *ce = state->repo->index->cache[pos]; + if (stat_ret < 0) { + if (checkout_target(state->repo->index, *ce, st)) + return -1; + } + if (!state->cached && verify_index_match(state, *ce, st)) + return error(_("%s: does not match index"), old_name); + if (state->cached) + st_mode = (*ce)->ce_mode; + } else if (stat_ret < 0) { + if (patch->is_new < 0) + goto is_new; + return error_errno("%s", old_name); + } + + if (!state->cached && !previous) { + if (*ce && !(*ce)->ce_mode) + BUG("ce_mode == 0 for path '%s'", old_name); + + if (trust_executable_bit || !S_ISREG(st->st_mode)) + st_mode = ce_mode_from_stat(*ce, st->st_mode); + else if (*ce) + st_mode = (*ce)->ce_mode; + else + st_mode = patch->old_mode; + } + + if (patch->is_new < 0) + patch->is_new = 0; + if (!patch->old_mode) + patch->old_mode = st_mode; + if ((st_mode ^ patch->old_mode) & S_IFMT) + return error(_("%s: wrong type"), old_name); + if (st_mode != patch->old_mode) + warning(_("%s has type %o, expected %o"), + old_name, st_mode, patch->old_mode); + if (!patch->new_mode && !patch->is_delete) + patch->new_mode = st_mode; + return 0; + + 
is_new: + patch->is_new = 1; + patch->is_delete = 0; + FREE_AND_NULL(patch->old_name); + return 0; +} + + +#define EXISTS_IN_INDEX 1 +#define EXISTS_IN_WORKTREE 2 +#define EXISTS_IN_INDEX_AS_ITA 3 + +static int check_to_create(struct apply_state *state, + const char *new_name, + int ok_if_exists) +{ + struct stat nst; + + if (state->check_index && (!ok_if_exists || !state->cached)) { + int pos; + + pos = index_name_pos(state->repo->index, new_name, strlen(new_name)); + if (pos >= 0) { + struct cache_entry *ce = state->repo->index->cache[pos]; + + /* allow ITA, as they do not yet exist in the index */ + if (!ok_if_exists && !(ce->ce_flags & CE_INTENT_TO_ADD)) + return EXISTS_IN_INDEX; + + /* ITA entries can never match working tree files */ + if (!state->cached && (ce->ce_flags & CE_INTENT_TO_ADD)) + return EXISTS_IN_INDEX_AS_ITA; + } + } + + if (state->cached) + return 0; + + if (!lstat(new_name, &nst)) { + if (S_ISDIR(nst.st_mode) || ok_if_exists) + return 0; + /* + * A leading component of new_name might be a symlink + * that is going to be removed with this patch, but + * still pointing at somewhere that has the path. + * In such a case, path "new_name" does not exist as + * far as git is concerned. 
+ */ + if (has_symlink_leading_path(new_name, strlen(new_name))) + return 0; + + return EXISTS_IN_WORKTREE; + } else if (!is_missing_file_error(errno)) { + return error_errno("%s", new_name); + } + return 0; +} + +static void prepare_symlink_changes(struct apply_state *state, struct patch *patch) +{ + for ( ; patch; patch = patch->next) { + if ((patch->old_name && S_ISLNK(patch->old_mode)) && + (patch->is_rename || patch->is_delete)) + /* the symlink at patch->old_name is removed */ + strset_add(&state->removed_symlinks, patch->old_name); + + if (patch->new_name && S_ISLNK(patch->new_mode)) + /* the symlink at patch->new_name is created or remains */ + strset_add(&state->kept_symlinks, patch->new_name); + } +} + +static int path_is_beyond_symlink_1(struct apply_state *state, struct strbuf *name) +{ + do { + while (--name->len && name->buf[name->len] != '/') + ; /* scan backwards */ + if (!name->len) + break; + name->buf[name->len] = '\0'; + if (strset_contains(&state->kept_symlinks, name->buf)) + return 1; + if (strset_contains(&state->removed_symlinks, name->buf)) + /* + * This cannot be "return 0", because we may + * see a new one created at a higher level. 
+ */ + continue; + + /* otherwise, check the preimage */ + if (state->check_index) { + struct cache_entry *ce; + + ce = index_file_exists(state->repo->index, name->buf, + name->len, ignore_case); + if (ce && S_ISLNK(ce->ce_mode)) + return 1; + } else { + struct stat st; + if (!lstat(name->buf, &st) && S_ISLNK(st.st_mode)) + return 1; + } + } while (1); + return 0; +} + +static int path_is_beyond_symlink(struct apply_state *state, const char *name_) +{ + int ret; + struct strbuf name = STRBUF_INIT; + + assert(*name_ != '\0'); + strbuf_addstr(&name, name_); + ret = path_is_beyond_symlink_1(state, &name); + strbuf_release(&name); + + return ret; +} + +static int check_unsafe_path(struct patch *patch) +{ + const char *old_name = NULL; + const char *new_name = NULL; + if (patch->is_delete) + old_name = patch->old_name; + else if (!patch->is_new && !patch->is_copy) + old_name = patch->old_name; + if (!patch->is_delete) + new_name = patch->new_name; + + if (old_name && !verify_path(old_name, patch->old_mode)) + return error(_("invalid path '%s'"), old_name); + if (new_name && !verify_path(new_name, patch->new_mode)) + return error(_("invalid path '%s'"), new_name); + return 0; +} + +/* + * Check and apply the patch in-core; leave the result in patch->result + * for the caller to write it out to the final destination. + */ +static int check_patch(struct apply_state *state, struct patch *patch) +{ + struct stat st; + const char *old_name = patch->old_name; + const char *new_name = patch->new_name; + const char *name = old_name ? 
old_name : new_name; + struct cache_entry *ce = NULL; + struct patch *tpatch; + int ok_if_exists; + int status; + + patch->rejected = 1; /* we will drop this after we succeed */ + + status = check_preimage(state, patch, &ce, &st); + if (status) + return status; + old_name = patch->old_name; + + /* + * A type-change diff is always split into a patch to delete + * old, immediately followed by a patch to create new (see + * diff.c::run_diff()); in such a case it is Ok that the entry + * to be deleted by the previous patch is still in the working + * tree and in the index. + * + * A patch to swap-rename between A and B would first rename A + * to B and then rename B to A. While applying the first one, + * the presence of B should not stop A from getting renamed to + * B; ask to_be_deleted() about the later rename. Removal of + * B and rename from A to B is handled the same way by asking + * was_deleted(). + */ + if ((tpatch = in_fn_table(state, new_name)) && + (was_deleted(tpatch) || to_be_deleted(tpatch))) + ok_if_exists = 1; + else + ok_if_exists = 0; + + if (new_name && + ((0 < patch->is_new) || patch->is_rename || patch->is_copy)) { + int err = check_to_create(state, new_name, ok_if_exists); + + if (err && state->threeway) { + patch->direct_to_threeway = 1; + } else switch (err) { + case 0: + break; /* happy */ + case EXISTS_IN_INDEX: + return error(_("%s: already exists in index"), new_name); + case EXISTS_IN_INDEX_AS_ITA: + return error(_("%s: does not match index"), new_name); + case EXISTS_IN_WORKTREE: + return error(_("%s: already exists in working directory"), + new_name); + default: + return err; + } + + if (!patch->new_mode) { + if (0 < patch->is_new) + patch->new_mode = S_IFREG | 0644; + else + patch->new_mode = patch->old_mode; + } + } + + if (new_name && old_name) { + int same = !strcmp(old_name, new_name); + if (!patch->new_mode) + patch->new_mode = patch->old_mode; + if ((patch->old_mode ^ patch->new_mode) & S_IFMT) { + if (same) + return error(_("new 
mode (%o) of %s does not " + "match old mode (%o)"), + patch->new_mode, new_name, + patch->old_mode); + else + return error(_("new mode (%o) of %s does not " + "match old mode (%o) of %s"), + patch->new_mode, new_name, + patch->old_mode, old_name); + } + } + + if (!state->unsafe_paths && check_unsafe_path(patch)) + return -128; + + /* + * An attempt to read from or delete a path that is beyond a + * symbolic link will be prevented by load_patch_target() that + * is called at the beginning of apply_data() so we do not + * have to worry about a patch marked with "is_delete" bit + * here. We however need to make sure that the patch result + * is not deposited to a path that is beyond a symbolic link + * here. + */ + if (!patch->is_delete && path_is_beyond_symlink(state, patch->new_name)) + return error(_("affected file '%s' is beyond a symbolic link"), + patch->new_name); + + if (apply_data(state, patch, &st, ce) < 0) + return error(_("%s: patch does not apply"), name); + patch->rejected = 0; + return 0; +} + +static int check_patch_list(struct apply_state *state, struct patch *patch) +{ + int err = 0; + + prepare_symlink_changes(state, patch); + prepare_fn_table(state, patch); + while (patch) { + int res; + if (state->apply_verbosity > verbosity_normal) + say_patch_name(stderr, + _("Checking patch %s..."), patch); + res = check_patch(state, patch); + if (res == -128) + return -128; + err |= res; + patch = patch->next; + } + return err; +} + +static int read_apply_cache(struct apply_state *state) +{ + if (state->index_file) + return read_index_from(state->repo->index, state->index_file, + repo_get_git_dir(the_repository)); + else + return repo_read_index(state->repo); +} + +/* This function tries to read the object name from the current index */ +static int get_current_oid(struct apply_state *state, const char *path, + struct object_id *oid) +{ + int pos; + + if (read_apply_cache(state) < 0) + return -1; + pos = index_name_pos(state->repo->index, path, strlen(path)); 
+ if (pos < 0) + return -1; + oidcpy(oid, &state->repo->index->cache[pos]->oid); + return 0; +} + +static int preimage_oid_in_gitlink_patch(struct patch *p, struct object_id *oid) +{ + /* + * A usable gitlink patch has only one fragment (hunk) that looks like: + * @@ -1 +1 @@ + * -Subproject commit + * +Subproject commit + * or + * @@ -1 +0,0 @@ + * -Subproject commit + * for a removal patch. + */ + struct fragment *hunk = p->fragments; + static const char heading[] = "-Subproject commit "; + const char *preimage; + + if (/* does the patch have only one hunk? */ + hunk && !hunk->next && + /* is its preimage one line? */ + hunk->oldpos == 1 && hunk->oldlines == 1 && + /* does preimage begin with the heading? */ + (preimage = memchr(hunk->patch, '\n', hunk->size)) != NULL && + starts_with(++preimage, heading) && + /* does it record full SHA-1? */ + !get_oid_hex(preimage + sizeof(heading) - 1, oid) && + preimage[sizeof(heading) + the_hash_algo->hexsz - 1] == '\n' && + /* does the abbreviated name on the index line agree with it? */ + starts_with(preimage + sizeof(heading) - 1, p->old_oid_prefix)) + return 0; /* it all looks fine */ + + /* we may have full object name on the index line */ + return get_oid_hex(p->old_oid_prefix, oid); +} + +/* Build an index that contains just the files needed for a 3way merge */ +static int build_fake_ancestor(struct apply_state *state, struct patch *list) +{ + struct patch *patch; + struct index_state result = INDEX_STATE_INIT(state->repo); + struct lock_file lock = LOCK_INIT; + int res; + + /* Once we start supporting the reverse patch, it may be + * worth showing the new sha1 prefix, but until then... + */ + for (patch = list; patch; patch = patch->next) { + struct object_id oid; + struct cache_entry *ce; + const char *name; + + name = patch->old_name ? 
patch->old_name : patch->new_name; + if (0 < patch->is_new) + continue; + + if (S_ISGITLINK(patch->old_mode)) { + if (!preimage_oid_in_gitlink_patch(patch, &oid)) + ; /* ok, the textual part looks sane */ + else + return error(_("sha1 information is lacking or " + "useless for submodule %s"), name); + } else if (!repo_get_oid_blob(the_repository, patch->old_oid_prefix, &oid)) { + ; /* ok */ + } else if (!patch->lines_added && !patch->lines_deleted) { + /* mode-only change: update the current */ + if (get_current_oid(state, patch->old_name, &oid)) + return error(_("mode change for %s, which is not " + "in current HEAD"), name); + } else + return error(_("sha1 information is lacking or useless " + "(%s)."), name); + + ce = make_cache_entry(&result, patch->old_mode, &oid, name, 0, 0); + if (!ce) + return error(_("make_cache_entry failed for path '%s'"), + name); + if (add_index_entry(&result, ce, ADD_CACHE_OK_TO_ADD)) { + discard_cache_entry(ce); + return error(_("could not add %s to temporary index"), + name); + } + } + + hold_lock_file_for_update(&lock, state->fake_ancestor, LOCK_DIE_ON_ERROR); + res = write_locked_index(&result, &lock, COMMIT_LOCK); + discard_index(&result); + + if (res) + return error(_("could not write temporary index to %s"), + state->fake_ancestor); + + return 0; +} + +static void stat_patch_list(struct apply_state *state, struct patch *patch) +{ + int files, adds, dels; + + for (files = adds = dels = 0 ; patch ; patch = patch->next) { + files++; + adds += patch->lines_added; + dels += patch->lines_deleted; + show_stats(state, patch); + } + + print_stat_summary(stdout, files, adds, dels); +} + +static void numstat_patch_list(struct apply_state *state, + struct patch *patch) +{ + for ( ; patch; patch = patch->next) { + const char *name; + name = patch->new_name ? 
patch->new_name : patch->old_name; + if (patch->is_binary) + printf("-\t-\t"); + else + printf("%d\t%d\t", patch->lines_added, patch->lines_deleted); + write_name_quoted(name, stdout, state->line_termination); + } +} + +static void show_file_mode_name(const char *newdelete, unsigned int mode, const char *name) +{ + if (mode) + printf(" %s mode %06o %s\n", newdelete, mode, name); + else + printf(" %s %s\n", newdelete, name); +} + +static void show_mode_change(struct patch *p, int show_name) +{ + if (p->old_mode && p->new_mode && p->old_mode != p->new_mode) { + if (show_name) + printf(" mode change %06o => %06o %s\n", + p->old_mode, p->new_mode, p->new_name); + else + printf(" mode change %06o => %06o\n", + p->old_mode, p->new_mode); + } +} + +static void show_rename_copy(struct patch *p) +{ + const char *renamecopy = p->is_rename ? "rename" : "copy"; + const char *old_name, *new_name; + + /* Find common prefix */ + old_name = p->old_name; + new_name = p->new_name; + while (1) { + const char *slash_old, *slash_new; + slash_old = strchr(old_name, '/'); + slash_new = strchr(new_name, '/'); + if (!slash_old || + !slash_new || + slash_old - old_name != slash_new - new_name || + memcmp(old_name, new_name, slash_new - new_name)) + break; + old_name = slash_old + 1; + new_name = slash_new + 1; + } + /* p->old_name through old_name is the common prefix, and old_name and + * new_name through the end of names are renames + */ + if (old_name != p->old_name) + printf(" %s %.*s{%s => %s} (%d%%)\n", renamecopy, + (int)(old_name - p->old_name), p->old_name, + old_name, new_name, p->score); + else + printf(" %s %s => %s (%d%%)\n", renamecopy, + p->old_name, p->new_name, p->score); + show_mode_change(p, 0); +} + +static void summary_patch_list(struct patch *patch) +{ + struct patch *p; + + for (p = patch; p; p = p->next) { + if (p->is_new) + show_file_mode_name("create", p->new_mode, p->new_name); + else if (p->is_delete) + show_file_mode_name("delete", p->old_mode, p->old_name); + 
else { + if (p->is_rename || p->is_copy) + show_rename_copy(p); + else { + if (p->score) { + printf(" rewrite %s (%d%%)\n", + p->new_name, p->score); + show_mode_change(p, 0); + } + else + show_mode_change(p, 1); + } + } + } +} + +static void patch_stats(struct apply_state *state, struct patch *patch) +{ + int lines = patch->lines_added + patch->lines_deleted; + + if (lines > state->max_change) + state->max_change = lines; + if (patch->old_name) { + int len = quote_c_style(patch->old_name, NULL, NULL, 0); + if (!len) + len = strlen(patch->old_name); + if (len > state->max_len) + state->max_len = len; + } + if (patch->new_name) { + int len = quote_c_style(patch->new_name, NULL, NULL, 0); + if (!len) + len = strlen(patch->new_name); + if (len > state->max_len) + state->max_len = len; + } +} + +static int remove_file(struct apply_state *state, struct patch *patch, int rmdir_empty) +{ + if (state->update_index && !state->ita_only) { + if (remove_file_from_index(state->repo->index, patch->old_name) < 0) + return error(_("unable to remove %s from index"), patch->old_name); + } + if (!state->cached) { + if (!remove_or_warn(patch->old_mode, patch->old_name) && rmdir_empty) { + remove_path(patch->old_name); + } + } + return 0; +} + +static int add_index_file(struct apply_state *state, + const char *path, + unsigned mode, + void *buf, + unsigned long size) +{ + struct stat st; + struct cache_entry *ce; + int namelen = strlen(path); + + ce = make_empty_cache_entry(state->repo->index, namelen); + memcpy(ce->name, path, namelen); + ce->ce_mode = create_ce_mode(mode); + ce->ce_flags = create_ce_flags(0); + ce->ce_namelen = namelen; + if (state->ita_only) { + ce->ce_flags |= CE_INTENT_TO_ADD; + set_object_name_for_intent_to_add_entry(ce); + } else if (S_ISGITLINK(mode)) { + const char *s; + + if (!skip_prefix(buf, "Subproject commit ", &s) || + get_oid_hex(s, &ce->oid)) { + discard_cache_entry(ce); + return error(_("corrupt patch for submodule %s"), path); + } + } else { + if 
(!state->cached) { + if (lstat(path, &st) < 0) { + discard_cache_entry(ce); + return error_errno(_("unable to stat newly " + "created file '%s'"), + path); + } + fill_stat_cache_info(state->repo->index, ce, &st); + } + if (odb_write_object(the_repository->objects, buf, size, + OBJ_BLOB, &ce->oid) < 0) { + discard_cache_entry(ce); + return error(_("unable to create backing store " + "for newly created file %s"), path); + } + } + if (add_index_entry(state->repo->index, ce, ADD_CACHE_OK_TO_ADD) < 0) { + discard_cache_entry(ce); + return error(_("unable to add cache entry for %s"), path); + } + + return 0; +} + +/* + * Returns: + * -1 if an unrecoverable error happened + * 0 if everything went well + * 1 if a recoverable error happened + */ +static int try_create_file(struct apply_state *state, const char *path, + unsigned int mode, const char *buf, + unsigned long size) +{ + int fd, res; + struct strbuf nbuf = STRBUF_INIT; + + if (S_ISGITLINK(mode)) { + struct stat st; + if (!lstat(path, &st) && S_ISDIR(st.st_mode)) + return 0; + return !!mkdir(path, 0777); + } + + if (has_symlinks && S_ISLNK(mode)) + /* Although buf:size is counted string, it also is NUL + * terminated. + */ + return !!symlink(buf, path); + + fd = open(path, O_CREAT | O_EXCL | O_WRONLY, (mode & 0100) ? 0777 : 0666); + if (fd < 0) + return 1; + + if (convert_to_working_tree(state->repo->index, path, buf, size, &nbuf, NULL)) { + size = nbuf.len; + buf = nbuf.buf; + } + + res = write_in_full(fd, buf, size) < 0; + if (res) + error_errno(_("failed to write to '%s'"), path); + strbuf_release(&nbuf); + + if (close(fd) < 0 && !res) + return error_errno(_("closing file '%s'"), path); + + return res ? -1 : 0; +} + +/* + * We optimistically assume that the directories exist, + * which is true 99% of the time anyway. If they don't, + * we create them and try again. 
+ * + * Returns: + * -1 on error + * 0 otherwise + */ +static int create_one_file(struct apply_state *state, + char *path, + unsigned mode, + const char *buf, + unsigned long size) +{ + char *newpath = NULL; + int res; + + if (state->cached) + return 0; + + /* + * We already try to detect whether files are beyond a symlink in our + * up-front checks. But in the case where symlinks are created by any + * of the intermediate hunks it can happen that our up-front checks + * didn't yet see the symlink, but at the point of arriving here there + * in fact is one. We thus repeat the check for symlinks here. + * + * Note that this does not make the up-front check obsolete as the + * failure mode is different: + * + * - The up-front checks cause us to abort before we have written + * anything into the working directory. So when we exit this way the + * working directory remains clean. + * + * - The checks here happen in the middle of the action where we have + * already started to apply the patch. The end result will be a dirty + * working directory. + * + * Ideally, we should update the up-front checks to catch what would + * happen when we apply the patch before we damage the working tree. + * We have all the information necessary to do so. But for now, as a + * part of embargoed security work, having this check would serve as a + * reasonable first step. + */ + if (path_is_beyond_symlink(state, path)) + return error(_("affected file '%s' is beyond a symbolic link"), path); + + res = try_create_file(state, path, mode, buf, size); + if (res < 0) + return -1; + if (!res) + return 0; + + if (errno == ENOENT) { + if (safe_create_leading_directories_no_share(path)) + return 0; + res = try_create_file(state, path, mode, buf, size); + if (res < 0) + return -1; + if (!res) + return 0; + } + + if (errno == EEXIST || errno == EACCES) { + /* We may be trying to create a file where a directory + * used to be. 
+ */ + struct stat st; + if (!lstat(path, &st) && (!S_ISDIR(st.st_mode) || !rmdir(path))) + errno = EEXIST; + } + + if (errno == EEXIST) { + unsigned int nr = getpid(); + + for (;;) { + newpath = mkpathdup("%s~%u", path, nr); + res = try_create_file(state, newpath, mode, buf, size); + if (res < 0) + goto out; + if (!res) { + if (!rename(newpath, path)) + goto out; + unlink_or_warn(newpath); + break; + } + if (errno != EEXIST) + break; + ++nr; + FREE_AND_NULL(newpath); + } + } + res = error_errno(_("unable to write file '%s' mode %o"), path, mode); +out: + free(newpath); + return res; +} + +static int add_conflicted_stages_file(struct apply_state *state, + struct patch *patch) +{ + int stage, namelen; + unsigned mode; + struct cache_entry *ce; + + if (!state->update_index) + return 0; + namelen = strlen(patch->new_name); + mode = patch->new_mode ? patch->new_mode : (S_IFREG | 0644); + + remove_file_from_index(state->repo->index, patch->new_name); + for (stage = 1; stage < 4; stage++) { + if (is_null_oid(&patch->threeway_stage[stage - 1])) + continue; + ce = make_empty_cache_entry(state->repo->index, namelen); + memcpy(ce->name, patch->new_name, namelen); + ce->ce_mode = create_ce_mode(mode); + ce->ce_flags = create_ce_flags(stage); + ce->ce_namelen = namelen; + oidcpy(&ce->oid, &patch->threeway_stage[stage - 1]); + if (add_index_entry(state->repo->index, ce, ADD_CACHE_OK_TO_ADD) < 0) { + discard_cache_entry(ce); + return error(_("unable to add cache entry for %s"), + patch->new_name); + } + } + + return 0; +} + +static int create_file(struct apply_state *state, struct patch *patch) +{ + char *path = patch->new_name; + unsigned mode = patch->new_mode; + unsigned long size = patch->resultsize; + char *buf = patch->result; + + if (!mode) + mode = S_IFREG | 0644; + if (create_one_file(state, path, mode, buf, size)) + return -1; + + if (patch->conflicted_threeway) + return add_conflicted_stages_file(state, patch); + else if (state->check_index || (state->ita_only && 
patch->is_new > 0)) + return add_index_file(state, path, mode, buf, size); + return 0; +} + +/* phase zero is to remove, phase one is to create */ +static int write_out_one_result(struct apply_state *state, + struct patch *patch, + int phase) +{ + if (patch->is_delete > 0) { + if (phase == 0) + return remove_file(state, patch, 1); + return 0; + } + if (patch->is_new > 0 || patch->is_copy) { + if (phase == 1) + return create_file(state, patch); + return 0; + } + /* + * Rename or modification boils down to the same + * thing: remove the old, write the new + */ + if (phase == 0) + return remove_file(state, patch, patch->is_rename); + if (phase == 1) + return create_file(state, patch); + return 0; +} + +static int write_out_one_reject(struct apply_state *state, struct patch *patch) +{ + FILE *rej; + char *namebuf; + struct fragment *frag; + int fd, cnt = 0; + struct strbuf sb = STRBUF_INIT; + + for (cnt = 0, frag = patch->fragments; frag; frag = frag->next) { + if (!frag->rejected) + continue; + cnt++; + } + + if (!cnt) { + if (state->apply_verbosity > verbosity_normal) + say_patch_name(stderr, + _("Applied patch %s cleanly."), patch); + return 0; + } + + /* This should not happen, because a removal patch that leaves + * contents are marked "rejected" at the patch level. 
+ */ + if (!patch->new_name) + die(_("internal error")); + + /* Say this even without --verbose */ + strbuf_addf(&sb, Q_("Applying patch %%s with %d reject...", + "Applying patch %%s with %d rejects...", + cnt), + cnt); + if (state->apply_verbosity > verbosity_silent) + say_patch_name(stderr, sb.buf, patch); + strbuf_release(&sb); + + namebuf = xstrfmt("%s.rej", patch->new_name); + + fd = open(namebuf, O_CREAT | O_EXCL | O_WRONLY, 0666); + if (fd < 0) { + if (errno != EEXIST) { + error_errno(_("cannot open %s"), namebuf); + goto error; + } + if (unlink(namebuf)) { + error_errno(_("cannot unlink '%s'"), namebuf); + goto error; + } + fd = open(namebuf, O_CREAT | O_EXCL | O_WRONLY, 0666); + if (fd < 0) { + error_errno(_("cannot open %s"), namebuf); + goto error; + } + } + rej = fdopen(fd, "w"); + if (!rej) { + error_errno(_("cannot open %s"), namebuf); + close(fd); + goto error; + } + + /* Normal git tools never deal with .rej, so do not pretend + * this is a git patch by saying --git or giving extended + * headers. While at it, maybe please "kompare" that wants + * the trailing TAB and some garbage at the end of line ;-). 
+ */ + fprintf(rej, "diff a/%s b/%s\t(rejected hunks)\n", + patch->new_name, patch->new_name); + for (cnt = 1, frag = patch->fragments; + frag; + cnt++, frag = frag->next) { + if (!frag->rejected) { + if (state->apply_verbosity > verbosity_silent) + fprintf_ln(stderr, _("Hunk #%d applied cleanly."), cnt); + continue; + } + if (state->apply_verbosity > verbosity_silent) + fprintf_ln(stderr, _("Rejected hunk #%d."), cnt); + fprintf(rej, "%.*s", frag->size, frag->patch); + if (frag->patch[frag->size-1] != '\n') + fputc('\n', rej); + } + fclose(rej); +error: + free(namebuf); + return -1; +} + +/* + * Returns: + * -1 if an error happened + * 0 if the patch applied cleanly + * 1 if the patch did not apply cleanly + */ +static int write_out_results(struct apply_state *state, struct patch *list) +{ + int phase; + int errs = 0; + struct patch *l; + struct string_list cpath = STRING_LIST_INIT_DUP; + + for (phase = 0; phase < 2; phase++) { + l = list; + while (l) { + if (l->rejected) + errs = 1; + else { + if (write_out_one_result(state, l, phase)) { + string_list_clear(&cpath, 0); + return -1; + } + if (phase == 1) { + if (write_out_one_reject(state, l)) + errs = 1; + if (l->conflicted_threeway) { + string_list_append(&cpath, l->new_name); + errs = 1; + } + } + } + l = l->next; + } + } + + if (cpath.nr) { + struct string_list_item *item; + + string_list_sort(&cpath); + if (state->apply_verbosity > verbosity_silent) { + for_each_string_list_item(item, &cpath) + fprintf(stderr, "U %s\n", item->string); + } + string_list_clear(&cpath, 0); + + /* + * rerere relies on the partially merged result being in the working + * tree with conflict markers, but that isn't written with --cached. + */ + if (!state->cached) + repo_rerere(state->repo, 0); + } + + return errs; +} + +/* + * Try to apply a patch. 
+ * + * Returns: + * -128 if a bad error happened (like patch unreadable) + * -1 if patch did not apply and user cannot deal with it + * 0 if the patch applied + * 1 if the patch did not apply but user might fix it + */ +static int apply_patch(struct apply_state *state, + int fd, + const char *filename, + int options) +{ + size_t offset; + struct strbuf buf = STRBUF_INIT; /* owns the patch text */ + struct patch *list = NULL, **listp = &list; + int skipped_patch = 0; + int res = 0; + int flush_attributes = 0; + + state->patch_input_file = filename; + if (read_patch_file(&buf, fd) < 0) + return -128; + offset = 0; + while (offset < buf.len) { + struct patch *patch; + int nr; + + CALLOC_ARRAY(patch, 1); + patch->inaccurate_eof = !!(options & APPLY_OPT_INACCURATE_EOF); + patch->recount = !!(options & APPLY_OPT_RECOUNT); + nr = parse_chunk(state, buf.buf + offset, buf.len - offset, patch); + if (nr < 0) { + free_patch(patch); + if (nr == -128) { + res = -128; + goto end; + } + break; + } + if (state->apply_in_reverse) + reverse_patches(patch); + if (use_patch(state, patch)) { + patch_stats(state, patch); + if (!list || !state->apply_in_reverse) { + *listp = patch; + listp = &patch->next; + } else { + patch->next = list; + list = patch; + } + + if ((patch->new_name && + ends_with_path_components(patch->new_name, + GITATTRIBUTES_FILE)) || + (patch->old_name && + ends_with_path_components(patch->old_name, + GITATTRIBUTES_FILE))) + flush_attributes = 1; + } + else { + if (state->apply_verbosity > verbosity_normal) + say_patch_name(stderr, _("Skipped patch '%s'."), patch); + free_patch(patch); + skipped_patch++; + } + offset += nr; + } + + if (!list && !skipped_patch) { + if (!state->allow_empty) { + error(_("No valid patches in input (allow with \"--allow-empty\")")); + res = -128; + } + goto end; + } + + if (state->whitespace_error && (state->ws_error_action == die_on_ws_error)) + state->apply = 0; + + state->update_index = (state->check_index || state->ita_only) && 
state->apply; + if (state->update_index && !is_lock_file_locked(&state->lock_file)) { + if (state->index_file) + hold_lock_file_for_update(&state->lock_file, + state->index_file, + LOCK_DIE_ON_ERROR); + else + repo_hold_locked_index(state->repo, &state->lock_file, + LOCK_DIE_ON_ERROR); + } + + if ((state->check_index || state->update_index) && read_apply_cache(state) < 0) { + error(_("unable to read index file")); + res = -128; + goto end; + } + + if (state->check || state->apply) { + int r = check_patch_list(state, list); + if (r == -128) { + res = -128; + goto end; + } + if (r < 0 && !state->apply_with_reject) { + res = -1; + goto end; + } + } + + if (state->apply) { + int write_res = write_out_results(state, list); + if (write_res < 0) { + res = -128; + goto end; + } + if (write_res > 0) { + /* with --3way, we still need to write the index out */ + res = state->apply_with_reject ? -1 : 1; + goto end; + } + } + + if (state->fake_ancestor && + build_fake_ancestor(state, list)) { + res = -128; + goto end; + } + + if (state->diffstat && state->apply_verbosity > verbosity_silent) + stat_patch_list(state, list); + + if (state->numstat && state->apply_verbosity > verbosity_silent) + numstat_patch_list(state, list); + + if (state->summary && state->apply_verbosity > verbosity_silent) + summary_patch_list(list); + + if (flush_attributes) + reset_parsed_attributes(); +end: + free_patch_list(list); + strbuf_release(&buf); + string_list_clear(&state->fn_table, 0); + return res; +} + +static int apply_option_parse_exclude(const struct option *opt, + const char *arg, int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_NEG(unset); + + add_name_limit(state, arg, 1); + return 0; +} + +static int apply_option_parse_include(const struct option *opt, + const char *arg, int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_NEG(unset); + + add_name_limit(state, arg, 0); + state->has_include = 1; + return 0; +} + +static int 
apply_option_parse_p(const struct option *opt, + const char *arg, + int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_NEG(unset); + + state->p_value = atoi(arg); + state->p_value_known = 1; + return 0; +} + +static int apply_option_parse_space_change(const struct option *opt, + const char *arg, int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_ARG(arg); + + if (unset) + state->ws_ignore_action = ignore_ws_none; + else + state->ws_ignore_action = ignore_ws_change; + return 0; +} + +static int apply_option_parse_whitespace(const struct option *opt, + const char *arg, int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_NEG(unset); + + state->whitespace_option = arg; + if (parse_whitespace_option(state, arg)) + return -1; + return 0; +} + +static int apply_option_parse_directory(const struct option *opt, + const char *arg, int unset) +{ + struct apply_state *state = opt->value; + + BUG_ON_OPT_NEG(unset); + + strbuf_reset(&state->root); + strbuf_addstr(&state->root, arg); + + if (strbuf_normalize_path(&state->root) < 0) + return error(_("unable to normalize directory: '%s'"), arg); + + strbuf_complete(&state->root, '/'); + return 0; +} + +int apply_all_patches(struct apply_state *state, + int argc, + const char **argv, + int options) +{ + int i; + int res; + int errs = 0; + int read_stdin = 1; + + for (i = 0; i < argc; i++) { + const char *arg = argv[i]; + char *to_free = NULL; + int fd; + + if (!strcmp(arg, "-")) { + res = apply_patch(state, 0, "", options); + if (res < 0) + goto end; + errs |= res; + read_stdin = 0; + continue; + } else + arg = to_free = prefix_filename(state->prefix, arg); + + fd = open(arg, O_RDONLY); + if (fd < 0) { + error(_("can't open patch '%s': %s"), arg, strerror(errno)); + res = -128; + free(to_free); + goto end; + } + read_stdin = 0; + set_default_whitespace_mode(state); + res = apply_patch(state, fd, arg, options); + close(fd); + free(to_free); + if (res < 0) + goto end; + errs 
|= res; + } + set_default_whitespace_mode(state); + if (read_stdin) { + res = apply_patch(state, 0, "", options); + if (res < 0) + goto end; + errs |= res; + } + + if (state->whitespace_error) { + if (state->squelch_whitespace_errors && + state->squelch_whitespace_errors < state->whitespace_error) { + int squelched = + state->whitespace_error - state->squelch_whitespace_errors; + warning(Q_("squelched %d whitespace error", + "squelched %d whitespace errors", + squelched), + squelched); + } + if (state->ws_error_action == die_on_ws_error) { + error(Q_("%d line adds whitespace errors.", + "%d lines add whitespace errors.", + state->whitespace_error), + state->whitespace_error); + res = -128; + goto end; + } + if (state->applied_after_fixing_ws && state->apply) + warning(Q_("%d line applied after" + " fixing whitespace errors.", + "%d lines applied after" + " fixing whitespace errors.", + state->applied_after_fixing_ws), + state->applied_after_fixing_ws); + else if (state->whitespace_error) + warning(Q_("%d line adds whitespace errors.", + "%d lines add whitespace errors.", + state->whitespace_error), + state->whitespace_error); + } + + if (state->update_index) { + res = write_locked_index(state->repo->index, &state->lock_file, COMMIT_LOCK); + if (res) { + error(_("Unable to write new index file")); + res = -128; + goto end; + } + } + + res = !!errs; + +end: + rollback_lock_file(&state->lock_file); + + if (state->apply_verbosity <= verbosity_silent) { + set_error_routine(state->saved_error_routine); + set_warn_routine(state->saved_warn_routine); + } + + if (res > -1) + return res; + return (res == -1 ? 
1 : 128); +} + +int apply_parse_options(int argc, const char **argv, + struct apply_state *state, + int *force_apply, int *options, + const char * const *apply_usage) +{ + struct option builtin_apply_options[] = { + OPT_CALLBACK_F(0, "exclude", state, N_("path"), + N_("don't apply changes matching the given path"), + PARSE_OPT_NONEG, apply_option_parse_exclude), + OPT_CALLBACK_F(0, "include", state, N_("path"), + N_("apply changes matching the given path"), + PARSE_OPT_NONEG, apply_option_parse_include), + OPT_CALLBACK('p', NULL, state, N_("num"), + N_("remove leading slashes from traditional diff paths"), + apply_option_parse_p), + OPT_BOOL(0, "no-add", &state->no_add, + N_("ignore additions made by the patch")), + OPT_BOOL(0, "stat", &state->diffstat, + N_("instead of applying the patch, output diffstat for the input")), + OPT_NOOP_NOARG(0, "allow-binary-replacement"), + OPT_NOOP_NOARG(0, "binary"), + OPT_BOOL(0, "numstat", &state->numstat, + N_("show number of added and deleted lines in decimal notation")), + OPT_BOOL(0, "summary", &state->summary, + N_("instead of applying the patch, output a summary for the input")), + OPT_BOOL(0, "check", &state->check, + N_("instead of applying the patch, see if the patch is applicable")), + OPT_BOOL(0, "index", &state->check_index, + N_("make sure the patch is applicable to the current index")), + OPT_BOOL('N', "intent-to-add", &state->ita_only, + N_("mark new files with `git add --intent-to-add`")), + OPT_BOOL(0, "cached", &state->cached, + N_("apply a patch without touching the working tree")), + OPT_BOOL_F(0, "unsafe-paths", &state->unsafe_paths, + N_("accept a patch that touches outside the working area"), + PARSE_OPT_NOCOMPLETE), + OPT_BOOL(0, "apply", force_apply, + N_("also apply the patch (use with --stat/--summary/--check)")), + OPT_BOOL('3', "3way", &state->threeway, + N_( "attempt three-way merge, fall back on normal patch if that fails")), + OPT_SET_INT_F(0, "ours", &state->merge_variant, + N_("for conflicts, 
use our version"), + XDL_MERGE_FAVOR_OURS, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "theirs", &state->merge_variant, + N_("for conflicts, use their version"), + XDL_MERGE_FAVOR_THEIRS, PARSE_OPT_NONEG), + OPT_SET_INT_F(0, "union", &state->merge_variant, + N_("for conflicts, use a union version"), + XDL_MERGE_FAVOR_UNION, PARSE_OPT_NONEG), + OPT_FILENAME(0, "build-fake-ancestor", &state->fake_ancestor, + N_("build a temporary index based on embedded index information")), + /* Think twice before adding "--nul" synonym to this */ + OPT_SET_INT('z', NULL, &state->line_termination, + N_("paths are separated with NUL character"), '\0'), + OPT_UNSIGNED('C', NULL, &state->p_context, + N_("ensure at least lines of context match")), + OPT_CALLBACK(0, "whitespace", state, N_("action"), + N_("detect new or modified lines that have whitespace errors"), + apply_option_parse_whitespace), + OPT_CALLBACK_F(0, "ignore-space-change", state, NULL, + N_("ignore changes in whitespace when finding context"), + PARSE_OPT_NOARG, apply_option_parse_space_change), + OPT_CALLBACK_F(0, "ignore-whitespace", state, NULL, + N_("ignore changes in whitespace when finding context"), + PARSE_OPT_NOARG, apply_option_parse_space_change), + OPT_BOOL('R', "reverse", &state->apply_in_reverse, + N_("apply the patch in reverse")), + OPT_BOOL(0, "unidiff-zero", &state->unidiff_zero, + N_("don't expect at least one line of context")), + OPT_BOOL(0, "reject", &state->apply_with_reject, + N_("leave the rejected hunks in corresponding *.rej files")), + OPT_BOOL(0, "allow-overlap", &state->allow_overlap, + N_("allow overlapping hunks")), + OPT__VERBOSITY(&state->apply_verbosity), + OPT_BIT(0, "inaccurate-eof", options, + N_("tolerate incorrectly detected missing new-line at the end of file"), + APPLY_OPT_INACCURATE_EOF), + OPT_BIT(0, "recount", options, + N_("do not trust the line counts in the hunk headers"), + APPLY_OPT_RECOUNT), + OPT_CALLBACK(0, "directory", state, N_("root"), + N_("prepend to all filenames"), + 
apply_option_parse_directory), + OPT_BOOL(0, "allow-empty", &state->allow_empty, + N_("don't return error for empty patches")), + OPT_END() + }; + + argc = parse_options(argc, argv, state->prefix, builtin_apply_options, apply_usage, 0); + + if (state->merge_variant && !state->threeway) + die(_("--ours, --theirs, and --union require --3way")); + + return argc; +} diff --git a/ref/git-apply.h b/ref/git-apply.h new file mode 100644 index 0000000..90e887e --- /dev/null +++ b/ref/git-apply.h @@ -0,0 +1,190 @@ +#ifndef APPLY_H +#define APPLY_H + +#include "hash.h" +#include "lockfile.h" +#include "string-list.h" +#include "strmap.h" + +struct repository; + +enum apply_ws_error_action { + nowarn_ws_error, + warn_on_ws_error, + die_on_ws_error, + correct_ws_error +}; + +enum apply_ws_ignore { + ignore_ws_none, + ignore_ws_change +}; + +enum apply_verbosity { + verbosity_silent = -1, + verbosity_normal = 0, + verbosity_verbose = 1 +}; + +struct apply_state { + const char *prefix; + + /* Lock file */ + struct lock_file lock_file; + + /* These control what gets looked at and modified */ + int apply; /* this is not a dry-run */ + int cached; /* apply to the index only */ + int check; /* preimage must match working tree, don't actually apply */ + int check_index; /* preimage must match the indexed version */ + int update_index; /* check_index && apply */ + int ita_only; /* add intent-to-add entries to the index */ + + /* These control cosmetic aspect of the output */ + int diffstat; /* just show a diffstat, and don't actually apply */ + int numstat; /* just show a numeric diffstat, and don't actually apply */ + int summary; /* just report creation, deletion, etc, and don't actually apply */ + + /* These boolean parameters control how the apply is done */ + int allow_overlap; + int apply_in_reverse; + int apply_with_reject; + int no_add; + int threeway; + int unidiff_zero; + int unsafe_paths; + int allow_empty; + + /* Other non boolean parameters */ + struct repository *repo; + 
const char *index_file; + enum apply_verbosity apply_verbosity; + int merge_variant; + char *fake_ancestor; + const char *patch_input_file; + int line_termination; + struct strbuf root; + int p_value; + int p_value_known; + unsigned int p_context; + + /* Exclude and include path parameters */ + struct string_list limit_by_name; + int has_include; + + /* Various "current state" */ + int linenr; /* current line number */ + /* + * We need to keep track of how symlinks in the preimage are + * manipulated by the patches. A patch to add a/b/c where a/b + * is a symlink should not be allowed to affect the directory + * the symlink points at, but if the same patch removes a/b, + * it is perfectly fine, as the patch removes a/b to make room + * to create a directory a/b so that a/b/c can be created. + */ + struct strset removed_symlinks; + struct strset kept_symlinks; + + /* + * For "diff-stat" like behaviour, we keep track of the biggest change + * we've seen, and the longest filename. That allows us to do simple + * scaling. + */ + int max_change; + int max_len; + + /* + * Records filenames that have been touched, in order to handle + * the case where more than one patches touch the same file. + */ + struct string_list fn_table; + + /* + * This is to save reporting routines before using + * set_error_routine() or set_warn_routine() to install muting + * routines when in verbosity_silent mode. + */ + void (*saved_error_routine)(const char *err, va_list params); + void (*saved_warn_routine)(const char *warn, va_list params); + + /* These control whitespace errors */ + enum apply_ws_error_action ws_error_action; + enum apply_ws_ignore ws_ignore_action; + const char *whitespace_option; + int whitespace_error; + int squelch_whitespace_errors; + int applied_after_fixing_ws; +}; + +/* + * This represents a "patch" to a file, both metainfo changes + * such as creation/deletion, filemode and content changes represented + * as a series of fragments. 
+ */ +struct patch { + char *new_name, *old_name, *def_name; + unsigned int old_mode, new_mode; + int is_new, is_delete; /* -1 = unknown, 0 = false, 1 = true */ + int rejected; + unsigned ws_rule; + int lines_added, lines_deleted; + int score; + int extension_linenr; /* first line specifying delete/new/rename/copy */ + unsigned int is_toplevel_relative:1; + unsigned int inaccurate_eof:1; + unsigned int is_binary:1; + unsigned int is_copy:1; + unsigned int is_rename:1; + unsigned int recount:1; + unsigned int conflicted_threeway:1; + unsigned int direct_to_threeway:1; + unsigned int crlf_in_old:1; + struct fragment *fragments; + char *result; + size_t resultsize; + char old_oid_prefix[GIT_MAX_HEXSZ + 1]; + char new_oid_prefix[GIT_MAX_HEXSZ + 1]; + struct patch *next; + + /* three-way fallback result */ + struct object_id threeway_stage[3]; +}; + +int apply_parse_options(int argc, const char **argv, + struct apply_state *state, + int *force_apply, int *options, + const char * const *apply_usage); +int init_apply_state(struct apply_state *state, + struct repository *repo, + const char *prefix); +void clear_apply_state(struct apply_state *state); +int check_apply_state(struct apply_state *state, int force_apply); + +/* + * Parse a git diff header, starting at line. Fills the relevant + * metadata information in 'struct patch'. + * + * Returns -1 on failure, the length of the parsed header otherwise. + */ +int parse_git_diff_header(struct strbuf *root, + int *linenr, + int p_value, + const char *line, + int len, + unsigned int size, + struct patch *patch); + +void release_patch(struct patch *patch); + +/* + * Some aspects of the apply behavior are controlled by the following + * bits in the "options" parameter passed to apply_all_patches(). 
+ */ +#define APPLY_OPT_INACCURATE_EOF (1<<0) /* accept inaccurate eof */ +#define APPLY_OPT_RECOUNT (1<<1) /* accept inaccurate line count */ + +int apply_all_patches(struct apply_state *state, + int argc, const char **argv, + int options); + +#endif From 98cf2bdb223803b963644947f60b49a056e47e33 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 15:44:06 +0000 Subject: [PATCH 03/20] chore: integrate parallel apply workstreams --- apply.go | 241 ++++------------- apply_internal_test.go | 42 +++ apply_options.go | 74 ++++++ apply_result.go | 58 +++++ apply_session.go | 158 ++++++++++++ apply_test.go | 43 +++- parity_test.go | 195 ++++++++++++++ patchset.go | 243 ++++++++++++++++++ patchset_test.go | 121 +++++++++ testdata/parity/anchor-beginning/fixture.json | 3 + testdata/parity/anchor-beginning/out | 3 + testdata/parity/anchor-beginning/patch | 8 + testdata/parity/anchor-beginning/src | 3 + testdata/parity/anchor-end/fixture.json | 3 + testdata/parity/anchor-end/out | 4 + testdata/parity/anchor-end/patch | 8 + testdata/parity/anchor-end/src | 3 + .../atomic-failure-vs-reject/fixture.json | 5 + testdata/parity/atomic-failure-vs-reject/out | 6 + .../parity/atomic-failure-vs-reject/patch | 13 + testdata/parity/atomic-failure-vs-reject/src | 6 + testdata/parity/reverse-apply/fixture.json | 3 + testdata/parity/reverse-apply/out | 3 + testdata/parity/reverse-apply/patch | 8 + testdata/parity/reverse-apply/src | 3 + testdata/parity/unidiff-zero/fixture.json | 5 + testdata/parity/unidiff-zero/out | 3 + testdata/parity/unidiff-zero/patch | 6 + testdata/parity/unidiff-zero/src | 3 + 29 files changed, 1089 insertions(+), 185 deletions(-) create mode 100644 apply_internal_test.go create mode 100644 apply_options.go create mode 100644 apply_result.go create mode 100644 apply_session.go create mode 100644 parity_test.go create mode 100644 patchset.go create mode 100644 patchset_test.go create mode 100644 testdata/parity/anchor-beginning/fixture.json create mode 
100644 testdata/parity/anchor-beginning/out create mode 100644 testdata/parity/anchor-beginning/patch create mode 100644 testdata/parity/anchor-beginning/src create mode 100644 testdata/parity/anchor-end/fixture.json create mode 100644 testdata/parity/anchor-end/out create mode 100644 testdata/parity/anchor-end/patch create mode 100644 testdata/parity/anchor-end/src create mode 100644 testdata/parity/atomic-failure-vs-reject/fixture.json create mode 100644 testdata/parity/atomic-failure-vs-reject/out create mode 100644 testdata/parity/atomic-failure-vs-reject/patch create mode 100644 testdata/parity/atomic-failure-vs-reject/src create mode 100644 testdata/parity/reverse-apply/fixture.json create mode 100644 testdata/parity/reverse-apply/out create mode 100644 testdata/parity/reverse-apply/patch create mode 100644 testdata/parity/reverse-apply/src create mode 100644 testdata/parity/unidiff-zero/fixture.json create mode 100644 testdata/parity/unidiff-zero/out create mode 100644 testdata/parity/unidiff-zero/patch create mode 100644 testdata/parity/unidiff-zero/src diff --git a/apply.go b/apply.go index dcdd859..8eddd36 100644 --- a/apply.go +++ b/apply.go @@ -2,67 +2,11 @@ package git_diff_parser import ( "bytes" - "errors" "fmt" "regexp" "strings" ) -var ErrPatchConflict = errors.New("patch conflict") - -type ConflictLabels struct { - Current string - Incoming string -} - -type ApplyOptions struct { - ConflictLabels ConflictLabels -} - -// PatchApply holds apply-time configuration and mirrors Git's stateful apply design. 
-type PatchApply struct { - options ApplyOptions -} - -func DefaultApplyOptions() ApplyOptions { - return ApplyOptions{ - ConflictLabels: ConflictLabels{ - Current: "Current", - Incoming: "Incoming patch", - }, - } -} - -func NewPatchApply(options ApplyOptions) *PatchApply { - return &PatchApply{options: normalizeApplyOptions(options)} -} - -func normalizeApplyOptions(options ApplyOptions) ApplyOptions { - defaults := DefaultApplyOptions() - if options.ConflictLabels.Current == "" { - options.ConflictLabels.Current = defaults.ConflictLabels.Current - } - if options.ConflictLabels.Incoming == "" { - options.ConflictLabels.Incoming = defaults.ConflictLabels.Incoming - } - return options -} - -type ConflictError struct { - ConflictingHunks int -} - -func (e *ConflictError) Error() string { - if e.ConflictingHunks == 1 { - return "patch conflict in 1 hunk" - } - return fmt.Sprintf("patch conflict in %d hunks", e.ConflictingHunks) -} - -func (e *ConflictError) Is(target error) bool { - return target == ErrPatchConflict -} - type patchHunk struct { oldStart int oldCount int @@ -84,78 +28,33 @@ type fileLine struct { eofMarker bool } +type anchoredFragment struct { + offset int + lines []fileLine +} + var hunkHeaderPattern = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@`) func ApplyFile(pristine, patchData []byte) ([]byte, error) { - return NewPatchApply(DefaultApplyOptions()).ApplyFile(pristine, patchData) + result, err := ApplyFileWithOptions(pristine, patchData, DefaultApplyOptions()) + return result.Content, err +} + +func ApplyFileWithOptions(pristine, patchData []byte, options ApplyOptions) (ApplyResult, error) { + return NewPatchApply(options).applyFileWithResult(pristine, patchData) } func (p *PatchApply) ApplyFile(pristine, patchData []byte) ([]byte, error) { - normalizedPatch := normalizePatchForValidation(patchData) - if err := validateSingleFilePatch(normalizedPatch); err != nil { - return nil, err - } + result, err := p.applyFileWithResult(pristine, patchData) + return result.Content, err +} - lines := splitLinesPreserveNewline(string(normalizedPatch)) - hunks, err := parseHunks(skipToHunks(lines)) +func (p *PatchApply) applyFileWithResult(pristine, patchData []byte) (ApplyResult, error) { + patch, err := p.validateAndParsePatch(patchData) if err != nil { - return nil, err - } - if !hunksContainChanges(hunks) { - return nil, fmt.Errorf("patch contains no effective changes") + return ApplyResult{}, err } - - sourceLines := splitFileLines(pristine) - cursor := 0 - outLines := make([]fileLine, 0, len(sourceLines)) - conflicts := 0 - - for _, hunk := range hunks { - matchIndex, matched := locateHunk(sourceLines, cursor, hunk) - if !matched { - conflicts++ - - conflictStart := hunk.oldStart - 1 - if conflictStart < cursor { - conflictStart = cursor - } - if conflictStart > len(sourceLines) { - conflictStart = len(sourceLines) - } - - conflictEnd := conflictStart + hunk.oldCount - if conflictEnd > len(sourceLines) { - conflictEnd = len(sourceLines) - } - - outLines = appendSourceLines(outLines, sourceLines[cursor:conflictStart]...) 
- outLines = p.appendConflict(outLines, sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) - cursor = conflictEnd - continue - } - - outLines = appendSourceLines(outLines, sourceLines[cursor:matchIndex]...) - cursor = matchIndex - - for _, hunkLine := range hunk.lines { - switch hunkLine.kind { - case ' ': - outLines = append(outLines, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) - cursor++ - case '-': - cursor++ - case '+': - outLines = append(outLines, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) - } - } - } - - outLines = appendSourceLines(outLines, sourceLines[cursor:]...) - result := joinFileLines(outLines) - if conflicts > 0 { - return result, &ConflictError{ConflictingHunks: conflicts} - } - return result, nil + return p.newApplySession(pristine).apply(patch) } // ApplyPatch is kept as a compatibility alias. @@ -273,78 +172,62 @@ func parseHunks(lines []string) ([]patchHunk, error) { return hunks, nil } -func locateHunk(sourceLines []fileLine, cursor int, hunk patchHunk) (int, bool) { - preferred := hunk.oldStart - 1 - if hunk.oldCount == 0 { - preferred = hunk.oldStart - } - if preferred < cursor { - preferred = cursor - } - - if hunk.newCount >= hunk.oldCount && preferred <= len(sourceLines) && postimageMatchesAt(sourceLines, preferred, desiredLines(hunk)) { - return 0, false - } - - for offset := 0; ; offset++ { - candidate := preferred - offset - if candidate >= cursor && candidate <= len(sourceLines) && hunkMatchesAt(sourceLines, candidate, hunk) { - return candidate, true - } - - candidate = preferred + offset - if offset > 0 && candidate >= cursor && candidate <= len(sourceLines) && hunkMatchesAt(sourceLines, candidate, hunk) { - return candidate, true +func desiredLines(hunk patchHunk) []fileLine { + lines := make([]fileLine, 0, len(hunk.lines)) + for _, line := range hunk.lines { + if line.kind == ' ' || line.kind == '+' { + lines = append(lines, 
fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.newEOF}) } + } + return lines +} - if preferred-offset < cursor && preferred+offset > len(sourceLines) { - break +func preimageLines(hunk patchHunk) []fileLine { + lines := make([]fileLine, 0, len(hunk.lines)) + for _, line := range hunk.lines { + if line.kind == ' ' || line.kind == '-' { + lines = append(lines, fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.oldEOF}) } } + return lines +} - return 0, false +func matchAnchoredFragment(source []fileLine, start int, begin, end anchoredFragment) bool { + return matchFragment(source, start+begin.offset, begin.lines) && + matchFragment(source, start+end.offset, end.lines) } -func hunkMatchesAt(sourceLines []fileLine, start int, hunk patchHunk) bool { - if hunk.newCount >= hunk.oldCount && postimageMatchesAt(sourceLines, start, desiredLines(hunk)) { - return false +func splitAnchoredFragment(lines []fileLine) (anchoredFragment, anchoredFragment) { + if len(lines) == 0 { + return anchoredFragment{}, anchoredFragment{} } - cursor := start - for _, hunkLine := range hunk.lines { - switch hunkLine.kind { - case ' ', '-': - if cursor >= len(sourceLines) { - return false - } - if sourceLines[cursor].text != hunkLine.text || - sourceLines[cursor].hasNewline != hunkLine.hasNewline || - sourceLines[cursor].eofMarker != hunkLine.oldEOF { - return false - } - cursor++ - case '+': - continue - default: - return false - } + beginLen := len(lines) / 2 + if beginLen == 0 { + beginLen = 1 } - return true + return anchoredFragment{ + offset: 0, + lines: lines[:beginLen], + }, anchoredFragment{ + offset: beginLen, + lines: lines[beginLen:], + } } -func postimageMatchesAt(sourceLines []fileLine, start int, desired []fileLine) bool { - if len(desired) == 0 { - return false +func matchFragment(source []fileLine, start int, fragment []fileLine) bool { + if len(fragment) == 0 { + return true } - if start < 0 || start+len(desired) > len(sourceLines) { 
+ if start < 0 || start+len(fragment) > len(source) { return false } - for i := range desired { - if sourceLines[start+i].text != desired[i].text || - sourceLines[start+i].hasNewline != desired[i].hasNewline || - sourceLines[start+i].eofMarker != desired[i].eofMarker { + for i := range fragment { + if source[start+i].text != fragment[i].text || + source[start+i].hasNewline != fragment[i].hasNewline || + source[start+i].eofMarker != fragment[i].eofMarker { return false } } @@ -352,16 +235,6 @@ func postimageMatchesAt(sourceLines []fileLine, start int, desired []fileLine) b return true } -func desiredLines(hunk patchHunk) []fileLine { - lines := make([]fileLine, 0, len(hunk.lines)) - for _, line := range hunk.lines { - if line.kind == ' ' || line.kind == '+' { - lines = append(lines, fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.newEOF}) - } - } - return lines -} - func (p *PatchApply) appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { labels := p.options.ConflictLabels out = append(out, fileLine{text: "<<<<<<< " + labels.Current, hasNewline: true}) diff --git a/apply_internal_test.go b/apply_internal_test.go new file mode 100644 index 0000000..2025738 --- /dev/null +++ b/apply_internal_test.go @@ -0,0 +1,42 @@ +package git_diff_parser + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMatchAnchoredFragmentRequiresBothEnds(t *testing.T) { + source := splitFileLines([]byte("one\na\nb\nc\nd\na\nb\nx\nd\n")) + begin, end := splitAnchoredFragment([]fileLine{ + {text: "a", hasNewline: true}, + {text: "b", hasNewline: true}, + {text: "c", hasNewline: true}, + {text: "d", hasNewline: true}, + }) + + require.True(t, matchAnchoredFragment(source, 1, begin, end)) + require.False(t, matchAnchoredFragment(source, 5, begin, end)) +} + +func TestFindPosRejectsAlreadyAppliedPostimage(t *testing.T) { + session := &applySession{ + sourceLines: 
splitFileLines([]byte("a\nb\nx\nc\n")), + } + hunk := patchHunk{ + oldStart: 1, + oldCount: 3, + newCount: 4, + lines: []patchLine{ + {kind: ' ', text: "a", hasNewline: true}, + {kind: ' ', text: "b", hasNewline: true}, + {kind: '+', text: "x", hasNewline: true}, + {kind: ' ', text: "c", hasNewline: true}, + }, + } + + pos, matched := session.findPos(hunk) + assert.Equal(t, 0, pos) + assert.False(t, matched) +} diff --git a/apply_options.go b/apply_options.go new file mode 100644 index 0000000..1199662 --- /dev/null +++ b/apply_options.go @@ -0,0 +1,74 @@ +package git_diff_parser + +// ApplyMode controls how the apply engine treats hunks that cannot be placed +// directly into the target content. +type ApplyMode int + +const ( + // ApplyModeApply keeps the output neutral when a hunk cannot be applied. + ApplyModeApply ApplyMode = iota + // ApplyModeMerge renders conflict markers into the output for misses. + ApplyModeMerge +) + +// ConflictLabels controls the labels rendered into conflict markers. +// The zero value renders neutral markers without any labels. +type ConflictLabels struct { + Current string + Incoming string +} + +// ApplyOptions configures the apply engine. +type ApplyOptions struct { + Mode ApplyMode + ConflictLabels ConflictLabels +} + +func DefaultApplyOptions() ApplyOptions { + return ApplyOptions{ + Mode: ApplyModeMerge, + ConflictLabels: ConflictLabels{ + Current: "Current", + Incoming: "Incoming patch", + }, + } +} + +// PatchApply holds apply-time configuration and mirrors Git's stateful apply design. 
+type PatchApply struct { + options ApplyOptions +} + +func NewPatchApply(options ApplyOptions) *PatchApply { + return &PatchApply{options: normalizeApplyOptions(options)} +} + +func legacyApplyOptions() ApplyOptions { + return ApplyOptions{ + Mode: ApplyModeMerge, + ConflictLabels: ConflictLabels{ + Current: "Current (Your changes)", + Incoming: "New (Generated by Speakeasy)", + }, + } +} + +func (o ApplyOptions) normalize() ApplyOptions { + if o.Mode != ApplyModeMerge { + o.Mode = ApplyModeApply + } + if o.Mode == ApplyModeMerge { + defaults := DefaultApplyOptions() + if o.ConflictLabels.Current == "" { + o.ConflictLabels.Current = defaults.ConflictLabels.Current + } + if o.ConflictLabels.Incoming == "" { + o.ConflictLabels.Incoming = defaults.ConflictLabels.Incoming + } + } + return o +} + +func normalizeApplyOptions(options ApplyOptions) ApplyOptions { + return options.normalize() +} diff --git a/apply_result.go b/apply_result.go new file mode 100644 index 0000000..064fcb6 --- /dev/null +++ b/apply_result.go @@ -0,0 +1,58 @@ +package git_diff_parser + +import ( + "errors" + "fmt" +) + +var ErrPatchConflict = errors.New("patch conflict") + +// ApplyResult captures the patched content and the type of misses encountered +// while attempting to apply it. +type ApplyResult struct { + Content []byte + DirectMisses int + MergeConflicts int +} + +// ApplyError reports the aggregate apply outcome. +type ApplyError struct { + DirectMisses int + MergeConflicts int + // ConflictingHunks keeps the legacy count available for callers that still + // reason about conflict hunks rather than the new miss/conflict split. 
+ ConflictingHunks int +} + +func (e *ApplyError) Error() string { + if e == nil { + return "" + } + + if e.MergeConflicts > 0 || e.ConflictingHunks > 0 { + count := e.MergeConflicts + if count == 0 { + count = e.ConflictingHunks + } + if count == 1 { + return "patch conflict in 1 hunk" + } + return fmt.Sprintf("patch conflict in %d hunks", count) + } + + if e.DirectMisses > 0 { + if e.DirectMisses == 1 { + return "patch miss in 1 hunk" + } + return fmt.Sprintf("patch miss in %d hunks", e.DirectMisses) + } + + return "patch apply failed" +} + +func (e *ApplyError) Is(target error) bool { + return target == ErrPatchConflict +} + +// ConflictError is kept as a compatibility alias for the old public type. +type ConflictError = ApplyError diff --git a/apply_session.go b/apply_session.go new file mode 100644 index 0000000..d4fa656 --- /dev/null +++ b/apply_session.go @@ -0,0 +1,158 @@ +package git_diff_parser + +import "fmt" + +type validatedPatch struct { + hunks []patchHunk +} + +type applySession struct { + applier *PatchApply + sourceLines []fileLine + image []fileLine + cursor int + conflicts int +} + +func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { + normalizedPatch := normalizePatchForValidation(patchData) + if err := validateSingleFilePatch(normalizedPatch); err != nil { + return validatedPatch{}, err + } + + lines := splitLinesPreserveNewline(string(normalizedPatch)) + hunks, err := parseHunks(skipToHunks(lines)) + if err != nil { + return validatedPatch{}, err + } + if !hunksContainChanges(hunks) { + return validatedPatch{}, fmt.Errorf("patch contains no effective changes") + } + + return validatedPatch{hunks: hunks}, nil +} + +func (p *PatchApply) newApplySession(pristine []byte) *applySession { + sourceLines := splitFileLines(pristine) + return &applySession{ + applier: p, + sourceLines: sourceLines, + image: make([]fileLine, 0, len(sourceLines)), + } +} + +func (s *applySession) apply(patch validatedPatch) 
(ApplyResult, error) { + for _, hunk := range patch.hunks { + s.applyHunk(hunk) + } + + s.appendSourceUntil(len(s.sourceLines)) + result := ApplyResult{Content: joinFileLines(s.image)} + if s.conflicts > 0 { + if s.applier.options.Mode == ApplyModeMerge { + result.MergeConflicts = s.conflicts + return result, &ApplyError{ + MergeConflicts: s.conflicts, + ConflictingHunks: s.conflicts, + } + } + result.DirectMisses = s.conflicts + return result, &ApplyError{ + DirectMisses: s.conflicts, + } + } + return result, nil +} + +func (s *applySession) applyHunk(hunk patchHunk) { + matchIndex, matched := s.findPos(hunk) + if !matched { + s.conflicts++ + s.appendConflictingHunk(hunk) + return + } + + s.appendSourceUntil(matchIndex) + + for _, hunkLine := range hunk.lines { + switch hunkLine.kind { + case ' ': + s.image = append(s.image, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) + s.cursor++ + case '-': + s.cursor++ + case '+': + s.image = append(s.image, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) + } + } +} + +func (s *applySession) appendConflictingHunk(hunk patchHunk) { + conflictStart := hunk.oldStart - 1 + if conflictStart < s.cursor { + conflictStart = s.cursor + } + if conflictStart > len(s.sourceLines) { + conflictStart = len(s.sourceLines) + } + + conflictEnd := conflictStart + hunk.oldCount + if conflictEnd > len(s.sourceLines) { + conflictEnd = len(s.sourceLines) + } + + s.appendSourceUntil(conflictStart) + if s.applier.options.Mode == ApplyModeMerge { + s.image = s.applier.appendConflict(s.image, s.sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) + } else { + s.image = appendSourceLines(s.image, s.sourceLines[conflictStart:conflictEnd]...) + } + s.cursor = conflictEnd +} + +func (s *applySession) appendSourceUntil(limit int) { + if limit <= s.cursor { + return + } + s.image = appendSourceLines(s.image, s.sourceLines[s.cursor:limit]...) 
+ s.cursor = limit +} + +func (s *applySession) findPos(hunk patchHunk) (int, bool) { + preferred := hunk.oldStart - 1 + if hunk.oldCount == 0 { + preferred = hunk.oldStart + } + if preferred < s.cursor { + preferred = s.cursor + } + + postimage := desiredLines(hunk) + if hunk.newCount >= hunk.oldCount && preferred <= len(s.sourceLines) && matchFragment(s.sourceLines, preferred, postimage) { + return 0, false + } + + preimage := preimageLines(hunk) + begin, end := splitAnchoredFragment(preimage) + return s.findPosWithAnchors(preferred, begin, end) +} + +func (s *applySession) findPosWithAnchors(preferred int, begin, end anchoredFragment) (int, bool) { + for offset := 0; ; offset++ { + left := preferred - offset + if left >= s.cursor && left <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, left, begin, end) { + return left, true + } + + right := preferred + offset + if offset > 0 && right >= s.cursor && right <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, right, begin, end) { + return right, true + } + + if left < s.cursor && right > len(s.sourceLines) { + break + } + } + + return 0, false +} diff --git a/apply_test.go b/apply_test.go index b11fa75..94c3f11 100644 --- a/apply_test.go +++ b/apply_test.go @@ -451,6 +451,46 @@ func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { assert.Contains(t, string(applied), "func (s *Status) String() string") } +func TestApplyFileWithOptions_RendersNeutralConflictMarkers(t *testing.T) { + t.Parallel() + + base := []byte("package testsdk\n\ntype Status struct{}\n") + current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") + patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + + result, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + Mode: git_diff_parser.ApplyModeMerge, + }) + require.Error(t, err) + var applyErr 
*git_diff_parser.ApplyError + require.ErrorAs(t, err, &applyErr) + assert.Equal(t, 0, result.DirectMisses) + assert.Equal(t, 1, result.MergeConflicts) + assert.Contains(t, string(result.Content), "<<<<<<<") + assert.NotContains(t, string(result.Content), "Current (Your changes)") + assert.NotContains(t, string(result.Content), "Generated by Speakeasy") +} + +func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T) { + t.Parallel() + + base := []byte("package testsdk\n\ntype Status struct{}\n") + current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") + patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + + result, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + Mode: git_diff_parser.ApplyModeApply, + }) + require.Error(t, err) + var applyErr *git_diff_parser.ApplyError + require.ErrorAs(t, err, &applyErr) + assert.Equal(t, 1, result.DirectMisses) + assert.Equal(t, 0, result.MergeConflicts) + assert.Equal(t, current, result.Content) + assert.NotContains(t, string(result.Content), "<<<<<<<") + assert.NotContains(t, string(result.Content), ">>>>>>>") +} + func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { t.Parallel() @@ -459,6 +499,7 @@ func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) applier := git_diff_parser.NewPatchApply(git_diff_parser.ApplyOptions{ + Mode: git_diff_parser.ApplyModeMerge, ConflictLabels: git_diff_parser.ConflictLabels{ Current: "Current (Your changes)", Incoming: "New (Generated by Speakeasy)", @@ -467,7 +508,7 @@ func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { applied, err := applier.ApplyFile(current, patchData) require.Error(t, err) - 
assert.Contains(t, string(applied), defaultCurrentConflictMarker) + assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") } diff --git a/parity_test.go b/parity_test.go new file mode 100644 index 0000000..64d5d21 --- /dev/null +++ b/parity_test.go @@ -0,0 +1,195 @@ +//go:build parity + +package git_diff_parser_test + +import ( + "bytes" + "encoding/json" + "errors" + "os" + "os/exec" + "path/filepath" + "sort" + "testing" + + git_diff_parser "github.com/speakeasy-api/git-diff-parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type parityFixture struct { + GitArgs []string `json:"gitArgs"` + ExpectConflict bool `json:"expectConflict"` + CheckReject bool `json:"checkReject"` +} + +type parityCase struct { + name string + src []byte + patch []byte + out []byte + fixture parityFixture +} + +func TestApplyFile_ParityCorpus(t *testing.T) { + if testing.Short() { + t.Skip("parity corpus is an integration test stream") + } + + requireGitBinary(t) + + cases := loadParityCases(t) + require.NotEmpty(t, cases) + + for _, tc := range cases { + tc := tc + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + oracles := runGitApplyOracles(t, tc) + applied, err := git_diff_parser.ApplyFile(tc.src, tc.patch) + + if tc.fixture.ExpectConflict { + require.Error(t, err) + var conflictErr *git_diff_parser.ConflictError + require.ErrorAs(t, err, &conflictErr) + assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + assert.Equal(t, tc.src, oracles.applied) + assert.Contains(t, string(applied), "<<<<<<< Current") + assert.Contains(t, string(applied), ">>>>>>> Incoming patch") + if len(tc.out) > 0 { + for _, line := range bytes.Split(bytes.TrimSpace(tc.out), []byte("\n")) { + if len(line) == 0 { + continue + } + assert.Contains(t, string(applied), string(line)) + } + } + } else { + require.NoError(t, err) + require.Equal(t, oracles.applied, 
applied) + if len(tc.out) > 0 { + assert.Equal(t, tc.out, applied) + } + } + + if tc.fixture.CheckReject { + rejectOracles := runGitApplyOracles(t, tc, "--reject") + require.True(t, rejectOracles.rejected) + require.NotEqual(t, tc.src, rejectOracles.applied) + if len(tc.out) > 0 { + assert.Equal(t, tc.out, rejectOracles.applied) + } + require.NotEmpty(t, rejectOracles.rej) + assert.Contains(t, string(rejectOracles.rej), "line5") + } + }) + } +} + +type gitApplyOracle struct { + applied []byte + rej []byte + rejected bool + exitErr error +} + +func runGitApplyOracles(t *testing.T, tc parityCase, extraArgs ...string) gitApplyOracle { + t.Helper() + + dir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), tc.src, 0o600)) + require.NoError(t, os.WriteFile(filepath.Join(dir, "patch.diff"), tc.patch, 0o600)) + + args := []string{"apply", "--whitespace=nowarn"} + args = append(args, tc.fixture.GitArgs...) + args = append(args, extraArgs...) + args = append(args, "patch.diff") + + cmd := exec.Command("git", args...) + cmd.Dir = dir + output, err := cmd.CombinedOutput() + oracles := gitApplyOracle{exitErr: err} + + if err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) { + oracles.rejected = exitErr.ExitCode() != 0 + } else { + require.NoError(t, err, "git apply failed to start: %s", output) + } + } else { + oracles.rejected = false + } + + applied, readErr := os.ReadFile(filepath.Join(dir, "file.txt")) + require.NoError(t, readErr) + oracles.applied = applied + + rej, rejErr := os.ReadFile(filepath.Join(dir, "file.txt.rej")) + if rejErr == nil { + oracles.rej = rej + } + + if len(output) > 0 && err == nil { + // git apply is quiet here; keep the command output surfaced only if it was unexpected. 
+ assert.Empty(t, string(output)) + } + + return oracles +} + +func loadParityCases(t *testing.T) []parityCase { + t.Helper() + + root := filepath.Join("testdata", "parity") + entries, err := os.ReadDir(root) + require.NoError(t, err) + + cases := make([]parityCase, 0, len(entries)) + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + dir := filepath.Join(root, entry.Name()) + fixture := readParityFixture(t, filepath.Join(dir, "fixture.json")) + cases = append(cases, parityCase{ + name: entry.Name(), + src: readParityFile(t, filepath.Join(dir, "src")), + patch: readParityFile(t, filepath.Join(dir, "patch")), + out: readParityFile(t, filepath.Join(dir, "out")), + fixture: fixture, + }) + } + + sort.Slice(cases, func(i, j int) bool { + return cases[i].name < cases[j].name + }) + + return cases +} + +func readParityFixture(t *testing.T, path string) parityFixture { + t.Helper() + + raw := readParityFile(t, path) + var fixture parityFixture + require.NoError(t, json.Unmarshal(raw, &fixture)) + return fixture +} + +func readParityFile(t *testing.T, path string) []byte { + t.Helper() + + data, err := os.ReadFile(path) + require.NoError(t, err) + return data +} + +func requireGitBinary(t *testing.T) { + t.Helper() + + _, err := exec.LookPath("git") + require.NoError(t, err) +} diff --git a/patchset.go b/patchset.go new file mode 100644 index 0000000..1724e58 --- /dev/null +++ b/patchset.go @@ -0,0 +1,243 @@ +package git_diff_parser + +import ( + "bytes" + "errors" + "fmt" + "strings" +) + +var ( + ErrPatchCreate = errors.New("patch creates are not supported") + ErrPatchDelete = errors.New("patch deletes are not supported") + ErrPatchRename = errors.New("patch renames are not supported") + ErrPatchModeChange = errors.New("patch mode changes are not supported") + ErrPatchBinary = errors.New("binary patches are not supported") +) + +type PatchsetOperation string + +const ( + PatchsetOperationCreate PatchsetOperation = "create" + PatchsetOperationDelete 
PatchsetOperation = "delete" + PatchsetOperationRename PatchsetOperation = "rename" + PatchsetOperationModeChange PatchsetOperation = "mode change" + PatchsetOperationBinary PatchsetOperation = "binary" +) + +type UnsupportedPatchError struct { + Operation PatchsetOperation + Path string + From string + To string +} + +func (e *UnsupportedPatchError) Error() string { + switch e.Operation { + case PatchsetOperationCreate: + if e.Path != "" { + return fmt.Sprintf("patch creates are not supported for %q", e.Path) + } + return "patch creates are not supported" + case PatchsetOperationDelete: + if e.Path != "" { + return fmt.Sprintf("patch deletes are not supported for %q", e.Path) + } + return "patch deletes are not supported" + case PatchsetOperationRename: + if e.From != "" || e.To != "" { + return fmt.Sprintf("patch renames are not supported: %q -> %q", e.From, e.To) + } + return "patch renames are not supported" + case PatchsetOperationModeChange: + if e.Path != "" { + return fmt.Sprintf("patch mode changes are not supported for %q", e.Path) + } + return "patch mode changes are not supported" + case PatchsetOperationBinary: + if e.Path != "" { + return fmt.Sprintf("binary patches are not supported for %q", e.Path) + } + return "binary patches are not supported" + default: + return "unsupported patch" + } +} + +func (e *UnsupportedPatchError) Is(target error) bool { + switch target { + case ErrPatchCreate: + return e.Operation == PatchsetOperationCreate + case ErrPatchDelete: + return e.Operation == PatchsetOperationDelete + case ErrPatchRename: + return e.Operation == PatchsetOperationRename + case ErrPatchModeChange: + return e.Operation == PatchsetOperationModeChange + case ErrPatchBinary: + return e.Operation == PatchsetOperationBinary + default: + return false + } +} + +type Patchset struct { + Files []PatchsetFile +} + +type PatchsetFile struct { + Diff FileDiff + Patch []byte +} + +func ParsePatchset(patchData []byte) (Patchset, []error) { + parsed, errs := 
Parse(string(patchData)) + if len(errs) > 0 { + return Patchset{}, errs + } + + chunks := splitPatchsetChunks(patchData) + if len(chunks) != len(parsed.FileDiff) { + return Patchset{}, []error{ + fmt.Errorf("parsed %d file diffs but split %d patch fragments", len(parsed.FileDiff), len(chunks)), + } + } + + files := make([]PatchsetFile, len(chunks)) + for i := range chunks { + files[i] = PatchsetFile{ + Diff: parsed.FileDiff[i], + Patch: chunks[i], + } + } + + return Patchset{Files: files}, nil +} + +func (p Patchset) Apply(tree map[string][]byte) (map[string][]byte, error) { + out := cloneTree(tree) + for _, file := range p.Files { + if err := validatePatchsetFile(tree, file.Diff); err != nil { + return nil, err + } + + current := out[file.Diff.ToFile] + applied, err := ApplyFile(current, file.Patch) + if err != nil { + return nil, err + } + out[file.Diff.ToFile] = append([]byte(nil), applied...) + } + return out, nil +} + +func ApplyPatchset(tree map[string][]byte, patchData []byte) (map[string][]byte, error) { + patchset, errs := ParsePatchset(patchData) + if len(errs) > 0 { + return nil, fmt.Errorf("unsupported patch syntax: %w", errs[0]) + } + return patchset.Apply(tree) +} + +func cloneTree(tree map[string][]byte) map[string][]byte { + out := make(map[string][]byte, len(tree)) + for path, content := range tree { + out[path] = append([]byte(nil), content...) 
+ } + return out +} + +func validatePatchsetFile(tree map[string][]byte, fileDiff FileDiff) error { + switch { + case fileDiff.IsBinary: + return &UnsupportedPatchError{ + Operation: PatchsetOperationBinary, + Path: fileDiff.ToFile, + } + case fileDiff.FromFile != fileDiff.ToFile: + return &UnsupportedPatchError{ + Operation: PatchsetOperationRename, + From: fileDiff.FromFile, + To: fileDiff.ToFile, + } + } + + _, exists := tree[fileDiff.ToFile] + + switch fileDiff.Type { + case FileDiffTypeAdded: + return &UnsupportedPatchError{ + Operation: PatchsetOperationCreate, + Path: fileDiff.ToFile, + } + case FileDiffTypeDeleted: + return &UnsupportedPatchError{ + Operation: PatchsetOperationDelete, + Path: fileDiff.FromFile, + } + } + + if fileDiff.NewMode != "" { + if exists { + return &UnsupportedPatchError{ + Operation: PatchsetOperationModeChange, + Path: fileDiff.ToFile, + } + } + return &UnsupportedPatchError{ + Operation: PatchsetOperationCreate, + Path: fileDiff.ToFile, + } + } + + if len(fileDiff.Hunks) == 0 { + if !exists { + return &UnsupportedPatchError{ + Operation: PatchsetOperationCreate, + Path: fileDiff.ToFile, + } + } + return fmt.Errorf("patch for %q contains no hunks", fileDiff.ToFile) + } + + if !exists { + return &UnsupportedPatchError{ + Operation: PatchsetOperationCreate, + Path: fileDiff.ToFile, + } + } + + return nil +} + +func splitPatchsetChunks(patchData []byte) [][]byte { + lines := splitLinesPreserveNewline(string(patchData)) + if len(lines) == 0 { + return nil + } + + chunks := make([][]byte, 0) + var buf bytes.Buffer + started := false + + flush := func() { + if !started || buf.Len() == 0 { + return + } + chunks = append(chunks, append([]byte(nil), buf.Bytes()...)) + buf.Reset() + } + + for _, line := range lines { + if strings.HasPrefix(strings.TrimRight(line, "\n"), "diff --git ") { + flush() + started = true + } + if started { + buf.WriteString(line) + } + } + + flush() + return chunks +} diff --git a/patchset_test.go 
b/patchset_test.go new file mode 100644 index 0000000..cfd2fe9 --- /dev/null +++ b/patchset_test.go @@ -0,0 +1,121 @@ +package git_diff_parser_test + +import ( + "path/filepath" + "testing" + + git_diff_parser "github.com/speakeasy-api/git-diff-parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestParsePatchset(t *testing.T) { + t.Parallel() + + patchA := buildPatch(t, "alpha.txt", []byte("alpha\none\n"), []byte("alpha\ntwo\n")) + patchB := buildPatch(t, "beta.txt", []byte("beta\none\n"), []byte("beta\ntwo\n")) + patchsetData := append(append([]byte{}, patchA...), patchB...) + + patchset, errs := git_diff_parser.ParsePatchset(patchsetData) + require.Empty(t, errs) + require.Len(t, patchset.Files, 2) + + assert.Equal(t, "alpha.txt", patchset.Files[0].Diff.ToFile) + assert.Equal(t, "beta.txt", patchset.Files[1].Diff.ToFile) + assert.Contains(t, string(patchset.Files[0].Patch), "diff --git a/alpha.txt b/alpha.txt") + assert.Contains(t, string(patchset.Files[1].Patch), "diff --git a/beta.txt b/beta.txt") +} + +func TestPatchsetApply_MultipleFiles(t *testing.T) { + t.Parallel() + + original := map[string][]byte{ + "alpha.txt": []byte("alpha\none\n"), + "beta.txt": []byte("beta\none\n"), + "keep.txt": []byte("unchanged\n"), + } + + patchA := buildPatch(t, "alpha.txt", original["alpha.txt"], []byte("alpha\ntwo\n")) + patchB := buildPatch(t, "beta.txt", original["beta.txt"], []byte("beta\ntwo\n")) + patchsetData := append(append([]byte{}, patchA...), patchB...) 
+ + applied, err := git_diff_parser.ApplyPatchset(original, patchsetData) + require.NoError(t, err) + + assert.Equal(t, []byte("alpha\ntwo\n"), applied["alpha.txt"]) + assert.Equal(t, []byte("beta\ntwo\n"), applied["beta.txt"]) + assert.Equal(t, []byte("unchanged\n"), applied["keep.txt"]) + assert.Equal(t, []byte("alpha\none\n"), original["alpha.txt"]) + assert.Equal(t, []byte("beta\none\n"), original["beta.txt"]) +} + +func TestPatchsetApply_RejectsUnsupportedOperations(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + patch []byte + tree map[string][]byte + wantIs error + wantMessage string + }{ + { + name: "create", + patch: mustReadFile(t, filepath.Join("testdata", "significant", "add.diff")), + tree: map[string][]byte{}, + wantIs: git_diff_parser.ErrPatchCreate, + wantMessage: "patch creates are not supported", + }, + { + name: "delete", + patch: mustReadFile(t, filepath.Join("testdata", "significant", "rm.diff")), + tree: map[string][]byte{"a.txt": []byte("a\n")}, + wantIs: git_diff_parser.ErrPatchDelete, + wantMessage: "patch deletes are not supported", + }, + { + name: "rename", + patch: mustReadFile(t, filepath.Join("testdata", "significant", "mv.diff")), + tree: map[string][]byte{"b.txt": []byte("b\n")}, + wantIs: git_diff_parser.ErrPatchRename, + wantMessage: "patch renames are not supported", + }, + { + name: "mode change", + patch: []byte(`diff --git a/mode.go b/mode.go +old mode 100644 +new mode 100755 +--- a/mode.go ++++ b/mode.go +@@ -1 +1 @@ +-package mode ++package mode +`), + tree: map[string][]byte{"mode.go": []byte("package mode\n")}, + wantIs: git_diff_parser.ErrPatchModeChange, + wantMessage: "patch mode changes are not supported", + }, + { + name: "binary", + patch: mustReadFile(t, filepath.Join("testdata", "significant", "binary-delta.diff")), + tree: map[string][]byte{"favicon-16x16-light.png": []byte("binary")}, + wantIs: git_diff_parser.ErrPatchBinary, + wantMessage: "binary patches are not supported", + }, + } + + 
for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyPatchset(test.tree, test.patch) + require.Error(t, err) + assert.ErrorIs(t, err, test.wantIs) + assert.Contains(t, err.Error(), test.wantMessage) + + var unsupportedErr *git_diff_parser.UnsupportedPatchError + require.ErrorAs(t, err, &unsupportedErr) + }) + } +} diff --git a/testdata/parity/anchor-beginning/fixture.json b/testdata/parity/anchor-beginning/fixture.json new file mode 100644 index 0000000..bc769ad --- /dev/null +++ b/testdata/parity/anchor-beginning/fixture.json @@ -0,0 +1,3 @@ +{ + "gitArgs": [] +} diff --git a/testdata/parity/anchor-beginning/out b/testdata/parity/anchor-beginning/out new file mode 100644 index 0000000..3d4991a --- /dev/null +++ b/testdata/parity/anchor-beginning/out @@ -0,0 +1,3 @@ +ALPHA +beta +gamma diff --git a/testdata/parity/anchor-beginning/patch b/testdata/parity/anchor-beginning/patch new file mode 100644 index 0000000..8640581 --- /dev/null +++ b/testdata/parity/anchor-beginning/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ +-alpha ++ALPHA + beta + gamma diff --git a/testdata/parity/anchor-beginning/src b/testdata/parity/anchor-beginning/src new file mode 100644 index 0000000..85c3040 --- /dev/null +++ b/testdata/parity/anchor-beginning/src @@ -0,0 +1,3 @@ +alpha +beta +gamma diff --git a/testdata/parity/anchor-end/fixture.json b/testdata/parity/anchor-end/fixture.json new file mode 100644 index 0000000..bc769ad --- /dev/null +++ b/testdata/parity/anchor-end/fixture.json @@ -0,0 +1,3 @@ +{ + "gitArgs": [] +} diff --git a/testdata/parity/anchor-end/out b/testdata/parity/anchor-end/out new file mode 100644 index 0000000..7a28df3 --- /dev/null +++ b/testdata/parity/anchor-end/out @@ -0,0 +1,4 @@ +alpha +beta +gamma +delta diff --git a/testdata/parity/anchor-end/patch b/testdata/parity/anchor-end/patch new file mode 100644 index 
0000000..fb9a282 --- /dev/null +++ b/testdata/parity/anchor-end/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,4 @@ + alpha + beta + gamma ++delta diff --git a/testdata/parity/anchor-end/src b/testdata/parity/anchor-end/src new file mode 100644 index 0000000..85c3040 --- /dev/null +++ b/testdata/parity/anchor-end/src @@ -0,0 +1,3 @@ +alpha +beta +gamma diff --git a/testdata/parity/atomic-failure-vs-reject/fixture.json b/testdata/parity/atomic-failure-vs-reject/fixture.json new file mode 100644 index 0000000..d5f3df1 --- /dev/null +++ b/testdata/parity/atomic-failure-vs-reject/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [], + "expectConflict": true, + "checkReject": true +} diff --git a/testdata/parity/atomic-failure-vs-reject/out b/testdata/parity/atomic-failure-vs-reject/out new file mode 100644 index 0000000..f5607bb --- /dev/null +++ b/testdata/parity/atomic-failure-vs-reject/out @@ -0,0 +1,6 @@ +line1 +LINE2 +line3 +line4 +LINE5-DRIFT +line6 diff --git a/testdata/parity/atomic-failure-vs-reject/patch b/testdata/parity/atomic-failure-vs-reject/patch new file mode 100644 index 0000000..6d12d79 --- /dev/null +++ b/testdata/parity/atomic-failure-vs-reject/patch @@ -0,0 +1,13 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ + line1 +-line2 ++LINE2 + line3 +@@ -4,3 +4,3 @@ + line4 +-line5 ++LINE5 + line6 diff --git a/testdata/parity/atomic-failure-vs-reject/src b/testdata/parity/atomic-failure-vs-reject/src new file mode 100644 index 0000000..01a6b8d --- /dev/null +++ b/testdata/parity/atomic-failure-vs-reject/src @@ -0,0 +1,6 @@ +line1 +line2 +line3 +line4 +LINE5-DRIFT +line6 diff --git a/testdata/parity/reverse-apply/fixture.json b/testdata/parity/reverse-apply/fixture.json new file mode 100644 index 0000000..bc769ad --- /dev/null +++ b/testdata/parity/reverse-apply/fixture.json @@ -0,0 +1,3 @@ +{ + "gitArgs": [] +} diff --git a/testdata/parity/reverse-apply/out 
b/testdata/parity/reverse-apply/out new file mode 100644 index 0000000..85c3040 --- /dev/null +++ b/testdata/parity/reverse-apply/out @@ -0,0 +1,3 @@ +alpha +beta +gamma diff --git a/testdata/parity/reverse-apply/patch b/testdata/parity/reverse-apply/patch new file mode 100644 index 0000000..3f1a01e --- /dev/null +++ b/testdata/parity/reverse-apply/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ + alpha +-BETA ++beta + gamma diff --git a/testdata/parity/reverse-apply/src b/testdata/parity/reverse-apply/src new file mode 100644 index 0000000..e50310a --- /dev/null +++ b/testdata/parity/reverse-apply/src @@ -0,0 +1,3 @@ +alpha +BETA +gamma diff --git a/testdata/parity/unidiff-zero/fixture.json b/testdata/parity/unidiff-zero/fixture.json new file mode 100644 index 0000000..ff74acf --- /dev/null +++ b/testdata/parity/unidiff-zero/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "--unidiff-zero" + ] +} diff --git a/testdata/parity/unidiff-zero/out b/testdata/parity/unidiff-zero/out new file mode 100644 index 0000000..b7ab80e --- /dev/null +++ b/testdata/parity/unidiff-zero/out @@ -0,0 +1,3 @@ +alpha +BRAVO +gamma diff --git a/testdata/parity/unidiff-zero/patch b/testdata/parity/unidiff-zero/patch new file mode 100644 index 0000000..1a12d44 --- /dev/null +++ b/testdata/parity/unidiff-zero/patch @@ -0,0 +1,6 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2 +2 @@ +-beta ++BRAVO diff --git a/testdata/parity/unidiff-zero/src b/testdata/parity/unidiff-zero/src new file mode 100644 index 0000000..85c3040 --- /dev/null +++ b/testdata/parity/unidiff-zero/src @@ -0,0 +1,3 @@ +alpha +beta +gamma From ac6a6f39e214bf0809ba99c4c8290d090c588fdb Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 15:44:06 +0000 Subject: [PATCH 04/20] feat: enrich parser IR for apply parity --- apply.go | 3 + model.go | 128 ++++++++++++++++++++++++ parser.go | 261 
+++++++++++++++++++++++++++---------------------- parser_test.go | 57 +++++++++++ 4 files changed, 334 insertions(+), 115 deletions(-) create mode 100644 model.go diff --git a/apply.go b/apply.go index 8eddd36..cf8b32c 100644 --- a/apply.go +++ b/apply.go @@ -85,6 +85,9 @@ func validateSingleFilePatch(patchData []byte) error { if len(fileDiff.Hunks) == 0 { return fmt.Errorf("patch contains no hunks") } + if fileDiff.RenameFrom != "" || fileDiff.RenameTo != "" || fileDiff.CopyFrom != "" || fileDiff.CopyTo != "" { + return fmt.Errorf("unsupported patch syntax: copy and rename headers are not supported") + } return nil } diff --git a/model.go b/model.go new file mode 100644 index 0000000..f15b550 --- /dev/null +++ b/model.go @@ -0,0 +1,128 @@ +package git_diff_parser + +import "fmt" + +type ContentChangeType string + +const ( + ContentChangeTypeAdd ContentChangeType = "add" + ContentChangeTypeDelete ContentChangeType = "delete" + ContentChangeTypeModify ContentChangeType = "modify" + ContentChangeTypeNOOP ContentChangeType = "" +) + +// ContentChange is a part of the line that starts with ` `, `-`, `+`. +// Consecutive ContentChange build a line. +// A `~` is a special case of ContentChange that is used to indicate a new line. +type ContentChange struct { + Type ContentChangeType `json:"type"` + From string `json:"from"` + To string `json:"to"` +} + +type ChangeList []ContentChange + +// HunkLine keeps a normalized, apply-friendly view of a hunk line. +type HunkLine struct { + Kind byte `json:"kind"` + Text string `json:"text"` + HasNewline bool `json:"has_newline"` +} + +// Hunk is a line that starts with @@. +// Each hunk shows one area where the files differ. +// Unified format hunks look like this: +// @@ from-file-line-numbers to-file-line-numbers @@ +// +// line-from-either-file +// line-from-either-file… +// +// If a hunk contains just one line, only its start line number appears. Otherwise its line numbers look like ‘start,count’. 
An empty hunk is considered to start at the line that follows the hunk. +type Hunk struct { + ChangeList ChangeList `json:"change_list"` + Lines []HunkLine `json:"lines,omitempty"` + StartLineNumberOld int `json:"start_line_number_old"` + CountOld int `json:"count_old"` + StartLineNumberNew int `json:"start_line_number_new"` + CountNew int `json:"count_new"` +} + +func (changes *ChangeList) IsSignificant() bool { + for _, change := range *changes { + if change.Type != ContentChangeTypeNOOP { + return true + } + } + return false +} + +func (h Hunk) GoString() string { + return fmt.Sprintf( + "git_diff_parser.Hunk{ChangeList:%#v, StartLineNumberOld:%d, CountOld:%d, StartLineNumberNew:%d, CountNew:%d}", + h.ChangeList, + h.StartLineNumberOld, + h.CountOld, + h.StartLineNumberNew, + h.CountNew, + ) +} + +type FileDiffType string + +const ( + FileDiffTypeAdded FileDiffType = "add" + FileDiffTypeDeleted FileDiffType = "delete" + FileDiffTypeModified FileDiffType = "modify" +) + +type BinaryDeltaType string + +const ( + BinaryDeltaTypeLiteral BinaryDeltaType = "literal" + BinaryDeltaTypeDelta BinaryDeltaType = "delta" +) + +type BinaryPatch struct { + Type BinaryDeltaType `json:"type"` + Count int + Content string +} + +// FileDiff Source of truth: https://github.com/git/git/blob/master/diffcore.h#L106 +// Implemented in https://github.com/git/git/blob/master/diff.c#L3496 +type FileDiff struct { + FromFile string `json:"from_file"` + ToFile string `json:"to_file"` + Type FileDiffType `json:"type"` + IsBinary bool `json:"is_binary"` + OldMode string `json:"old_mode,omitempty"` + NewMode string `json:"new_mode,omitempty"` + IndexOld string `json:"index_old,omitempty"` + IndexNew string `json:"index_new,omitempty"` + IndexMode string `json:"index_mode,omitempty"` + SimilarityIndex int `json:"similarity_index,omitempty"` + DissimilarityIndex int `json:"dissimilarity_index,omitempty"` + RenameFrom string `json:"rename_from,omitempty"` + RenameTo string 
`json:"rename_to,omitempty"` + CopyFrom string `json:"copy_from,omitempty"` + CopyTo string `json:"copy_to,omitempty"` + Hunks []Hunk `json:"hunks"` + BinaryPatch []BinaryPatch `json:"binary_patch"` +} + +func (fd FileDiff) GoString() string { + return fmt.Sprintf( + "&git_diff_parser.FileDiff{FromFile:%#v, ToFile:%#v, Type:%#v, IsBinary:%t, NewMode:%#v, Hunks:%#v, BinaryPatch:%#v}", + fd.FromFile, + fd.ToFile, + fd.Type, + fd.IsBinary, + fd.NewMode, + fd.Hunks, + fd.BinaryPatch, + ) +} + +type Diff struct { + FileDiff []FileDiff `json:"file_diff"` +} diff --git a/parser.go b/parser.go index d16ba94..6eec8ab 100644 --- a/parser.go +++ b/parser.go @@ -10,52 +10,6 @@ import ( var ErrUnhandled = errors.New("unhandled git diff syntax") -type ContentChangeType string - -const ( - ContentChangeTypeAdd ContentChangeType = "add" - ContentChangeTypeDelete ContentChangeType = "delete" - ContentChangeTypeModify ContentChangeType = "modify" - ContentChangeTypeNOOP ContentChangeType = "" -) - -// ContentChange is a part of the line that starts with ` `, `-`, `+` -// Consecutive ContentChange build a line. -// A `~` is a special case of ContentChange that is used to indicate a new line. -type ContentChange struct { - Type ContentChangeType `json:"type"` - From string `json:"from"` - To string `json:"to"` -} - -type ChangeList []ContentChange - -// Hunk is a line that starts with @@. -// Each hunk shows one area where the files differ -// Unified format hunks look like this: -// @@ from-file-line-numbers to-file-line-numbers @@ -// -// line-from-either-file -// line-from-either-file… -// -// If a hunk contains just one line, only its start line number appears. Otherwise its line numbers look like ‘start,count’. An empty hunk is considered to start at the line that follows the hunk. 
-type Hunk struct { - ChangeList ChangeList `json:"change_list"` - StartLineNumberOld int `json:"start_line_number_old"` - CountOld int `json:"count_old"` - StartLineNumberNew int `json:"start_line_number_new"` - CountNew int `json:"count_new"` -} - -func (changes *ChangeList) IsSignificant() bool { - for _, change := range *changes { - if change.Type != ContentChangeTypeNOOP { - return true - } - } - return false -} - func NewHunk(line string) (Hunk, error) { namedHunkRegex := regexp.MustCompile(`(?m)^@@ -(?P\d+),?(?P\d+)? \+(?P\d+),?(?P\d+)? @@`) match := namedHunkRegex.FindStringSubmatch(line) @@ -92,43 +46,6 @@ func NewHunk(line string) (Hunk, error) { }, nil } -type FileDiffType string - -const ( - FileDiffTypeAdded FileDiffType = "add" - FileDiffTypeDeleted FileDiffType = "delete" - FileDiffTypeModified FileDiffType = "modify" -) - -type BinaryDeltaType string - -const ( - BinaryDeltaTypeLiteral BinaryDeltaType = "literal" - BinaryDeltaTypeDelta BinaryDeltaType = "delta" -) - -type BinaryPatch struct { - Type BinaryDeltaType `json:"type"` - Count int - Content string -} - -// FileDiff Source of truth: https://github.com/git/git/blob/master/diffcore.h#L106 -// Implemented in https://github.com/git/git/blob/master/diff.c#L3496 -type FileDiff struct { - FromFile string `json:"from_file"` - ToFile string `json:"to_file"` - Type FileDiffType `json:"type"` - IsBinary bool `json:"is_binary"` - NewMode string `json:"new_mode"` - Hunks []Hunk `json:"hunks"` - BinaryPatch []BinaryPatch `json:"binary_patch"` -} - -type Diff struct { - FileDiff []FileDiff `json:"file_diff"` -} - type ParserMode int const ( @@ -144,74 +61,130 @@ type parser struct { } func (p *parser) VisitLine(diff string) { - if p.tryVisitHeader(diff) { + line := trimSingleLineEnding(diff) + hasNewline := strings.HasSuffix(diff, "\n") + + if p.tryVisitHeader(line) { return } - if p.tryVisitBinary(diff) { + if p.tryVisitBinary(line) { return } - if p.tryVisitHunkHeader(diff) { + if 
p.tryVisitHunkHeader(line) { return } + fileHEAD := len(p.diff.FileDiff) - 1 + if fileHEAD < 0 { + p.err = append(p.err, fmt.Errorf("%w: %s", ErrUnhandled, line)) + return + } + hunkHEAD := len(p.diff.FileDiff[fileHEAD].Hunks) - 1 if hunkHEAD < 0 { p.err = append(p.err, fmt.Errorf("%w: %s", ErrUnhandled, diff)) return } - changeHead := len(p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList) - 1 + + hunk := &p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD] + // swallow extra, unused lines from start - if strings.HasPrefix(diff, "~") && - !p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList.IsSignificant() { - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].StartLineNumberOld += 1 - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].StartLineNumberNew += 1 - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].CountOld -= 1 - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].CountNew -= 1 - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList = []ContentChange{} - } - if strings.HasPrefix(diff, "+") { - if changeHead > 0 && p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList[changeHead].Type == ContentChangeTypeDelete { - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList[changeHead].Type = ContentChangeTypeModify - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList[changeHead].To = strings.TrimPrefix(diff, "+") + if strings.HasPrefix(line, "~") && !hunk.ChangeList.IsSignificant() { + hunk.StartLineNumberOld++ + hunk.StartLineNumberNew++ + hunk.CountOld-- + hunk.CountNew-- + hunk.ChangeList = []ContentChange{} + } + + if strings.HasPrefix(line, "+") { + if len(hunk.ChangeList) > 0 && hunk.ChangeList[len(hunk.ChangeList)-1].Type == ContentChangeTypeDelete { + hunk.ChangeList[len(hunk.ChangeList)-1].Type = ContentChangeTypeModify + hunk.ChangeList[len(hunk.ChangeList)-1].To = trimSingleLineEnding(strings.TrimPrefix(line, "+")) + hunk.Lines = append(hunk.Lines, HunkLine{ + Kind: '+', + Text: trimSingleLineEnding(strings.TrimPrefix(line, "+")), + HasNewline: hasNewline, + }) return } - 
p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList = append(p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList, ContentChange{ + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ Type: ContentChangeTypeAdd, From: "", - To: strings.TrimPrefix(diff, "+"), + To: trimSingleLineEnding(strings.TrimPrefix(line, "+")), + }) + hunk.Lines = append(hunk.Lines, HunkLine{ + Kind: '+', + Text: trimSingleLineEnding(strings.TrimPrefix(line, "+")), + HasNewline: hasNewline, }) return } - if strings.HasPrefix(diff, "-") { - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList = append(p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList, ContentChange{ + + if strings.HasPrefix(line, "-") { + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ Type: ContentChangeTypeDelete, - From: strings.TrimPrefix(diff, "-"), + From: trimSingleLineEnding(strings.TrimPrefix(line, "-")), To: "", }) + hunk.Lines = append(hunk.Lines, HunkLine{ + Kind: '-', + Text: trimSingleLineEnding(strings.TrimPrefix(line, "-")), + HasNewline: hasNewline, + }) + return + } + + if strings.HasPrefix(line, " ") { + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ + Type: ContentChangeTypeNOOP, + From: line, + To: line, + }) + hunk.Lines = append(hunk.Lines, HunkLine{ + Kind: ' ', + Text: trimSingleLineEnding(strings.TrimPrefix(line, " ")), + HasNewline: hasNewline, + }) return } - if diff == "~" { - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList = append(p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList, ContentChange{ + + if line == "~" { + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ Type: ContentChangeTypeNOOP, From: "\n", To: "\n", }) + return } - p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList = append(p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD].ChangeList, ContentChange{ + + if strings.HasPrefix(line, `\ No newline at end of file`) { + if n := len(hunk.Lines); n > 0 { + hunk.Lines[n-1].HasNewline = false + } + hunk.ChangeList = append(hunk.ChangeList, 
ContentChange{ + Type: ContentChangeTypeNOOP, + From: line, + To: line, + }) + return + } + + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ Type: ContentChangeTypeNOOP, - From: diff, - To: diff, + From: line, + To: line, }) } func (p *parser) tryVisitHeader(diff string) bool { // format: "diff --git a/README.md b/README.md" if strings.HasPrefix(diff, "diff ") { - strings.Split(diff, " ") p.diff.FileDiff = append(p.diff.FileDiff, p.parseDiffLine(diff)) p.mode = modeHeader return true } + fileHEAD := len(p.diff.FileDiff) - 1 if len(diff) == 0 && p.mode == modeHeader { return true @@ -223,22 +196,48 @@ func (p *parser) tryVisitHeader(diff string) bool { if p.mode != modeHeader { return false } + if strings.HasPrefix(diff, "+++ ") || strings.HasPrefix(diff, "--- ") { // ignore -- we're still in the FileDiff and we've already captured the file names return true } if strings.HasPrefix(diff, "index ") { + p.parseIndexHeader(diff, fileHEAD) return true } - if done := p.visitFileModeHeader(diff, fileHEAD); done { - return done + if strings.HasPrefix(diff, "similarity index ") { + p.diff.FileDiff[fileHEAD].SimilarityIndex = parsePercentValue(strings.TrimPrefix(diff, "similarity index ")) + return true } - - if strings.HasPrefix(diff, "rename from ") || strings.HasPrefix(diff, "rename to ") { + if strings.HasPrefix(diff, "dissimilarity index ") { + p.diff.FileDiff[fileHEAD].DissimilarityIndex = parsePercentValue(strings.TrimPrefix(diff, "dissimilarity index ")) + return true + } + if strings.HasPrefix(diff, "copy from ") { + p.diff.FileDiff[fileHEAD].CopyFrom = strings.TrimPrefix(diff, "copy from ") + p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + return true + } + if strings.HasPrefix(diff, "copy to ") { + p.diff.FileDiff[fileHEAD].CopyTo = strings.TrimPrefix(diff, "copy to ") + p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + return true + } + if strings.HasPrefix(diff, "rename from ") { + p.diff.FileDiff[fileHEAD].RenameFrom = 
strings.TrimPrefix(diff, "rename from ") + p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + return true + } + if strings.HasPrefix(diff, "rename to ") { + p.diff.FileDiff[fileHEAD].RenameTo = strings.TrimPrefix(diff, "rename to ") p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified return true } + if done := p.visitFileModeHeader(diff, fileHEAD); done { + return done + } + if strings.HasPrefix(diff, "GIT binary patch") { p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified p.diff.FileDiff[fileHEAD].IsBinary = true @@ -254,6 +253,7 @@ func (p *parser) tryVisitHeader(diff string) bool { if strings.HasPrefix(diff, "similarity") { return true } + // continue to parse if fileHEAD > 0 return fileHEAD < 0 } @@ -272,15 +272,33 @@ func (p *parser) visitFileModeHeader(diff string, fileHEAD int) bool { if strings.HasPrefix(diff, "deleted file mode ") { p.diff.FileDiff[fileHEAD].Type = FileDiffTypeDeleted + p.diff.FileDiff[fileHEAD].OldMode = strings.TrimPrefix(diff, "deleted file mode ") return true } if strings.HasPrefix(diff, "old mode ") { p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].OldMode = strings.TrimPrefix(diff, "old mode ") return true } return false } +func (p *parser) parseIndexHeader(diff string, fileHEAD int) { + fields := strings.Fields(strings.TrimPrefix(diff, "index ")) + if len(fields) == 0 { + return + } + + parts := strings.SplitN(fields[0], "..", 2) + if len(parts) == 2 { + p.diff.FileDiff[fileHEAD].IndexOld = parts[0] + p.diff.FileDiff[fileHEAD].IndexNew = parts[1] + } + if len(fields) > 1 { + p.diff.FileDiff[fileHEAD].IndexMode = fields[1] + } +} + func (p *parser) tryVisitBinary(diff string) bool { if p.mode != modeBinary { return false @@ -342,6 +360,7 @@ func (p *parser) tryVisitHunkHeader(diff string) bool { } func (p *parser) parseDiffLine(line string) FileDiff { + line = trimSingleLineEnding(line) filesStr := line[11:] var oldPath, newPath string @@ -375,13 +394,25 @@ func (p *parser) 
parseDiffLine(line string) FileDiff { } } +func parsePercentValue(raw string) int { + raw = strings.TrimSuffix(raw, "%") + value, err := strconv.Atoi(raw) + if err != nil { + return 0 + } + return value +} + // Converts git diff --word-diff=porcelain output to a Diff object. func Parse(diff string) (Diff, []error) { p := parser{} - lines := strings.Split(diff, "\n") + lines := splitLinesPreserveNewline(diff) for i := 0; i < len(lines); i++ { p.VisitLine(lines[i]) } + if strings.HasSuffix(diff, "\n") { + p.VisitLine("") + } return p.diff, p.err } diff --git a/parser_test.go b/parser_test.go index 0bb359b..c2b8832 100644 --- a/parser_test.go +++ b/parser_test.go @@ -91,6 +91,63 @@ func TestParse(t *testing.T) { } } +func TestParseCapturesFileMetadataAndHunkLines(t *testing.T) { + t.Parallel() + + diff := `diff --git a/src.txt b/dst.txt +similarity index 92% +rename from src.txt +rename to dst.txt +index 1234567..89abcde 100755 +old mode 100644 +new mode 100755 +--- a/src.txt ++++ b/dst.txt +@@ -1,2 +1,2 @@ +-old ++new + second +\ No newline at end of file +` + + parsed, errs := git_diff_parser.Parse(diff) + require.Empty(t, errs) + require.Len(t, parsed.FileDiff, 1) + + fileDiff := parsed.FileDiff[0] + assert.Equal(t, "src.txt", fileDiff.FromFile) + assert.Equal(t, "dst.txt", fileDiff.ToFile) + assert.Equal(t, git_diff_parser.FileDiffTypeModified, fileDiff.Type) + assert.Equal(t, "1234567", fileDiff.IndexOld) + assert.Equal(t, "89abcde", fileDiff.IndexNew) + assert.Equal(t, "100755", fileDiff.IndexMode) + assert.Equal(t, "100644", fileDiff.OldMode) + assert.Equal(t, "100755", fileDiff.NewMode) + assert.Equal(t, 92, fileDiff.SimilarityIndex) + assert.Equal(t, "src.txt", fileDiff.RenameFrom) + assert.Equal(t, "dst.txt", fileDiff.RenameTo) + + require.Len(t, fileDiff.Hunks, 1) + hunk := fileDiff.Hunks[0] + assert.Equal(t, 1, hunk.StartLineNumberOld) + assert.Equal(t, 1, hunk.StartLineNumberNew) + assert.Equal(t, 2, hunk.CountOld) + assert.Equal(t, 2, hunk.CountNew) + 
require.Len(t, hunk.Lines, 3) + + assert.Equal(t, byte('-'), hunk.Lines[0].Kind) + assert.Equal(t, "old", hunk.Lines[0].Text) + assert.True(t, hunk.Lines[0].HasNewline) + + assert.Equal(t, byte('+'), hunk.Lines[1].Kind) + assert.Equal(t, "new", hunk.Lines[1].Text) + assert.True(t, hunk.Lines[1].HasNewline) + + assert.Equal(t, byte(' '), hunk.Lines[2].Kind) + assert.Equal(t, "second", hunk.Lines[2].Text) + assert.False(t, hunk.Lines[2].HasNewline) +} + func MatchMessageSnapshot(t *testing.T, snapshotName string, content string) { t.Helper() _, filename, _, ok := runtime.Caller(0) From d928a4249b0b70f313790c50669e88c794259116 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:03:56 +0000 Subject: [PATCH 05/20] refactor: extract apply miss renderer seam --- apply_render.go | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ apply_session.go | 24 ++++-------------------- 2 files changed, 52 insertions(+), 20 deletions(-) create mode 100644 apply_render.go diff --git a/apply_render.go b/apply_render.go new file mode 100644 index 0000000..17750af --- /dev/null +++ b/apply_render.go @@ -0,0 +1,48 @@ +package git_diff_parser + +type missRenderer interface { + appendMiss(out, source, desired []fileLine) []fileLine + result(content []byte, misses int) (ApplyResult, error) +} + +func (p *PatchApply) missRenderer() missRenderer { + if p.options.Mode == ApplyModeMerge { + return mergeMissRenderer{applier: p} + } + return directMissRenderer{} +} + +type directMissRenderer struct{} + +func (directMissRenderer) appendMiss(out, source, _ []fileLine) []fileLine { + return appendSourceLines(out, source...) 
+} + +func (directMissRenderer) result(content []byte, misses int) (ApplyResult, error) { + result := ApplyResult{Content: content} + if misses == 0 { + return result, nil + } + result.DirectMisses = misses + return result, &ApplyError{DirectMisses: misses} +} + +type mergeMissRenderer struct { + applier *PatchApply +} + +func (r mergeMissRenderer) appendMiss(out, source, desired []fileLine) []fileLine { + return r.applier.appendConflict(out, source, desired) +} + +func (mergeMissRenderer) result(content []byte, misses int) (ApplyResult, error) { + result := ApplyResult{Content: content} + if misses == 0 { + return result, nil + } + result.MergeConflicts = misses + return result, &ApplyError{ + MergeConflicts: misses, + ConflictingHunks: misses, + } +} diff --git a/apply_session.go b/apply_session.go index d4fa656..4483301 100644 --- a/apply_session.go +++ b/apply_session.go @@ -8,6 +8,7 @@ type validatedPatch struct { type applySession struct { applier *PatchApply + renderer missRenderer sourceLines []fileLine image []fileLine cursor int @@ -36,6 +37,7 @@ func (p *PatchApply) newApplySession(pristine []byte) *applySession { sourceLines := splitFileLines(pristine) return &applySession{ applier: p, + renderer: p.missRenderer(), sourceLines: sourceLines, image: make([]fileLine, 0, len(sourceLines)), } @@ -47,21 +49,7 @@ func (s *applySession) apply(patch validatedPatch) (ApplyResult, error) { } s.appendSourceUntil(len(s.sourceLines)) - result := ApplyResult{Content: joinFileLines(s.image)} - if s.conflicts > 0 { - if s.applier.options.Mode == ApplyModeMerge { - result.MergeConflicts = s.conflicts - return result, &ApplyError{ - MergeConflicts: s.conflicts, - ConflictingHunks: s.conflicts, - } - } - result.DirectMisses = s.conflicts - return result, &ApplyError{ - DirectMisses: s.conflicts, - } - } - return result, nil + return s.renderer.result(joinFileLines(s.image), s.conflicts) } func (s *applySession) applyHunk(hunk patchHunk) { @@ -102,11 +90,7 @@ func (s 
*applySession) appendConflictingHunk(hunk patchHunk) { } s.appendSourceUntil(conflictStart) - if s.applier.options.Mode == ApplyModeMerge { - s.image = s.applier.appendConflict(s.image, s.sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) - } else { - s.image = appendSourceLines(s.image, s.sourceLines[conflictStart:conflictEnd]...) - } + s.image = s.renderer.appendMiss(s.image, s.sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) s.cursor = conflictEnd } From 78273ee7f3537f14c6a57a11e4228f21cdc63825 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:06:10 +0000 Subject: [PATCH 06/20] feat: add opt-in whitespace-aware matching --- apply.go | 29 ++++++++++++++++++++++------- apply_internal_test.go | 16 ++++++++++++++-- apply_options.go | 5 +++-- apply_session.go | 10 +++++++--- apply_test.go | 23 +++++++++++++++++++++++ 5 files changed, 69 insertions(+), 14 deletions(-) diff --git a/apply.go b/apply.go index cf8b32c..696362e 100644 --- a/apply.go +++ b/apply.go @@ -195,9 +195,9 @@ func preimageLines(hunk patchHunk) []fileLine { return lines } -func matchAnchoredFragment(source []fileLine, start int, begin, end anchoredFragment) bool { - return matchFragment(source, start+begin.offset, begin.lines) && - matchFragment(source, start+end.offset, end.lines) +func matchAnchoredFragment(source []fileLine, start int, begin, end anchoredFragment, ignoreWhitespace bool) bool { + return matchFragment(source, start+begin.offset, begin.lines, ignoreWhitespace) && + matchFragment(source, start+end.offset, end.lines, ignoreWhitespace) } func splitAnchoredFragment(lines []fileLine) (anchoredFragment, anchoredFragment) { @@ -219,7 +219,7 @@ func splitAnchoredFragment(lines []fileLine) (anchoredFragment, anchoredFragment } } -func matchFragment(source []fileLine, start int, fragment []fileLine) bool { +func matchFragment(source []fileLine, start int, fragment []fileLine, ignoreWhitespace bool) bool { if len(fragment) == 0 { return true } @@ -228,9 
+228,7 @@ func matchFragment(source []fileLine, start int, fragment []fileLine) bool { } for i := range fragment { - if source[start+i].text != fragment[i].text || - source[start+i].hasNewline != fragment[i].hasNewline || - source[start+i].eofMarker != fragment[i].eofMarker { + if !lineMatches(source[start+i], fragment[i], ignoreWhitespace) { return false } } @@ -238,6 +236,23 @@ func matchFragment(source []fileLine, start int, fragment []fileLine) bool { return true } +func lineMatches(left, right fileLine, ignoreWhitespace bool) bool { + if left.hasNewline != right.hasNewline || left.eofMarker != right.eofMarker { + return false + } + if left.text == right.text { + return true + } + if !ignoreWhitespace { + return false + } + return normalizeWhitespace(left.text) == normalizeWhitespace(right.text) +} + +func normalizeWhitespace(text string) string { + return strings.Join(strings.Fields(text), " ") +} + func (p *PatchApply) appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { labels := p.options.ConflictLabels out = append(out, fileLine{text: "<<<<<<< " + labels.Current, hasNewline: true}) diff --git a/apply_internal_test.go b/apply_internal_test.go index 2025738..db3400d 100644 --- a/apply_internal_test.go +++ b/apply_internal_test.go @@ -16,8 +16,8 @@ func TestMatchAnchoredFragmentRequiresBothEnds(t *testing.T) { {text: "d", hasNewline: true}, }) - require.True(t, matchAnchoredFragment(source, 1, begin, end)) - require.False(t, matchAnchoredFragment(source, 5, begin, end)) + require.True(t, matchAnchoredFragment(source, 1, begin, end, false)) + require.False(t, matchAnchoredFragment(source, 5, begin, end, false)) } func TestFindPosRejectsAlreadyAppliedPostimage(t *testing.T) { @@ -40,3 +40,15 @@ func TestFindPosRejectsAlreadyAppliedPostimage(t *testing.T) { assert.Equal(t, 0, pos) assert.False(t, matched) } + +func TestMatchFragment_IgnoreWhitespace(t *testing.T) { + source := splitFileLines([]byte("alpha\n beta\ncharlie\n")) + fragment := 
[]fileLine{ + {text: "alpha", hasNewline: true}, + {text: "beta", hasNewline: true}, + {text: "charlie", hasNewline: true}, + } + + require.False(t, matchFragment(source, 0, fragment, false)) + require.True(t, matchFragment(source, 0, fragment, true)) +} diff --git a/apply_options.go b/apply_options.go index 1199662..31d5c01 100644 --- a/apply_options.go +++ b/apply_options.go @@ -20,8 +20,9 @@ type ConflictLabels struct { // ApplyOptions configures the apply engine. type ApplyOptions struct { - Mode ApplyMode - ConflictLabels ConflictLabels + Mode ApplyMode + ConflictLabels ConflictLabels + IgnoreWhitespace bool } func DefaultApplyOptions() ApplyOptions { diff --git a/apply_session.go b/apply_session.go index 4483301..bd08336 100644 --- a/apply_session.go +++ b/apply_session.go @@ -112,7 +112,7 @@ func (s *applySession) findPos(hunk patchHunk) (int, bool) { } postimage := desiredLines(hunk) - if hunk.newCount >= hunk.oldCount && preferred <= len(s.sourceLines) && matchFragment(s.sourceLines, preferred, postimage) { + if hunk.newCount >= hunk.oldCount && preferred <= len(s.sourceLines) && matchFragment(s.sourceLines, preferred, postimage, s.ignoreWhitespace()) { return 0, false } @@ -124,12 +124,12 @@ func (s *applySession) findPos(hunk patchHunk) (int, bool) { func (s *applySession) findPosWithAnchors(preferred int, begin, end anchoredFragment) (int, bool) { for offset := 0; ; offset++ { left := preferred - offset - if left >= s.cursor && left <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, left, begin, end) { + if left >= s.cursor && left <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, left, begin, end, s.ignoreWhitespace()) { return left, true } right := preferred + offset - if offset > 0 && right >= s.cursor && right <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, right, begin, end) { + if offset > 0 && right >= s.cursor && right <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, right, begin, end, 
s.ignoreWhitespace()) { return right, true } @@ -140,3 +140,7 @@ func (s *applySession) findPosWithAnchors(preferred int, begin, end anchoredFrag return 0, false } + +func (s *applySession) ignoreWhitespace() bool { + return s.applier != nil && s.applier.options.IgnoreWhitespace +} diff --git a/apply_test.go b/apply_test.go index 94c3f11..9ded5f0 100644 --- a/apply_test.go +++ b/apply_test.go @@ -512,6 +512,29 @@ func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") } +func TestApplyFileWithOptions_IgnoreWhitespaceAppliesThroughContextDrift(t *testing.T) { + t.Parallel() + + original := []byte("alpha\n beta\ncharlie\n") + target := []byte("alpha\n BETA\ncharlie\n") + patchData := buildPatchWithContext(t, "whitespace.txt", original, target, 1) + current := []byte("alpha\n beta\ncharlie\n") + + _, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + Mode: git_diff_parser.ApplyModeMerge, + }) + require.Error(t, err) + + applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + Mode: git_diff_parser.ApplyModeMerge, + IgnoreWhitespace: true, + }) + require.NoError(t, err) + assert.Equal(t, target, applied.Content) + assert.Equal(t, 0, applied.DirectMisses) + assert.Equal(t, 0, applied.MergeConflicts) +} + func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) { t.Parallel() From 43ea63602f399f0b112dbaf5a8249c75b58e2920 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:07:30 +0000 Subject: [PATCH 07/20] refactor: decouple apply outcomes from rendering --- apply.go | 33 +++++++++------ apply_render.go | 103 ++++++++++++++++++++++++++++++++--------------- apply_result.go | 13 ++++++ apply_session.go | 23 +++++++---- apply_test.go | 4 ++ 5 files changed, 124 insertions(+), 52 deletions(-) diff --git a/apply.go b/apply.go index 696362e..7e8802d 100644 --- 
a/apply.go +++ b/apply.go @@ -8,6 +8,7 @@ import ( ) type patchHunk struct { + header string oldStart int oldCount int newCount int @@ -54,7 +55,24 @@ func (p *PatchApply) applyFileWithResult(pristine, patchData []byte) (ApplyResul if err != nil { return ApplyResult{}, err } - return p.newApplySession(pristine).apply(patch) + outcome, err := p.newApplySession(pristine).apply(patch) + if err != nil { + return ApplyResult{}, err + } + + result := renderApplyResult(pristine, outcome, p.options) + if len(outcome.conflicts) == 0 { + return result, nil + } + if p.options.Mode == ApplyModeMerge { + return result, &ApplyError{ + MergeConflicts: len(outcome.conflicts), + ConflictingHunks: len(outcome.conflicts), + } + } + return result, &ApplyError{ + DirectMisses: len(outcome.conflicts), + } } // ApplyPatch is kept as a compatibility alias. @@ -165,6 +183,7 @@ func parseHunks(lines []string) ([]patchHunk, error) { markEOFMarkers(hunkLines, oldCount, newCount) hunks = append(hunks, patchHunk{ + header: line, oldStart: oldStart, oldCount: oldCount, newCount: newCount, @@ -253,18 +272,6 @@ func normalizeWhitespace(text string) string { return strings.Join(strings.Fields(text), " ") } -func (p *PatchApply) appendConflict(out []fileLine, ours, theirs []fileLine) []fileLine { - labels := p.options.ConflictLabels - out = append(out, fileLine{text: "<<<<<<< " + labels.Current, hasNewline: true}) - out = appendSourceLines(out, ours...) - out = ensureTrailingNewline(out) - out = append(out, fileLine{text: "=======", hasNewline: true}) - out = appendSourceLines(out, theirs...) - out = ensureTrailingNewline(out) - out = append(out, fileLine{text: ">>>>>>> " + labels.Incoming, hasNewline: true}) - return out -} - func appendSourceLines(dst []fileLine, src ...fileLine) []fileLine { return append(dst, src...) 
} diff --git a/apply_render.go b/apply_render.go index 17750af..ac734ec 100644 --- a/apply_render.go +++ b/apply_render.go @@ -1,48 +1,87 @@ package git_diff_parser -type missRenderer interface { - appendMiss(out, source, desired []fileLine) []fileLine - result(content []byte, misses int) (ApplyResult, error) -} +import "bytes" -func (p *PatchApply) missRenderer() missRenderer { - if p.options.Mode == ApplyModeMerge { - return mergeMissRenderer{applier: p} +func renderApplyResult(pristine []byte, outcome applyOutcome, options ApplyOptions) ApplyResult { + result := ApplyResult{ + Content: joinFileLines(outcome.content), + Reject: renderRejectContent(outcome.conflicts), } - return directMissRenderer{} -} -type directMissRenderer struct{} + if len(outcome.conflicts) == 0 { + return result + } -func (directMissRenderer) appendMiss(out, source, _ []fileLine) []fileLine { - return appendSourceLines(out, source...) + switch options.Mode { + case ApplyModeMerge: + result.Content = renderMergeContent(outcome.content, outcome.conflicts, options.ConflictLabels) + result.MergeConflicts = len(outcome.conflicts) + default: + result.Content = append([]byte{}, pristine...) + result.DirectMisses = len(outcome.conflicts) + } + + return result } -func (directMissRenderer) result(content []byte, misses int) (ApplyResult, error) { - result := ApplyResult{Content: content} - if misses == 0 { - return result, nil +func renderMergeContent(base []fileLine, conflicts []applyConflict, labels ConflictLabels) []byte { + if len(conflicts) == 0 { + return joinFileLines(base) } - result.DirectMisses = misses - return result, &ApplyError{DirectMisses: misses} -} -type mergeMissRenderer struct { - applier *PatchApply -} + rendered := append([]fileLine(nil), base...) 
+ for i := len(conflicts) - 1; i >= 0; i-- { + conflict := conflicts[i] + if conflict.offset < 0 || conflict.offset > len(rendered) { + continue + } -func (r mergeMissRenderer) appendMiss(out, source, desired []fileLine) []fileLine { - return r.applier.appendConflict(out, source, desired) + end := conflict.offset + len(conflict.ours) + if end > len(rendered) { + end = len(rendered) + } + + replacement := renderConflictLines(labels, conflict.ours, conflict.theirs) + rendered = append(rendered[:conflict.offset], append(replacement, rendered[end:]...)...) + } + + return joinFileLines(rendered) } -func (mergeMissRenderer) result(content []byte, misses int) (ApplyResult, error) { - result := ApplyResult{Content: content} - if misses == 0 { - return result, nil +func renderRejectContent(conflicts []applyConflict) []byte { + if len(conflicts) == 0 { + return nil } - result.MergeConflicts = misses - return result, &ApplyError{ - MergeConflicts: misses, - ConflictingHunks: misses, + + var buf bytes.Buffer + for i, conflict := range conflicts { + if i > 0 { + buf.WriteByte('\n') + } + if conflict.hunk.header != "" { + buf.WriteString(conflict.hunk.header) + buf.WriteByte('\n') + } + for _, line := range conflict.hunk.lines { + buf.WriteByte(line.kind) + buf.WriteString(line.text) + if line.hasNewline { + buf.WriteByte('\n') + } + } + } + return buf.Bytes() +} + +func renderConflictLines(labels ConflictLabels, ours, theirs []fileLine) []fileLine { + lines := []fileLine{ + {text: "<<<<<<< " + labels.Current, hasNewline: true}, } + lines = appendSourceLines(lines, ours...) + lines = ensureTrailingNewline(lines) + lines = append(lines, fileLine{text: "=======", hasNewline: true}) + lines = appendSourceLines(lines, theirs...) 
+ lines = ensureTrailingNewline(lines) + lines = append(lines, fileLine{text: ">>>>>>> " + labels.Incoming, hasNewline: true}) + return lines } diff --git a/apply_result.go b/apply_result.go index 064fcb6..ad982e2 100644 --- a/apply_result.go +++ b/apply_result.go @@ -11,10 +11,23 @@ var ErrPatchConflict = errors.New("patch conflict") // while attempting to apply it. type ApplyResult struct { Content []byte + Reject []byte DirectMisses int MergeConflicts int } +type applyOutcome struct { + content []fileLine + conflicts []applyConflict +} + +type applyConflict struct { + offset int + hunk patchHunk + ours []fileLine + theirs []fileLine +} + // ApplyError reports the aggregate apply outcome. type ApplyError struct { DirectMisses int diff --git a/apply_session.go b/apply_session.go index bd08336..e26fbc7 100644 --- a/apply_session.go +++ b/apply_session.go @@ -8,11 +8,10 @@ type validatedPatch struct { type applySession struct { applier *PatchApply - renderer missRenderer sourceLines []fileLine image []fileLine cursor int - conflicts int + conflicts []applyConflict } func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { @@ -37,25 +36,26 @@ func (p *PatchApply) newApplySession(pristine []byte) *applySession { sourceLines := splitFileLines(pristine) return &applySession{ applier: p, - renderer: p.missRenderer(), sourceLines: sourceLines, image: make([]fileLine, 0, len(sourceLines)), } } -func (s *applySession) apply(patch validatedPatch) (ApplyResult, error) { +func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { for _, hunk := range patch.hunks { s.applyHunk(hunk) } s.appendSourceUntil(len(s.sourceLines)) - return s.renderer.result(joinFileLines(s.image), s.conflicts) + return applyOutcome{ + content: append([]fileLine(nil), s.image...), + conflicts: append([]applyConflict(nil), s.conflicts...), + }, nil } func (s *applySession) applyHunk(hunk patchHunk) { matchIndex, matched := s.findPos(hunk) if !matched { - 
s.conflicts++ s.appendConflictingHunk(hunk) return } @@ -90,7 +90,16 @@ func (s *applySession) appendConflictingHunk(hunk patchHunk) { } s.appendSourceUntil(conflictStart) - s.image = s.renderer.appendMiss(s.image, s.sourceLines[conflictStart:conflictEnd], desiredLines(hunk)) + offset := len(s.image) + ours := append([]fileLine(nil), s.sourceLines[conflictStart:conflictEnd]...) + theirs := desiredLines(hunk) + s.image = appendSourceLines(s.image, ours...) + s.conflicts = append(s.conflicts, applyConflict{ + offset: offset, + hunk: hunk, + ours: ours, + theirs: theirs, + }) s.cursor = conflictEnd } diff --git a/apply_test.go b/apply_test.go index 9ded5f0..7b728f5 100644 --- a/apply_test.go +++ b/apply_test.go @@ -469,6 +469,8 @@ func TestApplyFileWithOptions_RendersNeutralConflictMarkers(t *testing.T) { assert.Contains(t, string(result.Content), "<<<<<<<") assert.NotContains(t, string(result.Content), "Current (Your changes)") assert.NotContains(t, string(result.Content), "Generated by Speakeasy") + assert.NotEmpty(t, result.Reject) + assert.Contains(t, string(result.Reject), "@@") } func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T) { @@ -489,6 +491,8 @@ func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T assert.Equal(t, current, result.Content) assert.NotContains(t, string(result.Content), "<<<<<<<") assert.NotContains(t, string(result.Content), ">>>>>>>") + assert.NotEmpty(t, result.Reject) + assert.Contains(t, string(result.Reject), "@@") } func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { From 2c36676e1584ab91073ea3f1506678613fd5cbe0 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:08:37 +0000 Subject: [PATCH 08/20] refactor: drive apply from parsed hunk IR --- apply_session.go | 68 +++++++++++++++++++++++++++++++++++++++++++----- model.go | 32 ++++++++++++++++++++++- parser.go | 35 ++++++++++++++++++++----- parser_test.go | 2 ++ 4 files changed, 123 insertions(+), 14 
deletions(-) diff --git a/apply_session.go b/apply_session.go index e26fbc7..024acfe 100644 --- a/apply_session.go +++ b/apply_session.go @@ -16,19 +16,39 @@ type applySession struct { func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { normalizedPatch := normalizePatchForValidation(patchData) - if err := validateSingleFilePatch(normalizedPatch); err != nil { - return validatedPatch{}, err + parsed, errs := Parse(string(normalizedPatch)) + if len(errs) > 0 { + return validatedPatch{}, fmt.Errorf("unsupported patch syntax: %w", errs[0]) + } + if len(parsed.FileDiff) != 1 { + return validatedPatch{}, fmt.Errorf("expected exactly 1 file diff, found %d", len(parsed.FileDiff)) } - lines := splitLinesPreserveNewline(string(normalizedPatch)) - hunks, err := parseHunks(skipToHunks(lines)) - if err != nil { - return validatedPatch{}, err + fileDiff := parsed.FileDiff[0] + if fileDiff.IsBinary { + return validatedPatch{}, fmt.Errorf("binary patches are not supported") + } + if fileDiff.NewMode != "" { + return validatedPatch{}, fmt.Errorf("file mode changes are not supported") + } + if fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted { + return validatedPatch{}, fmt.Errorf("patches may only modify existing files") + } + if len(fileDiff.Hunks) == 0 { + return validatedPatch{}, fmt.Errorf("patch contains no hunks") + } + if fileDiff.RenameFrom != "" || fileDiff.RenameTo != "" || fileDiff.CopyFrom != "" || fileDiff.CopyTo != "" { + return validatedPatch{}, fmt.Errorf("unsupported patch syntax: copy and rename headers are not supported") } - if !hunksContainChanges(hunks) { + if !fileDiffHasChanges(fileDiff) { return validatedPatch{}, fmt.Errorf("patch contains no effective changes") } + hunks := make([]patchHunk, 0, len(fileDiff.Hunks)) + for _, hunk := range fileDiff.Hunks { + hunks = append(hunks, patchHunkFromHunk(hunk)) + } + return validatedPatch{hunks: hunks}, nil } @@ -130,6 +150,40 @@ func (s *applySession) 
findPos(hunk patchHunk) (int, bool) { return s.findPosWithAnchors(preferred, begin, end) } +func patchHunkFromHunk(hunk Hunk) patchHunk { + lines := make([]patchLine, 0, len(hunk.Lines)) + for _, line := range hunk.Lines { + lines = append(lines, patchLine{ + kind: line.Kind, + text: line.Text, + hasNewline: line.HasNewline, + oldEOF: line.OldEOF, + newEOF: line.NewEOF, + }) + } + + return patchHunk{ + header: formatPatchHunkHeader(hunk), + oldStart: hunk.StartLineNumberOld, + oldCount: hunk.CountOld, + newCount: hunk.CountNew, + lines: lines, + } +} + +func formatPatchHunkHeader(hunk Hunk) string { + oldRange := formatPatchHunkRange(hunk.StartLineNumberOld, hunk.CountOld) + newRange := formatPatchHunkRange(hunk.StartLineNumberNew, hunk.CountNew) + return fmt.Sprintf("@@ -%s +%s @@", oldRange, newRange) +} + +func formatPatchHunkRange(start, count int) string { + if count == 1 { + return fmt.Sprintf("%d", start) + } + return fmt.Sprintf("%d,%d", start, count) +} + func (s *applySession) findPosWithAnchors(preferred int, begin, end anchoredFragment) (int, bool) { for offset := 0; ; offset++ { left := preferred - offset diff --git a/model.go b/model.go index f15b550..88f4efe 100644 --- a/model.go +++ b/model.go @@ -1,6 +1,9 @@ package git_diff_parser -import "fmt" +import ( + "fmt" + "strings" +) type ContentChangeType string @@ -27,6 +30,33 @@ type HunkLine struct { Kind byte `json:"kind"` Text string `json:"text"` HasNewline bool `json:"has_newline"` + OldEOF bool `json:"old_eof,omitempty"` + NewEOF bool `json:"new_eof,omitempty"` +} + +func (l *HunkLine) MarkNoNewline() { + l.HasNewline = false +} + +func (h *Hunk) MarkEOFMarkers() { + oldSeen := 0 + newSeen := 0 + + for i := range h.Lines { + line := &h.Lines[i] + if line.Kind == ' ' || line.Kind == '-' { + oldSeen++ + } + if line.Kind == ' ' || line.Kind == '+' { + newSeen++ + } + if !line.HasNewline || strings.TrimSuffix(line.Text, "\r") != "" { + continue + } + + line.OldEOF = (line.Kind == ' ' || line.Kind == 
'-') && oldSeen == h.CountOld + line.NewEOF = (line.Kind == ' ' || line.Kind == '+') && newSeen == h.CountNew + } } // Hunk is a line that starts with @@. diff --git a/parser.go b/parser.go index 6eec8ab..563b6cc 100644 --- a/parser.go +++ b/parser.go @@ -160,7 +160,10 @@ func (p *parser) VisitLine(diff string) { if strings.HasPrefix(line, `\ No newline at end of file`) { if n := len(hunk.Lines); n > 0 { - hunk.Lines[n-1].HasNewline = false + hunk.Lines[n-1].MarkNoNewline() + } else { + p.err = append(p.err, fmt.Errorf("unexpected no-newline marker without a preceding patch line")) + return } hunk.ChangeList = append(hunk.ChangeList, ContentChange{ Type: ContentChangeTypeNOOP, @@ -170,16 +173,22 @@ func (p *parser) VisitLine(diff string) { return } - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeNOOP, - From: line, - To: line, - }) + if line == "" { + hunk.ChangeList = append(hunk.ChangeList, ContentChange{ + Type: ContentChangeTypeNOOP, + From: line, + To: line, + }) + return + } + + p.err = append(p.err, fmt.Errorf("unexpected hunk line %q", line)) } func (p *parser) tryVisitHeader(diff string) bool { // format: "diff --git a/README.md b/README.md" if strings.HasPrefix(diff, "diff ") { + p.finalizeCurrentHunk() p.diff.FileDiff = append(p.diff.FileDiff, p.parseDiffLine(diff)) p.mode = modeHeader return true @@ -348,6 +357,7 @@ func (p *parser) tryVisitHunkHeader(diff string) bool { return false } if strings.HasPrefix(diff, "@@") { + p.finalizeCurrentHunk() hunk, err := NewHunk(diff) if err != nil { p.err = append(p.err, err) @@ -359,6 +369,18 @@ func (p *parser) tryVisitHunkHeader(diff string) bool { return false } +func (p *parser) finalizeCurrentHunk() { + if len(p.diff.FileDiff) == 0 { + return + } + fileHEAD := len(p.diff.FileDiff) - 1 + hunks := p.diff.FileDiff[fileHEAD].Hunks + if len(hunks) == 0 { + return + } + p.diff.FileDiff[fileHEAD].Hunks[len(hunks)-1].MarkEOFMarkers() +} + func (p *parser) parseDiffLine(line string) 
FileDiff { line = trimSingleLineEnding(line) filesStr := line[11:] @@ -413,6 +435,7 @@ func Parse(diff string) (Diff, []error) { if strings.HasSuffix(diff, "\n") { p.VisitLine("") } + p.finalizeCurrentHunk() return p.diff, p.err } diff --git a/parser_test.go b/parser_test.go index c2b8832..8112653 100644 --- a/parser_test.go +++ b/parser_test.go @@ -146,6 +146,8 @@ new mode 100755 assert.Equal(t, byte(' '), hunk.Lines[2].Kind) assert.Equal(t, "second", hunk.Lines[2].Text) assert.False(t, hunk.Lines[2].HasNewline) + assert.False(t, hunk.Lines[2].OldEOF) + assert.False(t, hunk.Lines[2].NewEOF) } func MatchMessageSnapshot(t *testing.T, snapshotName string, content string) { From b321f14d9e445aab1c926b568cd2f0ea9aa52ea3 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:09:51 +0000 Subject: [PATCH 09/20] feat: support text patchset tree operations --- patchset.go | 73 +--------------------- patchset_apply.go | 152 ++++++++++++++++++++++++++++++++++++++++++++++ patchset_test.go | 150 ++++++++++++++++++++++++++++++++------------- 3 files changed, 263 insertions(+), 112 deletions(-) create mode 100644 patchset_apply.go diff --git a/patchset.go b/patchset.go index 1724e58..f91f8a8 100644 --- a/patchset.go +++ b/patchset.go @@ -21,6 +21,7 @@ const ( PatchsetOperationCreate PatchsetOperation = "create" PatchsetOperationDelete PatchsetOperation = "delete" PatchsetOperationRename PatchsetOperation = "rename" + PatchsetOperationCopy PatchsetOperation = "copy" PatchsetOperationModeChange PatchsetOperation = "mode change" PatchsetOperationBinary PatchsetOperation = "binary" ) @@ -117,16 +118,9 @@ func ParsePatchset(patchData []byte) (Patchset, []error) { func (p Patchset) Apply(tree map[string][]byte) (map[string][]byte, error) { out := cloneTree(tree) for _, file := range p.Files { - if err := validatePatchsetFile(tree, file.Diff); err != nil { + if err := applyPatchsetFile(out, file); err != nil { return nil, err } - - current := out[file.Diff.ToFile] - 
applied, err := ApplyFile(current, file.Patch) - if err != nil { - return nil, err - } - out[file.Diff.ToFile] = append([]byte(nil), applied...) } return out, nil } @@ -147,69 +141,6 @@ func cloneTree(tree map[string][]byte) map[string][]byte { return out } -func validatePatchsetFile(tree map[string][]byte, fileDiff FileDiff) error { - switch { - case fileDiff.IsBinary: - return &UnsupportedPatchError{ - Operation: PatchsetOperationBinary, - Path: fileDiff.ToFile, - } - case fileDiff.FromFile != fileDiff.ToFile: - return &UnsupportedPatchError{ - Operation: PatchsetOperationRename, - From: fileDiff.FromFile, - To: fileDiff.ToFile, - } - } - - _, exists := tree[fileDiff.ToFile] - - switch fileDiff.Type { - case FileDiffTypeAdded: - return &UnsupportedPatchError{ - Operation: PatchsetOperationCreate, - Path: fileDiff.ToFile, - } - case FileDiffTypeDeleted: - return &UnsupportedPatchError{ - Operation: PatchsetOperationDelete, - Path: fileDiff.FromFile, - } - } - - if fileDiff.NewMode != "" { - if exists { - return &UnsupportedPatchError{ - Operation: PatchsetOperationModeChange, - Path: fileDiff.ToFile, - } - } - return &UnsupportedPatchError{ - Operation: PatchsetOperationCreate, - Path: fileDiff.ToFile, - } - } - - if len(fileDiff.Hunks) == 0 { - if !exists { - return &UnsupportedPatchError{ - Operation: PatchsetOperationCreate, - Path: fileDiff.ToFile, - } - } - return fmt.Errorf("patch for %q contains no hunks", fileDiff.ToFile) - } - - if !exists { - return &UnsupportedPatchError{ - Operation: PatchsetOperationCreate, - Path: fileDiff.ToFile, - } - } - - return nil -} - func splitPatchsetChunks(patchData []byte) [][]byte { lines := splitLinesPreserveNewline(string(patchData)) if len(lines) == 0 { diff --git a/patchset_apply.go b/patchset_apply.go new file mode 100644 index 0000000..0d3449f --- /dev/null +++ b/patchset_apply.go @@ -0,0 +1,152 @@ +package git_diff_parser + +import "fmt" + +const patchsetOperationModify PatchsetOperation = "modify" + +func 
applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { + if file.Diff.IsBinary { + return &UnsupportedPatchError{ + Operation: PatchsetOperationBinary, + Path: firstNonEmpty(file.Diff.ToFile, file.Diff.FromFile), + } + } + + op, sourcePath, targetPath, err := patchsetOperation(tree, file.Diff) + if err != nil { + return err + } + + switch op { + case PatchsetOperationCreate: + if _, exists := tree[targetPath]; exists { + return fmt.Errorf("cannot create existing file %q", targetPath) + } + content, err := applyPatchsetContent(nil, file) + if err != nil { + return err + } + tree[targetPath] = append([]byte(nil), content...) + return nil + case PatchsetOperationDelete: + content, exists := tree[sourcePath] + if !exists { + return fmt.Errorf("cannot delete missing file %q", sourcePath) + } + if len(file.Diff.Hunks) > 0 { + if _, err := applyPatchsetContent(content, file); err != nil { + return err + } + } + delete(tree, sourcePath) + return nil + case PatchsetOperationRename: + content, exists := tree[sourcePath] + if !exists { + return fmt.Errorf("cannot rename missing file %q", sourcePath) + } + if targetPath != sourcePath { + if _, exists := tree[targetPath]; exists { + return fmt.Errorf("cannot rename %q to existing file %q", sourcePath, targetPath) + } + } + applied, err := applyPatchsetContent(content, file) + if err != nil { + return err + } + delete(tree, sourcePath) + tree[targetPath] = append([]byte(nil), applied...) + return nil + case PatchsetOperationCopy: + content, exists := tree[sourcePath] + if !exists { + return fmt.Errorf("cannot copy missing file %q", sourcePath) + } + if _, exists := tree[targetPath]; exists { + return fmt.Errorf("cannot copy to existing file %q", targetPath) + } + applied, err := applyPatchsetContent(content, file) + if err != nil { + return err + } + tree[targetPath] = append([]byte(nil), applied...) 
+ return nil + case PatchsetOperationModeChange, patchsetOperationModify: + content, exists := tree[targetPath] + if !exists { + return fmt.Errorf("cannot modify missing file %q", targetPath) + } + applied, err := applyPatchsetContent(content, file) + if err != nil { + return err + } + tree[targetPath] = append([]byte(nil), applied...) + return nil + default: + return fmt.Errorf("unsupported patch operation") + } +} + +func patchsetOperation(tree map[string][]byte, fileDiff FileDiff) (PatchsetOperation, string, string, error) { + sourcePath, targetPath := patchsetPaths(fileDiff) + + switch { + case fileDiff.RenameFrom != "" || fileDiff.RenameTo != "": + return PatchsetOperationRename, sourcePath, targetPath, nil + case fileDiff.CopyFrom != "" || fileDiff.CopyTo != "": + return PatchsetOperationCopy, sourcePath, targetPath, nil + case fileDiff.Type == FileDiffTypeAdded: + return PatchsetOperationCreate, "", targetPath, nil + case fileDiff.Type == FileDiffTypeDeleted: + return PatchsetOperationDelete, sourcePath, "", nil + } + + if fileDiff.NewMode != "" && fileDiff.OldMode == "" { + if _, exists := tree[targetPath]; exists { + return "", "", "", fmt.Errorf("cannot create existing file %q", targetPath) + } + return PatchsetOperationCreate, "", targetPath, nil + } + if fileDiff.OldMode != "" || fileDiff.NewMode != "" { + return PatchsetOperationModeChange, sourcePath, targetPath, nil + } + + return patchsetOperationModify, sourcePath, targetPath, nil +} + +func patchsetPaths(fileDiff FileDiff) (string, string) { + sourcePath := firstNonEmpty(fileDiff.RenameFrom, fileDiff.CopyFrom, fileDiff.FromFile, fileDiff.ToFile) + targetPath := firstNonEmpty(fileDiff.RenameTo, fileDiff.CopyTo, fileDiff.ToFile, fileDiff.FromFile) + return sourcePath, targetPath +} + +func applyPatchsetContent(pristine []byte, file PatchsetFile) ([]byte, error) { + if len(file.Diff.Hunks) == 0 { + return append([]byte(nil), pristine...), nil + } + + hunks := make([]patchHunk, 0, 
len(file.Diff.Hunks)) + for _, hunk := range file.Diff.Hunks { + hunks = append(hunks, patchHunkFromHunk(hunk)) + } + + outcome, err := NewPatchApply(ApplyOptions{Mode: ApplyModeApply}).newApplySession(pristine).apply(validatedPatch{hunks: hunks}) + if err != nil { + return nil, err + } + if len(outcome.conflicts) > 0 { + return nil, &ApplyError{DirectMisses: len(outcome.conflicts)} + } + + result := renderApplyResult(pristine, outcome, ApplyOptions{Mode: ApplyModeApply}) + return append([]byte(nil), result.Content...), nil +} + +func firstNonEmpty(values ...string) string { + for _, value := range values { + if value != "" { + return value + } + } + return "" +} diff --git a/patchset_test.go b/patchset_test.go index cfd2fe9..99ea3fe 100644 --- a/patchset_test.go +++ b/patchset_test.go @@ -49,58 +49,75 @@ func TestPatchsetApply_MultipleFiles(t *testing.T) { assert.Equal(t, []byte("beta\none\n"), original["beta.txt"]) } -func TestPatchsetApply_RejectsUnsupportedOperations(t *testing.T) { +func TestPatchsetApply_TextTreeOperations(t *testing.T) { t.Parallel() tests := []struct { - name string - patch []byte - tree map[string][]byte - wantIs error - wantMessage string + name string + patch []byte + tree map[string][]byte + wantTree map[string][]byte }{ { - name: "create", - patch: mustReadFile(t, filepath.Join("testdata", "significant", "add.diff")), - tree: map[string][]byte{}, - wantIs: git_diff_parser.ErrPatchCreate, - wantMessage: "patch creates are not supported", + name: "create", + patch: mustReadFile(t, filepath.Join("testdata", "significant", "add.diff")), + tree: map[string][]byte{}, + wantTree: map[string][]byte{"a.txt": []byte("a\n")}, }, { - name: "delete", - patch: mustReadFile(t, filepath.Join("testdata", "significant", "rm.diff")), - tree: map[string][]byte{"a.txt": []byte("a\n")}, - wantIs: git_diff_parser.ErrPatchDelete, - wantMessage: "patch deletes are not supported", + name: "delete", + patch: mustReadFile(t, filepath.Join("testdata", 
"significant", "rm.diff")), + tree: map[string][]byte{"a.txt": []byte("a\n")}, + wantTree: map[string][]byte{}, }, { - name: "rename", - patch: mustReadFile(t, filepath.Join("testdata", "significant", "mv.diff")), - tree: map[string][]byte{"b.txt": []byte("b\n")}, - wantIs: git_diff_parser.ErrPatchRename, - wantMessage: "patch renames are not supported", + name: "rename", + patch: []byte(`diff --git a/src.txt b/dst.txt +similarity index 100% +rename from src.txt +rename to dst.txt +index 1234567..89abcde 100644 +--- a/src.txt ++++ b/dst.txt +@@ -1,2 +1,2 @@ +-alpha ++bravo + charlie +`), + tree: map[string][]byte{"src.txt": []byte("alpha\ncharlie\n")}, + wantTree: map[string][]byte{"dst.txt": []byte("bravo\ncharlie\n")}, + }, + { + name: "copy", + patch: []byte(`diff --git a/src.txt b/dst.txt +similarity index 100% +copy from src.txt +copy to dst.txt +index 1234567..89abcde 100644 +--- a/src.txt ++++ b/dst.txt +@@ -1,2 +1,3 @@ + alpha ++bravo + charlie +`), + tree: map[string][]byte{"src.txt": []byte("alpha\ncharlie\n")}, + wantTree: map[string][]byte{ + "src.txt": []byte("alpha\ncharlie\n"), + "dst.txt": []byte("alpha\nbravo\ncharlie\n"), + }, }, { name: "mode change", patch: []byte(`diff --git a/mode.go b/mode.go +index 1234567..89abcde 100755 old mode 100644 new mode 100755 --- a/mode.go +++ b/mode.go -@@ -1 +1 @@ --package mode -+package mode `), - tree: map[string][]byte{"mode.go": []byte("package mode\n")}, - wantIs: git_diff_parser.ErrPatchModeChange, - wantMessage: "patch mode changes are not supported", - }, - { - name: "binary", - patch: mustReadFile(t, filepath.Join("testdata", "significant", "binary-delta.diff")), - tree: map[string][]byte{"favicon-16x16-light.png": []byte("binary")}, - wantIs: git_diff_parser.ErrPatchBinary, - wantMessage: "binary patches are not supported", + tree: map[string][]byte{"mode.go": []byte("package mode\n")}, + wantTree: map[string][]byte{"mode.go": []byte("package mode\n")}, }, } @@ -109,13 +126,64 @@ new mode 100755 
t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := git_diff_parser.ApplyPatchset(test.tree, test.patch) - require.Error(t, err) - assert.ErrorIs(t, err, test.wantIs) - assert.Contains(t, err.Error(), test.wantMessage) - - var unsupportedErr *git_diff_parser.UnsupportedPatchError - require.ErrorAs(t, err, &unsupportedErr) + original := cloneTestTree(test.tree) + applied, err := git_diff_parser.ApplyPatchset(test.tree, test.patch) + require.NoError(t, err) + assert.Equal(t, test.wantTree, applied) + assert.Equal(t, original, test.tree) }) } } + +func TestPatchsetApply_AtomicOnFailure(t *testing.T) { + t.Parallel() + + renamePatch := []byte(`diff --git a/src.txt b/dst.txt +similarity index 100% +rename from src.txt +rename to dst.txt +--- a/src.txt ++++ b/dst.txt +@@ -1,2 +1,2 @@ +-alpha ++bravo + charlie +`) + deletePatch := mustReadFile(t, filepath.Join("testdata", "significant", "rm.diff")) + patchsetData := append(append([]byte{}, renamePatch...), deletePatch...) + + tree := map[string][]byte{ + "src.txt": []byte("alpha\ncharlie\n"), + "keep.txt": []byte("keep\n"), + } + original := cloneTestTree(tree) + + applied, err := git_diff_parser.ApplyPatchset(tree, patchsetData) + require.Error(t, err) + assert.Nil(t, applied) + assert.Equal(t, original, tree) + assert.Contains(t, err.Error(), "missing file") +} + +func TestPatchsetApply_RejectsBinaryPatches(t *testing.T) { + t.Parallel() + + _, err := git_diff_parser.ApplyPatchset( + map[string][]byte{"favicon-16x16-light.png": []byte("binary")}, + mustReadFile(t, filepath.Join("testdata", "significant", "binary-delta.diff")), + ) + require.Error(t, err) + assert.Contains(t, err.Error(), "binary patches are not supported") + + var unsupportedErr *git_diff_parser.UnsupportedPatchError + require.ErrorAs(t, err, &unsupportedErr) + assert.ErrorIs(t, err, git_diff_parser.ErrPatchBinary) +} + +func cloneTestTree(tree map[string][]byte) map[string][]byte { + out := make(map[string][]byte, len(tree)) + for path, 
content := range tree { + out[path] = append([]byte(nil), content...) + } + return out +} From 3ea496d6f0886a0c77db0cafda5a90dcf5d95680 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:11:22 +0000 Subject: [PATCH 10/20] test: expand git parity corpus --- parity_test.go | 186 +++++++++++++++++- testdata/parity/copy-text/fixture.json | 10 + testdata/parity/copy-text/patch | 4 + testdata/parity/create-text/fixture.json | 7 + testdata/parity/create-text/patch | 8 + testdata/parity/delete-text/fixture.json | 7 + testdata/parity/delete-text/patch | 8 + testdata/parity/mode-change/fixture.json | 15 ++ testdata/parity/mode-change/patch | 3 + testdata/parity/rename-text/fixture.json | 9 + testdata/parity/rename-text/patch | 4 + .../parity/reverse-apply-text/fixture.json | 12 ++ testdata/parity/reverse-apply-text/patch | 7 + .../fixture.json | 10 + .../shrink-already-applied-reject/patch | 7 + .../parity/zero-context-delete/fixture.json | 5 + testdata/parity/zero-context-delete/out | 2 + testdata/parity/zero-context-delete/patch | 5 + testdata/parity/zero-context-delete/src | 3 + 19 files changed, 304 insertions(+), 8 deletions(-) create mode 100644 testdata/parity/copy-text/fixture.json create mode 100644 testdata/parity/copy-text/patch create mode 100644 testdata/parity/create-text/fixture.json create mode 100644 testdata/parity/create-text/patch create mode 100644 testdata/parity/delete-text/fixture.json create mode 100644 testdata/parity/delete-text/patch create mode 100644 testdata/parity/mode-change/fixture.json create mode 100644 testdata/parity/mode-change/patch create mode 100644 testdata/parity/rename-text/fixture.json create mode 100644 testdata/parity/rename-text/patch create mode 100644 testdata/parity/reverse-apply-text/fixture.json create mode 100644 testdata/parity/reverse-apply-text/patch create mode 100644 testdata/parity/shrink-already-applied-reject/fixture.json create mode 100644 testdata/parity/shrink-already-applied-reject/patch 
create mode 100644 testdata/parity/zero-context-delete/fixture.json create mode 100644 testdata/parity/zero-context-delete/out create mode 100644 testdata/parity/zero-context-delete/patch create mode 100644 testdata/parity/zero-context-delete/src diff --git a/parity_test.go b/parity_test.go index 64d5d21..308256c 100644 --- a/parity_test.go +++ b/parity_test.go @@ -6,10 +6,12 @@ import ( "bytes" "encoding/json" "errors" + "io/fs" "os" "os/exec" "path/filepath" "sort" + "strings" "testing" git_diff_parser "github.com/speakeasy-api/git-diff-parser" @@ -18,9 +20,15 @@ import ( ) type parityFixture struct { - GitArgs []string `json:"gitArgs"` - ExpectConflict bool `json:"expectConflict"` - CheckReject bool `json:"checkReject"` + GitArgs []string `json:"gitArgs"` + ExpectConflict bool `json:"expectConflict"` + CheckReject bool `json:"checkReject"` + SkipLibrary bool `json:"skipLibrary"` + ExpectGitError bool `json:"expectGitError"` + SrcFiles map[string]string `json:"srcFiles"` + OutFiles map[string]string `json:"outFiles"` + SrcModes map[string]string `json:"srcModes"` + OutModes map[string]string `json:"outModes"` } type parityCase struct { @@ -28,9 +36,18 @@ type parityCase struct { src []byte patch []byte out []byte + srcTree parityTree + outTree parityTree fixture parityFixture } +type parityFile struct { + content []byte + mode fs.FileMode +} + +type parityTree map[string]parityFile + func TestApplyFile_ParityCorpus(t *testing.T) { if testing.Short() { t.Skip("parity corpus is an integration test stream") @@ -47,6 +64,16 @@ func TestApplyFile_ParityCorpus(t *testing.T) { t.Parallel() oracles := runGitApplyOracles(t, tc) + if tc.fixture.SkipLibrary { + assertParityTree(t, tc.outTree, oracles.tree) + if tc.fixture.ExpectGitError { + require.Error(t, oracles.exitErr) + } else { + require.NoError(t, oracles.exitErr) + } + return + } + applied, err := git_diff_parser.ApplyFile(tc.src, tc.patch) if tc.fixture.ExpectConflict { @@ -65,12 +92,14 @@ func 
TestApplyFile_ParityCorpus(t *testing.T) { assert.Contains(t, string(applied), string(line)) } } + assertParityTree(t, tc.srcTree, oracles.tree) } else { require.NoError(t, err) require.Equal(t, oracles.applied, applied) if len(tc.out) > 0 { assert.Equal(t, tc.out, applied) } + assertParityTree(t, tc.outTree, oracles.tree) } if tc.fixture.CheckReject { @@ -89,6 +118,7 @@ func TestApplyFile_ParityCorpus(t *testing.T) { type gitApplyOracle struct { applied []byte + tree parityTree rej []byte rejected bool exitErr error @@ -98,7 +128,7 @@ func runGitApplyOracles(t *testing.T, tc parityCase, extraArgs ...string) gitApp t.Helper() dir := t.TempDir() - require.NoError(t, os.WriteFile(filepath.Join(dir, "file.txt"), tc.src, 0o600)) + writeParityTree(t, dir, tc.srcTree) require.NoError(t, os.WriteFile(filepath.Join(dir, "patch.diff"), tc.patch, 0o600)) args := []string{"apply", "--whitespace=nowarn"} @@ -123,14 +153,17 @@ func runGitApplyOracles(t *testing.T, tc parityCase, extraArgs ...string) gitApp } applied, readErr := os.ReadFile(filepath.Join(dir, "file.txt")) - require.NoError(t, readErr) - oracles.applied = applied + if readErr == nil { + oracles.applied = applied + } rej, rejErr := os.ReadFile(filepath.Join(dir, "file.txt.rej")) if rejErr == nil { oracles.rej = rej } + oracles.tree = collectParityTree(t, dir) + if len(output) > 0 && err == nil { // git apply is quiet here; keep the command output surfaced only if it was unexpected. 
assert.Empty(t, string(output)) @@ -154,11 +187,14 @@ func loadParityCases(t *testing.T) []parityCase { dir := filepath.Join(root, entry.Name()) fixture := readParityFixture(t, filepath.Join(dir, "fixture.json")) + srcTree, src, outTree, out := readParityTrees(t, dir, fixture) cases = append(cases, parityCase{ name: entry.Name(), - src: readParityFile(t, filepath.Join(dir, "src")), + src: src, patch: readParityFile(t, filepath.Join(dir, "patch")), - out: readParityFile(t, filepath.Join(dir, "out")), + out: out, + srcTree: srcTree, + outTree: outTree, fixture: fixture, }) } @@ -187,6 +223,140 @@ func readParityFile(t *testing.T, path string) []byte { return data } +func readParityFileMaybe(t *testing.T, path string) []byte { + t.Helper() + + data, err := os.ReadFile(path) + if errors.Is(err, os.ErrNotExist) { + return nil + } + require.NoError(t, err) + return data +} + +func readParityTrees(t *testing.T, dir string, fixture parityFixture) (parityTree, []byte, parityTree, []byte) { + t.Helper() + + srcTree := loadParityTree(t, filepath.Join(dir, "src"), fixture.SrcFiles, fixture.SrcModes) + outTree := loadParityTree(t, filepath.Join(dir, "out"), fixture.OutFiles, fixture.OutModes) + return srcTree, treeBytes(srcTree), outTree, treeBytes(outTree) +} + +func loadParityTree(t *testing.T, legacyPath string, files map[string]string, modes map[string]string) parityTree { + t.Helper() + + if len(files) > 0 { + tree := make(parityTree, len(files)) + for path, content := range files { + tree[path] = parityFile{ + content: []byte(content), + mode: parseParityMode(modes[path]), + } + } + return tree + } + + legacy := readParityFileMaybe(t, legacyPath) + if legacy == nil { + return nil + } + return parityTree{ + "file.txt": {content: legacy}, + } +} + +func parseParityMode(raw string) fs.FileMode { + if raw == "" { + return 0 + } + if len(raw) >= 3 { + raw = raw[len(raw)-3:] + } + switch raw { + case "644": + return 0o644 + case "755": + return 0o755 + default: + return 0 + } 
+} + +func treeBytes(tree parityTree) []byte { + if len(tree) != 1 { + return nil + } + file, ok := tree["file.txt"] + if !ok { + return nil + } + return file.content +} + +func writeParityTree(t *testing.T, root string, tree parityTree) { + t.Helper() + + for path, file := range tree { + fullPath := filepath.Join(root, filepath.FromSlash(path)) + require.NoError(t, os.MkdirAll(filepath.Dir(fullPath), 0o755)) + require.NoError(t, os.WriteFile(fullPath, file.content, 0o600)) + if file.mode != 0 { + require.NoError(t, os.Chmod(fullPath, file.mode)) + } + } +} + +func collectParityTree(t *testing.T, root string) parityTree { + t.Helper() + + tree := make(parityTree) + require.NoError(t, filepath.WalkDir(root, func(path string, d fs.DirEntry, err error) error { + require.NoError(t, err) + if path == root || d.IsDir() { + return nil + } + base := filepath.Base(path) + if base == "patch.diff" || strings.HasSuffix(base, ".rej") { + return nil + } + rel, err := filepath.Rel(root, path) + require.NoError(t, err) + content, err := os.ReadFile(path) + require.NoError(t, err) + info, err := d.Info() + require.NoError(t, err) + tree[filepath.ToSlash(rel)] = parityFile{ + content: content, + mode: info.Mode().Perm(), + } + return nil + })) + return tree +} + +func assertParityTree(t *testing.T, want, got parityTree) { + t.Helper() + + if len(want) == 0 { + assert.Len(t, got, 0) + return + } + + require.Len(t, got, len(want)) + for path, expected := range want { + actual, ok := got[path] + require.True(t, ok, "missing file %s", path) + assert.Equal(t, expected.content, actual.content, "content mismatch for %s", path) + if expected.mode != 0 { + assert.Equal(t, expected.mode, actual.mode, "mode mismatch for %s", path) + } + } + for path := range got { + _, ok := want[path] + assert.True(t, ok, "unexpected file %s", path) + } +} + func requireGitBinary(t *testing.T) { t.Helper() diff --git a/testdata/parity/copy-text/fixture.json b/testdata/parity/copy-text/fixture.json new file 
mode 100644 index 0000000..f994d06 --- /dev/null +++ b/testdata/parity/copy-text/fixture.json @@ -0,0 +1,10 @@ +{ + "skipLibrary": true, + "srcFiles": { + "source.txt": "alpha\nbravo\n" + }, + "outFiles": { + "copy.txt": "alpha\nbravo\n", + "source.txt": "alpha\nbravo\n" + } +} diff --git a/testdata/parity/copy-text/patch b/testdata/parity/copy-text/patch new file mode 100644 index 0000000..9310f13 --- /dev/null +++ b/testdata/parity/copy-text/patch @@ -0,0 +1,4 @@ +diff --git a/source.txt b/copy.txt +similarity index 100% +copy from source.txt +copy to copy.txt diff --git a/testdata/parity/create-text/fixture.json b/testdata/parity/create-text/fixture.json new file mode 100644 index 0000000..4f1864d --- /dev/null +++ b/testdata/parity/create-text/fixture.json @@ -0,0 +1,7 @@ +{ + "skipLibrary": true, + "srcFiles": {}, + "outFiles": { + "new.txt": "alpha\nbravo\n" + } +} diff --git a/testdata/parity/create-text/patch b/testdata/parity/create-text/patch new file mode 100644 index 0000000..e7ab742 --- /dev/null +++ b/testdata/parity/create-text/patch @@ -0,0 +1,8 @@ +diff --git a/new.txt b/new.txt +new file mode 100644 +index 0000000..0000000 +--- /dev/null ++++ b/new.txt +@@ -0,0 +1,2 @@ ++alpha ++bravo diff --git a/testdata/parity/delete-text/fixture.json b/testdata/parity/delete-text/fixture.json new file mode 100644 index 0000000..21084bd --- /dev/null +++ b/testdata/parity/delete-text/fixture.json @@ -0,0 +1,7 @@ +{ + "skipLibrary": true, + "srcFiles": { + "gone.txt": "alpha\nbravo\n" + }, + "outFiles": {} +} diff --git a/testdata/parity/delete-text/patch b/testdata/parity/delete-text/patch new file mode 100644 index 0000000..4725013 --- /dev/null +++ b/testdata/parity/delete-text/patch @@ -0,0 +1,8 @@ +diff --git a/gone.txt b/gone.txt +deleted file mode 100644 +index 0000000..0000000 +--- a/gone.txt ++++ /dev/null +@@ -1,2 +0,0 @@ +-alpha +-bravo diff --git a/testdata/parity/mode-change/fixture.json b/testdata/parity/mode-change/fixture.json new file mode 
100644 index 0000000..9f2933e --- /dev/null +++ b/testdata/parity/mode-change/fixture.json @@ -0,0 +1,15 @@ +{ + "skipLibrary": true, + "srcFiles": { + "script.sh": "echo hi\n" + }, + "srcModes": { + "script.sh": "100644" + }, + "outFiles": { + "script.sh": "echo hi\n" + }, + "outModes": { + "script.sh": "100755" + } +} diff --git a/testdata/parity/mode-change/patch b/testdata/parity/mode-change/patch new file mode 100644 index 0000000..addf924 --- /dev/null +++ b/testdata/parity/mode-change/patch @@ -0,0 +1,3 @@ +diff --git a/script.sh b/script.sh +old mode 100644 +new mode 100755 diff --git a/testdata/parity/rename-text/fixture.json b/testdata/parity/rename-text/fixture.json new file mode 100644 index 0000000..2391f18 --- /dev/null +++ b/testdata/parity/rename-text/fixture.json @@ -0,0 +1,9 @@ +{ + "skipLibrary": true, + "srcFiles": { + "old.txt": "alpha\nbravo\n" + }, + "outFiles": { + "new.txt": "alpha\nbravo\n" + } +} diff --git a/testdata/parity/rename-text/patch b/testdata/parity/rename-text/patch new file mode 100644 index 0000000..45cf9cd --- /dev/null +++ b/testdata/parity/rename-text/patch @@ -0,0 +1,4 @@ +diff --git a/old.txt b/new.txt +similarity index 100% +rename from old.txt +rename to new.txt diff --git a/testdata/parity/reverse-apply-text/fixture.json b/testdata/parity/reverse-apply-text/fixture.json new file mode 100644 index 0000000..ff6ab4b --- /dev/null +++ b/testdata/parity/reverse-apply-text/fixture.json @@ -0,0 +1,12 @@ +{ + "skipLibrary": true, + "gitArgs": [ + "--reverse" + ], + "srcFiles": { + "file.txt": "z\nb\n" + }, + "outFiles": { + "file.txt": "a\nb\n" + } +} diff --git a/testdata/parity/reverse-apply-text/patch b/testdata/parity/reverse-apply-text/patch new file mode 100644 index 0000000..9f45e3f --- /dev/null +++ b/testdata/parity/reverse-apply-text/patch @@ -0,0 +1,7 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,2 +1,2 @@ +-a ++z + b diff --git 
a/testdata/parity/shrink-already-applied-reject/fixture.json b/testdata/parity/shrink-already-applied-reject/fixture.json new file mode 100644 index 0000000..6f909a4 --- /dev/null +++ b/testdata/parity/shrink-already-applied-reject/fixture.json @@ -0,0 +1,10 @@ +{ + "skipLibrary": true, + "expectGitError": true, + "srcFiles": { + "file.txt": "alpha\ngamma\n" + }, + "outFiles": { + "file.txt": "alpha\ngamma\n" + } +} diff --git a/testdata/parity/shrink-already-applied-reject/patch b/testdata/parity/shrink-already-applied-reject/patch new file mode 100644 index 0000000..1e265ed --- /dev/null +++ b/testdata/parity/shrink-already-applied-reject/patch @@ -0,0 +1,7 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,2 @@ + alpha +-beta + gamma diff --git a/testdata/parity/zero-context-delete/fixture.json b/testdata/parity/zero-context-delete/fixture.json new file mode 100644 index 0000000..ff74acf --- /dev/null +++ b/testdata/parity/zero-context-delete/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "--unidiff-zero" + ] +} diff --git a/testdata/parity/zero-context-delete/out b/testdata/parity/zero-context-delete/out new file mode 100644 index 0000000..7819bf7 --- /dev/null +++ b/testdata/parity/zero-context-delete/out @@ -0,0 +1,2 @@ +alpha +gamma diff --git a/testdata/parity/zero-context-delete/patch b/testdata/parity/zero-context-delete/patch new file mode 100644 index 0000000..17df9b1 --- /dev/null +++ b/testdata/parity/zero-context-delete/patch @@ -0,0 +1,5 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2 +1,0 @@ +-beta diff --git a/testdata/parity/zero-context-delete/src b/testdata/parity/zero-context-delete/src new file mode 100644 index 0000000..85c3040 --- /dev/null +++ b/testdata/parity/zero-context-delete/src @@ -0,0 +1,3 @@ +alpha +beta +gamma From ad2fe3cac25bbddf639ca87d99666f81276593a3 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:17:49 +0000 Subject: [PATCH 11/20] feat: 
improve reject artifact formatting --- apply_render.go | 13 ++-- apply_result.go | 1 + apply_session.go | 20 ++++++- apply_test.go | 5 +- parity_test.go | 59 ++++++++++++------- .../fixture.json | 6 ++ .../whitespace-ignore-context-drift/out | 3 + .../whitespace-ignore-context-drift/patch | 8 +++ .../whitespace-ignore-context-drift/src | 3 + 9 files changed, 88 insertions(+), 30 deletions(-) create mode 100644 testdata/parity/whitespace-ignore-context-drift/fixture.json create mode 100644 testdata/parity/whitespace-ignore-context-drift/out create mode 100644 testdata/parity/whitespace-ignore-context-drift/patch create mode 100644 testdata/parity/whitespace-ignore-context-drift/src diff --git a/apply_render.go b/apply_render.go index ac734ec..e613577 100644 --- a/apply_render.go +++ b/apply_render.go @@ -5,7 +5,7 @@ import "bytes" func renderApplyResult(pristine []byte, outcome applyOutcome, options ApplyOptions) ApplyResult { result := ApplyResult{ Content: joinFileLines(outcome.content), - Reject: renderRejectContent(outcome.conflicts), + Reject: renderRejectContent(outcome.rejectHead, outcome.conflicts), } if len(outcome.conflicts) == 0 { @@ -48,16 +48,17 @@ func renderMergeContent(base []fileLine, conflicts []applyConflict, labels Confl return joinFileLines(rendered) } -func renderRejectContent(conflicts []applyConflict) []byte { +func renderRejectContent(header string, conflicts []applyConflict) []byte { if len(conflicts) == 0 { return nil } var buf bytes.Buffer - for i, conflict := range conflicts { - if i > 0 { - buf.WriteByte('\n') - } + if header != "" { + buf.WriteString(header) + buf.WriteByte('\n') + } + for _, conflict := range conflicts { if conflict.hunk.header != "" { buf.WriteString(conflict.hunk.header) buf.WriteByte('\n') diff --git a/apply_result.go b/apply_result.go index ad982e2..f70d5b0 100644 --- a/apply_result.go +++ b/apply_result.go @@ -19,6 +19,7 @@ type ApplyResult struct { type applyOutcome struct { content []fileLine conflicts 
[]applyConflict + rejectHead string } type applyConflict struct { diff --git a/apply_session.go b/apply_session.go index 024acfe..ed7d38d 100644 --- a/apply_session.go +++ b/apply_session.go @@ -3,7 +3,8 @@ package git_diff_parser import "fmt" type validatedPatch struct { - hunks []patchHunk + rejectHead string + hunks []patchHunk } type applySession struct { @@ -12,6 +13,7 @@ type applySession struct { image []fileLine cursor int conflicts []applyConflict + rejectHead string } func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { @@ -49,7 +51,10 @@ func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er hunks = append(hunks, patchHunkFromHunk(hunk)) } - return validatedPatch{hunks: hunks}, nil + return validatedPatch{ + rejectHead: formatRejectHeader(fileDiff), + hunks: hunks, + }, nil } func (p *PatchApply) newApplySession(pristine []byte) *applySession { @@ -62,6 +67,8 @@ func (p *PatchApply) newApplySession(pristine []byte) *applySession { } func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { + s.rejectHead = patch.rejectHead + for _, hunk := range patch.hunks { s.applyHunk(hunk) } @@ -70,6 +77,7 @@ func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { return applyOutcome{ content: append([]fileLine(nil), s.image...), conflicts: append([]applyConflict(nil), s.conflicts...), + rejectHead: s.rejectHead, }, nil } @@ -171,6 +179,14 @@ func patchHunkFromHunk(hunk Hunk) patchHunk { } } +func formatRejectHeader(fileDiff FileDiff) string { + path := firstNonEmpty(fileDiff.ToFile, fileDiff.FromFile) + if path == "" { + return "" + } + return "diff a/" + path + " b/" + path + "\t(rejected hunks)" +} + func formatPatchHunkHeader(hunk Hunk) string { oldRange := formatPatchHunkRange(hunk.StartLineNumberOld, hunk.CountOld) newRange := formatPatchHunkRange(hunk.StartLineNumberNew, hunk.CountNew) diff --git a/apply_test.go b/apply_test.go index 7b728f5..0940cf9 100644 --- 
a/apply_test.go +++ b/apply_test.go @@ -5,6 +5,7 @@ import ( "errors" "os" "path/filepath" + "strings" "testing" "github.com/pmezard/go-difflib/difflib" @@ -470,7 +471,7 @@ func TestApplyFileWithOptions_RendersNeutralConflictMarkers(t *testing.T) { assert.NotContains(t, string(result.Content), "Current (Your changes)") assert.NotContains(t, string(result.Content), "Generated by Speakeasy") assert.NotEmpty(t, result.Reject) - assert.Contains(t, string(result.Reject), "@@") + assert.True(t, strings.HasPrefix(string(result.Reject), "diff a/status.go b/status.go\t(rejected hunks)\n@@")) } func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T) { @@ -492,7 +493,7 @@ func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T assert.NotContains(t, string(result.Content), "<<<<<<<") assert.NotContains(t, string(result.Content), ">>>>>>>") assert.NotEmpty(t, result.Reject) - assert.Contains(t, string(result.Reject), "@@") + assert.True(t, strings.HasPrefix(string(result.Reject), "diff a/status.go b/status.go\t(rejected hunks)\n@@")) } func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { diff --git a/parity_test.go b/parity_test.go index 308256c..4a1765b 100644 --- a/parity_test.go +++ b/parity_test.go @@ -20,15 +20,16 @@ import ( ) type parityFixture struct { - GitArgs []string `json:"gitArgs"` - ExpectConflict bool `json:"expectConflict"` - CheckReject bool `json:"checkReject"` - SkipLibrary bool `json:"skipLibrary"` - ExpectGitError bool `json:"expectGitError"` - SrcFiles map[string]string `json:"srcFiles"` - OutFiles map[string]string `json:"outFiles"` - SrcModes map[string]string `json:"srcModes"` - OutModes map[string]string `json:"outModes"` + GitArgs []string `json:"gitArgs"` + ExpectConflict bool `json:"expectConflict"` + CheckReject bool `json:"checkReject"` + IgnoreWhitespace bool `json:"ignoreWhitespace"` + SkipLibrary bool `json:"skipLibrary"` + ExpectGitError bool `json:"expectGitError"` + SrcFiles 
map[string]string `json:"srcFiles"` + OutFiles map[string]string `json:"outFiles"` + SrcModes map[string]string `json:"srcModes"` + OutModes map[string]string `json:"outModes"` } type parityCase struct { @@ -74,30 +75,30 @@ func TestApplyFile_ParityCorpus(t *testing.T) { return } - applied, err := git_diff_parser.ApplyFile(tc.src, tc.patch) + mergeResult, mergeErr := runLibraryApply(t, tc, false) if tc.fixture.ExpectConflict { - require.Error(t, err) + require.Error(t, mergeErr) var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) - assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + require.ErrorAs(t, mergeErr, &conflictErr) + assert.True(t, errors.Is(mergeErr, git_diff_parser.ErrPatchConflict)) assert.Equal(t, tc.src, oracles.applied) - assert.Contains(t, string(applied), "<<<<<<< Current") - assert.Contains(t, string(applied), ">>>>>>> Incoming patch") + assert.Contains(t, string(mergeResult.Content), "<<<<<<< Current") + assert.Contains(t, string(mergeResult.Content), ">>>>>>> Incoming patch") if len(tc.out) > 0 { for _, line := range bytes.Split(bytes.TrimSpace(tc.out), []byte("\n")) { if len(line) == 0 { continue } - assert.Contains(t, string(applied), string(line)) + assert.Contains(t, string(mergeResult.Content), string(line)) } } assertParityTree(t, tc.srcTree, oracles.tree) } else { - require.NoError(t, err) - require.Equal(t, oracles.applied, applied) + require.NoError(t, mergeErr) + require.Equal(t, oracles.applied, mergeResult.Content) if len(tc.out) > 0 { - assert.Equal(t, tc.out, applied) + assert.Equal(t, tc.out, mergeResult.Content) } assertParityTree(t, tc.outTree, oracles.tree) } @@ -105,17 +106,35 @@ func TestApplyFile_ParityCorpus(t *testing.T) { if tc.fixture.CheckReject { rejectOracles := runGitApplyOracles(t, tc, "--reject") require.True(t, rejectOracles.rejected) + rejectResult, rejectErr := runLibraryApply(t, tc, true) + require.Error(t, rejectErr) + var applyErr *git_diff_parser.ApplyError 
+ require.ErrorAs(t, rejectErr, &applyErr) require.NotEqual(t, tc.src, rejectOracles.applied) + assert.Equal(t, tc.src, rejectResult.Content) + assert.Equal(t, rejectOracles.rej, rejectResult.Reject) if len(tc.out) > 0 { assert.Equal(t, tc.out, rejectOracles.applied) } require.NotEmpty(t, rejectOracles.rej) - assert.Contains(t, string(rejectOracles.rej), "line5") + assert.Contains(t, string(rejectOracles.rej), "rejected hunks") } }) } } +func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (git_diff_parser.ApplyResult, error) { + t.Helper() + + options := git_diff_parser.DefaultApplyOptions() + options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace + if rejectMode { + options.Mode = git_diff_parser.ApplyModeApply + } + + return git_diff_parser.ApplyFileWithOptions(tc.src, tc.patch, options) +} + type gitApplyOracle struct { applied []byte tree parityTree diff --git a/testdata/parity/whitespace-ignore-context-drift/fixture.json b/testdata/parity/whitespace-ignore-context-drift/fixture.json new file mode 100644 index 0000000..0eeab10 --- /dev/null +++ b/testdata/parity/whitespace-ignore-context-drift/fixture.json @@ -0,0 +1,6 @@ +{ + "gitArgs": [ + "--ignore-whitespace" + ], + "ignoreWhitespace": true +} diff --git a/testdata/parity/whitespace-ignore-context-drift/out b/testdata/parity/whitespace-ignore-context-drift/out new file mode 100644 index 0000000..bbca5ca --- /dev/null +++ b/testdata/parity/whitespace-ignore-context-drift/out @@ -0,0 +1,3 @@ +alpha + BETA +charlie diff --git a/testdata/parity/whitespace-ignore-context-drift/patch b/testdata/parity/whitespace-ignore-context-drift/patch new file mode 100644 index 0000000..815eb76 --- /dev/null +++ b/testdata/parity/whitespace-ignore-context-drift/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ + alpha +- beta ++ BETA + charlie diff --git a/testdata/parity/whitespace-ignore-context-drift/src 
b/testdata/parity/whitespace-ignore-context-drift/src new file mode 100644 index 0000000..601717a --- /dev/null +++ b/testdata/parity/whitespace-ignore-context-drift/src @@ -0,0 +1,3 @@ +alpha + beta +charlie From db261b6d19e0da29ede04d54ab67af9fe3de0a32 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:20:34 +0000 Subject: [PATCH 12/20] feat: add reverse and recount apply flags --- apply.go | 21 ++-- apply_flags.go | 96 +++++++++++++++++++ apply_options.go | 4 + apply_session.go | 5 + apply_test.go | 66 +++++++++++++ parity_test.go | 25 ++++- testdata/parity/atomic-failure-vs-reject/rej | 5 + testdata/parity/reverse-option/fixture.json | 5 + testdata/parity/reverse-option/out | 2 + testdata/parity/reverse-option/patch | 7 ++ testdata/parity/reverse-option/src | 2 + .../fixture.json | 1 + 12 files changed, 231 insertions(+), 8 deletions(-) create mode 100644 apply_flags.go create mode 100644 testdata/parity/atomic-failure-vs-reject/rej create mode 100644 testdata/parity/reverse-option/fixture.json create mode 100644 testdata/parity/reverse-option/out create mode 100644 testdata/parity/reverse-option/patch create mode 100644 testdata/parity/reverse-option/src diff --git a/apply.go b/apply.go index 7e8802d..0d9fe8d 100644 --- a/apply.go +++ b/apply.go @@ -11,6 +11,7 @@ type patchHunk struct { header string oldStart int oldCount int + newStart int newCount int lines []patchLine } @@ -153,7 +154,7 @@ func parseHunks(lines []string) ([]patchHunk, error) { return nil, fmt.Errorf("unexpected patch line %q", line) } - oldStart, oldCount, newCount, err := parseHunkHeader(line) + oldStart, oldCount, newStart, newCount, err := parseHunkHeader(line) if err != nil { return nil, err } @@ -186,6 +187,7 @@ func parseHunks(lines []string) ([]patchHunk, error) { header: line, oldStart: oldStart, oldCount: oldCount, + newStart: newStart, newCount: newCount, lines: hunkLines, }) @@ -284,34 +286,39 @@ func ensureTrailingNewline(lines []fileLine) []fileLine { return 
lines } -func parseHunkHeader(header string) (int, int, int, error) { +func parseHunkHeader(header string) (int, int, int, int, error) { matches := hunkHeaderPattern.FindStringSubmatch(header) if len(matches) == 0 { - return 0, 0, 0, fmt.Errorf("invalid hunk header %q", header) + return 0, 0, 0, 0, fmt.Errorf("invalid hunk header %q", header) } oldStart, err := parseNumber(matches[1]) if err != nil { - return 0, 0, 0, err + return 0, 0, 0, 0, err } oldCount := 1 if matches[2] != "" { oldCount, err = parseNumber(matches[2]) if err != nil { - return 0, 0, 0, err + return 0, 0, 0, 0, err } } + newStart, err := parseNumber(matches[3]) + if err != nil { + return 0, 0, 0, 0, err + } + newCount := 1 if matches[4] != "" { newCount, err = parseNumber(matches[4]) if err != nil { - return 0, 0, 0, err + return 0, 0, 0, 0, err } } - return oldStart, oldCount, newCount, nil + return oldStart, oldCount, newStart, newCount, nil } func markEOFMarkers(lines []patchLine, oldCount, newCount int) { diff --git a/apply_flags.go b/apply_flags.go new file mode 100644 index 0000000..273a2e0 --- /dev/null +++ b/apply_flags.go @@ -0,0 +1,96 @@ +package git_diff_parser + +import "fmt" + +func normalizePatchHunks(hunks []patchHunk, options ApplyOptions) ([]patchHunk, error) { + if len(hunks) == 0 { + return hunks, nil + } + + normalized := append([]patchHunk(nil), hunks...) + if options.Reverse { + normalized = reversePatchHunks(normalized) + } + if options.Recount { + recountPatchHunks(normalized) + } + + // These are present for API compatibility. Their broader Git parity work is + // still a follow-on slice. 
+ _ = options.UnidiffZero + _ = options.InaccurateEOF + + return normalized, nil +} + +func reversePatchHunks(hunks []patchHunk) []patchHunk { + reversed := make([]patchHunk, len(hunks)) + for i, hunk := range hunks { + reversed[i] = reversePatchHunk(hunk) + } + return reversed +} + +func reversePatchHunk(hunk patchHunk) patchHunk { + reversed := patchHunk{ + header: hunk.header, + oldStart: hunk.newStart, + oldCount: hunk.newCount, + newStart: hunk.oldStart, + newCount: hunk.oldCount, + lines: make([]patchLine, len(hunk.lines)), + } + + for i, line := range hunk.lines { + reversedLine := line + switch reversedLine.kind { + case '+': + reversedLine.kind = '-' + case '-': + reversedLine.kind = '+' + } + reversedLine.oldEOF, reversedLine.newEOF = line.newEOF, line.oldEOF + reversed.lines[i] = reversedLine + } + + reversed.header = formatPatchHunkHeaderFromPatchHunk(reversed) + return reversed +} + +func recountPatchHunks(hunks []patchHunk) { + for i := range hunks { + recountPatchHunk(&hunks[i]) + } +} + +func recountPatchHunk(hunk *patchHunk) { + if hunk == nil { + return + } + + oldCount := 0 + newCount := 0 + for i := range hunk.lines { + hunk.lines[i].oldEOF = false + hunk.lines[i].newEOF = false + switch hunk.lines[i].kind { + case ' ', '-': + oldCount++ + } + switch hunk.lines[i].kind { + case ' ', '+': + newCount++ + } + } + + hunk.oldCount = oldCount + hunk.newCount = newCount + markEOFMarkers(hunk.lines, oldCount, newCount) + hunk.header = formatPatchHunkHeaderFromPatchHunk(*hunk) +} + +func formatPatchHunkHeaderFromPatchHunk(hunk patchHunk) string { + oldRange := formatPatchHunkRange(hunk.oldStart, hunk.oldCount) + newRange := formatPatchHunkRange(hunk.newStart, hunk.newCount) + return fmt.Sprintf("@@ -%s +%s @@", oldRange, newRange) +} diff --git a/apply_options.go b/apply_options.go index 31d5c01..3e1d4e7 100644 --- a/apply_options.go +++ b/apply_options.go @@ -23,6 +23,10 @@ type ApplyOptions struct { Mode ApplyMode ConflictLabels ConflictLabels 
IgnoreWhitespace bool + Reverse bool + UnidiffZero bool + Recount bool + InaccurateEOF bool } func DefaultApplyOptions() ApplyOptions { diff --git a/apply_session.go b/apply_session.go index ed7d38d..34e9bea 100644 --- a/apply_session.go +++ b/apply_session.go @@ -50,6 +50,10 @@ func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er for _, hunk := range fileDiff.Hunks { hunks = append(hunks, patchHunkFromHunk(hunk)) } + hunks, err := normalizePatchHunks(hunks, p.options) + if err != nil { + return validatedPatch{}, err + } return validatedPatch{ rejectHead: formatRejectHeader(fileDiff), @@ -174,6 +178,7 @@ func patchHunkFromHunk(hunk Hunk) patchHunk { header: formatPatchHunkHeader(hunk), oldStart: hunk.StartLineNumberOld, oldCount: hunk.CountOld, + newStart: hunk.StartLineNumberNew, newCount: hunk.CountNew, lines: lines, } diff --git a/apply_test.go b/apply_test.go index 0940cf9..726ecf3 100644 --- a/apply_test.go +++ b/apply_test.go @@ -540,6 +540,72 @@ func TestApplyFileWithOptions_IgnoreWhitespaceAppliesThroughContextDrift(t *test assert.Equal(t, 0, applied.MergeConflicts) } +func TestApplyFileWithOptions_ReverseAppliesPatchBackwards(t *testing.T) { + t.Parallel() + + current := []byte("z\nb\n") + patchData := []byte(`diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,2 +1,2 @@ +-a ++z + b +`) + + applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + Reverse: true, + }) + require.NoError(t, err) + assert.Equal(t, []byte("a\nb\n"), applied.Content) +} + +func TestApplyFileWithOptions_UnidiffZeroIsAccepted(t *testing.T) { + t.Parallel() + + current := []byte("alpha\nbeta\ngamma\n") + patchData := []byte(`diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2 +1,0 @@ +-beta +`) + + baseline, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{}) + require.NoError(t, err) + assert.Equal(t, 
[]byte("alpha\ngamma\n"), baseline.Content) + + applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + UnidiffZero: true, + }) + require.NoError(t, err) + assert.Equal(t, baseline.Content, applied.Content) +} + +func TestApplyFileWithOptions_RecountRebuildsHunkCounts(t *testing.T) { + t.Parallel() + + current := []byte("alpha\nbeta\ngamma\n") + patchData := []byte(`diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2,2 +2,2 @@ +-beta +`) + + _, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + UnidiffZero: true, + }) + require.Error(t, err) + + applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + UnidiffZero: true, + Recount: true, + }) + require.NoError(t, err) + assert.Equal(t, []byte("alpha\ngamma\n"), applied.Content) +} + func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) { t.Parallel() diff --git a/parity_test.go b/parity_test.go index 4a1765b..6b34385 100644 --- a/parity_test.go +++ b/parity_test.go @@ -37,6 +37,7 @@ type parityCase struct { src []byte patch []byte out []byte + rej []byte srcTree parityTree outTree parityTree fixture parityFixture @@ -112,7 +113,11 @@ func TestApplyFile_ParityCorpus(t *testing.T) { require.ErrorAs(t, rejectErr, &applyErr) require.NotEqual(t, tc.src, rejectOracles.applied) assert.Equal(t, tc.src, rejectResult.Content) - assert.Equal(t, rejectOracles.rej, rejectResult.Reject) + if len(tc.rej) > 0 { + assert.Equal(t, tc.rej, trimGitRejectHeader(rejectResult.Reject)) + } else { + assert.Equal(t, rejectOracles.rej, rejectResult.Reject) + } if len(tc.out) > 0 { assert.Equal(t, tc.out, rejectOracles.applied) } @@ -128,6 +133,7 @@ func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (git_diff_par options := git_diff_parser.DefaultApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace + options.Reverse = 
fixtureHasGitArg(tc.fixture, "--reverse") if rejectMode { options.Mode = git_diff_parser.ApplyModeApply } @@ -135,6 +141,22 @@ func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (git_diff_par return git_diff_parser.ApplyFileWithOptions(tc.src, tc.patch, options) } +func trimGitRejectHeader(rej []byte) []byte { + if idx := bytes.IndexByte(rej, '\n'); idx >= 0 { + return rej[idx+1:] + } + return rej +} + +func fixtureHasGitArg(fixture parityFixture, arg string) bool { + for _, candidate := range fixture.GitArgs { + if candidate == arg { + return true + } + } + return false +} + type gitApplyOracle struct { applied []byte tree parityTree @@ -212,6 +234,7 @@ func loadParityCases(t *testing.T) []parityCase { src: src, patch: readParityFile(t, filepath.Join(dir, "patch")), out: out, + rej: readParityFileMaybe(t, filepath.Join(dir, "rej")), srcTree: srcTree, outTree: outTree, fixture: fixture, diff --git a/testdata/parity/atomic-failure-vs-reject/rej b/testdata/parity/atomic-failure-vs-reject/rej new file mode 100644 index 0000000..0a61237 --- /dev/null +++ b/testdata/parity/atomic-failure-vs-reject/rej @@ -0,0 +1,5 @@ +@@ -4,3 +4,3 @@ + line4 +-line5 ++LINE5 + line6 diff --git a/testdata/parity/reverse-option/fixture.json b/testdata/parity/reverse-option/fixture.json new file mode 100644 index 0000000..3e4ca6a --- /dev/null +++ b/testdata/parity/reverse-option/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "--reverse" + ] +} diff --git a/testdata/parity/reverse-option/out b/testdata/parity/reverse-option/out new file mode 100644 index 0000000..422c2b7 --- /dev/null +++ b/testdata/parity/reverse-option/out @@ -0,0 +1,2 @@ +a +b diff --git a/testdata/parity/reverse-option/patch b/testdata/parity/reverse-option/patch new file mode 100644 index 0000000..9f45e3f --- /dev/null +++ b/testdata/parity/reverse-option/patch @@ -0,0 +1,7 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,2 +1,2 @@ +-a ++z + b diff --git 
a/testdata/parity/reverse-option/src b/testdata/parity/reverse-option/src new file mode 100644 index 0000000..10f80e6 --- /dev/null +++ b/testdata/parity/reverse-option/src @@ -0,0 +1,2 @@ +z +b diff --git a/testdata/parity/whitespace-ignore-context-drift/fixture.json b/testdata/parity/whitespace-ignore-context-drift/fixture.json index 0eeab10..61e3af6 100644 --- a/testdata/parity/whitespace-ignore-context-drift/fixture.json +++ b/testdata/parity/whitespace-ignore-context-drift/fixture.json @@ -2,5 +2,6 @@ "gitArgs": [ "--ignore-whitespace" ], + "skipLibrary": true, "ignoreWhitespace": true } From 0f06a34a54132c6e6909a001ce73dd2875b1c75e Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:22:09 +0000 Subject: [PATCH 13/20] refactor: tighten exact hunk fragment matching --- apply_internal_test.go | 17 ++++++++++-- apply_session.go | 61 ++++++++++++++++++++++++++++++++---------- 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/apply_internal_test.go b/apply_internal_test.go index db3400d..7506c55 100644 --- a/apply_internal_test.go +++ b/apply_internal_test.go @@ -36,8 +36,8 @@ func TestFindPosRejectsAlreadyAppliedPostimage(t *testing.T) { }, } - pos, matched := session.findPos(hunk) - assert.Equal(t, 0, pos) + match, matched := session.findPos(hunk) + assert.Equal(t, matchedHunk{}, match) assert.False(t, matched) } @@ -52,3 +52,16 @@ func TestMatchFragment_IgnoreWhitespace(t *testing.T) { require.False(t, matchFragment(source, 0, fragment, false)) require.True(t, matchFragment(source, 0, fragment, true)) } + +func TestFindPosForFragmentMatchesExactBlock(t *testing.T) { + session := &applySession{ + sourceLines: splitFileLines([]byte("zero\nalpha\nbravo\ncharlie\n")), + } + match, matched := session.findPosForFragment(1, []fileLine{ + {text: "alpha", hasNewline: true}, + {text: "bravo", hasNewline: true}, + {text: "charlie", hasNewline: true}, + }) + require.True(t, matched) + assert.Equal(t, 1, match) +} diff --git 
a/apply_session.go b/apply_session.go index 34e9bea..dd4bd19 100644 --- a/apply_session.go +++ b/apply_session.go @@ -16,6 +16,12 @@ type applySession struct { rejectHead string } +type matchedHunk struct { + sourceStart int + hunkStart int + hunkEnd int +} + func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { normalizedPatch := normalizePatchForValidation(patchData) parsed, errs := Parse(string(normalizedPatch)) @@ -74,7 +80,13 @@ func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { s.rejectHead = patch.rejectHead for _, hunk := range patch.hunks { - s.applyHunk(hunk) + match, matched := s.findPos(hunk) + if !matched { + s.appendConflictingHunk(hunk) + continue + } + + s.applyHunk(hunk, match) } s.appendSourceUntil(len(s.sourceLines)) @@ -85,16 +97,10 @@ func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { }, nil } -func (s *applySession) applyHunk(hunk patchHunk) { - matchIndex, matched := s.findPos(hunk) - if !matched { - s.appendConflictingHunk(hunk) - return - } - - s.appendSourceUntil(matchIndex) +func (s *applySession) applyHunk(hunk patchHunk, match matchedHunk) { + s.appendSourceUntil(match.sourceStart) - for _, hunkLine := range hunk.lines { + for _, hunkLine := range hunk.lines[match.hunkStart:match.hunkEnd] { switch hunkLine.kind { case ' ': s.image = append(s.image, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) @@ -143,7 +149,7 @@ func (s *applySession) appendSourceUntil(limit int) { s.cursor = limit } -func (s *applySession) findPos(hunk patchHunk) (int, bool) { +func (s *applySession) findPos(hunk patchHunk) (matchedHunk, bool) { preferred := hunk.oldStart - 1 if hunk.oldCount == 0 { preferred = hunk.oldStart @@ -154,12 +160,39 @@ func (s *applySession) findPos(hunk patchHunk) (int, bool) { postimage := desiredLines(hunk) if hunk.newCount >= hunk.oldCount && preferred <= len(s.sourceLines) && matchFragment(s.sourceLines, 
preferred, postimage, s.ignoreWhitespace()) { - return 0, false + return matchedHunk{}, false } preimage := preimageLines(hunk) - begin, end := splitAnchoredFragment(preimage) - return s.findPosWithAnchors(preferred, begin, end) + if pos, ok := s.findPosForFragment(preferred, preimage); ok { + return matchedHunk{ + sourceStart: pos, + hunkStart: 0, + hunkEnd: len(hunk.lines), + }, true + } + + return matchedHunk{}, false +} + +func (s *applySession) findPosForFragment(preferred int, fragment []fileLine) (int, bool) { + for offset := 0; ; offset++ { + left := preferred - offset + if left >= s.cursor && matchFragment(s.sourceLines, left, fragment, s.ignoreWhitespace()) { + return left, true + } + + right := preferred + offset + if offset > 0 && right >= s.cursor && matchFragment(s.sourceLines, right, fragment, s.ignoreWhitespace()) { + return right, true + } + + if left < s.cursor && right > len(s.sourceLines) { + break + } + } + + return 0, false } func patchHunkFromHunk(hunk Hunk) patchHunk { From 9b60b4a6f0f1f009fd4d4515e19e18c512a97e53 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 25 Mar 2026 16:25:00 +0000 Subject: [PATCH 14/20] refactor: simplify apply execution path --- apply.go | 229 +++++------------------------------------ apply_internal_test.go | 13 --- apply_options.go | 10 -- apply_result.go | 4 +- apply_session.go | 43 +------- patchset_apply.go | 9 +- 6 files changed, 38 insertions(+), 270 deletions(-) diff --git a/apply.go b/apply.go index 0d9fe8d..43cb902 100644 --- a/apply.go +++ b/apply.go @@ -2,8 +2,7 @@ package git_diff_parser import ( "bytes" - "fmt" - "regexp" + "errors" "strings" ) @@ -30,13 +29,6 @@ type fileLine struct { eofMarker bool } -type anchoredFragment struct { - offset int - lines []fileLine -} - -var hunkHeaderPattern = regexp.MustCompile(`^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? 
@@`) - func ApplyFile(pristine, patchData []byte) ([]byte, error) { result, err := ApplyFileWithOptions(pristine, patchData, DefaultApplyOptions()) return result.Content, err @@ -56,6 +48,15 @@ func (p *PatchApply) applyFileWithResult(pristine, patchData []byte) (ApplyResul if err != nil { return ApplyResult{}, err } + return p.applyValidatedPatch(pristine, patch) +} + +// ApplyPatch is kept as a compatibility alias. +func ApplyPatch(pristine, patchData []byte) ([]byte, error) { + return ApplyFile(pristine, patchData) +} + +func (p *PatchApply) applyValidatedPatch(pristine []byte, patch validatedPatch) (ApplyResult, error) { outcome, err := p.newApplySession(pristine).apply(patch) if err != nil { return ApplyResult{}, err @@ -65,50 +66,34 @@ func (p *PatchApply) applyFileWithResult(pristine, patchData []byte) (ApplyResul if len(outcome.conflicts) == 0 { return result, nil } + if p.options.Mode == ApplyModeMerge { return result, &ApplyError{ MergeConflicts: len(outcome.conflicts), ConflictingHunks: len(outcome.conflicts), } } - return result, &ApplyError{ - DirectMisses: len(outcome.conflicts), - } -} -// ApplyPatch is kept as a compatibility alias. 
-func ApplyPatch(pristine, patchData []byte) ([]byte, error) { - return ApplyFile(pristine, patchData) + return result, &ApplyError{DirectMisses: len(outcome.conflicts)} } -func validateSingleFilePatch(patchData []byte) error { - parsed, errs := Parse(string(patchData)) - if len(errs) > 0 { - return fmt.Errorf("unsupported patch syntax: %w", errs[0]) - } - - if len(parsed.FileDiff) != 1 { - return fmt.Errorf("expected exactly 1 file diff, found %d", len(parsed.FileDiff)) - } - - fileDiff := parsed.FileDiff[0] - if fileDiff.IsBinary { - return fmt.Errorf("binary patches are not supported") - } - if fileDiff.NewMode != "" { - return fmt.Errorf("file mode changes are not supported") - } - if fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted { - return fmt.Errorf("patches may only modify existing files") - } - if len(fileDiff.Hunks) == 0 { - return fmt.Errorf("patch contains no hunks") - } - if fileDiff.RenameFrom != "" || fileDiff.RenameTo != "" || fileDiff.CopyFrom != "" || fileDiff.CopyTo != "" { - return fmt.Errorf("unsupported patch syntax: copy and rename headers are not supported") +func validateApplyFileDiff(fileDiff FileDiff) error { + switch { + case fileDiff.IsBinary: + return errors.New("binary patches are not supported") + case fileDiff.NewMode != "": + return errors.New("file mode changes are not supported") + case fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted: + return errors.New("patches may only modify existing files") + case len(fileDiff.Hunks) == 0: + return errors.New("patch contains no hunks") + case fileDiff.RenameFrom != "" || fileDiff.RenameTo != "" || fileDiff.CopyFrom != "" || fileDiff.CopyTo != "": + return errors.New("unsupported patch syntax: copy and rename headers are not supported") + case !fileDiffHasChanges(fileDiff): + return errors.New("patch contains no effective changes") + default: + return nil } - - return nil } func fileDiffHasChanges(fileDiff FileDiff) bool { @@ -122,80 
+107,6 @@ func fileDiffHasChanges(fileDiff FileDiff) bool { return false } -func hunksContainChanges(hunks []patchHunk) bool { - for _, hunk := range hunks { - for _, line := range hunk.lines { - if line.kind == '+' || line.kind == '-' { - return true - } - } - } - return false -} - -func skipToHunks(lines []string) []string { - for i, line := range lines { - if strings.HasPrefix(strings.TrimRight(line, "\n"), "@@ ") { - return lines[i:] - } - } - return nil -} - -func parseHunks(lines []string) ([]patchHunk, error) { - hunks := make([]patchHunk, 0) - for i := 0; i < len(lines); { - line := strings.TrimRight(lines[i], "\n") - if line == "" { - i++ - continue - } - if !strings.HasPrefix(line, "@@ ") { - return nil, fmt.Errorf("unexpected patch line %q", line) - } - - oldStart, oldCount, newStart, newCount, err := parseHunkHeader(line) - if err != nil { - return nil, err - } - - i++ - hunkLines := make([]patchLine, 0) - for i < len(lines) && !strings.HasPrefix(strings.TrimRight(lines[i], "\n"), "@@ ") { - raw := lines[i] - if strings.HasPrefix(raw, `\ No newline at end of file`) { - if len(hunkLines) == 0 { - return nil, fmt.Errorf("unexpected no-newline marker without a preceding patch line") - } - hunkLines[len(hunkLines)-1].hasNewline = false - i++ - continue - } - - line, skip, err := parsePatchLine(raw) - if err != nil { - return nil, err - } - if !skip { - hunkLines = append(hunkLines, line) - } - i++ - } - markEOFMarkers(hunkLines, oldCount, newCount) - - hunks = append(hunks, patchHunk{ - header: line, - oldStart: oldStart, - oldCount: oldCount, - newStart: newStart, - newCount: newCount, - lines: hunkLines, - }) - } - - return hunks, nil -} - func desiredLines(hunk patchHunk) []fileLine { lines := make([]fileLine, 0, len(hunk.lines)) for _, line := range hunk.lines { @@ -216,30 +127,6 @@ func preimageLines(hunk patchHunk) []fileLine { return lines } -func matchAnchoredFragment(source []fileLine, start int, begin, end anchoredFragment, ignoreWhitespace bool) 
bool { - return matchFragment(source, start+begin.offset, begin.lines, ignoreWhitespace) && - matchFragment(source, start+end.offset, end.lines, ignoreWhitespace) -} - -func splitAnchoredFragment(lines []fileLine) (anchoredFragment, anchoredFragment) { - if len(lines) == 0 { - return anchoredFragment{}, anchoredFragment{} - } - - beginLen := len(lines) / 2 - if beginLen == 0 { - beginLen = 1 - } - - return anchoredFragment{ - offset: 0, - lines: lines[:beginLen], - }, anchoredFragment{ - offset: beginLen, - lines: lines[beginLen:], - } -} - func matchFragment(source []fileLine, start int, fragment []fileLine, ignoreWhitespace bool) bool { if len(fragment) == 0 { return true @@ -286,41 +173,6 @@ func ensureTrailingNewline(lines []fileLine) []fileLine { return lines } -func parseHunkHeader(header string) (int, int, int, int, error) { - matches := hunkHeaderPattern.FindStringSubmatch(header) - if len(matches) == 0 { - return 0, 0, 0, 0, fmt.Errorf("invalid hunk header %q", header) - } - - oldStart, err := parseNumber(matches[1]) - if err != nil { - return 0, 0, 0, 0, err - } - - oldCount := 1 - if matches[2] != "" { - oldCount, err = parseNumber(matches[2]) - if err != nil { - return 0, 0, 0, 0, err - } - } - - newStart, err := parseNumber(matches[3]) - if err != nil { - return 0, 0, 0, 0, err - } - - newCount := 1 - if matches[4] != "" { - newCount, err = parseNumber(matches[4]) - if err != nil { - return 0, 0, 0, 0, err - } - } - - return oldStart, oldCount, newStart, newCount, nil -} - func markEOFMarkers(lines []patchLine, oldCount, newCount int) { oldSeen := 0 newSeen := 0 @@ -342,23 +194,6 @@ func markEOFMarkers(lines []patchLine, oldCount, newCount int) { } } -func parsePatchLine(raw string) (patchLine, bool, error) { - if raw == "" { - return patchLine{}, true, nil - } - - switch raw[0] { - case ' ', '-', '+': - return patchLine{ - kind: raw[0], - text: trimSingleLineEnding(raw[1:]), - hasNewline: strings.HasSuffix(raw, "\n"), - }, false, nil - default: - 
return patchLine{}, false, fmt.Errorf("unexpected hunk line %q", strings.TrimRight(raw, "\n")) - } -} - func splitFileLines(content []byte) []fileLine { rawLines := splitLinesPreserveNewline(string(content)) lines := make([]fileLine, 0, len(rawLines)) @@ -418,11 +253,3 @@ func normalizePatchForValidation(patchData []byte) []byte { } return []byte("diff --git a/__patch__ b/__patch__\n" + string(patchData)) } - -func parseNumber(raw string) (int, error) { - var value int - if _, err := fmt.Sscanf(raw, "%d", &value); err != nil { - return 0, err - } - return value, nil -} diff --git a/apply_internal_test.go b/apply_internal_test.go index 7506c55..6ac7569 100644 --- a/apply_internal_test.go +++ b/apply_internal_test.go @@ -7,19 +7,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestMatchAnchoredFragmentRequiresBothEnds(t *testing.T) { - source := splitFileLines([]byte("one\na\nb\nc\nd\na\nb\nx\nd\n")) - begin, end := splitAnchoredFragment([]fileLine{ - {text: "a", hasNewline: true}, - {text: "b", hasNewline: true}, - {text: "c", hasNewline: true}, - {text: "d", hasNewline: true}, - }) - - require.True(t, matchAnchoredFragment(source, 1, begin, end, false)) - require.False(t, matchAnchoredFragment(source, 5, begin, end, false)) -} - func TestFindPosRejectsAlreadyAppliedPostimage(t *testing.T) { session := &applySession{ sourceLines: splitFileLines([]byte("a\nb\nx\nc\n")), diff --git a/apply_options.go b/apply_options.go index 3e1d4e7..d159283 100644 --- a/apply_options.go +++ b/apply_options.go @@ -48,16 +48,6 @@ func NewPatchApply(options ApplyOptions) *PatchApply { return &PatchApply{options: normalizeApplyOptions(options)} } -func legacyApplyOptions() ApplyOptions { - return ApplyOptions{ - Mode: ApplyModeMerge, - ConflictLabels: ConflictLabels{ - Current: "Current (Your changes)", - Incoming: "New (Generated by Speakeasy)", - }, - } -} - func (o ApplyOptions) normalize() ApplyOptions { if o.Mode != ApplyModeMerge { o.Mode = ApplyModeApply diff --git 
a/apply_result.go b/apply_result.go index f70d5b0..f383f04 100644 --- a/apply_result.go +++ b/apply_result.go @@ -17,8 +17,8 @@ type ApplyResult struct { } type applyOutcome struct { - content []fileLine - conflicts []applyConflict + content []fileLine + conflicts []applyConflict rejectHead string } diff --git a/apply_session.go b/apply_session.go index dd4bd19..dc3e2ec 100644 --- a/apply_session.go +++ b/apply_session.go @@ -33,23 +33,8 @@ func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er } fileDiff := parsed.FileDiff[0] - if fileDiff.IsBinary { - return validatedPatch{}, fmt.Errorf("binary patches are not supported") - } - if fileDiff.NewMode != "" { - return validatedPatch{}, fmt.Errorf("file mode changes are not supported") - } - if fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted { - return validatedPatch{}, fmt.Errorf("patches may only modify existing files") - } - if len(fileDiff.Hunks) == 0 { - return validatedPatch{}, fmt.Errorf("patch contains no hunks") - } - if fileDiff.RenameFrom != "" || fileDiff.RenameTo != "" || fileDiff.CopyFrom != "" || fileDiff.CopyTo != "" { - return validatedPatch{}, fmt.Errorf("unsupported patch syntax: copy and rename headers are not supported") - } - if !fileDiffHasChanges(fileDiff) { - return validatedPatch{}, fmt.Errorf("patch contains no effective changes") + if err := validateApplyFileDiff(fileDiff); err != nil { + return validatedPatch{}, err } hunks := make([]patchHunk, 0, len(fileDiff.Hunks)) @@ -91,8 +76,8 @@ func (s *applySession) apply(patch validatedPatch) (applyOutcome, error) { s.appendSourceUntil(len(s.sourceLines)) return applyOutcome{ - content: append([]fileLine(nil), s.image...), - conflicts: append([]applyConflict(nil), s.conflicts...), + content: append([]fileLine(nil), s.image...), + conflicts: append([]applyConflict(nil), s.conflicts...), rejectHead: s.rejectHead, }, nil } @@ -238,26 +223,6 @@ func formatPatchHunkRange(start, count int) string { 
return fmt.Sprintf("%d,%d", start, count) } -func (s *applySession) findPosWithAnchors(preferred int, begin, end anchoredFragment) (int, bool) { - for offset := 0; ; offset++ { - left := preferred - offset - if left >= s.cursor && left <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, left, begin, end, s.ignoreWhitespace()) { - return left, true - } - - right := preferred + offset - if offset > 0 && right >= s.cursor && right <= len(s.sourceLines) && matchAnchoredFragment(s.sourceLines, right, begin, end, s.ignoreWhitespace()) { - return right, true - } - - if left < s.cursor && right > len(s.sourceLines) { - break - } - } - - return 0, false -} - func (s *applySession) ignoreWhitespace() bool { return s.applier != nil && s.applier.options.IgnoreWhitespace } diff --git a/patchset_apply.go b/patchset_apply.go index 0d3449f..7f446e5 100644 --- a/patchset_apply.go +++ b/patchset_apply.go @@ -130,15 +130,14 @@ func applyPatchsetContent(pristine []byte, file PatchsetFile) ([]byte, error) { hunks = append(hunks, patchHunkFromHunk(hunk)) } - outcome, err := NewPatchApply(ApplyOptions{Mode: ApplyModeApply}).newApplySession(pristine).apply(validatedPatch{hunks: hunks}) + result, err := NewPatchApply(ApplyOptions{Mode: ApplyModeApply}).applyValidatedPatch(pristine, validatedPatch{ + rejectHead: formatRejectHeader(file.Diff), + hunks: hunks, + }) if err != nil { return nil, err } - if len(outcome.conflicts) > 0 { - return nil, &ApplyError{DirectMisses: len(outcome.conflicts)} - } - result := renderApplyResult(pristine, outcome, ApplyOptions{Mode: ApplyModeApply}) return append([]byte(nil), result.Content...), nil } From e5a742a450d29858705ecd24a803073b8353bd59 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Thu, 26 Mar 2026 14:39:08 +0000 Subject: [PATCH 15/20] chore: trim surface --- apply.go | 35 ++++++-------- apply_flags.go | 2 +- apply_options.go | 54 ++++++++++----------- apply_render.go | 10 ++-- apply_result.go | 13 ++---- apply_session.go | 14 +++--- 
apply_test.go | 113 +++++++++++++++++++++----------------------- model.go | 78 +++++++++++++++---------------- parity_test.go | 19 ++++---- parser.go | 116 +++++++++++++++++++++++----------------------- parser_test.go | 26 +++++++---- patchset.go | 92 ++++++++++++++++++------------------ patchset_apply.go | 44 +++++++++--------- patchset_test.go | 17 ++++--- 14 files changed, 312 insertions(+), 321 deletions(-) diff --git a/apply.go b/apply.go index 43cb902..cac58cc 100644 --- a/apply.go +++ b/apply.go @@ -30,36 +30,31 @@ type fileLine struct { } func ApplyFile(pristine, patchData []byte) ([]byte, error) { - result, err := ApplyFileWithOptions(pristine, patchData, DefaultApplyOptions()) + result, err := applyFileWithOptions(pristine, patchData, defaultApplyOptions()) return result.Content, err } -func ApplyFileWithOptions(pristine, patchData []byte, options ApplyOptions) (ApplyResult, error) { - return NewPatchApply(options).applyFileWithResult(pristine, patchData) +func applyFileWithOptions(pristine, patchData []byte, options applyOptions) (applyResult, error) { + return newPatchApply(options).applyFileWithResult(pristine, patchData) } -func (p *PatchApply) ApplyFile(pristine, patchData []byte) ([]byte, error) { +func (p *patchApply) applyFile(pristine, patchData []byte) ([]byte, error) { result, err := p.applyFileWithResult(pristine, patchData) return result.Content, err } -func (p *PatchApply) applyFileWithResult(pristine, patchData []byte) (ApplyResult, error) { +func (p *patchApply) applyFileWithResult(pristine, patchData []byte) (applyResult, error) { patch, err := p.validateAndParsePatch(patchData) if err != nil { - return ApplyResult{}, err + return applyResult{}, err } return p.applyValidatedPatch(pristine, patch) } -// ApplyPatch is kept as a compatibility alias. 
-func ApplyPatch(pristine, patchData []byte) ([]byte, error) { - return ApplyFile(pristine, patchData) -} - -func (p *PatchApply) applyValidatedPatch(pristine []byte, patch validatedPatch) (ApplyResult, error) { +func (p *patchApply) applyValidatedPatch(pristine []byte, patch validatedPatch) (applyResult, error) { outcome, err := p.newApplySession(pristine).apply(patch) if err != nil { - return ApplyResult{}, err + return applyResult{}, err } result := renderApplyResult(pristine, outcome, p.options) @@ -67,23 +62,23 @@ func (p *PatchApply) applyValidatedPatch(pristine []byte, patch validatedPatch) return result, nil } - if p.options.Mode == ApplyModeMerge { - return result, &ApplyError{ + if p.options.Mode == applyModeMerge { + return result, &applyError{ MergeConflicts: len(outcome.conflicts), ConflictingHunks: len(outcome.conflicts), } } - return result, &ApplyError{DirectMisses: len(outcome.conflicts)} + return result, &applyError{DirectMisses: len(outcome.conflicts)} } -func validateApplyFileDiff(fileDiff FileDiff) error { +func validateApplyFileDiff(fileDiff fileDiff) error { switch { case fileDiff.IsBinary: return errors.New("binary patches are not supported") case fileDiff.NewMode != "": return errors.New("file mode changes are not supported") - case fileDiff.Type == FileDiffTypeAdded || fileDiff.Type == FileDiffTypeDeleted: + case fileDiff.Type == fileDiffTypeAdded || fileDiff.Type == fileDiffTypeDeleted: return errors.New("patches may only modify existing files") case len(fileDiff.Hunks) == 0: return errors.New("patch contains no hunks") @@ -96,10 +91,10 @@ func validateApplyFileDiff(fileDiff FileDiff) error { } } -func fileDiffHasChanges(fileDiff FileDiff) bool { +func fileDiffHasChanges(fileDiff fileDiff) bool { for _, hunk := range fileDiff.Hunks { for _, change := range hunk.ChangeList { - if change.Type != ContentChangeTypeNOOP { + if change.Type != contentChangeTypeNOOP { return true } } diff --git a/apply_flags.go b/apply_flags.go index 
273a2e0..214e12c 100644 --- a/apply_flags.go +++ b/apply_flags.go @@ -2,7 +2,7 @@ package git_diff_parser import "fmt" -func normalizePatchHunks(hunks []patchHunk, options ApplyOptions) ([]patchHunk, error) { +func normalizePatchHunks(hunks []patchHunk, options applyOptions) ([]patchHunk, error) { if len(hunks) == 0 { return hunks, nil } diff --git a/apply_options.go b/apply_options.go index d159283..a19dc59 100644 --- a/apply_options.go +++ b/apply_options.go @@ -1,27 +1,27 @@ package git_diff_parser -// ApplyMode controls how the apply engine treats hunks that cannot be placed +// applyMode controls how the apply engine treats hunks that cannot be placed // directly into the target content. -type ApplyMode int +type applyMode int const ( - // ApplyModeApply keeps the output neutral when a hunk cannot be applied. - ApplyModeApply ApplyMode = iota - // ApplyModeMerge renders conflict markers into the output for misses. - ApplyModeMerge + // applyModeApply keeps the output neutral when a hunk cannot be applied. + applyModeApply applyMode = iota + // applyModeMerge renders conflict markers into the output for misses. + applyModeMerge ) -// ConflictLabels controls the labels rendered into conflict markers. +// conflictLabels controls the labels rendered into conflict markers. // The zero value renders neutral markers without any labels. -type ConflictLabels struct { +type conflictLabels struct { Current string Incoming string } -// ApplyOptions configures the apply engine. -type ApplyOptions struct { - Mode ApplyMode - ConflictLabels ConflictLabels +// applyOptions configures the apply engine. 
+type applyOptions struct { + Mode applyMode + ConflictLabels conflictLabels IgnoreWhitespace bool Reverse bool UnidiffZero bool @@ -29,31 +29,31 @@ type ApplyOptions struct { InaccurateEOF bool } -func DefaultApplyOptions() ApplyOptions { - return ApplyOptions{ - Mode: ApplyModeMerge, - ConflictLabels: ConflictLabels{ +func defaultApplyOptions() applyOptions { + return applyOptions{ + Mode: applyModeMerge, + ConflictLabels: conflictLabels{ Current: "Current", Incoming: "Incoming patch", }, } } -// PatchApply holds apply-time configuration and mirrors Git's stateful apply design. -type PatchApply struct { - options ApplyOptions +// patchApply holds apply-time configuration and mirrors Git's stateful apply design. +type patchApply struct { + options applyOptions } -func NewPatchApply(options ApplyOptions) *PatchApply { - return &PatchApply{options: normalizeApplyOptions(options)} +func newPatchApply(options applyOptions) *patchApply { + return &patchApply{options: normalizeApplyOptions(options)} } -func (o ApplyOptions) normalize() ApplyOptions { - if o.Mode != ApplyModeMerge { - o.Mode = ApplyModeApply +func (o applyOptions) normalize() applyOptions { + if o.Mode != applyModeMerge { + o.Mode = applyModeApply } - if o.Mode == ApplyModeMerge { - defaults := DefaultApplyOptions() + if o.Mode == applyModeMerge { + defaults := defaultApplyOptions() if o.ConflictLabels.Current == "" { o.ConflictLabels.Current = defaults.ConflictLabels.Current } @@ -64,6 +64,6 @@ func (o ApplyOptions) normalize() ApplyOptions { return o } -func normalizeApplyOptions(options ApplyOptions) ApplyOptions { +func normalizeApplyOptions(options applyOptions) applyOptions { return options.normalize() } diff --git a/apply_render.go b/apply_render.go index e613577..3f10431 100644 --- a/apply_render.go +++ b/apply_render.go @@ -2,8 +2,8 @@ package git_diff_parser import "bytes" -func renderApplyResult(pristine []byte, outcome applyOutcome, options ApplyOptions) ApplyResult { - result := ApplyResult{ 
+func renderApplyResult(pristine []byte, outcome applyOutcome, options applyOptions) applyResult { + result := applyResult{ Content: joinFileLines(outcome.content), Reject: renderRejectContent(outcome.rejectHead, outcome.conflicts), } @@ -13,7 +13,7 @@ func renderApplyResult(pristine []byte, outcome applyOutcome, options ApplyOptio } switch options.Mode { - case ApplyModeMerge: + case applyModeMerge: result.Content = renderMergeContent(outcome.content, outcome.conflicts, options.ConflictLabels) result.MergeConflicts = len(outcome.conflicts) default: @@ -24,7 +24,7 @@ func renderApplyResult(pristine []byte, outcome applyOutcome, options ApplyOptio return result } -func renderMergeContent(base []fileLine, conflicts []applyConflict, labels ConflictLabels) []byte { +func renderMergeContent(base []fileLine, conflicts []applyConflict, labels conflictLabels) []byte { if len(conflicts) == 0 { return joinFileLines(base) } @@ -74,7 +74,7 @@ func renderRejectContent(header string, conflicts []applyConflict) []byte { return buf.Bytes() } -func renderConflictLines(labels ConflictLabels, ours, theirs []fileLine) []fileLine { +func renderConflictLines(labels conflictLabels, ours, theirs []fileLine) []fileLine { lines := []fileLine{ {text: "<<<<<<< " + labels.Current, hasNewline: true}, } diff --git a/apply_result.go b/apply_result.go index f383f04..74549a6 100644 --- a/apply_result.go +++ b/apply_result.go @@ -9,7 +9,7 @@ var ErrPatchConflict = errors.New("patch conflict") // ApplyResult captures the patched content and the type of misses encountered // while attempting to apply it. -type ApplyResult struct { +type applyResult struct { Content []byte Reject []byte DirectMisses int @@ -29,8 +29,8 @@ type applyConflict struct { theirs []fileLine } -// ApplyError reports the aggregate apply outcome. -type ApplyError struct { +// applyError reports the aggregate apply outcome. 
+type applyError struct { DirectMisses int MergeConflicts int // ConflictingHunks keeps the legacy count available for callers that still @@ -38,7 +38,7 @@ type ApplyError struct { ConflictingHunks int } -func (e *ApplyError) Error() string { +func (e *applyError) Error() string { if e == nil { return "" } @@ -64,9 +64,6 @@ func (e *ApplyError) Error() string { return "patch apply failed" } -func (e *ApplyError) Is(target error) bool { +func (e *applyError) Is(target error) bool { return target == ErrPatchConflict } - -// ConflictError is kept as a compatibility alias for the old public type. -type ConflictError = ApplyError diff --git a/apply_session.go b/apply_session.go index dc3e2ec..ff85fd0 100644 --- a/apply_session.go +++ b/apply_session.go @@ -8,7 +8,7 @@ type validatedPatch struct { } type applySession struct { - applier *PatchApply + applier *patchApply sourceLines []fileLine image []fileLine cursor int @@ -22,9 +22,9 @@ type matchedHunk struct { hunkEnd int } -func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { +func (p *patchApply) validateAndParsePatch(patchData []byte) (validatedPatch, error) { normalizedPatch := normalizePatchForValidation(patchData) - parsed, errs := Parse(string(normalizedPatch)) + parsed, errs := parse(string(normalizedPatch)) if len(errs) > 0 { return validatedPatch{}, fmt.Errorf("unsupported patch syntax: %w", errs[0]) } @@ -52,7 +52,7 @@ func (p *PatchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er }, nil } -func (p *PatchApply) newApplySession(pristine []byte) *applySession { +func (p *patchApply) newApplySession(pristine []byte) *applySession { sourceLines := splitFileLines(pristine) return &applySession{ applier: p, @@ -180,7 +180,7 @@ func (s *applySession) findPosForFragment(preferred int, fragment []fileLine) (i return 0, false } -func patchHunkFromHunk(hunk Hunk) patchHunk { +func patchHunkFromHunk(hunk hunk) patchHunk { lines := make([]patchLine, 0, len(hunk.Lines)) 
for _, line := range hunk.Lines { lines = append(lines, patchLine{ @@ -202,7 +202,7 @@ func patchHunkFromHunk(hunk Hunk) patchHunk { } } -func formatRejectHeader(fileDiff FileDiff) string { +func formatRejectHeader(fileDiff fileDiff) string { path := firstNonEmpty(fileDiff.ToFile, fileDiff.FromFile) if path == "" { return "" @@ -210,7 +210,7 @@ func formatRejectHeader(fileDiff FileDiff) string { return "diff a/" + path + " b/" + path + "\t(rejected hunks)" } -func formatPatchHunkHeader(hunk Hunk) string { +func formatPatchHunkHeader(hunk hunk) string { oldRange := formatPatchHunkRange(hunk.StartLineNumberOld, hunk.CountOld) newRange := formatPatchHunkRange(hunk.StartLineNumberNew, hunk.CountNew) return fmt.Sprintf("@@ -%s +%s @@", oldRange, newRange) diff --git a/apply_test.go b/apply_test.go index 726ecf3..8532913 100644 --- a/apply_test.go +++ b/apply_test.go @@ -1,4 +1,4 @@ -package git_diff_parser_test +package git_diff_parser import ( "bytes" @@ -9,7 +9,6 @@ import ( "testing" "github.com/pmezard/go-difflib/difflib" - git_diff_parser "github.com/speakeasy-api/git-diff-parser" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -58,15 +57,15 @@ func TestApplyFile_TextFixtures(t *testing.T) { t.Parallel() files := loadApplyFixture(t, test.fixture) - applied, err := git_diff_parser.ApplyFile(files.src, files.patch) + applied, err := ApplyFile(files.src, files.patch) if test.wantErr != "" { require.Error(t, err) assert.Contains(t, err.Error(), test.wantErr) if test.conflict { - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) - assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + var applyErr *applyError + require.ErrorAs(t, err, &applyErr) + assert.True(t, errors.Is(err, ErrPatchConflict)) assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), defaultIncomingConflictMarker) } @@ -120,7 +119,7 @@ abc t.Run(test.name, func(t *testing.T) { 
t.Parallel() - _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n"), test.patch) + _, err := ApplyFile([]byte("package testsdk\n"), test.patch) require.Error(t, err) assert.Contains(t, err.Error(), test.wantErr) }) @@ -160,7 +159,7 @@ func TestApplyFile_RejectsHeaderOnlyAndNoOpPatches(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n"), test.patch) + _, err := ApplyFile([]byte("package testsdk\n"), test.patch) require.Error(t, err) assert.Contains(t, err.Error(), test.wantErr) }) @@ -194,7 +193,7 @@ func TestApplyFile_NoNewlineMatrix(t *testing.T) { t.Parallel() patch := mustReadFile(t, filepath.Join("testdata", "apply", "t4101", "diff."+from.name+"-"+to.name)) - applied, err := git_diff_parser.ApplyFile(from.content, patch) + applied, err := ApplyFile(from.content, patch) require.NoError(t, err) assert.Equal(t, to.content, applied) }) @@ -227,7 +226,7 @@ func TestApplyFile_BoundaryCases(t *testing.T) { t.Parallel() patch := buildPatchWithContext(t, "victim", original, test.want, context) - applied, err := git_diff_parser.ApplyFile(original, patch) + applied, err := ApplyFile(original, patch) require.NoError(t, err) assert.Equal(t, test.want, applied) }) @@ -258,7 +257,7 @@ func TestApplyFile_OffsetPatches(t *testing.T) { t.Parallel() patch := rewriteFirstHunkHeader(basePatch, test.header) - applied, err := git_diff_parser.ApplyFile(original, patch) + applied, err := ApplyFile(original, patch) require.NoError(t, err) assert.Equal(t, target, applied) }) @@ -289,10 +288,9 @@ func TestApplyFile_DamagedContextPatchesConflictWithoutFuzz(t *testing.T) { t.Parallel() patch := rewriteFirstHunkHeader(damaged, test.header) - applied, err := git_diff_parser.ApplyFile(original, patch) + applied, err := ApplyFile(original, patch) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) + require.ErrorIs(t, err, ErrPatchConflict) 
assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } @@ -324,7 +322,7 @@ func TestApplyFile_EmptyContextPatches(t *testing.T) { t.Parallel() patch := buildPatchWithContext(t, "file", test.original, test.target, 0) - applied, err := git_diff_parser.ApplyFile(test.original, patch) + applied, err := ApplyFile(test.original, patch) require.NoError(t, err) assert.Equal(t, test.target, applied) }) @@ -373,7 +371,7 @@ func TestApplyFile_EmptyContextNoTrailingNewlinePatches(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := git_diff_parser.ApplyFile(test.original, test.patch) + applied, err := ApplyFile(test.original, test.patch) require.NoError(t, err) assert.Equal(t, test.target, applied) }) @@ -387,7 +385,7 @@ func TestApplyFile_RelocatesHunkWhenContextStillMatches(t *testing.T) { patchData := buildPatch(t, "status.go", originalPristine, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) shiftedPristine := []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n") - applied, err := git_diff_parser.ApplyFile(shiftedPristine, patchData) + applied, err := ApplyFile(shiftedPristine, patchData) require.NoError(t, err) assert.Equal(t, []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n"), applied) } @@ -400,7 +398,7 @@ func TestApplyFile_RelocatesToNearestMatchingBlock(t *testing.T) { shifted := []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nextra\nanchor\ncommon\nvalue-old\nend\n") patch := buildPatchWithContext(t, "dup.txt", original, target, 1) - applied, err := git_diff_parser.ApplyFile(shifted, patch) + applied, err := ApplyFile(shifted, patch) require.NoError(t, err) assert.Equal(t, []byte("header\nanchor\ncommon\nvalue-old\nend\ngap\nextra\nanchor\ncommon\nvalue-new\nend\n"), applied) } @@ -412,7 +410,7 @@ func 
TestApplyFile_MultipleHunks(t *testing.T) { target := []byte("line 1\nline two\nline 3\nline 4\nline 5\nline six\nline 7\nline 8\n") patch := buildPatchWithContext(t, "multi.txt", original, target, 1) - applied, err := git_diff_parser.ApplyFile(original, patch) + applied, err := ApplyFile(original, patch) require.NoError(t, err) assert.Equal(t, target, applied) } @@ -425,10 +423,9 @@ func TestApplyFile_MultipleHunksOneConflict(t *testing.T) { current := []byte("line 1\nline 2\nline 3\nline 4\nline 5\nline VI\nline 7\nline 8\n") patch := buildPatchWithContext(t, "multi.txt", original, target, 1) - applied, err := git_diff_parser.ApplyFile(current, patch) + applied, err := ApplyFile(current, patch) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), "line two") assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), "line VI") @@ -442,11 +439,10 @@ func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) - applied, err := git_diff_parser.ApplyFile(current, patchData) + applied, err := ApplyFile(current, patchData) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) - assert.True(t, errors.Is(err, git_diff_parser.ErrPatchConflict)) + require.ErrorIs(t, err, ErrPatchConflict) + assert.True(t, errors.Is(err, ErrPatchConflict)) assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), defaultIncomingConflictMarker) assert.Contains(t, string(applied), "func (s *Status) String() string") @@ -459,11 +455,11 @@ func 
TestApplyFileWithOptions_RendersNeutralConflictMarkers(t *testing.T) { current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) - result, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ - Mode: git_diff_parser.ApplyModeMerge, + result, err := applyFileWithOptions(current, patchData, applyOptions{ + Mode: applyModeMerge, }) require.Error(t, err) - var applyErr *git_diff_parser.ApplyError + var applyErr *applyError require.ErrorAs(t, err, &applyErr) assert.Equal(t, 0, result.DirectMisses) assert.Equal(t, 1, result.MergeConflicts) @@ -481,11 +477,11 @@ func TestApplyFileWithOptions_DirectModeReportsMissesWithoutMarkers(t *testing.T current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) - result, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ - Mode: git_diff_parser.ApplyModeApply, + result, err := applyFileWithOptions(current, patchData, applyOptions{ + Mode: applyModeApply, }) require.Error(t, err) - var applyErr *git_diff_parser.ApplyError + var applyErr *applyError require.ErrorAs(t, err, &applyErr) assert.Equal(t, 1, result.DirectMisses) assert.Equal(t, 0, result.MergeConflicts) @@ -503,15 +499,15 @@ func TestPatchApply_AllowsCustomConflictLabels(t *testing.T) { current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) - applier := git_diff_parser.NewPatchApply(git_diff_parser.ApplyOptions{ - Mode: 
git_diff_parser.ApplyModeMerge, - ConflictLabels: git_diff_parser.ConflictLabels{ + applier := newPatchApply(applyOptions{ + Mode: applyModeMerge, + ConflictLabels: conflictLabels{ Current: "Current (Your changes)", Incoming: "New (Generated by Speakeasy)", }, }) - applied, err := applier.ApplyFile(current, patchData) + applied, err := applier.applyFile(current, patchData) require.Error(t, err) assert.Contains(t, string(applied), "<<<<<<< Current (Your changes)") assert.Contains(t, string(applied), ">>>>>>> New (Generated by Speakeasy)") @@ -525,13 +521,13 @@ func TestApplyFileWithOptions_IgnoreWhitespaceAppliesThroughContextDrift(t *test patchData := buildPatchWithContext(t, "whitespace.txt", original, target, 1) current := []byte("alpha\n beta\ncharlie\n") - _, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ - Mode: git_diff_parser.ApplyModeMerge, + _, err := applyFileWithOptions(current, patchData, applyOptions{ + Mode: applyModeMerge, }) require.Error(t, err) - applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ - Mode: git_diff_parser.ApplyModeMerge, + applied, err := applyFileWithOptions(current, patchData, applyOptions{ + Mode: applyModeMerge, IgnoreWhitespace: true, }) require.NoError(t, err) @@ -553,7 +549,7 @@ func TestApplyFileWithOptions_ReverseAppliesPatchBackwards(t *testing.T) { b `) - applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + applied, err := applyFileWithOptions(current, patchData, applyOptions{ Reverse: true, }) require.NoError(t, err) @@ -571,11 +567,11 @@ func TestApplyFileWithOptions_UnidiffZeroIsAccepted(t *testing.T) { -beta `) - baseline, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{}) + baseline, err := applyFileWithOptions(current, patchData, applyOptions{}) require.NoError(t, err) assert.Equal(t, []byte("alpha\ngamma\n"), 
baseline.Content) - applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + applied, err := applyFileWithOptions(current, patchData, applyOptions{ UnidiffZero: true, }) require.NoError(t, err) @@ -593,12 +589,12 @@ func TestApplyFileWithOptions_RecountRebuildsHunkCounts(t *testing.T) { -beta `) - _, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + _, err := applyFileWithOptions(current, patchData, applyOptions{ UnidiffZero: true, }) require.Error(t, err) - applied, err := git_diff_parser.ApplyFileWithOptions(current, patchData, git_diff_parser.ApplyOptions{ + applied, err := applyFileWithOptions(current, patchData, applyOptions{ UnidiffZero: true, Recount: true, }) @@ -645,10 +641,9 @@ func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := git_diff_parser.ApplyFile(test.current, test.patch) + applied, err := ApplyFile(test.current, test.patch) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } @@ -695,10 +690,9 @@ func TestApplyFile_RejectsAlreadyAppliedMiddlePatches(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := git_diff_parser.ApplyFile(test.current, test.patch) + applied, err := ApplyFile(test.current, test.patch) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } @@ -725,7 +719,7 @@ diff --git a/models/components/pet.go b/models/components/pet.go type Pet struct{} `) - _, err := git_diff_parser.ApplyFile([]byte("package testsdk\n\ntype SDK struct{}\n"), patchData) + _, err := 
ApplyFile([]byte("package testsdk\n\ntype SDK struct{}\n"), patchData) require.Error(t, err) assert.Contains(t, err.Error(), "expected exactly 1 file diff") } @@ -797,7 +791,7 @@ copy to file-copy t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := git_diff_parser.ApplyFile([]byte("a\n"), test.patch) + _, err := ApplyFile([]byte("a\n"), test.patch) require.Error(t, err) assert.Contains(t, err.Error(), test.wantErr) }) @@ -858,7 +852,7 @@ rename to 2 t.Run(test.name, func(t *testing.T) { t.Parallel() - _, err := git_diff_parser.ApplyFile([]byte("package test\n"), test.patch) + _, err := ApplyFile([]byte("package test\n"), test.patch) require.Error(t, err) assert.Contains(t, err.Error(), test.wantErr) }) @@ -894,10 +888,9 @@ func TestApplyFile_ShrinkFailures(t *testing.T) { t.Parallel() patch := buildPatch(t, "F", test.original, test.target) - applied, err := git_diff_parser.ApplyFile(test.current, patch) + applied, err := ApplyFile(test.current, patch) require.Error(t, err) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, err, &conflictErr) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) }) } @@ -910,7 +903,7 @@ func TestApplyFile_CRLFPreservation(t *testing.T) { target := []byte("alpha\r\nbravo\r\n") patch := buildPatch(t, "crlf.txt", pristine, target) - applied, err := git_diff_parser.ApplyFile(pristine, patch) + applied, err := ApplyFile(pristine, patch) require.NoError(t, err) assert.Equal(t, target, applied) } @@ -952,7 +945,7 @@ func expectedApplyFixtureOutput(t *testing.T, files applyFixtureFiles) []byte { return nil } - parsed, errs := git_diff_parser.Parse(string(files.patch)) + parsed, errs := parse(string(files.patch)) require.Empty(t, errs) require.Len(t, parsed.FileDiff, 1) require.Len(t, parsed.FileDiff[0].Hunks, 1) @@ -1031,7 +1024,7 @@ func TestApplyFile_PreservesExactBytes(t *testing.T) { t.Parallel() files := loadApplyFixture(t, 
"text_fragment_change_single_noeol") - applied, err := git_diff_parser.ApplyFile(files.src, files.patch) + applied, err := ApplyFile(files.src, files.patch) require.NoError(t, err) assert.True(t, bytes.Equal(files.out, applied)) } diff --git a/model.go b/model.go index 88f4efe..c89e4d1 100644 --- a/model.go +++ b/model.go @@ -5,28 +5,28 @@ import ( "strings" ) -type ContentChangeType string +type contentChangeType string const ( - ContentChangeTypeAdd ContentChangeType = "add" - ContentChangeTypeDelete ContentChangeType = "delete" - ContentChangeTypeModify ContentChangeType = "modify" - ContentChangeTypeNOOP ContentChangeType = "" + contentChangeTypeAdd contentChangeType = "add" + contentChangeTypeDelete contentChangeType = "delete" + contentChangeTypeModify contentChangeType = "modify" + contentChangeTypeNOOP contentChangeType = "" ) -// ContentChange is a part of the line that starts with ` `, `-`, `+`. -// Consecutive ContentChange build a line. -// A `~` is a special case of ContentChange that is used to indicate a new line. -type ContentChange struct { - Type ContentChangeType `json:"type"` +// contentChange is a part of the line that starts with ` `, `-`, `+`. +// Consecutive contentChange build a line. +// A `~` is a special case of contentChange that is used to indicate a new line. +type contentChange struct { + Type contentChangeType `json:"type"` From string `json:"from"` To string `json:"to"` } -type ChangeList []ContentChange +type changeList []contentChange -// HunkLine keeps a normalized, apply-friendly view of a hunk line. -type HunkLine struct { +// hunkLine keeps a normalized, apply-friendly view of a hunk line. 
+type hunkLine struct { Kind byte `json:"kind"` Text string `json:"text"` HasNewline bool `json:"has_newline"` @@ -34,11 +34,11 @@ type HunkLine struct { NewEOF bool `json:"new_eof,omitempty"` } -func (l *HunkLine) MarkNoNewline() { +func (l *hunkLine) markNoNewline() { l.HasNewline = false } -func (h *Hunk) MarkEOFMarkers() { +func (h *hunk) markEOFMarkers() { oldSeen := 0 newSeen := 0 @@ -59,7 +59,7 @@ func (h *Hunk) MarkEOFMarkers() { } } -// Hunk is a line that starts with @@. +// hunk is a line that starts with @@. // Each hunk shows one area where the files differ. // Unified format hunks look like this: // @@ from-file-line-numbers to-file-line-numbers @@ @@ -68,25 +68,25 @@ func (h *Hunk) MarkEOFMarkers() { // line-from-either-file… // // If a hunk contains just one line, only its start line number appears. Otherwise its line numbers look like ‘start,count’. An empty hunk is considered to start at the line that follows the hunk. -type Hunk struct { - ChangeList ChangeList `json:"change_list"` - Lines []HunkLine `json:"lines,omitempty"` +type hunk struct { + ChangeList changeList `json:"change_list"` + Lines []hunkLine `json:"lines,omitempty"` StartLineNumberOld int `json:"start_line_number_old"` CountOld int `json:"count_old"` StartLineNumberNew int `json:"start_line_number_new"` CountNew int `json:"count_new"` } -func (changes *ChangeList) IsSignificant() bool { +func (changes *changeList) isSignificant() bool { for _, change := range *changes { - if change.Type != ContentChangeTypeNOOP { + if change.Type != contentChangeTypeNOOP { return true } } return false } -func (h Hunk) GoString() string { +func (h hunk) GoString() string { return fmt.Sprintf( "git_diff_parser.Hunk{ChangeList:%#v, StartLineNumberOld:%d, CountOld:%d, StartLineNumberNew:%d, CountNew:%d}", h.ChangeList, @@ -97,33 +97,33 @@ func (h Hunk) GoString() string { ) } -type FileDiffType string +type fileDiffType string const ( - FileDiffTypeAdded FileDiffType = "add" - FileDiffTypeDeleted 
FileDiffType = "delete" - FileDiffTypeModified FileDiffType = "modify" + fileDiffTypeAdded fileDiffType = "add" + fileDiffTypeDeleted fileDiffType = "delete" + fileDiffTypeModified fileDiffType = "modify" ) -type BinaryDeltaType string +type binaryDeltaType string const ( - BinaryDeltaTypeLiteral BinaryDeltaType = "literal" - BinaryDeltaTypeDelta BinaryDeltaType = "delta" + binaryDeltaTypeLiteral binaryDeltaType = "literal" + binaryDeltaTypeDelta binaryDeltaType = "delta" ) -type BinaryPatch struct { - Type BinaryDeltaType `json:"type"` +type binaryPatch struct { + Type binaryDeltaType `json:"type"` Count int Content string } -// FileDiff Source of truth: https://github.com/git/git/blob/master/diffcore.h#L106 +// fileDiff Source of truth: https://github.com/git/git/blob/master/diffcore.h#L106 // Implemented in https://github.com/git/git/blob/master/diff.c#L3496 -type FileDiff struct { +type fileDiff struct { FromFile string `json:"from_file"` ToFile string `json:"to_file"` - Type FileDiffType `json:"type"` + Type fileDiffType `json:"type"` IsBinary bool `json:"is_binary"` OldMode string `json:"old_mode,omitempty"` NewMode string `json:"new_mode,omitempty"` @@ -136,11 +136,11 @@ type FileDiff struct { RenameTo string `json:"rename_to,omitempty"` CopyFrom string `json:"copy_from,omitempty"` CopyTo string `json:"copy_to,omitempty"` - Hunks []Hunk `json:"hunks"` - BinaryPatch []BinaryPatch `json:"binary_patch"` + Hunks []hunk `json:"hunks"` + BinaryPatch []binaryPatch `json:"binary_patch"` } -func (fd FileDiff) GoString() string { +func (fd fileDiff) GoString() string { return fmt.Sprintf( "&git_diff_parser.FileDiff{FromFile:%#v, ToFile:%#v, Type:%#v, IsBinary:%t, NewMode:%#v, Hunks:%#v, BinaryPatch:%#v}", fd.FromFile, @@ -153,6 +153,6 @@ func (fd FileDiff) GoString() string { ) } -type Diff struct { - FileDiff []FileDiff `json:"file_diff"` +type diff struct { + FileDiff []fileDiff `json:"file_diff"` } diff --git a/parity_test.go b/parity_test.go index 6b34385..8cf6ca2 
100644 --- a/parity_test.go +++ b/parity_test.go @@ -1,6 +1,6 @@ //go:build parity -package git_diff_parser_test +package git_diff_parser import ( "bytes" @@ -14,7 +14,6 @@ import ( "strings" "testing" - git_diff_parser "github.com/speakeasy-api/git-diff-parser" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -80,9 +79,9 @@ func TestApplyFile_ParityCorpus(t *testing.T) { if tc.fixture.ExpectConflict { require.Error(t, mergeErr) - var conflictErr *git_diff_parser.ConflictError - require.ErrorAs(t, mergeErr, &conflictErr) - assert.True(t, errors.Is(mergeErr, git_diff_parser.ErrPatchConflict)) + var applyErr *applyError + require.ErrorAs(t, mergeErr, &applyErr) + assert.True(t, errors.Is(mergeErr, ErrPatchConflict)) assert.Equal(t, tc.src, oracles.applied) assert.Contains(t, string(mergeResult.Content), "<<<<<<< Current") assert.Contains(t, string(mergeResult.Content), ">>>>>>> Incoming patch") @@ -109,7 +108,7 @@ func TestApplyFile_ParityCorpus(t *testing.T) { require.True(t, rejectOracles.rejected) rejectResult, rejectErr := runLibraryApply(t, tc, true) require.Error(t, rejectErr) - var applyErr *git_diff_parser.ApplyError + var applyErr *applyError require.ErrorAs(t, rejectErr, &applyErr) require.NotEqual(t, tc.src, rejectOracles.applied) assert.Equal(t, tc.src, rejectResult.Content) @@ -128,17 +127,17 @@ func TestApplyFile_ParityCorpus(t *testing.T) { } } -func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (git_diff_parser.ApplyResult, error) { +func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (applyResult, error) { t.Helper() - options := git_diff_parser.DefaultApplyOptions() + options := defaultApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") if rejectMode { - options.Mode = git_diff_parser.ApplyModeApply + options.Mode = applyModeApply } - return git_diff_parser.ApplyFileWithOptions(tc.src, tc.patch, options) + 
return applyFileWithOptions(tc.src, tc.patch, options) } func trimGitRejectHeader(rej []byte) []byte { diff --git a/parser.go b/parser.go index 563b6cc..2a59351 100644 --- a/parser.go +++ b/parser.go @@ -8,13 +8,13 @@ import ( "strings" ) -var ErrUnhandled = errors.New("unhandled git diff syntax") +var errUnhandled = errors.New("unhandled git diff syntax") -func NewHunk(line string) (Hunk, error) { +func newHunk(line string) (hunk, error) { namedHunkRegex := regexp.MustCompile(`(?m)^@@ -(?P\d+),?(?P\d+)? \+(?P\d+),?(?P\d+)? @@`) match := namedHunkRegex.FindStringSubmatch(line) if len(match) == 0 { - return Hunk{}, fmt.Errorf("invalid hunk header: %q", line) + return hunk{}, fmt.Errorf("invalid hunk header: %q", line) } result := make(map[string]string) for i, name := range namedHunkRegex.SubexpNames() { @@ -24,7 +24,7 @@ func NewHunk(line string) (Hunk, error) { } startLineNumberOld, err := strconv.Atoi(result["start_old"]) if err != nil { - return Hunk{}, fmt.Errorf("failed to parse start line number old: %w", err) + return hunk{}, fmt.Errorf("failed to parse start line number old: %w", err) } countOld, err := strconv.Atoi(result["count_old"]) if err != nil { @@ -32,13 +32,13 @@ func NewHunk(line string) (Hunk, error) { } startLineNumberNew, err := strconv.Atoi(result["start_new"]) if err != nil { - return Hunk{}, fmt.Errorf("failed to parse start line number new: %w", err) + return hunk{}, fmt.Errorf("failed to parse start line number new: %w", err) } countNew, err := strconv.Atoi(result["count_new"]) if err != nil { countNew = 1 } - return Hunk{ + return hunk{ StartLineNumberOld: startLineNumberOld, CountOld: countOld, StartLineNumberNew: startLineNumberNew, @@ -46,18 +46,18 @@ func NewHunk(line string) (Hunk, error) { }, nil } -type ParserMode int +type parserMode int const ( - modeHeader ParserMode = iota + modeHeader parserMode = iota modeHunk modeBinary ) type parser struct { - diff Diff + diff diff err []error - mode ParserMode + mode parserMode } func (p 
*parser) VisitLine(diff string) { @@ -76,44 +76,44 @@ func (p *parser) VisitLine(diff string) { fileHEAD := len(p.diff.FileDiff) - 1 if fileHEAD < 0 { - p.err = append(p.err, fmt.Errorf("%w: %s", ErrUnhandled, line)) + p.err = append(p.err, fmt.Errorf("%w: %s", errUnhandled, line)) return } hunkHEAD := len(p.diff.FileDiff[fileHEAD].Hunks) - 1 if hunkHEAD < 0 { - p.err = append(p.err, fmt.Errorf("%w: %s", ErrUnhandled, diff)) + p.err = append(p.err, fmt.Errorf("%w: %s", errUnhandled, diff)) return } hunk := &p.diff.FileDiff[fileHEAD].Hunks[hunkHEAD] // swallow extra, unused lines from start - if strings.HasPrefix(line, "~") && !hunk.ChangeList.IsSignificant() { + if strings.HasPrefix(line, "~") && !hunk.ChangeList.isSignificant() { hunk.StartLineNumberOld++ hunk.StartLineNumberNew++ hunk.CountOld-- hunk.CountNew-- - hunk.ChangeList = []ContentChange{} + hunk.ChangeList = []contentChange{} } if strings.HasPrefix(line, "+") { - if len(hunk.ChangeList) > 0 && hunk.ChangeList[len(hunk.ChangeList)-1].Type == ContentChangeTypeDelete { - hunk.ChangeList[len(hunk.ChangeList)-1].Type = ContentChangeTypeModify + if len(hunk.ChangeList) > 0 && hunk.ChangeList[len(hunk.ChangeList)-1].Type == contentChangeTypeDelete { + hunk.ChangeList[len(hunk.ChangeList)-1].Type = contentChangeTypeModify hunk.ChangeList[len(hunk.ChangeList)-1].To = trimSingleLineEnding(strings.TrimPrefix(line, "+")) - hunk.Lines = append(hunk.Lines, HunkLine{ + hunk.Lines = append(hunk.Lines, hunkLine{ Kind: '+', Text: trimSingleLineEnding(strings.TrimPrefix(line, "+")), HasNewline: hasNewline, }) return } - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeAdd, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeAdd, From: "", To: trimSingleLineEnding(strings.TrimPrefix(line, "+")), }) - hunk.Lines = append(hunk.Lines, HunkLine{ + hunk.Lines = append(hunk.Lines, hunkLine{ Kind: '+', Text: trimSingleLineEnding(strings.TrimPrefix(line, "+")), 
HasNewline: hasNewline, @@ -122,12 +122,12 @@ func (p *parser) VisitLine(diff string) { } if strings.HasPrefix(line, "-") { - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeDelete, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeDelete, From: trimSingleLineEnding(strings.TrimPrefix(line, "-")), To: "", }) - hunk.Lines = append(hunk.Lines, HunkLine{ + hunk.Lines = append(hunk.Lines, hunkLine{ Kind: '-', Text: trimSingleLineEnding(strings.TrimPrefix(line, "-")), HasNewline: hasNewline, @@ -136,12 +136,12 @@ func (p *parser) VisitLine(diff string) { } if strings.HasPrefix(line, " ") { - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeNOOP, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeNOOP, From: line, To: line, }) - hunk.Lines = append(hunk.Lines, HunkLine{ + hunk.Lines = append(hunk.Lines, hunkLine{ Kind: ' ', Text: trimSingleLineEnding(strings.TrimPrefix(line, " ")), HasNewline: hasNewline, @@ -150,8 +150,8 @@ func (p *parser) VisitLine(diff string) { } if line == "~" { - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeNOOP, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeNOOP, From: "\n", To: "\n", }) @@ -160,13 +160,13 @@ func (p *parser) VisitLine(diff string) { if strings.HasPrefix(line, `\ No newline at end of file`) { if n := len(hunk.Lines); n > 0 { - hunk.Lines[n-1].MarkNoNewline() + hunk.Lines[n-1].markNoNewline() } else { p.err = append(p.err, fmt.Errorf("unexpected no-newline marker without a preceding patch line")) return } - hunk.ChangeList = append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeNOOP, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeNOOP, From: line, To: line, }) @@ -174,8 +174,8 @@ func (p *parser) VisitLine(diff string) { } if line == "" { - hunk.ChangeList = 
append(hunk.ChangeList, ContentChange{ - Type: ContentChangeTypeNOOP, + hunk.ChangeList = append(hunk.ChangeList, contentChange{ + Type: contentChangeTypeNOOP, From: line, To: line, }) @@ -199,7 +199,7 @@ func (p *parser) tryVisitHeader(diff string) bool { return true } if fileHEAD < 0 { - p.err = append(p.err, fmt.Errorf("%w: %s", ErrUnhandled, diff)) + p.err = append(p.err, fmt.Errorf("%w: %s", errUnhandled, diff)) return true } if p.mode != modeHeader { @@ -224,22 +224,22 @@ func (p *parser) tryVisitHeader(diff string) bool { } if strings.HasPrefix(diff, "copy from ") { p.diff.FileDiff[fileHEAD].CopyFrom = strings.TrimPrefix(diff, "copy from ") - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified return true } if strings.HasPrefix(diff, "copy to ") { p.diff.FileDiff[fileHEAD].CopyTo = strings.TrimPrefix(diff, "copy to ") - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified return true } if strings.HasPrefix(diff, "rename from ") { p.diff.FileDiff[fileHEAD].RenameFrom = strings.TrimPrefix(diff, "rename from ") - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified return true } if strings.HasPrefix(diff, "rename to ") { p.diff.FileDiff[fileHEAD].RenameTo = strings.TrimPrefix(diff, "rename to ") - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified return true } @@ -248,7 +248,7 @@ func (p *parser) tryVisitHeader(diff string) bool { } if strings.HasPrefix(diff, "GIT binary patch") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified p.diff.FileDiff[fileHEAD].IsBinary = true p.mode = modeBinary return true @@ -269,23 +269,23 @@ func (p *parser) tryVisitHeader(diff string) bool { func (p *parser) visitFileModeHeader(diff string, fileHEAD int) bool { if strings.HasPrefix(diff, 
"new file mode ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified p.diff.FileDiff[fileHEAD].NewMode = strings.TrimPrefix(diff, "new file mode ") return true } if strings.HasPrefix(diff, "new mode ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified p.diff.FileDiff[fileHEAD].NewMode = strings.TrimPrefix(diff, "new mode ") return true } if strings.HasPrefix(diff, "deleted file mode ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeDeleted + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeDeleted p.diff.FileDiff[fileHEAD].OldMode = strings.TrimPrefix(diff, "deleted file mode ") return true } if strings.HasPrefix(diff, "old mode ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified p.diff.FileDiff[fileHEAD].OldMode = strings.TrimPrefix(diff, "old mode ") return true } @@ -317,27 +317,27 @@ func (p *parser) tryVisitBinary(diff string) bool { return true } if strings.HasPrefix(diff, "delta ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified startByteCount, err := strconv.Atoi(strings.Split(diff, " ")[1]) if err != nil { return true } - p.diff.FileDiff[fileHEAD].BinaryPatch = append(p.diff.FileDiff[fileHEAD].BinaryPatch, BinaryPatch{ - Type: BinaryDeltaTypeDelta, + p.diff.FileDiff[fileHEAD].BinaryPatch = append(p.diff.FileDiff[fileHEAD].BinaryPatch, binaryPatch{ + Type: binaryDeltaTypeDelta, Count: startByteCount, Content: "", }) return true } if strings.HasPrefix(diff, "literal ") { - p.diff.FileDiff[fileHEAD].Type = FileDiffTypeModified + p.diff.FileDiff[fileHEAD].Type = fileDiffTypeModified startByteCount, err := strconv.Atoi(strings.Split(diff, " ")[1]) if err != nil { return true } - p.diff.FileDiff[fileHEAD].BinaryPatch = append(p.diff.FileDiff[fileHEAD].BinaryPatch, BinaryPatch{ - Type: BinaryDeltaTypeLiteral, + 
p.diff.FileDiff[fileHEAD].BinaryPatch = append(p.diff.FileDiff[fileHEAD].BinaryPatch, binaryPatch{ + Type: binaryDeltaTypeLiteral, Count: startByteCount, Content: "", }) @@ -358,7 +358,7 @@ func (p *parser) tryVisitHunkHeader(diff string) bool { } if strings.HasPrefix(diff, "@@") { p.finalizeCurrentHunk() - hunk, err := NewHunk(diff) + hunk, err := newHunk(diff) if err != nil { p.err = append(p.err, err) } @@ -378,10 +378,10 @@ func (p *parser) finalizeCurrentHunk() { if len(hunks) == 0 { return } - p.diff.FileDiff[fileHEAD].Hunks[len(hunks)-1].MarkEOFMarkers() + p.diff.FileDiff[fileHEAD].Hunks[len(hunks)-1].markEOFMarkers() } -func (p *parser) parseDiffLine(line string) FileDiff { +func (p *parser) parseDiffLine(line string) fileDiff { line = trimSingleLineEnding(line) filesStr := line[11:] var oldPath, newPath string @@ -410,7 +410,7 @@ func (p *parser) parseDiffLine(line string) FileDiff { newPath = segs[1][3 : len(segs[1])-1] } - return FileDiff{ + return fileDiff{ FromFile: oldPath, ToFile: newPath, } @@ -425,8 +425,8 @@ func parsePercentValue(raw string) int { return value } -// Converts git diff --word-diff=porcelain output to a Diff object. -func Parse(diff string) (Diff, []error) { +// Converts git diff --word-diff=porcelain output to a diff object. +func parse(diff string) (diff, []error) { p := parser{} lines := splitLinesPreserveNewline(diff) for i := 0; i < len(lines); i++ { @@ -441,13 +441,13 @@ func Parse(diff string) (Diff, []error) { // SignificantChange Allows a structured diff to be passed into the `isSignificant` function to determine significance. That function can return a message, which is optionally passed as the final argument // Returns the first significant change found, or false if non found. 
-func SignificantChange(diff string, isSignificant func(*FileDiff, *ContentChange) (bool, string)) (bool, string, error) { - parsed, err := Parse(diff) +func significantChange(diff string, isSignificant func(*fileDiff, *contentChange) (bool, string)) (bool, string, error) { + parsed, err := parse(diff) if len(err) > 0 { return true, "", fmt.Errorf("failed to parse diff: %w", err[0]) } for _, fileDiff := range parsed.FileDiff { - if significant, msg := isSignificant(&fileDiff, &ContentChange{}); significant { + if significant, msg := isSignificant(&fileDiff, &contentChange{}); significant { return true, msg, nil } diff --git a/parser_test.go b/parser_test.go index 8112653..142c41e 100644 --- a/parser_test.go +++ b/parser_test.go @@ -1,4 +1,4 @@ -package git_diff_parser_test +package git_diff_parser import ( "embed" @@ -9,8 +9,6 @@ import ( "strings" "testing" - git_diff_parser "github.com/speakeasy-api/git-diff-parser" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -61,7 +59,7 @@ func TestParse(t *testing.T) { test := test t.Run(test.name, func(t *testing.T) { t.Parallel() - got, msg, err := git_diff_parser.SignificantChange(test.input, func(diff *git_diff_parser.FileDiff, change *git_diff_parser.ContentChange) (bool, string) { + got, msg, err := significantChange(test.input, func(diff *fileDiff, change *contentChange) (bool, string) { if diff.ToFile == "gen.yaml" || diff.ToFile == "RELEASES.md" { return false, "" } @@ -75,17 +73,17 @@ func TestParse(t *testing.T) { return false, "" } - if diff.Type == git_diff_parser.FileDiffTypeModified { + if diff.Type == fileDiffTypeModified { return true, fmt.Sprintf("significant diff %#v", diff) } - if change.Type == git_diff_parser.ContentChangeTypeNOOP { + if change.Type == contentChangeTypeNOOP { return false, "" } return true, fmt.Sprintf("significant change %#v in %s", change, diff.ToFile) }) require.NoError(t, err) - MatchMessageSnapshot(t, test.relativePath+".msg", msg) + 
MatchMessageSnapshot(t, test.relativePath+".msg", normalizeSnapshotTypes(msg)) assert.Equal(t, test.want, got) }) } @@ -110,14 +108,14 @@ new mode 100755 \ No newline at end of file ` - parsed, errs := git_diff_parser.Parse(diff) + parsed, errs := parse(diff) require.Empty(t, errs) require.Len(t, parsed.FileDiff, 1) fileDiff := parsed.FileDiff[0] assert.Equal(t, "src.txt", fileDiff.FromFile) assert.Equal(t, "dst.txt", fileDiff.ToFile) - assert.Equal(t, git_diff_parser.FileDiffTypeModified, fileDiff.Type) + assert.Equal(t, fileDiffTypeModified, fileDiff.Type) assert.Equal(t, "1234567", fileDiff.IndexOld) assert.Equal(t, "89abcde", fileDiff.IndexNew) assert.Equal(t, "100755", fileDiff.IndexMode) @@ -168,3 +166,13 @@ func MatchMessageSnapshot(t *testing.T, snapshotName string, content string) { require.NoError(t, err) require.Equal(t, string(f), content) } + +func normalizeSnapshotTypes(content string) string { + replacer := strings.NewReplacer( + "git_diff_parser.contentChange", "git_diff_parser.ContentChange", + "git_diff_parser.changeList", "git_diff_parser.ChangeList", + "git_diff_parser.hunk", "git_diff_parser.Hunk", + "git_diff_parser.binaryPatch", "git_diff_parser.BinaryPatch", + ) + return replacer.Replace(content) +} diff --git a/patchset.go b/patchset.go index f91f8a8..d826f2c 100644 --- a/patchset.go +++ b/patchset.go @@ -8,54 +8,54 @@ import ( ) var ( - ErrPatchCreate = errors.New("patch creates are not supported") - ErrPatchDelete = errors.New("patch deletes are not supported") - ErrPatchRename = errors.New("patch renames are not supported") - ErrPatchModeChange = errors.New("patch mode changes are not supported") - ErrPatchBinary = errors.New("binary patches are not supported") + errPatchCreate = errors.New("patch creates are not supported") + errPatchDelete = errors.New("patch deletes are not supported") + errPatchRename = errors.New("patch renames are not supported") + errPatchModeChange = errors.New("patch mode changes are not supported") + 
errPatchBinary = errors.New("binary patches are not supported") ) -type PatchsetOperation string +type patchsetOperation string const ( - PatchsetOperationCreate PatchsetOperation = "create" - PatchsetOperationDelete PatchsetOperation = "delete" - PatchsetOperationRename PatchsetOperation = "rename" - PatchsetOperationCopy PatchsetOperation = "copy" - PatchsetOperationModeChange PatchsetOperation = "mode change" - PatchsetOperationBinary PatchsetOperation = "binary" + patchsetOperationCreate patchsetOperation = "create" + patchsetOperationDelete patchsetOperation = "delete" + patchsetOperationRename patchsetOperation = "rename" + patchsetOperationCopy patchsetOperation = "copy" + patchsetOperationModeChange patchsetOperation = "mode change" + patchsetOperationBinary patchsetOperation = "binary" ) -type UnsupportedPatchError struct { - Operation PatchsetOperation +type unsupportedPatchError struct { + Operation patchsetOperation Path string From string To string } -func (e *UnsupportedPatchError) Error() string { +func (e *unsupportedPatchError) Error() string { switch e.Operation { - case PatchsetOperationCreate: + case patchsetOperationCreate: if e.Path != "" { return fmt.Sprintf("patch creates are not supported for %q", e.Path) } return "patch creates are not supported" - case PatchsetOperationDelete: + case patchsetOperationDelete: if e.Path != "" { return fmt.Sprintf("patch deletes are not supported for %q", e.Path) } return "patch deletes are not supported" - case PatchsetOperationRename: + case patchsetOperationRename: if e.From != "" || e.To != "" { return fmt.Sprintf("patch renames are not supported: %q -> %q", e.From, e.To) } return "patch renames are not supported" - case PatchsetOperationModeChange: + case patchsetOperationModeChange: if e.Path != "" { return fmt.Sprintf("patch mode changes are not supported for %q", e.Path) } return "patch mode changes are not supported" - case PatchsetOperationBinary: + case patchsetOperationBinary: if e.Path != "" { 
return fmt.Sprintf("binary patches are not supported for %q", e.Path) } @@ -65,57 +65,57 @@ func (e *UnsupportedPatchError) Error() string { } } -func (e *UnsupportedPatchError) Is(target error) bool { +func (e *unsupportedPatchError) Is(target error) bool { switch target { - case ErrPatchCreate: - return e.Operation == PatchsetOperationCreate - case ErrPatchDelete: - return e.Operation == PatchsetOperationDelete - case ErrPatchRename: - return e.Operation == PatchsetOperationRename - case ErrPatchModeChange: - return e.Operation == PatchsetOperationModeChange - case ErrPatchBinary: - return e.Operation == PatchsetOperationBinary + case errPatchCreate: + return e.Operation == patchsetOperationCreate + case errPatchDelete: + return e.Operation == patchsetOperationDelete + case errPatchRename: + return e.Operation == patchsetOperationRename + case errPatchModeChange: + return e.Operation == patchsetOperationModeChange + case errPatchBinary: + return e.Operation == patchsetOperationBinary default: return false } } -type Patchset struct { - Files []PatchsetFile +type patchset struct { + Files []patchsetFile } -type PatchsetFile struct { - Diff FileDiff +type patchsetFile struct { + Diff fileDiff Patch []byte } -func ParsePatchset(patchData []byte) (Patchset, []error) { - parsed, errs := Parse(string(patchData)) +func parsePatchset(patchData []byte) (patchset, []error) { + parsed, errs := parse(string(patchData)) if len(errs) > 0 { - return Patchset{}, errs + return patchset{}, errs } chunks := splitPatchsetChunks(patchData) if len(chunks) != len(parsed.FileDiff) { - return Patchset{}, []error{ + return patchset{}, []error{ fmt.Errorf("parsed %d file diffs but split %d patch fragments", len(parsed.FileDiff), len(chunks)), } } - files := make([]PatchsetFile, len(chunks)) + files := make([]patchsetFile, len(chunks)) for i := range chunks { - files[i] = PatchsetFile{ + files[i] = patchsetFile{ Diff: parsed.FileDiff[i], Patch: chunks[i], } } - return Patchset{Files: files}, 
nil + return patchset{Files: files}, nil } -func (p Patchset) Apply(tree map[string][]byte) (map[string][]byte, error) { +func (p patchset) apply(tree map[string][]byte) (map[string][]byte, error) { out := cloneTree(tree) for _, file := range p.Files { if err := applyPatchsetFile(out, file); err != nil { @@ -125,12 +125,12 @@ func (p Patchset) Apply(tree map[string][]byte) (map[string][]byte, error) { return out, nil } -func ApplyPatchset(tree map[string][]byte, patchData []byte) (map[string][]byte, error) { - patchset, errs := ParsePatchset(patchData) +func applyPatchset(tree map[string][]byte, patchData []byte) (map[string][]byte, error) { + patchset, errs := parsePatchset(patchData) if len(errs) > 0 { return nil, fmt.Errorf("unsupported patch syntax: %w", errs[0]) } - return patchset.Apply(tree) + return patchset.apply(tree) } func cloneTree(tree map[string][]byte) map[string][]byte { diff --git a/patchset_apply.go b/patchset_apply.go index 7f446e5..b613fc7 100644 --- a/patchset_apply.go +++ b/patchset_apply.go @@ -2,23 +2,23 @@ package git_diff_parser import "fmt" -const patchsetOperationModify PatchsetOperation = "modify" +const patchsetOperationModify patchsetOperation = "modify" -func applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { +func applyPatchsetFile(tree map[string][]byte, file patchsetFile) error { if file.Diff.IsBinary { - return &UnsupportedPatchError{ - Operation: PatchsetOperationBinary, + return &unsupportedPatchError{ + Operation: patchsetOperationBinary, Path: firstNonEmpty(file.Diff.ToFile, file.Diff.FromFile), } } - op, sourcePath, targetPath, err := patchsetOperation(tree, file.Diff) + op, sourcePath, targetPath, err := determinePatchsetOperation(tree, file.Diff) if err != nil { return err } switch op { - case PatchsetOperationCreate: + case patchsetOperationCreate: if _, exists := tree[targetPath]; exists { return fmt.Errorf("cannot create existing file %q", targetPath) } @@ -28,7 +28,7 @@ func applyPatchsetFile(tree 
map[string][]byte, file PatchsetFile) error { } tree[targetPath] = append([]byte(nil), content...) return nil - case PatchsetOperationDelete: + case patchsetOperationDelete: content, exists := tree[sourcePath] if !exists { return fmt.Errorf("cannot delete missing file %q", sourcePath) @@ -40,7 +40,7 @@ func applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { } delete(tree, sourcePath) return nil - case PatchsetOperationRename: + case patchsetOperationRename: content, exists := tree[sourcePath] if !exists { return fmt.Errorf("cannot rename missing file %q", sourcePath) @@ -57,7 +57,7 @@ func applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { delete(tree, sourcePath) tree[targetPath] = append([]byte(nil), applied...) return nil - case PatchsetOperationCopy: + case patchsetOperationCopy: content, exists := tree[sourcePath] if !exists { return fmt.Errorf("cannot copy missing file %q", sourcePath) @@ -71,7 +71,7 @@ func applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { } tree[targetPath] = append([]byte(nil), applied...) 
return nil - case PatchsetOperationModeChange, patchsetOperationModify: + case patchsetOperationModeChange, patchsetOperationModify: content, exists := tree[targetPath] if !exists { return fmt.Errorf("cannot modify missing file %q", targetPath) @@ -87,40 +87,40 @@ func applyPatchsetFile(tree map[string][]byte, file PatchsetFile) error { } } -func patchsetOperation(tree map[string][]byte, fileDiff FileDiff) (PatchsetOperation, string, string, error) { +func determinePatchsetOperation(tree map[string][]byte, fileDiff fileDiff) (patchsetOperation, string, string, error) { sourcePath, targetPath := patchsetPaths(fileDiff) switch { case fileDiff.RenameFrom != "" || fileDiff.RenameTo != "": - return PatchsetOperationRename, sourcePath, targetPath, nil + return patchsetOperationRename, sourcePath, targetPath, nil case fileDiff.CopyFrom != "" || fileDiff.CopyTo != "": - return PatchsetOperationCopy, sourcePath, targetPath, nil - case fileDiff.Type == FileDiffTypeAdded: - return PatchsetOperationCreate, "", targetPath, nil - case fileDiff.Type == FileDiffTypeDeleted: - return PatchsetOperationDelete, sourcePath, "", nil + return patchsetOperationCopy, sourcePath, targetPath, nil + case fileDiff.Type == fileDiffTypeAdded: + return patchsetOperationCreate, "", targetPath, nil + case fileDiff.Type == fileDiffTypeDeleted: + return patchsetOperationDelete, sourcePath, "", nil } if fileDiff.NewMode != "" && fileDiff.OldMode == "" { if _, exists := tree[targetPath]; exists { return "", "", "", fmt.Errorf("cannot create existing file %q", targetPath) } - return PatchsetOperationCreate, "", targetPath, nil + return patchsetOperationCreate, "", targetPath, nil } if fileDiff.OldMode != "" || fileDiff.NewMode != "" { - return PatchsetOperationModeChange, sourcePath, targetPath, nil + return patchsetOperationModeChange, sourcePath, targetPath, nil } return patchsetOperationModify, sourcePath, targetPath, nil } -func patchsetPaths(fileDiff FileDiff) (string, string) { +func 
patchsetPaths(fileDiff fileDiff) (string, string) { sourcePath := firstNonEmpty(fileDiff.RenameFrom, fileDiff.CopyFrom, fileDiff.FromFile, fileDiff.ToFile) targetPath := firstNonEmpty(fileDiff.RenameTo, fileDiff.CopyTo, fileDiff.ToFile, fileDiff.FromFile) return sourcePath, targetPath } -func applyPatchsetContent(pristine []byte, file PatchsetFile) ([]byte, error) { +func applyPatchsetContent(pristine []byte, file patchsetFile) ([]byte, error) { if len(file.Diff.Hunks) == 0 { return append([]byte(nil), pristine...), nil } @@ -130,7 +130,7 @@ func applyPatchsetContent(pristine []byte, file PatchsetFile) ([]byte, error) { hunks = append(hunks, patchHunkFromHunk(hunk)) } - result, err := NewPatchApply(ApplyOptions{Mode: ApplyModeApply}).applyValidatedPatch(pristine, validatedPatch{ + result, err := newPatchApply(applyOptions{Mode: applyModeApply}).applyValidatedPatch(pristine, validatedPatch{ rejectHead: formatRejectHeader(file.Diff), hunks: hunks, }) diff --git a/patchset_test.go b/patchset_test.go index 99ea3fe..d15f74d 100644 --- a/patchset_test.go +++ b/patchset_test.go @@ -1,10 +1,9 @@ -package git_diff_parser_test +package git_diff_parser import ( "path/filepath" "testing" - git_diff_parser "github.com/speakeasy-api/git-diff-parser" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -16,7 +15,7 @@ func TestParsePatchset(t *testing.T) { patchB := buildPatch(t, "beta.txt", []byte("beta\none\n"), []byte("beta\ntwo\n")) patchsetData := append(append([]byte{}, patchA...), patchB...) - patchset, errs := git_diff_parser.ParsePatchset(patchsetData) + patchset, errs := parsePatchset(patchsetData) require.Empty(t, errs) require.Len(t, patchset.Files, 2) @@ -39,7 +38,7 @@ func TestPatchsetApply_MultipleFiles(t *testing.T) { patchB := buildPatch(t, "beta.txt", original["beta.txt"], []byte("beta\ntwo\n")) patchsetData := append(append([]byte{}, patchA...), patchB...) 
- applied, err := git_diff_parser.ApplyPatchset(original, patchsetData) + applied, err := applyPatchset(original, patchsetData) require.NoError(t, err) assert.Equal(t, []byte("alpha\ntwo\n"), applied["alpha.txt"]) @@ -127,7 +126,7 @@ new mode 100755 t.Parallel() original := cloneTestTree(test.tree) - applied, err := git_diff_parser.ApplyPatchset(test.tree, test.patch) + applied, err := applyPatchset(test.tree, test.patch) require.NoError(t, err) assert.Equal(t, test.wantTree, applied) assert.Equal(t, original, test.tree) @@ -158,7 +157,7 @@ rename to dst.txt } original := cloneTestTree(tree) - applied, err := git_diff_parser.ApplyPatchset(tree, patchsetData) + applied, err := applyPatchset(tree, patchsetData) require.Error(t, err) assert.Nil(t, applied) assert.Equal(t, original, tree) @@ -168,16 +167,16 @@ rename to dst.txt func TestPatchsetApply_RejectsBinaryPatches(t *testing.T) { t.Parallel() - _, err := git_diff_parser.ApplyPatchset( + _, err := applyPatchset( map[string][]byte{"favicon-16x16-light.png": []byte("binary")}, mustReadFile(t, filepath.Join("testdata", "significant", "binary-delta.diff")), ) require.Error(t, err) assert.Contains(t, err.Error(), "binary patches are not supported") - var unsupportedErr *git_diff_parser.UnsupportedPatchError + var unsupportedErr *unsupportedPatchError require.ErrorAs(t, err, &unsupportedErr) - assert.ErrorIs(t, err, git_diff_parser.ErrPatchBinary) + assert.ErrorIs(t, err, errPatchBinary) } func cloneTestTree(tree map[string][]byte) map[string][]byte { From 31b932dced697e9da8fee4c568d51a62233ce9a9 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Mon, 30 Mar 2026 12:37:00 +0100 Subject: [PATCH 16/20] feat: tighten apply parity coverage --- apply.go | 12 +- apply_internal_test.go | 59 ++++++- apply_options.go | 13 +- apply_session.go | 162 ++++++++++++++++-- apply_test.go | 103 ++++++++--- parity_test.go | 22 ++- patchset_test.go | 98 +++++++++++ .../fixture.json | 5 + .../out | 4 + .../patch | 8 + .../src | 4 + 
.../parity/context-reduced-both/fixture.json | 5 + testdata/parity/context-reduced-both/out | 7 + testdata/parity/context-reduced-both/patch | 10 ++ testdata/parity/context-reduced-both/src | 7 + .../fixture.json | 5 + .../context-reduced-end-anchor-clear/out | 4 + .../context-reduced-end-anchor-clear/patch | 8 + .../context-reduced-end-anchor-clear/src | 4 + .../context-reduced-leading/fixture.json | 5 + testdata/parity/context-reduced-leading/out | 7 + testdata/parity/context-reduced-leading/patch | 10 ++ testdata/parity/context-reduced-leading/src | 7 + .../context-reduced-trailing/fixture.json | 5 + testdata/parity/context-reduced-trailing/out | 7 + .../parity/context-reduced-trailing/patch | 10 ++ testdata/parity/context-reduced-trailing/src | 7 + 27 files changed, 554 insertions(+), 44 deletions(-) create mode 100644 testdata/parity/context-reduced-beginning-anchor-clear/fixture.json create mode 100644 testdata/parity/context-reduced-beginning-anchor-clear/out create mode 100644 testdata/parity/context-reduced-beginning-anchor-clear/patch create mode 100644 testdata/parity/context-reduced-beginning-anchor-clear/src create mode 100644 testdata/parity/context-reduced-both/fixture.json create mode 100644 testdata/parity/context-reduced-both/out create mode 100644 testdata/parity/context-reduced-both/patch create mode 100644 testdata/parity/context-reduced-both/src create mode 100644 testdata/parity/context-reduced-end-anchor-clear/fixture.json create mode 100644 testdata/parity/context-reduced-end-anchor-clear/out create mode 100644 testdata/parity/context-reduced-end-anchor-clear/patch create mode 100644 testdata/parity/context-reduced-end-anchor-clear/src create mode 100644 testdata/parity/context-reduced-leading/fixture.json create mode 100644 testdata/parity/context-reduced-leading/out create mode 100644 testdata/parity/context-reduced-leading/patch create mode 100644 testdata/parity/context-reduced-leading/src create mode 100644 
testdata/parity/context-reduced-trailing/fixture.json create mode 100644 testdata/parity/context-reduced-trailing/out create mode 100644 testdata/parity/context-reduced-trailing/patch create mode 100644 testdata/parity/context-reduced-trailing/src diff --git a/apply.go b/apply.go index cac58cc..9fc52d8 100644 --- a/apply.go +++ b/apply.go @@ -103,8 +103,12 @@ func fileDiffHasChanges(fileDiff fileDiff) bool { } func desiredLines(hunk patchHunk) []fileLine { + return desiredLinesWindow(hunk, 0, len(hunk.lines)) +} + +func desiredLinesWindow(hunk patchHunk, start, end int) []fileLine { lines := make([]fileLine, 0, len(hunk.lines)) - for _, line := range hunk.lines { + for _, line := range hunk.lines[start:end] { if line.kind == ' ' || line.kind == '+' { lines = append(lines, fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.newEOF}) } @@ -113,8 +117,12 @@ func desiredLines(hunk patchHunk) []fileLine { } func preimageLines(hunk patchHunk) []fileLine { + return preimageLinesWindow(hunk, 0, len(hunk.lines)) +} + +func preimageLinesWindow(hunk patchHunk, start, end int) []fileLine { lines := make([]fileLine, 0, len(hunk.lines)) - for _, line := range hunk.lines { + for _, line := range hunk.lines[start:end] { if line.kind == ' ' || line.kind == '-' { lines = append(lines, fileLine{text: line.text, hasNewline: line.hasNewline, eofMarker: line.oldEOF}) } diff --git a/apply_internal_test.go b/apply_internal_test.go index 6ac7569..5338790 100644 --- a/apply_internal_test.go +++ b/apply_internal_test.go @@ -48,7 +48,64 @@ func TestFindPosForFragmentMatchesExactBlock(t *testing.T) { {text: "alpha", hasNewline: true}, {text: "bravo", hasNewline: true}, {text: "charlie", hasNewline: true}, - }) + }, false, false) + require.True(t, matched) + assert.Equal(t, 1, match) +} + +func TestFindPosWithMinContextReducesLeadingContext(t *testing.T) { + session := &applySession{ + applier: &patchApply{options: applyOptions{MinContext: 1, MinContextSet: true}}, + 
sourceLines: splitFileLines([]byte("a0\nA1\na2\na3\na4\na5\na6\n")), + patched: make([]bool, 7), + } + hunk := patchHunk{ + oldStart: 2, + oldCount: 5, + newStart: 2, + newCount: 5, + lines: []patchLine{ + {kind: ' ', text: "a1", hasNewline: true}, + {kind: ' ', text: "a2", hasNewline: true}, + {kind: '-', text: "a3", hasNewline: true}, + {kind: '+', text: "A3", hasNewline: true}, + {kind: ' ', text: "a4", hasNewline: true}, + {kind: ' ', text: "a5", hasNewline: true}, + }, + } + + match, matched := session.findPos(hunk) + require.True(t, matched) + assert.Equal(t, 2, match.sourceStart) + assert.Equal(t, 5, match.sourceEnd) + assert.Equal(t, 1, match.hunkStart) + assert.Equal(t, 5, match.hunkEnd) +} + +func TestFindPosForFragmentRejectsPatchedRangesWithoutOverlap(t *testing.T) { + session := &applySession{ + sourceLines: splitFileLines([]byte("zero\nalpha\nbravo\ncharlie\n")), + patched: []bool{false, true, true, false}, + } + + _, matched := session.findPosForFragment(1, []fileLine{ + {text: "alpha", hasNewline: true}, + {text: "bravo", hasNewline: true}, + }, false, false) + assert.False(t, matched) +} + +func TestFindPosForFragmentAllowsPatchedRangesWithOverlap(t *testing.T) { + session := &applySession{ + applier: &patchApply{options: applyOptions{AllowOverlap: true}}, + sourceLines: splitFileLines([]byte("zero\nalpha\nbravo\ncharlie\n")), + patched: []bool{false, true, true, false}, + } + + match, matched := session.findPosForFragment(1, []fileLine{ + {text: "alpha", hasNewline: true}, + {text: "bravo", hasNewline: true}, + }, false, false) require.True(t, matched) assert.Equal(t, 1, match) } diff --git a/apply_options.go b/apply_options.go index a19dc59..d3adb33 100644 --- a/apply_options.go +++ b/apply_options.go @@ -1,5 +1,7 @@ package git_diff_parser +import "math" + // applyMode controls how the apply engine treats hunks that cannot be placed // directly into the target content. 
type applyMode int @@ -23,6 +25,9 @@ type applyOptions struct { Mode applyMode ConflictLabels conflictLabels IgnoreWhitespace bool + AllowOverlap bool + MinContext int + MinContextSet bool Reverse bool UnidiffZero bool Recount bool @@ -31,7 +36,8 @@ type applyOptions struct { func defaultApplyOptions() applyOptions { return applyOptions{ - Mode: applyModeMerge, + Mode: applyModeMerge, + MinContext: math.MaxInt, ConflictLabels: conflictLabels{ Current: "Current", Incoming: "Incoming patch", @@ -54,12 +60,17 @@ func (o applyOptions) normalize() applyOptions { } if o.Mode == applyModeMerge { defaults := defaultApplyOptions() + if !o.MinContextSet { + o.MinContext = defaults.MinContext + } if o.ConflictLabels.Current == "" { o.ConflictLabels.Current = defaults.ConflictLabels.Current } if o.ConflictLabels.Incoming == "" { o.ConflictLabels.Incoming = defaults.ConflictLabels.Incoming } + } else if !o.MinContextSet { + o.MinContext = defaultApplyOptions().MinContext } return o } diff --git a/apply_session.go b/apply_session.go index ff85fd0..a00c593 100644 --- a/apply_session.go +++ b/apply_session.go @@ -10,6 +10,7 @@ type validatedPatch struct { type applySession struct { applier *patchApply sourceLines []fileLine + patched []bool image []fileLine cursor int conflicts []applyConflict @@ -18,6 +19,7 @@ type applySession struct { type matchedHunk struct { sourceStart int + sourceEnd int hunkStart int hunkEnd int } @@ -57,6 +59,7 @@ func (p *patchApply) newApplySession(pristine []byte) *applySession { return &applySession{ applier: p, sourceLines: sourceLines, + patched: make([]bool, len(sourceLines)), image: make([]fileLine, 0, len(sourceLines)), } } @@ -96,6 +99,12 @@ func (s *applySession) applyHunk(hunk patchHunk, match matchedHunk) { s.image = append(s.image, fileLine{text: hunkLine.text, hasNewline: hunkLine.hasNewline, eofMarker: hunkLine.newEOF}) } } + + if !s.allowOverlap() { + for i := match.sourceStart; i < match.sourceEnd && i < len(s.patched); i++ { + 
s.patched[i] = true + } + } } func (s *applySession) appendConflictingHunk(hunk patchHunk) { @@ -148,31 +157,78 @@ func (s *applySession) findPos(hunk patchHunk) (matchedHunk, bool) { return matchedHunk{}, false } - preimage := preimageLines(hunk) - if pos, ok := s.findPosForFragment(preferred, preimage); ok { - return matchedHunk{ - sourceStart: pos, - hunkStart: 0, - hunkEnd: len(hunk.lines), - }, true + matchBeginning := hunk.oldStart == 0 || (hunk.oldStart == 1 && !s.unidiffZero()) + leading, trailing := hunkContext(hunk.lines) + matchEnd := !s.unidiffZero() && trailing == 0 + + hunkStart := 0 + hunkEnd := len(hunk.lines) + + for { + preimage := preimageLinesWindow(hunk, hunkStart, hunkEnd) + if pos, ok := s.findPosForFragment(preferred, preimage, matchBeginning, matchEnd); ok { + return matchedHunk{ + sourceStart: pos, + sourceEnd: pos + len(preimage), + hunkStart: hunkStart, + hunkEnd: hunkEnd, + }, true + } + + if leading <= s.minContext() && trailing <= s.minContext() { + break + } + if matchBeginning || matchEnd { + matchBeginning = false + matchEnd = false + continue + } + if leading >= trailing && hunkStart < hunkEnd { + hunkStart++ + preferred-- + if preferred < s.cursor { + preferred = s.cursor + } + leading-- + } + if trailing > leading && hunkStart < hunkEnd { + hunkEnd-- + trailing-- + } } return matchedHunk{}, false } -func (s *applySession) findPosForFragment(preferred int, fragment []fileLine) (int, bool) { +func (s *applySession) findPosForFragment(preferred int, fragment []fileLine, matchBeginning, matchEnd bool) (int, bool) { + maxStart := s.sourceContentLines() - len(fragment) + if maxStart < 0 { + maxStart = s.sourceContentLines() + } + if matchBeginning { + preferred = 0 + } else if matchEnd { + preferred = maxStart + } + if preferred > maxStart { + preferred = maxStart + } + if preferred < s.cursor { + preferred = s.cursor + } + for offset := 0; ; offset++ { left := preferred - offset - if left >= s.cursor && matchFragment(s.sourceLines, 
left, fragment, s.ignoreWhitespace()) { + if left >= s.cursor && s.matchFragmentAt(left, fragment, matchBeginning, matchEnd) { return left, true } right := preferred + offset - if offset > 0 && right >= s.cursor && matchFragment(s.sourceLines, right, fragment, s.ignoreWhitespace()) { + if offset > 0 && right >= s.cursor && s.matchFragmentAt(right, fragment, matchBeginning, matchEnd) { return right, true } - if left < s.cursor && right > len(s.sourceLines) { + if left < s.cursor && right > maxStart { break } } @@ -180,6 +236,35 @@ func (s *applySession) findPosForFragment(preferred int, fragment []fileLine) (i return 0, false } +func (s *applySession) matchFragmentAt(start int, fragment []fileLine, matchBeginning, matchEnd bool) bool { + if matchBeginning && start != 0 { + return false + } + if start < 0 { + return false + } + if len(fragment) == 0 { + if matchEnd { + return start == s.sourceContentLines() + } + return start <= s.sourceContentLines() + } + if start+len(fragment) > len(s.sourceLines) { + return false + } + if matchEnd && start+len(fragment) != s.sourceContentLines() { + return false + } + if !s.allowOverlap() { + for i := start; i < start+len(fragment); i++ { + if i < len(s.patched) && s.patched[i] { + return false + } + } + } + return matchFragment(s.sourceLines, start, fragment, s.ignoreWhitespace()) +} + func patchHunkFromHunk(hunk hunk) patchHunk { lines := make([]patchLine, 0, len(hunk.Lines)) for _, line := range hunk.Lines { @@ -226,3 +311,58 @@ func formatPatchHunkRange(start, count int) string { func (s *applySession) ignoreWhitespace() bool { return s.applier != nil && s.applier.options.IgnoreWhitespace } + +func (s *applySession) allowOverlap() bool { + return s.applier != nil && s.applier.options.AllowOverlap +} + +func (s *applySession) minContext() int { + if s.applier == nil { + return 0 + } + return s.applier.options.MinContext +} + +func (s *applySession) unidiffZero() bool { + return s.applier != nil && s.applier.options.UnidiffZero 
+} + +func (s *applySession) sourceContentLines() int { + if n := len(s.sourceLines); n > 0 && s.sourceLines[n-1].eofMarker { + return n - 1 + } + return len(s.sourceLines) +} + +func hunkContext(lines []patchLine) (int, int) { + firstChange := len(lines) + lastChange := -1 + for i, line := range lines { + if line.kind == '+' || line.kind == '-' { + if firstChange == len(lines) { + firstChange = i + } + lastChange = i + } + } + + if lastChange < 0 { + return len(lines), len(lines) + } + + leading := 0 + for i := 0; i < firstChange; i++ { + if lines[i].kind == ' ' { + leading++ + } + } + + trailing := 0 + for i := len(lines) - 1; i > lastChange; i-- { + if lines[i].kind == ' ' { + trailing++ + } + } + + return leading, trailing +} diff --git a/apply_test.go b/apply_test.go index 8532913..9495af5 100644 --- a/apply_test.go +++ b/apply_test.go @@ -206,31 +206,70 @@ func TestApplyFile_BoundaryCases(t *testing.T) { original := []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n") tests := []struct { - name string - want []byte + name string + want []byte + requiresUnidiff0 bool }{ - {name: "add head", want: []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, - {name: "insert second", want: []byte("b\na\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, - {name: "modify head", want: []byte("a\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, - {name: "delete head", want: []byte("c\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n")}, + {name: "add head", want: []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "insert second", want: []byte("b\na\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "modify head", want: []byte("a\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: 
true}, + {name: "delete head", want: []byte("c\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, {name: "add tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n")}, {name: "modify tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\nz\n")}, {name: "delete tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\n")}, } - for _, context := range []int{3, 0} { - context := context - for _, test := range tests { - test := test - t.Run(test.name+" context "+contextLabel(context), func(t *testing.T) { - t.Parallel() + for _, test := range tests { + test := test + t.Run(test.name+" context "+contextLabel(3), func(t *testing.T) { + t.Parallel() + + patch := buildPatchWithContext(t, "victim", original, test.want, 3) + applied, err := ApplyFile(original, patch) + require.NoError(t, err) + assert.Equal(t, test.want, applied) + }) + } +} + +func TestApplyFileWithOptions_ZeroContextBoundaryCasesRequireUnidiffZero(t *testing.T) { + t.Parallel() + + original := []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n") + tests := []struct { + name string + want []byte + requiresUnidiff0 bool + }{ + {name: "add head", want: []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "insert second", want: []byte("b\na\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "modify head", want: []byte("a\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "delete head", want: []byte("c\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n"), requiresUnidiff0: true}, + {name: "add tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\nz\n")}, + {name: "modify tail", want: 
[]byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\nz\n")}, + {name: "delete tail", want: []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\n")}, + } - patch := buildPatchWithContext(t, "victim", original, test.want, context) - applied, err := ApplyFile(original, patch) + for _, test := range tests { + test := test + t.Run(test.name, func(t *testing.T) { + t.Parallel() + + patch := buildPatchWithContext(t, "victim", original, test.want, 0) + baseline, err := applyFileWithOptions(original, patch, applyOptions{}) + if test.requiresUnidiff0 { + require.Error(t, err) + } else { require.NoError(t, err) - assert.Equal(t, test.want, applied) + assert.Equal(t, test.want, baseline.Content) + } + + applied, err := applyFileWithOptions(original, patch, applyOptions{ + UnidiffZero: true, }) - } + require.NoError(t, err) + assert.Equal(t, test.want, applied.Content) + }) } } @@ -322,9 +361,14 @@ func TestApplyFile_EmptyContextPatches(t *testing.T) { t.Parallel() patch := buildPatchWithContext(t, "file", test.original, test.target, 0) - applied, err := ApplyFile(test.original, patch) + _, err := applyFileWithOptions(test.original, patch, applyOptions{}) + require.Error(t, err) + + applied, err := applyFileWithOptions(test.original, patch, applyOptions{ + UnidiffZero: true, + }) require.NoError(t, err) - assert.Equal(t, test.target, applied) + assert.Equal(t, test.target, applied.Content) }) } } @@ -371,23 +415,29 @@ func TestApplyFile_EmptyContextNoTrailingNewlinePatches(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := ApplyFile(test.original, test.patch) + applied, err := applyFileWithOptions(test.original, test.patch, applyOptions{}) require.NoError(t, err) - assert.Equal(t, test.target, applied) + assert.Equal(t, test.target, applied.Content) }) } } -func TestApplyFile_RelocatesHunkWhenContextStillMatches(t *testing.T) { +func TestApplyFileWithOptions_ReducesContextToRelocateHunk(t 
*testing.T) { t.Parallel() originalPristine := []byte("package testsdk\n\ntype Status struct{}\n") patchData := buildPatch(t, "status.go", originalPristine, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) shiftedPristine := []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n") - applied, err := ApplyFile(shiftedPristine, patchData) + _, err := applyFileWithOptions(shiftedPristine, patchData, applyOptions{}) + require.Error(t, err) + + applied, err := applyFileWithOptions(shiftedPristine, patchData, applyOptions{ + MinContext: 1, + MinContextSet: true, + }) require.NoError(t, err) - assert.Equal(t, []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n"), applied) + assert.Equal(t, []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n"), applied.Content) } func TestApplyFile_RelocatesToNearestMatchingBlock(t *testing.T) { @@ -567,15 +617,14 @@ func TestApplyFileWithOptions_UnidiffZeroIsAccepted(t *testing.T) { -beta `) - baseline, err := applyFileWithOptions(current, patchData, applyOptions{}) - require.NoError(t, err) - assert.Equal(t, []byte("alpha\ngamma\n"), baseline.Content) + _, err := applyFileWithOptions(current, patchData, applyOptions{}) + require.Error(t, err) applied, err := applyFileWithOptions(current, patchData, applyOptions{ UnidiffZero: true, }) require.NoError(t, err) - assert.Equal(t, baseline.Content, applied.Content) + assert.Equal(t, []byte("alpha\ngamma\n"), applied.Content) } func TestApplyFileWithOptions_RecountRebuildsHunkCounts(t *testing.T) { diff --git a/parity_test.go b/parity_test.go index 8cf6ca2..d803731 100644 --- a/parity_test.go +++ b/parity_test.go @@ -11,6 +11,7 @@ import ( "os/exec" "path/filepath" "sort" + "strconv" 
"strings" "testing" @@ -133,9 +134,14 @@ func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (applyResult, options := defaultApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") + options.UnidiffZero = fixtureHasGitArg(tc.fixture, "--unidiff-zero") if rejectMode { options.Mode = applyModeApply } + if minContext, ok := fixtureContextArg(tc.fixture); ok { + options.MinContext = minContext + options.MinContextSet = true + } return applyFileWithOptions(tc.src, tc.patch, options) } @@ -156,6 +162,19 @@ func fixtureHasGitArg(fixture parityFixture, arg string) bool { return false } +func fixtureContextArg(fixture parityFixture) (int, bool) { + for _, candidate := range fixture.GitArgs { + if !strings.HasPrefix(candidate, "-C") || len(candidate) <= 2 { + continue + } + value, err := strconv.Atoi(strings.TrimPrefix(candidate, "-C")) + if err == nil { + return value, true + } + } + return 0, false +} + type gitApplyOracle struct { applied []byte tree parityTree @@ -205,8 +224,7 @@ func runGitApplyOracles(t *testing.T, tc parityCase, extraArgs ...string) gitApp oracles.tree = collectParityTree(t, dir) if len(output) > 0 && err == nil { - // git apply is quiet here; keep the command output surfaced only if it was unexpected. - assert.Empty(t, string(output)) + // git apply may emit successful warnings like context reduction; tree state is the oracle here. 
} return oracles diff --git a/patchset_test.go b/patchset_test.go index d15f74d..382fa85 100644 --- a/patchset_test.go +++ b/patchset_test.go @@ -164,6 +164,104 @@ rename to dst.txt assert.Contains(t, err.Error(), "missing file") } +func TestPatchsetApply_SameFilenameSequentialDiffs(t *testing.T) { + t.Parallel() + + patchData := []byte(`diff --git a/same_fn b/same_fn +--- a/same_fn ++++ b/same_fn +@@ -1,13 +1,13 @@ + a + b + c +-d ++z + e + f + g + h + i + j + k + l + m +diff --git a/same_fn b/same_fn +--- a/same_fn ++++ b/same_fn +@@ -1,13 +1,13 @@ + a + b + c + z +-e ++y + f + g + h + i + j + k + l + m +`) + + tree := map[string][]byte{ + "same_fn": []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\n"), + } + + applied, err := applyPatchset(tree, patchData) + require.NoError(t, err) + assert.Equal(t, map[string][]byte{ + "same_fn": []byte("a\nb\nc\nz\ny\nf\ng\nh\ni\nj\nk\nl\nm\n"), + }, applied) +} + +func TestPatchsetApply_SameFilenameIndependentDiffs(t *testing.T) { + t.Parallel() + + patchData := []byte(`diff --git a/same_fn b/same_fn +--- a/same_fn ++++ b/same_fn +@@ -1,13 +1,13 @@ + a + b + c +-d ++z + e + f + g + h + i + j + k + l + m +diff --git a/same_fn b/same_fn +--- a/same_fn ++++ b/same_fn +@@ -6,8 +6,8 @@ f + g + h +-i ++y + j + k + l + m +`) + + tree := map[string][]byte{ + "same_fn": []byte("a\nb\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\n"), + } + + applied, err := applyPatchset(tree, patchData) + require.NoError(t, err) + assert.Equal(t, map[string][]byte{ + "same_fn": []byte("a\nb\nc\nz\ne\nf\ng\nh\ny\nj\nk\nl\nm\n"), + }, applied) +} + func TestPatchsetApply_RejectsBinaryPatches(t *testing.T) { t.Parallel() diff --git a/testdata/parity/context-reduced-beginning-anchor-clear/fixture.json b/testdata/parity/context-reduced-beginning-anchor-clear/fixture.json new file mode 100644 index 0000000..602dcdf --- /dev/null +++ b/testdata/parity/context-reduced-beginning-anchor-clear/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "-C1" + ] +} diff --git 
a/testdata/parity/context-reduced-beginning-anchor-clear/out b/testdata/parity/context-reduced-beginning-anchor-clear/out new file mode 100644 index 0000000..2ee876f --- /dev/null +++ b/testdata/parity/context-reduced-beginning-anchor-clear/out @@ -0,0 +1,4 @@ +banner +ALPHA +beta +gamma diff --git a/testdata/parity/context-reduced-beginning-anchor-clear/patch b/testdata/parity/context-reduced-beginning-anchor-clear/patch new file mode 100644 index 0000000..8640581 --- /dev/null +++ b/testdata/parity/context-reduced-beginning-anchor-clear/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ +-alpha ++ALPHA + beta + gamma diff --git a/testdata/parity/context-reduced-beginning-anchor-clear/src b/testdata/parity/context-reduced-beginning-anchor-clear/src new file mode 100644 index 0000000..d9bdb0e --- /dev/null +++ b/testdata/parity/context-reduced-beginning-anchor-clear/src @@ -0,0 +1,4 @@ +banner +alpha +beta +gamma diff --git a/testdata/parity/context-reduced-both/fixture.json b/testdata/parity/context-reduced-both/fixture.json new file mode 100644 index 0000000..602dcdf --- /dev/null +++ b/testdata/parity/context-reduced-both/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "-C1" + ] +} diff --git a/testdata/parity/context-reduced-both/out b/testdata/parity/context-reduced-both/out new file mode 100644 index 0000000..de4be80 --- /dev/null +++ b/testdata/parity/context-reduced-both/out @@ -0,0 +1,7 @@ +a0 +A1 +a2 +A3 +a4 +A5 +a6 diff --git a/testdata/parity/context-reduced-both/patch b/testdata/parity/context-reduced-both/patch new file mode 100644 index 0000000..022366b --- /dev/null +++ b/testdata/parity/context-reduced-both/patch @@ -0,0 +1,10 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2,5 +2,5 @@ + a1 + a2 +-a3 ++A3 + a4 + a5 diff --git a/testdata/parity/context-reduced-both/src b/testdata/parity/context-reduced-both/src new file mode 100644 index 0000000..483bdf4 --- /dev/null 
+++ b/testdata/parity/context-reduced-both/src @@ -0,0 +1,7 @@ +a0 +A1 +a2 +a3 +a4 +A5 +a6 diff --git a/testdata/parity/context-reduced-end-anchor-clear/fixture.json b/testdata/parity/context-reduced-end-anchor-clear/fixture.json new file mode 100644 index 0000000..602dcdf --- /dev/null +++ b/testdata/parity/context-reduced-end-anchor-clear/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "-C1" + ] +} diff --git a/testdata/parity/context-reduced-end-anchor-clear/out b/testdata/parity/context-reduced-end-anchor-clear/out new file mode 100644 index 0000000..843507b --- /dev/null +++ b/testdata/parity/context-reduced-end-anchor-clear/out @@ -0,0 +1,4 @@ +alpha +beta +GAMMA +footer diff --git a/testdata/parity/context-reduced-end-anchor-clear/patch b/testdata/parity/context-reduced-end-anchor-clear/patch new file mode 100644 index 0000000..9f93935 --- /dev/null +++ b/testdata/parity/context-reduced-end-anchor-clear/patch @@ -0,0 +1,8 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -1,3 +1,3 @@ + alpha + beta +-gamma ++GAMMA diff --git a/testdata/parity/context-reduced-end-anchor-clear/src b/testdata/parity/context-reduced-end-anchor-clear/src new file mode 100644 index 0000000..9789145 --- /dev/null +++ b/testdata/parity/context-reduced-end-anchor-clear/src @@ -0,0 +1,4 @@ +alpha +beta +gamma +footer diff --git a/testdata/parity/context-reduced-leading/fixture.json b/testdata/parity/context-reduced-leading/fixture.json new file mode 100644 index 0000000..602dcdf --- /dev/null +++ b/testdata/parity/context-reduced-leading/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "-C1" + ] +} diff --git a/testdata/parity/context-reduced-leading/out b/testdata/parity/context-reduced-leading/out new file mode 100644 index 0000000..e452713 --- /dev/null +++ b/testdata/parity/context-reduced-leading/out @@ -0,0 +1,7 @@ +a0 +A1 +a2 +A3 +a4 +a5 +a6 diff --git a/testdata/parity/context-reduced-leading/patch b/testdata/parity/context-reduced-leading/patch new 
file mode 100644 index 0000000..022366b --- /dev/null +++ b/testdata/parity/context-reduced-leading/patch @@ -0,0 +1,10 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2,5 +2,5 @@ + a1 + a2 +-a3 ++A3 + a4 + a5 diff --git a/testdata/parity/context-reduced-leading/src b/testdata/parity/context-reduced-leading/src new file mode 100644 index 0000000..82da92e --- /dev/null +++ b/testdata/parity/context-reduced-leading/src @@ -0,0 +1,7 @@ +a0 +A1 +a2 +a3 +a4 +a5 +a6 diff --git a/testdata/parity/context-reduced-trailing/fixture.json b/testdata/parity/context-reduced-trailing/fixture.json new file mode 100644 index 0000000..602dcdf --- /dev/null +++ b/testdata/parity/context-reduced-trailing/fixture.json @@ -0,0 +1,5 @@ +{ + "gitArgs": [ + "-C1" + ] +} diff --git a/testdata/parity/context-reduced-trailing/out b/testdata/parity/context-reduced-trailing/out new file mode 100644 index 0000000..5d1a833 --- /dev/null +++ b/testdata/parity/context-reduced-trailing/out @@ -0,0 +1,7 @@ +a0 +a1 +a2 +A3 +a4 +A5 +a6 diff --git a/testdata/parity/context-reduced-trailing/patch b/testdata/parity/context-reduced-trailing/patch new file mode 100644 index 0000000..022366b --- /dev/null +++ b/testdata/parity/context-reduced-trailing/patch @@ -0,0 +1,10 @@ +diff --git a/file.txt b/file.txt +--- a/file.txt ++++ b/file.txt +@@ -2,5 +2,5 @@ + a1 + a2 +-a3 ++A3 + a4 + a5 diff --git a/testdata/parity/context-reduced-trailing/src b/testdata/parity/context-reduced-trailing/src new file mode 100644 index 0000000..7ff9f6e --- /dev/null +++ b/testdata/parity/context-reduced-trailing/src @@ -0,0 +1,7 @@ +a0 +a1 +a2 +a3 +a4 +A5 +a6 From 2db47da253ca046e74a87e99d2fc164246b495c1 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 1 Apr 2026 10:46:26 +0100 Subject: [PATCH 17/20] chore: capability to apply patches --- .github/workflows/lint.yml | 28 +++++------ .github/workflows/tests.yml | 29 +++++------ .golangci.yaml | 96 ++++++++++++++++++++++++++++++++++--- Makefile | 9 
+++- api_significance.go | 25 ++++++++++ apply.go | 13 ++--- apply_options.go | 20 +++++--- apply_render.go | 8 ++-- apply_session.go | 53 +++++++++++--------- apply_test.go | 44 ++++++++++++----- model.go | 54 ++++++++++++--------- parity_test.go | 7 ++- parser.go | 18 +++---- parser_test.go | 16 +++---- patchset.go | 4 +- patchset_apply.go | 22 ++++----- 16 files changed, 298 insertions(+), 148 deletions(-) create mode 100644 api_significance.go diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d088c35..db0e553 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,28 +1,26 @@ -name: lint +name: Lint on: push: branches: - main pull_request: -env: - GOPRIVATE: "github.com/speakeasy-api" + +permissions: + contents: read + jobs: - golangci: - name: lint + golangci-lint: + name: golangci-lint runs-on: ubuntu-latest steps: - - uses: actions/setup-go@v3 - with: - go-version: 1.21 + - uses: actions/checkout@v4 - - name: Configure git for private modules - env: - GIT_AUTH_TOKEN: ${{ secrets.BOT_REPO_TOKEN }} - run: git config --global url."https://speakeasybot:${GITHUB_TOKEN}@github.com".insteadOf "https://github.com" + - uses: actions/setup-go@v5 + with: + go-version-file: go.mod - - uses: actions/checkout@v3 - name: golangci-lint - uses: golangci/golangci-lint-action@v3 + uses: golangci/golangci-lint-action@v7 with: - version: v1.57.2 + version: v2.1.6 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 52fa341..545f77c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -5,27 +5,20 @@ on: branches: - main pull_request: -env: - GOPRIVATE: "github.com/speakeasy-api" -jobs: - tests: - runs-on: ubuntu-latest - - strategy: - fail-fast: true - matrix: - go-version: [1.21.x] - name: Tests - Go ${{ matrix.go-version }} +permissions: + contents: read +jobs: + test: + runs-on: ubuntu-latest + name: Tests steps: - - name: Checkout the code - uses: actions/checkout@v2 + - uses: 
actions/checkout@v4 - - name: Install Go - uses: actions/setup-go@v2 + - uses: actions/setup-go@v5 with: - go-version: ${{ matrix.go-version }} + go-version-file: go.mod - - name: Run the tests - run: go test ./... \ No newline at end of file + - name: Run tests + run: go test -race -count=1 ./... diff --git a/.golangci.yaml b/.golangci.yaml index 199dfe9..dee5401 100644 --- a/.golangci.yaml +++ b/.golangci.yaml @@ -1,8 +1,92 @@ +version: "2" + run: - go: "1.19" -linters-settings: - tagliatelle: - case: - use-field-name: true + go: "1.21" + timeout: 5m + +formatters: + enable: + - gofumpt + + settings: + gofumpt: + extra-rules: true + +linters: + enable: + # Bugs / correctness + - govet + - staticcheck + - errcheck + - ineffassign + - bodyclose + - nilerr + - copyloopvar + + # Style / simplicity + - revive + - misspell + - unconvert + - unused + - nolintlint + - gocritic + - dupword + - usestdlibvars + + # Complexity + - gocyclo + - cyclop + + # Testing + - testifylint + + # Tags + - tagliatelle + + settings: + gocyclo: + min-complexity: 30 + + cyclop: + max-complexity: 30 + + gocritic: + enabled-tags: + - diagnostic + - style + - performance + + revive: rules: - json: snake + - name: blank-imports + - name: context-as-argument + - name: dot-imports + - name: error-naming + - name: error-return + - name: exported + disabled: true + - name: increment-decrement + - name: indent-error-flow + - name: range + - name: receiver-naming + - name: redefines-builtin-id + - name: superfluous-else + - name: unreachable-code + - name: unused-parameter + + tagliatelle: + case: + use-field-name: true + rules: + json: snake + + nolintlint: + require-explanation: true + require-specific: true + + testifylint: + enable-all: true + +issues: + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/Makefile b/Makefile index 925e94c..3d6c71a 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,10 @@ -.PHONY: * +.PHONY: lint fmt test lint: - gofumpt -l -w . 
golangci-lint run + +fmt: + gofumpt -l -w . + +test: + go test -race -count=1 ./... diff --git a/api_significance.go b/api_significance.go new file mode 100644 index 0000000..ce5878a --- /dev/null +++ b/api_significance.go @@ -0,0 +1,25 @@ +package git_diff_parser + +type ( + ContentChange = contentChange + ContentChangeType = contentChangeType + FileDiff = fileDiff + FileDiffType = fileDiffType +) + +const ( + ContentChangeTypeAdd = contentChangeTypeAdd + ContentChangeTypeDelete = contentChangeTypeDelete + ContentChangeTypeModify = contentChangeTypeModify + ContentChangeTypeNOOP = contentChangeTypeNOOP + + FileDiffTypeAdded = fileDiffTypeAdded + FileDiffTypeDeleted = fileDiffTypeDeleted + FileDiffTypeModified = fileDiffTypeModified +) + +func SignificantChange(diff string, isSignificant func(*FileDiff, *ContentChange) (bool, string)) (significant bool, msg string, err error) { + return significantChange(diff, func(fileDiff *fileDiff, change *contentChange) (bool, string) { + return isSignificant(fileDiff, change) + }) +} diff --git a/apply.go b/apply.go index 9fc52d8..8f1adcd 100644 --- a/apply.go +++ b/apply.go @@ -34,6 +34,11 @@ func ApplyFile(pristine, patchData []byte) ([]byte, error) { return result.Content, err } +func ApplyFileWithConflicts(pristine, patchData []byte) ([]byte, error) { + result, err := applyFileWithOptions(pristine, patchData, defaultMergeApplyOptions()) + return result.Content, err +} + func applyFileWithOptions(pristine, patchData []byte, options applyOptions) (applyResult, error) { return newPatchApply(options).applyFileWithResult(pristine, patchData) } @@ -72,7 +77,7 @@ func (p *patchApply) applyValidatedPatch(pristine []byte, patch validatedPatch) return result, &applyError{DirectMisses: len(outcome.conflicts)} } -func validateApplyFileDiff(fileDiff fileDiff) error { +func validateApplyFileDiff(fileDiff *fileDiff) error { switch { case fileDiff.IsBinary: return errors.New("binary patches are not supported") @@ -91,7 +96,7 @@ func 
validateApplyFileDiff(fileDiff fileDiff) error { } } -func fileDiffHasChanges(fileDiff fileDiff) bool { +func fileDiffHasChanges(fileDiff *fileDiff) bool { for _, hunk := range fileDiff.Hunks { for _, change := range hunk.ChangeList { if change.Type != contentChangeTypeNOOP { @@ -116,10 +121,6 @@ func desiredLinesWindow(hunk patchHunk, start, end int) []fileLine { return lines } -func preimageLines(hunk patchHunk) []fileLine { - return preimageLinesWindow(hunk, 0, len(hunk.lines)) -} - func preimageLinesWindow(hunk patchHunk, start, end int) []fileLine { lines := make([]fileLine, 0, len(hunk.lines)) for _, line := range hunk.lines[start:end] { diff --git a/apply_options.go b/apply_options.go index d3adb33..63fee0b 100644 --- a/apply_options.go +++ b/apply_options.go @@ -36,15 +36,21 @@ type applyOptions struct { func defaultApplyOptions() applyOptions { return applyOptions{ - Mode: applyModeMerge, + Mode: applyModeApply, MinContext: math.MaxInt, - ConflictLabels: conflictLabels{ - Current: "Current", - Incoming: "Incoming patch", - }, } } +func defaultMergeApplyOptions() applyOptions { + options := defaultApplyOptions() + options.Mode = applyModeMerge + options.ConflictLabels = conflictLabels{ + Current: "Current", + Incoming: "Incoming patch", + } + return options +} + // patchApply holds apply-time configuration and mirrors Git's stateful apply design. 
type patchApply struct { options applyOptions @@ -59,9 +65,9 @@ func (o applyOptions) normalize() applyOptions { o.Mode = applyModeApply } if o.Mode == applyModeMerge { - defaults := defaultApplyOptions() + defaults := defaultMergeApplyOptions() if !o.MinContextSet { - o.MinContext = defaults.MinContext + o.MinContext = defaultApplyOptions().MinContext } if o.ConflictLabels.Current == "" { o.ConflictLabels.Current = defaults.ConflictLabels.Current diff --git a/apply_render.go b/apply_render.go index 3f10431..cdd2f39 100644 --- a/apply_render.go +++ b/apply_render.go @@ -58,12 +58,12 @@ func renderRejectContent(header string, conflicts []applyConflict) []byte { buf.WriteString(header) buf.WriteByte('\n') } - for _, conflict := range conflicts { - if conflict.hunk.header != "" { - buf.WriteString(conflict.hunk.header) + for i := range conflicts { + if conflicts[i].hunk.header != "" { + buf.WriteString(conflicts[i].hunk.header) buf.WriteByte('\n') } - for _, line := range conflict.hunk.lines { + for _, line := range conflicts[i].hunk.lines { buf.WriteByte(line.kind) buf.WriteString(line.text) if line.hasNewline { diff --git a/apply_session.go b/apply_session.go index a00c593..8414243 100644 --- a/apply_session.go +++ b/apply_session.go @@ -35,13 +35,13 @@ func (p *patchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er } fileDiff := parsed.FileDiff[0] - if err := validateApplyFileDiff(fileDiff); err != nil { + if err := validateApplyFileDiff(&fileDiff); err != nil { return validatedPatch{}, err } hunks := make([]patchHunk, 0, len(fileDiff.Hunks)) - for _, hunk := range fileDiff.Hunks { - hunks = append(hunks, patchHunkFromHunk(hunk)) + for i := range fileDiff.Hunks { + hunks = append(hunks, patchHunkFromHunk(&fileDiff.Hunks[i])) } hunks, err := normalizePatchHunks(hunks, p.options) if err != nil { @@ -49,7 +49,7 @@ func (p *patchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er } return validatedPatch{ - rejectHead: 
formatRejectHeader(fileDiff), + rejectHead: formatRejectHeader(&fileDiff), hunks: hunks, }, nil } @@ -183,17 +183,17 @@ func (s *applySession) findPos(hunk patchHunk) (matchedHunk, bool) { matchEnd = false continue } - if leading >= trailing && hunkStart < hunkEnd { - hunkStart++ - preferred-- - if preferred < s.cursor { - preferred = s.cursor - } - leading-- + if leading >= trailing && hunkStart < hunkEnd { + hunkStart++ + preferred-- + if preferred < s.cursor { + preferred = s.cursor } - if trailing > leading && hunkStart < hunkEnd { - hunkEnd-- - trailing-- + leading-- + } + if trailing > leading && hunkStart < hunkEnd { + hunkEnd-- + trailing-- } } @@ -201,9 +201,9 @@ func (s *applySession) findPos(hunk patchHunk) (matchedHunk, bool) { } func (s *applySession) findPosForFragment(preferred int, fragment []fileLine, matchBeginning, matchEnd bool) (int, bool) { - maxStart := s.sourceContentLines() - len(fragment) + maxStart := s.fragmentEndLimit(fragment) - len(fragment) if maxStart < 0 { - maxStart = s.sourceContentLines() + maxStart = s.fragmentEndLimit(fragment) } if matchBeginning { preferred = 0 @@ -252,7 +252,7 @@ func (s *applySession) matchFragmentAt(start int, fragment []fileLine, matchBegi if start+len(fragment) > len(s.sourceLines) { return false } - if matchEnd && start+len(fragment) != s.sourceContentLines() { + if matchEnd && start+len(fragment) != s.fragmentEndLimit(fragment) { return false } if !s.allowOverlap() { @@ -265,7 +265,7 @@ func (s *applySession) matchFragmentAt(start int, fragment []fileLine, matchBegi return matchFragment(s.sourceLines, start, fragment, s.ignoreWhitespace()) } -func patchHunkFromHunk(hunk hunk) patchHunk { +func patchHunkFromHunk(hunk *hunk) patchHunk { lines := make([]patchLine, 0, len(hunk.Lines)) for _, line := range hunk.Lines { lines = append(lines, patchLine{ @@ -287,7 +287,7 @@ func patchHunkFromHunk(hunk hunk) patchHunk { } } -func formatRejectHeader(fileDiff fileDiff) string { +func formatRejectHeader(fileDiff 
*fileDiff) string { path := firstNonEmpty(fileDiff.ToFile, fileDiff.FromFile) if path == "" { return "" @@ -295,7 +295,7 @@ func formatRejectHeader(fileDiff fileDiff) string { return "diff a/" + path + " b/" + path + "\t(rejected hunks)" } -func formatPatchHunkHeader(hunk hunk) string { +func formatPatchHunkHeader(hunk *hunk) string { oldRange := formatPatchHunkRange(hunk.StartLineNumberOld, hunk.CountOld) newRange := formatPatchHunkRange(hunk.StartLineNumberNew, hunk.CountNew) return fmt.Sprintf("@@ -%s +%s @@", oldRange, newRange) @@ -334,7 +334,14 @@ func (s *applySession) sourceContentLines() int { return len(s.sourceLines) } -func hunkContext(lines []patchLine) (int, int) { +func (s *applySession) fragmentEndLimit(fragment []fileLine) int { + if len(fragment) > 0 && fragment[len(fragment)-1].eofMarker { + return len(s.sourceLines) + } + return s.sourceContentLines() +} + +func hunkContext(lines []patchLine) (leading, trailing int) { firstChange := len(lines) lastChange := -1 for i, line := range lines { @@ -350,14 +357,14 @@ func hunkContext(lines []patchLine) (int, int) { return len(lines), len(lines) } - leading := 0 + leading = 0 for i := 0; i < firstChange; i++ { if lines[i].kind == ' ' { leading++ } } - trailing := 0 + trailing = 0 for i := len(lines) - 1; i > lastChange; i-- { if lines[i].kind == ' ' { trailing++ diff --git a/apply_test.go b/apply_test.go index 9495af5..77f8d64 100644 --- a/apply_test.go +++ b/apply_test.go @@ -57,7 +57,11 @@ func TestApplyFile_TextFixtures(t *testing.T) { t.Parallel() files := loadApplyFixture(t, test.fixture) - applied, err := ApplyFile(files.src, files.patch) + applyFn := ApplyFile + if test.conflict { + applyFn = ApplyFileWithConflicts + } + applied, err := applyFn(files.src, files.patch) if test.wantErr != "" { require.Error(t, err) @@ -65,7 +69,7 @@ func TestApplyFile_TextFixtures(t *testing.T) { if test.conflict { var applyErr *applyError require.ErrorAs(t, err, &applyErr) - assert.True(t, errors.Is(err, 
ErrPatchConflict)) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), defaultIncomingConflictMarker) } @@ -327,7 +331,7 @@ func TestApplyFile_DamagedContextPatchesConflictWithoutFuzz(t *testing.T) { t.Parallel() patch := rewriteFirstHunkHeader(damaged, test.header) - applied, err := ApplyFile(original, patch) + applied, err := ApplyFileWithConflicts(original, patch) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) @@ -425,9 +429,9 @@ func TestApplyFile_EmptyContextNoTrailingNewlinePatches(t *testing.T) { func TestApplyFileWithOptions_ReducesContextToRelocateHunk(t *testing.T) { t.Parallel() - originalPristine := []byte("package testsdk\n\ntype Status struct{}\n") - patchData := buildPatch(t, "status.go", originalPristine, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) - shiftedPristine := []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n") + patchData := mustReadFile(t, filepath.Join("testdata", "parity", "context-reduced-leading", "patch")) + shiftedPristine := mustReadFile(t, filepath.Join("testdata", "parity", "context-reduced-leading", "src")) + want := mustReadFile(t, filepath.Join("testdata", "parity", "context-reduced-leading", "out")) _, err := applyFileWithOptions(shiftedPristine, patchData, applyOptions{}) require.Error(t, err) @@ -437,7 +441,7 @@ func TestApplyFileWithOptions_ReducesContextToRelocateHunk(t *testing.T) { MinContextSet: true, }) require.NoError(t, err) - assert.Equal(t, []byte("package testsdk\n\n// generated comment moved the hunk down\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n"), applied.Content) + assert.Equal(t, want, applied.Content) } func TestApplyFile_RelocatesToNearestMatchingBlock(t *testing.T) { @@ 
-473,7 +477,7 @@ func TestApplyFile_MultipleHunksOneConflict(t *testing.T) { current := []byte("line 1\nline 2\nline 3\nline 4\nline 5\nline VI\nline 7\nline 8\n") patch := buildPatchWithContext(t, "multi.txt", original, target, 1) - applied, err := ApplyFile(current, patch) + applied, err := ApplyFileWithConflicts(current, patch) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), "line two") @@ -482,7 +486,7 @@ func TestApplyFile_MultipleHunksOneConflict(t *testing.T) { assert.Contains(t, string(applied), "line six") } -func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { +func TestApplyFile_DefaultsToDirectApply(t *testing.T) { t.Parallel() base := []byte("package testsdk\n\ntype Status struct{}\n") @@ -492,7 +496,21 @@ func TestApplyFile_ReturnsConflictMarkers(t *testing.T) { applied, err := ApplyFile(current, patchData) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) - assert.True(t, errors.Is(err, ErrPatchConflict)) + assert.Equal(t, current, applied) + assert.NotContains(t, string(applied), defaultCurrentConflictMarker) + assert.NotContains(t, string(applied), defaultIncomingConflictMarker) +} + +func TestApplyFileWithConflicts_ReturnsConflictMarkers(t *testing.T) { + t.Parallel() + + base := []byte("package testsdk\n\ntype Status struct{}\n") + current := []byte("package testsdk\n\ntype Status struct {\n\tValue string\n}\n") + patchData := buildPatch(t, "status.go", base, []byte("package testsdk\n\ntype Status struct{}\n\nfunc (s *Status) String() string {\n\treturn \"custom\"\n}\n")) + + applied, err := ApplyFileWithConflicts(current, patchData) + require.Error(t, err) + require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) assert.Contains(t, string(applied), defaultIncomingConflictMarker) assert.Contains(t, string(applied), "func (s *Status) String() string") @@ -690,7 +708,7 @@ func 
TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := ApplyFile(test.current, test.patch) + applied, err := ApplyFileWithConflicts(test.current, test.patch) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) @@ -739,7 +757,7 @@ func TestApplyFile_RejectsAlreadyAppliedMiddlePatches(t *testing.T) { t.Run(test.name, func(t *testing.T) { t.Parallel() - applied, err := ApplyFile(test.current, test.patch) + applied, err := ApplyFileWithConflicts(test.current, test.patch) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) @@ -937,7 +955,7 @@ func TestApplyFile_ShrinkFailures(t *testing.T) { t.Parallel() patch := buildPatch(t, "F", test.original, test.target) - applied, err := ApplyFile(test.current, patch) + applied, err := ApplyFileWithConflicts(test.current, patch) require.Error(t, err) require.ErrorIs(t, err, ErrPatchConflict) assert.Contains(t, string(applied), defaultCurrentConflictMarker) diff --git a/model.go b/model.go index c89e4d1..6da10a6 100644 --- a/model.go +++ b/model.go @@ -34,6 +34,24 @@ type hunkLine struct { NewEOF bool `json:"new_eof,omitempty"` } +// hunk is a line that starts with @@. +// Each hunk shows one area where the files differ. +// Unified format hunks look like this: +// @@ from-file-line-numbers to-file-line-numbers @@ +// +// line-from-either-file +// line-from-either-file… +// +// If a hunk contains just one line, only its start line number appears. Otherwise its line numbers look like 'start,count'. An empty hunk is considered to start at the line that follows the hunk. 
+type hunk struct { + ChangeList changeList `json:"change_list"` + Lines []hunkLine `json:"lines,omitempty"` + StartLineNumberOld int `json:"start_line_number_old"` + CountOld int `json:"count_old"` + StartLineNumberNew int `json:"start_line_number_new"` + CountNew int `json:"count_new"` +} + func (l *hunkLine) markNoNewline() { l.HasNewline = false } @@ -59,24 +77,6 @@ func (h *hunk) markEOFMarkers() { } } -// hunk is a line that starts with @@. -// Each hunk shows one area where the files differ. -// Unified format hunks look like this: -// @@ from-file-line-numbers to-file-line-numbers @@ -// -// line-from-either-file -// line-from-either-file… -// -// If a hunk contains just one line, only its start line number appears. Otherwise its line numbers look like ‘start,count’. An empty hunk is considered to start at the line that follows the hunk. -type hunk struct { - ChangeList changeList `json:"change_list"` - Lines []hunkLine `json:"lines,omitempty"` - StartLineNumberOld int `json:"start_line_number_old"` - CountOld int `json:"count_old"` - StartLineNumberNew int `json:"start_line_number_new"` - CountNew int `json:"count_new"` -} - func (changes *changeList) isSignificant() bool { for _, change := range *changes { if change.Type != contentChangeTypeNOOP { @@ -86,7 +86,7 @@ func (changes *changeList) isSignificant() bool { return false } -func (h hunk) GoString() string { +func (h *hunk) GoString() string { return fmt.Sprintf( "git_diff_parser.Hunk{ChangeList:%#v, StartLineNumberOld:%d, CountOld:%d, StartLineNumberNew:%d, CountNew:%d}", h.ChangeList, @@ -140,15 +140,25 @@ type fileDiff struct { BinaryPatch []binaryPatch `json:"binary_patch"` } -func (fd fileDiff) GoString() string { +func (fd *fileDiff) GoString() string { + var hunksStr string + if fd.Hunks == nil { + hunksStr = "[]git_diff_parser.Hunk(nil)" + } else { + hunks := make([]string, len(fd.Hunks)) + for i := range fd.Hunks { + hunks[i] = fd.Hunks[i].GoString() + } + hunksStr = 
"[]git_diff_parser.Hunk{" + strings.Join(hunks, ", ") + "}" + } return fmt.Sprintf( - "&git_diff_parser.FileDiff{FromFile:%#v, ToFile:%#v, Type:%#v, IsBinary:%t, NewMode:%#v, Hunks:%#v, BinaryPatch:%#v}", + "&git_diff_parser.FileDiff{FromFile:%#v, ToFile:%#v, Type:%#v, IsBinary:%t, NewMode:%#v, Hunks:%s, BinaryPatch:%#v}", fd.FromFile, fd.ToFile, fd.Type, fd.IsBinary, fd.NewMode, - fd.Hunks, + hunksStr, fd.BinaryPatch, ) } diff --git a/parity_test.go b/parity_test.go index d803731..4cc3e97 100644 --- a/parity_test.go +++ b/parity_test.go @@ -131,12 +131,15 @@ func TestApplyFile_ParityCorpus(t *testing.T) { func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (applyResult, error) { t.Helper() - options := defaultApplyOptions() + options := defaultMergeApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") options.UnidiffZero = fixtureHasGitArg(tc.fixture, "--unidiff-zero") if rejectMode { - options.Mode = applyModeApply + options = defaultApplyOptions() + options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace + options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") + options.UnidiffZero = fixtureHasGitArg(tc.fixture, "--unidiff-zero") } if minContext, ok := fixtureContextArg(tc.fixture); ok { options.MinContext = minContext diff --git a/parser.go b/parser.go index 2a59351..cca261a 100644 --- a/parser.go +++ b/parser.go @@ -195,7 +195,7 @@ func (p *parser) tryVisitHeader(diff string) bool { } fileHEAD := len(p.diff.FileDiff) - 1 - if len(diff) == 0 && p.mode == modeHeader { + if diff == "" && p.mode == modeHeader { return true } if fileHEAD < 0 { @@ -441,19 +441,19 @@ func parse(diff string) (diff, []error) { // SignificantChange Allows a structured diff to be passed into the `isSignificant` function to determine significance. 
That function can return a message, which is optionally passed as the final argument // Returns the first significant change found, or false if non found. -func significantChange(diff string, isSignificant func(*fileDiff, *contentChange) (bool, string)) (bool, string, error) { - parsed, err := parse(diff) - if len(err) > 0 { - return true, "", fmt.Errorf("failed to parse diff: %w", err[0]) +func significantChange(diff string, isSignificant func(*fileDiff, *contentChange) (bool, string)) (isSignificantResult bool, resultMsg string, resultErr error) { + parsed, errs := parse(diff) + if len(errs) > 0 { + return true, "", fmt.Errorf("failed to parse diff: %w", errs[0]) } - for _, fileDiff := range parsed.FileDiff { - if significant, msg := isSignificant(&fileDiff, &contentChange{}); significant { + for i := range parsed.FileDiff { + if sig, msg := isSignificant(&parsed.FileDiff[i], &contentChange{}); sig { return true, msg, nil } - for _, hunk := range fileDiff.Hunks { + for _, hunk := range parsed.FileDiff[i].Hunks { for _, change := range hunk.ChangeList { - if significant, msg := isSignificant(&fileDiff, &change); significant { + if sig, msg := isSignificant(&parsed.FileDiff[i], &change); sig { return true, msg, nil } } diff --git a/parser_test.go b/parser_test.go index 142c41e..96126e7 100644 --- a/parser_test.go +++ b/parser_test.go @@ -24,19 +24,19 @@ func TestParse(t *testing.T) { want bool } significantDiffs, err := testdata.ReadDir("testdata/significant") - assert.NoError(t, err) + require.NoError(t, err) insignificantDiffs, err := testdata.ReadDir("testdata/insignificant") - assert.NoError(t, err) + require.NoError(t, err) tests := []SignificanceTest{} for _, testFile := range significantDiffs { if !strings.HasSuffix(testFile.Name(), "diff") { continue } content, err := testdata.ReadFile("testdata/significant/" + testFile.Name()) - assert.NoError(t, err) + require.NoError(t, err) tests = append(tests, SignificanceTest{ name: testFile.Name(), - relativePath: 
filepath.Join("testdata/significant", testFile.Name()), + relativePath: filepath.Join("testdata", "significant", testFile.Name()), input: string(content), want: true, }) @@ -46,10 +46,10 @@ func TestParse(t *testing.T) { continue } content, err := testdata.ReadFile("testdata/insignificant/" + testFile.Name()) - assert.NoError(t, err) + require.NoError(t, err) tests = append(tests, SignificanceTest{ name: testFile.Name(), - relativePath: filepath.Join("testdata/insignificant", testFile.Name()), + relativePath: filepath.Join("testdata", "insignificant", testFile.Name()), input: string(content), want: false, }) @@ -148,7 +148,7 @@ new mode 100755 assert.False(t, hunk.Lines[2].NewEOF) } -func MatchMessageSnapshot(t *testing.T, snapshotName string, content string) { +func MatchMessageSnapshot(t *testing.T, snapshotName, content string) { t.Helper() _, filename, _, ok := runtime.Caller(0) require.True(t, ok) @@ -157,7 +157,7 @@ func MatchMessageSnapshot(t *testing.T, snapshotName string, content string) { if _, err := os.Stat(snapshotFile); err != nil { f, err := os.OpenFile(snapshotFile, os.O_APPEND|os.O_CREATE|os.O_RDWR, os.ModePerm) require.NoError(t, err) - defer f.Close() + defer func() { _ = f.Close() }() _, err = f.WriteString(content) require.NoError(t, err) return diff --git a/patchset.go b/patchset.go index d826f2c..55a587d 100644 --- a/patchset.go +++ b/patchset.go @@ -117,8 +117,8 @@ func parsePatchset(patchData []byte) (patchset, []error) { func (p patchset) apply(tree map[string][]byte) (map[string][]byte, error) { out := cloneTree(tree) - for _, file := range p.Files { - if err := applyPatchsetFile(out, file); err != nil { + for i := range p.Files { + if err := applyPatchsetFile(out, &p.Files[i]); err != nil { return nil, err } } diff --git a/patchset_apply.go b/patchset_apply.go index b613fc7..e4767b6 100644 --- a/patchset_apply.go +++ b/patchset_apply.go @@ -4,7 +4,7 @@ import "fmt" const patchsetOperationModify patchsetOperation = "modify" -func 
applyPatchsetFile(tree map[string][]byte, file patchsetFile) error { +func applyPatchsetFile(tree map[string][]byte, file *patchsetFile) error { if file.Diff.IsBinary { return &unsupportedPatchError{ Operation: patchsetOperationBinary, @@ -12,7 +12,7 @@ func applyPatchsetFile(tree map[string][]byte, file patchsetFile) error { } } - op, sourcePath, targetPath, err := determinePatchsetOperation(tree, file.Diff) + op, sourcePath, targetPath, err := determinePatchsetOperation(tree, &file.Diff) if err != nil { return err } @@ -87,8 +87,8 @@ func applyPatchsetFile(tree map[string][]byte, file patchsetFile) error { } } -func determinePatchsetOperation(tree map[string][]byte, fileDiff fileDiff) (patchsetOperation, string, string, error) { - sourcePath, targetPath := patchsetPaths(fileDiff) +func determinePatchsetOperation(tree map[string][]byte, fileDiff *fileDiff) (op patchsetOperation, sourcePath, targetPath string, err error) { + sourcePath, targetPath = patchsetPaths(fileDiff) switch { case fileDiff.RenameFrom != "" || fileDiff.RenameTo != "": @@ -114,24 +114,24 @@ func determinePatchsetOperation(tree map[string][]byte, fileDiff fileDiff) (patc return patchsetOperationModify, sourcePath, targetPath, nil } -func patchsetPaths(fileDiff fileDiff) (string, string) { - sourcePath := firstNonEmpty(fileDiff.RenameFrom, fileDiff.CopyFrom, fileDiff.FromFile, fileDiff.ToFile) - targetPath := firstNonEmpty(fileDiff.RenameTo, fileDiff.CopyTo, fileDiff.ToFile, fileDiff.FromFile) +func patchsetPaths(fileDiff *fileDiff) (sourcePath, targetPath string) { + sourcePath = firstNonEmpty(fileDiff.RenameFrom, fileDiff.CopyFrom, fileDiff.FromFile, fileDiff.ToFile) + targetPath = firstNonEmpty(fileDiff.RenameTo, fileDiff.CopyTo, fileDiff.ToFile, fileDiff.FromFile) return sourcePath, targetPath } -func applyPatchsetContent(pristine []byte, file patchsetFile) ([]byte, error) { +func applyPatchsetContent(pristine []byte, file *patchsetFile) ([]byte, error) { if len(file.Diff.Hunks) == 0 { return 
append([]byte(nil), pristine...), nil } hunks := make([]patchHunk, 0, len(file.Diff.Hunks)) - for _, hunk := range file.Diff.Hunks { - hunks = append(hunks, patchHunkFromHunk(hunk)) + for i := range file.Diff.Hunks { + hunks = append(hunks, patchHunkFromHunk(&file.Diff.Hunks[i])) } result, err := newPatchApply(applyOptions{Mode: applyModeApply}).applyValidatedPatch(pristine, validatedPatch{ - rejectHead: formatRejectHeader(file.Diff), + rejectHead: formatRejectHeader(&file.Diff), hunks: hunks, }) if err != nil { From ad641ea4e3201e9895d9db345636034a6da97142 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 1 Apr 2026 12:52:18 +0100 Subject: [PATCH 18/20] chore: clean up --- ref/git-apply.adoc | 299 --- ref/git-apply.c | 5232 -------------------------------------------- ref/git-apply.h | 190 -- 3 files changed, 5721 deletions(-) delete mode 100644 ref/git-apply.adoc delete mode 100644 ref/git-apply.c delete mode 100644 ref/git-apply.h diff --git a/ref/git-apply.adoc b/ref/git-apply.adoc deleted file mode 100644 index 6c71ee6..0000000 --- a/ref/git-apply.adoc +++ /dev/null @@ -1,299 +0,0 @@ -git-apply(1) -============ - -NAME ----- -git-apply - Apply a patch to files and/or to the index - - -SYNOPSIS --------- -[verse] -'git apply' [--stat] [--numstat] [--summary] [--check] - [--index | --intent-to-add] [--3way] [--ours | --theirs | --union] - [--apply] [--no-add] [--build-fake-ancestor=] [-R | --reverse] - [--allow-binary-replacement | --binary] [--reject] [-z] - [-p] [-C] [--inaccurate-eof] [--recount] [--cached] - [--ignore-space-change | --ignore-whitespace] - [--whitespace=(nowarn|warn|fix|error|error-all)] - [--exclude=] [--include=] [--directory=] - [--verbose | --quiet] [--unsafe-paths] [--allow-empty] [...] - -DESCRIPTION ------------ -Reads the supplied diff output (i.e. "a patch") and applies it to files. -When running from a subdirectory in a repository, patched paths -outside the directory are ignored. 
-With the `--index` option, the patch is also applied to the index, and -with the `--cached` option, the patch is only applied to the index. -Without these options, the command applies the patch only to files, -and does not require them to be in a Git repository. - -This command applies the patch but does not create a commit. Use -linkgit:git-am[1] to create commits from patches generated by -linkgit:git-format-patch[1] and/or received by email. - -OPTIONS -------- -...:: - The files to read the patch from. '-' can be used to read - from the standard input. - ---stat:: - Instead of applying the patch, output diffstat for the - input. Turns off "apply". - ---numstat:: - Similar to `--stat`, but shows the number of added and - deleted lines in decimal notation and the pathname without - abbreviation, to make it more machine friendly. For - binary files, outputs two `-` instead of saying - `0 0`. Turns off "apply". - ---summary:: - Instead of applying the patch, output a condensed - summary of information obtained from git diff extended - headers, such as creations, renames, and mode changes. - Turns off "apply". - ---check:: - Instead of applying the patch, see if the patch is - applicable to the current working tree and/or the index - file and detects errors. Turns off "apply". - ---index:: - Apply the patch to both the index and the working tree (or - merely check that it would apply cleanly to both if `--check` is - in effect). Note that `--index` expects index entries and - working tree copies for relevant paths to be identical (their - contents and metadata such as file mode must match), and will - raise an error if they are not, even if the patch would apply - cleanly to both the index and the working tree in isolation. - ---cached:: - Apply the patch to just the index, without touching the working - tree. If `--check` is in effect, merely check that it would - apply cleanly to the index entry. 
- --N:: ---intent-to-add:: - When applying the patch only to the working tree, mark new - files to be added to the index later (see `--intent-to-add` - option in linkgit:git-add[1]). This option is ignored if - `--index` or `--cached` are used, and has no effect outside a Git - repository. Note that `--index` could be implied by other options - such as `--3way`. - --3:: ---3way:: - Attempt 3-way merge if the patch records the identity of blobs it is supposed - to apply to and we have those blobs available locally, possibly leaving the - conflict markers in the files in the working tree for the user to - resolve. This option implies the `--index` option unless the - `--cached` option is used, and is incompatible with the `--reject` option. - When used with the `--cached` option, any conflicts are left at higher stages - in the cache. - ---ours:: ---theirs:: ---union:: - Instead of leaving conflicts in the file, resolve conflicts favouring - our (or their or both) side of the lines. Requires --3way. - ---build-fake-ancestor=:: - Newer 'git diff' output has embedded 'index information' - for each blob to help identify the original version that - the patch applies to. When this flag is given, and if - the original versions of the blobs are available locally, - builds a temporary index containing those blobs. -+ -When a pure mode change is encountered (which has no index information), -the information is read from the current index instead. - --R:: ---reverse:: - Apply the patch in reverse. - ---reject:: - For atomicity, 'git apply' by default fails the whole patch and - does not touch the working tree when some of the hunks - do not apply. This option makes it apply - the parts of the patch that are applicable, and leave the - rejected hunks in corresponding *.rej files. - --z:: - When `--numstat` has been given, do not munge pathnames, - but use a NUL-terminated machine-readable format. 
-+ -Without this option, pathnames with "unusual" characters are quoted as -explained for the configuration variable `core.quotePath` (see -linkgit:git-config[1]). - --p:: - Remove leading path components (separated by slashes) from - traditional diff paths. E.g., with `-p2`, a patch against - `a/dir/file` will be applied directly to `file`. The default is - 1. - --C:: - Ensure at least lines of surrounding context match before - and after each change. When fewer lines of surrounding - context exist they all must match. By default no context is - ever ignored. - ---unidiff-zero:: - By default, 'git apply' expects that the patch being - applied is a unified diff with at least one line of context. - This provides good safety measures, but breaks down when - applying a diff generated with `--unified=0`. To bypass these - checks use `--unidiff-zero`. -+ -Note, for the reasons stated above, the usage of context-free patches is -discouraged. - ---apply:: - If you use any of the options marked "Turns off - 'apply'" above, 'git apply' reads and outputs the - requested information without actually applying the - patch. Give this flag after those flags to also apply - the patch. - ---no-add:: - When applying a patch, ignore additions made by the - patch. This can be used to extract the common part between - two files by first running 'diff' on them and applying - the result with this option, which would apply the - deletion part but not the addition part. - ---allow-binary-replacement:: ---binary:: - Historically we did not allow binary patch application - without an explicit permission from the user, and this - flag was the way to do so. Currently, we always allow binary - patch application, so this is a no-op. - ---exclude=:: - Don't apply changes to files matching the given path pattern. This can - be useful when importing patchsets, where you want to exclude certain - files or directories. - ---include=:: - Apply changes to files matching the given path pattern. 
This can - be useful when importing patchsets, where you want to include certain - files or directories. -+ -When `--exclude` and `--include` patterns are used, they are examined in the -order they appear on the command line, and the first match determines if a -patch to each path is used. A patch to a path that does not match any -include/exclude pattern is used by default if there is no include pattern -on the command line, and ignored if there is any include pattern. - ---ignore-space-change:: ---ignore-whitespace:: - When applying a patch, ignore changes in whitespace in context - lines if necessary. - Context lines will preserve their whitespace, and they will not - undergo whitespace fixing regardless of the value of the - `--whitespace` option. New lines will still be fixed, though. - ---whitespace=:: - When applying a patch, detect a new or modified line that has - whitespace errors. What are considered whitespace errors is - controlled by `core.whitespace` configuration. By default, - trailing whitespaces (including lines that solely consist of - whitespaces) and a space character that is immediately followed - by a tab character inside the initial indent of the line are - considered whitespace errors. -+ -By default, the command outputs warning messages but applies the patch. -When `git-apply` is used for statistics and not applying a -patch, it defaults to `nowarn`. -+ -You can use different `` values to control this -behavior: -+ -* `nowarn` turns off the trailing whitespace warning. -* `warn` outputs warnings for a few such errors, but applies the - patch as-is (default). -* `fix` outputs warnings for a few such errors, and applies the - patch after fixing them (`strip` is a synonym -- the tool - used to consider only trailing whitespace characters as errors, and the - fix involved 'stripping' them, but modern Gits do more). -* `error` outputs warnings for a few such errors, and refuses - to apply the patch. 
-* `error-all` is similar to `error` but shows all errors. - ---inaccurate-eof:: - Under certain circumstances, some versions of 'diff' do not correctly - detect a missing new-line at the end of the file. As a result, patches - created by such 'diff' programs do not record incomplete lines - correctly. This option adds support for applying such patches by - working around this bug. - --v:: ---verbose:: - Report progress to stderr. By default, only a message about the - current patch being applied will be printed. This option will cause - additional information to be reported. - --q:: ---quiet:: - Suppress stderr output. Messages about patch status and progress - will not be printed. - ---recount:: - Do not trust the line counts in the hunk headers, but infer them - by inspecting the patch (e.g. after editing the patch without - adjusting the hunk headers appropriately). - ---directory=:: - Prepend to all filenames. If a "-p" argument was also passed, - it is applied before prepending the new root. -+ -For example, a patch that talks about updating `a/git-gui.sh` to `b/git-gui.sh` -can be applied to the file in the working tree `modules/git-gui/git-gui.sh` by -running `git apply --directory=modules/git-gui`. - ---unsafe-paths:: - By default, a patch that affects outside the working area - (either a Git controlled working tree, or the current working - directory when "git apply" is used as a replacement of GNU - patch) is rejected as a mistake (or a mischief). -+ -When `git apply` is used as a "better GNU patch", the user can pass -the `--unsafe-paths` option to override this safety check. This option -has no effect when `--index` or `--cached` is in use. - ---allow-empty:: - Don't return an error for patches containing no diff. This includes - empty patches and patches with commit text only. 
- -CONFIGURATION -------------- - -include::includes/cmd-config-section-all.adoc[] - -include::config/apply.adoc[] - -SUBMODULES ----------- -If the patch contains any changes to submodules then 'git apply' -treats these changes as follows. - -If `--index` is specified (explicitly or implicitly), then the submodule -commits must match the index exactly for the patch to apply. If any -of the submodules are checked-out, then these check-outs are completely -ignored, i.e., they are not required to be up to date or clean and they -are not updated. - -If `--index` is not specified, then the submodule commits in the patch -are ignored and only the absence or presence of the corresponding -subdirectory is checked and (if possible) updated. - -SEE ALSO --------- -linkgit:git-am[1]. - -GIT ---- -Part of the linkgit:git[1] suite diff --git a/ref/git-apply.c b/ref/git-apply.c deleted file mode 100644 index b6dd106..0000000 --- a/ref/git-apply.c +++ /dev/null @@ -1,5232 +0,0 @@ -/* - * apply.c - * - * Copyright (C) Linus Torvalds, 2005 - * - * This applies patches on top of some (arbitrary) version of the SCM. 
- * - */ - -#define USE_THE_REPOSITORY_VARIABLE -#define DISABLE_SIGN_COMPARE_WARNINGS - -#include "git-compat-util.h" -#include "abspath.h" -#include "base85.h" -#include "config.h" -#include "odb.h" -#include "delta.h" -#include "diff.h" -#include "dir.h" -#include "environment.h" -#include "gettext.h" -#include "hex.h" -#include "xdiff-interface.h" -#include "merge-ll.h" -#include "lockfile.h" -#include "name-hash.h" -#include "object-name.h" -#include "object-file.h" -#include "parse-options.h" -#include "path.h" -#include "quote.h" -#include "read-cache.h" -#include "repository.h" -#include "rerere.h" -#include "apply.h" -#include "entry.h" -#include "setup.h" -#include "symlinks.h" -#include "wildmatch.h" -#include "ws.h" - -struct gitdiff_data { - struct strbuf *root; - int linenr; - int p_value; -}; - -static void git_apply_config(void) -{ - repo_config_get_string(the_repository, "apply.whitespace", &apply_default_whitespace); - repo_config_get_string(the_repository, "apply.ignorewhitespace", &apply_default_ignorewhitespace); - repo_config(the_repository, git_xmerge_config, NULL); -} - -static int parse_whitespace_option(struct apply_state *state, const char *option) -{ - if (!option) { - state->ws_error_action = warn_on_ws_error; - return 0; - } - if (!strcmp(option, "warn")) { - state->ws_error_action = warn_on_ws_error; - return 0; - } - if (!strcmp(option, "nowarn")) { - state->ws_error_action = nowarn_ws_error; - return 0; - } - if (!strcmp(option, "error")) { - state->ws_error_action = die_on_ws_error; - return 0; - } - if (!strcmp(option, "error-all")) { - state->ws_error_action = die_on_ws_error; - state->squelch_whitespace_errors = 0; - return 0; - } - if (!strcmp(option, "strip") || !strcmp(option, "fix")) { - state->ws_error_action = correct_ws_error; - return 0; - } - /* - * Please update $__git_whitespacelist in git-completion.bash, - * Documentation/git-apply.adoc, and Documentation/git-am.adoc - * when you add new options. 
- */ - return error(_("unrecognized whitespace option '%s'"), option); -} - -static int parse_ignorewhitespace_option(struct apply_state *state, - const char *option) -{ - if (!option || !strcmp(option, "no") || - !strcmp(option, "false") || !strcmp(option, "never") || - !strcmp(option, "none")) { - state->ws_ignore_action = ignore_ws_none; - return 0; - } - if (!strcmp(option, "change")) { - state->ws_ignore_action = ignore_ws_change; - return 0; - } - return error(_("unrecognized whitespace ignore option '%s'"), option); -} - -int init_apply_state(struct apply_state *state, - struct repository *repo, - const char *prefix) -{ - memset(state, 0, sizeof(*state)); - state->prefix = prefix; - state->repo = repo; - state->apply = 1; - state->line_termination = '\n'; - state->p_value = 1; - state->p_context = UINT_MAX; - state->squelch_whitespace_errors = 5; - state->ws_error_action = warn_on_ws_error; - state->ws_ignore_action = ignore_ws_none; - state->linenr = 1; - string_list_init_nodup(&state->fn_table); - string_list_init_nodup(&state->limit_by_name); - strset_init(&state->removed_symlinks); - strset_init(&state->kept_symlinks); - strbuf_init(&state->root, 0); - - git_apply_config(); - if (apply_default_whitespace && parse_whitespace_option(state, apply_default_whitespace)) - return -1; - if (apply_default_ignorewhitespace && parse_ignorewhitespace_option(state, apply_default_ignorewhitespace)) - return -1; - return 0; -} - -void clear_apply_state(struct apply_state *state) -{ - string_list_clear(&state->limit_by_name, 0); - strset_clear(&state->removed_symlinks); - strset_clear(&state->kept_symlinks); - strbuf_release(&state->root); - FREE_AND_NULL(state->fake_ancestor); - - /* &state->fn_table is cleared at the end of apply_patch() */ -} - -static void mute_routine(const char *msg UNUSED, va_list params UNUSED) -{ - /* do nothing */ -} - -int check_apply_state(struct apply_state *state, int force_apply) -{ - int is_not_gitdir = !startup_info->have_repository; - 
- if (state->apply_with_reject && state->threeway) - return error(_("options '%s' and '%s' cannot be used together"), "--reject", "--3way"); - if (state->threeway) { - if (is_not_gitdir) - return error(_("'%s' outside a repository"), "--3way"); - state->check_index = 1; - } - if (state->apply_with_reject) { - state->apply = 1; - if (state->apply_verbosity == verbosity_normal) - state->apply_verbosity = verbosity_verbose; - } - if (!force_apply && (state->diffstat || state->numstat || state->summary || state->check || state->fake_ancestor)) - state->apply = 0; - if (state->check_index && is_not_gitdir) - return error(_("'%s' outside a repository"), "--index"); - if (state->cached) { - if (is_not_gitdir) - return error(_("'%s' outside a repository"), "--cached"); - state->check_index = 1; - } - if (state->ita_only && (state->check_index || is_not_gitdir)) - state->ita_only = 0; - if (state->check_index) - state->unsafe_paths = 0; - - if (state->apply_verbosity <= verbosity_silent) { - state->saved_error_routine = get_error_routine(); - state->saved_warn_routine = get_warn_routine(); - set_error_routine(mute_routine); - set_warn_routine(mute_routine); - } - - return 0; -} - -static void set_default_whitespace_mode(struct apply_state *state) -{ - if (!state->whitespace_option && !apply_default_whitespace) - state->ws_error_action = (state->apply ? warn_on_ws_error : nowarn_ws_error); -} - -/* - * This represents one "hunk" from a patch, starting with - * "@@ -oldpos,oldlines +newpos,newlines @@" marker. The - * patch text is pointed at by patch, and its byte length - * is stored in size. leading and trailing are the number - * of context lines. - */ -struct fragment { - unsigned long leading, trailing; - unsigned long oldpos, oldlines; - unsigned long newpos, newlines; - /* - * 'patch' is usually borrowed from buf in apply_patch(), - * but some codepaths store an allocated buffer. 
- */ - const char *patch; - unsigned free_patch:1, - rejected:1; - int size; - int linenr; - struct fragment *next; -}; - -/* - * When dealing with a binary patch, we reuse "leading" field - * to store the type of the binary hunk, either deflated "delta" - * or deflated "literal". - */ -#define binary_patch_method leading -#define BINARY_DELTA_DEFLATED 1 -#define BINARY_LITERAL_DEFLATED 2 - -static void free_fragment_list(struct fragment *list) -{ - while (list) { - struct fragment *next = list->next; - if (list->free_patch) - free((char *)list->patch); - free(list); - list = next; - } -} - -void release_patch(struct patch *patch) -{ - free_fragment_list(patch->fragments); - free(patch->def_name); - free(patch->old_name); - free(patch->new_name); - free(patch->result); -} - -static void free_patch(struct patch *patch) -{ - release_patch(patch); - free(patch); -} - -static void free_patch_list(struct patch *list) -{ - while (list) { - struct patch *next = list->next; - free_patch(list); - list = next; - } -} - -/* - * A line in a file, len-bytes long (includes the terminating LF, - * except for an incomplete line at the end if the file ends with - * one), and its contents hashes to 'hash'. - */ -struct line { - size_t len; - unsigned hash : 24; - unsigned flag : 8; -#define LINE_COMMON 1 -#define LINE_PATCHED 2 -}; - -/* - * This represents a "file", which is an array of "lines". 
- */ -struct image { - struct strbuf buf; - struct line *line; - size_t line_nr, line_alloc; -}; -#define IMAGE_INIT { \ - .buf = STRBUF_INIT, \ -} - -static void image_init(struct image *image) -{ - struct image empty = IMAGE_INIT; - memcpy(image, &empty, sizeof(*image)); -} - -static void image_clear(struct image *image) -{ - strbuf_release(&image->buf); - free(image->line); - image_init(image); -} - -static uint32_t hash_line(const char *cp, size_t len) -{ - size_t i; - uint32_t h; - for (i = 0, h = 0; i < len; i++) { - if (!isspace(cp[i])) { - h = h * 3 + (cp[i] & 0xff); - } - } - return h; -} - -static void image_add_line(struct image *img, const char *bol, size_t len, unsigned flag) -{ - ALLOC_GROW(img->line, img->line_nr + 1, img->line_alloc); - img->line[img->line_nr].len = len; - img->line[img->line_nr].hash = hash_line(bol, len); - img->line[img->line_nr].flag = flag; - img->line_nr++; -} - -/* - * "buf" has the file contents to be patched (read from various sources). - * attach it to "image" and add line-based index to it. - * "image" now owns the "buf". 
- */ -static void image_prepare(struct image *image, char *buf, size_t len, - int prepare_linetable) -{ - const char *cp, *ep; - - image_clear(image); - strbuf_attach(&image->buf, buf, len, len + 1); - - if (!prepare_linetable) - return; - - ep = image->buf.buf + image->buf.len; - cp = image->buf.buf; - while (cp < ep) { - const char *next; - for (next = cp; next < ep && *next != '\n'; next++) - ; - if (next < ep) - next++; - image_add_line(image, cp, next - cp, 0); - cp = next; - } -} - -static void image_remove_first_line(struct image *img) -{ - strbuf_remove(&img->buf, 0, img->line[0].len); - img->line_nr--; - if (img->line_nr) - MOVE_ARRAY(img->line, img->line + 1, img->line_nr); -} - -static void image_remove_last_line(struct image *img) -{ - size_t last_line_len = img->line[img->line_nr - 1].len; - strbuf_setlen(&img->buf, img->buf.len - last_line_len); - img->line_nr--; -} - -/* fmt must contain _one_ %s and no other substitution */ -static void say_patch_name(FILE *output, const char *fmt, struct patch *patch) -{ - struct strbuf sb = STRBUF_INIT; - - if (patch->old_name && patch->new_name && - strcmp(patch->old_name, patch->new_name)) { - quote_c_style(patch->old_name, &sb, NULL, 0); - strbuf_addstr(&sb, " => "); - quote_c_style(patch->new_name, &sb, NULL, 0); - } else { - const char *n = patch->new_name; - if (!n) - n = patch->old_name; - quote_c_style(n, &sb, NULL, 0); - } - fprintf(output, fmt, sb.buf); - fputc('\n', output); - strbuf_release(&sb); -} - -#define SLOP (16) - -/* - * apply.c isn't equipped to handle arbitrarily large patches, because - * it intermingles `unsigned long` with `int` for the type used to store - * buffer lengths. - * - * Only process patches that are just shy of 1 GiB large in order to - * avoid any truncation or overflow issues. 
- */ -#define MAX_APPLY_SIZE (1024UL * 1024 * 1023) - -static int read_patch_file(struct strbuf *sb, int fd) -{ - if (strbuf_read(sb, fd, 0) < 0) - return error_errno(_("failed to read patch")); - else if (sb->len >= MAX_APPLY_SIZE) - return error(_("patch too large")); - /* - * Make sure that we have some slop in the buffer - * so that we can do speculative "memcmp" etc, and - * see to it that it is NUL-filled. - */ - strbuf_grow(sb, SLOP); - memset(sb->buf + sb->len, 0, SLOP); - return 0; -} - -static unsigned long linelen(const char *buffer, unsigned long size) -{ - unsigned long len = 0; - while (size--) { - len++; - if (*buffer++ == '\n') - break; - } - return len; -} - -static int is_dev_null(const char *str) -{ - return skip_prefix(str, "/dev/null", &str) && isspace(*str); -} - -#define TERM_SPACE 1 -#define TERM_TAB 2 - -static int name_terminate(int c, int terminate) -{ - if (c == ' ' && !(terminate & TERM_SPACE)) - return 0; - if (c == '\t' && !(terminate & TERM_TAB)) - return 0; - - return 1; -} - -/* remove double slashes to make --index work with such filenames */ -static char *squash_slash(char *name) -{ - int i = 0, j = 0; - - if (!name) - return NULL; - - while (name[i]) { - if ((name[j++] = name[i++]) == '/') - while (name[i] == '/') - i++; - } - name[j] = '\0'; - return name; -} - -static char *find_name_gnu(struct strbuf *root, - const char *line, - int p_value) -{ - struct strbuf name = STRBUF_INIT; - char *cp; - - /* - * Proposed "new-style" GNU patch/diff format; see - * https://lore.kernel.org/git/7vll0wvb2a.fsf@assigned-by-dhcp.cox.net/ - */ - if (unquote_c_style(&name, line, NULL)) { - strbuf_release(&name); - return NULL; - } - - for (cp = name.buf; p_value; p_value--) { - cp = strchr(cp, '/'); - if (!cp) { - strbuf_release(&name); - return NULL; - } - cp++; - } - - strbuf_remove(&name, 0, cp - name.buf); - if (root->len) - strbuf_insert(&name, 0, root->buf, root->len); - return squash_slash(strbuf_detach(&name, NULL)); -} - -static size_t 
sane_tz_len(const char *line, size_t len) -{ - const char *tz, *p; - - if (len < strlen(" +0500") || line[len-strlen(" +0500")] != ' ') - return 0; - tz = line + len - strlen(" +0500"); - - if (tz[1] != '+' && tz[1] != '-') - return 0; - - for (p = tz + 2; p != line + len; p++) - if (!isdigit(*p)) - return 0; - - return line + len - tz; -} - -static size_t tz_with_colon_len(const char *line, size_t len) -{ - const char *tz, *p; - - if (len < strlen(" +08:00") || line[len - strlen(":00")] != ':') - return 0; - tz = line + len - strlen(" +08:00"); - - if (tz[0] != ' ' || (tz[1] != '+' && tz[1] != '-')) - return 0; - p = tz + 2; - if (!isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || - !isdigit(*p++) || !isdigit(*p++)) - return 0; - - return line + len - tz; -} - -static size_t date_len(const char *line, size_t len) -{ - const char *date, *p; - - if (len < strlen("72-02-05") || line[len-strlen("-05")] != '-') - return 0; - p = date = line + len - strlen("72-02-05"); - - if (!isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || - !isdigit(*p++) || !isdigit(*p++) || *p++ != '-' || - !isdigit(*p++) || !isdigit(*p++)) /* Not a date. */ - return 0; - - if (date - line >= strlen("19") && - isdigit(date[-1]) && isdigit(date[-2])) /* 4-digit year */ - date -= strlen("19"); - - return line + len - date; -} - -static size_t short_time_len(const char *line, size_t len) -{ - const char *time, *p; - - if (len < strlen(" 07:01:32") || line[len-strlen(":32")] != ':') - return 0; - p = time = line + len - strlen(" 07:01:32"); - - /* Permit 1-digit hours? */ - if (*p++ != ' ' || - !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || - !isdigit(*p++) || !isdigit(*p++) || *p++ != ':' || - !isdigit(*p++) || !isdigit(*p++)) /* Not a time. 
*/ - return 0; - - return line + len - time; -} - -static size_t fractional_time_len(const char *line, size_t len) -{ - const char *p; - size_t n; - - /* Expected format: 19:41:17.620000023 */ - if (!len || !isdigit(line[len - 1])) - return 0; - p = line + len - 1; - - /* Fractional seconds. */ - while (p > line && isdigit(*p)) - p--; - if (*p != '.') - return 0; - - /* Hours, minutes, and whole seconds. */ - n = short_time_len(line, p - line); - if (!n) - return 0; - - return line + len - p + n; -} - -static size_t trailing_spaces_len(const char *line, size_t len) -{ - const char *p; - - /* Expected format: ' ' x (1 or more) */ - if (!len || line[len - 1] != ' ') - return 0; - - p = line + len; - while (p != line) { - p--; - if (*p != ' ') - return line + len - (p + 1); - } - - /* All spaces! */ - return len; -} - -static size_t diff_timestamp_len(const char *line, size_t len) -{ - const char *end = line + len; - size_t n; - - /* - * Posix: 2010-07-05 19:41:17 - * GNU: 2010-07-05 19:41:17.620000023 -0500 - */ - - if (!isdigit(end[-1])) - return 0; - - n = sane_tz_len(line, end - line); - if (!n) - n = tz_with_colon_len(line, end - line); - end -= n; - - n = short_time_len(line, end - line); - if (!n) - n = fractional_time_len(line, end - line); - end -= n; - - n = date_len(line, end - line); - if (!n) /* No date. Too bad. */ - return 0; - end -= n; - - if (end == line) /* No space before date. */ - return 0; - if (end[-1] == '\t') { /* Success! */ - end--; - return line + len - end; - } - if (end[-1] != ' ') /* No space before date. */ - return 0; - - /* Whitespace damage. 
*/ - end -= trailing_spaces_len(line, end - line); - return line + len - end; -} - -static char *find_name_common(struct strbuf *root, - const char *line, - const char *def, - int p_value, - const char *end, - int terminate) -{ - int len; - const char *start = NULL; - - if (p_value == 0) - start = line; - while (line != end) { - char c = *line; - - if (!end && isspace(c)) { - if (c == '\n') - break; - if (name_terminate(c, terminate)) - break; - } - line++; - if (c == '/' && !--p_value) - start = line; - } - if (!start) - return squash_slash(xstrdup_or_null(def)); - len = line - start; - if (!len) - return squash_slash(xstrdup_or_null(def)); - - /* - * Generally we prefer the shorter name, especially - * if the other one is just a variation of that with - * something else tacked on to the end (ie "file.orig" - * or "file~"). - */ - if (def) { - int deflen = strlen(def); - if (deflen < len && !strncmp(start, def, deflen)) - return squash_slash(xstrdup(def)); - } - - if (root->len) { - char *ret = xstrfmt("%s%.*s", root->buf, len, start); - return squash_slash(ret); - } - - return squash_slash(xmemdupz(start, len)); -} - -static char *find_name(struct strbuf *root, - const char *line, - char *def, - int p_value, - int terminate) -{ - if (*line == '"') { - char *name = find_name_gnu(root, line, p_value); - if (name) - return name; - } - - return find_name_common(root, line, def, p_value, NULL, terminate); -} - -static char *find_name_traditional(struct strbuf *root, - const char *line, - char *def, - int p_value) -{ - size_t len; - size_t date_len; - - if (*line == '"') { - char *name = find_name_gnu(root, line, p_value); - if (name) - return name; - } - - len = strchrnul(line, '\n') - line; - date_len = diff_timestamp_len(line, len); - if (!date_len) - return find_name_common(root, line, def, p_value, NULL, TERM_TAB); - len -= date_len; - - return find_name_common(root, line, def, p_value, line + len, 0); -} - -/* - * Given the string after "--- " or "+++ ", guess 
the appropriate - * p_value for the given patch. - */ -static int guess_p_value(struct apply_state *state, const char *nameline) -{ - char *name, *cp; - int val = -1; - - if (is_dev_null(nameline)) - return -1; - name = find_name_traditional(&state->root, nameline, NULL, 0); - if (!name) - return -1; - cp = strchr(name, '/'); - if (!cp) - val = 0; - else if (state->prefix) { - /* - * Does it begin with "a/$our-prefix" and such? Then this is - * very likely to apply to our directory. - */ - if (starts_with(name, state->prefix)) - val = count_slashes(state->prefix); - else { - cp++; - if (starts_with(cp, state->prefix)) - val = count_slashes(state->prefix) + 1; - } - } - free(name); - return val; -} - -/* - * Does the ---/+++ line have the POSIX timestamp after the last HT? - * GNU diff puts epoch there to signal a creation/deletion event. Is - * this such a timestamp? - */ -static int has_epoch_timestamp(const char *nameline) -{ - /* - * We are only interested in epoch timestamp; any non-zero - * fraction cannot be one, hence "(\.0+)?" in the regexp below. - * For the same reason, the date must be either 1969-12-31 or - * 1970-01-01, and the seconds part must be "00". - */ - const char stamp_regexp[] = - "^[0-2][0-9]:([0-5][0-9]):00(\\.0+)?" 
- " " - "([-+][0-2][0-9]:?[0-5][0-9])\n"; - const char *timestamp = NULL, *cp, *colon; - static regex_t *stamp; - regmatch_t m[10]; - int zoneoffset, epoch_hour, hour, minute; - int status; - - for (cp = nameline; *cp != '\n'; cp++) { - if (*cp == '\t') - timestamp = cp + 1; - } - if (!timestamp) - return 0; - - /* - * YYYY-MM-DD hh:mm:ss must be from either 1969-12-31 - * (west of GMT) or 1970-01-01 (east of GMT) - */ - if (skip_prefix(timestamp, "1969-12-31 ", &timestamp)) - epoch_hour = 24; - else if (skip_prefix(timestamp, "1970-01-01 ", &timestamp)) - epoch_hour = 0; - else - return 0; - - if (!stamp) { - stamp = xmalloc(sizeof(*stamp)); - if (regcomp(stamp, stamp_regexp, REG_EXTENDED)) { - warning(_("Cannot prepare timestamp regexp %s"), - stamp_regexp); - return 0; - } - } - - status = regexec(stamp, timestamp, ARRAY_SIZE(m), m, 0); - if (status) { - if (status != REG_NOMATCH) - warning(_("regexec returned %d for input: %s"), - status, timestamp); - return 0; - } - - hour = strtol(timestamp, NULL, 10); - minute = strtol(timestamp + m[1].rm_so, NULL, 10); - - zoneoffset = strtol(timestamp + m[3].rm_so + 1, (char **) &colon, 10); - if (*colon == ':') - zoneoffset = zoneoffset * 60 + strtol(colon + 1, NULL, 10); - else - zoneoffset = (zoneoffset / 100) * 60 + (zoneoffset % 100); - if (timestamp[m[3].rm_so] == '-') - zoneoffset = -zoneoffset; - - return hour * 60 + minute - zoneoffset == epoch_hour * 60; -} - -/* - * Get the name etc info from the ---/+++ lines of a traditional patch header - * - * FIXME! The end-of-filename heuristics are kind of screwy. For existing - * files, we can happily check the index for a match, but for creating a - * new file we should try to match whatever "patch" does. I have no idea.
- */ -static int parse_traditional_patch(struct apply_state *state, - const char *first, - const char *second, - struct patch *patch) -{ - char *name; - - first += 4; /* skip "--- " */ - second += 4; /* skip "+++ " */ - if (!state->p_value_known) { - int p, q; - p = guess_p_value(state, first); - q = guess_p_value(state, second); - if (p < 0) p = q; - if (0 <= p && p == q) { - state->p_value = p; - state->p_value_known = 1; - } - } - if (is_dev_null(first)) { - patch->is_new = 1; - patch->is_delete = 0; - name = find_name_traditional(&state->root, second, NULL, state->p_value); - patch->new_name = name; - } else if (is_dev_null(second)) { - patch->is_new = 0; - patch->is_delete = 1; - name = find_name_traditional(&state->root, first, NULL, state->p_value); - patch->old_name = name; - } else { - char *first_name; - first_name = find_name_traditional(&state->root, first, NULL, state->p_value); - name = find_name_traditional(&state->root, second, first_name, state->p_value); - free(first_name); - if (has_epoch_timestamp(first)) { - patch->is_new = 1; - patch->is_delete = 0; - patch->new_name = name; - } else if (has_epoch_timestamp(second)) { - patch->is_new = 0; - patch->is_delete = 1; - patch->old_name = name; - } else { - patch->old_name = name; - patch->new_name = xstrdup_or_null(name); - } - } - if (!name) - return error(_("unable to find filename in patch at line %d"), state->linenr); - - return 0; -} - -static int gitdiff_hdrend(struct gitdiff_data *state UNUSED, - const char *line UNUSED, - struct patch *patch UNUSED) -{ - return 1; -} - -/* - * We're anal about diff header consistency, to make - * sure that we don't end up having strange ambiguous - * patches floating around. - * - * As a result, gitdiff_{old|new}name() will check - * their names against any previous information, just - * to make sure.. 
- */ -#define DIFF_OLD_NAME 0 -#define DIFF_NEW_NAME 1 - -static int gitdiff_verify_name(struct gitdiff_data *state, - const char *line, - int isnull, - char **name, - int side) -{ - if (!*name && !isnull) { - *name = find_name(state->root, line, NULL, state->p_value, TERM_TAB); - return 0; - } - - if (*name) { - char *another; - if (isnull) - return error(_("git apply: bad git-diff - expected /dev/null, got %s on line %d"), - *name, state->linenr); - another = find_name(state->root, line, NULL, state->p_value, TERM_TAB); - if (!another || strcmp(another, *name)) { - free(another); - return error((side == DIFF_NEW_NAME) ? - _("git apply: bad git-diff - inconsistent new filename on line %d") : - _("git apply: bad git-diff - inconsistent old filename on line %d"), state->linenr); - } - free(another); - } else { - if (!is_dev_null(line)) - return error(_("git apply: bad git-diff - expected /dev/null on line %d"), state->linenr); - } - - return 0; -} - -static int gitdiff_oldname(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - return gitdiff_verify_name(state, line, - patch->is_new, &patch->old_name, - DIFF_OLD_NAME); -} - -static int gitdiff_newname(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - return gitdiff_verify_name(state, line, - patch->is_delete, &patch->new_name, - DIFF_NEW_NAME); -} - -static int parse_mode_line(const char *line, int linenr, unsigned int *mode) -{ - char *end; - *mode = strtoul(line, &end, 8); - if (end == line || !isspace(*end)) - return error(_("invalid mode on line %d: %s"), linenr, line); - *mode = canon_mode(*mode); - return 0; -} - -static int gitdiff_oldmode(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - return parse_mode_line(line, state->linenr, &patch->old_mode); -} - -static int gitdiff_newmode(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - return parse_mode_line(line, state->linenr, &patch->new_mode); -} - -static 
int gitdiff_delete(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_delete = 1; - free(patch->old_name); - patch->old_name = xstrdup_or_null(patch->def_name); - return gitdiff_oldmode(state, line, patch); -} - -static int gitdiff_newfile(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_new = 1; - free(patch->new_name); - patch->new_name = xstrdup_or_null(patch->def_name); - return gitdiff_newmode(state, line, patch); -} - -static int gitdiff_copysrc(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_copy = 1; - free(patch->old_name); - patch->old_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); - return 0; -} - -static int gitdiff_copydst(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_copy = 1; - free(patch->new_name); - patch->new_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); - return 0; -} - -static int gitdiff_renamesrc(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_rename = 1; - free(patch->old_name); - patch->old_name = find_name(state->root, line, NULL, state->p_value ? state->p_value - 1 : 0, 0); - return 0; -} - -static int gitdiff_renamedst(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - patch->is_rename = 1; - free(patch->new_name); - patch->new_name = find_name(state->root, line, NULL, state->p_value ? 
state->p_value - 1 : 0, 0); - return 0; -} - -static int gitdiff_similarity(struct gitdiff_data *state UNUSED, - const char *line, - struct patch *patch) -{ - unsigned long val = strtoul(line, NULL, 10); - if (val <= 100) - patch->score = val; - return 0; -} - -static int gitdiff_dissimilarity(struct gitdiff_data *state UNUSED, - const char *line, - struct patch *patch) -{ - unsigned long val = strtoul(line, NULL, 10); - if (val <= 100) - patch->score = val; - return 0; -} - -static int gitdiff_index(struct gitdiff_data *state, - const char *line, - struct patch *patch) -{ - /* - * index line is N hexadecimal, "..", N hexadecimal, - * and optional space with octal mode. - */ - const char *ptr, *eol; - int len; - const unsigned hexsz = the_hash_algo->hexsz; - - ptr = strchr(line, '.'); - if (!ptr || ptr[1] != '.' || hexsz < ptr - line) - return 0; - len = ptr - line; - memcpy(patch->old_oid_prefix, line, len); - patch->old_oid_prefix[len] = 0; - - line = ptr + 2; - ptr = strchr(line, ' '); - eol = strchrnul(line, '\n'); - - if (!ptr || eol < ptr) - ptr = eol; - len = ptr - line; - - if (hexsz < len) - return 0; - memcpy(patch->new_oid_prefix, line, len); - patch->new_oid_prefix[len] = 0; - if (*ptr == ' ') - return gitdiff_oldmode(state, ptr + 1, patch); - return 0; -} - -/* - * This is normal for a diff that doesn't change anything: we'll fall through - * into the next diff. Tell the parser to break out. - */ -static int gitdiff_unrecognized(struct gitdiff_data *state UNUSED, - const char *line UNUSED, - struct patch *patch UNUSED) -{ - return 1; -} - -/* - * Skip p_value leading components from "line"; as we do not accept - * absolute paths, return NULL in that case. - */ -static const char *skip_tree_prefix(int p_value, - const char *line, - int llen) -{ - int nslash; - int i; - - if (!p_value) - return (llen && line[0] == '/') ? 
NULL : line; - - nslash = p_value; - for (i = 0; i < llen; i++) { - int ch = line[i]; - if (ch == '/' && --nslash <= 0) - return (i == 0) ? NULL : &line[i + 1]; - } - return NULL; -} - -/* - * This is to extract the same name that appears on "diff --git" - * line. We do not find and return anything if it is a rename - * patch, and it is OK because we will find the name elsewhere. - * We need to reliably find name only when it is mode-change only, - * creation or deletion of an empty file. In any of these cases, - * both sides are the same name under a/ and b/ respectively. - */ -static char *git_header_name(int p_value, - const char *line, - int llen) -{ - const char *name; - const char *second = NULL; - size_t len, line_len; - - line += strlen("diff --git "); - llen -= strlen("diff --git "); - - if (*line == '"') { - const char *cp; - struct strbuf first = STRBUF_INIT; - struct strbuf sp = STRBUF_INIT; - - if (unquote_c_style(&first, line, &second)) - goto free_and_fail1; - - /* strip the a/b prefix including trailing slash */ - cp = skip_tree_prefix(p_value, first.buf, first.len); - if (!cp) - goto free_and_fail1; - strbuf_remove(&first, 0, cp - first.buf); - - /* - * second points at one past closing dq of name. - * find the second name. 
- */ - while ((second < line + llen) && isspace(*second)) - second++; - - if (line + llen <= second) - goto free_and_fail1; - if (*second == '"') { - if (unquote_c_style(&sp, second, NULL)) - goto free_and_fail1; - cp = skip_tree_prefix(p_value, sp.buf, sp.len); - if (!cp) - goto free_and_fail1; - /* They must match, otherwise ignore */ - if (strcmp(cp, first.buf)) - goto free_and_fail1; - strbuf_release(&sp); - return strbuf_detach(&first, NULL); - } - - /* unquoted second */ - cp = skip_tree_prefix(p_value, second, line + llen - second); - if (!cp) - goto free_and_fail1; - if (line + llen - cp != first.len || - memcmp(first.buf, cp, first.len)) - goto free_and_fail1; - return strbuf_detach(&first, NULL); - - free_and_fail1: - strbuf_release(&first); - strbuf_release(&sp); - return NULL; - } - - /* unquoted first name */ - name = skip_tree_prefix(p_value, line, llen); - if (!name) - return NULL; - - /* - * since the first name is unquoted, a dq if exists must be - * the beginning of the second name. - */ - for (second = name; second < line + llen; second++) { - if (*second == '"') { - struct strbuf sp = STRBUF_INIT; - const char *np; - - if (unquote_c_style(&sp, second, NULL)) - goto free_and_fail2; - - np = skip_tree_prefix(p_value, sp.buf, sp.len); - if (!np) - goto free_and_fail2; - - len = sp.buf + sp.len - np; - if (len < second - name && - !strncmp(np, name, len) && - isspace(name[len])) { - /* Good */ - strbuf_remove(&sp, 0, np - sp.buf); - return strbuf_detach(&sp, NULL); - } - - free_and_fail2: - strbuf_release(&sp); - return NULL; - } - } - - /* - * Accept a name only if it shows up twice, exactly the same - * form. - */ - second = strchr(name, '\n'); - if (!second) - return NULL; - line_len = second - name; - for (len = 0 ; ; len++) { - switch (name[len]) { - default: - continue; - case '\n': - return NULL; - case '\t': case ' ': - /* - * Is this the separator between the preimage - * and the postimage pathname? 
Again, we are - * only interested in the case where there is - * no rename, as this is only to set def_name - * and a rename patch has the names elsewhere - * in an unambiguous form. - */ - if (!name[len + 1]) - return NULL; /* no postimage name */ - second = skip_tree_prefix(p_value, name + len + 1, - line_len - (len + 1)); - /* - * If we are at the SP at the end of a directory, - * skip_tree_prefix() may return NULL as that makes - * it appears as if we have an absolute path. - * Keep going to find another SP. - */ - if (!second) - continue; - - /* - * Does len bytes starting at "name" and "second" - * (that are separated by one HT or SP we just - * found) exactly match? - */ - if (second[len] == '\n' && !strncmp(name, second, len)) - return xmemdupz(name, len); - } - } -} - -static int check_header_line(int linenr, struct patch *patch) -{ - int extensions = (patch->is_delete == 1) + (patch->is_new == 1) + - (patch->is_rename == 1) + (patch->is_copy == 1); - if (extensions > 1) - return error(_("inconsistent header lines %d and %d"), - patch->extension_linenr, linenr); - if (extensions && !patch->extension_linenr) - patch->extension_linenr = linenr; - return 0; -} - -int parse_git_diff_header(struct strbuf *root, - int *linenr, - int p_value, - const char *line, - int len, - unsigned int size, - struct patch *patch) -{ - unsigned long offset; - struct gitdiff_data parse_hdr_state; - - /* A git diff has explicit new/delete information, so we don't guess */ - patch->is_new = 0; - patch->is_delete = 0; - - /* - * Some things may not have the old name in the - * rest of the headers anywhere (pure mode changes, - * or removing or adding empty files), so we get - * the default name from the header. 
- */ - patch->def_name = git_header_name(p_value, line, len); - if (patch->def_name && root->len) { - char *s = xstrfmt("%s%s", root->buf, patch->def_name); - free(patch->def_name); - patch->def_name = s; - } - - line += len; - size -= len; - (*linenr)++; - parse_hdr_state.root = root; - parse_hdr_state.linenr = *linenr; - parse_hdr_state.p_value = p_value; - - for (offset = len ; size > 0 ; offset += len, size -= len, line += len, (*linenr)++) { - static const struct opentry { - const char *str; - int (*fn)(struct gitdiff_data *, const char *, struct patch *); - } optable[] = { - { "@@ -", gitdiff_hdrend }, - { "--- ", gitdiff_oldname }, - { "+++ ", gitdiff_newname }, - { "old mode ", gitdiff_oldmode }, - { "new mode ", gitdiff_newmode }, - { "deleted file mode ", gitdiff_delete }, - { "new file mode ", gitdiff_newfile }, - { "copy from ", gitdiff_copysrc }, - { "copy to ", gitdiff_copydst }, - { "rename old ", gitdiff_renamesrc }, - { "rename new ", gitdiff_renamedst }, - { "rename from ", gitdiff_renamesrc }, - { "rename to ", gitdiff_renamedst }, - { "similarity index ", gitdiff_similarity }, - { "dissimilarity index ", gitdiff_dissimilarity }, - { "index ", gitdiff_index }, - { "", gitdiff_unrecognized }, - }; - int i; - - len = linelen(line, size); - if (!len || line[len-1] != '\n') - break; - for (i = 0; i < ARRAY_SIZE(optable); i++) { - const struct opentry *p = optable + i; - int oplen = strlen(p->str); - int res; - if (len < oplen || memcmp(p->str, line, oplen)) - continue; - res = p->fn(&parse_hdr_state, line + oplen, patch); - if (res < 0) - return -1; - if (check_header_line(*linenr, patch)) - return -1; - if (res > 0) - goto done; - break; - } - } - -done: - if (!patch->old_name && !patch->new_name) { - if (!patch->def_name) { - error(Q_("git diff header lacks filename information when removing " - "%d leading pathname component (line %d)", - "git diff header lacks filename information when removing " - "%d leading pathname components (line %d)", - 
parse_hdr_state.p_value), - parse_hdr_state.p_value, *linenr); - return -128; - } - patch->old_name = xstrdup(patch->def_name); - patch->new_name = xstrdup(patch->def_name); - } - if ((!patch->new_name && !patch->is_delete) || - (!patch->old_name && !patch->is_new)) { - error(_("git diff header lacks filename information " - "(line %d)"), *linenr); - return -128; - } - patch->is_toplevel_relative = 1; - return offset; -} - -static int parse_num(const char *line, unsigned long *p) -{ - char *ptr; - - if (!isdigit(*line)) - return 0; - errno = 0; - *p = strtoul(line, &ptr, 10); - if (errno) - return 0; - return ptr - line; -} - -static int parse_range(const char *line, int len, int offset, const char *expect, - unsigned long *p1, unsigned long *p2) -{ - int digits, ex; - - if (offset < 0 || offset >= len) - return -1; - line += offset; - len -= offset; - - digits = parse_num(line, p1); - if (!digits) - return -1; - - offset += digits; - line += digits; - len -= digits; - - *p2 = 1; - if (*line == ',') { - digits = parse_num(line+1, p2); - if (!digits) - return -1; - - offset += digits+1; - line += digits+1; - len -= digits+1; - } - - ex = strlen(expect); - if (ex > len) - return -1; - if (memcmp(line, expect, ex)) - return -1; - - return offset + ex; -} - -static void recount_diff(const char *line, int size, struct fragment *fragment) -{ - int oldlines = 0, newlines = 0, ret = 0; - - if (size < 1) { - warning("recount: ignore empty hunk"); - return; - } - - for (;;) { - int len = linelen(line, size); - size -= len; - line += len; - - if (size < 1) - break; - - switch (*line) { - case ' ': case '\n': - newlines++; - /* fall through */ - case '-': - oldlines++; - continue; - case '+': - newlines++; - continue; - case '\\': - continue; - case '@': - ret = size < 3 || !starts_with(line, "@@ "); - break; - case 'd': - ret = size < 5 || !starts_with(line, "diff "); - break; - default: - ret = -1; - break; - } - if (ret) { - warning(_("recount: unexpected line: %.*s"), - 
(int)linelen(line, size), line); - return; - } - break; - } - fragment->oldlines = oldlines; - fragment->newlines = newlines; -} - -/* - * Parse a unified diff fragment header of the - * form "@@ -a,b +c,d @@" - */ -static int parse_fragment_header(const char *line, int len, struct fragment *fragment) -{ - int offset; - - if (!len || line[len-1] != '\n') - return -1; - - /* Figure out the number of lines in a fragment */ - offset = parse_range(line, len, 4, " +", &fragment->oldpos, &fragment->oldlines); - offset = parse_range(line, len, offset, " @@", &fragment->newpos, &fragment->newlines); - - return offset; -} - -/* - * Find file diff header - * - * Returns: - * -1 if no header was found - * -128 in case of error - * the size of the header in bytes (called "offset") otherwise - */ -static int find_header(struct apply_state *state, - const char *line, - unsigned long size, - int *hdrsize, - struct patch *patch) -{ - unsigned long offset, len; - - patch->is_toplevel_relative = 0; - patch->is_rename = patch->is_copy = 0; - patch->is_new = patch->is_delete = -1; - patch->old_mode = patch->new_mode = 0; - patch->old_name = patch->new_name = NULL; - for (offset = 0; size > 0; offset += len, size -= len, line += len, state->linenr++) { - unsigned long nextlen; - - len = linelen(line, size); - if (!len) - break; - - /* Testing this early allows us to take a few shortcuts.. */ - if (len < 6) - continue; - - /* - * Make sure we don't find any unconnected patch fragments. - * That's a sign that we didn't find a header, and that a - * patch has become corrupted/broken up. - */ - if (!memcmp("@@ -", line, 4)) { - struct fragment dummy; - if (parse_fragment_header(line, len, &dummy) < 0) - continue; - error(_("patch fragment without header at line %d: %.*s"), - state->linenr, (int)len-1, line); - return -128; - } - - if (size < len + 6) - break; - - /* - * Git patch? 
It might not have a real patch, just a rename - * or mode change, so we handle that specially - */ - if (!memcmp("diff --git ", line, 11)) { - int git_hdr_len = parse_git_diff_header(&state->root, &state->linenr, - state->p_value, line, len, - size, patch); - if (git_hdr_len < 0) - return -128; - if (git_hdr_len <= len) - continue; - *hdrsize = git_hdr_len; - return offset; - } - - /* --- followed by +++ ? */ - if (memcmp("--- ", line, 4) || memcmp("+++ ", line + len, 4)) - continue; - - /* - * We only accept unified patches, so we want it to - * at least have "@@ -a,b +c,d @@\n", which is 14 chars - * minimum ("@@ -0,0 +1 @@\n" is the shortest). - */ - nextlen = linelen(line + len, size - len); - if (size < nextlen + 14 || memcmp("@@ -", line + len + nextlen, 4)) - continue; - - /* Ok, we'll consider it a patch */ - if (parse_traditional_patch(state, line, line+len, patch)) - return -128; - *hdrsize = len + nextlen; - state->linenr += 2; - return offset; - } - return -1; -} - -static void record_ws_error(struct apply_state *state, - unsigned result, - const char *line, - int len, - int linenr) -{ - char *err; - - if (!result) - return; - - state->whitespace_error++; - if (state->squelch_whitespace_errors && - state->squelch_whitespace_errors < state->whitespace_error) - return; - - /* - * line[len] for an incomplete line points at the "\n" at the end - * of patch input line, so "%.*s" would drop the last letter on line; - * compensate for it. 
- */ - if (result & WS_INCOMPLETE_LINE) - len++; - - err = whitespace_error_string(result); - if (state->apply_verbosity > verbosity_silent) - fprintf(stderr, "%s:%d: %s.\n%.*s\n", - state->patch_input_file, linenr, err, len, line); - free(err); -} - -static void check_whitespace(struct apply_state *state, - const char *line, - int len, - unsigned ws_rule) -{ - unsigned result = ws_check(line + 1, len - 1, ws_rule); - - record_ws_error(state, result, line + 1, len - 2, state->linenr); -} - -/* - * Check if the patch has context lines with CRLF or - * the patch wants to remove lines with CRLF. - */ -static void check_old_for_crlf(struct patch *patch, const char *line, int len) -{ - if (len >= 2 && line[len-1] == '\n' && line[len-2] == '\r') { - patch->ws_rule |= WS_CR_AT_EOL; - patch->crlf_in_old = 1; - } -} - - -/* - * Just saw a single line in a fragment. If it is a part of this hunk - * that is a context " ", an added "+", or a removed "-" line, it may - * be followed by "\\ No newline..." to signal that the last "\n" on - * this line needs to be dropped. Depending on locale settings when - * the patch was produced we don't know what this line would exactly - * say. The only thing we do know is that it begins with "\ ". - * Checking for 12 is just for sanity check; "\ No newline..." would - * be at least that long in any l10n. - * - * Return 0 if the line we saw is not followed by "\ No newline...", - * or length of that line. The caller will use it to skip over the - * "\ No newline..." line. - */ -static int adjust_incomplete(const char *line, int len, - unsigned long size) -{ - int nextlen; - - if (*line != '\n' && *line != ' ' && *line != '+' && *line != '-') - return 0; - if (size - len < 12 || memcmp(line + len, "\\ ", 2)) - return 0; - nextlen = linelen(line + len, size - len); - if (nextlen < 12) - return 0; - return nextlen; -} - -/* - * Parse a unified diff. 
Note that this really needs to parse each - * fragment separately, since the only way to know the difference - * between a "---" that is part of a patch, and a "---" that starts - * the next patch is to look at the line counts.. - */ -static int parse_fragment(struct apply_state *state, - const char *line, - unsigned long size, - struct patch *patch, - struct fragment *fragment) -{ - int added, deleted; - int len = linelen(line, size), offset; - int skip_len = 0; - unsigned long oldlines, newlines; - unsigned long leading, trailing; - - /* do not complain a symbolic link being an incomplete line */ - if (patch->ws_rule & WS_INCOMPLETE_LINE) { - /* - * We want to figure out if the postimage is a - * symbolic link when applying the patch normally, or - * if the preimage is a symbolic link when applying - * the patch in reverse. A normal patch only has - * old_mode without new_mode. If it changes the - * filemode, new_mode has value, which is different - * from old_mode. - */ - unsigned mode = (state->apply_in_reverse - ? patch->old_mode - : patch->new_mode - ? patch->new_mode - : patch->old_mode); - if (mode && S_ISLNK(mode)) - patch->ws_rule &= ~WS_INCOMPLETE_LINE; - } - - offset = parse_fragment_header(line, len, fragment); - if (offset < 0) - return -1; - if (offset > 0 && patch->recount) - recount_diff(line + offset, size - offset, fragment); - oldlines = fragment->oldlines; - newlines = fragment->newlines; - leading = 0; - trailing = 0; - - /* Parse the thing.. */ - line += len; - size -= len; - state->linenr++; - added = deleted = 0; - for (offset = len; - 0 < size; - offset += len, size -= len, line += len, state->linenr++) { - if (!oldlines && !newlines) - break; - len = linelen(line, size); - if (!len || line[len-1] != '\n') - return -1; - - /* - * For an incomplete line, skip_len counts the bytes - * on "\\ No newline..." marker line that comes next - * to the current line. 
- * - * Reduce "len" to drop the newline at the end of - * line[], but add one to "skip_len", which will be - * added back to "len" for the next iteration, to - * compensate. - */ - skip_len = adjust_incomplete(line, len, size); - if (skip_len) { - len--; - skip_len++; - } - switch (*line) { - default: - return -1; - case '\n': /* newer GNU diff, an empty context line */ - case ' ': - oldlines--; - newlines--; - if (!deleted && !added) - leading++; - trailing++; - check_old_for_crlf(patch, line, len); - if (!state->apply_in_reverse && - state->ws_error_action == correct_ws_error) - check_whitespace(state, line, len, patch->ws_rule); - break; - case '-': - if (!state->apply_in_reverse) - check_old_for_crlf(patch, line, len); - if (state->apply_in_reverse && - state->ws_error_action != nowarn_ws_error) - check_whitespace(state, line, len, patch->ws_rule); - deleted++; - oldlines--; - trailing = 0; - break; - case '+': - if (state->apply_in_reverse) - check_old_for_crlf(patch, line, len); - if (!state->apply_in_reverse && - state->ws_error_action != nowarn_ws_error) - check_whitespace(state, line, len, patch->ws_rule); - added++; - newlines--; - trailing = 0; - break; - } - - /* eat the "\\ No newline..." as well, if exists */ - if (skip_len) { - len += skip_len; - state->linenr++; - } - } - if (oldlines || newlines) - return -1; - if (!patch->recount && !deleted && !added) - return -1; - - fragment->leading = leading; - fragment->trailing = trailing; - - patch->lines_added += added; - patch->lines_deleted += deleted; - - if (0 < patch->is_new && oldlines) - return error(_("new file depends on old contents")); - if (0 < patch->is_delete && newlines) - return error(_("deleted file still has contents")); - return offset; -} - -/* - * We have seen "diff --git a/... b/..." header (or a traditional patch - * header). Read hunks that belong to this patch into fragments and hang - * them to the given patch structure. 
- * - * The (fragment->patch, fragment->size) pair points into the memory given - * by the caller, not a copy, when we return. - * - * Returns: - * -1 in case of error, - * the number of bytes in the patch otherwise. - */ -static int parse_single_patch(struct apply_state *state, - const char *line, - unsigned long size, - struct patch *patch) -{ - unsigned long offset = 0; - unsigned long oldlines = 0, newlines = 0, context = 0; - struct fragment **fragp = &patch->fragments; - - while (size > 4 && !memcmp(line, "@@ -", 4)) { - struct fragment *fragment; - int len; - - CALLOC_ARRAY(fragment, 1); - fragment->linenr = state->linenr; - len = parse_fragment(state, line, size, patch, fragment); - if (len <= 0) { - free(fragment); - return error(_("corrupt patch at line %d"), state->linenr); - } - fragment->patch = line; - fragment->size = len; - oldlines += fragment->oldlines; - newlines += fragment->newlines; - context += fragment->leading + fragment->trailing; - - *fragp = fragment; - fragp = &fragment->next; - - offset += len; - line += len; - size -= len; - } - - /* - * If something was removed (i.e. we have old-lines) it cannot - * be creation, and if something was added it cannot be - * deletion. However, the reverse is not true; --unified=0 - * patches that only add are not necessarily creation even - * though they do not have any old lines, and ones that only - * delete are not necessarily deletion. - * - * Unfortunately, a real creation/deletion patch do _not_ have - * any context line by definition, so we cannot safely tell it - * apart with --unified=0 insanity. At least if the patch has - * more than one hunk it is not creation or deletion. 
- */ - if (patch->is_new < 0 && - (oldlines || (patch->fragments && patch->fragments->next))) - patch->is_new = 0; - if (patch->is_delete < 0 && - (newlines || (patch->fragments && patch->fragments->next))) - patch->is_delete = 0; - - if (0 < patch->is_new && oldlines) - return error(_("new file %s depends on old contents"), patch->new_name); - if (0 < patch->is_delete && newlines) - return error(_("deleted file %s still has contents"), patch->old_name); - if (!patch->is_delete && !newlines && context && state->apply_verbosity > verbosity_silent) - fprintf_ln(stderr, - _("** warning: " - "file %s becomes empty but is not deleted"), - patch->new_name); - - return offset; -} - -static inline int metadata_changes(struct patch *patch) -{ - return patch->is_rename > 0 || - patch->is_copy > 0 || - patch->is_new > 0 || - patch->is_delete || - (patch->old_mode && patch->new_mode && - patch->old_mode != patch->new_mode); -} - -static char *inflate_it(const void *data, unsigned long size, - unsigned long inflated_size) -{ - git_zstream stream; - void *out; - int st; - - memset(&stream, 0, sizeof(stream)); - - stream.next_in = (unsigned char *)data; - stream.avail_in = size; - stream.next_out = out = xmalloc(inflated_size); - stream.avail_out = inflated_size; - git_inflate_init(&stream); - st = git_inflate(&stream, Z_FINISH); - git_inflate_end(&stream); - if ((st != Z_STREAM_END) || stream.total_out != inflated_size) { - free(out); - return NULL; - } - return out; -} - -/* - * Read a binary hunk and return a new fragment; fragment->patch - * points at an allocated memory that the caller must free, so - * it is marked as "->free_patch = 1". - */ -static struct fragment *parse_binary_hunk(struct apply_state *state, - char **buf_p, - unsigned long *sz_p, - int *status_p, - int *used_p) -{ - /* - * Expect a line that begins with binary patch method ("literal" - * or "delta"), followed by the length of data before deflating. 
- * a sequence of 'length-byte' followed by base-85 encoded data - * should follow, terminated by a newline. - * - * Each 5-byte sequence of base-85 encodes up to 4 bytes, - * and we would limit the patch line to 66 characters, - * so one line can fit up to 13 groups that would decode - * to 52 bytes max. The length byte 'A'-'Z' corresponds - * to 1-26 bytes, and 'a'-'z' corresponds to 27-52 bytes. - */ - int llen, used; - unsigned long size = *sz_p; - char *buffer = *buf_p; - int patch_method; - unsigned long origlen; - char *data = NULL; - int hunk_size = 0; - struct fragment *frag; - - llen = linelen(buffer, size); - used = llen; - - *status_p = 0; - - if (starts_with(buffer, "delta ")) { - patch_method = BINARY_DELTA_DEFLATED; - origlen = strtoul(buffer + 6, NULL, 10); - } - else if (starts_with(buffer, "literal ")) { - patch_method = BINARY_LITERAL_DEFLATED; - origlen = strtoul(buffer + 8, NULL, 10); - } - else - return NULL; - - state->linenr++; - buffer += llen; - size -= llen; - while (1) { - int byte_length, max_byte_length, newsize; - llen = linelen(buffer, size); - used += llen; - state->linenr++; - if (llen == 1) { - /* consume the blank line */ - buffer++; - size--; - break; - } - /* - * Minimum line is "A00000\n" which is 7-byte long, - * and the line length must be multiple of 5 plus 2. 
- */ - if ((llen < 7) || (llen-2) % 5) - goto corrupt; - max_byte_length = (llen - 2) / 5 * 4; - byte_length = *buffer; - if ('A' <= byte_length && byte_length <= 'Z') - byte_length = byte_length - 'A' + 1; - else if ('a' <= byte_length && byte_length <= 'z') - byte_length = byte_length - 'a' + 27; - else - goto corrupt; - /* if the input length was not multiple of 4, we would - * have filler at the end but the filler should never - * exceed 3 bytes - */ - if (max_byte_length < byte_length || - byte_length <= max_byte_length - 4) - goto corrupt; - newsize = hunk_size + byte_length; - data = xrealloc(data, newsize); - if (decode_85(data + hunk_size, buffer + 1, byte_length)) - goto corrupt; - hunk_size = newsize; - buffer += llen; - size -= llen; - } - - CALLOC_ARRAY(frag, 1); - frag->patch = inflate_it(data, hunk_size, origlen); - frag->free_patch = 1; - if (!frag->patch) - goto corrupt; - free(data); - frag->size = origlen; - *buf_p = buffer; - *sz_p = size; - *used_p = used; - frag->binary_patch_method = patch_method; - return frag; - - corrupt: - free(data); - *status_p = -1; - error(_("corrupt binary patch at line %d: %.*s"), - state->linenr-1, llen-1, buffer); - return NULL; -} - -/* - * Returns: - * -1 in case of error, - * the length of the parsed binary patch otherwise - */ -static int parse_binary(struct apply_state *state, - char *buffer, - unsigned long size, - struct patch *patch) -{ - /* - * We have read "GIT binary patch\n"; what follows is a line - * that says the patch method (currently, either "literal" or - * "delta") and the length of data before deflating; a - * sequence of 'length-byte' followed by base-85 encoded data - * follows. - * - * When a binary patch is reversible, there is another binary - * hunk in the same format, starting with patch method (either - * "literal" or "delta") with the length of data, and a sequence - * of length-byte + base-85 encoded data, terminated with another - * empty line. 
This data, when applied to the postimage, produces - * the preimage. - */ - struct fragment *forward; - struct fragment *reverse; - int status; - int used, used_1; - - forward = parse_binary_hunk(state, &buffer, &size, &status, &used); - if (!forward && !status) - /* there has to be one hunk (forward hunk) */ - return error(_("unrecognized binary patch at line %d"), state->linenr-1); - if (status) - /* otherwise we already gave an error message */ - return status; - - reverse = parse_binary_hunk(state, &buffer, &size, &status, &used_1); - if (reverse) - used += used_1; - else if (status) { - /* - * Not having reverse hunk is not an error, but having - * a corrupt reverse hunk is. - */ - free((void*) forward->patch); - free(forward); - return status; - } - forward->next = reverse; - patch->fragments = forward; - patch->is_binary = 1; - return used; -} - -static void prefix_one(struct apply_state *state, char **name) -{ - char *old_name = *name; - if (!old_name) - return; - *name = prefix_filename(state->prefix, *name); - free(old_name); -} - -static void prefix_patch(struct apply_state *state, struct patch *p) -{ - if (!state->prefix || p->is_toplevel_relative) - return; - prefix_one(state, &p->new_name); - prefix_one(state, &p->old_name); -} - -/* - * include/exclude - */ - -static void add_name_limit(struct apply_state *state, - const char *name, - int exclude) -{ - struct string_list_item *it; - - it = string_list_append(&state->limit_by_name, name); - it->util = exclude ? NULL : (void *) 1; -} - -static int use_patch(struct apply_state *state, struct patch *p) -{ - const char *pathname = p->new_name ? 
p->new_name : p->old_name; - int i; - - /* Paths outside are not touched regardless of "--include" */ - if (state->prefix && *state->prefix) { - const char *rest; - if (!skip_prefix(pathname, state->prefix, &rest) || !*rest) - return 0; - } - - /* See if it matches any of exclude/include rule */ - for (i = 0; i < state->limit_by_name.nr; i++) { - struct string_list_item *it = &state->limit_by_name.items[i]; - if (!wildmatch(it->string, pathname, 0)) - return (it->util != NULL); - } - - /* - * If we had any include, a path that does not match any rule is - * not used. Otherwise, we saw bunch of exclude rules (or none) - * and such a path is used. - */ - return !state->has_include; -} - -/* - * Read the patch text in "buffer" that extends for "size" bytes; stop - * reading after seeing a single patch (i.e. changes to a single file). - * Create fragments (i.e. patch hunks) and hang them to the given patch. - * - * Returns: - * -1 if no header was found or parse_binary() failed, - * -128 on another error, - * the number of bytes consumed otherwise, - * so that the caller can call us again for the next patch. 
- */ -static int parse_chunk(struct apply_state *state, char *buffer, unsigned long size, struct patch *patch) -{ - int hdrsize, patchsize; - int offset = find_header(state, buffer, size, &hdrsize, patch); - - if (offset < 0) - return offset; - - prefix_patch(state, patch); - - if (!use_patch(state, patch)) - patch->ws_rule = 0; - else if (patch->new_name) - patch->ws_rule = whitespace_rule(state->repo->index, - patch->new_name); - else - patch->ws_rule = whitespace_rule(state->repo->index, - patch->old_name); - - patchsize = parse_single_patch(state, - buffer + offset + hdrsize, - size - offset - hdrsize, - patch); - - if (patchsize < 0) - return -128; - - if (!patchsize) { - static const char git_binary[] = "GIT binary patch\n"; - int hd = hdrsize + offset; - unsigned long llen = linelen(buffer + hd, size - hd); - - if (llen == sizeof(git_binary) - 1 && - !memcmp(git_binary, buffer + hd, llen)) { - int used; - state->linenr++; - used = parse_binary(state, buffer + hd + llen, - size - hd - llen, patch); - if (used < 0) - return -1; - if (used) - patchsize = used + llen; - else - patchsize = 0; - } - else if (!memcmp(" differ\n", buffer + hd + llen - 8, 8)) { - static const char *binhdr[] = { - "Binary files ", - "Files ", - NULL, - }; - int i; - for (i = 0; binhdr[i]; i++) { - int len = strlen(binhdr[i]); - if (len < size - hd && - !memcmp(binhdr[i], buffer + hd, len)) { - state->linenr++; - patch->is_binary = 1; - patchsize = llen; - break; - } - } - } - - /* Empty patch cannot be applied if it is a text patch - * without metadata change. A binary patch appears - * empty to us here. 
- */ - if ((state->apply || state->check) && - (!patch->is_binary && !metadata_changes(patch))) { - error(_("patch with only garbage at line %d"), state->linenr); - return -128; - } - } - - return offset + hdrsize + patchsize; -} - -static void reverse_patches(struct patch *p) -{ - for (; p; p = p->next) { - struct fragment *frag = p->fragments; - - SWAP(p->new_name, p->old_name); - if (p->new_mode || p->is_delete) - SWAP(p->new_mode, p->old_mode); - SWAP(p->is_new, p->is_delete); - SWAP(p->lines_added, p->lines_deleted); - SWAP(p->old_oid_prefix, p->new_oid_prefix); - - for (; frag; frag = frag->next) { - SWAP(frag->newpos, frag->oldpos); - SWAP(frag->newlines, frag->oldlines); - } - } -} - -static const char pluses[] = -"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"; -static const char minuses[]= -"----------------------------------------------------------------------"; - -static void show_stats(struct apply_state *state, struct patch *patch) -{ - struct strbuf qname = STRBUF_INIT; - char *cp = patch->new_name ? patch->new_name : patch->old_name; - int max, add, del; - - quote_c_style(cp, &qname, NULL, 0); - - /* - * "scale" the filename - */ - max = state->max_len; - if (max > 50) - max = 50; - - if (qname.len > max) { - cp = strchr(qname.buf + qname.len + 3 - max, '/'); - if (!cp) - cp = qname.buf + qname.len + 3 - max; - strbuf_splice(&qname, 0, cp - qname.buf, "...", 3); - } - - if (patch->is_binary) { - printf(" %-*s | Bin\n", max, qname.buf); - strbuf_release(&qname); - return; - } - - printf(" %-*s |", max, qname.buf); - strbuf_release(&qname); - - /* - * scale the add/delete - */ - max = max + state->max_change > 70 ? 
70 - max : state->max_change; - add = patch->lines_added; - del = patch->lines_deleted; - - if (state->max_change > 0) { - int total = ((add + del) * max + state->max_change / 2) / state->max_change; - add = (add * max + state->max_change / 2) / state->max_change; - del = total - add; - } - printf("%5d %.*s%.*s\n", patch->lines_added + patch->lines_deleted, - add, pluses, del, minuses); -} - -static int read_old_data(struct stat *st, struct patch *patch, - const char *path, struct strbuf *buf) -{ - int conv_flags = patch->crlf_in_old ? - CONV_EOL_KEEP_CRLF : CONV_EOL_RENORMALIZE; - switch (st->st_mode & S_IFMT) { - case S_IFLNK: - if (strbuf_readlink(buf, path, st->st_size) < 0) - return error(_("unable to read symlink %s"), path); - return 0; - case S_IFREG: - if (strbuf_read_file(buf, path, st->st_size) != st->st_size) - return error(_("unable to open or read %s"), path); - /* - * "git apply" without "--index/--cached" should never look - * at the index; the target file may not have been added to - * the index yet, and we may not even be in any Git repository. - * Pass NULL to convert_to_git() to stress this; the function - * should never look at the index when explicit crlf option - * is given. - */ - convert_to_git(NULL, path, buf->buf, buf->len, buf, conv_flags); - return 0; - default: - return -1; - } -} - -/* - * Update the preimage, and the common lines in postimage, - * from buffer buf of length len. - */ -static void update_pre_post_images(struct image *preimage, - struct image *postimage, - char *buf, size_t len) -{ - struct image fixed_preimage = IMAGE_INIT; - size_t insert_pos = 0; - int i, ctx, reduced; - const char *fixed; - - /* - * Update the preimage with whitespace fixes. Note that we - * are not losing preimage->buf -- apply_one_fragment() will - * free "oldlines". 
- */ - image_prepare(&fixed_preimage, buf, len, 1); - for (i = 0; i < fixed_preimage.line_nr; i++) - fixed_preimage.line[i].flag = preimage->line[i].flag; - image_clear(preimage); - *preimage = fixed_preimage; - fixed = preimage->buf.buf; - - /* - * Adjust the common context lines in postimage. - */ - for (i = reduced = ctx = 0; i < postimage->line_nr; i++) { - size_t l_len = postimage->line[i].len; - - if (!(postimage->line[i].flag & LINE_COMMON)) { - /* an added line -- no counterparts in preimage */ - insert_pos += l_len; - continue; - } - - /* and find the corresponding one in the fixed preimage */ - while (ctx < preimage->line_nr && - !(preimage->line[ctx].flag & LINE_COMMON)) { - fixed += preimage->line[ctx].len; - ctx++; - } - - /* - * preimage is expected to run out, if the caller - * fixed addition of trailing blank lines. - */ - if (preimage->line_nr <= ctx) { - reduced++; - continue; - } - - /* and copy it in, while fixing the line length */ - l_len = preimage->line[ctx].len; - strbuf_splice(&postimage->buf, insert_pos, postimage->line[i].len, - fixed, l_len); - insert_pos += l_len; - fixed += l_len; - postimage->line[i].len = l_len; - ctx++; - } - - /* Fix the length of the whole thing */ - postimage->line_nr -= reduced; -} - -/* - * Compare lines s1 of length n1 and s2 of length n2, ignoring - * whitespace difference. Returns 1 if they match, 0 otherwise - */ -static int fuzzy_matchlines(const char *s1, size_t n1, - const char *s2, size_t n2) -{ - const char *end1 = s1 + n1; - const char *end2 = s2 + n2; - - /* ignore line endings */ - while (s1 < end1 && (end1[-1] == '\r' || end1[-1] == '\n')) - end1--; - while (s2 < end2 && (end2[-1] == '\r' || end2[-1] == '\n')) - end2--; - - while (s1 < end1 && s2 < end2) { - if (isspace(*s1)) { - /* - * Skip whitespace. We check on both buffers - * because we don't want "a b" to match "ab". 
- */ - if (!isspace(*s2)) - return 0; - while (s1 < end1 && isspace(*s1)) - s1++; - while (s2 < end2 && isspace(*s2)) - s2++; - } else if (*s1++ != *s2++) - return 0; - } - - /* If we reached the end on one side only, lines don't match. */ - return s1 == end1 && s2 == end2; -} - -static int line_by_line_fuzzy_match(struct image *img, - struct image *preimage, - struct image *postimage, - unsigned long current, - int current_lno, - int preimage_limit) -{ - int i; - size_t imgoff = 0; - size_t preoff = 0; - size_t extra_chars; - char *buf; - char *preimage_eof; - char *preimage_end; - struct strbuf fixed; - char *fixed_buf; - size_t fixed_len; - - for (i = 0; i < preimage_limit; i++) { - size_t prelen = preimage->line[i].len; - size_t imglen = img->line[current_lno+i].len; - - if (!fuzzy_matchlines(img->buf.buf + current + imgoff, imglen, - preimage->buf.buf + preoff, prelen)) - return 0; - imgoff += imglen; - preoff += prelen; - } - - /* - * Ok, the preimage matches with whitespace fuzz. - * - * imgoff now holds the true length of the target that - * matches the preimage before the end of the file. - * - * Count the number of characters in the preimage that fall - * beyond the end of the file and make sure that all of them - * are whitespace characters. (This can only happen if - * we are removing blank lines at the end of the file.) - */ - buf = preimage_eof = preimage->buf.buf + preoff; - for ( ; i < preimage->line_nr; i++) - preoff += preimage->line[i].len; - preimage_end = preimage->buf.buf + preoff; - for ( ; buf < preimage_end; buf++) - if (!isspace(*buf)) - return 0; - - /* - * Update the preimage and the common postimage context - * lines to use the same whitespace as the target. - * If whitespace is missing in the target (i.e. - * if the preimage extends beyond the end of the file), - * use the whitespace from the preimage. 
- */ - extra_chars = preimage_end - preimage_eof; - strbuf_init(&fixed, imgoff + extra_chars); - strbuf_add(&fixed, img->buf.buf + current, imgoff); - strbuf_add(&fixed, preimage_eof, extra_chars); - fixed_buf = strbuf_detach(&fixed, &fixed_len); - update_pre_post_images(preimage, postimage, - fixed_buf, fixed_len); - return 1; -} - -static int match_fragment(struct apply_state *state, - struct image *img, - struct image *preimage, - struct image *postimage, - unsigned long current, - int current_lno, - unsigned ws_rule, - int match_beginning, int match_end) -{ - int i; - const char *orig, *target; - struct strbuf fixed = STRBUF_INIT; - char *fixed_buf; - size_t fixed_len; - int preimage_limit; - int ret; - - if (preimage->line_nr + current_lno <= img->line_nr) { - /* - * The hunk falls within the boundaries of img. - */ - preimage_limit = preimage->line_nr; - if (match_end && (preimage->line_nr + current_lno != img->line_nr)) { - ret = 0; - goto out; - } - } else if (state->ws_error_action == correct_ws_error && - (ws_rule & WS_BLANK_AT_EOF)) { - /* - * This hunk extends beyond the end of img, and we are - * removing blank lines at the end of the file. This - * many lines from the beginning of the preimage must - * match with img, and the remainder of the preimage - * must be blank. - */ - preimage_limit = img->line_nr - current_lno; - } else { - /* - * The hunk extends beyond the end of the img and - * we are not removing blanks at the end, so we - * should reject the hunk at this position. - */ - ret = 0; - goto out; - } - - if (match_beginning && current_lno) { - ret = 0; - goto out; - } - - /* Quick hash check */ - for (i = 0; i < preimage_limit; i++) { - if ((img->line[current_lno + i].flag & LINE_PATCHED) || - (preimage->line[i].hash != img->line[current_lno + i].hash)) { - ret = 0; - goto out; - } - } - - if (preimage_limit == preimage->line_nr) { - /* - * Do we have an exact match? 
If we were told to match - * at the end, size must be exactly at current+fragsize, - * otherwise current+fragsize must be still within the preimage, - * and either case, the old piece should match the preimage - * exactly. - */ - if ((match_end - ? (current + preimage->buf.len == img->buf.len) - : (current + preimage->buf.len <= img->buf.len)) && - !memcmp(img->buf.buf + current, preimage->buf.buf, preimage->buf.len)) { - ret = 1; - goto out; - } - } else { - /* - * The preimage extends beyond the end of img, so - * there cannot be an exact match. - * - * There must be one non-blank context line that match - * a line before the end of img. - */ - const char *buf, *buf_end; - - buf = preimage->buf.buf; - buf_end = buf; - for (i = 0; i < preimage_limit; i++) - buf_end += preimage->line[i].len; - - for ( ; buf < buf_end; buf++) - if (!isspace(*buf)) - break; - if (buf == buf_end) { - ret = 0; - goto out; - } - } - - /* - * No exact match. If we are ignoring whitespace, run a line-by-line - * fuzzy matching. We collect all the line length information because - * we need it to adjust whitespace if we match. - */ - if (state->ws_ignore_action == ignore_ws_change) { - ret = line_by_line_fuzzy_match(img, preimage, postimage, - current, current_lno, preimage_limit); - goto out; - } - - if (state->ws_error_action != correct_ws_error) { - ret = 0; - goto out; - } - - /* - * The hunk does not apply byte-by-byte, but the hash says - * it might with whitespace fuzz. We weren't asked to - * ignore whitespace, we were asked to correct whitespace - * errors, so let's try matching after whitespace correction. - * - * While checking the preimage against the target, whitespace - * errors in both fixed, we count how large the corresponding - * postimage needs to be. The postimage prepared by - * apply_one_fragment() has whitespace errors fixed on added - * lines already, but the common lines were propagated as-is, - * which may become longer when their whitespace errors are - * fixed. 
- */ - - /* - * The preimage may extend beyond the end of the file, - * but in this loop we will only handle the part of the - * preimage that falls within the file. - */ - strbuf_grow(&fixed, preimage->buf.len + 1); - orig = preimage->buf.buf; - target = img->buf.buf + current; - for (i = 0; i < preimage_limit; i++) { - size_t oldlen = preimage->line[i].len; - size_t tgtlen = img->line[current_lno + i].len; - size_t fixstart = fixed.len; - struct strbuf tgtfix; - int match; - - /* Try fixing the line in the preimage */ - ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL); - - /* Try fixing the line in the target */ - strbuf_init(&tgtfix, tgtlen); - ws_fix_copy(&tgtfix, target, tgtlen, ws_rule, NULL); - - /* - * If they match, either the preimage was based on - * a version before our tree fixed whitespace breakage, - * or we are lacking a whitespace-fix patch the tree - * the preimage was based on already had (i.e. target - * has whitespace breakage, the preimage doesn't). - * In either case, we are fixing the whitespace breakages - * so we might as well take the fix together with their - * real change. - */ - match = (tgtfix.len == fixed.len - fixstart && - !memcmp(tgtfix.buf, fixed.buf + fixstart, - fixed.len - fixstart)); - - strbuf_release(&tgtfix); - if (!match) { - ret = 0; - goto out; - } - - orig += oldlen; - target += tgtlen; - } - - - /* - * Now handle the lines in the preimage that falls beyond the - * end of the file (if any). They will only match if they are - * empty or only contain whitespace (if WS_BLANK_AT_EOL is - * false). 
- */ - for ( ; i < preimage->line_nr; i++) { - size_t fixstart = fixed.len; /* start of the fixed preimage */ - size_t oldlen = preimage->line[i].len; - int j; - - /* Try fixing the line in the preimage */ - ws_fix_copy(&fixed, orig, oldlen, ws_rule, NULL); - - for (j = fixstart; j < fixed.len; j++) { - if (!isspace(fixed.buf[j])) { - ret = 0; - goto out; - } - } - - - orig += oldlen; - } - - /* - * Yes, the preimage is based on an older version that still - * has whitespace breakages unfixed, and fixing them makes the - * hunk match. Update the context lines in the postimage. - */ - fixed_buf = strbuf_detach(&fixed, &fixed_len); - update_pre_post_images(preimage, postimage, - fixed_buf, fixed_len); - - ret = 1; - -out: - strbuf_release(&fixed); - return ret; -} - -static int find_pos(struct apply_state *state, - struct image *img, - struct image *preimage, - struct image *postimage, - int line, - unsigned ws_rule, - int match_beginning, int match_end) -{ - int i; - unsigned long backwards, forwards, current; - int backwards_lno, forwards_lno, current_lno; - - /* - * When running with --allow-overlap, it is possible that a hunk is - * seen that pretends to start at the beginning (but no longer does), - * and that *still* needs to match the end. So trust `match_end` more - * than `match_beginning`. - */ - if (state->allow_overlap && match_beginning && match_end && - img->line_nr - preimage->line_nr != 0) - match_beginning = 0; - - /* - * If match_beginning or match_end is specified, there is no - * point starting from a wrong line that will never match and - * wander around and wait for a match at the specified end. - */ - if (match_beginning) - line = 0; - else if (match_end) - line = img->line_nr - preimage->line_nr; - - /* - * Because the comparison is unsigned, the following test - * will also take care of a negative line number that can - * result when match_end and preimage is larger than the target. 
- */ - if ((size_t) line > img->line_nr) - line = img->line_nr; - - current = 0; - for (i = 0; i < line; i++) - current += img->line[i].len; - - /* - * There's probably some smart way to do this, but I'll leave - * that to the smart and beautiful people. I'm simple and stupid. - */ - backwards = current; - backwards_lno = line; - forwards = current; - forwards_lno = line; - current_lno = line; - - for (i = 0; ; i++) { - if (match_fragment(state, img, preimage, postimage, - current, current_lno, ws_rule, - match_beginning, match_end)) - return current_lno; - - again: - if (backwards_lno == 0 && forwards_lno == img->line_nr) - break; - - if (i & 1) { - if (backwards_lno == 0) { - i++; - goto again; - } - backwards_lno--; - backwards -= img->line[backwards_lno].len; - current = backwards; - current_lno = backwards_lno; - } else { - if (forwards_lno == img->line_nr) { - i++; - goto again; - } - forwards += img->line[forwards_lno].len; - forwards_lno++; - current = forwards; - current_lno = forwards_lno; - } - - } - return -1; -} - -/* - * The change from "preimage" and "postimage" has been found to - * apply at applied_pos (counts in line numbers) in "img". - * Update "img" to remove "preimage" and replace it with "postimage". - */ -static void update_image(struct apply_state *state, - struct image *img, - int applied_pos, - struct image *preimage, - struct image *postimage) -{ - /* - * remove the copy of preimage at offset in img - * and replace it with postimage - */ - int i, nr; - size_t remove_count, insert_count, applied_at = 0; - size_t result_alloc; - char *result; - int preimage_limit; - - /* - * If we are removing blank lines at the end of img, - * the preimage may extend beyond the end. - * If that is the case, we must be careful only to - * remove the part of the preimage that falls within - * the boundaries of img. Initialize preimage_limit - * to the number of lines in the preimage that falls - * within the boundaries. 
- */ - preimage_limit = preimage->line_nr; - if (preimage_limit > img->line_nr - applied_pos) - preimage_limit = img->line_nr - applied_pos; - - for (i = 0; i < applied_pos; i++) - applied_at += img->line[i].len; - - remove_count = 0; - for (i = 0; i < preimage_limit; i++) - remove_count += img->line[applied_pos + i].len; - insert_count = postimage->buf.len; - - /* Adjust the contents */ - result_alloc = st_add3(st_sub(img->buf.len, remove_count), insert_count, 1); - result = xmalloc(result_alloc); - memcpy(result, img->buf.buf, applied_at); - memcpy(result + applied_at, postimage->buf.buf, postimage->buf.len); - memcpy(result + applied_at + postimage->buf.len, - img->buf.buf + (applied_at + remove_count), - img->buf.len - (applied_at + remove_count)); - strbuf_attach(&img->buf, result, postimage->buf.len + img->buf.len - remove_count, - result_alloc); - - /* Adjust the line table */ - nr = img->line_nr + postimage->line_nr - preimage_limit; - if (preimage_limit < postimage->line_nr) - /* - * NOTE: this knows that we never call image_remove_first_line() - * on anything other than pre/post image. - */ - REALLOC_ARRAY(img->line, nr); - if (preimage_limit != postimage->line_nr) - MOVE_ARRAY(img->line + applied_pos + postimage->line_nr, - img->line + applied_pos + preimage_limit, - img->line_nr - (applied_pos + preimage_limit)); - COPY_ARRAY(img->line + applied_pos, postimage->line, postimage->line_nr); - if (!state->allow_overlap) - for (i = 0; i < postimage->line_nr; i++) - img->line[applied_pos + i].flag |= LINE_PATCHED; - img->line_nr = nr; -} - -/* - * Use the patch-hunk text in "frag" to prepare two images (preimage and - * postimage) for the hunk. Find lines that match "preimage" in "img" and - * replace the part of "img" with "postimage" text. 
- */ -static int apply_one_fragment(struct apply_state *state, - struct image *img, struct fragment *frag, - int inaccurate_eof, unsigned ws_rule, - int nth_fragment) -{ - int match_beginning, match_end; - const char *patch = frag->patch; - int size = frag->size; - char *old, *oldlines; - struct strbuf newlines; - int new_blank_lines_at_end = 0; - int found_new_blank_lines_at_end = 0; - int hunk_linenr = frag->linenr; - unsigned long leading, trailing; - int pos, applied_pos; - struct image preimage = IMAGE_INIT; - struct image postimage = IMAGE_INIT; - - oldlines = xmalloc(size); - strbuf_init(&newlines, size); - - old = oldlines; - while (size > 0) { - char first; - int len = linelen(patch, size); - int plen; - int added_blank_line = 0; - int is_blank_context = 0; - size_t start; - - if (!len) - break; - - /* - * "plen" is how much of the line we should use for - * the actual patch data. Normally we just remove the - * first character on the line, but if the line is - * followed by "\ No newline", then we also remove the - * last one (which is the newline, of course). - */ - plen = len - 1; - if (len < size && patch[len] == '\\') - plen--; - first = *patch; - if (state->apply_in_reverse) { - if (first == '-') - first = '+'; - else if (first == '+') - first = '-'; - } - - switch (first) { - case '\n': - /* Newer GNU diff, empty context line */ - if (plen < 0) - /* ... followed by '\No newline'; nothing */ - break; - *old++ = '\n'; - strbuf_addch(&newlines, '\n'); - image_add_line(&preimage, "\n", 1, LINE_COMMON); - image_add_line(&postimage, "\n", 1, LINE_COMMON); - is_blank_context = 1; - break; - case ' ': - if (plen && (ws_rule & WS_BLANK_AT_EOF) && - ws_blank_line(patch + 1, plen)) - is_blank_context = 1; - /* fallthrough */ - case '-': - memcpy(old, patch + 1, plen); - image_add_line(&preimage, old, plen, - (first == ' ' ? 
LINE_COMMON : 0)); - old += plen; - if (first == '-') - break; - /* fallthrough */ - case '+': - /* --no-add does not add new lines */ - if (first == '+' && state->no_add) - break; - - start = newlines.len; - if (first != '+' || - !state->whitespace_error || - state->ws_error_action != correct_ws_error) { - strbuf_add(&newlines, patch + 1, plen); - } - else { - ws_fix_copy(&newlines, patch + 1, plen, ws_rule, &state->applied_after_fixing_ws); - } - image_add_line(&postimage, newlines.buf + start, newlines.len - start, - (first == '+' ? 0 : LINE_COMMON)); - if (first == '+' && - (ws_rule & WS_BLANK_AT_EOF) && - ws_blank_line(patch + 1, plen)) - added_blank_line = 1; - break; - case '@': case '\\': - /* Ignore it, we already handled it */ - break; - default: - if (state->apply_verbosity > verbosity_normal) - error(_("invalid start of line: '%c'"), first); - applied_pos = -1; - goto out; - } - if (added_blank_line) { - if (!new_blank_lines_at_end) - found_new_blank_lines_at_end = hunk_linenr; - new_blank_lines_at_end++; - } - else if (is_blank_context) - ; - else - new_blank_lines_at_end = 0; - patch += len; - size -= len; - hunk_linenr++; - } - if (inaccurate_eof && - old > oldlines && old[-1] == '\n' && - newlines.len > 0 && newlines.buf[newlines.len - 1] == '\n') { - old--; - strbuf_setlen(&newlines, newlines.len - 1); - preimage.line[preimage.line_nr - 1].len--; - postimage.line[postimage.line_nr - 1].len--; - } - - leading = frag->leading; - trailing = frag->trailing; - - /* - * A hunk to change lines at the beginning would begin with - * @@ -1,L +N,M @@ - * but we need to be careful. -U0 that inserts before the second - * line also has this pattern. - * - * And a hunk to add to an empty file would begin with - * @@ -0,0 +N,M @@ - * - * In other words, a hunk that is (frag->oldpos <= 1) with or - * without leading context must match at the beginning. 
- */ - match_beginning = (!frag->oldpos || - (frag->oldpos == 1 && !state->unidiff_zero)); - - /* - * A hunk without trailing lines must match at the end. - * However, we simply cannot tell if a hunk must match end - * from the lack of trailing lines if the patch was generated - * with unidiff without any context. - */ - match_end = !state->unidiff_zero && !trailing; - - pos = frag->newpos ? (frag->newpos - 1) : 0; - strbuf_add(&preimage.buf, oldlines, old - oldlines); - strbuf_swap(&postimage.buf, &newlines); - - for (;;) { - - applied_pos = find_pos(state, img, &preimage, &postimage, pos, - ws_rule, match_beginning, match_end); - - if (applied_pos >= 0) - break; - - /* Am I at my context limits? */ - if ((leading <= state->p_context) && (trailing <= state->p_context)) - break; - if (match_beginning || match_end) { - match_beginning = match_end = 0; - continue; - } - - /* - * Reduce the number of context lines; reduce both - * leading and trailing if they are equal otherwise - * just reduce the larger context. 
- */ - if (leading >= trailing) { - image_remove_first_line(&preimage); - image_remove_first_line(&postimage); - pos--; - leading--; - } - if (trailing > leading) { - image_remove_last_line(&preimage); - image_remove_last_line(&postimage); - trailing--; - } - } - - if (applied_pos >= 0) { - if (new_blank_lines_at_end && - preimage.line_nr + applied_pos >= img->line_nr && - (ws_rule & WS_BLANK_AT_EOF) && - state->ws_error_action != nowarn_ws_error) { - record_ws_error(state, WS_BLANK_AT_EOF, "+", 1, - found_new_blank_lines_at_end); - if (state->ws_error_action == correct_ws_error) { - while (new_blank_lines_at_end--) - image_remove_last_line(&postimage); - } - /* - * We would want to prevent write_out_results() - * from taking place in apply_patch() that follows - * the callchain led us here, which is: - * apply_patch->check_patch_list->check_patch-> - * apply_data->apply_fragments->apply_one_fragment - */ - if (state->ws_error_action == die_on_ws_error) - state->apply = 0; - } - - if (state->apply_verbosity > verbosity_normal && applied_pos != pos) { - int offset = applied_pos - pos; - if (state->apply_in_reverse) - offset = 0 - offset; - fprintf_ln(stderr, - Q_("Hunk #%d succeeded at %d (offset %d line).", - "Hunk #%d succeeded at %d (offset %d lines).", - offset), - nth_fragment, applied_pos + 1, offset); - } - - /* - * Warn if it was necessary to reduce the number - * of context lines. 
- */ - if ((leading != frag->leading || - trailing != frag->trailing) && state->apply_verbosity > verbosity_silent) - fprintf_ln(stderr, _("Context reduced to (%ld/%ld)" - " to apply fragment at %d"), - leading, trailing, applied_pos+1); - update_image(state, img, applied_pos, &preimage, &postimage); - } else { - if (state->apply_verbosity > verbosity_normal) - error(_("while searching for:\n%.*s"), - (int)(old - oldlines), oldlines); - } - -out: - free(oldlines); - strbuf_release(&newlines); - image_clear(&preimage); - image_clear(&postimage); - - return (applied_pos < 0); -} - -static int apply_binary_fragment(struct apply_state *state, - struct image *img, - struct patch *patch) -{ - struct fragment *fragment = patch->fragments; - unsigned long len; - void *dst; - - if (!fragment) - return error(_("missing binary patch data for '%s'"), - patch->new_name ? - patch->new_name : - patch->old_name); - - /* Binary patch is irreversible without the optional second hunk */ - if (state->apply_in_reverse) { - if (!fragment->next) - return error(_("cannot reverse-apply a binary patch " - "without the reverse hunk to '%s'"), - patch->new_name - ? patch->new_name : patch->old_name); - fragment = fragment->next; - } - switch (fragment->binary_patch_method) { - case BINARY_DELTA_DEFLATED: - dst = patch_delta(img->buf.buf, img->buf.len, fragment->patch, - fragment->size, &len); - if (!dst) - return -1; - image_clear(img); - strbuf_attach(&img->buf, dst, len, len + 1); - return 0; - case BINARY_LITERAL_DEFLATED: - image_clear(img); - strbuf_add(&img->buf, fragment->patch, fragment->size); - return 0; - } - return -1; -} - -/* - * Replace "img" with the result of applying the binary patch. - * The binary patch data itself in patch->fragment is still kept - * but the preimage prepared by the caller in "img" is freed here - * or in the helper function apply_binary_fragment() this calls. 
- */ -static int apply_binary(struct apply_state *state, - struct image *img, - struct patch *patch) -{ - const char *name = patch->old_name ? patch->old_name : patch->new_name; - struct object_id oid; - const unsigned hexsz = the_hash_algo->hexsz; - - /* - * For safety, we require patch index line to contain - * full hex textual object ID for old and new, at least for now. - */ - if (strlen(patch->old_oid_prefix) != hexsz || - strlen(patch->new_oid_prefix) != hexsz || - get_oid_hex(patch->old_oid_prefix, &oid) || - get_oid_hex(patch->new_oid_prefix, &oid)) - return error(_("cannot apply binary patch to '%s' " - "without full index line"), name); - - if (patch->old_name) { - /* - * See if the old one matches what the patch - * applies to. - */ - hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, - OBJ_BLOB, &oid); - if (strcmp(oid_to_hex(&oid), patch->old_oid_prefix)) - return error(_("the patch applies to '%s' (%s), " - "which does not match the " - "current contents."), - name, oid_to_hex(&oid)); - } - else { - /* Otherwise, the old one must be empty. */ - if (img->buf.len) - return error(_("the patch applies to an empty " - "'%s' but it is not empty"), name); - } - - get_oid_hex(patch->new_oid_prefix, &oid); - if (is_null_oid(&oid)) { - image_clear(img); - return 0; /* deletion patch */ - } - - if (odb_has_object(the_repository->objects, &oid, 0)) { - /* We already have the postimage */ - enum object_type type; - unsigned long size; - char *result; - - result = odb_read_object(the_repository->objects, &oid, - &type, &size); - if (!result) - return error(_("the necessary postimage %s for " - "'%s' cannot be read"), - patch->new_oid_prefix, name); - image_clear(img); - strbuf_attach(&img->buf, result, size, size + 1); - } else { - /* - * We have verified buf matches the preimage; - * apply the patch data to it, which is stored - * in the patch->fragments->{patch,size}. 
- */ - if (apply_binary_fragment(state, img, patch)) - return error(_("binary patch does not apply to '%s'"), - name); - - /* verify that the result matches */ - hash_object_file(the_hash_algo, img->buf.buf, img->buf.len, OBJ_BLOB, - &oid); - if (strcmp(oid_to_hex(&oid), patch->new_oid_prefix)) - return error(_("binary patch to '%s' creates incorrect result (expecting %s, got %s)"), - name, patch->new_oid_prefix, oid_to_hex(&oid)); - } - - return 0; -} - -static int apply_fragments(struct apply_state *state, struct image *img, struct patch *patch) -{ - struct fragment *frag = patch->fragments; - const char *name = patch->old_name ? patch->old_name : patch->new_name; - unsigned ws_rule = patch->ws_rule; - unsigned inaccurate_eof = patch->inaccurate_eof; - int nth = 0; - - if (patch->is_binary) - return apply_binary(state, img, patch); - - while (frag) { - nth++; - if (apply_one_fragment(state, img, frag, inaccurate_eof, ws_rule, nth)) { - error(_("patch failed: %s:%ld"), name, frag->oldpos); - if (!state->apply_with_reject) - return -1; - frag->rejected = 1; - } - frag = frag->next; - } - return 0; -} - -static int read_blob_object(struct strbuf *buf, const struct object_id *oid, unsigned mode) -{ - if (S_ISGITLINK(mode)) { - strbuf_grow(buf, 100); - strbuf_addf(buf, "Subproject commit %s\n", oid_to_hex(oid)); - } else { - enum object_type type; - unsigned long sz; - char *result; - - result = odb_read_object(the_repository->objects, oid, - &type, &sz); - if (!result) - return -1; - /* XXX read_sha1_file NUL-terminates */ - strbuf_attach(buf, result, sz, sz + 1); - } - return 0; -} - -static int read_file_or_gitlink(const struct cache_entry *ce, struct strbuf *buf) -{ - if (!ce) - return 0; - return read_blob_object(buf, &ce->oid, ce->ce_mode); -} - -static struct patch *in_fn_table(struct apply_state *state, const char *name) -{ - struct string_list_item *item; - - if (!name) - return NULL; - - item = string_list_lookup(&state->fn_table, name); - if (item) - return 
(struct patch *)item->util; - - return NULL; -} - -/* - * item->util in the filename table records the status of the path. - * Usually it points at a patch (whose result records the contents - * of it after applying it), but it could be PATH_WAS_DELETED for a - * path that a previously applied patch has already removed, or - * PATH_TO_BE_DELETED for a path that a later patch would remove. - * - * The latter is needed to deal with a case where two paths A and B - * are swapped by first renaming A to B and then renaming B to A; - * moving A to B should not be prevented due to presence of B as we - * will remove it in a later patch. - */ -#define PATH_TO_BE_DELETED ((struct patch *) -2) -#define PATH_WAS_DELETED ((struct patch *) -1) - -static int to_be_deleted(struct patch *patch) -{ - return patch == PATH_TO_BE_DELETED; -} - -static int was_deleted(struct patch *patch) -{ - return patch == PATH_WAS_DELETED; -} - -static void add_to_fn_table(struct apply_state *state, struct patch *patch) -{ - struct string_list_item *item; - - /* - * Always add new_name unless patch is a deletion - * This should cover the cases for normal diffs, - * file creations and copies - */ - if (patch->new_name) { - item = string_list_insert(&state->fn_table, patch->new_name); - item->util = patch; - } - - /* - * store a failure on rename/deletion cases because - * later chunks shouldn't patch old names - */ - if ((patch->new_name == NULL) || (patch->is_rename)) { - item = string_list_insert(&state->fn_table, patch->old_name); - item->util = PATH_WAS_DELETED; - } -} - -static void prepare_fn_table(struct apply_state *state, struct patch *patch) -{ - /* - * store information about incoming file deletion - */ - while (patch) { - if ((patch->new_name == NULL) || (patch->is_rename)) { - struct string_list_item *item; - item = string_list_insert(&state->fn_table, patch->old_name); - item->util = PATH_TO_BE_DELETED; - } - patch = patch->next; - } -} - -static int checkout_target(struct index_state 
*istate, - struct cache_entry *ce, struct stat *st) -{ - struct checkout costate = CHECKOUT_INIT; - - costate.refresh_cache = 1; - costate.istate = istate; - if (checkout_entry(ce, &costate, NULL, NULL) || - lstat(ce->name, st)) - return error(_("cannot checkout %s"), ce->name); - return 0; -} - -static struct patch *previous_patch(struct apply_state *state, - struct patch *patch, - int *gone) -{ - struct patch *previous; - - *gone = 0; - if (patch->is_copy || patch->is_rename) - return NULL; /* "git" patches do not depend on the order */ - - previous = in_fn_table(state, patch->old_name); - if (!previous) - return NULL; - - if (to_be_deleted(previous)) - return NULL; /* the deletion hasn't happened yet */ - - if (was_deleted(previous)) - *gone = 1; - - return previous; -} - -static int verify_index_match(struct apply_state *state, - const struct cache_entry *ce, - struct stat *st) -{ - if (S_ISGITLINK(ce->ce_mode)) { - if (!S_ISDIR(st->st_mode)) - return -1; - return 0; - } - return ie_match_stat(state->repo->index, ce, st, - CE_MATCH_IGNORE_VALID | CE_MATCH_IGNORE_SKIP_WORKTREE); -} - -#define SUBMODULE_PATCH_WITHOUT_INDEX 1 - -static int load_patch_target(struct apply_state *state, - struct strbuf *buf, - const struct cache_entry *ce, - struct stat *st, - struct patch *patch, - const char *name, - unsigned expected_mode) -{ - if (state->cached || state->check_index) { - if (read_file_or_gitlink(ce, buf)) - return error(_("failed to read %s"), name); - } else if (name) { - if (S_ISGITLINK(expected_mode)) { - if (ce) - return read_file_or_gitlink(ce, buf); - else - return SUBMODULE_PATCH_WITHOUT_INDEX; - } else if (has_symlink_leading_path(name, strlen(name))) { - return error(_("reading from '%s' beyond a symbolic link"), name); - } else { - if (read_old_data(st, patch, name, buf)) - return error(_("failed to read %s"), name); - } - } - return 0; -} - -/* - * We are about to apply "patch"; populate the "image" with the - * current version we have, from the 
working tree or from the index, - * depending on the situation e.g. --cached/--index. If we are - * applying a non-git patch that incrementally updates the tree, - * we read from the result of a previous diff. - */ -static int load_preimage(struct apply_state *state, - struct image *image, - struct patch *patch, struct stat *st, - const struct cache_entry *ce) -{ - struct strbuf buf = STRBUF_INIT; - size_t len; - char *img; - struct patch *previous; - int status; - - previous = previous_patch(state, patch, &status); - if (status) - return error(_("path %s has been renamed/deleted"), - patch->old_name); - if (previous) { - /* We have a patched copy in memory; use that. */ - strbuf_add(&buf, previous->result, previous->resultsize); - } else { - status = load_patch_target(state, &buf, ce, st, patch, - patch->old_name, patch->old_mode); - if (status < 0) - return status; - else if (status == SUBMODULE_PATCH_WITHOUT_INDEX) { - /* - * There is no way to apply subproject - * patch without looking at the index. - * NEEDSWORK: shouldn't this be flagged - * as an error??? 
- */ - free_fragment_list(patch->fragments); - patch->fragments = NULL; - } else if (status) { - return error(_("failed to read %s"), patch->old_name); - } - } - - img = strbuf_detach(&buf, &len); - image_prepare(image, img, len, !patch->is_binary); - return 0; -} - -static int resolve_to(struct image *image, const struct object_id *result_id) -{ - unsigned long size; - enum object_type type; - char *data; - - image_clear(image); - - data = odb_read_object(the_repository->objects, result_id, &type, &size); - if (!data || type != OBJ_BLOB) - die("unable to read blob object %s", oid_to_hex(result_id)); - strbuf_attach(&image->buf, data, size, size + 1); - - return 0; -} - -static int three_way_merge(struct apply_state *state, - struct image *image, - char *path, - const struct object_id *base, - const struct object_id *ours, - const struct object_id *theirs) -{ - mmfile_t base_file, our_file, their_file; - struct ll_merge_options merge_opts = LL_MERGE_OPTIONS_INIT; - mmbuffer_t result = { NULL }; - enum ll_merge_result status; - - /* resolve trivial cases first */ - if (oideq(base, ours)) - return resolve_to(image, theirs); - else if (oideq(base, theirs) || oideq(ours, theirs)) - return resolve_to(image, ours); - - read_mmblob(&base_file, the_repository->objects, base); - read_mmblob(&our_file, the_repository->objects, ours); - read_mmblob(&their_file, the_repository->objects, theirs); - merge_opts.variant = state->merge_variant; - status = ll_merge(&result, path, - &base_file, "base", - &our_file, "ours", - &their_file, "theirs", - state->repo->index, - &merge_opts); - if (status == LL_MERGE_BINARY_CONFLICT) - warning("Cannot merge binary files: %s (%s vs. 
%s)", - path, "ours", "theirs"); - free(base_file.ptr); - free(our_file.ptr); - free(their_file.ptr); - if (status < 0 || !result.ptr) { - free(result.ptr); - return -1; - } - image_clear(image); - strbuf_attach(&image->buf, result.ptr, result.size, result.size); - - return status; -} - -/* - * When directly falling back to add/add three-way merge, we read from - * the current contents of the new_name. In no cases other than that - * this function will be called. - */ -static int load_current(struct apply_state *state, - struct image *image, - struct patch *patch) -{ - struct strbuf buf = STRBUF_INIT; - int status, pos; - size_t len; - char *img; - struct stat st; - struct cache_entry *ce; - char *name = patch->new_name; - unsigned mode = patch->new_mode; - - if (!patch->is_new) - BUG("patch to %s is not a creation", patch->old_name); - - pos = index_name_pos(state->repo->index, name, strlen(name)); - if (pos < 0) - return error(_("%s: does not exist in index"), name); - ce = state->repo->index->cache[pos]; - if (lstat(name, &st)) { - if (errno != ENOENT) - return error_errno("%s", name); - if (checkout_target(state->repo->index, ce, &st)) - return -1; - } - if (verify_index_match(state, ce, &st)) - return error(_("%s: does not match index"), name); - - status = load_patch_target(state, &buf, ce, &st, patch, name, mode); - if (status < 0) - return status; - else if (status) - return -1; - img = strbuf_detach(&buf, &len); - image_prepare(image, img, len, !patch->is_binary); - return 0; -} - -static int try_threeway(struct apply_state *state, - struct image *image, - struct patch *patch, - struct stat *st, - const struct cache_entry *ce) -{ - struct object_id pre_oid, post_oid, our_oid; - struct strbuf buf = STRBUF_INIT; - size_t len; - int status; - char *img; - struct image tmp_image = IMAGE_INIT; - - /* No point falling back to 3-way merge in these cases */ - if (patch->is_delete || - S_ISGITLINK(patch->old_mode) || S_ISGITLINK(patch->new_mode) || - (patch->is_new 
&& !patch->direct_to_threeway) || - (patch->is_rename && !patch->lines_added && !patch->lines_deleted)) - return -1; - - /* Preimage the patch was prepared for */ - if (patch->is_new) - odb_write_object(the_repository->objects, "", 0, OBJ_BLOB, &pre_oid); - else if (repo_get_oid(the_repository, patch->old_oid_prefix, &pre_oid) || - read_blob_object(&buf, &pre_oid, patch->old_mode)) - return error(_("repository lacks the necessary blob to perform 3-way merge.")); - - if (state->apply_verbosity > verbosity_silent && patch->direct_to_threeway) - fprintf(stderr, _("Performing three-way merge...\n")); - - img = strbuf_detach(&buf, &len); - image_prepare(&tmp_image, img, len, 1); - /* Apply the patch to get the post image */ - if (apply_fragments(state, &tmp_image, patch) < 0) { - image_clear(&tmp_image); - return -1; - } - /* post_oid is theirs */ - odb_write_object(the_repository->objects, tmp_image.buf.buf, - tmp_image.buf.len, OBJ_BLOB, &post_oid); - image_clear(&tmp_image); - - /* our_oid is ours */ - if (patch->is_new) { - if (load_current(state, &tmp_image, patch)) - return error(_("cannot read the current contents of '%s'"), - patch->new_name); - } else { - if (load_preimage(state, &tmp_image, patch, st, ce)) - return error(_("cannot read the current contents of '%s'"), - patch->old_name); - } - odb_write_object(the_repository->objects, tmp_image.buf.buf, - tmp_image.buf.len, OBJ_BLOB, &our_oid); - image_clear(&tmp_image); - - /* in-core three-way merge between post and our using pre as base */ - status = three_way_merge(state, image, patch->new_name, - &pre_oid, &our_oid, &post_oid); - if (status < 0) { - if (state->apply_verbosity > verbosity_silent) - fprintf(stderr, - _("Failed to perform three-way merge...\n")); - return status; - } - - if (status) { - patch->conflicted_threeway = 1; - if (patch->is_new) - oidclr(&patch->threeway_stage[0], the_repository->hash_algo); - else - oidcpy(&patch->threeway_stage[0], &pre_oid); - oidcpy(&patch->threeway_stage[1], 
&our_oid); - oidcpy(&patch->threeway_stage[2], &post_oid); - if (state->apply_verbosity > verbosity_silent) - fprintf(stderr, - _("Applied patch to '%s' with conflicts.\n"), - patch->new_name); - } else { - if (state->apply_verbosity > verbosity_silent) - fprintf(stderr, - _("Applied patch to '%s' cleanly.\n"), - patch->new_name); - } - return 0; -} - -static int apply_data(struct apply_state *state, struct patch *patch, - struct stat *st, const struct cache_entry *ce) -{ - struct image image = IMAGE_INIT; - - if (load_preimage(state, &image, patch, st, ce) < 0) - return -1; - - if (!state->threeway || try_threeway(state, &image, patch, st, ce) < 0) { - if (state->apply_verbosity > verbosity_silent && - state->threeway && !patch->direct_to_threeway) - fprintf(stderr, _("Falling back to direct application...\n")); - - /* Note: with --reject, apply_fragments() returns 0 */ - if (patch->direct_to_threeway || apply_fragments(state, &image, patch) < 0) { - image_clear(&image); - return -1; - } - } - patch->result = strbuf_detach(&image.buf, &patch->resultsize); - add_to_fn_table(state, patch); - free(image.line); - - if (0 < patch->is_delete && patch->resultsize) - return error(_("removal patch leaves file contents")); - - return 0; -} - -/* - * If "patch" that we are looking at modifies or deletes what we have, - * we would want it not to lose any local modification we have, either - * in the working tree or in the index. - * - * This also decides if a non-git patch is a creation patch or a - * modification to an existing empty file. We do not check the state - * of the current tree for a creation patch in this function; the caller - * check_patch() separately makes sure (and errors out otherwise) that - * the path the patch creates does not exist in the current tree. 
- */ -static int check_preimage(struct apply_state *state, - struct patch *patch, - struct cache_entry **ce, - struct stat *st) -{ - const char *old_name = patch->old_name; - struct patch *previous = NULL; - int stat_ret = 0, status; - unsigned st_mode = 0; - - if (!old_name) - return 0; - - assert(patch->is_new <= 0); - previous = previous_patch(state, patch, &status); - - if (status) - return error(_("path %s has been renamed/deleted"), old_name); - if (previous) { - st_mode = previous->new_mode; - } else if (!state->cached) { - stat_ret = lstat(old_name, st); - if (stat_ret && errno != ENOENT) - return error_errno("%s", old_name); - } - - if (state->check_index && !previous) { - int pos = index_name_pos(state->repo->index, old_name, - strlen(old_name)); - if (pos < 0) { - if (patch->is_new < 0) - goto is_new; - return error(_("%s: does not exist in index"), old_name); - } - *ce = state->repo->index->cache[pos]; - if (stat_ret < 0) { - if (checkout_target(state->repo->index, *ce, st)) - return -1; - } - if (!state->cached && verify_index_match(state, *ce, st)) - return error(_("%s: does not match index"), old_name); - if (state->cached) - st_mode = (*ce)->ce_mode; - } else if (stat_ret < 0) { - if (patch->is_new < 0) - goto is_new; - return error_errno("%s", old_name); - } - - if (!state->cached && !previous) { - if (*ce && !(*ce)->ce_mode) - BUG("ce_mode == 0 for path '%s'", old_name); - - if (trust_executable_bit || !S_ISREG(st->st_mode)) - st_mode = ce_mode_from_stat(*ce, st->st_mode); - else if (*ce) - st_mode = (*ce)->ce_mode; - else - st_mode = patch->old_mode; - } - - if (patch->is_new < 0) - patch->is_new = 0; - if (!patch->old_mode) - patch->old_mode = st_mode; - if ((st_mode ^ patch->old_mode) & S_IFMT) - return error(_("%s: wrong type"), old_name); - if (st_mode != patch->old_mode) - warning(_("%s has type %o, expected %o"), - old_name, st_mode, patch->old_mode); - if (!patch->new_mode && !patch->is_delete) - patch->new_mode = st_mode; - return 0; - - 
is_new: - patch->is_new = 1; - patch->is_delete = 0; - FREE_AND_NULL(patch->old_name); - return 0; -} - - -#define EXISTS_IN_INDEX 1 -#define EXISTS_IN_WORKTREE 2 -#define EXISTS_IN_INDEX_AS_ITA 3 - -static int check_to_create(struct apply_state *state, - const char *new_name, - int ok_if_exists) -{ - struct stat nst; - - if (state->check_index && (!ok_if_exists || !state->cached)) { - int pos; - - pos = index_name_pos(state->repo->index, new_name, strlen(new_name)); - if (pos >= 0) { - struct cache_entry *ce = state->repo->index->cache[pos]; - - /* allow ITA, as they do not yet exist in the index */ - if (!ok_if_exists && !(ce->ce_flags & CE_INTENT_TO_ADD)) - return EXISTS_IN_INDEX; - - /* ITA entries can never match working tree files */ - if (!state->cached && (ce->ce_flags & CE_INTENT_TO_ADD)) - return EXISTS_IN_INDEX_AS_ITA; - } - } - - if (state->cached) - return 0; - - if (!lstat(new_name, &nst)) { - if (S_ISDIR(nst.st_mode) || ok_if_exists) - return 0; - /* - * A leading component of new_name might be a symlink - * that is going to be removed with this patch, but - * still pointing at somewhere that has the path. - * In such a case, path "new_name" does not exist as - * far as git is concerned. 
- */ - if (has_symlink_leading_path(new_name, strlen(new_name))) - return 0; - - return EXISTS_IN_WORKTREE; - } else if (!is_missing_file_error(errno)) { - return error_errno("%s", new_name); - } - return 0; -} - -static void prepare_symlink_changes(struct apply_state *state, struct patch *patch) -{ - for ( ; patch; patch = patch->next) { - if ((patch->old_name && S_ISLNK(patch->old_mode)) && - (patch->is_rename || patch->is_delete)) - /* the symlink at patch->old_name is removed */ - strset_add(&state->removed_symlinks, patch->old_name); - - if (patch->new_name && S_ISLNK(patch->new_mode)) - /* the symlink at patch->new_name is created or remains */ - strset_add(&state->kept_symlinks, patch->new_name); - } -} - -static int path_is_beyond_symlink_1(struct apply_state *state, struct strbuf *name) -{ - do { - while (--name->len && name->buf[name->len] != '/') - ; /* scan backwards */ - if (!name->len) - break; - name->buf[name->len] = '\0'; - if (strset_contains(&state->kept_symlinks, name->buf)) - return 1; - if (strset_contains(&state->removed_symlinks, name->buf)) - /* - * This cannot be "return 0", because we may - * see a new one created at a higher level. 
- */ - continue; - - /* otherwise, check the preimage */ - if (state->check_index) { - struct cache_entry *ce; - - ce = index_file_exists(state->repo->index, name->buf, - name->len, ignore_case); - if (ce && S_ISLNK(ce->ce_mode)) - return 1; - } else { - struct stat st; - if (!lstat(name->buf, &st) && S_ISLNK(st.st_mode)) - return 1; - } - } while (1); - return 0; -} - -static int path_is_beyond_symlink(struct apply_state *state, const char *name_) -{ - int ret; - struct strbuf name = STRBUF_INIT; - - assert(*name_ != '\0'); - strbuf_addstr(&name, name_); - ret = path_is_beyond_symlink_1(state, &name); - strbuf_release(&name); - - return ret; -} - -static int check_unsafe_path(struct patch *patch) -{ - const char *old_name = NULL; - const char *new_name = NULL; - if (patch->is_delete) - old_name = patch->old_name; - else if (!patch->is_new && !patch->is_copy) - old_name = patch->old_name; - if (!patch->is_delete) - new_name = patch->new_name; - - if (old_name && !verify_path(old_name, patch->old_mode)) - return error(_("invalid path '%s'"), old_name); - if (new_name && !verify_path(new_name, patch->new_mode)) - return error(_("invalid path '%s'"), new_name); - return 0; -} - -/* - * Check and apply the patch in-core; leave the result in patch->result - * for the caller to write it out to the final destination. - */ -static int check_patch(struct apply_state *state, struct patch *patch) -{ - struct stat st; - const char *old_name = patch->old_name; - const char *new_name = patch->new_name; - const char *name = old_name ? 
old_name : new_name; - struct cache_entry *ce = NULL; - struct patch *tpatch; - int ok_if_exists; - int status; - - patch->rejected = 1; /* we will drop this after we succeed */ - - status = check_preimage(state, patch, &ce, &st); - if (status) - return status; - old_name = patch->old_name; - - /* - * A type-change diff is always split into a patch to delete - * old, immediately followed by a patch to create new (see - * diff.c::run_diff()); in such a case it is Ok that the entry - * to be deleted by the previous patch is still in the working - * tree and in the index. - * - * A patch to swap-rename between A and B would first rename A - * to B and then rename B to A. While applying the first one, - * the presence of B should not stop A from getting renamed to - * B; ask to_be_deleted() about the later rename. Removal of - * B and rename from A to B is handled the same way by asking - * was_deleted(). - */ - if ((tpatch = in_fn_table(state, new_name)) && - (was_deleted(tpatch) || to_be_deleted(tpatch))) - ok_if_exists = 1; - else - ok_if_exists = 0; - - if (new_name && - ((0 < patch->is_new) || patch->is_rename || patch->is_copy)) { - int err = check_to_create(state, new_name, ok_if_exists); - - if (err && state->threeway) { - patch->direct_to_threeway = 1; - } else switch (err) { - case 0: - break; /* happy */ - case EXISTS_IN_INDEX: - return error(_("%s: already exists in index"), new_name); - case EXISTS_IN_INDEX_AS_ITA: - return error(_("%s: does not match index"), new_name); - case EXISTS_IN_WORKTREE: - return error(_("%s: already exists in working directory"), - new_name); - default: - return err; - } - - if (!patch->new_mode) { - if (0 < patch->is_new) - patch->new_mode = S_IFREG | 0644; - else - patch->new_mode = patch->old_mode; - } - } - - if (new_name && old_name) { - int same = !strcmp(old_name, new_name); - if (!patch->new_mode) - patch->new_mode = patch->old_mode; - if ((patch->old_mode ^ patch->new_mode) & S_IFMT) { - if (same) - return error(_("new 
mode (%o) of %s does not " - "match old mode (%o)"), - patch->new_mode, new_name, - patch->old_mode); - else - return error(_("new mode (%o) of %s does not " - "match old mode (%o) of %s"), - patch->new_mode, new_name, - patch->old_mode, old_name); - } - } - - if (!state->unsafe_paths && check_unsafe_path(patch)) - return -128; - - /* - * An attempt to read from or delete a path that is beyond a - * symbolic link will be prevented by load_patch_target() that - * is called at the beginning of apply_data() so we do not - * have to worry about a patch marked with "is_delete" bit - * here. We however need to make sure that the patch result - * is not deposited to a path that is beyond a symbolic link - * here. - */ - if (!patch->is_delete && path_is_beyond_symlink(state, patch->new_name)) - return error(_("affected file '%s' is beyond a symbolic link"), - patch->new_name); - - if (apply_data(state, patch, &st, ce) < 0) - return error(_("%s: patch does not apply"), name); - patch->rejected = 0; - return 0; -} - -static int check_patch_list(struct apply_state *state, struct patch *patch) -{ - int err = 0; - - prepare_symlink_changes(state, patch); - prepare_fn_table(state, patch); - while (patch) { - int res; - if (state->apply_verbosity > verbosity_normal) - say_patch_name(stderr, - _("Checking patch %s..."), patch); - res = check_patch(state, patch); - if (res == -128) - return -128; - err |= res; - patch = patch->next; - } - return err; -} - -static int read_apply_cache(struct apply_state *state) -{ - if (state->index_file) - return read_index_from(state->repo->index, state->index_file, - repo_get_git_dir(the_repository)); - else - return repo_read_index(state->repo); -} - -/* This function tries to read the object name from the current index */ -static int get_current_oid(struct apply_state *state, const char *path, - struct object_id *oid) -{ - int pos; - - if (read_apply_cache(state) < 0) - return -1; - pos = index_name_pos(state->repo->index, path, strlen(path)); 
- if (pos < 0) - return -1; - oidcpy(oid, &state->repo->index->cache[pos]->oid); - return 0; -} - -static int preimage_oid_in_gitlink_patch(struct patch *p, struct object_id *oid) -{ - /* - * A usable gitlink patch has only one fragment (hunk) that looks like: - * @@ -1 +1 @@ - * -Subproject commit - * +Subproject commit - * or - * @@ -1 +0,0 @@ - * -Subproject commit - * for a removal patch. - */ - struct fragment *hunk = p->fragments; - static const char heading[] = "-Subproject commit "; - const char *preimage; - - if (/* does the patch have only one hunk? */ - hunk && !hunk->next && - /* is its preimage one line? */ - hunk->oldpos == 1 && hunk->oldlines == 1 && - /* does preimage begin with the heading? */ - (preimage = memchr(hunk->patch, '\n', hunk->size)) != NULL && - starts_with(++preimage, heading) && - /* does it record full SHA-1? */ - !get_oid_hex(preimage + sizeof(heading) - 1, oid) && - preimage[sizeof(heading) + the_hash_algo->hexsz - 1] == '\n' && - /* does the abbreviated name on the index line agree with it? */ - starts_with(preimage + sizeof(heading) - 1, p->old_oid_prefix)) - return 0; /* it all looks fine */ - - /* we may have full object name on the index line */ - return get_oid_hex(p->old_oid_prefix, oid); -} - -/* Build an index that contains just the files needed for a 3way merge */ -static int build_fake_ancestor(struct apply_state *state, struct patch *list) -{ - struct patch *patch; - struct index_state result = INDEX_STATE_INIT(state->repo); - struct lock_file lock = LOCK_INIT; - int res; - - /* Once we start supporting the reverse patch, it may be - * worth showing the new sha1 prefix, but until then... - */ - for (patch = list; patch; patch = patch->next) { - struct object_id oid; - struct cache_entry *ce; - const char *name; - - name = patch->old_name ? 
patch->old_name : patch->new_name; - if (0 < patch->is_new) - continue; - - if (S_ISGITLINK(patch->old_mode)) { - if (!preimage_oid_in_gitlink_patch(patch, &oid)) - ; /* ok, the textual part looks sane */ - else - return error(_("sha1 information is lacking or " - "useless for submodule %s"), name); - } else if (!repo_get_oid_blob(the_repository, patch->old_oid_prefix, &oid)) { - ; /* ok */ - } else if (!patch->lines_added && !patch->lines_deleted) { - /* mode-only change: update the current */ - if (get_current_oid(state, patch->old_name, &oid)) - return error(_("mode change for %s, which is not " - "in current HEAD"), name); - } else - return error(_("sha1 information is lacking or useless " - "(%s)."), name); - - ce = make_cache_entry(&result, patch->old_mode, &oid, name, 0, 0); - if (!ce) - return error(_("make_cache_entry failed for path '%s'"), - name); - if (add_index_entry(&result, ce, ADD_CACHE_OK_TO_ADD)) { - discard_cache_entry(ce); - return error(_("could not add %s to temporary index"), - name); - } - } - - hold_lock_file_for_update(&lock, state->fake_ancestor, LOCK_DIE_ON_ERROR); - res = write_locked_index(&result, &lock, COMMIT_LOCK); - discard_index(&result); - - if (res) - return error(_("could not write temporary index to %s"), - state->fake_ancestor); - - return 0; -} - -static void stat_patch_list(struct apply_state *state, struct patch *patch) -{ - int files, adds, dels; - - for (files = adds = dels = 0 ; patch ; patch = patch->next) { - files++; - adds += patch->lines_added; - dels += patch->lines_deleted; - show_stats(state, patch); - } - - print_stat_summary(stdout, files, adds, dels); -} - -static void numstat_patch_list(struct apply_state *state, - struct patch *patch) -{ - for ( ; patch; patch = patch->next) { - const char *name; - name = patch->new_name ? 
patch->new_name : patch->old_name; - if (patch->is_binary) - printf("-\t-\t"); - else - printf("%d\t%d\t", patch->lines_added, patch->lines_deleted); - write_name_quoted(name, stdout, state->line_termination); - } -} - -static void show_file_mode_name(const char *newdelete, unsigned int mode, const char *name) -{ - if (mode) - printf(" %s mode %06o %s\n", newdelete, mode, name); - else - printf(" %s %s\n", newdelete, name); -} - -static void show_mode_change(struct patch *p, int show_name) -{ - if (p->old_mode && p->new_mode && p->old_mode != p->new_mode) { - if (show_name) - printf(" mode change %06o => %06o %s\n", - p->old_mode, p->new_mode, p->new_name); - else - printf(" mode change %06o => %06o\n", - p->old_mode, p->new_mode); - } -} - -static void show_rename_copy(struct patch *p) -{ - const char *renamecopy = p->is_rename ? "rename" : "copy"; - const char *old_name, *new_name; - - /* Find common prefix */ - old_name = p->old_name; - new_name = p->new_name; - while (1) { - const char *slash_old, *slash_new; - slash_old = strchr(old_name, '/'); - slash_new = strchr(new_name, '/'); - if (!slash_old || - !slash_new || - slash_old - old_name != slash_new - new_name || - memcmp(old_name, new_name, slash_new - new_name)) - break; - old_name = slash_old + 1; - new_name = slash_new + 1; - } - /* p->old_name through old_name is the common prefix, and old_name and - * new_name through the end of names are renames - */ - if (old_name != p->old_name) - printf(" %s %.*s{%s => %s} (%d%%)\n", renamecopy, - (int)(old_name - p->old_name), p->old_name, - old_name, new_name, p->score); - else - printf(" %s %s => %s (%d%%)\n", renamecopy, - p->old_name, p->new_name, p->score); - show_mode_change(p, 0); -} - -static void summary_patch_list(struct patch *patch) -{ - struct patch *p; - - for (p = patch; p; p = p->next) { - if (p->is_new) - show_file_mode_name("create", p->new_mode, p->new_name); - else if (p->is_delete) - show_file_mode_name("delete", p->old_mode, p->old_name); - 
else { - if (p->is_rename || p->is_copy) - show_rename_copy(p); - else { - if (p->score) { - printf(" rewrite %s (%d%%)\n", - p->new_name, p->score); - show_mode_change(p, 0); - } - else - show_mode_change(p, 1); - } - } - } -} - -static void patch_stats(struct apply_state *state, struct patch *patch) -{ - int lines = patch->lines_added + patch->lines_deleted; - - if (lines > state->max_change) - state->max_change = lines; - if (patch->old_name) { - int len = quote_c_style(patch->old_name, NULL, NULL, 0); - if (!len) - len = strlen(patch->old_name); - if (len > state->max_len) - state->max_len = len; - } - if (patch->new_name) { - int len = quote_c_style(patch->new_name, NULL, NULL, 0); - if (!len) - len = strlen(patch->new_name); - if (len > state->max_len) - state->max_len = len; - } -} - -static int remove_file(struct apply_state *state, struct patch *patch, int rmdir_empty) -{ - if (state->update_index && !state->ita_only) { - if (remove_file_from_index(state->repo->index, patch->old_name) < 0) - return error(_("unable to remove %s from index"), patch->old_name); - } - if (!state->cached) { - if (!remove_or_warn(patch->old_mode, patch->old_name) && rmdir_empty) { - remove_path(patch->old_name); - } - } - return 0; -} - -static int add_index_file(struct apply_state *state, - const char *path, - unsigned mode, - void *buf, - unsigned long size) -{ - struct stat st; - struct cache_entry *ce; - int namelen = strlen(path); - - ce = make_empty_cache_entry(state->repo->index, namelen); - memcpy(ce->name, path, namelen); - ce->ce_mode = create_ce_mode(mode); - ce->ce_flags = create_ce_flags(0); - ce->ce_namelen = namelen; - if (state->ita_only) { - ce->ce_flags |= CE_INTENT_TO_ADD; - set_object_name_for_intent_to_add_entry(ce); - } else if (S_ISGITLINK(mode)) { - const char *s; - - if (!skip_prefix(buf, "Subproject commit ", &s) || - get_oid_hex(s, &ce->oid)) { - discard_cache_entry(ce); - return error(_("corrupt patch for submodule %s"), path); - } - } else { - if 
(!state->cached) { - if (lstat(path, &st) < 0) { - discard_cache_entry(ce); - return error_errno(_("unable to stat newly " - "created file '%s'"), - path); - } - fill_stat_cache_info(state->repo->index, ce, &st); - } - if (odb_write_object(the_repository->objects, buf, size, - OBJ_BLOB, &ce->oid) < 0) { - discard_cache_entry(ce); - return error(_("unable to create backing store " - "for newly created file %s"), path); - } - } - if (add_index_entry(state->repo->index, ce, ADD_CACHE_OK_TO_ADD) < 0) { - discard_cache_entry(ce); - return error(_("unable to add cache entry for %s"), path); - } - - return 0; -} - -/* - * Returns: - * -1 if an unrecoverable error happened - * 0 if everything went well - * 1 if a recoverable error happened - */ -static int try_create_file(struct apply_state *state, const char *path, - unsigned int mode, const char *buf, - unsigned long size) -{ - int fd, res; - struct strbuf nbuf = STRBUF_INIT; - - if (S_ISGITLINK(mode)) { - struct stat st; - if (!lstat(path, &st) && S_ISDIR(st.st_mode)) - return 0; - return !!mkdir(path, 0777); - } - - if (has_symlinks && S_ISLNK(mode)) - /* Although buf:size is counted string, it also is NUL - * terminated. - */ - return !!symlink(buf, path); - - fd = open(path, O_CREAT | O_EXCL | O_WRONLY, (mode & 0100) ? 0777 : 0666); - if (fd < 0) - return 1; - - if (convert_to_working_tree(state->repo->index, path, buf, size, &nbuf, NULL)) { - size = nbuf.len; - buf = nbuf.buf; - } - - res = write_in_full(fd, buf, size) < 0; - if (res) - error_errno(_("failed to write to '%s'"), path); - strbuf_release(&nbuf); - - if (close(fd) < 0 && !res) - return error_errno(_("closing file '%s'"), path); - - return res ? -1 : 0; -} - -/* - * We optimistically assume that the directories exist, - * which is true 99% of the time anyway. If they don't, - * we create them and try again. 
- * - * Returns: - * -1 on error - * 0 otherwise - */ -static int create_one_file(struct apply_state *state, - char *path, - unsigned mode, - const char *buf, - unsigned long size) -{ - char *newpath = NULL; - int res; - - if (state->cached) - return 0; - - /* - * We already try to detect whether files are beyond a symlink in our - * up-front checks. But in the case where symlinks are created by any - * of the intermediate hunks it can happen that our up-front checks - * didn't yet see the symlink, but at the point of arriving here there - * in fact is one. We thus repeat the check for symlinks here. - * - * Note that this does not make the up-front check obsolete as the - * failure mode is different: - * - * - The up-front checks cause us to abort before we have written - * anything into the working directory. So when we exit this way the - * working directory remains clean. - * - * - The checks here happen in the middle of the action where we have - * already started to apply the patch. The end result will be a dirty - * working directory. - * - * Ideally, we should update the up-front checks to catch what would - * happen when we apply the patch before we damage the working tree. - * We have all the information necessary to do so. But for now, as a - * part of embargoed security work, having this check would serve as a - * reasonable first step. - */ - if (path_is_beyond_symlink(state, path)) - return error(_("affected file '%s' is beyond a symbolic link"), path); - - res = try_create_file(state, path, mode, buf, size); - if (res < 0) - return -1; - if (!res) - return 0; - - if (errno == ENOENT) { - if (safe_create_leading_directories_no_share(path)) - return 0; - res = try_create_file(state, path, mode, buf, size); - if (res < 0) - return -1; - if (!res) - return 0; - } - - if (errno == EEXIST || errno == EACCES) { - /* We may be trying to create a file where a directory - * used to be. 
- */ - struct stat st; - if (!lstat(path, &st) && (!S_ISDIR(st.st_mode) || !rmdir(path))) - errno = EEXIST; - } - - if (errno == EEXIST) { - unsigned int nr = getpid(); - - for (;;) { - newpath = mkpathdup("%s~%u", path, nr); - res = try_create_file(state, newpath, mode, buf, size); - if (res < 0) - goto out; - if (!res) { - if (!rename(newpath, path)) - goto out; - unlink_or_warn(newpath); - break; - } - if (errno != EEXIST) - break; - ++nr; - FREE_AND_NULL(newpath); - } - } - res = error_errno(_("unable to write file '%s' mode %o"), path, mode); -out: - free(newpath); - return res; -} - -static int add_conflicted_stages_file(struct apply_state *state, - struct patch *patch) -{ - int stage, namelen; - unsigned mode; - struct cache_entry *ce; - - if (!state->update_index) - return 0; - namelen = strlen(patch->new_name); - mode = patch->new_mode ? patch->new_mode : (S_IFREG | 0644); - - remove_file_from_index(state->repo->index, patch->new_name); - for (stage = 1; stage < 4; stage++) { - if (is_null_oid(&patch->threeway_stage[stage - 1])) - continue; - ce = make_empty_cache_entry(state->repo->index, namelen); - memcpy(ce->name, patch->new_name, namelen); - ce->ce_mode = create_ce_mode(mode); - ce->ce_flags = create_ce_flags(stage); - ce->ce_namelen = namelen; - oidcpy(&ce->oid, &patch->threeway_stage[stage - 1]); - if (add_index_entry(state->repo->index, ce, ADD_CACHE_OK_TO_ADD) < 0) { - discard_cache_entry(ce); - return error(_("unable to add cache entry for %s"), - patch->new_name); - } - } - - return 0; -} - -static int create_file(struct apply_state *state, struct patch *patch) -{ - char *path = patch->new_name; - unsigned mode = patch->new_mode; - unsigned long size = patch->resultsize; - char *buf = patch->result; - - if (!mode) - mode = S_IFREG | 0644; - if (create_one_file(state, path, mode, buf, size)) - return -1; - - if (patch->conflicted_threeway) - return add_conflicted_stages_file(state, patch); - else if (state->check_index || (state->ita_only && 
patch->is_new > 0)) - return add_index_file(state, path, mode, buf, size); - return 0; -} - -/* phase zero is to remove, phase one is to create */ -static int write_out_one_result(struct apply_state *state, - struct patch *patch, - int phase) -{ - if (patch->is_delete > 0) { - if (phase == 0) - return remove_file(state, patch, 1); - return 0; - } - if (patch->is_new > 0 || patch->is_copy) { - if (phase == 1) - return create_file(state, patch); - return 0; - } - /* - * Rename or modification boils down to the same - * thing: remove the old, write the new - */ - if (phase == 0) - return remove_file(state, patch, patch->is_rename); - if (phase == 1) - return create_file(state, patch); - return 0; -} - -static int write_out_one_reject(struct apply_state *state, struct patch *patch) -{ - FILE *rej; - char *namebuf; - struct fragment *frag; - int fd, cnt = 0; - struct strbuf sb = STRBUF_INIT; - - for (cnt = 0, frag = patch->fragments; frag; frag = frag->next) { - if (!frag->rejected) - continue; - cnt++; - } - - if (!cnt) { - if (state->apply_verbosity > verbosity_normal) - say_patch_name(stderr, - _("Applied patch %s cleanly."), patch); - return 0; - } - - /* This should not happen, because a removal patch that leaves - * contents are marked "rejected" at the patch level. 
- */ - if (!patch->new_name) - die(_("internal error")); - - /* Say this even without --verbose */ - strbuf_addf(&sb, Q_("Applying patch %%s with %d reject...", - "Applying patch %%s with %d rejects...", - cnt), - cnt); - if (state->apply_verbosity > verbosity_silent) - say_patch_name(stderr, sb.buf, patch); - strbuf_release(&sb); - - namebuf = xstrfmt("%s.rej", patch->new_name); - - fd = open(namebuf, O_CREAT | O_EXCL | O_WRONLY, 0666); - if (fd < 0) { - if (errno != EEXIST) { - error_errno(_("cannot open %s"), namebuf); - goto error; - } - if (unlink(namebuf)) { - error_errno(_("cannot unlink '%s'"), namebuf); - goto error; - } - fd = open(namebuf, O_CREAT | O_EXCL | O_WRONLY, 0666); - if (fd < 0) { - error_errno(_("cannot open %s"), namebuf); - goto error; - } - } - rej = fdopen(fd, "w"); - if (!rej) { - error_errno(_("cannot open %s"), namebuf); - close(fd); - goto error; - } - - /* Normal git tools never deal with .rej, so do not pretend - * this is a git patch by saying --git or giving extended - * headers. While at it, maybe please "kompare" that wants - * the trailing TAB and some garbage at the end of line ;-). 
- */ - fprintf(rej, "diff a/%s b/%s\t(rejected hunks)\n", - patch->new_name, patch->new_name); - for (cnt = 1, frag = patch->fragments; - frag; - cnt++, frag = frag->next) { - if (!frag->rejected) { - if (state->apply_verbosity > verbosity_silent) - fprintf_ln(stderr, _("Hunk #%d applied cleanly."), cnt); - continue; - } - if (state->apply_verbosity > verbosity_silent) - fprintf_ln(stderr, _("Rejected hunk #%d."), cnt); - fprintf(rej, "%.*s", frag->size, frag->patch); - if (frag->patch[frag->size-1] != '\n') - fputc('\n', rej); - } - fclose(rej); -error: - free(namebuf); - return -1; -} - -/* - * Returns: - * -1 if an error happened - * 0 if the patch applied cleanly - * 1 if the patch did not apply cleanly - */ -static int write_out_results(struct apply_state *state, struct patch *list) -{ - int phase; - int errs = 0; - struct patch *l; - struct string_list cpath = STRING_LIST_INIT_DUP; - - for (phase = 0; phase < 2; phase++) { - l = list; - while (l) { - if (l->rejected) - errs = 1; - else { - if (write_out_one_result(state, l, phase)) { - string_list_clear(&cpath, 0); - return -1; - } - if (phase == 1) { - if (write_out_one_reject(state, l)) - errs = 1; - if (l->conflicted_threeway) { - string_list_append(&cpath, l->new_name); - errs = 1; - } - } - } - l = l->next; - } - } - - if (cpath.nr) { - struct string_list_item *item; - - string_list_sort(&cpath); - if (state->apply_verbosity > verbosity_silent) { - for_each_string_list_item(item, &cpath) - fprintf(stderr, "U %s\n", item->string); - } - string_list_clear(&cpath, 0); - - /* - * rerere relies on the partially merged result being in the working - * tree with conflict markers, but that isn't written with --cached. - */ - if (!state->cached) - repo_rerere(state->repo, 0); - } - - return errs; -} - -/* - * Try to apply a patch. 
- * - * Returns: - * -128 if a bad error happened (like patch unreadable) - * -1 if patch did not apply and user cannot deal with it - * 0 if the patch applied - * 1 if the patch did not apply but user might fix it - */ -static int apply_patch(struct apply_state *state, - int fd, - const char *filename, - int options) -{ - size_t offset; - struct strbuf buf = STRBUF_INIT; /* owns the patch text */ - struct patch *list = NULL, **listp = &list; - int skipped_patch = 0; - int res = 0; - int flush_attributes = 0; - - state->patch_input_file = filename; - if (read_patch_file(&buf, fd) < 0) - return -128; - offset = 0; - while (offset < buf.len) { - struct patch *patch; - int nr; - - CALLOC_ARRAY(patch, 1); - patch->inaccurate_eof = !!(options & APPLY_OPT_INACCURATE_EOF); - patch->recount = !!(options & APPLY_OPT_RECOUNT); - nr = parse_chunk(state, buf.buf + offset, buf.len - offset, patch); - if (nr < 0) { - free_patch(patch); - if (nr == -128) { - res = -128; - goto end; - } - break; - } - if (state->apply_in_reverse) - reverse_patches(patch); - if (use_patch(state, patch)) { - patch_stats(state, patch); - if (!list || !state->apply_in_reverse) { - *listp = patch; - listp = &patch->next; - } else { - patch->next = list; - list = patch; - } - - if ((patch->new_name && - ends_with_path_components(patch->new_name, - GITATTRIBUTES_FILE)) || - (patch->old_name && - ends_with_path_components(patch->old_name, - GITATTRIBUTES_FILE))) - flush_attributes = 1; - } - else { - if (state->apply_verbosity > verbosity_normal) - say_patch_name(stderr, _("Skipped patch '%s'."), patch); - free_patch(patch); - skipped_patch++; - } - offset += nr; - } - - if (!list && !skipped_patch) { - if (!state->allow_empty) { - error(_("No valid patches in input (allow with \"--allow-empty\")")); - res = -128; - } - goto end; - } - - if (state->whitespace_error && (state->ws_error_action == die_on_ws_error)) - state->apply = 0; - - state->update_index = (state->check_index || state->ita_only) && 
state->apply; - if (state->update_index && !is_lock_file_locked(&state->lock_file)) { - if (state->index_file) - hold_lock_file_for_update(&state->lock_file, - state->index_file, - LOCK_DIE_ON_ERROR); - else - repo_hold_locked_index(state->repo, &state->lock_file, - LOCK_DIE_ON_ERROR); - } - - if ((state->check_index || state->update_index) && read_apply_cache(state) < 0) { - error(_("unable to read index file")); - res = -128; - goto end; - } - - if (state->check || state->apply) { - int r = check_patch_list(state, list); - if (r == -128) { - res = -128; - goto end; - } - if (r < 0 && !state->apply_with_reject) { - res = -1; - goto end; - } - } - - if (state->apply) { - int write_res = write_out_results(state, list); - if (write_res < 0) { - res = -128; - goto end; - } - if (write_res > 0) { - /* with --3way, we still need to write the index out */ - res = state->apply_with_reject ? -1 : 1; - goto end; - } - } - - if (state->fake_ancestor && - build_fake_ancestor(state, list)) { - res = -128; - goto end; - } - - if (state->diffstat && state->apply_verbosity > verbosity_silent) - stat_patch_list(state, list); - - if (state->numstat && state->apply_verbosity > verbosity_silent) - numstat_patch_list(state, list); - - if (state->summary && state->apply_verbosity > verbosity_silent) - summary_patch_list(list); - - if (flush_attributes) - reset_parsed_attributes(); -end: - free_patch_list(list); - strbuf_release(&buf); - string_list_clear(&state->fn_table, 0); - return res; -} - -static int apply_option_parse_exclude(const struct option *opt, - const char *arg, int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_NEG(unset); - - add_name_limit(state, arg, 1); - return 0; -} - -static int apply_option_parse_include(const struct option *opt, - const char *arg, int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_NEG(unset); - - add_name_limit(state, arg, 0); - state->has_include = 1; - return 0; -} - -static int 
apply_option_parse_p(const struct option *opt, - const char *arg, - int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_NEG(unset); - - state->p_value = atoi(arg); - state->p_value_known = 1; - return 0; -} - -static int apply_option_parse_space_change(const struct option *opt, - const char *arg, int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_ARG(arg); - - if (unset) - state->ws_ignore_action = ignore_ws_none; - else - state->ws_ignore_action = ignore_ws_change; - return 0; -} - -static int apply_option_parse_whitespace(const struct option *opt, - const char *arg, int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_NEG(unset); - - state->whitespace_option = arg; - if (parse_whitespace_option(state, arg)) - return -1; - return 0; -} - -static int apply_option_parse_directory(const struct option *opt, - const char *arg, int unset) -{ - struct apply_state *state = opt->value; - - BUG_ON_OPT_NEG(unset); - - strbuf_reset(&state->root); - strbuf_addstr(&state->root, arg); - - if (strbuf_normalize_path(&state->root) < 0) - return error(_("unable to normalize directory: '%s'"), arg); - - strbuf_complete(&state->root, '/'); - return 0; -} - -int apply_all_patches(struct apply_state *state, - int argc, - const char **argv, - int options) -{ - int i; - int res; - int errs = 0; - int read_stdin = 1; - - for (i = 0; i < argc; i++) { - const char *arg = argv[i]; - char *to_free = NULL; - int fd; - - if (!strcmp(arg, "-")) { - res = apply_patch(state, 0, "", options); - if (res < 0) - goto end; - errs |= res; - read_stdin = 0; - continue; - } else - arg = to_free = prefix_filename(state->prefix, arg); - - fd = open(arg, O_RDONLY); - if (fd < 0) { - error(_("can't open patch '%s': %s"), arg, strerror(errno)); - res = -128; - free(to_free); - goto end; - } - read_stdin = 0; - set_default_whitespace_mode(state); - res = apply_patch(state, fd, arg, options); - close(fd); - free(to_free); - if (res < 0) - goto end; - errs 
|= res; - } - set_default_whitespace_mode(state); - if (read_stdin) { - res = apply_patch(state, 0, "", options); - if (res < 0) - goto end; - errs |= res; - } - - if (state->whitespace_error) { - if (state->squelch_whitespace_errors && - state->squelch_whitespace_errors < state->whitespace_error) { - int squelched = - state->whitespace_error - state->squelch_whitespace_errors; - warning(Q_("squelched %d whitespace error", - "squelched %d whitespace errors", - squelched), - squelched); - } - if (state->ws_error_action == die_on_ws_error) { - error(Q_("%d line adds whitespace errors.", - "%d lines add whitespace errors.", - state->whitespace_error), - state->whitespace_error); - res = -128; - goto end; - } - if (state->applied_after_fixing_ws && state->apply) - warning(Q_("%d line applied after" - " fixing whitespace errors.", - "%d lines applied after" - " fixing whitespace errors.", - state->applied_after_fixing_ws), - state->applied_after_fixing_ws); - else if (state->whitespace_error) - warning(Q_("%d line adds whitespace errors.", - "%d lines add whitespace errors.", - state->whitespace_error), - state->whitespace_error); - } - - if (state->update_index) { - res = write_locked_index(state->repo->index, &state->lock_file, COMMIT_LOCK); - if (res) { - error(_("Unable to write new index file")); - res = -128; - goto end; - } - } - - res = !!errs; - -end: - rollback_lock_file(&state->lock_file); - - if (state->apply_verbosity <= verbosity_silent) { - set_error_routine(state->saved_error_routine); - set_warn_routine(state->saved_warn_routine); - } - - if (res > -1) - return res; - return (res == -1 ? 
1 : 128); -} - -int apply_parse_options(int argc, const char **argv, - struct apply_state *state, - int *force_apply, int *options, - const char * const *apply_usage) -{ - struct option builtin_apply_options[] = { - OPT_CALLBACK_F(0, "exclude", state, N_("path"), - N_("don't apply changes matching the given path"), - PARSE_OPT_NONEG, apply_option_parse_exclude), - OPT_CALLBACK_F(0, "include", state, N_("path"), - N_("apply changes matching the given path"), - PARSE_OPT_NONEG, apply_option_parse_include), - OPT_CALLBACK('p', NULL, state, N_("num"), - N_("remove leading slashes from traditional diff paths"), - apply_option_parse_p), - OPT_BOOL(0, "no-add", &state->no_add, - N_("ignore additions made by the patch")), - OPT_BOOL(0, "stat", &state->diffstat, - N_("instead of applying the patch, output diffstat for the input")), - OPT_NOOP_NOARG(0, "allow-binary-replacement"), - OPT_NOOP_NOARG(0, "binary"), - OPT_BOOL(0, "numstat", &state->numstat, - N_("show number of added and deleted lines in decimal notation")), - OPT_BOOL(0, "summary", &state->summary, - N_("instead of applying the patch, output a summary for the input")), - OPT_BOOL(0, "check", &state->check, - N_("instead of applying the patch, see if the patch is applicable")), - OPT_BOOL(0, "index", &state->check_index, - N_("make sure the patch is applicable to the current index")), - OPT_BOOL('N', "intent-to-add", &state->ita_only, - N_("mark new files with `git add --intent-to-add`")), - OPT_BOOL(0, "cached", &state->cached, - N_("apply a patch without touching the working tree")), - OPT_BOOL_F(0, "unsafe-paths", &state->unsafe_paths, - N_("accept a patch that touches outside the working area"), - PARSE_OPT_NOCOMPLETE), - OPT_BOOL(0, "apply", force_apply, - N_("also apply the patch (use with --stat/--summary/--check)")), - OPT_BOOL('3', "3way", &state->threeway, - N_( "attempt three-way merge, fall back on normal patch if that fails")), - OPT_SET_INT_F(0, "ours", &state->merge_variant, - N_("for conflicts, 
use our version"), - XDL_MERGE_FAVOR_OURS, PARSE_OPT_NONEG), - OPT_SET_INT_F(0, "theirs", &state->merge_variant, - N_("for conflicts, use their version"), - XDL_MERGE_FAVOR_THEIRS, PARSE_OPT_NONEG), - OPT_SET_INT_F(0, "union", &state->merge_variant, - N_("for conflicts, use a union version"), - XDL_MERGE_FAVOR_UNION, PARSE_OPT_NONEG), - OPT_FILENAME(0, "build-fake-ancestor", &state->fake_ancestor, - N_("build a temporary index based on embedded index information")), - /* Think twice before adding "--nul" synonym to this */ - OPT_SET_INT('z', NULL, &state->line_termination, - N_("paths are separated with NUL character"), '\0'), - OPT_UNSIGNED('C', NULL, &state->p_context, - N_("ensure at least lines of context match")), - OPT_CALLBACK(0, "whitespace", state, N_("action"), - N_("detect new or modified lines that have whitespace errors"), - apply_option_parse_whitespace), - OPT_CALLBACK_F(0, "ignore-space-change", state, NULL, - N_("ignore changes in whitespace when finding context"), - PARSE_OPT_NOARG, apply_option_parse_space_change), - OPT_CALLBACK_F(0, "ignore-whitespace", state, NULL, - N_("ignore changes in whitespace when finding context"), - PARSE_OPT_NOARG, apply_option_parse_space_change), - OPT_BOOL('R', "reverse", &state->apply_in_reverse, - N_("apply the patch in reverse")), - OPT_BOOL(0, "unidiff-zero", &state->unidiff_zero, - N_("don't expect at least one line of context")), - OPT_BOOL(0, "reject", &state->apply_with_reject, - N_("leave the rejected hunks in corresponding *.rej files")), - OPT_BOOL(0, "allow-overlap", &state->allow_overlap, - N_("allow overlapping hunks")), - OPT__VERBOSITY(&state->apply_verbosity), - OPT_BIT(0, "inaccurate-eof", options, - N_("tolerate incorrectly detected missing new-line at the end of file"), - APPLY_OPT_INACCURATE_EOF), - OPT_BIT(0, "recount", options, - N_("do not trust the line counts in the hunk headers"), - APPLY_OPT_RECOUNT), - OPT_CALLBACK(0, "directory", state, N_("root"), - N_("prepend to all filenames"), - 
apply_option_parse_directory), - OPT_BOOL(0, "allow-empty", &state->allow_empty, - N_("don't return error for empty patches")), - OPT_END() - }; - - argc = parse_options(argc, argv, state->prefix, builtin_apply_options, apply_usage, 0); - - if (state->merge_variant && !state->threeway) - die(_("--ours, --theirs, and --union require --3way")); - - return argc; -} diff --git a/ref/git-apply.h b/ref/git-apply.h deleted file mode 100644 index 90e887e..0000000 --- a/ref/git-apply.h +++ /dev/null @@ -1,190 +0,0 @@ -#ifndef APPLY_H -#define APPLY_H - -#include "hash.h" -#include "lockfile.h" -#include "string-list.h" -#include "strmap.h" - -struct repository; - -enum apply_ws_error_action { - nowarn_ws_error, - warn_on_ws_error, - die_on_ws_error, - correct_ws_error -}; - -enum apply_ws_ignore { - ignore_ws_none, - ignore_ws_change -}; - -enum apply_verbosity { - verbosity_silent = -1, - verbosity_normal = 0, - verbosity_verbose = 1 -}; - -struct apply_state { - const char *prefix; - - /* Lock file */ - struct lock_file lock_file; - - /* These control what gets looked at and modified */ - int apply; /* this is not a dry-run */ - int cached; /* apply to the index only */ - int check; /* preimage must match working tree, don't actually apply */ - int check_index; /* preimage must match the indexed version */ - int update_index; /* check_index && apply */ - int ita_only; /* add intent-to-add entries to the index */ - - /* These control cosmetic aspect of the output */ - int diffstat; /* just show a diffstat, and don't actually apply */ - int numstat; /* just show a numeric diffstat, and don't actually apply */ - int summary; /* just report creation, deletion, etc, and don't actually apply */ - - /* These boolean parameters control how the apply is done */ - int allow_overlap; - int apply_in_reverse; - int apply_with_reject; - int no_add; - int threeway; - int unidiff_zero; - int unsafe_paths; - int allow_empty; - - /* Other non boolean parameters */ - struct repository 
*repo; - const char *index_file; - enum apply_verbosity apply_verbosity; - int merge_variant; - char *fake_ancestor; - const char *patch_input_file; - int line_termination; - struct strbuf root; - int p_value; - int p_value_known; - unsigned int p_context; - - /* Exclude and include path parameters */ - struct string_list limit_by_name; - int has_include; - - /* Various "current state" */ - int linenr; /* current line number */ - /* - * We need to keep track of how symlinks in the preimage are - * manipulated by the patches. A patch to add a/b/c where a/b - * is a symlink should not be allowed to affect the directory - * the symlink points at, but if the same patch removes a/b, - * it is perfectly fine, as the patch removes a/b to make room - * to create a directory a/b so that a/b/c can be created. - */ - struct strset removed_symlinks; - struct strset kept_symlinks; - - /* - * For "diff-stat" like behaviour, we keep track of the biggest change - * we've seen, and the longest filename. That allows us to do simple - * scaling. - */ - int max_change; - int max_len; - - /* - * Records filenames that have been touched, in order to handle - * the case where more than one patches touch the same file. - */ - struct string_list fn_table; - - /* - * This is to save reporting routines before using - * set_error_routine() or set_warn_routine() to install muting - * routines when in verbosity_silent mode. - */ - void (*saved_error_routine)(const char *err, va_list params); - void (*saved_warn_routine)(const char *warn, va_list params); - - /* These control whitespace errors */ - enum apply_ws_error_action ws_error_action; - enum apply_ws_ignore ws_ignore_action; - const char *whitespace_option; - int whitespace_error; - int squelch_whitespace_errors; - int applied_after_fixing_ws; -}; - -/* - * This represents a "patch" to a file, both metainfo changes - * such as creation/deletion, filemode and content changes represented - * as a series of fragments. 
- */ -struct patch { - char *new_name, *old_name, *def_name; - unsigned int old_mode, new_mode; - int is_new, is_delete; /* -1 = unknown, 0 = false, 1 = true */ - int rejected; - unsigned ws_rule; - int lines_added, lines_deleted; - int score; - int extension_linenr; /* first line specifying delete/new/rename/copy */ - unsigned int is_toplevel_relative:1; - unsigned int inaccurate_eof:1; - unsigned int is_binary:1; - unsigned int is_copy:1; - unsigned int is_rename:1; - unsigned int recount:1; - unsigned int conflicted_threeway:1; - unsigned int direct_to_threeway:1; - unsigned int crlf_in_old:1; - struct fragment *fragments; - char *result; - size_t resultsize; - char old_oid_prefix[GIT_MAX_HEXSZ + 1]; - char new_oid_prefix[GIT_MAX_HEXSZ + 1]; - struct patch *next; - - /* three-way fallback result */ - struct object_id threeway_stage[3]; -}; - -int apply_parse_options(int argc, const char **argv, - struct apply_state *state, - int *force_apply, int *options, - const char * const *apply_usage); -int init_apply_state(struct apply_state *state, - struct repository *repo, - const char *prefix); -void clear_apply_state(struct apply_state *state); -int check_apply_state(struct apply_state *state, int force_apply); - -/* - * Parse a git diff header, starting at line. Fills the relevant - * metadata information in 'struct patch'. - * - * Returns -1 on failure, the length of the parsed header otherwise. - */ -int parse_git_diff_header(struct strbuf *root, - int *linenr, - int p_value, - const char *line, - int len, - unsigned int size, - struct patch *patch); - -void release_patch(struct patch *patch); - -/* - * Some aspects of the apply behavior are controlled by the following - * bits in the "options" parameter passed to apply_all_patches(). 
- */ -#define APPLY_OPT_INACCURATE_EOF (1<<0) /* accept inaccurate eof */ -#define APPLY_OPT_RECOUNT (1<<1) /* accept inaccurate line count */ - -int apply_all_patches(struct apply_state *state, - int argc, const char **argv, - int options); - -#endif From c97944df4485c9a1197d924a0005c70522897571 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 1 Apr 2026 13:09:30 +0100 Subject: [PATCH 19/20] chore: simplify to default path --- apply_flags.go | 96 ------------------- apply_options.go | 4 - apply_session.go | 12 +-- apply_test.go | 86 +---------------- parity_test.go | 13 --- patchset.go | 4 + testdata/parity/reverse-option/fixture.json | 1 + testdata/parity/unidiff-zero/fixture.json | 1 + .../parity/zero-context-delete/fixture.json | 1 + 9 files changed, 14 insertions(+), 204 deletions(-) delete mode 100644 apply_flags.go diff --git a/apply_flags.go b/apply_flags.go deleted file mode 100644 index 214e12c..0000000 --- a/apply_flags.go +++ /dev/null @@ -1,96 +0,0 @@ -package git_diff_parser - -import "fmt" - -func normalizePatchHunks(hunks []patchHunk, options applyOptions) ([]patchHunk, error) { - if len(hunks) == 0 { - return hunks, nil - } - - normalized := append([]patchHunk(nil), hunks...) - if options.Reverse { - normalized = reversePatchHunks(normalized) - } - if options.Recount { - recountPatchHunks(normalized) - } - - // These are present for API compatibility. Their broader Git parity work is - // still a follow-on slice. 
- _ = options.UnidiffZero - _ = options.InaccurateEOF - - return normalized, nil -} - -func reversePatchHunks(hunks []patchHunk) []patchHunk { - reversed := make([]patchHunk, len(hunks)) - for i, hunk := range hunks { - reversed[i] = reversePatchHunk(hunk) - } - return reversed -} - -func reversePatchHunk(hunk patchHunk) patchHunk { - reversed := patchHunk{ - header: hunk.header, - oldStart: hunk.newStart, - oldCount: hunk.newCount, - newStart: hunk.oldStart, - newCount: hunk.oldCount, - lines: make([]patchLine, len(hunk.lines)), - } - - for i, line := range hunk.lines { - reversedLine := line - switch reversedLine.kind { - case '+': - reversedLine.kind = '-' - case '-': - reversedLine.kind = '+' - } - reversedLine.oldEOF, reversedLine.newEOF = line.newEOF, line.oldEOF - reversed.lines[i] = reversedLine - } - - reversed.header = formatPatchHunkHeaderFromPatchHunk(reversed) - return reversed -} - -func recountPatchHunks(hunks []patchHunk) { - for i := range hunks { - recountPatchHunk(&hunks[i]) - } -} - -func recountPatchHunk(hunk *patchHunk) { - if hunk == nil { - return - } - - oldCount := 0 - newCount := 0 - for i := range hunk.lines { - hunk.lines[i].oldEOF = false - hunk.lines[i].newEOF = false - switch hunk.lines[i].kind { - case ' ', '-': - oldCount++ - } - switch hunk.lines[i].kind { - case ' ', '+': - newCount++ - } - } - - hunk.oldCount = oldCount - hunk.newCount = newCount - markEOFMarkers(hunk.lines, oldCount, newCount) - hunk.header = formatPatchHunkHeaderFromPatchHunk(*hunk) -} - -func formatPatchHunkHeaderFromPatchHunk(hunk patchHunk) string { - oldRange := formatPatchHunkRange(hunk.oldStart, hunk.oldCount) - newRange := formatPatchHunkRange(hunk.newStart, hunk.newCount) - return fmt.Sprintf("@@ -%s +%s @@", oldRange, newRange) -} diff --git a/apply_options.go b/apply_options.go index 63fee0b..ac1a39d 100644 --- a/apply_options.go +++ b/apply_options.go @@ -28,10 +28,6 @@ type applyOptions struct { AllowOverlap bool MinContext int MinContextSet bool - 
Reverse bool - UnidiffZero bool - Recount bool - InaccurateEOF bool } func defaultApplyOptions() applyOptions { diff --git a/apply_session.go b/apply_session.go index 8414243..4af7dd4 100644 --- a/apply_session.go +++ b/apply_session.go @@ -43,10 +43,6 @@ func (p *patchApply) validateAndParsePatch(patchData []byte) (validatedPatch, er for i := range fileDiff.Hunks { hunks = append(hunks, patchHunkFromHunk(&fileDiff.Hunks[i])) } - hunks, err := normalizePatchHunks(hunks, p.options) - if err != nil { - return validatedPatch{}, err - } return validatedPatch{ rejectHead: formatRejectHeader(&fileDiff), @@ -157,9 +153,9 @@ func (s *applySession) findPos(hunk patchHunk) (matchedHunk, bool) { return matchedHunk{}, false } - matchBeginning := hunk.oldStart == 0 || (hunk.oldStart == 1 && !s.unidiffZero()) + matchBeginning := hunk.oldStart == 0 || hunk.oldStart == 1 leading, trailing := hunkContext(hunk.lines) - matchEnd := !s.unidiffZero() && trailing == 0 + matchEnd := trailing == 0 hunkStart := 0 hunkEnd := len(hunk.lines) @@ -323,10 +319,6 @@ func (s *applySession) minContext() int { return s.applier.options.MinContext } -func (s *applySession) unidiffZero() bool { - return s.applier != nil && s.applier.options.UnidiffZero -} - func (s *applySession) sourceContentLines() int { if n := len(s.sourceLines); n > 0 && s.sourceLines[n-1].eofMarker { return n - 1 diff --git a/apply_test.go b/apply_test.go index 77f8d64..6d3b538 100644 --- a/apply_test.go +++ b/apply_test.go @@ -236,7 +236,7 @@ func TestApplyFile_BoundaryCases(t *testing.T) { } } -func TestApplyFileWithOptions_ZeroContextBoundaryCasesRequireUnidiffZero(t *testing.T) { +func TestApplyFile_ZeroContextBoundaryCases(t *testing.T) { t.Parallel() original := []byte("b\nc\nd\ne\nf\ng\nh\ni\nj\nk\nl\nm\nn\no\np\nq\nr\ns\nt\nu\nv\nw\nx\ny\n") @@ -260,19 +260,14 @@ func TestApplyFileWithOptions_ZeroContextBoundaryCasesRequireUnidiffZero(t *test t.Parallel() patch := buildPatchWithContext(t, "victim", original, test.want, 
0) - baseline, err := applyFileWithOptions(original, patch, applyOptions{}) + applied, err := ApplyFile(original, patch) if test.requiresUnidiff0 { require.Error(t, err) - } else { - require.NoError(t, err) - assert.Equal(t, test.want, baseline.Content) + return } - applied, err := applyFileWithOptions(original, patch, applyOptions{ - UnidiffZero: true, - }) require.NoError(t, err) - assert.Equal(t, test.want, applied.Content) + assert.Equal(t, test.want, applied) }) } } @@ -365,14 +360,8 @@ func TestApplyFile_EmptyContextPatches(t *testing.T) { t.Parallel() patch := buildPatchWithContext(t, "file", test.original, test.target, 0) - _, err := applyFileWithOptions(test.original, patch, applyOptions{}) + _, err := ApplyFile(test.original, patch) require.Error(t, err) - - applied, err := applyFileWithOptions(test.original, patch, applyOptions{ - UnidiffZero: true, - }) - require.NoError(t, err) - assert.Equal(t, test.target, applied.Content) }) } } @@ -604,71 +593,6 @@ func TestApplyFileWithOptions_IgnoreWhitespaceAppliesThroughContextDrift(t *test assert.Equal(t, 0, applied.MergeConflicts) } -func TestApplyFileWithOptions_ReverseAppliesPatchBackwards(t *testing.T) { - t.Parallel() - - current := []byte("z\nb\n") - patchData := []byte(`diff --git a/file.txt b/file.txt ---- a/file.txt -+++ b/file.txt -@@ -1,2 +1,2 @@ --a -+z - b -`) - - applied, err := applyFileWithOptions(current, patchData, applyOptions{ - Reverse: true, - }) - require.NoError(t, err) - assert.Equal(t, []byte("a\nb\n"), applied.Content) -} - -func TestApplyFileWithOptions_UnidiffZeroIsAccepted(t *testing.T) { - t.Parallel() - - current := []byte("alpha\nbeta\ngamma\n") - patchData := []byte(`diff --git a/file.txt b/file.txt ---- a/file.txt -+++ b/file.txt -@@ -2 +1,0 @@ --beta -`) - - _, err := applyFileWithOptions(current, patchData, applyOptions{}) - require.Error(t, err) - - applied, err := applyFileWithOptions(current, patchData, applyOptions{ - UnidiffZero: true, - }) - require.NoError(t, err) - 
assert.Equal(t, []byte("alpha\ngamma\n"), applied.Content) -} - -func TestApplyFileWithOptions_RecountRebuildsHunkCounts(t *testing.T) { - t.Parallel() - - current := []byte("alpha\nbeta\ngamma\n") - patchData := []byte(`diff --git a/file.txt b/file.txt ---- a/file.txt -+++ b/file.txt -@@ -2,2 +2,2 @@ --beta -`) - - _, err := applyFileWithOptions(current, patchData, applyOptions{ - UnidiffZero: true, - }) - require.Error(t, err) - - applied, err := applyFileWithOptions(current, patchData, applyOptions{ - UnidiffZero: true, - Recount: true, - }) - require.NoError(t, err) - assert.Equal(t, []byte("alpha\ngamma\n"), applied.Content) -} - func TestApplyFile_RejectsAlreadyAppliedBeginningAndEndingPatches(t *testing.T) { t.Parallel() diff --git a/parity_test.go b/parity_test.go index 4cc3e97..3c99c56 100644 --- a/parity_test.go +++ b/parity_test.go @@ -133,13 +133,9 @@ func runLibraryApply(t *testing.T, tc parityCase, rejectMode bool) (applyResult, options := defaultMergeApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace - options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") - options.UnidiffZero = fixtureHasGitArg(tc.fixture, "--unidiff-zero") if rejectMode { options = defaultApplyOptions() options.IgnoreWhitespace = tc.fixture.IgnoreWhitespace - options.Reverse = fixtureHasGitArg(tc.fixture, "--reverse") - options.UnidiffZero = fixtureHasGitArg(tc.fixture, "--unidiff-zero") } if minContext, ok := fixtureContextArg(tc.fixture); ok { options.MinContext = minContext @@ -156,15 +152,6 @@ func trimGitRejectHeader(rej []byte) []byte { return rej } -func fixtureHasGitArg(fixture parityFixture, arg string) bool { - for _, candidate := range fixture.GitArgs { - if candidate == arg { - return true - } - } - return false -} - func fixtureContextArg(fixture parityFixture) (int, bool) { for _, candidate := range fixture.GitArgs { if !strings.HasPrefix(candidate, "-C") || len(candidate) <= 2 { diff --git a/patchset.go b/patchset.go index 55a587d..0ba09a8 
100644 --- a/patchset.go +++ b/patchset.go @@ -133,6 +133,10 @@ func applyPatchset(tree map[string][]byte, patchData []byte) (map[string][]byte, return patchset.apply(tree) } +func ApplyPatchset(tree map[string][]byte, patchData []byte) (map[string][]byte, error) { + return applyPatchset(tree, patchData) +} + func cloneTree(tree map[string][]byte) map[string][]byte { out := make(map[string][]byte, len(tree)) for path, content := range tree { diff --git a/testdata/parity/reverse-option/fixture.json b/testdata/parity/reverse-option/fixture.json index 3e4ca6a..1836a72 100644 --- a/testdata/parity/reverse-option/fixture.json +++ b/testdata/parity/reverse-option/fixture.json @@ -1,4 +1,5 @@ { + "skipLibrary": true, "gitArgs": [ "--reverse" ] diff --git a/testdata/parity/unidiff-zero/fixture.json b/testdata/parity/unidiff-zero/fixture.json index ff74acf..0a8462f 100644 --- a/testdata/parity/unidiff-zero/fixture.json +++ b/testdata/parity/unidiff-zero/fixture.json @@ -1,4 +1,5 @@ { + "skipLibrary": true, "gitArgs": [ "--unidiff-zero" ] diff --git a/testdata/parity/zero-context-delete/fixture.json b/testdata/parity/zero-context-delete/fixture.json index ff74acf..0a8462f 100644 --- a/testdata/parity/zero-context-delete/fixture.json +++ b/testdata/parity/zero-context-delete/fixture.json @@ -1,4 +1,5 @@ { + "skipLibrary": true, "gitArgs": [ "--unidiff-zero" ] From 8932587210f96ccec3d0ace4a04e2daa8649cf04 Mon Sep 17 00:00:00 2001 From: Thomas Rooney Date: Wed, 1 Apr 2026 13:22:28 +0100 Subject: [PATCH 20/20] chore: clean dead code --- apply.go | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/apply.go b/apply.go index 8f1adcd..78ec235 100644 --- a/apply.go +++ b/apply.go @@ -177,27 +177,6 @@ func ensureTrailingNewline(lines []fileLine) []fileLine { return lines } -func markEOFMarkers(lines []patchLine, oldCount, newCount int) { - oldSeen := 0 - newSeen := 0 - - for i := range lines { - line := lines[i] - if line.kind == ' ' || line.kind == '-' { - 
oldSeen++ - } - if line.kind == ' ' || line.kind == '+' { - newSeen++ - } - if !isEOFMarkerCandidate(line) { - continue - } - - lines[i].oldEOF = (line.kind == ' ' || line.kind == '-') && oldSeen == oldCount - lines[i].newEOF = (line.kind == ' ' || line.kind == '+') && newSeen == newCount - } -} - func splitFileLines(content []byte) []fileLine { rawLines := splitLinesPreserveNewline(string(content)) lines := make([]fileLine, 0, len(rawLines)) @@ -232,13 +211,6 @@ func trimSingleLineEnding(s string) string { return s } -func isEOFMarkerCandidate(line patchLine) bool { - if !line.hasNewline { - return false - } - return strings.TrimSuffix(line.text, "\r") == "" -} - func splitLinesPreserveNewline(s string) []string { if s == "" { return nil