diff --git a/changelog/unreleased/pull-2598 b/changelog/unreleased/pull-2598 new file mode 100644 index 000000000..7000c66d5 --- /dev/null +++ b/changelog/unreleased/pull-2598 @@ -0,0 +1,6 @@ +Enhancement: Improve speed of diff command + +We've improved the performance of the diff command when comparing snapshots +with similar content. It should run up to twice as fast as before. + +https://github.com/restic/restic/pull/2598 diff --git a/cmd/restic/cmd_diff.go b/cmd/restic/cmd_diff.go index 090568845..1fc8b0458 100644 --- a/cmd/restic/cmd_diff.go +++ b/cmd/restic/cmd_diff.go @@ -116,10 +116,10 @@ func addBlobs(bs restic.BlobSet, node *restic.Node) { // DiffStats collects the differences between two snapshots. type DiffStats struct { - ChangedFiles int - Added DiffStat - Removed DiffStat - BlobsBefore, BlobsAfter restic.BlobSet + ChangedFiles int + Added DiffStat + Removed DiffStat + BlobsBefore, BlobsAfter, BlobsCommon restic.BlobSet } // NewDiffStats creates new stats for a diff run. @@ -127,6 +127,7 @@ func NewDiffStats() *DiffStats { return &DiffStats{ BlobsBefore: restic.NewBlobSet(), BlobsAfter: restic.NewBlobSet(), + BlobsCommon: restic.NewBlobSet(), } } @@ -177,6 +178,27 @@ func (c *Comparer) printDir(ctx context.Context, mode string, stats *DiffStat, b return nil } +func (c *Comparer) collectDir(ctx context.Context, blobs restic.BlobSet, id restic.ID) error { + debug.Log("print tree %v", id) + tree, err := c.repo.LoadTree(ctx, id) + if err != nil { + return err + } + + for _, node := range tree.Nodes { + addBlobs(blobs, node) + + if node.Type == "dir" { + err := c.collectDir(ctx, blobs, *node.Subtree) + if err != nil { + Warnf("error: %v\n", err) + } + } + } + + return nil +} + func uniqueNodeNames(tree1, tree2 *restic.Tree) (tree1Nodes, tree2Nodes map[string]*restic.Node, uniqueNames []string) { names := make(map[string]struct{}) tree1Nodes = make(map[string]*restic.Node) @@ -248,7 +270,12 @@ func (c *Comparer) diffTree(ctx context.Context, stats *DiffStats, prefix string } if node1.Type == "dir" && node2.Type == "dir" { - err := c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree) + var err error + if (*node1.Subtree).Equal(*node2.Subtree) { + err = c.collectDir(ctx, stats.BlobsCommon, *node1.Subtree) + } else { + err = c.diffTree(ctx, stats, name, *node1.Subtree, *node2.Subtree) + } if err != nil { Warnf("error: %v\n", err) } @@ -345,8 +372,8 @@ func runDiff(opts DiffOptions, gopts GlobalOptions, args []string) error { } both := stats.BlobsBefore.Intersect(stats.BlobsAfter) - updateBlobs(repo, stats.BlobsBefore.Sub(both), &stats.Removed) - updateBlobs(repo, stats.BlobsAfter.Sub(both), &stats.Added) + updateBlobs(repo, stats.BlobsBefore.Sub(both).Sub(stats.BlobsCommon), &stats.Removed) + updateBlobs(repo, stats.BlobsAfter.Sub(both).Sub(stats.BlobsCommon), &stats.Added) Printf("\n") Printf("Files: %5d new, %5d removed, %5d changed\n", stats.Added.Files, stats.Removed.Files, stats.ChangedFiles) diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 439ea8510..b279a75b0 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -154,6 +154,21 @@ func testRunCheckOutput(gopts GlobalOptions) (string, error) { return buf.String(), err } +func testRunDiffOutput(gopts GlobalOptions, firstSnapshotID string, secondSnapshotID string) (string, error) { + buf := bytes.NewBuffer(nil) + + globalOptions.stdout = buf + defer func() { + globalOptions.stdout = os.Stdout + }() + + opts := DiffOptions{ + ShowMetadata: false, + } + err := runDiff(opts, gopts, []string{firstSnapshotID, secondSnapshotID}) + return string(buf.Bytes()), err +} + func testRunRebuildIndex(t testing.TB, gopts GlobalOptions) { globalOptions.stdout = ioutil.Discard defer func() { @@ -1472,3 +1487,91 @@ func TestQuietBackup(t *testing.T) { testRunCheck(t, env.gopts) } + +func copyFile(dst string, src string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + } + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + _, err = io.Copy(dstFile, srcFile) + return err +} + +var diffOutputRegexPatterns = []string{ + "-.+modfile", + "M.+modfile1", + "\\+.+modfile2", + "\\+.+modfile3", + "\\+.+modfile4", + "-.+submoddir", + "-.+submoddir.subsubmoddir", + "\\+.+submoddir2", + "\\+.+submoddir2.subsubmoddir", + "Files: +2 new, +1 removed, +1 changed", + "Dirs: +3 new, +2 removed", + "Data Blobs: +2 new, +1 removed", + "Added: +7[0-9]{2}\\.[0-9]{3} KiB", + "Removed: +2[0-9]{2}\\.[0-9]{3} KiB", +} + +func TestDiff(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + + testRunInit(t, env.gopts) + + datadir := filepath.Join(env.base, "testdata") + testdir := filepath.Join(datadir, "testdir") + subtestdir := filepath.Join(testdir, "subtestdir") + testfile := filepath.Join(testdir, "testfile") + + rtest.OK(t, os.Mkdir(testdir, 0755)) + rtest.OK(t, os.Mkdir(subtestdir, 0755)) + rtest.OK(t, appendRandomData(testfile, 256*1024)) + + moddir := filepath.Join(datadir, "moddir") + submoddir := filepath.Join(moddir, "submoddir") + subsubmoddir := filepath.Join(submoddir, "subsubmoddir") + modfile := filepath.Join(moddir, "modfile") + rtest.OK(t, os.Mkdir(moddir, 0755)) + rtest.OK(t, os.Mkdir(submoddir, 0755)) + rtest.OK(t, os.Mkdir(subsubmoddir, 0755)) + rtest.OK(t, copyFile(modfile, testfile)) + rtest.OK(t, appendRandomData(modfile+"1", 256*1024)) + + snapshots := make(map[string]struct{}) + opts := BackupOptions{} + testRunBackup(t, "", []string{datadir}, opts, env.gopts) + snapshots, firstSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts)) + + rtest.OK(t, os.Rename(modfile, modfile+"3")) + rtest.OK(t, os.Rename(submoddir, submoddir+"2")) + rtest.OK(t, appendRandomData(modfile+"1", 256*1024)) + rtest.OK(t, appendRandomData(modfile+"2", 256*1024)) + rtest.OK(t, os.Mkdir(modfile+"4", 0755)) + + testRunBackup(t, "", []string{datadir}, opts, env.gopts) + snapshots, secondSnapshotID := lastSnapshot(snapshots, loadSnapshotMap(t, env.gopts)) + + _, err := testRunDiffOutput(env.gopts, "", secondSnapshotID) + rtest.Assert(t, err != nil, "expected error on invalid snapshot id") + + out, err := testRunDiffOutput(env.gopts, firstSnapshotID, secondSnapshotID) + if err != nil { + t.Fatalf("expected no error from diff for test repository, got %v", err) + } + + for _, pattern := range diffOutputRegexPatterns { + r, err := regexp.Compile(pattern) + rtest.Assert(t, err == nil, "failed to compile regexp %v", pattern) + rtest.Assert(t, r.MatchString(out), "expected pattern %v in output, got\n%v", pattern, out) + } +}