From f0034104025894b28ca7e9c714a61314df79ec6c Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Sat, 19 Sep 2020 12:41:52 +0200 Subject: [PATCH] init: Add `--copy-chunker-params` option This allows creating multiple repositories with identical chunker parameters which is required for working deduplication when copying snapshots between different repositories. --- changelog/unreleased/issue-323 | 7 +++++ cmd/restic/cmd_copy.go | 3 +- cmd/restic/cmd_init.go | 50 +++++++++++++++++++++++++++---- cmd/restic/integration_test.go | 32 +++++++++++++++++++- doc/045_working_with_repos.rst | 25 +++++++++++++++- internal/repository/repository.go | 6 +++- 6 files changed, 114 insertions(+), 9 deletions(-) diff --git a/changelog/unreleased/issue-323 b/changelog/unreleased/issue-323 index 6b3b56b31..a01495eac 100644 --- a/changelog/unreleased/issue-323 +++ b/changelog/unreleased/issue-323 @@ -9,5 +9,12 @@ and destination repository. Also, the transferred files are not re-chunked, which may break deduplication between files already stored in the destination repo and files copied there using this command. +To fully support deduplication between repositories when the copy command is +used, the init command now supports the `--copy-chunker-params` option, +which initializes the new repository with the same parameters for splitting +files into chunks as an already existing repository. This allows copied +snapshots to be equally deduplicated in both repositories. + https://github.com/restic/restic/issues/323 https://github.com/restic/restic/pull/2606 +https://github.com/restic/restic/pull/2928 diff --git a/cmd/restic/cmd_copy.go b/cmd/restic/cmd_copy.go index d00f7dbc5..788c9a7e8 100644 --- a/cmd/restic/cmd_copy.go +++ b/cmd/restic/cmd_copy.go @@ -18,7 +18,8 @@ The "copy" command copies one or more snapshots from one repository to another repository. 
Note that this will have to read (download) and write (upload) the entire snapshot(s) due to the different encryption keys on the source and destination, and that transferred files are not re-chunked, which may break -their deduplication. +their deduplication. This can be mitigated by the "--copy-chunker-params" +option when initializing a new destination repository using the "init" command. `, RunE: func(cmd *cobra.Command, args []string) error { return runCopy(copyOptions, globalOptions, args) diff --git a/cmd/restic/cmd_init.go b/cmd/restic/cmd_init.go index becb02c55..6abc2695b 100644 --- a/cmd/restic/cmd_init.go +++ b/cmd/restic/cmd_init.go @@ -1,6 +1,7 @@ package main import ( + "github.com/restic/chunker" "github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/repository" @@ -20,19 +21,36 @@ Exit status is 0 if the command was successful, and non-zero if there was any er `, DisableAutoGenTag: true, RunE: func(cmd *cobra.Command, args []string) error { - return runInit(globalOptions, args) + return runInit(initOptions, globalOptions, args) }, } -func init() { - cmdRoot.AddCommand(cmdInit) +// InitOptions bundles all options for the init command. 
+type InitOptions struct { + secondaryRepoOptions + CopyChunkerParameters bool } -func runInit(gopts GlobalOptions, args []string) error { +var initOptions InitOptions + +func init() { + cmdRoot.AddCommand(cmdInit) + + f := cmdInit.Flags() + initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from") + f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)") +} + +func runInit(opts InitOptions, gopts GlobalOptions, args []string) error { if gopts.Repo == "" { return errors.Fatal("Please specify repository location (-r)") } + chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts) + if err != nil { + return err + } + be, err := create(gopts.Repo, gopts.extended) if err != nil { return errors.Fatalf("create repository at %s failed: %v\n", gopts.Repo, err) @@ -47,7 +65,7 @@ func runInit(gopts GlobalOptions, args []string) error { s := repository.New(be) - err = s.Init(gopts.ctx, gopts.password) + err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial) if err != nil { return errors.Fatalf("create key in repository at %s failed: %v\n", gopts.Repo, err) } @@ -60,3 +78,25 @@ func runInit(gopts GlobalOptions, args []string) error { return nil } + +func maybeReadChunkerPolynomial(opts InitOptions, gopts GlobalOptions) (*chunker.Pol, error) { + if opts.CopyChunkerParameters { + otherGopts, err := fillSecondaryGlobalOpts(opts.secondaryRepoOptions, gopts, "secondary") + if err != nil { + return nil, err + } + + otherRepo, err := OpenRepository(otherGopts) + if err != nil { + return nil, err + } + + pol := otherRepo.Config().ChunkerPolynomial + return &pol, nil + } + + if opts.Repo != "" { + return nil, errors.Fatal("Secondary repository must only be specified when copying the chunker parameters") + } + return nil, nil +} diff --git a/cmd/restic/integration_test.go b/cmd/restic/integration_test.go index 
deba04586..3958d6481 100644 --- a/cmd/restic/integration_test.go +++ b/cmd/restic/integration_test.go @@ -51,7 +51,7 @@ func testRunInit(t testing.TB, opts GlobalOptions) { restic.TestDisableCheckPolynomial(t) restic.TestSetLockTimeout(t, 0) - rtest.OK(t, runInit(opts, nil)) + rtest.OK(t, runInit(InitOptions{}, opts, nil)) t.Logf("repository initialized at %v", opts.Repo) } @@ -731,6 +731,36 @@ func TestCopyIncremental(t *testing.T) { len(copiedSnapshotIDs), len(snapshotIDs)) } +func TestInitCopyChunkerParams(t *testing.T) { + env, cleanup := withTestEnvironment(t) + defer cleanup() + env2, cleanup2 := withTestEnvironment(t) + defer cleanup2() + + testRunInit(t, env2.gopts) + + initOpts := InitOptions{ + secondaryRepoOptions: secondaryRepoOptions{ + Repo: env2.gopts.Repo, + password: env2.gopts.password, + }, + } + rtest.Assert(t, runInit(initOpts, env.gopts, nil) != nil, "expected invalid init options to fail") + + initOpts.CopyChunkerParameters = true + rtest.OK(t, runInit(initOpts, env.gopts, nil)) + + repo, err := OpenRepository(env.gopts) + rtest.OK(t, err) + + otherRepo, err := OpenRepository(env2.gopts) + rtest.OK(t, err) + + rtest.Assert(t, repo.Config().ChunkerPolynomial == otherRepo.Config().ChunkerPolynomial, + "expected equal chunker polynomials, got %v expected %v", repo.Config().ChunkerPolynomial, + otherRepo.Config().ChunkerPolynomial) +} + func testRunTag(t testing.TB, opts TagOptions, gopts GlobalOptions) { rtest.OK(t, runTag(opts, gopts, []string{})) } diff --git a/doc/045_working_with_repos.rst b/doc/045_working_with_repos.rst index 58573e3d6..8ce9f8815 100644 --- a/doc/045_working_with_repos.rst +++ b/doc/045_working_with_repos.rst @@ -110,7 +110,8 @@ be skipped by later copy runs. entire snapshot(s) due to the different encryption keys used in the source and destination repository. 
Also, the transferred files are not re-chunked, which may break deduplication between files already stored in the destination repo - and files copied there using this command. + and files copied there using this command. See the next section for how to avoid + this problem. For the destination repository ``--repo2`` the password can be read from a file ``--password-file2`` or from a command ``--password-command2``. @@ -142,6 +143,28 @@ which case only these instead of all snapshots will be copied: $ restic -r /srv/restic-repo copy --repo2 /srv/restic-repo-copy 410b18a2 4e5d5487 latest +Ensuring deduplication for copied snapshots +------------------------------------------- + +Even though the copy command can transfer snapshots between arbitrary repositories, +deduplication between snapshots from the source and destination repository may not work. +To ensure proper deduplication, both repositories have to use the same parameters for +splitting large files into smaller chunks, which requires additional setup steps. With +the same parameters, restic splits identical files into identical chunks in both +repositories, and therefore deduplication also works for snapshots copied between +these repositories. + +The chunker parameters are generated once when creating a new (destination) repository. +That is, for a copy destination repository we have to instruct restic to initialize it +using the same chunker parameters as the source repository: + +.. code-block:: console + + $ restic -r /srv/restic-repo-copy init --repo2 /srv/restic-repo --copy-chunker-params + +Note that it is not possible to change the chunker parameters of an existing repository. 
+ + Checking integrity and consistency ================================== diff --git a/internal/repository/repository.go b/internal/repository/repository.go index a5c6e2d61..3c6d9665f 100644 --- a/internal/repository/repository.go +++ b/internal/repository/repository.go @@ -8,6 +8,7 @@ import ( "io" "os" + "github.com/restic/chunker" "github.com/restic/restic/internal/cache" "github.com/restic/restic/internal/crypto" "github.com/restic/restic/internal/debug" @@ -614,7 +615,7 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int // Init creates a new master key with the supplied password, initializes and // saves the repository config. -func (r *Repository) Init(ctx context.Context, password string) error { +func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error { has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile}) if err != nil { return err @@ -627,6 +628,9 @@ func (r *Repository) Init(ctx context.Context, password string) error { if err != nil { return err } + if chunkerPolynomial != nil { + cfg.ChunkerPolynomial = *chunkerPolynomial + } return r.init(ctx, password, cfg) }