2
2
mirror of https://github.com/octoleo/restic.git synced 2024-12-23 11:28:54 +00:00

Merge pull request #1040 from restic/add-cache

Add local cache
This commit is contained in:
Alexander Neumann 2017-09-25 13:13:07 +02:00
commit 5a999cb77f
43 changed files with 1494 additions and 19 deletions

View File

@ -387,6 +387,16 @@ func runBackup(opts BackupOptions, gopts GlobalOptions, args []string) error {
return err return err
} }
// exclude restic cache
if repo.Cache != nil {
f, err := rejectResticCache(repo)
if err != nil {
return err
}
rejectFuncs = append(rejectFuncs, f)
}
err = repo.LoadIndex(context.TODO()) err = repo.LoadIndex(context.TODO())
if err != nil { if err != nil {
return err return err

View File

@ -19,6 +19,9 @@ var cmdCheck = &cobra.Command{
Long: ` Long: `
The "check" command tests the repository for errors and reports any errors it The "check" command tests the repository for errors and reports any errors it
finds. It can also be used to read all data and therefore simulate a restore. finds. It can also be used to read all data and therefore simulate a restore.
By default, the "check" command will always load all data directly from the
repository and not use a local cache.
`, `,
DisableAutoGenTag: true, DisableAutoGenTag: true,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
@ -30,6 +33,7 @@ finds. It can also be used to read all data and therefore simulate a restore.
type CheckOptions struct { type CheckOptions struct {
ReadData bool ReadData bool
CheckUnused bool CheckUnused bool
WithCache bool
} }
var checkOptions CheckOptions var checkOptions CheckOptions
@ -40,6 +44,7 @@ func init() {
f := cmdCheck.Flags() f := cmdCheck.Flags()
f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs") f.BoolVar(&checkOptions.ReadData, "read-data", false, "read all data blobs")
f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs") f.BoolVar(&checkOptions.CheckUnused, "check-unused", false, "find unused blobs")
f.BoolVar(&checkOptions.WithCache, "with-cache", false, "use the cache")
} }
func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress { func newReadProgress(gopts GlobalOptions, todo restic.Stat) *restic.Progress {
@ -77,6 +82,11 @@ func runCheck(opts CheckOptions, gopts GlobalOptions, args []string) error {
return errors.Fatal("check has no arguments") return errors.Fatal("check has no arguments")
} }
if !opts.WithCache {
// do not use a cache for the checker
gopts.NoCache = true
}
repo, err := OpenRepository(gopts) repo, err := OpenRepository(gopts)
if err != nil { if err != nil {
return err return err

View File

@ -85,6 +85,25 @@ func runPrune(gopts GlobalOptions) error {
return pruneRepository(gopts, repo) return pruneRepository(gopts, repo)
} }
// mixedBlobs reports whether list contains at least one tree blob and at
// least one data blob, i.e. whether the pack mixes both blob kinds.
func mixedBlobs(list []restic.Blob) bool {
	var hasTree, hasData bool

	for _, blob := range list {
		if blob.Type == restic.TreeBlob {
			hasTree = true
		} else if blob.Type == restic.DataBlob {
			hasData = true
		}

		// stop as soon as both kinds have been seen
		if hasTree && hasData {
			return true
		}
	}

	return false
}
func pruneRepository(gopts GlobalOptions, repo restic.Repository) error { func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
ctx := gopts.ctx ctx := gopts.ctx
@ -191,6 +210,11 @@ func pruneRepository(gopts GlobalOptions, repo restic.Repository) error {
// find packs that need a rewrite // find packs that need a rewrite
rewritePacks := restic.NewIDSet() rewritePacks := restic.NewIDSet()
for _, pack := range idx.Packs { for _, pack := range idx.Packs {
if mixedBlobs(pack.Entries) {
rewritePacks.Insert(pack.ID)
continue
}
for _, blob := range pack.Entries { for _, blob := range pack.Entries {
h := restic.BlobHandle{ID: blob.ID, Type: blob.Type} h := restic.BlobHandle{ID: blob.ID, Type: blob.Type}
if !usedBlobs.Has(h) { if !usedBlobs.Has(h) {

View File

@ -12,6 +12,7 @@ import (
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/filter" "github.com/restic/restic/internal/filter"
"github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/repository"
) )
// RejectFunc is a function that takes a filename and os.FileInfo of a // RejectFunc is a function that takes a filename and os.FileInfo of a
@ -177,3 +178,27 @@ func rejectByDevice(samples []string) (RejectFunc, error) {
panic(fmt.Sprintf("item %v, device id %v not found, allowedDevs: %v", item, id, allowed)) panic(fmt.Sprintf("item %v, device id %v not found, allowedDevs: %v", item, id, allowed))
}, nil }, nil
} }
// rejectResticCache returns a RejectFunc that excludes the local restic
// cache directory from a backup (if a cache is configured for repo).
func rejectResticCache(repo *repository.Repository) (RejectFunc, error) {
	if repo.Cache == nil {
		// no cache in use: reject nothing
		return func(string, os.FileInfo) bool { return false }, nil
	}

	base := repo.Cache.BaseDir()
	if base == "" {
		return nil, errors.New("cacheBase is empty string")
	}

	return func(item string, _ os.FileInfo) bool {
		inCache := fs.HasPathPrefix(base, item)
		if inCache {
			debug.Log("rejecting restic cache directory %v", item)
		}
		return inCache
	}, nil
}

View File

@ -19,6 +19,7 @@ import (
"github.com/restic/restic/internal/backend/s3" "github.com/restic/restic/internal/backend/s3"
"github.com/restic/restic/internal/backend/sftp" "github.com/restic/restic/internal/backend/sftp"
"github.com/restic/restic/internal/backend/swift" "github.com/restic/restic/internal/backend/swift"
"github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/options" "github.com/restic/restic/internal/options"
"github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/repository"
@ -38,6 +39,8 @@ type GlobalOptions struct {
Quiet bool Quiet bool
NoLock bool NoLock bool
JSON bool JSON bool
CacheDir string
NoCache bool
ctx context.Context ctx context.Context
password string password string
@ -68,7 +71,8 @@ func init() {
f.BoolVarP(&globalOptions.Quiet, "quiet", "q", false, "do not output comprehensive progress report") f.BoolVarP(&globalOptions.Quiet, "quiet", "q", false, "do not output comprehensive progress report")
f.BoolVar(&globalOptions.NoLock, "no-lock", false, "do not lock the repo, this allows some operations on read-only repos") f.BoolVar(&globalOptions.NoLock, "no-lock", false, "do not lock the repo, this allows some operations on read-only repos")
f.BoolVarP(&globalOptions.JSON, "json", "", false, "set output mode to JSON for commands that support it") f.BoolVarP(&globalOptions.JSON, "json", "", false, "set output mode to JSON for commands that support it")
f.StringVar(&globalOptions.CacheDir, "cache-dir", "", "set the cache directory")
f.BoolVar(&globalOptions.NoCache, "no-cache", false, "do not use a local cache")
f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)") f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)")
restoreTerminal() restoreTerminal()
@ -322,6 +326,17 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
return nil, err return nil, err
} }
if opts.NoCache {
return s, nil
}
cache, err := cache.New(s.Config().ID, opts.CacheDir)
if err != nil {
Warnf("unable to open cache: %v\n", err)
} else {
s.UseCache(cache)
}
return s, nil return s, nil
} }

View File

@ -199,6 +199,7 @@ func withTestEnvironment(t testing.TB) (env *testEnvironment, cleanup func()) {
env.gopts = GlobalOptions{ env.gopts = GlobalOptions{
Repo: env.repo, Repo: env.repo,
Quiet: true, Quiet: true,
CacheDir: env.cache,
ctx: context.Background(), ctx: context.Background(),
password: TestPassword, password: TestPassword,
stdout: os.Stdout, stdout: os.Stdout,

26
doc/cache.rst Normal file
View File

@ -0,0 +1,26 @@
Local Cache
===========
In order to speed up certain operations, restic manages a local cache of data.
This document describes the data structures for the local cache with version 1.
Versions
--------
The cache directory is selected according to the `XDG base dir specification
<http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html>`__.
Each repository has its own cache sub-directory, consisting of the repository ID
which is chosen at ``init``. All cache directories for different repos are
independent of each other.
The cache dir for a repo contains a file named ``version``, which contains a
single ASCII integer line that stands for the current version of the cache. If
a lower version number is found the cache is recreated with the current
version. If a higher version number is found the cache is ignored and left as
is.
Snapshots and Indexes
---------------------
Snapshot, Data and Index files are cached in the sub-directories ``snapshots``,
``data`` and ``index``, as read from the repository.

View File

@ -40,10 +40,18 @@ $ sudo restic autocomplete
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -78,10 +78,18 @@ given as the arguments.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -25,10 +25,18 @@ The "cat" command is used to print internal objects to stdout.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -18,6 +18,10 @@ restic\-check \- Check the repository for errors
The "check" command tests the repository for errors and reports any errors it The "check" command tests the repository for errors and reports any errors it
finds. It can also be used to read all data and therefore simulate a restore. finds. It can also be used to read all data and therefore simulate a restore.
.PP
By default, the "check" command will always load all data directly from the
repository and not use a local cache.
.SH OPTIONS .SH OPTIONS
.PP .PP
@ -32,12 +36,24 @@ finds. It can also be used to read all data and therefore simulate a restore.
\fB\-\-read\-data\fP[=false] \fB\-\-read\-data\fP[=false]
read all data blobs read all data blobs
.PP
\fB\-\-with\-cache\fP[=false]
use the cache
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -26,10 +26,18 @@ is used for debugging purposes only.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -58,10 +58,18 @@ repo.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -88,10 +88,18 @@ data after 'forget' was run successfully, see the 'prune' command.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -25,10 +25,18 @@ The "init" command initializes a new repository.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -25,10 +25,18 @@ The "key" command manages keys (passwords) for accessing the repository.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -25,10 +25,18 @@ The "list" command allows listing objects in the repository based on type.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -44,10 +44,18 @@ The special snapshot\-ID "latest" can be used to list files and directories of t
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -31,10 +31,18 @@ set and no command is specified, all manpages are written to the directory.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -30,10 +30,18 @@ name is explicitly given, a list of migrations that can be applied is printed.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -50,10 +50,18 @@ read\-only mount.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -26,10 +26,18 @@ referenced and therefore not needed any more.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -26,10 +26,18 @@ repository.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -54,10 +54,18 @@ repository.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -41,10 +41,18 @@ The "snapshots" command lists all snapshots stored in the repository.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -56,10 +56,18 @@ When no snapshot\-ID is given, all snapshots matching the host, tag and path fil
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -29,10 +29,18 @@ The "unlock" command removes stale locks that have been created by other restic
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -26,10 +26,18 @@ and the version of this software.
.SH OPTIONS INHERITED FROM PARENT COMMANDS .SH OPTIONS INHERITED FROM PARENT COMMANDS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -20,6 +20,10 @@ directories in an encrypted repository stored on different backends.
.SH OPTIONS .SH OPTIONS
.PP
\fB\-\-cache\-dir\fP=""
set the cache directory
.PP .PP
\fB\-h\fP, \fB\-\-help\fP[=false] \fB\-h\fP, \fB\-\-help\fP[=false]
help for restic help for restic
@ -28,6 +32,10 @@ directories in an encrypted repository stored on different backends.
\fB\-\-json\fP[=false] \fB\-\-json\fP[=false]
set output mode to JSON for commands that support it set output mode to JSON for commands that support it
.PP
\fB\-\-no\-cache\fP[=false]
do not use a local cache
.PP .PP
\fB\-\-no\-lock\fP[=false] \fB\-\-no\-lock\fP[=false]
do not lock the repo, this allows some operations on read\-only repos do not lock the repo, this allows some operations on read\-only repos

View File

@ -1240,3 +1240,19 @@ instead of the default, set the environment variable like this:
$ export TMPDIR=/var/tmp/restic-tmp $ export TMPDIR=/var/tmp/restic-tmp
$ restic -r /tmp/backup backup ~/work $ restic -r /tmp/backup backup ~/work
Caching
-------
Restic keeps a cache with some files from the repository on the local machine.
This allows faster operations, since metadata does not need to be loaded from
a remote repository. The cache is automatically created, usually in the
directory ``.cache/restic`` in the user's home directory. The environment
variable ``XDG_CACHE_HOME`` (the variable defined by the XDG base directory
specification) or the command line parameter ``--cache-dir`` can
each be used to specify where the cache is located. The parameter
``--no-cache`` disables the cache entirely. In this case, all data is loaded
from the repo.
The cache is ephemeral: When a file cannot be read from the cache, it is loaded
from the repository.

View File

@ -7,3 +7,7 @@ References
------------------------ ------------------------
.. include:: rest_backend.rst .. include:: rest_backend.rst
------------------------
.. include:: cache.rst

170
internal/cache/backend.go vendored Normal file
View File

@ -0,0 +1,170 @@
package cache
import (
"context"
"io"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/restic"
)
// Backend wraps a restic.Backend and transparently caches index and
// snapshot files (and, on readahead, data files) in the embedded *Cache.
type Backend struct {
	restic.Backend
	*Cache
}

// ensure Backend implements restic.Backend
var _ restic.Backend = &Backend{}
// Remove deletes a file from the backend and, if it has been cached, from
// the cache as well.
func (b *Backend) Remove(ctx context.Context, h restic.Handle) error {
	debug.Log("cache Remove(%v)", h)

	// delete from the backend first; only drop the cached copy when the
	// backend removal succeeded
	if err := b.Backend.Remove(ctx, h); err != nil {
		return err
	}

	return b.Cache.Remove(h)
}
type teeReader struct {
rd io.Reader
wr io.Writer
err error
}
func (t *teeReader) Read(p []byte) (n int, err error) {
n, err = t.rd.Read(p)
if t.err == nil && n > 0 {
_, t.err = t.wr.Write(p[:n])
}
return n, err
}
// autoCacheTypes lists the file types which are automatically stored in the
// cache when they are saved.
var autoCacheTypes = map[restic.FileType]struct{}{
	restic.IndexFile:    struct{}{},
	restic.SnapshotFile: struct{}{},
}

// Save stores a new file in the backend and the cache.
func (b *Backend) Save(ctx context.Context, h restic.Handle, rd io.Reader) (err error) {
	if _, ok := autoCacheTypes[h.Type]; !ok {
		return b.Backend.Save(ctx, h, rd)
	}

	debug.Log("Save(%v): auto-store in the cache", h)

	wr, err := b.Cache.SaveWriter(h)
	if err != nil {
		// the cache is best-effort: fall back to a plain backend save
		debug.Log("unable to save %v to cache: %v", h, err)
		return b.Backend.Save(ctx, h, rd)
	}

	tr := &teeReader{rd: rd, wr: wr}
	err = b.Backend.Save(ctx, h, tr)
	if err != nil {
		wr.Close()
		b.Cache.Remove(h)
		return err
	}

	if err = wr.Close(); err != nil {
		debug.Log("cache writer returned error: %v", err)
		_ = b.Cache.Remove(h)
		return nil
	}

	// the backend save succeeded, but if mirroring the data into the cache
	// failed mid-stream the cached copy is incomplete: drop it so a later
	// Load falls back to the backend instead of returning truncated data.
	if tr.err != nil {
		debug.Log("cache write error for %v: %v", h, tr.err)
		_ = b.Cache.Remove(h)
	}

	return nil
}
// autoCacheFiles lists the file types which are fetched in full and cached
// when loaded.
var autoCacheFiles = map[restic.FileType]bool{
	restic.IndexFile:    true,
	restic.SnapshotFile: true,
}

// cacheFile loads the complete file h from the backend and stores it in the
// cache.
func (b *Backend) cacheFile(ctx context.Context, h restic.Handle) error {
	rd, err := b.Backend.Load(ctx, h, 0, 0)
	if err != nil {
		return err
	}

	if err = b.Cache.Save(h, rd); err != nil {
		// close the reader so the backend connection is not leaked; the
		// save error is the more interesting one to report
		_ = rd.Close()
		return err
	}

	if err = rd.Close(); err != nil {
		// try to remove from the cache, ignore errors
		_ = b.Cache.Remove(h)
		return err
	}

	return nil
}
// Load loads a file from the cache if possible, otherwise from the backend.
// Full reads of auto-cached file types populate the cache on the way;
// partial reads may trigger a readahead that caches the whole file first.
func (b *Backend) Load(ctx context.Context, h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
	if b.Cache.Has(h) {
		debug.Log("Load(%v, %v, %v) from cache", h, length, offset)
		rd, err := b.Cache.Load(h, length, offset)
		if err == nil {
			return rd, nil
		}
		// cached copy could not be read; fall through to the backend
		debug.Log("error loading %v from cache: %v", h, err)
	}

	// partial file requested
	if offset != 0 || length != 0 {
		if b.Cache.PerformReadahead(h) {
			// fetch the complete file into the cache, then serve the
			// requested range from there
			debug.Log("performing readahead for %v", h)
			err := b.cacheFile(ctx, h)
			if err == nil {
				return b.Cache.Load(h, length, offset)
			}
			debug.Log("error caching %v: %v", h, err)
		}
		debug.Log("Load(%v, %v, %v): partial file requested, delegating to backend", h, length, offset)
		return b.Backend.Load(ctx, h, length, offset)
	}

	// if we don't automatically cache this file type, fall back to the backend
	if _, ok := autoCacheFiles[h.Type]; !ok {
		debug.Log("Load(%v, %v, %v): delegating to backend", h, length, offset)
		return b.Backend.Load(ctx, h, length, offset)
	}

	debug.Log("auto-store %v in the cache", h)
	err := b.cacheFile(ctx, h)
	if err == nil {
		// load the cached version
		return b.Cache.Load(h, 0, 0)
	}

	// caching failed (best-effort): serve the request from the backend
	debug.Log("error caching %v: %v, falling back to backend", h, err)
	return b.Backend.Load(ctx, h, length, offset)
}
// Stat tests whether the backend has a file. If the file does not exist in
// the backend but is still present in the cache, the stale cached copy is
// removed.
func (b *Backend) Stat(ctx context.Context, h restic.Handle) (restic.FileInfo, error) {
	debug.Log("cache Stat(%v)", h)

	fi, err := b.Backend.Stat(ctx, h)
	if err != nil && b.Backend.IsNotExist(err) {
		// try to remove from the cache, ignore errors
		_ = b.Cache.Remove(h)
	}

	return fi, err
}
// IsNotExist returns true if the error is caused by a non-existing file.
// The check is delegated unchanged to the wrapped backend, which knows its
// own error types.
func (b *Backend) IsNotExist(err error) bool {
	return b.Backend.IsNotExist(err)
}

114
internal/cache/backend_test.go vendored Normal file
View File

@ -0,0 +1,114 @@
package cache
import (
"bytes"
"context"
"math/rand"
"testing"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/mem"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
// loadAndCompare loads the file for h from be and fails the test when the
// returned bytes differ from data.
func loadAndCompare(t testing.TB, be restic.Backend, h restic.Handle, data []byte) {
	buf, err := backend.LoadAll(context.TODO(), be, h)
	if err != nil {
		t.Fatal(err)
	}

	if len(buf) != len(data) {
		t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf))
	}

	if !bytes.Equal(buf, data) {
		// only print a short prefix of the (potentially large) buffers;
		// clamp the prefix length so short payloads do not panic
		n := 16
		if len(data) < n {
			n = len(data)
		}
		if len(buf) < n {
			n = len(buf)
		}
		t.Fatalf("wrong data returned, want:\n  %02x\ngot:\n  %02x", data[:n], buf[:n])
	}
}
// save stores data under handle h in be, aborting the test on error.
func save(t testing.TB, be restic.Backend, h restic.Handle, data []byte) {
	if err := be.Save(context.TODO(), h, bytes.NewReader(data)); err != nil {
		t.Fatal(err)
	}
}
// remove deletes the file for handle h from be, aborting the test on error.
func remove(t testing.TB, be restic.Backend, h restic.Handle) {
	if err := be.Remove(context.TODO(), h); err != nil {
		t.Fatal(err)
	}
}
// randomData returns a handle for an index file together with n bytes of
// random data.
func randomData(n int) (restic.Handle, []byte) {
	data := test.Random(rand.Int(), n)
	id := restic.Hash(data)
	// NOTE(review): this overwrites the computed hash with the first bytes
	// of the data, so the handle name is not the content hash — confirm
	// this is intentional.
	copy(id[:], data)
	h := restic.Handle{
		Type: restic.IndexFile,
		Name: id.String(),
	}
	return h, data
}
// TestBackend exercises the cache-wrapped backend: loads populate the
// cache, removes clear it, and Stat on a file deleted behind the cache's
// back evicts the stale cache entry.
func TestBackend(t *testing.T) {
	be := mem.New()

	c, cleanup := TestNewCache(t)
	defer cleanup()

	wbe := c.Wrap(be)

	h, data := randomData(5234142)

	// save directly in backend
	save(t, be, h, data)
	if c.Has(h) {
		t.Errorf("cache has file too early")
	}

	// load data via cache
	loadAndCompare(t, wbe, h, data)
	if !c.Has(h) {
		t.Errorf("cache doesn't have file after load")
	}

	// remove via cache
	remove(t, wbe, h)
	if c.Has(h) {
		t.Errorf("cache has file after remove")
	}

	// save via cache
	save(t, wbe, h, data)
	if !c.Has(h) {
		t.Errorf("cache doesn't have file after save")
	}

	// load data directly from backend
	loadAndCompare(t, be, h, data)

	// load data via cache (the original called the plain backend here,
	// which merely repeated the previous check)
	loadAndCompare(t, wbe, h, data)

	// remove directly from the backend; the cache should still hold a copy
	remove(t, be, h)
	if !c.Has(h) {
		t.Errorf("file not in cache any more")
	}

	// run stat
	_, err := wbe.Stat(context.TODO(), h)
	if err == nil {
		t.Errorf("expected error for removed file not found, got nil")
	}

	if !wbe.IsNotExist(err) {
		t.Errorf("Stat() returned error that does not match IsNotExist(): %v", err)
	}

	if c.Has(h) {
		t.Errorf("removed file still in cache after stat")
	}
}

167
internal/cache/cache.go vendored Normal file
View File

@ -0,0 +1,167 @@
package cache
import (
	"fmt"
	"io/ioutil"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/pkg/errors"

	"github.com/restic/restic/internal/debug"
	"github.com/restic/restic/internal/fs"
	"github.com/restic/restic/internal/restic"
)
// Cache manages a local cache.
type Cache struct {
	Path string // directory holding this repository's cache
	Base string // base directory shared by the caches of all repositories
	// PerformReadahead is consulted on partial reads; when it returns true
	// for a handle, the complete file is fetched and cached instead.
	PerformReadahead func(restic.Handle) bool
}

// permissions for cache directories and files
const dirMode = 0700
const fileMode = 0600
// readVersion reads the cache version from the file "version" in dir. A
// missing version file is not an error and yields version 0.
func readVersion(dir string) (v uint, err error) {
	buf, err := ioutil.ReadFile(filepath.Join(dir, "version"))
	if os.IsNotExist(err) {
		return 0, nil
	}

	if err != nil {
		return 0, errors.Wrap(err, "ReadFile")
	}

	// tolerate surrounding whitespace, e.g. a trailing newline added when
	// the file was written or edited by another tool
	ver, err := strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 32)
	if err != nil {
		return 0, errors.Wrap(err, "ParseUint")
	}

	return uint(ver), nil
}
// cacheVersion is the current on-disk format version of the cache.
const cacheVersion = 1

// ensure Cache implements restic.Cache
var _ restic.Cache = &Cache{}

// cacheLayoutPaths maps each cached file type to its sub-directory inside
// the cache directory.
var cacheLayoutPaths = map[restic.FileType]string{
	restic.DataFile:     "data",
	restic.SnapshotFile: "snapshots",
	restic.IndexFile:    "index",
}
const cachedirTagSignature = "Signature: 8a477f597d28d172789f06886806bc55\n"
// writeCachedirTag marks dir as a cache directory by creating a CACHEDIR.TAG
// file inside it, creating dir first if necessary. An already existing tag
// file is left untouched and no error is returned.
func writeCachedirTag(dir string) error {
	if err := fs.MkdirAll(dir, dirMode); err != nil {
		return err
	}

	tagname := filepath.Join(dir, "CACHEDIR.TAG")
	f, err := fs.OpenFile(tagname, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0644)
	if err != nil {
		// O_EXCL failed because the tag already exists: nothing to do
		if os.IsExist(errors.Cause(err)) {
			return nil
		}

		return errors.Wrap(err, "OpenFile")
	}

	debug.Log("Create CACHEDIR.TAG at %v", dir)
	if _, err = f.Write([]byte(cachedirTagSignature)); err != nil {
		_ = f.Close()
		return errors.Wrap(err, "Write")
	}

	return f.Close()
}
// New returns a new cache for the repo ID at basedir. If basedir is the empty
// string, the default cache location (according to the XDG standard) is used.
//
// For partial files, the complete file is loaded and stored in the cache when
// performReadahead returns true.
func New(id string, basedir string) (c *Cache, err error) {
	if basedir == "" {
		basedir, err = getXDGCacheDir()
		if err != nil {
			return nil, err
		}
	}

	// create base dir and tag it as a cache directory
	if err = writeCachedirTag(basedir); err != nil {
		return nil, err
	}

	cachedir := filepath.Join(basedir, id)
	debug.Log("using cache dir %v", cachedir)

	// refuse to touch a cache written by a newer restic version
	v, err := readVersion(cachedir)
	if err != nil {
		return nil, err
	}

	if v > cacheVersion {
		return nil, errors.New("cache version is newer")
	}

	// create the repo cache dir if it does not exist yet
	if err = fs.MkdirAll(cachedir, dirMode); err != nil {
		return nil, err
	}

	// record (or upgrade) the cache layout version
	if v < cacheVersion {
		err = ioutil.WriteFile(filepath.Join(cachedir, "version"), []byte(fmt.Sprintf("%d", cacheVersion)), 0644)
		if err != nil {
			return nil, errors.Wrap(err, "WriteFile")
		}
	}

	// make sure the subdirectory for each cacheable file type exists
	for _, p := range cacheLayoutPaths {
		if err = fs.MkdirAll(filepath.Join(cachedir, p), dirMode); err != nil {
			return nil, err
		}
	}

	c = &Cache{
		Path: cachedir,
		Base: basedir,
		PerformReadahead: func(restic.Handle) bool {
			// do not perform readahead by default
			return false
		},
	}

	return c, nil
}
// errNoSuchFile is returned when a file is not cached.
type errNoSuchFile struct {
	Type string // file type (e.g. index, data, snapshot)
	Name string // file name within the cache
}

// Error returns a textual description of the missing file.
func (e errNoSuchFile) Error() string {
	return fmt.Sprintf("file %v (%v) is not cached", e.Name, e.Type)
}
// IsNotExist returns true if the error was caused by a non-existing file,
// i.e. it is an errNoSuchFile (possibly wrapped; errors.Cause unwraps it).
func (c *Cache) IsNotExist(err error) bool {
	_, ok := errors.Cause(err).(errNoSuchFile)
	return ok
}
// Wrap returns a backend with a cache.
func (c *Cache) Wrap(be restic.Backend) restic.Backend {
	return &Backend{Backend: be, Cache: c}
}
// BaseDir returns the base directory, i.e. the directory that contains the
// per-repository cache directories.
func (c *Cache) BaseDir() string {
	return c.Base
}

49
internal/cache/dir.go vendored Normal file
View File

@ -0,0 +1,49 @@
package cache
import (
"os"
"path/filepath"
"github.com/pkg/errors"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
)
// getXDGCacheDir returns the cache directory according to XDG basedir spec, see
// http://standards.freedesktop.org/basedir-spec/basedir-spec-latest.html
// The directory is created on first use and must be (or resolve to) a
// directory.
func getXDGCacheDir() (string, error) {
	xdgcache := os.Getenv("XDG_CACHE_HOME")
	home := os.Getenv("HOME")

	if xdgcache == "" && home == "" {
		return "", errors.New("unable to locate cache directory (XDG_CACHE_HOME and HOME unset)")
	}

	// XDG_CACHE_HOME takes precedence; otherwise fall back to ~/.cache
	var cachedir string
	if xdgcache != "" {
		cachedir = filepath.Join(xdgcache, "restic")
	} else {
		cachedir = filepath.Join(home, ".cache", "restic")
	}

	fi, err := fs.Stat(cachedir)
	if os.IsNotExist(errors.Cause(err)) {
		// first use: create the directory and stat it again
		if err = fs.MkdirAll(cachedir, 0700); err != nil {
			return "", errors.Wrap(err, "MkdirAll")
		}

		fi, err = fs.Stat(cachedir)
		debug.Log("create cache dir %v", cachedir)
	}

	if err != nil {
		return "", errors.Wrap(err, "Stat")
	}

	if !fi.IsDir() {
		return "", errors.Errorf("cache dir %v is not a directory", cachedir)
	}

	return cachedir, nil
}

207
internal/cache/file.go vendored Normal file
View File

@ -0,0 +1,207 @@
package cache
import (
"io"
"os"
"path/filepath"
"github.com/pkg/errors"
"github.com/restic/restic/internal/crypto"
"github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/fs"
"github.com/restic/restic/internal/restic"
)
// filename returns the path inside the cache for the file described by h.
// Files are sharded into subdirectories named after the first two characters
// of the file name; a name shorter than that is a programmer error.
func (c *Cache) filename(h restic.Handle) string {
	if len(h.Name) < 2 {
		panic("Name is empty or too short")
	}

	return filepath.Join(c.Path, cacheLayoutPaths[h.Type], h.Name[:2], h.Name)
}
// canBeCached reports whether files of type t are stored in this cache.
// It is safe to call on a nil *Cache, which caches nothing.
func (c *Cache) canBeCached(t restic.FileType) bool {
	if c == nil {
		return false
	}

	_, ok := cacheLayoutPaths[t]
	return ok
}
// readCloser combines an independent io.Reader and io.Closer, so that e.g. a
// LimitReader can be handed out while the underlying file is still closed.
type readCloser struct {
	io.Reader
	io.Closer
}
// Load returns a reader that yields the contents of the file with the
// given handle. rd must be closed after use. If an error is returned, the
// ReadCloser is nil. A length of 0 means "read to the end of the file".
func (c *Cache) Load(h restic.Handle, length int, offset int64) (io.ReadCloser, error) {
	debug.Log("Load from cache: %v", h)
	if !c.canBeCached(h.Type) {
		return nil, errors.New("cannot be cached")
	}

	f, err := fs.Open(c.filename(h))
	if err != nil {
		return nil, errors.Wrap(err, "Open")
	}

	fi, err := f.Stat()
	if err != nil {
		_ = f.Close()
		return nil, errors.Wrap(err, "Stat")
	}

	// a cached repository file shorter than the crypto overhead alone can
	// never hold valid data, so evict it from the cache
	if fi.Size() <= crypto.Extension {
		_ = f.Close()
		_ = c.Remove(h)
		return nil, errors.New("cached file is truncated, removing")
	}

	if offset > 0 {
		if _, err = f.Seek(offset, io.SeekStart); err != nil {
			f.Close()
			return nil, err
		}
	}

	// the Closer stays the file itself even when the Reader is limited
	rd := readCloser{Reader: f, Closer: f}
	if length > 0 {
		rd.Reader = io.LimitReader(f, int64(length))
	}

	return rd, nil
}
// SaveWriter returns a writer for the cache object h. It must be closed after writing is finished.
func (c *Cache) SaveWriter(h restic.Handle) (io.WriteCloser, error) {
	debug.Log("Save to cache: %v", h)
	if !c.canBeCached(h.Type) {
		return nil, errors.New("cannot be cached")
	}

	p := c.filename(h)
	err := fs.MkdirAll(filepath.Dir(p), 0700)
	if err != nil {
		return nil, errors.Wrap(err, "MkdirAll")
	}

	// O_EXCL ensures an existing cache entry is never clobbered; 0400 makes
	// the cached file read-only once written.
	f, err := fs.OpenFile(p, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0400)
	if err != nil {
		return nil, errors.Wrap(err, "Create")
	}

	// err is known to be nil here; return it explicitly
	return f, nil
}
// Save stores the contents of rd in the cache under the handle h. On a
// failed copy the partially written file is removed again.
func (c *Cache) Save(h restic.Handle, rd io.Reader) error {
	debug.Log("Save to cache: %v", h)

	if rd == nil {
		return errors.New("Save() called with nil reader")
	}

	wr, err := c.SaveWriter(h)
	if err != nil {
		return err
	}

	if _, err = io.Copy(wr, rd); err != nil {
		// best effort cleanup of the partial file
		_ = wr.Close()
		_ = c.Remove(h)
		return errors.Wrap(err, "Copy")
	}

	// errors.Wrap returns nil when Close succeeds
	return errors.Wrap(wr.Close(), "Close")
}
// Remove deletes a file. When the file is not cached, no error is returned.
func (c *Cache) Remove(h restic.Handle) error {
	if !c.Has(h) {
		return nil
	}

	return fs.Remove(c.filename(h))
}
// Clear removes all files of type t from the cache that are not contained in
// the set valid.
func (c *Cache) Clear(t restic.FileType, valid restic.IDSet) error {
	debug.Log("Clearing cache for %v: %v valid files", t, len(valid))
	if !c.canBeCached(t) {
		return nil
	}

	cached, err := c.list(t)
	if err != nil {
		return err
	}

	for id := range cached {
		if valid.Has(id) {
			continue
		}

		h := restic.Handle{Type: t, Name: id.String()}
		if err = fs.Remove(c.filename(h)); err != nil {
			return err
		}
	}

	return nil
}
func isFile(fi os.FileInfo) bool {
return fi.Mode()&(os.ModeType|os.ModeCharDevice) == 0
}
// list returns a list of all files of type t in the cache, identified by the
// IDs parsed from their file names.
func (c *Cache) list(t restic.FileType) (restic.IDSet, error) {
	if !c.canBeCached(t) {
		return nil, errors.New("cannot be cached")
	}

	list := restic.NewIDSet()
	dir := filepath.Join(c.Path, cacheLayoutPaths[t])
	err := filepath.Walk(dir, func(name string, fi os.FileInfo, err error) error {
		if err != nil {
			return errors.Wrap(err, "Walk")
		}

		// skip directories and other non-regular entries
		if !isFile(fi) {
			return nil
		}

		// silently ignore files whose name is not a valid ID
		id, err := restic.ParseID(filepath.Base(name))
		if err != nil {
			return nil
		}

		list.Insert(id)
		return nil
	})

	return list, err
}
// Has returns true if the file is cached, i.e. its type is cacheable and a
// stat of the cache path succeeds.
func (c *Cache) Has(h restic.Handle) bool {
	if !c.canBeCached(h.Type) {
		return false
	}

	_, err := fs.Stat(c.filename(h))
	return err == nil
}

259
internal/cache/file_test.go vendored Normal file
View File

@ -0,0 +1,259 @@
package cache
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"math/rand"
"testing"
"time"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
// generateRandomFiles stores between 10 and 24 random 512 KiB blobs of type
// tpe in the cache and returns the set of their IDs.
func generateRandomFiles(t testing.TB, tpe restic.FileType, c *Cache) restic.IDSet {
	ids := restic.NewIDSet()

	count := rand.Intn(15) + 10
	for i := 0; i < count; i++ {
		buf := test.Random(rand.Int(), 1<<19)
		id := restic.Hash(buf)
		h := restic.Handle{Type: tpe, Name: id.String()}

		if c.Has(h) {
			t.Errorf("index %v present before save", id)
		}

		if err := c.Save(h, bytes.NewReader(buf)); err != nil {
			t.Fatal(err)
		}

		ids.Insert(id)
	}

	return ids
}
// randomID returns a random ID from s (map iteration order is randomized, so
// the first element yielded is arbitrary). It panics when s is empty.
func randomID(s restic.IDSet) restic.ID {
	for id := range s {
		return id
	}

	panic("set is empty")
}
// load reads the complete cached file for h and returns its contents,
// failing the test on any error.
func load(t testing.TB, c *Cache, h restic.Handle) []byte {
	rd, err := c.Load(h, 0, 0)
	if err != nil {
		t.Fatal(err)
	}

	if rd == nil {
		t.Fatalf("Load() returned nil reader")
	}

	buf, err := ioutil.ReadAll(rd)
	if err != nil {
		t.Fatal(err)
	}

	if err := rd.Close(); err != nil {
		t.Fatal(err)
	}

	return buf
}
// listFiles returns the IDs of all files of type tpe in the cache.
func listFiles(t testing.TB, c *Cache, tpe restic.FileType) restic.IDSet {
	files, err := c.list(tpe)
	if err != nil {
		t.Errorf("listing failed: %v", err)
	}

	return files
}
// clearFiles removes all files of type tpe from the cache except those whose
// IDs are in valid.
func clearFiles(t testing.TB, c *Cache, tpe restic.FileType, valid restic.IDSet) {
	err := c.Clear(tpe, valid)
	if err != nil {
		t.Error(err)
	}
}
// TestFiles checks Save/Load/list/Clear round trips for all cacheable file
// types.
func TestFiles(t *testing.T) {
	seed := time.Now().Unix()
	t.Logf("seed is %v", seed)
	rand.Seed(seed)

	c, cleanup := TestNewCache(t)
	defer cleanup()

	var tests = []restic.FileType{
		restic.SnapshotFile,
		restic.DataFile,
		restic.IndexFile,
	}

	for _, tpe := range tests {
		t.Run(fmt.Sprintf("%v", tpe), func(t *testing.T) {
			ids := generateRandomFiles(t, tpe, c)
			id := randomID(ids)

			h := restic.Handle{Type: tpe, Name: id.String()}
			id2 := restic.Hash(load(t, c, h))

			if !id.Equal(id2) {
				t.Errorf("wrong data returned, want %v, got %v", id.Str(), id2.Str())
			}

			if !c.Has(h) {
				t.Errorf("cache thinks index %v isn't present", id.Str())
			}

			list := listFiles(t, c, tpe)
			if !ids.Equals(list) {
				t.Errorf("wrong list of index IDs returned, want:\n  %v\ngot:\n  %v", ids, list)
			}

			// Clear with a valid set of only id must keep exactly that file
			clearFiles(t, c, tpe, restic.NewIDSet(id))
			list2 := listFiles(t, c, tpe)
			want := restic.NewIDSet(id)
			// fixed: want and got were previously passed in swapped order
			if !list2.Equals(want) {
				t.Errorf("Clear removed wrong files, want:\n  %v\ngot:\n  %v", want, list2)
			}

			// Clear with an empty valid set must remove everything
			clearFiles(t, c, tpe, restic.NewIDSet())
			want = restic.NewIDSet()
			list3 := listFiles(t, c, tpe)
			if !list3.Equals(want) {
				t.Errorf("Clear returned a wrong list, want:\n  %v\ngot:\n  %v", want, list3)
			}
		})
	}
}
// TestFileSaveWriter checks that data written through SaveWriter can be read
// back unmodified via Load.
func TestFileSaveWriter(t *testing.T) {
	seed := time.Now().Unix()
	t.Logf("seed is %v", seed)
	rand.Seed(seed)

	c, cleanup := TestNewCache(t)
	defer cleanup()

	// save about 5 MiB of data in the cache
	data := test.Random(rand.Int(), 5234142)
	id := restic.ID{}
	copy(id[:], data)
	h := restic.Handle{
		Type: restic.DataFile,
		Name: id.String(),
	}

	wr, err := c.SaveWriter(h)
	if err != nil {
		t.Fatal(err)
	}

	n, err := io.Copy(wr, bytes.NewReader(data))
	if err != nil {
		t.Fatal(err)
	}

	if n != int64(len(data)) {
		t.Fatalf("wrong number of bytes written, want %v, got %v", len(data), n)
	}

	if err = wr.Close(); err != nil {
		t.Fatal(err)
	}

	// read the complete file back (length 0 = to EOF)
	rd, err := c.Load(h, 0, 0)
	if err != nil {
		t.Fatal(err)
	}

	buf, err := ioutil.ReadAll(rd)
	if err != nil {
		t.Fatal(err)
	}

	if len(buf) != len(data) {
		t.Fatalf("wrong number of bytes read, want %v, got %v", len(data), len(buf))
	}

	if !bytes.Equal(buf, data) {
		t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[:16], buf[:16])
	}

	if err = rd.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestFileLoad exercises Load with various offset/length combinations and
// verifies the returned byte ranges against the saved data.
func TestFileLoad(t *testing.T) {
	seed := time.Now().Unix()
	t.Logf("seed is %v", seed)
	rand.Seed(seed)

	c, cleanup := TestNewCache(t)
	defer cleanup()

	// save about 5 MiB of data in the cache
	data := test.Random(rand.Int(), 5234142)
	id := restic.ID{}
	copy(id[:], data)
	h := restic.Handle{
		Type: restic.DataFile,
		Name: id.String(),
	}
	if err := c.Save(h, bytes.NewReader(data)); err != nil {
		t.Fatalf("Save() returned error: %v", err)
	}

	var tests = []struct {
		offset int64
		length int
	}{
		{0, 0},
		{5, 0},
		{32*1024 + 5, 0},
		{0, 123},
		{0, 64*1024 + 234},
		{100, 5234142},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("%v/%v", test.length, test.offset), func(t *testing.T) {
			rd, err := c.Load(h, test.length, test.offset)
			if err != nil {
				t.Fatal(err)
			}

			buf, err := ioutil.ReadAll(rd)
			if err != nil {
				t.Fatal(err)
			}

			if err = rd.Close(); err != nil {
				t.Fatal(err)
			}

			// compute the expected slice: length == 0 means "to the end",
			// and a requested length is clamped to the available bytes
			o := int(test.offset)
			l := test.length
			if test.length == 0 {
				l = len(data) - o
			}

			if l > len(data)-o {
				l = len(data) - o
			}

			if len(buf) != l {
				t.Fatalf("wrong number of bytes returned: want %d, got %d", l, len(buf))
			}

			if !bytes.Equal(buf, data[o:o+l]) {
				t.Fatalf("wrong data returned, want:\n %02x\ngot:\n %02x", data[o:o+16], buf[:16])
			}
		})
	}
}

20
internal/cache/testing.go vendored Normal file
View File

@ -0,0 +1,20 @@
package cache
import (
"testing"
"github.com/restic/restic/internal/restic"
"github.com/restic/restic/internal/test"
)
// TestNewCache returns a cache in a temporary directory which is removed when
// cleanup is called.
func TestNewCache(t testing.TB) (*Cache, func()) {
	dir, cleanup := test.TempDir(t)
	t.Logf("created new cache at %v", dir)

	c, err := New(restic.NewRandomID().String(), dir)
	if err != nil {
		t.Fatal(err)
	}

	return c, cleanup
}

View File

@ -17,6 +17,7 @@ import (
type Index struct { type Index struct {
m sync.Mutex m sync.Mutex
pack map[restic.BlobHandle][]indexEntry pack map[restic.BlobHandle][]indexEntry
treePacks restic.IDs
final bool // set to true for all indexes read from the backend ("finalized") final bool // set to true for all indexes read from the backend ("finalized")
id restic.ID // set to the ID of the index when it's finalized id restic.ID // set to the ID of the index when it's finalized
@ -437,6 +438,11 @@ func (idx *Index) Dump(w io.Writer) error {
return nil return nil
} }
// TreePacks returns a list of packs that contain only tree blobs.
func (idx *Index) TreePacks() restic.IDs {
return idx.treePacks
}
// isErrOldIndex returns true if the error may be caused by an old index // isErrOldIndex returns true if the error may be caused by an old index
// format. // format.
func isErrOldIndex(err error) bool { func isErrOldIndex(err error) bool {
@ -469,6 +475,8 @@ func DecodeIndex(buf []byte) (idx *Index, err error) {
idx = NewIndex() idx = NewIndex()
for _, pack := range idxJSON.Packs { for _, pack := range idxJSON.Packs {
var data, tree bool
for _, blob := range pack.Blobs { for _, blob := range pack.Blobs {
idx.store(restic.PackedBlob{ idx.store(restic.PackedBlob{
Blob: restic.Blob{ Blob: restic.Blob{
@ -479,6 +487,17 @@ func DecodeIndex(buf []byte) (idx *Index, err error) {
}, },
PackID: pack.ID, PackID: pack.ID,
}) })
switch blob.Type {
case restic.DataBlob:
data = true
case restic.TreeBlob:
tree = true
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
} }
} }
idx.supersedes = idxJSON.Supersedes idx.supersedes = idxJSON.Supersedes
@ -501,6 +520,8 @@ func DecodeOldIndex(buf []byte) (idx *Index, err error) {
idx = NewIndex() idx = NewIndex()
for _, pack := range list { for _, pack := range list {
var data, tree bool
for _, blob := range pack.Blobs { for _, blob := range pack.Blobs {
idx.store(restic.PackedBlob{ idx.store(restic.PackedBlob{
Blob: restic.Blob{ Blob: restic.Blob{
@ -511,6 +532,17 @@ func DecodeOldIndex(buf []byte) (idx *Index, err error) {
}, },
PackID: pack.ID, PackID: pack.ID,
}) })
switch blob.Type {
case restic.DataBlob:
data = true
case restic.TreeBlob:
tree = true
}
}
if !data && tree {
idx.treePacks = append(idx.treePacks, pack.ID)
} }
} }
idx.final = true idx.final = true

View File

@ -89,8 +89,8 @@ func (r *packerManager) insertPacker(p *Packer) {
} }
// savePacker stores p in the backend. // savePacker stores p in the backend.
func (r *Repository) savePacker(p *Packer) error { func (r *Repository) savePacker(t restic.BlobType, p *Packer) error {
debug.Log("save packer with %d blobs (%d bytes)\n", p.Packer.Count(), p.Packer.Size()) debug.Log("save packer for %v with %d blobs (%d bytes)\n", t, p.Packer.Count(), p.Packer.Size())
_, err := p.Packer.Finalize() _, err := p.Packer.Finalize()
if err != nil { if err != nil {
return err return err
@ -112,6 +112,20 @@ func (r *Repository) savePacker(p *Packer) error {
debug.Log("saved as %v", h) debug.Log("saved as %v", h)
if t == restic.TreeBlob && r.Cache != nil {
debug.Log("saving tree pack file in cache")
_, err = p.tmpfile.Seek(0, 0)
if err != nil {
return errors.Wrap(err, "Seek")
}
err := r.Cache.Save(h, p.tmpfile)
if err != nil {
return err
}
}
err = p.tmpfile.Close() err = p.tmpfile.Close()
if err != nil { if err != nil {
return errors.Wrap(err, "close tempfile") return errors.Wrap(err, "close tempfile")

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"os" "os"
"github.com/restic/restic/internal/cache"
"github.com/restic/restic/internal/errors" "github.com/restic/restic/internal/errors"
"github.com/restic/restic/internal/restic" "github.com/restic/restic/internal/restic"
@ -23,6 +24,7 @@ type Repository struct {
key *crypto.Key key *crypto.Key
keyName string keyName string
idx *MasterIndex idx *MasterIndex
restic.Cache
treePM *packerManager treePM *packerManager
dataPM *packerManager dataPM *packerManager
@ -45,6 +47,16 @@ func (r *Repository) Config() restic.Config {
return r.cfg return r.cfg
} }
// UseCache replaces the backend with the wrapped cache.
func (r *Repository) UseCache(c restic.Cache) {
if c == nil {
return
}
debug.Log("using cache")
r.Cache = c
r.be = c.Wrap(r.be)
}
// PrefixLength returns the number of bytes required so that all prefixes of // PrefixLength returns the number of bytes required so that all prefixes of
// all IDs of type t are unique. // all IDs of type t are unique.
func (r *Repository) PrefixLength(t restic.FileType) (int, error) { func (r *Repository) PrefixLength(t restic.FileType) (int, error) {
@ -53,11 +65,11 @@ func (r *Repository) PrefixLength(t restic.FileType) (int, error) {
// LoadAndDecrypt loads and decrypts data identified by t and id from the // LoadAndDecrypt loads and decrypts data identified by t and id from the
// backend. // backend.
func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id restic.ID) ([]byte, error) { func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id restic.ID) (buf []byte, err error) {
debug.Log("load %v with id %v", t, id.Str()) debug.Log("load %v with id %v", t, id.Str())
h := restic.Handle{Type: t, Name: id.String()} h := restic.Handle{Type: t, Name: id.String()}
buf, err := backend.LoadAll(ctx, r.be, h) buf, err = backend.LoadAll(ctx, r.be, h)
if err != nil { if err != nil {
debug.Log("error loading %v: %v", h, err) debug.Log("error loading %v: %v", h, err)
return nil, err return nil, err
@ -76,6 +88,26 @@ func (r *Repository) LoadAndDecrypt(ctx context.Context, t restic.FileType, id r
return buf[:n], nil return buf[:n], nil
} }
// sortCachedPacks moves all cached pack files to the front of blobs.
func (r *Repository) sortCachedPacks(blobs []restic.PackedBlob) []restic.PackedBlob {
if r.Cache == nil {
return blobs
}
cached := make([]restic.PackedBlob, 0, len(blobs)/2)
noncached := make([]restic.PackedBlob, 0, len(blobs)/2)
for _, blob := range blobs {
if r.Cache.Has(restic.Handle{Type: restic.DataFile, Name: blob.PackID.String()}) {
cached = append(cached, blob)
continue
}
noncached = append(noncached, blob)
}
return append(cached, noncached...)
}
// loadBlob tries to load and decrypt content identified by t and id from a // loadBlob tries to load and decrypt content identified by t and id from a
// pack from the backend, the result is stored in plaintextBuf, which must be // pack from the backend, the result is stored in plaintextBuf, which must be
// large enough to hold the complete blob. // large enough to hold the complete blob.
@ -89,9 +121,12 @@ func (r *Repository) loadBlob(ctx context.Context, id restic.ID, t restic.BlobTy
return 0, err return 0, err
} }
// try cached pack files first
blobs = r.sortCachedPacks(blobs)
var lastError error var lastError error
for _, blob := range blobs { for _, blob := range blobs {
debug.Log("id %v found: %v", id.Str(), blob) debug.Log("blob %v/%v found: %v", t, id.Str(), blob)
if blob.Type != t { if blob.Type != t {
debug.Log("blob %v has wrong block type, want %v", blob, t) debug.Log("blob %v has wrong block type, want %v", blob, t)
@ -212,7 +247,7 @@ func (r *Repository) SaveAndEncrypt(ctx context.Context, t restic.BlobType, data
} }
// else write the pack to the backend // else write the pack to the backend
return *id, r.savePacker(packer) return *id, r.savePacker(t, packer)
} }
// SaveJSONUnpacked serialises item as JSON and encrypts and saves it in the // SaveJSONUnpacked serialises item as JSON and encrypts and saves it in the
@ -251,20 +286,27 @@ func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []by
// Flush saves all remaining packs. // Flush saves all remaining packs.
func (r *Repository) Flush() error { func (r *Repository) Flush() error {
for _, pm := range []*packerManager{r.dataPM, r.treePM} { pms := []struct {
pm.pm.Lock() t restic.BlobType
pm *packerManager
}{
{restic.DataBlob, r.dataPM},
{restic.TreeBlob, r.treePM},
}
debug.Log("manually flushing %d packs", len(pm.packers)) for _, p := range pms {
for _, p := range pm.packers { p.pm.pm.Lock()
err := r.savePacker(p)
debug.Log("manually flushing %d packs", len(p.pm.packers))
for _, packer := range p.pm.packers {
err := r.savePacker(p.t, packer)
if err != nil { if err != nil {
pm.pm.Unlock() p.pm.pm.Unlock()
return err return err
} }
} }
pm.packers = pm.packers[:0] p.pm.packers = p.pm.packers[:0]
p.pm.pm.Unlock()
pm.pm.Unlock()
} }
return nil return nil
@ -353,10 +395,58 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
ParallelWorkFuncParseID(worker)) ParallelWorkFuncParseID(worker))
}() }()
validIndex := restic.NewIDSet()
for idx := range indexes { for idx := range indexes {
id, err := idx.ID()
if err == nil {
validIndex.Insert(id)
}
r.idx.Insert(idx) r.idx.Insert(idx)
} }
if r.Cache != nil {
// clear old index files
err := r.Cache.Clear(restic.IndexFile, validIndex)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing index files in cache: %v\n", err)
}
packs := restic.NewIDSet()
for _, idx := range r.idx.All() {
for id := range idx.Packs() {
packs.Insert(id)
}
}
// clear old data files
err = r.Cache.Clear(restic.DataFile, packs)
if err != nil {
fmt.Fprintf(os.Stderr, "error clearing data files in cache: %v\n", err)
}
treePacks := restic.NewIDSet()
for _, idx := range r.idx.All() {
for _, id := range idx.TreePacks() {
treePacks.Insert(id)
}
}
// use readahead
cache := r.Cache.(*cache.Cache)
cache.PerformReadahead = func(h restic.Handle) bool {
if h.Type != restic.DataFile {
return false
}
id, err := restic.ParseID(h.Name)
if err != nil {
return false
}
return treePacks.Has(id)
}
}
if err := <-errCh; err != nil { if err := <-errCh; err != nil {
return err return err
} }

37
internal/restic/cache.go Normal file
View File

@ -0,0 +1,37 @@
package restic
import "io"
// Cache manages a local cache.
type Cache interface {
// BaseDir returns the base directory of the cache.
BaseDir() string
// Wrap returns a backend with a cache.
Wrap(Backend) Backend
// IsNotExist returns true if the error was caused by a non-existing file.
IsNotExist(err error) bool
// Load returns a reader that yields the contents of the file with the
// given id if it is cached. rd must be closed after use. If an error is
// returned, the ReadCloser is nil. The files are still encrypted
Load(h Handle, length int, offset int64) (io.ReadCloser, error)
// SaveIndex saves an index in the cache.
Save(Handle, io.Reader) error
// SaveWriter returns a writer for the to be cached object h. It must be
// closed after writing is finished.
SaveWriter(Handle) (io.WriteCloser, error)
// Remove deletes a single file from the cache. If it isn't cached, this
// functions must return no error.
Remove(Handle) error
// Clear removes all files of type t from the cache that are not contained in the set.
Clear(FileType, IDSet) error
// Has returns true if the file is cached.
Has(Handle) bool
}

View File

@ -5,6 +5,7 @@ import (
"io" "io"
"github.com/restic/restic/internal/debug" "github.com/restic/restic/internal/debug"
"github.com/restic/restic/internal/errors"
) )
type backendReaderAt struct { type backendReaderAt struct {
@ -37,5 +38,5 @@ func ReadAt(ctx context.Context, be Backend, h Handle, offset int64, p []byte) (
debug.Log("ReadAt(%v) ReadFull returned %v bytes", h, n) debug.Log("ReadAt(%v) ReadFull returned %v bytes", h, n)
return n, err return n, errors.Wrapf(err, "ReadFull(%v)", h)
} }