From 1944ab13d4c8841e78ef8ef5d244d5ecedf32df6 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Mon, 22 Dec 2014 14:46:54 +0100 Subject: [PATCH] Add dangling blob detection and removal to 'fsck' --- cmd/restic/cmd_fsck.go | 142 +++++++++++++++++++++++++++++++++++------ cmd/restic/cmd_ls.go | 2 +- cmd/restic/main.go | 4 ++ contenthandler.go | 29 +++++++++ 4 files changed, 157 insertions(+), 20 deletions(-) diff --git a/cmd/restic/cmd_fsck.go b/cmd/restic/cmd_fsck.go index ae80f0f2a..44e8b307d 100644 --- a/cmd/restic/cmd_fsck.go +++ b/cmd/restic/cmd_fsck.go @@ -1,13 +1,24 @@ package main import ( + "errors" "fmt" "github.com/restic/restic" "github.com/restic/restic/backend" ) -type CmdFsck struct{} +type CmdFsck struct { + CheckData bool ` long:"check-data" description:"Read data blobs" default:"false"` + Snapshot string `short:"s" long:"snapshot" description:"Only check this snapshot"` + Orphaned bool `short:"o" long:"orphaned" description:"Check for orphaned blobs"` + RemoveOrphaned bool `short:"x" long:"remove-orphaned" description:"Remove orphaned blobs (implies -o)"` + + // lists of blobs recorded while walking snapshots, used for the orphaned blob check (nil unless that check is active) + o_data *restic.BlobList + o_trees *restic.BlobList + o_maps *restic.BlobList +} func init() { _, err := parser.AddCommand("fsck", @@ -19,21 +30,45 @@ func init() { } } -func fsckFile(ch *restic.ContentHandler, IDs []backend.ID) error { +func fsckFile(opts CmdFsck, ch *restic.ContentHandler, IDs []backend.ID) error { for _, id := range IDs { debug("checking data blob %v\n", id) - // load content - _, err := ch.Load(backend.Data, id) - if err != nil { - return err + if opts.CheckData { + // --check-data: load content of the data blob + _, err := ch.Load(backend.Data, id) + if err != nil { + return err + } + } else { + // otherwise only test if data blob is there + ok, err := ch.Test(backend.Data, id) + if err != nil { + return err + } + + if !ok { + return fmt.Errorf("data blob %v not found", id) + } + } + + // if orphan check is active (list is non-nil), record storage id + if opts.o_data != nil { + // lookup 
storage ID + sid, err := ch.Lookup(id) + if err != nil { + return err + } + + // add storage ID to list of data blobs referenced by a checked snapshot + opts.o_data.Insert(restic.Blob{ID: sid}) } } return nil } -func fsckTree(ch *restic.ContentHandler, id backend.ID) error { +func fsckTree(opts CmdFsck, ch *restic.ContentHandler, id backend.ID) error { debug("checking tree %v\n", id) tree, err := restic.LoadTree(ch, id) @@ -41,6 +76,18 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error { return err } + // if orphan check is active (list is non-nil), record storage id + if opts.o_trees != nil { + // lookup storage ID + sid, err := ch.Lookup(id) + if err != nil { + return err + } + + // add storage ID to list of trees referenced by a checked snapshot + opts.o_trees.Insert(restic.Blob{ID: sid}) + } + for i, node := range tree { if node.Name == "" { return fmt.Errorf("node %v of tree %v has no name", i, id) @@ -56,7 +103,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error { return fmt.Errorf("file node %q of tree %v has no content", node.Name, id) } - err := fsckFile(ch, node.Content) + err := fsckFile(opts, ch, node.Content) if err != nil { return err } @@ -65,7 +112,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error { return fmt.Errorf("dir node %q of tree %v has no subtree", node.Name, id) } - err := fsckTree(ch, node.Subtree) + err := fsckTree(opts, ch, node.Subtree) if err != nil { return err } @@ -75,7 +122,7 @@ func fsckTree(ch *restic.ContentHandler, id backend.ID) error { return nil } -func fsck_snapshot(s restic.Server, id backend.ID) error { +func fsck_snapshot(opts CmdFsck, s restic.Server, id backend.ID) error { debug("checking snapshot %v\n", id) ch, err := restic.NewContentHandler(s) @@ -96,16 +143,25 @@ func fsck_snapshot(s restic.Server, id backend.ID) error { return fmt.Errorf("snapshot %v has no map", sn.ID) } - return fsckTree(ch, sn.Tree) + // if orphan check is active, record storage id for map (sn.Map is inserted as is, without a Lookup) + if opts.o_maps != nil { + opts.o_maps.Insert(restic.Blob{ID: sn.Map}) + } + + return fsckTree(opts, ch, sn.Tree) } func (cmd 
CmdFsck) Usage() string { - return "fsck [all|snapshot-ID]" + return "[fsck-options]" } func (cmd CmdFsck) Execute(args []string) error { - if len(args) == 0 { - return fmt.Errorf("type or ID not specified, Usage: %s", cmd.Usage()) + if len(args) != 0 { + return errors.New("fsck has no arguments") + } + + if cmd.RemoveOrphaned && !cmd.Orphaned { // -x implies -o + cmd.Orphaned = true } s, err := OpenRepo() @@ -113,27 +169,75 @@ func (cmd CmdFsck) Execute(args []string) error { return err } - if len(args) == 1 && args[0] != "all" { - snapshotID, err := s.FindSnapshot(args[0]) + if cmd.Snapshot != "" { // NOTE(review): returns before the orphan lists are created, so -o/-x have no effect together with -s + snapshotID, err := s.FindSnapshot(cmd.Snapshot) if err != nil { - return fmt.Errorf("invalid id %q: %v", args[0], err) + return fmt.Errorf("invalid id %q: %v", cmd.Snapshot, err) } - return fsck_snapshot(s, snapshotID) + return fsck_snapshot(cmd, s, snapshotID) + } + + if cmd.Orphaned { + cmd.o_data = restic.NewBlobList() + cmd.o_trees = restic.NewBlobList() + cmd.o_maps = restic.NewBlobList() } list, err := s.List(backend.Snapshot) + debug("checking %d snapshots\n", len(list)) // NOTE(review): runs before err from s.List is checked if err != nil { return err } for _, snapshotID := range list { - err := fsck_snapshot(s, snapshotID) + err := fsck_snapshot(cmd, s, snapshotID) if err != nil { return err } } + if !cmd.Orphaned { + return nil + } + + debug("starting orphaned check\n") + + l := []struct { + desc string + tpe backend.Type + list *restic.BlobList + }{ + {"data blob", backend.Data, cmd.o_data}, + {"tree", backend.Tree, cmd.o_trees}, + {"maps", backend.Map, cmd.o_maps}, + } + + for _, d := range l { + debug("checking for orphaned %v\n", d.desc) + + blobs, err := s.List(d.tpe) + if err != nil { + return err + } + + for _, id := range blobs { + _, err := d.list.Find(restic.Blob{ID: id}) + if err == restic.ErrBlobNotFound { // NOTE(review): any other error returned by Find is ignored here + if !cmd.RemoveOrphaned { // report only + fmt.Printf("orphaned %v %v\n", d.desc, id) + continue + } + + fmt.Printf("removing orphaned %v %v\n", d.desc, id) + err := s.Remove(d.tpe, id) + if err != nil { + return err + } + } + } 
+ } + return nil } diff --git a/cmd/restic/cmd_ls.go b/cmd/restic/cmd_ls.go index 94d2b04bb..a34f6664f 100644 --- a/cmd/restic/cmd_ls.go +++ b/cmd/restic/cmd_ls.go @@ -60,7 +60,7 @@ func print_tree(prefix string, ch *restic.ContentHandler, id backend.ID) error { } func (cmd CmdLs) Usage() string { - return "ls snapshot-ID [DIR]" + return "snapshot-ID [DIR]" } func (cmd CmdLs) Execute(s restic.Server, key *restic.Key, args []string) error { diff --git a/cmd/restic/main.go b/cmd/restic/main.go index 5fec20030..f5c39f5e3 100644 --- a/cmd/restic/main.go +++ b/cmd/restic/main.go @@ -130,6 +130,10 @@ func create(u string) (backend.Backend, error) { } func OpenRepo() (restic.Server, error) { + if opts.Repo == "" { + return restic.Server{}, errors.New("Please specify repository location (-r)") + } + be, err := open(opts.Repo) if err != nil { return restic.Server{}, err diff --git a/contenthandler.go b/contenthandler.go index 16fd7a2ab..cba3dc833 100644 --- a/contenthandler.go +++ b/contenthandler.go @@ -181,6 +181,17 @@ func (ch *ContentHandler) Load(t backend.Type, id backend.ID) ([]byte, error) { return buf, nil } +// Lookup returns the storage ID for the given blob, or the error returned by ch.bl.Find if the lookup fails +func (ch *ContentHandler) Lookup(id backend.ID) (backend.ID, error) { + // lookup storage hash + blob, err := ch.bl.Find(Blob{ID: id}) + if err != nil { + return nil, err + } + + return blob.Storage, nil +} + // LoadJSON calls Load() to get content from the backend and afterwards calls // json.Unmarshal on the item. func (ch *ContentHandler) LoadJSON(t backend.Type, id backend.ID, item interface{}) error { @@ -214,3 +225,21 @@ func (ch *ContentHandler) LoadJSONRaw(t backend.Type, id backend.ID, item interf err = json.Unmarshal(backend.Uncompress(buf), item) return err } + +// Test checks if a blob is in the repository. For Data and Tree blobs, the +// storage ID is looked up first; for other types id is passed to ch.s.Test as is. 
+func (ch *ContentHandler) Test(t backend.Type, id backend.ID) (bool, error) { + if t == backend.Data || t == backend.Tree { + // replace id by the blob's storage ID before testing + + // lookup storage hash; a Find error is reported with the generic message below + blob, err := ch.bl.Find(Blob{ID: id}) + if err != nil { + return false, fmt.Errorf("Storage ID %s not found", id) + } + + id = blob.Storage + } + + return ch.s.Test(t, id) +}