From e543f5926c7ea63c561847862acf798464c7fece Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 4 Jan 2015 22:39:30 +0100 Subject: [PATCH] Refactor Archiver and backup command Improve incremental backup by first scanning the tree, loading the old tree and afterwards comparing both trees in memory. --- archiver.go | 31 ++++++++++--- cmd/restic/cmd_backup.go | 43 +++++++++++++----- contenthandler.go | 5 +++ progress.go | 6 +-- scanner.go | 6 +-- tree.go | 94 ++++++++++++++++++++++++++++++++++------ 6 files changed, 149 insertions(+), 36 deletions(-) diff --git a/archiver.go b/archiver.go index c3d1b8e54..27418645b 100644 --- a/archiver.go +++ b/archiver.go @@ -1,6 +1,7 @@ package restic import ( + "errors" "fmt" "io" "os" @@ -58,6 +59,7 @@ func NewArchiver(s Server, p *Progress) (*Archiver, error) { arch.ch = NewContentHandler(s) // load all blobs from all snapshots + // TODO: only use bloblist from old snapshot if available err = arch.ch.LoadAllMaps() if err != nil { return nil, err @@ -96,7 +98,28 @@ func (arch *Archiver) SaveFile(node *Node) error { file, err := os.Open(node.path) defer file.Close() if err != nil { - return arrar.Annotate(err, "SaveFile()") + return arrar.Annotatef(err, "SaveFile(%v)", node.path) + } + + // check file again + fi, err := file.Stat() + if err != nil { + return err + } + + if fi.ModTime() != node.ModTime { + e2 := arch.Error(node.path, fi, errors.New("file changed as we read it\n")) + + if e2 == nil { + // create new node + n, err := NodeFromFileInfo(node.path, fi) + if err != nil { + return err + } + + // copy node + *node = *n + } } var blobs Blobs @@ -203,8 +226,8 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) { var wg sync.WaitGroup for _, node := range *t { - if node.Tree != nil && node.Subtree == nil { - b, err := arch.saveTree(node.Tree) + if node.tree != nil && node.Subtree == nil { + b, err := arch.saveTree(node.tree) if err != nil { return Blob{}, err } @@ -229,8 +252,6 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) { } arch.p.Report(Stat{Files: 1}) }(node) - } else { - arch.p.Report(Stat{Other: 1}) } } diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index ebcc5c0fe..290938c26 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -3,6 +3,7 @@ package main import ( "fmt" "os" + "path/filepath" "strings" "time" @@ -59,9 +60,9 @@ func format_duration(d time.Duration) string { func print_tree2(indent int, t *restic.Tree) { for _, node := range *t { - if node.Tree != nil { + if node.Tree() != nil { fmt.Printf("%s%s/\n", strings.Repeat(" ", indent), node.Name) - print_tree2(indent+1, node.Tree) + print_tree2(indent+1, node.Tree()) } else { fmt.Printf("%s%s\n", strings.Repeat(" ", indent), node.Name) } @@ -94,7 +95,7 @@ func (cmd CmdBackup) Execute(args []string) error { fmt.Printf("found parent snapshot %v\n", parentSnapshotID) } - fmt.Printf("scanning %s\n", target) + fmt.Printf("scan %s\n", target) scanProgress := restic.NewProgress(time.Second) if terminal.IsTerminal(int(os.Stdout.Fd())) { @@ -113,29 +114,51 @@ func (cmd CmdBackup) Execute(args []string) error { sc := restic.NewScanner(scanProgress) - t, err := sc.Scan(target) + newTree, err := sc.Scan(target) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) return err } + if parentSnapshotID != nil { + fmt.Printf("load old snapshot\n") + ch := restic.NewContentHandler(s) + sn, err := ch.LoadSnapshot(parentSnapshotID) + if err != nil { + return err + } + + oldTree, err := restic.LoadTreeRecursive(filepath.Dir(sn.Dir), ch, sn.Tree) + if err != nil { + return err + } + + newTree.CopyFrom(oldTree) + } + archiveProgress := restic.NewProgress(time.Second) - targetStat := scanProgress.Current() + targetStat := newTree.StatTodo() if terminal.IsTerminal(int(os.Stdout.Fd())) { var bps, eta uint64 + itemsTodo := targetStat.Files + targetStat.Dirs + archiveProgress.F = func(s restic.Stat, d time.Duration, ticker bool) { sec := uint64(d / time.Second) - if sec > 0 && ticker { + if targetStat.Bytes > 0 && sec > 0 && ticker { bps = s.Bytes / sec - eta = (targetStat.Bytes - s.Bytes) / bps + if bps > 0 { + eta = (targetStat.Bytes - s.Bytes) / bps + } } - fmt.Printf("\x1b[2K\r[%s] %3.2f%% %s/s %s / %s ETA %s", + itemsDone := s.Files + s.Dirs + fmt.Printf("\x1b[2K\r[%s] %3.2f%% %s/s %s / %s %d / %d items ETA %s", format_duration(d), float64(s.Bytes)/float64(targetStat.Bytes)*100, format_bytes(bps), format_bytes(s.Bytes), format_bytes(targetStat.Bytes), + itemsDone, itemsTodo, format_seconds(eta)) } @@ -154,11 +177,11 @@ func (cmd CmdBackup) Execute(args []string) error { arch.Error = func(dir string, fi os.FileInfo, err error) error { // TODO: make ignoring errors configurable - fmt.Fprintf(os.Stderr, "\nerror for %s: %v\n%v\n", dir, err, fi) + fmt.Fprintf(os.Stderr, "\nerror for %s: %v\n", dir, err) return nil } - _, id, err := arch.Snapshot(target, t, parentSnapshotID) + _, id, err := arch.Snapshot(target, newTree, parentSnapshotID) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) } diff --git a/contenthandler.go b/contenthandler.go index 202ae9049..7381178e8 100644 --- a/contenthandler.go +++ b/contenthandler.go @@ -243,3 +243,8 @@ func (ch *ContentHandler) Test(t backend.Type, id backend.ID) (bool, error) { return ch.s.Test(t, id) } + +// BlobList returns the current BlobList. +func (ch *ContentHandler) BlobList() *BlobList { + return ch.bl +} diff --git a/progress.go b/progress.go index 8bb427808..086b06a5c 100644 --- a/progress.go +++ b/progress.go @@ -25,7 +25,6 @@ type Progress struct { type Stat struct { Files uint64 Dirs uint64 - Other uint64 Bytes uint64 } @@ -162,7 +161,6 @@ func (s *Stat) Add(other Stat) { s.Bytes += other.Bytes s.Dirs += other.Dirs s.Files += other.Files - s.Other += other.Other } func (s Stat) String() string { @@ -182,6 +180,6 @@ func (s Stat) String() string { str = fmt.Sprintf("%dB", s.Bytes) } - return fmt.Sprintf("Stat(%d files, %d dirs, %d other, %v)", - s.Files, s.Dirs, s.Other, str) + return fmt.Sprintf("Stat(%d files, %d dirs, %v)", + s.Files, s.Dirs, str) } diff --git a/scanner.go b/scanner.go index 15d59dd06..9c90465d1 100644 --- a/scanner.go +++ b/scanner.go @@ -66,7 +66,7 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) { if entry.IsDir() { // save all errors in node.err, sort out later - node.Tree, node.err = scan(filterFn, progress, path) + node.tree, node.err = scan(filterFn, progress, path) } } @@ -80,8 +80,6 @@ func scan(filterFn FilterFunc, progress *Progress, dir string) (*Tree, error) { progress.Report(Stat{Files: 1, Bytes: node.Size}) case "dir": progress.Report(Stat{Dirs: 1}) - default: - progress.Report(Stat{Other: 1}) } } @@ -112,7 +110,7 @@ func (sc *Scanner) Scan(path string) (*Tree, error) { sc.p.Report(Stat{Dirs: 1}) - node.Tree, err = scan(sc.Filter, sc.p, path) + node.tree, err = scan(sc.Filter, sc.p, path) if err != nil { return nil, arrar.Annotate(err, "loadTree()") } diff --git a/tree.go b/tree.go index db51404be..64398ad09 100644 --- a/tree.go +++ b/tree.go @@ -6,6 +6,8 @@ import ( "fmt" "os" "os/user" + "path/filepath" + "reflect" "sort" "strconv" "strings" @@ -37,7 +39,7 @@ type Node struct { Content []backend.ID `json:"content"` Subtree backend.ID `json:"subtree,omitempty"` - Tree *Tree `json:"-"` + tree *Tree path string err error @@ -92,11 +94,33 @@ func LoadTree(ch *ContentHandler, id backend.ID) (Tree, error) { return tree, nil } -// PopulateFrom copies subtrees and content from other when it hasn't changed. -func (t Tree) PopulateFrom(other Tree) error { +// LoadTreeRecursive loads the tree and all subtrees via ch. +func LoadTreeRecursive(path string, ch *ContentHandler, id backend.ID) (Tree, error) { + tree, err := LoadTree(ch, id) + if err != nil { + return nil, err + } + + for _, n := range tree { + n.path = filepath.Join(path, n.Name) + if n.Type == "dir" && n.Subtree != nil { + t, err := LoadTreeRecursive(n.path, ch, n.Subtree) + if err != nil { + return nil, err + } + + n.tree = &t + } + } + + return tree, nil +} + +// CopyFrom recursively copies all content from other to t. +func (t Tree) CopyFrom(other Tree) { for _, node := range t { - // only copy entries for files - if node.Type != "file" { + // only process files and dirs + if node.Type != "file" && node.Type != "dir" { continue } @@ -108,14 +132,32 @@ func (t Tree) PopulateFrom(other Tree) error { continue } - // compare content - if node.SameContent(oldNode) { - // copy Content - node.Content = oldNode.Content + if node.Type == "file" { + // compare content + if node.SameContent(oldNode) { + // copy Content + node.Content = oldNode.Content + } + } else { + // fill in all subtrees from old subtree + node.tree.CopyFrom(*oldNode.tree) + + // check if tree has changed + if node.tree.Equals(*oldNode.tree) { + // if nothing has changed, copy subtree ID + node.Subtree = oldNode.Subtree + } } } +} - return nil +// Equals returns true if t and other have exactly the same nodes. +func (t Tree) Equals(other Tree) bool { + if len(t) != len(other) { + return false + } + + return reflect.DeepEqual(t, other) } func (t *Tree) Insert(node *Node) error { @@ -160,15 +202,41 @@ func (t Tree) Stat() Stat { s.Bytes += n.Size case "dir": s.Dirs++ - s.Add(n.Tree.Stat()) - default: - s.Other++ + if n.tree != nil { + s.Add(n.tree.Stat()) + } } } return s } +func (t Tree) StatTodo() Stat { + s := Stat{} + for _, n := range t { + switch n.Type { + case "file": + if n.Content == nil { + s.Files++ + s.Bytes += n.Size + } + case "dir": + if n.Subtree == nil { + s.Dirs++ + if n.tree != nil { + s.Add(n.tree.StatTodo()) + } + } + } + } + + return s +} + +func (node Node) Tree() *Tree { + return node.tree +} + func (node *Node) fill_extra(path string, fi os.FileInfo) (err error) { stat, ok := fi.Sys().(*syscall.Stat_t) if !ok {