diff --git a/archiver.go b/archiver.go index 0870ca248..8abef33cf 100644 --- a/archiver.go +++ b/archiver.go @@ -1,6 +1,7 @@ package khepri import ( + "errors" "io" "os" "path/filepath" @@ -239,7 +240,22 @@ func (arch *Archiver) SaveFile(node *Node) error { return nil } -func (arch *Archiver) loadTree(dir string) (*Tree, error) { +func (arch *Archiver) loadTree(dir string, oldTreeID backend.ID) (*Tree, error) { + var ( + oldTree Tree + err error + ) + + if oldTreeID != nil { + // load old tree + oldTree, err = LoadTree(arch.ch, oldTreeID) + if err != nil { + return nil, arrar.Annotate(err, "load old tree") + } + + debug("old tree: %v\n", oldTree) + } + // open and list path fd, err := os.Open(dir) defer fd.Close() @@ -252,8 +268,8 @@ func (arch *Archiver) loadTree(dir string) (*Tree, error) { return nil, err } + // build new tree tree := Tree{} - for _, entry := range entries { path := filepath.Join(dir, entry.Name()) @@ -267,14 +283,39 @@ func (arch *Archiver) loadTree(dir string) (*Tree, error) { return nil, err } - tree = append(tree, node) + err = tree.Insert(node) + if err != nil { + return nil, err + } if entry.IsDir() { - node.Tree, err = arch.loadTree(path) + oldSubtree, err := oldTree.Find(node.Name) + if err != nil && err != ErrNodeNotFound { + return nil, err + } + + var oldSubtreeID backend.ID + if err == nil { + oldSubtreeID = oldSubtree.Subtree + } + + node.Tree, err = arch.loadTree(path, oldSubtreeID) if err != nil { return nil, err } } + } + + // populate with content from oldTree + err = tree.PopulateFrom(oldTree) + if err != nil { + return nil, err + } + + for _, node := range tree { + if node.Type == "file" && node.Content != nil { + continue + } switch node.Type { case "file": @@ -292,7 +333,28 @@ func (arch *Archiver) loadTree(dir string) (*Tree, error) { return &tree, nil } -func (arch *Archiver) LoadTree(path string) (*Tree, error) { +func (arch *Archiver) LoadTree(path string, baseSnapshot backend.ID) (*Tree, error) { + var oldTree Tree + + if baseSnapshot != nil { + // load old tree from snapshot + snapshot, err := arch.ch.LoadSnapshot(baseSnapshot) + if err != nil { + return nil, arrar.Annotate(err, "load old snapshot") + } + + if snapshot.Content == nil { + return nil, errors.New("snapshot without tree!") + } + + oldTree, err = LoadTree(arch.ch, snapshot.Content) + if err != nil { + return nil, arrar.Annotate(err, "load old tree") + } + + debug("old tree: %v\n", oldTree) + } + // reset global stats arch.updateStats = Stats{} @@ -307,14 +369,35 @@ func (arch *Archiver) LoadTree(path string) (*Tree, error) { } if node.Type != "dir" { - arch.Stats.Files = 1 - arch.Stats.Bytes = node.Size + t := &Tree{node} + + // compare with old tree + t.PopulateFrom(oldTree) + + // if no old node has been found, update stats + if node.Content == nil && node.Subtree == nil { + arch.Stats.Files = 1 + arch.Stats.Bytes = node.Size + } + arch.update(arch.ScannerStats, arch.Stats) - return &Tree{node}, nil + + return t, nil } arch.Stats.Directories = 1 - node.Tree, err = arch.loadTree(path) + + var oldSubtreeID backend.ID + oldSubtree, err := oldTree.Find(node.Name) + if err != nil && err != ErrNodeNotFound { + return nil, arrar.Annotate(err, "search node in old tree") + } + + if err == nil { + oldSubtreeID = oldSubtree.Subtree + } + + node.Tree, err = arch.loadTree(path, oldSubtreeID) if err != nil { return nil, arrar.Annotate(err, "loadTree()") } @@ -369,11 +452,12 @@ func (arch *Archiver) saveTree(t *Tree) (Blob, error) { return blob, nil } -func (arch *Archiver) Snapshot(dir string, t *Tree) (*Snapshot, backend.ID, error) { +func (arch *Archiver) Snapshot(dir string, t *Tree, parentSnapshot backend.ID) (*Snapshot, backend.ID, error) { // reset global stats arch.updateStats = Stats{} sn := NewSnapshot(dir) + sn.Parent = parentSnapshot blob, err := arch.saveTree(t) if err != nil { diff --git a/backend/local.go b/backend/local.go index dc7752cc1..d266c506c 100644 --- a/backend/local.go +++ b/backend/local.go @@ -236,6 +236,10 @@ func (b *Local) filename(t Type, id ID) string { // Get returns the content stored under the given ID. If the data doesn't match // the requested ID, ErrWrongData is returned. func (b *Local) Get(t Type, id ID) ([]byte, error) { + if id == nil { + return nil, errors.New("unable to load nil ID") + } + // try to open file file, err := os.Open(b.filename(t, id)) defer file.Close() diff --git a/backend/sftp.go b/backend/sftp.go index 704f7c450..a189076b7 100644 --- a/backend/sftp.go +++ b/backend/sftp.go @@ -344,6 +344,10 @@ func (r *SFTP) filename(t Type, id ID) string { // Get returns the content stored under the given ID. If the data doesn't match // the requested ID, ErrWrongData is returned. func (r *SFTP) Get(t Type, id ID) ([]byte, error) { + if id == nil { + return nil, errors.New("unable to load nil ID") + } + // try to open file file, err := r.c.Open(r.filename(t, id)) defer file.Close() diff --git a/cmd/khepri/cmd_backup.go b/cmd/khepri/cmd_backup.go index a7f329dda..3a3c04271 100644 --- a/cmd/khepri/cmd_backup.go +++ b/cmd/khepri/cmd_backup.go @@ -53,11 +53,22 @@ func print_tree2(indent int, t *khepri.Tree) { } func commandBackup(be backend.Server, key *khepri.Key, args []string) error { - if len(args) != 1 { - return errors.New("usage: backup [dir|file]") + if len(args) < 1 || len(args) > 2 { + return errors.New("usage: backup [dir|file] [snapshot-id]") } + var parentSnapshotID backend.ID + var err error + target := args[0] + if len(args) > 1 { + parentSnapshotID, err = backend.FindSnapshot(be, args[1]) + if err != nil { + return fmt.Errorf("invalid id %q: %v", args[1], err) + } + + fmt.Printf("found parent snapshot %v\n", parentSnapshotID) + } arch, err := khepri.NewArchiver(be, key) if err != nil { @@ -89,7 +100,7 @@ func commandBackup(be backend.Server, key *khepri.Key, args []string) error { // return true // } - t, err := arch.LoadTree(target) + t, err := arch.LoadTree(target, parentSnapshotID) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) return err @@ -144,7 +155,7 @@ func commandBackup(be backend.Server, key *khepri.Key, args []string) error { }(ch) } - sn, id, err := arch.Snapshot(target, t) + sn, id, err := arch.Snapshot(target, t, parentSnapshotID) if err != nil { fmt.Fprintf(os.Stderr, "error: %v\n", err) } diff --git a/cmd/khepri/main.go b/cmd/khepri/main.go index 5df73b881..237f1142c 100644 --- a/cmd/khepri/main.go +++ b/cmd/khepri/main.go @@ -32,7 +32,7 @@ func errx(code int, format string, data ...interface{}) { type commandFunc func(backend.Server, *khepri.Key, []string) error -var commands map[string]commandFunc +var commands = make(map[string]commandFunc) func readPassword(env string, prompt string) string { @@ -126,7 +126,6 @@ func create(u string) (backend.Server, error) { } func init() { - commands = make(map[string]commandFunc) commands["backup"] = commandBackup commands["restore"] = commandRestore commands["list"] = commandList diff --git a/contenthandler.go b/contenthandler.go index e1ab13749..9661a44af 100644 --- a/contenthandler.go +++ b/contenthandler.go @@ -8,6 +8,8 @@ import ( "github.com/fd0/khepri/backend" ) +var ErrWrongData = errors.New("wrong data decrypt, checksum does not match") + type ContentHandler struct { be backend.Server key *Key @@ -173,6 +175,11 @@ func (ch *ContentHandler) Load(t backend.Type, id backend.ID) ([]byte, error) { return nil, errors.New("Invalid length") } + // check SHA256 sum + if !id.Equal(backend.Hash(buf)) { + return nil, ErrWrongData + } + return buf, nil } diff --git a/snapshot.go b/snapshot.go index db6f81f7d..1e592a178 100644 --- a/snapshot.go +++ b/snapshot.go @@ -12,6 +12,7 @@ import ( type Snapshot struct { Time time.Time `json:"time"` + Parent backend.ID `json:"parent,omitempty"` Content backend.ID `json:"content"` Map backend.ID `json:"map"` Dir string `json:"dir"` diff --git a/tree.go b/tree.go index 1635db19a..b5b92e885 100644 --- a/tree.go +++ b/tree.go @@ -1,9 +1,11 @@ package khepri import ( + "errors" "fmt" "os" "os/user" + "sort" "strconv" "strings" "syscall" @@ -31,7 +33,7 @@ type Node struct { Links uint64 `json:"links,omitempty"` LinkTarget string `json:"linktarget,omitempty"` Device uint64 `json:"device,omitempty"` - Content []backend.ID `json:"content,omitempty"` + Content []backend.ID `json:"content"` Subtree backend.ID `json:"subtree,omitempty"` Tree *Tree `json:"-"` @@ -39,6 +41,11 @@ type Node struct { path string } +var ( + ErrNodeNotFound = errors.New("named node not found") + ErrNodeAlreadyInTree = errors.New("node already present") +) + type Blob struct { ID backend.ID `json:"id,omitempty"` Size uint64 `json:"size,omitempty"` @@ -69,6 +76,79 @@ func (t Tree) String() string { return strings.Join(s, "\n") } +func LoadTree(ch *ContentHandler, id backend.ID) (Tree, error) { + if id == nil { + return nil, nil + } + + tree := Tree{} + err := ch.LoadJSON(backend.Tree, id, &tree) + if err != nil { + return nil, err + } + + return tree, nil +} + +// PopulateFrom copies subtrees and content from other when it hasn't changed. +func (t Tree) PopulateFrom(other Tree) error { + for _, node := range t { + // only copy entries for files + if node.Type != "file" { + continue + } + + // find entry in other tree + oldNode, err := other.Find(node.Name) + + // if the node could not be found, proceed to the next + if err == ErrNodeNotFound { + continue + } + + // compare content + if node.SameContent(oldNode) { + // copy Content + node.Content = oldNode.Content + } + } + + return nil +} + +func (t *Tree) Insert(node *Node) error { + pos, _, err := t.find(node.Name) + if err == nil { + // already present + return ErrNodeAlreadyInTree + } + + // insert blob + // https://code.google.com/p/go-wiki/wiki/bliceTricks + *t = append(*t, &Node{}) + copy((*t)[pos+1:], (*t)[pos:]) + (*t)[pos] = node + + return nil +} + +func (t Tree) find(name string) (int, *Node, error) { + pos := sort.Search(len(t), func(i int) bool { + return t[i].Name >= name + }) + + if pos < len(t) && t[pos].Name == name { + return pos, t[pos], nil + } + + return pos, nil, ErrNodeNotFound +} + +func (t Tree) Find(name string) (*Node, error) { + _, node, err := t.find(name) + return node, err +} + func (node *Node) fill_extra(path string, fi os.FileInfo) (err error) { stat, ok := fi.Sys().(*syscall.Stat_t) if !ok { @@ -265,6 +345,28 @@ func (node *Node) CreateAt(ch *ContentHandler, path string) error { return nil } +func (node Node) SameContent(olderNode *Node) bool { + // if this node has a type other than "file", treat as if content has changed + if node.Type != "file" { + return false + } + + // if the name or type has changed, this is surely something different + if node.Name != olderNode.Name || node.Type != olderNode.Type { + return false + } + + // if timestamps or inodes differ, content has changed + if node.ModTime != olderNode.ModTime || + node.ChangeTime != olderNode.ChangeTime || + node.Inode != olderNode.Inode { + return false + } + + // otherwise the node is assumed to have the same content + return true +} + func (b Blob) Free() { if b.ID != nil { b.ID.Free()