diff --git a/internal/archiver/archiver.go b/internal/archiver/archiver.go index 247d922de..3dae576d9 100644 --- a/internal/archiver/archiver.go +++ b/internal/archiver/archiver.go @@ -2,7 +2,6 @@ package archiver import ( "context" - "encoding/json" "os" "path" "runtime" @@ -175,17 +174,13 @@ func (arch *Archiver) error(item string, err error) error { // saveTree stores a tree in the repo. It checks the index and the known blobs // before saving anything. -func (arch *Archiver) saveTree(ctx context.Context, t *restic.Tree) (restic.ID, ItemStats, error) { +func (arch *Archiver) saveTree(ctx context.Context, t *restic.TreeJSONBuilder) (restic.ID, ItemStats, error) { var s ItemStats - buf, err := json.Marshal(t) + buf, err := t.Finalize() if err != nil { - return restic.ID{}, s, errors.Wrap(err, "MarshalJSON") + return restic.ID{}, s, err } - // append a newline so that the data is always consistent (json.Encoder - // adds a newline after each object) - buf = append(buf, '\n') - b := &Buffer{Data: buf} res := arch.blobSaver.Save(ctx, restic.TreeBlob, b) @@ -620,7 +615,11 @@ func (arch *Archiver) SaveTree(ctx context.Context, snPath string, atree *Tree, return nil, err } - id, nodeStats, err := arch.saveTree(ctx, subtree) + tb, err := restic.TreeToBuilder(subtree) + if err != nil { + return nil, err + } + id, nodeStats, err := arch.saveTree(ctx, tb) if err != nil { return nil, err } @@ -834,7 +833,11 @@ func (arch *Archiver) Snapshot(ctx context.Context, targets []string, opts Snaps return errors.New("snapshot is empty") } - rootTreeID, stats, err = arch.saveTree(wgCtx, tree) + tb, err := restic.TreeToBuilder(tree) + if err != nil { + return err + } + rootTreeID, stats, err = arch.saveTree(wgCtx, tb) arch.stopWorkers() return err }) diff --git a/internal/archiver/tree_saver.go b/internal/archiver/tree_saver.go index 221df85e1..5aab09b94 100644 --- a/internal/archiver/tree_saver.go +++ b/internal/archiver/tree_saver.go @@ -10,7 +10,7 @@ import ( // TreeSaver concurrently saves incoming trees to the repo. type TreeSaver struct { - saveTree func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) + saveTree func(context.Context, *restic.TreeJSONBuilder) (restic.ID, ItemStats, error) errFn ErrorFunc ch chan<- saveTreeJob @@ -18,7 +18,7 @@ type TreeSaver struct { // NewTreeSaver returns a new tree saver. A worker pool with treeWorkers is // started, it is stopped when ctx is cancelled. -func NewTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, saveTree func(context.Context, *restic.Tree) (restic.ID, ItemStats, error), errFn ErrorFunc) *TreeSaver { +func NewTreeSaver(ctx context.Context, wg *errgroup.Group, treeWorkers uint, saveTree func(context.Context, *restic.TreeJSONBuilder) (restic.ID, ItemStats, error), errFn ErrorFunc) *TreeSaver { ch := make(chan saveTreeJob) s := &TreeSaver{ @@ -78,7 +78,7 @@ func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, I // allow GC of nodes array once the loop is finished job.nodes = nil - tree := restic.NewTree(len(nodes)) + builder := restic.NewTreeJSONBuilder() for i, fn := range nodes { // fn is a copy, so clear the original value explicitly @@ -104,13 +104,13 @@ func (s *TreeSaver) save(ctx context.Context, job *saveTreeJob) (*restic.Node, I } debug.Log("insert %v", fnr.node.Name) - err := tree.Insert(fnr.node) + err := builder.AddNode(fnr.node) if err != nil { return nil, stats, err } } - id, treeStats, err := s.saveTree(ctx, tree) + id, treeStats, err := s.saveTree(ctx, builder) stats.Add(treeStats) if err != nil { return nil, stats, err diff --git a/internal/archiver/tree_saver_test.go b/internal/archiver/tree_saver_test.go index 7a152ff0c..36e585ae1 100644 --- a/internal/archiver/tree_saver_test.go +++ b/internal/archiver/tree_saver_test.go @@ -18,7 +18,7 @@ func TestTreeSaver(t *testing.T) { wg, ctx := errgroup.WithContext(ctx) - saveFn := func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) { + saveFn := func(context.Context, *restic.TreeJSONBuilder) (restic.ID, ItemStats, error) { return restic.NewRandomID(), ItemStats{TreeBlobs: 1, TreeSize: 123}, nil } @@ -73,7 +73,7 @@ func TestTreeSaverError(t *testing.T) { wg, ctx := errgroup.WithContext(ctx) var num int32 - saveFn := func(context.Context, *restic.Tree) (restic.ID, ItemStats, error) { + saveFn := func(context.Context, *restic.TreeJSONBuilder) (restic.ID, ItemStats, error) { val := atomic.AddInt32(&num, 1) if val == test.failAt { t.Logf("sending error for request %v\n", test.failAt) diff --git a/internal/restic/tree.go b/internal/restic/tree.go index 33d1ec577..d1264074c 100644 --- a/internal/restic/tree.go +++ b/internal/restic/tree.go @@ -1,6 +1,7 @@ package restic import ( + "bytes" "context" "encoding/json" "fmt" @@ -143,3 +144,52 @@ func SaveTree(ctx context.Context, r BlobSaver, t *Tree) (ID, error) { id, _, _, err := r.SaveBlob(ctx, TreeBlob, buf, ID{}, false) return id, err } + +type TreeJSONBuilder struct { + buf bytes.Buffer + lastName string +} + +func NewTreeJSONBuilder() *TreeJSONBuilder { + tb := &TreeJSONBuilder{} + _, _ = tb.buf.WriteString(`{"nodes":[`) + return tb +} + +func (builder *TreeJSONBuilder) AddNode(node *Node) error { + if node.Name <= builder.lastName { + return errors.Errorf("nodes are not ordered got %q, last %q", node.Name, builder.lastName) + } + if builder.lastName != "" { + _ = builder.buf.WriteByte(',') + } + builder.lastName = node.Name + + val, err := json.Marshal(node) + if err != nil { + return err + } + _, _ = builder.buf.Write(val) + return nil +} + +func (builder *TreeJSONBuilder) Finalize() ([]byte, error) { + // append a newline so that the data is always consistent (json.Encoder + // adds a newline after each object) + _, _ = builder.buf.WriteString("]}\n") + buf := builder.buf.Bytes() + // drop reference to buffer + builder.buf = bytes.Buffer{} + return buf, nil +} + +func TreeToBuilder(t *Tree) (*TreeJSONBuilder, error) { + builder := NewTreeJSONBuilder() + for _, node := range t.Nodes { + err := builder.AddNode(node) + if err != nil { + return nil, err + } + } + return builder, nil +} diff --git a/internal/restic/tree_test.go b/internal/restic/tree_test.go index 3ed3e7938..811f0c6c6 100644 --- a/internal/restic/tree_test.go +++ b/internal/restic/tree_test.go @@ -119,6 +119,37 @@ func TestEmptyLoadTree(t *testing.T) { tree, tree2) } +func TestTreeEqualSerialization(t *testing.T) { + files := []string{"node.go", "tree.go", "tree_test.go"} + for i := 1; i <= len(files); i++ { + tree := restic.NewTree(i) + builder := restic.NewTreeJSONBuilder() + + for _, fn := range files[:i] { + fi, err := os.Lstat(fn) + rtest.OK(t, err) + node, err := restic.NodeFromFileInfo(fn, fi) + rtest.OK(t, err) + + rtest.OK(t, tree.Insert(node)) + rtest.OK(t, builder.AddNode(node)) + + rtest.Assert(t, tree.Insert(node) != nil, "no error on duplicate node") + rtest.Assert(t, builder.AddNode(node) != nil, "no error on duplicate node") + } + + treeBytes, err := json.Marshal(tree) + treeBytes = append(treeBytes, '\n') + rtest.OK(t, err) + + stiBytes, err := builder.Finalize() + rtest.OK(t, err) + + // compare serialization of an individual node and the SaveTreeIterator + rtest.Equals(t, treeBytes, stiBytes) + } +} + func BenchmarkBuildTree(b *testing.B) { const size = 100 // Directories of this size are not uncommon.