From 7944e8e323dc79a20ba5b5da437f7f396df7b315 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Sun, 26 Jul 2015 00:40:00 +0200 Subject: [PATCH] Update index format --- doc/Design.md | 7 ++- repository/index.go | 49 ++++++++++++++++++--- repository/index_test.go | 94 +++++++++++++++++++++++++++++++--------- repository/repository.go | 2 +- 4 files changed, 120 insertions(+), 32 deletions(-) diff --git a/doc/Design.md b/doc/Design.md index 0a925202f..c78f4d94e 100644 --- a/doc/Design.md +++ b/doc/Design.md @@ -164,7 +164,7 @@ Data and Tree Blobs, so the outer structure is `IV || Ciphertext || MAC` again. The plaintext consists of a JSON document like the following: { - "obsolete": [ + "supersedes": [ "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" ], "packs": [ @@ -197,10 +197,9 @@ This JSON document lists Packs and the blobs contained therein. In this example, the Pack `73d04e61` contains two data Blobs and one Tree blob, the plaintext hashes are listed afterwards. -The field `obsolete` lists the storage IDs of index files that have been +The field `supersedes` lists the storage IDs of index files that have been replaced with the current index file. This happens when index files are -repacked, this happens for example when old snapshots are removed and Packs are -recombined. +repacked, for example when old snapshots are removed and Packs are recombined. There may be an arbitrary number of index files, containing information on non-disjoint sets of Packs. The number of packs described in a single file is diff --git a/repository/index.go b/repository/index.go index 863388b60..ca81d545c 100644 --- a/repository/index.go +++ b/repository/index.go @@ -256,8 +256,15 @@ func (idx *Index) generatePackList(selectFn func(indexEntry) bool) ([]*packJSON, return list, nil } +type jsonIndex struct { + Supersedes []backend.ID `json:"supersedes,omitempty"` + Packs []*packJSON `json:"packs"` +} + +type jsonOldIndex []*packJSON + // encode writes the JSON serialization of the index filtered by selectFn to enc. -func (idx *Index) encode(w io.Writer, selectFn func(indexEntry) bool) error { +func (idx *Index) encode(w io.Writer, supersedes []backend.ID, selectFn func(indexEntry) bool) error { list, err := idx.generatePackList(func(entry indexEntry) bool { return !entry.old }) @@ -268,7 +275,11 @@ func (idx *Index) encode(w io.Writer, selectFn func(indexEntry) bool) error { debug.Log("Index.Encode", "done") enc := json.NewEncoder(w) - return enc.Encode(list) + idxJSON := jsonIndex{ + Supersedes: supersedes, + Packs: list, + } + return enc.Encode(idxJSON) } // Encode writes the JSON serialization of the index to the writer w. This @@ -279,7 +290,7 @@ func (idx *Index) Encode(w io.Writer) error { idx.m.Lock() defer idx.m.Unlock() - return idx.encode(w, func(e indexEntry) bool { return !e.old }) + return idx.encode(w, nil, func(e indexEntry) bool { return !e.old }) } // Dump writes the pretty-printed JSON representation of the index to w. @@ -309,14 +320,38 @@ func (idx *Index) Dump(w io.Writer) error { } // DecodeIndex loads and unserializes an index from rd. -func DecodeIndex(rd io.Reader) (*Index, error) { +func DecodeIndex(rd io.Reader) (*Index, backend.IDs, error) { debug.Log("Index.DecodeIndex", "Start decoding index") + idxJSON := jsonIndex{} + + dec := json.NewDecoder(rd) + err := dec.Decode(&idxJSON) + if err != nil { + debug.Log("Index.DecodeIndex", "Error %#v", err) + return nil, nil, err + } + + idx := NewIndex() + for _, pack := range idxJSON.Packs { + for _, blob := range pack.Blobs { + idx.store(blob.Type, blob.ID, &pack.ID, blob.Offset, blob.Length, true) + } + } + + debug.Log("Index.DecodeIndex", "done") + return idx, idxJSON.Supersedes, err +} + +// DecodeOldIndex loads and unserializes an index in the old format from rd. +func DecodeOldIndex(rd io.Reader) (*Index, backend.IDs, error) { + debug.Log("Index.DecodeOldIndex", "Start decoding old index") list := []*packJSON{} dec := json.NewDecoder(rd) err := dec.Decode(&list) if err != nil { - return nil, err + debug.Log("Index.DecodeOldIndex", "Error %#v", err) + return nil, nil, err } idx := NewIndex() @@ -326,6 +361,6 @@ func DecodeIndex(rd io.Reader) (*Index, error) { } } - debug.Log("Index.DecodeIndex", "done") - return idx, err + debug.Log("Index.DecodeOldIndex", "done") + return idx, backend.IDs{}, err } diff --git a/repository/index_test.go b/repository/index_test.go index 668acebb9..66b7af41c 100644 --- a/repository/index_test.go +++ b/repository/index_test.go @@ -58,7 +58,7 @@ func TestIndexSerialize(t *testing.T) { err := idx.Encode(wr) OK(t, err) - idx2, err := repository.DecodeIndex(wr) + idx2, _, err := repository.DecodeIndex(wr) OK(t, err) Assert(t, idx2 != nil, "nil returned for decoded index") @@ -113,7 +113,7 @@ func TestIndexSerialize(t *testing.T) { err = idx2.Encode(wr3) OK(t, err) - idx3, err := repository.DecodeIndex(wr3) + idx3, _, err := repository.DecodeIndex(wr3) OK(t, err) Assert(t, idx3 != nil, "nil returned for decoded index") @@ -165,26 +165,58 @@ func TestIndexSize(t *testing.T) { // example index serialization from doc/Design.md var docExample = []byte(` +{ + "supersedes": [ + "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452" + ], + "packs": [ + { + "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", + "blobs": [ + { + "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", + "type": "data", + "offset": 0, + "length": 25 + },{ + "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", + "type": "tree", + "offset": 38, + "length": 100 + }, + { + "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", + "type": "data", + "offset": 150, + "length": 123 + } + ] + } + ] +} +`) + +var docOldExample = []byte(` [ { "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c", "blobs": [ - { - "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", - "type": "data", - "offset": 0, - "length": 25 - },{ - "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", - "type": "tree", - "offset": 38, - "length": 100 - }, - { - "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", - "type": "data", - "offset": 150, - "length": 123 - } + { + "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce", + "type": "data", + "offset": 0, + "length": 25 + },{ + "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae", + "type": "tree", + "offset": 38, + "length": 100 + }, + { + "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66", + "type": "data", + "offset": 150, + "length": 123 + } ] } ] `) @@ -210,7 +242,9 @@ var exampleTests = []struct { } func TestIndexUnserialize(t *testing.T) { - idx, err := repository.DecodeIndex(bytes.NewReader(docExample)) + oldIdx := backend.IDs{ParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")} + + idx, supersedes, err := repository.DecodeIndex(bytes.NewReader(docExample)) OK(t, err) for _, test := range exampleTests { @@ -222,6 +256,26 @@ func TestIndexUnserialize(t *testing.T) { Equals(t, test.offset, offset) Equals(t, test.length, length) } + + Equals(t, oldIdx, supersedes) +} + +func TestIndexUnserializeOld(t *testing.T) { + idx, supersedes, err := repository.DecodeOldIndex(bytes.NewReader(docOldExample)) + OK(t, err) + + for _, test := range exampleTests { + packID, tpe, offset, length, err := idx.Lookup(test.id) + OK(t, err) + + Equals(t, test.packID, *packID) + Equals(t, test.tpe, tpe) + Equals(t, test.offset, offset) + Equals(t, test.length, length) + } + + Assert(t, len(supersedes) == 0, + "expected %v supersedes, got %v", 0, len(supersedes)) } func TestStoreOverwritesPreliminaryEntry(t *testing.T) { diff --git a/repository/repository.go b/repository/repository.go index 57373cd2e..6263ce448 100644 --- a/repository/repository.go +++ b/repository/repository.go @@ -555,7 +555,7 @@ func LoadIndex(repo *Repository, id string) (*Index, error) { return nil, err } - idx, err := DecodeIndex(decryptRd) + idx, _, err := DecodeIndex(decryptRd) if err != nil { debug.Log("LoadIndex", "error while decoding index %v: %v", id, err) return nil, err