Merge pull request #3666 from MichaelEischer/compression

Implement compression support
2024-12-22 10:58:55 +00:00 · 2022-04-30 11:49:05 +02:00 · 2022-04-30 11:49:05 +02:00 · ac9324aeaf
commit ac9324aeaf
parent 4e1ef7804a dc5adef255
28 changed files with 899 additions and 284 deletions
--- a/changelog/unreleased/issue-21
+++ b/changelog/unreleased/issue-21
@ -0,0 +1,21 @@
+Enhancement: Add compression support
+
+We have added compression support to the restic repository format. To create a
+repository using the new format run `init --repository-version 2`. Please note
+that the repository cannot be read by restic versions prior to 0.14.0.
+
+You can configure if data is compressed with the option `--compression`. It can
+be set to `auto` (the default, which will compress very fast), `max` (which
+will trade backup speed and CPU usage for better compression), or `off` (which
+disables compression). Each setting is only applied for the single run of restic.
+
+The new format version has not received much testing yet. Do not rely on it as
+your only backup copy! Please run `check` in regular intervals to detect any
+problems.
+
+Upgrading in place is not yet supported. As a workaround, first create a new
+repository using `init --repository-version 2 --copy-chunker-params --repo2 path/to/old/repo`.
+Then use the `copy` command to copy all snapshots to the new repository.
+
+https://github.com/restic/restic/issues/21
+https://github.com/restic/restic/pull/3666
--- a/cmd/restic/cmd_debug.go
+++ b/cmd/restic/cmd_debug.go
@ -15,6 +15,7 @@ import (
 	"sort"
 	"time"

+	"github.com/klauspost/compress/zstd"
 	"github.com/spf13/cobra"
 	"golang.org/x/sync/errgroup"

@ -309,6 +310,10 @@ func decryptUnsigned(ctx context.Context, k *crypto.Key, buf []byte) []byte {
 }

 func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list []restic.Blob) error {
+	dec, err := zstd.NewReader(nil)
+	if err != nil {
+		panic(err)
+	}
 	be := repo.Backend()
 	h := restic.Handle{
 		Name: pack.String(),
@ -333,44 +338,47 @@ func loadBlobs(ctx context.Context, repo restic.Repository, pack restic.ID, list

 		nonce, plaintext := buf[:key.NonceSize()], buf[key.NonceSize():]
 		plaintext, err = key.Open(plaintext[:0], nonce, plaintext, nil)
+		outputPrefix := ""
+		filePrefix := ""
 		if err != nil {
 			Warnf("error decrypting blob: %v\n", err)
-			var plain []byte
 			if tryRepair || repairByte {
-				plain = tryRepairWithBitflip(ctx, key, buf, repairByte)
+				plaintext = tryRepairWithBitflip(ctx, key, buf, repairByte)
 			}
-			var prefix string
-			if plain != nil {
-				id := restic.Hash(plain)
-				if !id.Equal(blob.ID) {
-					Printf("         repaired blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plain), id, blob.ID)
-					prefix = "repaired-wrong-hash-"
-				} else {
-					Printf("         successfully repaired blob (length %v), hash is %v, ID matches\n", len(plain), id)
-					prefix = "repaired-"
-				}
+			if plaintext != nil {
+				outputPrefix = "repaired "
+				filePrefix = "repaired-"
 			} else {
-				plain = decryptUnsigned(ctx, key, buf)
-				prefix = "damaged-"
+				plaintext = decryptUnsigned(ctx, key, buf)
+				err = storePlainBlob(blob.ID, "damaged-", plaintext)
+				if err != nil {
+					return err
+				}
+				continue
 			}
-			err = storePlainBlob(blob.ID, prefix, plain)
+		}
+
+		if blob.IsCompressed() {
+			decompressed, err := dec.DecodeAll(plaintext, nil)
 			if err != nil {
-				return err
+				Printf("         failed to decompress blob %v\n", blob.ID)
+			}
+			if decompressed != nil {
+				plaintext = decompressed
 			}
-			continue
 		}

 		id := restic.Hash(plaintext)
 		var prefix string
 		if !id.Equal(blob.ID) {
-			Printf("         successfully decrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", len(plaintext), id, blob.ID)
+			Printf("         successfully %vdecrypted blob (length %v), hash is %v, ID does not match, wanted %v\n", outputPrefix, len(plaintext), id, blob.ID)
 			prefix = "wrong-hash-"
 		} else {
-			Printf("         successfully decrypted blob (length %v), hash is %v, ID matches\n", len(plaintext), id)
+			Printf("         successfully %vdecrypted blob (length %v), hash is %v, ID matches\n", outputPrefix, len(plaintext), id)
 			prefix = "correct-"
 		}
 		if extractPack {
-			err = storePlainBlob(id, prefix, plaintext)
+			err = storePlainBlob(id, filePrefix+prefix, plaintext)
 			if err != nil {
 				return err
 			}
@ -476,27 +484,15 @@ func examinePack(ctx context.Context, repo restic.Repository, id restic.ID) erro

 	blobsLoaded := false
 	// examine all data the indexes have for the pack file
-	for _, idx := range repo.Index().(*repository.MasterIndex).All() {
-		idxIDs, err := idx.IDs()
-		if err != nil {
-			idxIDs = restic.IDs{}
-		}
-
-		blobs := idx.ListPack(id)
+	for b := range repo.Index().ListPacks(ctx, restic.NewIDSet(id)) {
+		blobs := b.Blobs
 		if len(blobs) == 0 {
 			continue
 		}

-		Printf("    index %v:\n", idxIDs)
+		checkPackSize(blobs, fi.Size)

-		// convert list of blobs to []restic.Blob
-		var list []restic.Blob
-		for _, b := range blobs {
-			list = append(list, b.Blob)
-		}
-		checkPackSize(list, fi.Size)
-
-		err = loadBlobs(ctx, repo, id, list)
+		err = loadBlobs(ctx, repo, id, blobs)
 		if err != nil {
 			Warnf("error: %v\n", err)
 		} else {
@ -532,14 +528,10 @@ func checkPackSize(blobs []restic.Blob, fileSize int64) {
 		if offset != uint64(pb.Offset) {
 			Printf("      hole in file, want offset %v, got %v\n", offset, pb.Offset)
 		}
-		offset += uint64(pb.Length)
+		offset = uint64(pb.Offset + pb.Length)
 		size += uint64(pb.Length)
 	}
-
-	// compute header size, per blob: 1 byte type, 4 byte length, 32 byte id
-	size += uint64(restic.CiphertextLength(len(blobs) * (1 + 4 + 32)))
-	// length in uint32 little endian
-	size += 4
+	size += uint64(pack.CalculateHeaderSize(blobs))

 	if uint64(fileSize) != size {
 		Printf("      file sizes do not match: computed %v from index, file size is %v\n", size, fileSize)
--- a/cmd/restic/cmd_init.go
+++ b/cmd/restic/cmd_init.go
@ -1,10 +1,13 @@
 package main

 import (
+	"strconv"
+
 	"github.com/restic/chunker"
 	"github.com/restic/restic/internal/backend/location"
 	"github.com/restic/restic/internal/errors"
 	"github.com/restic/restic/internal/repository"
+	"github.com/restic/restic/internal/restic"

 	"github.com/spf13/cobra"
 )
@ -30,6 +33,7 @@ Exit status is 0 if the command was successful, and non-zero if there was any er
 type InitOptions struct {
 	secondaryRepoOptions
 	CopyChunkerParameters bool
+	RepositoryVersion     string
 }

 var initOptions InitOptions
@ -40,9 +44,26 @@ func init() {
 	f := cmdInit.Flags()
 	initSecondaryRepoOptions(f, &initOptions.secondaryRepoOptions, "secondary", "to copy chunker parameters from")
 	f.BoolVar(&initOptions.CopyChunkerParameters, "copy-chunker-params", false, "copy chunker parameters from the secondary repository (useful with the copy command)")
+	f.StringVar(&initOptions.RepositoryVersion, "repository-version", "stable", "repository format version to use, allowed values are a format version, 'latest' and 'stable'")
 }

 func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
+	var version uint
+	if opts.RepositoryVersion == "latest" || opts.RepositoryVersion == "" {
+		version = restic.MaxRepoVersion
+	} else if opts.RepositoryVersion == "stable" {
+		version = restic.StableRepoVersion
+	} else {
+		v, err := strconv.ParseUint(opts.RepositoryVersion, 10, 32)
+		if err != nil {
+			return errors.Fatal("invalid repository version")
+		}
+		version = uint(v)
+	}
+	if version < restic.MinRepoVersion || version > restic.MaxRepoVersion {
+		return errors.Fatalf("only repository versions between %v and %v are allowed", restic.MinRepoVersion, restic.MaxRepoVersion)
+	}
+
 	chunkerPolynomial, err := maybeReadChunkerPolynomial(opts, gopts)
 	if err != nil {
 		return err
@ -65,9 +86,9 @@ func runInit(opts InitOptions, gopts GlobalOptions, args []string) error {
 		return errors.Fatalf("create repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
 	}

-	s := repository.New(be)
+	s := repository.New(be, repository.Options{Compression: gopts.Compression})

-	err = s.Init(gopts.ctx, gopts.password, chunkerPolynomial)
+	err = s.Init(gopts.ctx, version, gopts.password, chunkerPolynomial)
 	if err != nil {
 		return errors.Fatalf("create key in repository at %s failed: %v\n", location.StripPassword(gopts.Repo), err)
 	}
--- a/cmd/restic/global.go
+++ b/cmd/restic/global.go
@ -64,6 +64,7 @@ type GlobalOptions struct {
 	InsecureTLS     bool
 	TLSClientCert   string
 	CleanupCache    bool
+	Compression     repository.CompressionMode

 	LimitUploadKb   int
 	LimitDownloadKb int
@ -120,6 +121,7 @@ func init() {
 	f.StringVar(&globalOptions.TLSClientCert, "tls-client-cert", "", "path to a `file` containing PEM encoded TLS client certificate and private key")
 	f.BoolVar(&globalOptions.InsecureTLS, "insecure-tls", false, "skip TLS certificate verification when connecting to the repo (insecure)")
 	f.BoolVar(&globalOptions.CleanupCache, "cleanup-cache", false, "auto remove old cache directories")
+	f.Var(&globalOptions.Compression, "compression", "compression mode (only available for repo format version 2), one of (auto|off|max)")
 	f.IntVar(&globalOptions.LimitUploadKb, "limit-upload", 0, "limits uploads to a maximum rate in KiB/s. (default: unlimited)")
 	f.IntVar(&globalOptions.LimitDownloadKb, "limit-download", 0, "limits downloads to a maximum rate in KiB/s. (default: unlimited)")
 	f.StringSliceVarP(&globalOptions.Options, "option", "o", []string{}, "set extended option (`key=value`, can be specified multiple times)")
@ -435,7 +437,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
 		}
 	}

-	s := repository.New(be)
+	s := repository.New(be, repository.Options{Compression: opts.Compression})

 	passwordTriesLeft := 1
 	if stdinIsTerminal() && opts.password == "" {
@ -471,7 +473,7 @@ func OpenRepository(opts GlobalOptions) (*repository.Repository, error) {
 			id = id[:8]
 		}
 		if !opts.JSON {
-			Verbosef("repository %v opened successfully, password is correct\n", id)
+			Verbosef("repository %v opened (repo version %v) successfully, password is correct\n", id, s.Config().Version)
 		}
 	}

--- a/cmd/restic/integration_test.go
+++ b/cmd/restic/integration_test.go
@ -1470,7 +1470,7 @@ func TestRebuildIndexAlwaysFull(t *testing.T) {
 	defer func() {
 		repository.IndexFull = indexFull
 	}()
-	repository.IndexFull = func(*repository.Index) bool { return true }
+	repository.IndexFull = func(*repository.Index, bool) bool { return true }
 	testRebuildIndex(t, nil)
 }

--- a/doc/030_preparing_a_new_repo.rst
+++ b/doc/030_preparing_a_new_repo.rst
@ -35,6 +35,13 @@ options exist:
 * Configuring a program to be called when the password is needed via the
   option ``--password-command`` or the environment variable
   ``RESTIC_PASSWORD_COMMAND``
+   
+ * The ``init`` command has an option called ``--repository-version`` which can
+   be used to explicitly set the version for the new repository. By default,
+   the current stable version is used. Have a look at the `design documentation
+   <https://github.com/restic/restic/blob/master/doc/design.rst>`__ for
+   details.
+

 Local
 *****
@ -692,4 +699,3 @@ On MSYS2, you can install ``winpty`` as follows:

    $ pacman -S winpty
    $ winpty restic -r /srv/restic-repo init
-
--- a/doc/design.rst
+++ b/doc/design.rst
@ -62,28 +62,30 @@ like the following:
 .. code:: json

    {
-      "version": 1,
+      "version": 2,
      "id": "5956a3f67a6230d4a92cefb29529f10196c7d92582ec305fd71ff6d331d6271b",
      "chunker_polynomial": "25b468838dcb75"
    }

 After decryption, restic first checks that the version field contains a
-version number that it understands, otherwise it aborts. At the moment,
-the version is expected to be 1. The field ``id`` holds a unique ID
-which consists of 32 random bytes, encoded in hexadecimal. This uniquely
-identifies the repository, regardless if it is accessed via SFTP or
-locally. The field ``chunker_polynomial`` contains a parameter that is
-used for splitting large files into smaller chunks (see below).
+version number that it understands, otherwise it aborts. At the moment, the
+version is expected to be 1 or 2. The list of changes in the repository
+format is contained in the section "Changes" below.
+
+The field ``id`` holds a unique ID which consists of 32 random bytes, encoded
+in hexadecimal. This uniquely identifies the repository, regardless if it is
+accessed via a remote storage backend or locally. The field
+``chunker_polynomial`` contains a parameter that is used for splitting large
+files into smaller chunks (see below).

 Repository Layout
 -----------------

 The ``local`` and ``sftp`` backends are implemented using files and
 directories stored in a file system. The directory layout is the same
-for both backend types.
+for both backend types and is also used for all other remote backends.

-The basic layout of a repository stored in a ``local`` or ``sftp``
-backend is shown here:
+The basic layout of a repository is shown here:

 ::

@ -109,8 +111,7 @@ backend is shown here:
    │   └── 22a5af1bdc6e616f8a29579458c49627e01b32210d09adb288d1ecda7c5711ec
    └── tmp

-A local repository can be initialized with the ``restic init`` command,
-e.g.:
+A local repository can be initialized with the ``restic init`` command, e.g.:

 .. code-block:: console

@ -186,40 +187,75 @@ After decryption, a Pack's header consists of the following elements:

 ::

-    Type_Blob1 || Length(EncryptedBlob1) || Hash(Plaintext_Blob1) ||
+    Type_Blob1 || Data_Blob1 ||
    [...]
-    Type_BlobN || Length(EncryptedBlobN) || Hash(Plaintext_Blobn) ||
+    Type_BlobN || Data_BlobN ||
+
+The Blob type field is a single byte. What follows it depends on the type. The
+following Blob types are defined:
+
++-----------+----------------------+-------------------------------------------------------------------------------+
+| Type      | Meaning              |  Data                                                                         |
++===========+======================+===============================================================================+
+| 0b00      | data blob            |  ``Length(encrypted_blob) || Hash(plaintext_blob)``                           |
++-----------+----------------------+-------------------------------------------------------------------------------+
+| 0b01      | tree blob            |  ``Length(encrypted_blob) || Hash(plaintext_blob)``                           |
++-----------+----------------------+-------------------------------------------------------------------------------+
+| 0b10      | compressed data blob |  ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
++-----------+----------------------+-------------------------------------------------------------------------------+
+| 0b11      | compressed tree blob |  ``Length(encrypted_blob) || Length(plaintext_blob) || Hash(plaintext_blob)`` |
++-----------+----------------------+-------------------------------------------------------------------------------+

 This is enough to calculate the offsets for all the Blobs in the Pack.
-Length is the length of a Blob as a four byte integer in little-endian
-format. The type field is a one byte field and labels the content of a
-blob according to the following table:
+The length fields are encoded as four byte integers in little-endian
+format. In the Data column, ``Length(plaintext_blob)`` means the length
+of the decrypted and uncompressed data a blob consists of.

-+--------+-----------+
-| Type   | Meaning   |
-+========+===========+
-| 0      | data      |
-+--------+-----------+
-| 1      | tree      |
-+--------+-----------+
+All other types are invalid, more types may be added in the future. The
+compressed types are only valid for repository format version 2. Data and
+tree blobs may be compressed with the zstandard compression algorithm.

-All other types are invalid, more types may be added in the future.
+In repository format version 1, data and tree blobs should be stored in
+separate pack files. In version 2, they must be stored in separate files.
+Compressed and non-compressed blobs of the same type may be mixed in a pack
+file.

 For reconstructing the index or parsing a pack without an index, first
 the last four bytes must be read in order to find the length of the
 header. Afterwards, the header can be read and parsed, which yields all
 plaintext hashes, types, offsets and lengths of all included blobs.

+Unpacked Data Format
+====================
+
+Individual files for the index, locks or snapshots are encrypted
+and authenticated like Data and Tree Blobs, so the outer structure is
+``IV || Ciphertext || MAC`` again. In repository format version 1 the
+plaintext always consists of a JSON document which must either be an
+object or an array.
+
+Repository format version 2 adds support for compression. The plaintext
+now starts with a header to indicate the encoding version to distinguish
+it from plain JSON and to allow for further evolution of the storage format:
+``encoding_version || data``
+The ``encoding_version`` field is encoded as one byte.
+For backwards compatibility the encoding versions '[' (0x5b) and '{' (0x7b)
+are used to mark that the whole plaintext (including the encoding version
+byte) should be treated as a JSON document.
+
+For new data the encoding version is currently always ``2``. For that
+version ``data`` contains a JSON document compressed using the zstandard
+compression algorithm.
+
 Indexing
 ========

 Index files contain information about Data and Tree Blobs and the Packs
 they are contained in and store this information in the repository. When
 the local cached index is not accessible any more, the index files can
-be downloaded and used to reconstruct the index. The files are encrypted
-and authenticated like Data and Tree Blobs, so the outer structure is
-``IV || Ciphertext || MAC`` again. The plaintext consists of a JSON
-document like the following:
+be downloaded and used to reconstruct the index. The file encoding is
+described in the "Unpacked Data Format" section. The plaintext consists
+of a JSON document like the following:

 .. code:: json

@ -235,18 +271,22 @@ document like the following:
              "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
              "type": "data",
              "offset": 0,
-              "length": 25
-            },{
+              "length": 38,
+              // no 'uncompressed_length' as blob is not compressed
+            },
+            {
              "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
              "type": "tree",
              "offset": 38,
-              "length": 100
+              "length": 112,
+              "uncompressed_length": 511,
            },
            {
              "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
              "type": "data",
              "offset": 150,
-              "length": 123
+              "length": 123,
+              "uncompressed_length": 234,
            }
          ]
        }, [...]
@ -255,7 +295,11 @@ document like the following:

 This JSON document lists Packs and the blobs contained therein. In this
 example, the Pack ``73d04e61`` contains two data Blobs and one Tree
-blob, the plaintext hashes are listed afterwards.
+blob, the plaintext hashes are listed afterwards. The ``length`` field
+corresponds to ``Length(encrypted_blob)`` in the pack file header.
+Field ``uncompressed_length`` is only present for compressed blobs and
+therefore is never present in version 1. It is set to the value of
+``Length(plaintext_blob)`` from the pack file header.

 The field ``supersedes`` lists the storage IDs of index files that have
 been replaced with the current index file. This happens when index files
@ -350,8 +394,9 @@ Snapshots

 A snapshot represents a directory with all files and sub-directories at
 a given point in time. For each backup that is made, a new snapshot is
-created. A snapshot is a JSON document that is stored in an encrypted
-file below the directory ``snapshots`` in the repository. The filename
+created. A snapshot is a JSON document that is stored in a file below
+the directory ``snapshots`` in the repository. It uses the file encoding
+described in the "Unpacked Data Format" section. The filename
 is the storage ID. This string is unique and used within restic to
 uniquely identify a snapshot.

@ -412,7 +457,7 @@ Blobs of data. The SHA-256 hashes of all Blobs are saved in an ordered
 list which then represents the content of the file.

 In order to relate these plaintext hashes to the actual location within
-a Pack file , an index is used. If the index is not available, the
+a Pack file, an index is used. If the index is not available, the
 header of all data Blobs can be read.

 Trees and Data
@ -517,8 +562,8 @@ time there must not be any other locks (exclusive and non-exclusive).
 There may be multiple non-exclusive locks in parallel.

 A lock is a file in the subdir ``locks`` whose filename is the storage
-ID of the contents. It is encrypted and authenticated the same way as
-other files in the repository and contains the following JSON structure:
+ID of the contents. It is stored in the file encoding described in the
+"Unpacked Data Format" section and contains the following JSON structure:

 .. code:: json

@ -721,3 +766,11 @@ An adversary who has a leaked (decrypted) key for a repository could:
   only be done using the ``copy`` command, which moves the data into a new
   repository with a new master key, or by making a completely new repository
   and new backup.
+
+Changes
+=======
+
+Repository Version 2
+--------------------
+
+ * Support compression for blobs (data/tree) and index / lock / snapshot files
--- a/go.mod
+++ b/go.mod
@ -21,7 +21,7 @@ require (
 	github.com/hashicorp/golang-lru v0.5.4
 	github.com/json-iterator/go v1.1.12 // indirect
 	github.com/juju/ratelimit v1.0.1
-	github.com/klauspost/compress v1.15.1 // indirect
+	github.com/klauspost/compress v1.15.1
 	github.com/klauspost/cpuid/v2 v2.0.12 // indirect
 	github.com/kurin/blazer v0.5.4-0.20211030221322-ba894c124ac6
 	github.com/minio/md5-simd v1.1.2 // indirect
--- a/internal/archiver/archiver_test.go
+++ b/internal/archiver/archiver_test.go
@ -1894,7 +1894,7 @@ func TestArchiverContextCanceled(t *testing.T) {
 	defer removeTempdir()

 	// Ensure that the archiver itself reports the canceled context and not just the backend
-	repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()})
+	repo, _ := repository.TestRepositoryWithBackend(t, &noCancelBackend{mem.New()}, 0)

 	back := restictest.Chdir(t, tempdir)
 	defer back()
--- a/internal/checker/checker_test.go
+++ b/internal/checker/checker_test.go
@ -350,7 +350,7 @@ func TestCheckerModifiedData(t *testing.T) {
 	t.Logf("archived as %v", sn.ID().Str())

 	beError := &errorBackend{Backend: repo.Backend()}
-	checkRepo := repository.New(beError)
+	checkRepo := repository.New(beError, repository.Options{})
 	test.OK(t, checkRepo.SearchKey(context.TODO(), test.TestPassword, 5, ""))

 	chkr := checker.New(checkRepo, false)
--- a/internal/pack/pack.go
+++ b/internal/pack/pack.go
@ -32,7 +32,7 @@ func NewPacker(k *crypto.Key, wr io.Writer) *Packer {

 // Add saves the data read from rd as a new blob to the packer. Returned is the
 // number of bytes written to the pack.
-func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error) {
+func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte, uncompressedLength int) (int, error) {
 	p.m.Lock()
 	defer p.m.Unlock()

@ -41,13 +41,15 @@ func (p *Packer) Add(t restic.BlobType, id restic.ID, data []byte) (int, error)
 	n, err := p.wr.Write(data)
 	c.Length = uint(n)
 	c.Offset = p.bytes
+	c.UncompressedLength = uint(uncompressedLength)
 	p.bytes += uint(n)
 	p.blobs = append(p.blobs, c)

 	return n, errors.Wrap(err, "Write")
 }

-var entrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))
+var entrySize = uint(binary.Size(restic.BlobType(0)) + 2*headerLengthSize + len(restic.ID{}))
+var plainEntrySize = uint(binary.Size(restic.BlobType(0)) + headerLengthSize + len(restic.ID{}))

 // headerEntry describes the format of header entries. It serves only as
 // documentation.
@ -57,6 +59,15 @@ type headerEntry struct {
 	ID     restic.ID
 }

+// compressedHeaderEntry describes the format of header entries for compressed blobs.
+// It serves only as documentation.
+type compressedHeaderEntry struct {
+	Type               uint8
+	Length             uint32
+	UncompressedLength uint32
+	ID                 restic.ID
+}
+
 // Finalize writes the header for all added blobs and finalizes the pack.
 // Returned are the number of bytes written, including the header.
 func (p *Packer) Finalize() (uint, error) {
@ -70,7 +81,7 @@ func (p *Packer) Finalize() (uint, error) {
 		return 0, err
 	}

-	encryptedHeader := make([]byte, 0, len(header)+p.k.Overhead()+p.k.NonceSize())
+	encryptedHeader := make([]byte, 0, restic.CiphertextLength(len(header)))
 	nonce := crypto.NewRandomNonce()
 	encryptedHeader = append(encryptedHeader, nonce...)
 	encryptedHeader = p.k.Seal(encryptedHeader, nonce, header, nil)
@ -81,7 +92,7 @@ func (p *Packer) Finalize() (uint, error) {
 		return 0, errors.Wrap(err, "Write")
 	}

-	hdrBytes := restic.CiphertextLength(len(header))
+	hdrBytes := len(encryptedHeader)
 	if n != hdrBytes {
 		return 0, errors.New("wrong number of bytes written")
 	}
@ -104,11 +115,15 @@ func (p *Packer) makeHeader() ([]byte, error) {
 	buf := make([]byte, 0, len(p.blobs)*int(entrySize))

 	for _, b := range p.blobs {
-		switch b.Type {
-		case restic.DataBlob:
+		switch {
+		case b.Type == restic.DataBlob && b.UncompressedLength == 0:
 			buf = append(buf, 0)
-		case restic.TreeBlob:
+		case b.Type == restic.TreeBlob && b.UncompressedLength == 0:
 			buf = append(buf, 1)
+		case b.Type == restic.DataBlob && b.UncompressedLength != 0:
+			buf = append(buf, 2)
+		case b.Type == restic.TreeBlob && b.UncompressedLength != 0:
+			buf = append(buf, 3)
 		default:
 			return nil, errors.Errorf("invalid blob type %v", b.Type)
 		}
@ -116,6 +131,10 @@ func (p *Packer) makeHeader() ([]byte, error) {
 		var lenLE [4]byte
 		binary.LittleEndian.PutUint32(lenLE[:], uint32(b.Length))
 		buf = append(buf, lenLE[:]...)
+		if b.UncompressedLength != 0 {
+			binary.LittleEndian.PutUint32(lenLE[:], uint32(b.UncompressedLength))
+			buf = append(buf, lenLE[:]...)
+		}
 		buf = append(buf, b.ID[:]...)
 	}

@ -152,7 +171,7 @@ func (p *Packer) String() string {

 var (
 	// we require at least one entry in the header, and one blob for a pack file
-	minFileSize = entrySize + crypto.Extension + uint(headerLengthSize)
+	minFileSize = plainEntrySize + crypto.Extension + uint(headerLengthSize)
 )

 const (
@ -167,16 +186,11 @@ const (
 	eagerEntries = 15
 )

-// readRecords reads up to max records from the underlying ReaderAt, returning
-// the raw header, the total number of records in the header, and any error.
-// If the header contains fewer than max entries, the header is truncated to
+// readRecords reads up to bufsize bytes from the underlying ReaderAt, returning
+// the raw header, the total number of bytes in the header, and any error.
+// If the header contains fewer than bufsize bytes, the header is truncated to
 // the appropriate size.
-func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
-	var bufsize int
-	bufsize += max * int(entrySize)
-	bufsize += crypto.Extension
-	bufsize += headerLengthSize
-
+func readRecords(rd io.ReaderAt, size int64, bufsize int) ([]byte, int, error) {
 	if bufsize > int(size) {
 		bufsize = int(size)
 	}
@ -197,8 +211,6 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		err = InvalidFileError{Message: "header length is zero"}
 	case hlen < crypto.Extension:
 		err = InvalidFileError{Message: "header length is too small"}
-	case (hlen-crypto.Extension)%uint32(entrySize) != 0:
-		err = InvalidFileError{Message: "header length is invalid"}
 	case int64(hlen) > size-int64(headerLengthSize):
 		err = InvalidFileError{Message: "header is larger than file"}
 	case int64(hlen) > MaxHeaderSize-int64(headerLengthSize):
@ -208,8 +220,8 @@ func readRecords(rd io.ReaderAt, size int64, max int) ([]byte, int, error) {
 		return nil, 0, errors.Wrap(err, "readHeader")
 	}

-	total := (int(hlen) - crypto.Extension) / int(entrySize)
-	if total < max {
+	total := int(hlen + headerLengthSize)
+	if total < bufsize {
 		// truncate to the beginning of the pack header
 		b = b[len(b)-int(hlen):]
 	}
@ -230,11 +242,12 @@ func readHeader(rd io.ReaderAt, size int64) ([]byte, error) {
 	// eagerly download eagerEntries header entries as part of header-length request.
 	// only make second request if actual number of entries is greater than eagerEntries

-	b, c, err := readRecords(rd, size, eagerEntries)
+	eagerSize := eagerEntries*int(entrySize) + headerSize
+	b, c, err := readRecords(rd, size, eagerSize)
 	if err != nil {
 		return nil, err
 	}
-	if c <= eagerEntries {
+	if c <= eagerSize {
 		// eager read sufficed, return what we got
 		return b, nil
 	}
@ -262,7 +275,7 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
 		return nil, 0, err
 	}

-	if len(buf) < k.NonceSize()+k.Overhead() {
+	if len(buf) < restic.CiphertextLength(0) {
 		return nil, 0, errors.New("invalid header, too small")
 	}

@ -274,11 +287,12 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr
 		return nil, 0, err
 	}

-	entries = make([]restic.Blob, 0, uint(len(buf))/entrySize)
+	// might over allocate a bit if all blobs have EntrySize but only by a few percent
+	entries = make([]restic.Blob, 0, uint(len(buf))/plainEntrySize)

 	pos := uint(0)
 	for len(buf) > 0 {
-		entry, err := parseHeaderEntry(buf)
+		entry, headerSize, err := parseHeaderEntry(buf)
 		if err != nil {
 			return nil, 0, err
 		}
@ -286,36 +300,60 @@ func List(k *crypto.Key, rd io.ReaderAt, size int64) (entries []restic.Blob, hdr

 		entries = append(entries, entry)
 		pos += entry.Length
-		buf = buf[entrySize:]
+		buf = buf[headerSize:]
 	}

 	return entries, hdrSize, nil
 }

-func parseHeaderEntry(p []byte) (b restic.Blob, err error) {
-	if uint(len(p)) < entrySize {
+func parseHeaderEntry(p []byte) (b restic.Blob, size uint, err error) {
+	l := uint(len(p))
+	size = plainEntrySize
+	if l < plainEntrySize {
 		err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
-		return b, err
+		return b, size, err
 	}
-	p = p[:entrySize]
+	tpe := p[0]

-	switch p[0] {
-	case 0:
+	switch tpe {
+	case 0, 2:
 		b.Type = restic.DataBlob
-	case 1:
+	case 1, 3:
 		b.Type = restic.TreeBlob
 	default:
-		return b, errors.Errorf("invalid type %d", p[0])
+		return b, size, errors.Errorf("invalid type %d", tpe)
 	}

 	b.Length = uint(binary.LittleEndian.Uint32(p[1:5]))
-	copy(b.ID[:], p[5:])
+	p = p[5:]
+	if tpe == 2 || tpe == 3 {
+		size = entrySize
+		if l < entrySize {
+			err = errors.Errorf("parseHeaderEntry: buffer of size %d too short", len(p))
+			return b, size, err
+		}
+		b.UncompressedLength = uint(binary.LittleEndian.Uint32(p[0:4]))
+		p = p[4:]
+	}

-	return b, nil
+	copy(b.ID[:], p[:])
+
+	return b, size, nil
+}
+
+func CalculateEntrySize(blob restic.Blob) int {
+	if blob.UncompressedLength != 0 {
+		return int(entrySize)
+	}
+	return int(plainEntrySize)
 }

 func CalculateHeaderSize(blobs []restic.Blob) int {
-	return headerSize + len(blobs)*int(entrySize)
+	size := headerSize
+	for _, blob := range blobs {
+		size += CalculateEntrySize(blob)
+	}
+	return size
 }

 // Size returns the size of all packs computed by index information.
@ -333,7 +371,7 @@ func Size(ctx context.Context, mi restic.MasterIndex, onlyHdr bool) map[restic.I
 		if !onlyHdr {
 			size += int64(blob.Length)
 		}
-		packSize[blob.PackID] = size + int64(entrySize)
+		packSize[blob.PackID] = size + int64(CalculateEntrySize(blob.Blob))
 	}

 	return packSize
--- a/internal/pack/pack_internal_test.go
+++ b/internal/pack/pack_internal_test.go
@ -13,7 +13,7 @@ import (

 func TestParseHeaderEntry(t *testing.T) {
 	h := headerEntry{
-		Type:   0, // Blob.
+		Type:   0, // Blob
 		Length: 100,
 	}
 	for i := range h.ID {
@ -23,25 +23,58 @@ func TestParseHeaderEntry(t *testing.T) {
 	buf := new(bytes.Buffer)
 	_ = binary.Write(buf, binary.LittleEndian, &h)

-	b, err := parseHeaderEntry(buf.Bytes())
+	b, size, err := parseHeaderEntry(buf.Bytes())
 	rtest.OK(t, err)
 	rtest.Equals(t, restic.DataBlob, b.Type)
+	rtest.Equals(t, plainEntrySize, size)
 	t.Logf("%v %v", h.ID, b.ID)
-	rtest.Assert(t, bytes.Equal(h.ID[:], b.ID[:]), "id mismatch")
+	rtest.Equals(t, h.ID[:], b.ID[:])
 	rtest.Equals(t, uint(h.Length), b.Length)
+	rtest.Equals(t, uint(0), b.UncompressedLength)
+
+	c := compressedHeaderEntry{
+		Type:               2, // compressed Blob
+		Length:             100,
+		UncompressedLength: 200,
+	}
+	for i := range c.ID {
+		c.ID[i] = byte(i)
+	}
+
+	buf = new(bytes.Buffer)
+	_ = binary.Write(buf, binary.LittleEndian, &c)
+
+	b, size, err = parseHeaderEntry(buf.Bytes())
+	rtest.OK(t, err)
+	rtest.Equals(t, restic.DataBlob, b.Type)
+	rtest.Equals(t, entrySize, size)
+	t.Logf("%v %v", c.ID, b.ID)
+	rtest.Equals(t, c.ID[:], b.ID[:])
+	rtest.Equals(t, uint(c.Length), b.Length)
+	rtest.Equals(t, uint(c.UncompressedLength), b.UncompressedLength)
+}
+
+func TestParseHeaderEntryErrors(t *testing.T) {
+	h := headerEntry{
+		Type:   0, // Blob
+		Length: 100,
+	}
+	for i := range h.ID {
+		h.ID[i] = byte(i)
+	}

 	h.Type = 0xae
-	buf.Reset()
+	buf := new(bytes.Buffer)
 	_ = binary.Write(buf, binary.LittleEndian, &h)

-	b, err = parseHeaderEntry(buf.Bytes())
+	_, _, err := parseHeaderEntry(buf.Bytes())
 	rtest.Assert(t, err != nil, "no error for invalid type")

 	h.Type = 0
 	buf.Reset()
 	_ = binary.Write(buf, binary.LittleEndian, &h)

-	b, err = parseHeaderEntry(buf.Bytes()[:entrySize-1])
+	_, _, err = parseHeaderEntry(buf.Bytes()[:plainEntrySize-1])
 	rtest.Assert(t, err != nil, "no error for short input")
 }

@ -97,7 +130,8 @@ func TestReadHeaderEagerLoad(t *testing.T) {
 func TestReadRecords(t *testing.T) {
 	testReadRecords := func(dataSize, entryCount, totalRecords int) {
 		totalHeader := rtest.Random(0, totalRecords*int(entrySize)+crypto.Extension)
-		off := len(totalHeader) - (entryCount*int(entrySize) + crypto.Extension)
+		bufSize := entryCount*int(entrySize) + crypto.Extension
+		off := len(totalHeader) - bufSize
 		if off < 0 {
 			off = 0
 		}
@ -110,10 +144,10 @@ func TestReadRecords(t *testing.T) {

 		rd := bytes.NewReader(buf.Bytes())

-		header, count, err := readRecords(rd, int64(rd.Len()), entryCount)
+		header, count, err := readRecords(rd, int64(rd.Len()), bufSize+4)
 		rtest.OK(t, err)
+		rtest.Equals(t, len(totalHeader)+4, count)
 		rtest.Equals(t, expectedHeader, header)
-		rtest.Equals(t, totalRecords, count)
 	}

 	// basic
--- a/internal/pack/pack_test.go
+++ b/internal/pack/pack_test.go
@ -38,7 +38,7 @@ func newPack(t testing.TB, k *crypto.Key, lengths []int) ([]Buf, []byte, uint) {
 	var buf bytes.Buffer
 	p := pack.NewPacker(k, &buf)
 	for _, b := range bufs {
-		_, err := p.Add(restic.TreeBlob, b.id, b.data)
+		_, err := p.Add(restic.TreeBlob, b.id, b.data, 2*len(b.data))
 		rtest.OK(t, err)
 	}

--- a/internal/repository/index.go
+++ b/internal/repository/index.go
@ -75,12 +75,12 @@ const maxuint32 = 1<<32 - 1

 func (idx *Index) store(packIndex int, blob restic.Blob) {
 	// assert that offset and length fit into uint32!
-	if blob.Offset > maxuint32 || blob.Length > maxuint32 {
+	if blob.Offset > maxuint32 || blob.Length > maxuint32 || blob.UncompressedLength > maxuint32 {
 		panic("offset or length does not fit in uint32. You have packs > 4GB!")
 	}

 	m := &idx.byType[blob.Type]
-	m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length))
+	m.add(blob.ID, packIndex, uint32(blob.Offset), uint32(blob.Length), uint32(blob.UncompressedLength))
 }

 // Final returns true iff the index is already written to the repository, it is
@ -93,12 +93,13 @@ func (idx *Index) Final() bool {
 }

 const (
-	indexMaxBlobs = 50000
-	indexMaxAge   = 10 * time.Minute
+	indexMaxBlobs           = 50000
+	indexMaxBlobsCompressed = 3 * indexMaxBlobs
+	indexMaxAge             = 10 * time.Minute
 )

 // IndexFull returns true iff the index is "full enough" to be saved as a preliminary index.
-var IndexFull = func(idx *Index) bool {
+var IndexFull = func(idx *Index, compress bool) bool {
 	idx.m.Lock()
 	defer idx.m.Unlock()

@ -109,12 +110,18 @@ var IndexFull = func(idx *Index) bool {
 		blobs += idx.byType[typ].len()
 	}
 	age := time.Since(idx.created)
+	var maxBlobs uint
+	if compress {
+		maxBlobs = indexMaxBlobsCompressed
+	} else {
+		maxBlobs = indexMaxBlobs
+	}

 	switch {
 	case age >= indexMaxAge:
 		debug.Log("index %p is old enough", idx, age)
 		return true
-	case blobs >= indexMaxBlobs:
+	case blobs >= maxBlobs:
 		debug.Log("index %p has %d blobs", idx, blobs)
 		return true
 	}
@ -169,8 +176,9 @@ func (idx *Index) toPackedBlob(e *indexEntry, t restic.BlobType) restic.PackedBl
 			BlobHandle: restic.BlobHandle{
 				ID:   e.id,
 				Type: t},
-			Length: uint(e.length),
-			Offset: uint(e.offset),
+			Length:             uint(e.length),
+			Offset:             uint(e.offset),
+			UncompressedLength: uint(e.uncompressedLength),
 		},
 		PackID: idx.packs[e.packIndex],
 	}
@ -225,6 +233,9 @@ func (idx *Index) LookupSize(bh restic.BlobHandle) (plaintextLength uint, found
 	if e == nil {
 		return 0, false
 	}
+	if e.uncompressedLength != 0 {
+		return uint(e.uncompressedLength), true
+	}
 	return uint(restic.PlaintextLength(int(e.length))), true
 }

@ -357,10 +368,11 @@ type packJSON struct {
 }

 type blobJSON struct {
-	ID     restic.ID       `json:"id"`
-	Type   restic.BlobType `json:"type"`
-	Offset uint            `json:"offset"`
-	Length uint            `json:"length"`
+	ID                 restic.ID       `json:"id"`
+	Type               restic.BlobType `json:"type"`
+	Offset             uint            `json:"offset"`
+	Length             uint            `json:"length"`
+	UncompressedLength uint            `json:"uncompressed_length,omitempty"`
 }

 // generatePackList returns a list of packs.
@ -391,10 +403,11 @@ func (idx *Index) generatePackList() ([]*packJSON, error) {

 			// add blob
 			p.Blobs = append(p.Blobs, blobJSON{
-				ID:     e.id,
-				Type:   restic.BlobType(typ),
-				Offset: uint(e.offset),
-				Length: uint(e.length),
+				ID:                 e.id,
+				Type:               restic.BlobType(typ),
+				Offset:             uint(e.offset),
+				Length:             uint(e.length),
+				UncompressedLength: uint(e.uncompressedLength),
 			})

 			return true
@ -553,7 +566,7 @@ func (idx *Index) merge(idx2 *Index) error {
 		m2.foreach(func(e2 *indexEntry) bool {
 			if !hasIdenticalEntry(e2) {
 				// packIndex needs to be changed as idx2.pack was appended to idx.pack, see above
-				m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length)
+				m.add(e2.id, e2.packIndex+packlen, e2.offset, e2.length, e2.uncompressedLength)
 			}
 			return true
 		})
@ -601,8 +614,9 @@ func DecodeIndex(buf []byte, id restic.ID) (idx *Index, oldFormat bool, err erro
 				BlobHandle: restic.BlobHandle{
 					Type: blob.Type,
 					ID:   blob.ID},
-				Offset: blob.Offset,
-				Length: blob.Length,
+				Offset:             blob.Offset,
+				Length:             blob.Length,
+				UncompressedLength: blob.UncompressedLength,
 			})

 			switch blob.Type {
@ -648,6 +662,7 @@ func decodeOldIndex(buf []byte) (idx *Index, err error) {
 					ID:   blob.ID},
 				Offset: blob.Offset,
 				Length: blob.Length,
+				// no compressed length in the old index format
 			})

 			switch blob.Type {
--- a/internal/repository/index_test.go
+++ b/internal/repository/index_test.go
@ -23,11 +23,17 @@ func TestIndexSerialize(t *testing.T) {
 		pos := uint(0)
 		for j := 0; j < 20; j++ {
 			length := uint(i*100 + j)
+			uncompressedLength := uint(0)
+			if i >= 25 {
+				// test a mix of compressed and uncompressed packs
+				uncompressedLength = 2 * length
+			}
 			pb := restic.PackedBlob{
 				Blob: restic.Blob{
-					BlobHandle: restic.NewRandomBlobHandle(),
-					Offset:     pos,
-					Length:     length,
+					BlobHandle:         restic.NewRandomBlobHandle(),
+					Offset:             pos,
+					Length:             length,
+					UncompressedLength: uncompressedLength,
 				},
 				PackID: packID,
 			}
@ -164,7 +170,7 @@ func TestIndexSize(t *testing.T) {
 }

 // example index serialization from doc/Design.rst
-var docExample = []byte(`
+var docExampleV1 = []byte(`
 {
  "supersedes": [
 	"ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
@ -177,12 +183,12 @@ var docExample = []byte(`
 		  "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
 		  "type": "data",
 		  "offset": 0,
-		  "length": 25
+		  "length": 38
 		},{
 		  "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
 		  "type": "tree",
 		  "offset": 38,
-		  "length": 100
+		  "length": 112
 		},
 		{
 		  "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -196,6 +202,41 @@ var docExample = []byte(`
 }
 `)

+var docExampleV2 = []byte(`
+{
+	"supersedes": [
+	  "ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452"
+	],
+	"packs": [
+	  {
+		"id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
+		"blobs": [
+		  {
+			"id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
+			"type": "data",
+			"offset": 0,
+			"length": 38
+		  },
+		  {
+			"id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
+			"type": "tree",
+			"offset": 38,
+			"length": 112,
+			"uncompressed_length": 511
+		  },
+		  {
+			"id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
+			"type": "data",
+			"offset": 150,
+			"length": 123,
+			"uncompressed_length": 234
+		  }
+		]
+	  }
+	]
+  }
+`)
+
 var docOldExample = []byte(`
 [ {
  "id": "73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c",
@ -204,12 +245,12 @@ var docOldExample = []byte(`
 	  "id": "3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce",
 	  "type": "data",
 	  "offset": 0,
-	  "length": 25
+	  "length": 38
 	},{
 	  "id": "9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae",
 	  "type": "tree",
 	  "offset": 38,
-	  "length": 100
+	  "length": 112
 	},
 	{
 	  "id": "d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66",
@ -222,22 +263,23 @@ var docOldExample = []byte(`
 `)

 var exampleTests = []struct {
-	id, packID     restic.ID
-	tpe            restic.BlobType
-	offset, length uint
+	id, packID         restic.ID
+	tpe                restic.BlobType
+	offset, length     uint
+	uncompressedLength uint
 }{
 	{
 		restic.TestParseID("3ec79977ef0cf5de7b08cd12b874cd0f62bbaf7f07f3497a5b1bbcc8cb39b1ce"),
 		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
-		restic.DataBlob, 0, 25,
+		restic.DataBlob, 0, 38, 0,
 	}, {
 		restic.TestParseID("9ccb846e60d90d4eb915848add7aa7ea1e4bbabfc60e573db9f7bfb2789afbae"),
 		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
-		restic.TreeBlob, 38, 100,
+		restic.TreeBlob, 38, 112, 511,
 	}, {
 		restic.TestParseID("d3dc577b4ffd38cc4b32122cabf8655a0223ed22edfd93b353dc0c3f2b0fdf66"),
 		restic.TestParseID("73d04e6125cf3c28a299cc2f3cca3b78ceac396e4fcf9575e34536b26782413c"),
-		restic.DataBlob, 150, 123,
+		restic.DataBlob, 150, 123, 234,
 	},
 }

@ -254,41 +296,56 @@ var exampleLookupTest = struct {
 }

 func TestIndexUnserialize(t *testing.T) {
-	oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}
+	for _, task := range []struct {
+		idxBytes []byte
+		version  int
+	}{
+		{docExampleV1, 1},
+		{docExampleV2, 2},
+	} {
+		oldIdx := restic.IDs{restic.TestParseID("ed54ae36197f4745ebc4b54d10e0f623eaaaedd03013eb7ae90df881b7781452")}

-	idx, oldFormat, err := repository.DecodeIndex(docExample, restic.NewRandomID())
-	rtest.OK(t, err)
-	rtest.Assert(t, !oldFormat, "new index format recognized as old format")
+		idx, oldFormat, err := repository.DecodeIndex(task.idxBytes, restic.NewRandomID())
+		rtest.OK(t, err)
+		rtest.Assert(t, !oldFormat, "new index format recognized as old format")

-	for _, test := range exampleTests {
-		list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
-		if len(list) != 1 {
-			t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+		for _, test := range exampleTests {
+			list := idx.Lookup(restic.BlobHandle{ID: test.id, Type: test.tpe}, nil)
+			if len(list) != 1 {
+				t.Errorf("expected one result for blob %v, got %v: %v", test.id.Str(), len(list), list)
+			}
+			blob := list[0]
+
+			t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
+
+			rtest.Equals(t, test.packID, blob.PackID)
+			rtest.Equals(t, test.tpe, blob.Type)
+			rtest.Equals(t, test.offset, blob.Offset)
+			rtest.Equals(t, test.length, blob.Length)
+			if task.version == 1 {
+				rtest.Equals(t, uint(0), blob.UncompressedLength)
+			} else if task.version == 2 {
+				rtest.Equals(t, test.uncompressedLength, blob.UncompressedLength)
+			} else {
+				t.Fatal("Invalid index version")
+			}
 		}
-		blob := list[0]

-		t.Logf("looking for blob %v/%v, got %v", test.tpe, test.id.Str(), blob)
+		rtest.Equals(t, oldIdx, idx.Supersedes())

-		rtest.Equals(t, test.packID, blob.PackID)
-		rtest.Equals(t, test.tpe, blob.Type)
-		rtest.Equals(t, test.offset, blob.Offset)
-		rtest.Equals(t, test.length, blob.Length)
-	}
-
-	rtest.Equals(t, oldIdx, idx.Supersedes())
-
-	blobs := idx.ListPack(exampleLookupTest.packID)
-	if len(blobs) != len(exampleLookupTest.blobs) {
-		t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
-	}
-
-	for _, blob := range blobs {
-		b, ok := exampleLookupTest.blobs[blob.ID]
-		if !ok {
-			t.Errorf("unexpected blob %v found", blob.ID.Str())
+		blobs := idx.ListPack(exampleLookupTest.packID)
+		if len(blobs) != len(exampleLookupTest.blobs) {
+			t.Fatalf("expected %d blobs in pack, got %d", len(exampleLookupTest.blobs), len(blobs))
 		}
-		if blob.Type != b {
-			t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
+
+		for _, blob := range blobs {
+			b, ok := exampleLookupTest.blobs[blob.ID]
+			if !ok {
+				t.Errorf("unexpected blob %v found", blob.ID.Str())
+			}
+			if blob.Type != b {
+				t.Errorf("unexpected type for blob %v: want %v, got %v", blob.ID.Str(), b, blob.Type)
+			}
 		}
 	}
 }
@ -403,8 +460,9 @@ func createRandomIndex(rng *rand.Rand, packfiles int) (idx *repository.Index, lo
 					Type: restic.DataBlob,
 					ID:   id,
 				},
-				Length: uint(size),
-				Offset: uint(offset),
+				Length:             uint(size),
+				UncompressedLength: uint(2 * size),
+				Offset:             uint(offset),
 			})

 			offset += size
@ -475,11 +533,17 @@ func TestIndexHas(t *testing.T) {
 		pos := uint(0)
 		for j := 0; j < 20; j++ {
 			length := uint(i*100 + j)
+			uncompressedLength := uint(0)
+			if i >= 25 {
+				// test a mix of compressed and uncompressed packs
+				uncompressedLength = 2 * length
+			}
 			pb := restic.PackedBlob{
 				Blob: restic.Blob{
-					BlobHandle: restic.NewRandomBlobHandle(),
-					Offset:     pos,
-					Length:     length,
+					BlobHandle:         restic.NewRandomBlobHandle(),
+					Offset:             pos,
+					Length:             length,
+					UncompressedLength: uncompressedLength,
 				},
 				PackID: packID,
 			}
--- a/internal/repository/indexmap.go
+++ b/internal/repository/indexmap.go
@ -32,7 +32,7 @@ const (

 // add inserts an indexEntry for the given arguments into the map,
 // using id as the key.
-func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
+func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32, uncompressedLength uint32) {
 	switch {
 	case m.numentries == 0: // Lazy initialization.
 		m.init()
@ -47,6 +47,7 @@ func (m *indexMap) add(id restic.ID, packIdx int, offset, length uint32) {
 	e.packIndex = packIdx
 	e.offset = offset
 	e.length = length
+	e.uncompressedLength = uncompressedLength

 	m.buckets[h] = e
 	m.numentries++
@ -130,12 +131,12 @@ func (m *indexMap) len() uint { return m.numentries }

 func (m *indexMap) newEntry() *indexEntry {
 	// Allocating in batches means that we get closer to optimal space usage,
-	// as Go's malloc will overallocate for structures of size 56 (indexEntry
+	// as Go's malloc will overallocate for structures of size 60 (indexEntry
 	// on amd64).
 	//
-	// 256*56 and 256*48 both have minimal malloc overhead among reasonable sizes.
+	// 128*60 and 128*60 both have low malloc overhead among reasonable sizes.
 	// See src/runtime/sizeclasses.go in the standard library.
-	const entryAllocBatch = 256
+	const entryAllocBatch = 128

 	if m.free == nil {
 		free := new([entryAllocBatch]indexEntry)
@ -152,9 +153,10 @@ func (m *indexMap) newEntry() *indexEntry {
 }

 type indexEntry struct {
-	id        restic.ID
-	next      *indexEntry
-	packIndex int // Position in containing Index's packs field.
-	offset    uint32
-	length    uint32
+	id                 restic.ID
+	next               *indexEntry
+	packIndex          int // Position in containing Index's packs field.
+	offset             uint32
+	length             uint32
+	uncompressedLength uint32
 }
--- a/internal/repository/indexmap_test.go
+++ b/internal/repository/indexmap_test.go
@ -22,7 +22,7 @@ func TestIndexMapBasic(t *testing.T) {
 		r.Read(id[:])
 		rtest.Assert(t, m.get(id) == nil, "%v retrieved but not added", id)

-		m.add(id, 0, 0, 0)
+		m.add(id, 0, 0, 0, 0)
 		rtest.Assert(t, m.get(id) != nil, "%v added but not retrieved", id)
 		rtest.Equals(t, uint(i), m.len())
 	}
@ -41,7 +41,7 @@ func TestIndexMapForeach(t *testing.T) {
 	for i := 0; i < N; i++ {
 		var id restic.ID
 		id[0] = byte(i)
-		m.add(id, i, uint32(i), uint32(i))
+		m.add(id, i, uint32(i), uint32(i), uint32(i/2))
 	}

 	seen := make(map[int]struct{})
@ -51,6 +51,7 @@ func TestIndexMapForeach(t *testing.T) {
 		rtest.Equals(t, i, e.packIndex)
 		rtest.Equals(t, i, int(e.length))
 		rtest.Equals(t, i, int(e.offset))
+		rtest.Equals(t, i/2, int(e.uncompressedLength))

 		seen[i] = struct{}{}
 		return true
@ -85,13 +86,13 @@ func TestIndexMapForeachWithID(t *testing.T) {

 	// Test insertion and retrieval of duplicates.
 	for i := 0; i < ndups; i++ {
-		m.add(id, i, 0, 0)
+		m.add(id, i, 0, 0, 0)
 	}

 	for i := 0; i < 100; i++ {
 		var otherid restic.ID
 		r.Read(otherid[:])
-		m.add(otherid, -1, 0, 0)
+		m.add(otherid, -1, 0, 0, 0)
 	}

 	n = 0
@ -109,7 +110,7 @@ func TestIndexMapForeachWithID(t *testing.T) {

 func BenchmarkIndexMapHash(b *testing.B) {
 	var m indexMap
-	m.add(restic.ID{}, 0, 0, 0) // Trigger lazy initialization.
+	m.add(restic.ID{}, 0, 0, 0, 0) // Trigger lazy initialization.

 	ids := make([]restic.ID, 128) // 4 KiB.
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
--- a/internal/repository/master_index.go
+++ b/internal/repository/master_index.go
@ -16,6 +16,7 @@ type MasterIndex struct {
 	idx          []*Index
 	pendingBlobs restic.BlobSet
 	idxMutex     sync.RWMutex
+	compress     bool
 }

 // NewMasterIndex creates a new master index.
@ -28,6 +29,10 @@ func NewMasterIndex() *MasterIndex {
 	return &MasterIndex{idx: idx, pendingBlobs: restic.NewBlobSet()}
 }

+// markCompressed sets the compress flag of the master index. The flag is
+// passed to IndexFull, which then applies the larger blob limit used for
+// compressed indexes when deciding whether an index is full.
+func (mi *MasterIndex) markCompressed() {
+	mi.compress = true
+}
+
 // Lookup queries all known Indexes for the ID and returns all matches.
 func (mi *MasterIndex) Lookup(bh restic.BlobHandle) (pbs []restic.PackedBlob) {
 	mi.idxMutex.RLock()
@ -206,7 +211,7 @@ func (mi *MasterIndex) FinalizeFullIndexes() []*Index {
 			continue
 		}

-		if IndexFull(idx) {
+		if IndexFull(idx, mi.compress) {
 			debug.Log("index %p is full", idx)
 			idx.Finalize()
 			list = append(list, idx)
@ -334,7 +339,7 @@ func (mi *MasterIndex) Save(ctx context.Context, repo restic.Repository, packBla
 			for pbs := range idx.EachByPack(ctx, packBlacklist) {
 				newIndex.StorePack(pbs.packID, pbs.blobs)
 				p.Add(1)
-				if IndexFull(newIndex) {
+				if IndexFull(newIndex, mi.compress) {
 					select {
 					case ch <- newIndex:
 					case <-ctx.Done():
--- a/internal/repository/master_index_test.go
+++ b/internal/repository/master_index_test.go
@ -30,9 +30,10 @@ func TestMasterIndex(t *testing.T) {
 	blob2 := restic.PackedBlob{
 		PackID: restic.NewRandomID(),
 		Blob: restic.Blob{
-			BlobHandle: bhInIdx2,
-			Length:     uint(restic.CiphertextLength(100)),
-			Offset:     10,
+			BlobHandle:         bhInIdx2,
+			Length:             uint(restic.CiphertextLength(100)),
+			Offset:             10,
+			UncompressedLength: 200,
 		},
 	}

@ -48,9 +49,10 @@ func TestMasterIndex(t *testing.T) {
 	blob12b := restic.PackedBlob{
 		PackID: restic.NewRandomID(),
 		Blob: restic.Blob{
-			BlobHandle: bhInIdx12,
-			Length:     uint(restic.CiphertextLength(123)),
-			Offset:     50,
+			BlobHandle:         bhInIdx12,
+			Length:             uint(restic.CiphertextLength(123)),
+			Offset:             50,
+			UncompressedLength: 80,
 		},
 	}

@ -86,7 +88,7 @@ func TestMasterIndex(t *testing.T) {

 	size, found = mIdx.LookupSize(bhInIdx2)
 	rtest.Equals(t, true, found)
-	rtest.Equals(t, uint(100), size)
+	rtest.Equals(t, uint(200), size)

 	// test idInIdx12
 	found = mIdx.Has(bhInIdx12)
@ -144,9 +146,10 @@ func TestMasterMergeFinalIndexes(t *testing.T) {
 	blob2 := restic.PackedBlob{
 		PackID: restic.NewRandomID(),
 		Blob: restic.Blob{
-			BlobHandle: bhInIdx2,
-			Length:     100,
-			Offset:     10,
+			BlobHandle:         bhInIdx2,
+			Length:             100,
+			Offset:             10,
+			UncompressedLength: 200,
 		},
 	}

@ -335,8 +338,8 @@ var (
 	depth        = 3
 )

-func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Repository, func()) {
-	repo, cleanup := repository.TestRepository(t)
+func createFilledRepo(t testing.TB, snapshots int, dup float32, version uint) (restic.Repository, func()) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)

 	for i := 0; i < 3; i++ {
 		restic.TestCreateSnapshot(t, repo, snapshotTime.Add(time.Duration(i)*time.Second), depth, dup)
@ -346,7 +349,11 @@ func createFilledRepo(t testing.TB, snapshots int, dup float32) (restic.Reposito
 }

 func TestIndexSave(t *testing.T) {
-	repo, cleanup := createFilledRepo(t, 3, 0)
+	repository.TestAllVersions(t, testIndexSave)
+}
+
+func testIndexSave(t *testing.T, version uint) {
+	repo, cleanup := createFilledRepo(t, 3, 0, version)
 	defer cleanup()

 	err := repo.LoadIndex(context.TODO())
--- a/internal/repository/packer_manager_test.go
+++ b/internal/repository/packer_manager_test.go
@ -70,7 +70,7 @@ func fillPacks(t testing.TB, rnd *rand.Rand, be Saver, pm *packerManager, buf []
 		// Only change a few bytes so we know we're not benchmarking the RNG.
 		rnd.Read(buf[:min(l, 4)])

-		n, err := packer.Add(restic.DataBlob, id, buf)
+		n, err := packer.Add(restic.DataBlob, id, buf, 0)
 		if err != nil {
 			t.Fatal(err)
 		}
--- a/internal/repository/repack_test.go
+++ b/internal/repository/repack_test.go
@ -212,7 +212,11 @@ func reloadIndex(t *testing.T, repo restic.Repository) {
 }

 func TestRepack(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testRepack)
+}
+
+func testRepack(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	seed := time.Now().UnixNano()
@ -279,9 +283,13 @@ func TestRepack(t *testing.T) {
 }

 func TestRepackCopy(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testRepackCopy)
+}
+
+func testRepackCopy(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()
-	dstRepo, dstCleanup := repository.TestRepository(t)
+	dstRepo, dstCleanup := repository.TestRepositoryWithVersion(t, version)
 	defer dstCleanup()

 	seed := time.Now().UnixNano()
@ -318,7 +326,11 @@ func TestRepackCopy(t *testing.T) {
 }

 func TestRepackWrongBlob(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testRepackWrongBlob)
+}
+
+func testRepackWrongBlob(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	seed := time.Now().UnixNano()
--- a/internal/repository/repository.go
+++ b/internal/repository/repository.go
@ -12,6 +12,7 @@ import (
 	"sync"

 	"github.com/cenkalti/backoff/v4"
+	"github.com/klauspost/compress/zstd"
 	"github.com/restic/chunker"
 	"github.com/restic/restic/internal/backend/dryrun"
 	"github.com/restic/restic/internal/cache"
@ -36,16 +37,71 @@ type Repository struct {
 	idx     *MasterIndex
 	Cache   *cache.Cache

+	opts Options
+
 	noAutoIndexUpdate bool

 	treePM *packerManager
 	dataPM *packerManager
+
+	allocEnc sync.Once
+	allocDec sync.Once
+	enc      *zstd.Encoder
+	dec      *zstd.Decoder
+}
+
+// Options holds the settings passed to New when a Repository is created.
+type Options struct {
+	// Compression selects the compression mode; see CompressionMode.
+	Compression CompressionMode
+}
+
+// CompressionMode configures if data should be compressed.
+type CompressionMode uint
+
+// Constants for the different compression levels.
+const (
+	CompressionAuto CompressionMode = 0
+	CompressionOff  CompressionMode = 1
+	CompressionMax  CompressionMode = 2
+)
+
+// Set parses s and stores the matching mode in c. Accepted values are "auto",
+// "off" and "max"; any other string leaves c unchanged and returns an error.
+// Set implements the method needed for pflag command flag parsing.
+func (c *CompressionMode) Set(s string) error {
+	switch s {
+	case "auto":
+		*c = CompressionAuto
+	case "off":
+		*c = CompressionOff
+	case "max":
+		*c = CompressionMax
+	default:
+		return fmt.Errorf("invalid compression mode %q, must be one of (auto|off|max)", s)
+	}
+
+	return nil
+}
+
+// String returns the textual form of the mode ("auto", "off" or "max"), i.e.
+// the same strings that Set accepts. Any other value yields "invalid".
+func (c *CompressionMode) String() string {
+	switch *c {
+	case CompressionAuto:
+		return "auto"
+	case CompressionOff:
+		return "off"
+	case CompressionMax:
+		return "max"
+	default:
+		return "invalid"
+	}
+}
+
+// Type returns the fixed type name "mode". Together with Set and String it
+// matches the method set of the pflag Value interface.
+func (c *CompressionMode) Type() string {
+	return "mode"
 }

 // New returns a new repository with backend be.
-func New(be restic.Backend) *Repository {
+func New(be restic.Backend, opts Options) *Repository {
 	repo := &Repository{
 		be:     be,
+		opts:   opts,
 		idx:    NewMasterIndex(),
 		dataPM: newPackerManager(be, nil),
 		treePM: newPackerManager(be, nil),
@ -60,6 +116,14 @@ func (r *Repository) DisableAutoIndexUpdate() {
 	r.noAutoIndexUpdate = true
 }

+// setConfig assigns the given config and updates the repository parameters accordingly
+func (r *Repository) setConfig(cfg restic.Config) {
+	r.cfg = cfg
+	if r.cfg.Version >= 2 {
+		r.idx.markCompressed()
+	}
+}
+
 // Config returns the repository configuration.
 func (r *Repository) Config() restic.Config {
 	return r.cfg
@ -125,6 +189,9 @@ func (r *Repository) LoadUnpacked(ctx context.Context, buf []byte, t restic.File
 	if err != nil {
 		return nil, err
 	}
+	if t != restic.ConfigFile {
+		return r.decompressUnpacked(plaintext)
+	}

 	return plaintext, nil
 }
@ -218,12 +285,23 @@ func (r *Repository) LoadBlob(ctx context.Context, t restic.BlobType, id restic.
 			continue
 		}

+		if blob.IsCompressed() {
+			plaintext, err = r.getZstdDecoder().DecodeAll(plaintext, make([]byte, 0, blob.DataLength()))
+			if err != nil {
+				lastError = errors.Errorf("decompressing blob %v failed: %v", id, err)
+				continue
+			}
+		}
+
 		// check hash
 		if !restic.Hash(plaintext).Equal(id) {
 			lastError = errors.Errorf("blob %v returned invalid hash", id)
 			continue
 		}

+		if len(plaintext) > cap(buf) {
+			return plaintext, nil
+		}
 		// move decrypted data to the start of the buffer
 		copy(buf, plaintext)
 		return buf[:len(plaintext)], nil
@ -252,12 +330,70 @@ func (r *Repository) LookupBlobSize(id restic.ID, tpe restic.BlobType) (uint, bo
 	return r.idx.LookupSize(restic.BlobHandle{ID: id, Type: tpe})
 }

+// getZstdEncoder returns the repository's zstd encoder, creating it on the
+// first call (guarded by the allocEnc sync.Once). The encoder uses
+// zstd.SpeedBestCompression when the Compression option is CompressionMax and
+// zstd.SpeedDefault otherwise. It panics if the encoder cannot be created.
+func (r *Repository) getZstdEncoder() *zstd.Encoder {
+	r.allocEnc.Do(func() {
+		level := zstd.SpeedDefault
+		if r.opts.Compression == CompressionMax {
+			level = zstd.SpeedBestCompression
+		}
+
+		opts := []zstd.EOption{
+			// Set the compression level configured.
+			zstd.WithEncoderLevel(level),
+			// Disable CRC, we have enough checks in place, makes the
+			// compressed data four bytes shorter.
+			zstd.WithEncoderCRC(false),
+			// Set a window of 512kbyte, so we have good lookbehind for usual
+			// blob sizes.
+			zstd.WithWindowSize(512 * 1024),
+		}
+
+		enc, err := zstd.NewWriter(nil, opts...)
+		if err != nil {
+			panic(err)
+		}
+		r.enc = enc
+	})
+	return r.enc
+}
+
+// getZstdDecoder returns the repository's zstd decoder, creating it on the
+// first call (guarded by the allocDec sync.Once). It panics if the decoder
+// cannot be created.
+func (r *Repository) getZstdDecoder() *zstd.Decoder {
+	r.allocDec.Do(func() {
+		opts := []zstd.DOption{
+			// Use all available cores.
+			zstd.WithDecoderConcurrency(0),
+			// Limit the maximum decompressed memory. Set to a very high,
+			// conservative value.
+			zstd.WithDecoderMaxMemory(16 * 1024 * 1024 * 1024),
+		}
+
+		dec, err := zstd.NewReader(nil, opts...)
+		if err != nil {
+			panic(err)
+		}
+		r.dec = dec
+	})
+	return r.dec
+}
+
 // saveAndEncrypt encrypts data and stores it to the backend as type t. If data
 // is small enough, it will be packed together with other small blobs.
 // The caller must ensure that the id matches the data.
 func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data []byte, id restic.ID) error {
 	debug.Log("save id %v (%v, %d bytes)", id, t, len(data))

+	uncompressedLength := 0
+	if r.cfg.Version > 1 {
+
+		// we have a repo v2, so compression is available. if the user opts to
+		// not compress, we won't compress any data, but everything else is
+		// compressed.
+		if r.opts.Compression != CompressionOff || t != restic.DataBlob {
+			uncompressedLength = len(data)
+			data = r.getZstdEncoder().EncodeAll(data, nil)
+		}
+	}
+
 	nonce := crypto.NewRandomNonce()

 	ciphertext := make([]byte, 0, restic.CiphertextLength(len(data)))
@ -284,7 +420,7 @@ func (r *Repository) saveAndEncrypt(ctx context.Context, t restic.BlobType, data
 	}

 	// save ciphertext
-	_, err = packer.Add(t, id, ciphertext)
+	_, err = packer.Add(t, id, ciphertext, uncompressedLength)
 	if err != nil {
 		return err
 	}
@ -312,9 +448,50 @@ func (r *Repository) SaveJSONUnpacked(ctx context.Context, t restic.FileType, it
 	return r.SaveUnpacked(ctx, t, plaintext)
 }

+// compressUnpacked prepares the content of an unpacked file for storage. For
+// repository versions below 2 it returns p unchanged. Otherwise it returns a
+// single version byte (2) followed by the zstd-compressed form of p. The
+// returned error is currently always nil.
+func (r *Repository) compressUnpacked(p []byte) ([]byte, error) {
+	// compression is only available starting from version 2
+	if r.cfg.Version < 2 {
+		return p, nil
+	}
+
+	// version byte
+	out := []byte{2}
+	out = r.getZstdEncoder().EncodeAll(p, out)
+	return out, nil
+}
+
+// decompressUnpacked reverses compressUnpacked. It returns p unchanged when
+// the repository version is below 2, when p is empty, or when p starts with
+// '[' or '{' and is therefore treated as uncompressed JSON. Otherwise the
+// first byte must be the version byte 2 and the remainder is decoded with
+// zstd; any other first byte results in an error.
+func (r *Repository) decompressUnpacked(p []byte) ([]byte, error) {
+	// compression is only available starting from version 2
+	if r.cfg.Version < 2 {
+		return p, nil
+	}
+
+	if len(p) == 0 {
+		// too short for version header
+		return p, nil
+	}
+	if p[0] == '[' || p[0] == '{' {
+		// probably raw JSON
+		return p, nil
+	}
+	// version
+	if p[0] != 2 {
+		return nil, errors.New("not supported encoding format")
+	}
+
+	return r.getZstdDecoder().DecodeAll(p[1:], nil)
+}
+
 // SaveUnpacked encrypts data and stores it in the backend. Returned is the
 // storage hash.
 func (r *Repository) SaveUnpacked(ctx context.Context, t restic.FileType, p []byte) (id restic.ID, err error) {
+	if t != restic.ConfigFile {
+		p, err = r.compressUnpacked(p)
+		if err != nil {
+			return restic.ID{}, err
+		}
+	}
+
 	ciphertext := restic.NewBlobBuffer(len(p))
 	ciphertext = ciphertext[:0]
 	nonce := crypto.NewRandomNonce()
@ -478,6 +655,17 @@ func (r *Repository) LoadIndex(ctx context.Context) error {
 		return err
 	}

+	if r.cfg.Version < 2 {
+		// sanity check
+		ctx, cancel := context.WithCancel(ctx)
+		defer cancel()
+		for blob := range r.idx.Each(ctx) {
+			if blob.IsCompressed() {
+				return errors.Fatal("index uses feature not supported by repository version 1")
+			}
+		}
+	}
+
 	// remove index files from the cache which have been removed in the repo
 	return r.PrepareCache(validIndex)
 }
@ -592,18 +780,28 @@ func (r *Repository) SearchKey(ctx context.Context, password string, maxKeys int
 	r.dataPM.key = key.master
 	r.treePM.key = key.master
 	r.keyName = key.Name()
-	r.cfg, err = restic.LoadConfig(ctx, r)
+	cfg, err := restic.LoadConfig(ctx, r)
 	if err == crypto.ErrUnauthenticated {
 		return errors.Fatalf("config or key %v is damaged: %v", key.Name(), err)
 	} else if err != nil {
 		return errors.Fatalf("config cannot be loaded: %v", err)
 	}
+
+	r.setConfig(cfg)
 	return nil
 }

 // Init creates a new master key with the supplied password, initializes and
 // saves the repository config.
-func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomial *chunker.Pol) error {
+func (r *Repository) Init(ctx context.Context, version uint, password string, chunkerPolynomial *chunker.Pol) error {
+	if version > restic.MaxRepoVersion {
+		return fmt.Errorf("repo version %v too high", version)
+	}
+
+	if version < restic.MinRepoVersion {
+		return fmt.Errorf("repo version %v too low", version)
+	}
+
 	has, err := r.be.Test(ctx, restic.Handle{Type: restic.ConfigFile})
 	if err != nil {
 		return err
@ -612,7 +810,7 @@ func (r *Repository) Init(ctx context.Context, password string, chunkerPolynomia
 		return errors.New("repository master key and config already initialized")
 	}

-	cfg, err := restic.CreateConfig()
+	cfg, err := restic.CreateConfig(version)
 	if err != nil {
 		return err
 	}
@ -635,7 +833,7 @@ func (r *Repository) init(ctx context.Context, password string, cfg restic.Confi
 	r.dataPM.key = key.master
 	r.treePM.key = key.master
 	r.keyName = key.Name()
-	r.cfg = cfg
+	r.setConfig(cfg)
 	_, err = r.SaveJSONUnpacked(ctx, restic.ConfigFile, cfg)
 	return err
 }
@ -768,9 +966,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack

 	debug.Log("streaming pack %v (%d to %d bytes), blobs: %v", packID, dataStart, dataEnd, len(blobs))

+	dec, err := zstd.NewReader(nil) // one decoder, reused for every blob in this pack
+	if err != nil {
+		panic(err) // report the construction error, not the decoder value
+	}
+	defer dec.Close()
+
 	ctx, cancel := context.WithCancel(ctx)
 	// stream blobs in pack
-	err := beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
+	err = beLoad(ctx, h, int(dataEnd-dataStart), int64(dataStart), func(rd io.Reader) error {
 		// prevent callbacks after cancelation
 		if ctx.Err() != nil {
 			return ctx.Err()
@ -783,6 +987,7 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
 		bufRd := bufio.NewReaderSize(rd, bufferSize)
 		currentBlobEnd := dataStart
 		var buf []byte
+		var decode []byte
 		for _, entry := range blobs {
 			skipBytes := int(entry.Offset - currentBlobEnd)
 			if skipBytes < 0 {
@ -822,6 +1027,15 @@ func StreamPack(ctx context.Context, beLoad BackendLoadFn, key *crypto.Key, pack
 			// decryption errors are likely permanent, give the caller a chance to skip them
 			nonce, ciphertext := buf[:key.NonceSize()], buf[key.NonceSize():]
 			plaintext, err := key.Open(ciphertext[:0], nonce, ciphertext, nil)
+			if err == nil && entry.IsCompressed() {
+				// DecodeAll will allocate a slice if it is not large enough since it
+				// knows the decompressed size (because we're using EncodeAll)
+				decode, err = dec.DecodeAll(plaintext, decode[:0])
+				plaintext = decode
+				if err != nil {
+					err = errors.Errorf("decompressing blob %v failed: %v", h, err)
+				}
+			}
 			if err == nil {
 				id := restic.Hash(plaintext)
 				if !id.Equal(entry.ID) {
--- a/internal/repository/repository_test.go
+++ b/internal/repository/repository_test.go
@ -15,6 +15,7 @@ import (
 	"time"

 	"github.com/google/go-cmp/cmp"
+	"github.com/klauspost/compress/zstd"
 	"github.com/restic/restic/internal/archiver"
 	"github.com/restic/restic/internal/crypto"
 	"github.com/restic/restic/internal/repository"
@ -28,7 +29,11 @@ var testSizes = []int{5, 23, 2<<18 + 23, 1 << 20}
 var rnd = rand.New(rand.NewSource(time.Now().UnixNano()))

 func TestSave(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testSave)
+}
+
+func testSave(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	for _, size := range testSizes {
@ -63,7 +68,11 @@ func TestSave(t *testing.T) {
 }

 func TestSaveFrom(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testSaveFrom)
+}
+
+func testSaveFrom(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	for _, size := range testSizes {
@ -96,7 +105,11 @@ func TestSaveFrom(t *testing.T) {
 }

 func BenchmarkSaveAndEncrypt(t *testing.B) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.BenchmarkAllVersions(t, benchmarkSaveAndEncrypt)
+}
+
+func benchmarkSaveAndEncrypt(t *testing.B, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	size := 4 << 20 // 4MiB
@ -118,7 +131,11 @@ func BenchmarkSaveAndEncrypt(t *testing.B) {
 }

 func TestLoadTree(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testLoadTree)
+}
+
+func testLoadTree(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	if rtest.BenchArchiveDirectory == "" {
@ -134,7 +151,11 @@ func TestLoadTree(t *testing.T) {
 }

 func BenchmarkLoadTree(t *testing.B) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.BenchmarkAllVersions(t, benchmarkLoadTree)
+}
+
+func benchmarkLoadTree(t *testing.B, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	if rtest.BenchArchiveDirectory == "" {
@ -154,7 +175,11 @@ func BenchmarkLoadTree(t *testing.B) {
 }

 func TestLoadBlob(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testLoadBlob)
+}
+
+func testLoadBlob(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	length := 1000000
@ -183,7 +208,11 @@ func TestLoadBlob(t *testing.T) {
 }

 func BenchmarkLoadBlob(b *testing.B) {
-	repo, cleanup := repository.TestRepository(b)
+	repository.BenchmarkAllVersions(b, benchmarkLoadBlob)
+}
+
+func benchmarkLoadBlob(b *testing.B, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(b, version)
 	defer cleanup()

 	length := 1000000
@ -219,7 +248,11 @@ func BenchmarkLoadBlob(b *testing.B) {
 }

 func BenchmarkLoadUnpacked(b *testing.B) {
-	repo, cleanup := repository.TestRepository(b)
+	repository.BenchmarkAllVersions(b, benchmarkLoadUnpacked)
+}
+
+func benchmarkLoadUnpacked(b *testing.B, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(b, version)
 	defer cleanup()

 	length := 1000000
@ -255,7 +288,11 @@ func BenchmarkLoadUnpacked(b *testing.B) {
 }

 func TestLoadJSONUnpacked(t *testing.T) {
-	repo, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testLoadJSONUnpacked)
+}
+
+func testLoadJSONUnpacked(t *testing.T, version uint) {
+	repo, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	if rtest.BenchArchiveDirectory == "" {
@ -313,9 +350,13 @@ func loadIndex(ctx context.Context, repo restic.Repository, id restic.ID) (*repo
 }

 func BenchmarkLoadIndex(b *testing.B) {
+	repository.BenchmarkAllVersions(b, benchmarkLoadIndex)
+}
+
+func benchmarkLoadIndex(b *testing.B, version uint) {
 	repository.TestUseLowSecurityKDFParameters(b)

-	repo, cleanup := repository.TestRepository(b)
+	repo, cleanup := repository.TestRepositoryWithVersion(b, version)
 	defer cleanup()

 	idx := repository.NewIndex()
@ -362,12 +403,16 @@ func saveRandomDataBlobs(t testing.TB, repo restic.Repository, num int, sizeMax
 }

 func TestRepositoryIncrementalIndex(t *testing.T) {
-	r, cleanup := repository.TestRepository(t)
+	repository.TestAllVersions(t, testRepositoryIncrementalIndex)
+}
+
+func testRepositoryIncrementalIndex(t *testing.T, version uint) {
+	r, cleanup := repository.TestRepositoryWithVersion(t, version)
 	defer cleanup()

 	repo := r.(*repository.Repository)

-	repository.IndexFull = func(*repository.Index) bool { return true }
+	repository.IndexFull = func(*repository.Index, bool) bool { return true }

 	// add 15 packs
 	for j := 0; j < 5; j++ {
@ -417,10 +462,31 @@ func TestRepositoryIncrementalIndex(t *testing.T) {
 }

 // buildPackfileWithoutHeader returns a manually built pack file without a header.
-func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key) (blobs []restic.Blob, packfile []byte) {
+func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key, compress bool) (blobs []restic.Blob, packfile []byte) {
+	opts := []zstd.EOption{
+		// Set the compression level configured.
+		zstd.WithEncoderLevel(zstd.SpeedDefault),
+		// Disable CRC, we have enough checks in place, makes the
+		// compressed data four bytes shorter.
+		zstd.WithEncoderCRC(false),
+		// Set a window of 512kbyte, so we have good lookbehind for usual
+		// blob sizes.
+		zstd.WithWindowSize(512 * 1024),
+	}
+	enc, err := zstd.NewWriter(nil, opts...)
+	if err != nil {
+		panic(err)
+	}
+
 	var offset uint
 	for i, size := range blobSizes {
 		plaintext := test.Random(800+i, size)
+		id := restic.Hash(plaintext)
+		uncompressedLength := uint(0)
+		if compress {
+			uncompressedLength = uint(len(plaintext))
+			plaintext = enc.EncodeAll(plaintext, nil)
+		}

 		// we use a deterministic nonce here so the whole process is
 		// deterministic, last byte is the blob index
@ -438,11 +504,12 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)

 		blobs = append(blobs, restic.Blob{
 			BlobHandle: restic.BlobHandle{
-				ID:   restic.Hash(plaintext),
 				Type: restic.DataBlob,
+				ID:   id,
 			},
-			Length: uint(ciphertextLength),
-			Offset: offset,
+			Length:             uint(ciphertextLength),
+			UncompressedLength: uncompressedLength,
+			Offset:             offset,
 		})

 		offset = uint(len(packfile))
@ -452,6 +519,10 @@ func buildPackfileWithoutHeader(t testing.TB, blobSizes []int, key *crypto.Key)
 }

 func TestStreamPack(t *testing.T) {
+	repository.TestAllVersions(t, testStreamPack)
+}
+
+func testStreamPack(t *testing.T, version uint) {
 	// always use the same key for deterministic output
 	const jsonKey = `{"mac":{"k":"eQenuI8adktfzZMuC8rwdA==","r":"k8cfAly2qQSky48CQK7SBA=="},"encrypt":"MKO9gZnRiQFl8mDUurSDa9NMjiu9MUifUrODTHS05wo="}`

@ -476,7 +547,17 @@ func TestStreamPack(t *testing.T) {
 		18883,
 	}

-	packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key)
+	var compress bool
+	switch version {
+	case 1:
+		compress = false
+	case 2:
+		compress = true
+	default:
+		t.Fatal("test does not suport repository version", version)
+	}
+
+	packfileBlobs, packfile := buildPackfileWithoutHeader(t, blobSizes, &key, compress)

 	load := func(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
 		data := packfile
--- a/internal/repository/testing.go
+++ b/internal/repository/testing.go
@ -2,6 +2,7 @@ package repository

 import (
 	"context"
+	"fmt"
 	"os"
 	"testing"

@ -41,7 +42,7 @@ const TestChunkerPol = chunker.Pol(0x3DA3358B4DC173)
 // TestRepositoryWithBackend returns a repository initialized with a test
 // password. If be is nil, an in-memory backend is used. A constant polynomial
 // is used for the chunker and low-security test parameters.
-func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Repository, cleanup func()) {
+func TestRepositoryWithBackend(t testing.TB, be restic.Backend, version uint) (r restic.Repository, cleanup func()) {
 	t.Helper()
 	TestUseLowSecurityKDFParameters(t)
 	restic.TestDisableCheckPolynomial(t)
@ -51,9 +52,9 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
 		be, beCleanup = TestBackend(t)
 	}

-	repo := New(be)
+	repo := New(be, Options{})

-	cfg := restic.TestCreateConfig(t, TestChunkerPol)
+	cfg := restic.TestCreateConfig(t, TestChunkerPol, version)
 	err := repo.init(context.TODO(), test.TestPassword, cfg)
 	if err != nil {
 		t.Fatalf("TestRepository(): initialize repo failed: %v", err)
@ -71,6 +72,11 @@ func TestRepositoryWithBackend(t testing.TB, be restic.Backend) (r restic.Reposi
 // a non-existing directory, a local backend is created there and this is used
 // instead. The directory is not removed, but left there for inspection.
 func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
+	t.Helper()
+	return TestRepositoryWithVersion(t, 0)
+}
+
+func TestRepositoryWithVersion(t testing.TB, version uint) (r restic.Repository, cleanup func()) {
 	t.Helper()
 	dir := os.Getenv("RESTIC_TEST_REPO")
 	if dir != "" {
@ -80,7 +86,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
 			if err != nil {
 				t.Fatalf("error creating local backend at %v: %v", dir, err)
 			}
-			return TestRepositoryWithBackend(t, be)
+			return TestRepositoryWithBackend(t, be, version)
 		}

 		if err == nil {
@ -88,7 +94,7 @@ func TestRepository(t testing.TB) (r restic.Repository, cleanup func()) {
 		}
 	}

-	return TestRepositoryWithBackend(t, nil)
+	return TestRepositoryWithBackend(t, nil, version)
 }

 // TestOpenLocal opens a local repository.
@ -98,7 +104,7 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {
 		t.Fatal(err)
 	}

-	repo := New(be)
+	repo := New(be, Options{})
 	err = repo.SearchKey(context.TODO(), test.TestPassword, 10, "")
 	if err != nil {
 		t.Fatal(err)
@ -106,3 +112,23 @@ func TestOpenLocal(t testing.TB, dir string) (r restic.Repository) {

 	return repo
 }
+
+// VersionedTest is a test function that receives the repository version to use.
+type VersionedTest func(t *testing.T, version uint)
+
+// TestAllVersions runs test in a subtest named "v<N>" for each repository version N.
+func TestAllVersions(t *testing.T, test VersionedTest) {
+	for v := restic.MinRepoVersion; v <= restic.MaxRepoVersion; v++ {
+		t.Run(fmt.Sprintf("v%d", v), func(t *testing.T) { test(t, uint(v)) })
+	}
+}
+
+// VersionedBenchmark is a benchmark that receives the repository version to use.
+type VersionedBenchmark func(b *testing.B, version uint)
+
+// BenchmarkAllVersions runs bench as sub-benchmark "v<N>" for each repository version N.
+func BenchmarkAllVersions(b *testing.B, bench VersionedBenchmark) {
+	for v := restic.MinRepoVersion; v <= restic.MaxRepoVersion; v++ {
+		b.Run(fmt.Sprintf("v%d", v), func(b *testing.B) { bench(b, uint(v)) })
+	}
+}
--- a/internal/restic/blob.go
+++ b/internal/restic/blob.go
@ -9,13 +9,25 @@ import (
 // Blob is one part of a file or a tree.
 type Blob struct {
 	BlobHandle
-	Length uint
-	Offset uint
+	Length             uint
+	Offset             uint
+	UncompressedLength uint
 }

 func (b Blob) String() string {
-	return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v>",
-		b.Type, b.ID.Str(), b.Offset, b.Length)
+	return fmt.Sprintf("<Blob (%v) %v, offset %v, length %v, uncompressed length %v>",
+		b.Type, b.ID.Str(), b.Offset, b.Length, b.UncompressedLength)
+}
+
+// DataLength returns UncompressedLength if it is set, else PlaintextLength of Length.
+func (b Blob) DataLength() uint {
+	if b.IsCompressed() {
+		return b.UncompressedLength
+	}
+	return uint(PlaintextLength(int(b.Length)))
+}
+
+// IsCompressed reports whether the blob records a non-zero uncompressed length.
+func (b Blob) IsCompressed() bool { return b.UncompressedLength != 0 }

 // PackedBlob is a blob stored within a file.
--- a/internal/restic/config.go
+++ b/internal/restic/config.go
@ -18,9 +18,12 @@ type Config struct {
 	ChunkerPolynomial chunker.Pol `json:"chunker_polynomial"`
 }

-// RepoVersion is the version that is written to the config when a repository
+const MinRepoVersion = 1
+const MaxRepoVersion = 2
+
+// StableRepoVersion is the version that is written to the config when a repository
 // is newly created with Init().
-const RepoVersion = 1
+const StableRepoVersion = 1

 // JSONUnpackedLoader loads unpacked JSON.
 type JSONUnpackedLoader interface {
@ -29,7 +32,7 @@ type JSONUnpackedLoader interface {

 // CreateConfig creates a config file with a randomly selected polynomial and
 // ID.
-func CreateConfig() (Config, error) {
+func CreateConfig(version uint) (Config, error) {
 	var (
 		err error
 		cfg Config
@ -41,18 +44,24 @@ func CreateConfig() (Config, error) {
 	}

 	cfg.ID = NewRandomID().String()
-	cfg.Version = RepoVersion
+	cfg.Version = version

 	debug.Log("New config: %#v", cfg)
 	return cfg, nil
 }

 // TestCreateConfig creates a config for use within tests.
-func TestCreateConfig(t testing.TB, pol chunker.Pol) (cfg Config) {
+func TestCreateConfig(t testing.TB, pol chunker.Pol, version uint) (cfg Config) {
 	cfg.ChunkerPolynomial = pol

 	cfg.ID = NewRandomID().String()
-	cfg.Version = RepoVersion
+	if version == 0 {
+		version = StableRepoVersion
+	}
+	if version < MinRepoVersion || version > MaxRepoVersion {
+		t.Fatalf("version %d is out of range", version)
+	}
+	cfg.Version = version

 	return cfg
 }
@ -77,7 +86,7 @@ func LoadConfig(ctx context.Context, r JSONUnpackedLoader) (Config, error) {
 		return Config{}, err
 	}

-	if cfg.Version != RepoVersion {
+	if cfg.Version < MinRepoVersion || cfg.Version > MaxRepoVersion {
 		return Config{}, errors.Errorf("unsupported repository version %v", cfg.Version)
 	}

--- a/internal/restic/config_test.go
+++ b/internal/restic/config_test.go
@ -32,7 +32,7 @@ func TestConfig(t *testing.T) {
 		return restic.ID{}, nil
 	}

-	cfg1, err := restic.CreateConfig()
+	cfg1, err := restic.CreateConfig(restic.MaxRepoVersion)
 	rtest.OK(t, err)

 	_, err = saver(save).SaveJSONUnpacked(restic.ConfigFile, cfg1)
--- a/internal/restorer/filerestorer.go
+++ b/internal/restorer/filerestorer.go
@ -117,7 +117,7 @@ func (r *fileRestorer) restoreFiles(ctx context.Context) error {
 		err := r.forEachBlob(fileBlobs, func(packID restic.ID, blob restic.Blob) {
 			if largeFile {
 				packsMap[packID] = append(packsMap[packID], fileBlobInfo{id: blob.ID, offset: fileOffset})
-				fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
+				fileOffset += int64(blob.DataLength())
 			}
 			pack, ok := packs[packID]
 			if !ok {
@ -195,7 +195,7 @@ func (r *fileRestorer) downloadPack(ctx context.Context, pack *packInfo) error {
 				if packID.Equal(pack.id) {
 					addBlob(blob, fileOffset)
 				}
-				fileOffset += int64(restic.PlaintextLength(int(blob.Length)))
+				fileOffset += int64(blob.DataLength())
 			})
 			if err != nil {
 				// restoreFiles should have caught this error before