// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

package db

import (
	"fmt"

	"google.golang.org/protobuf/proto"

	"github.com/syncthing/syncthing/internal/gen/bep"
	"github.com/syncthing/syncthing/lib/protocol"
)

// dbMigrationVersion is for migrations that do not change the schema and thus
// do not put restrictions on downgrades (e.g. for repairs after a bugfix).
const (
	dbVersion             = 14
	dbMigrationVersion    = 20
	dbMinSyncthingVersion = "v1.9.0"
)

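// migration describes a single migration step: the schema version it applies
// to, the migration version it results in, the minimum Syncthing version able
// to read the database afterwards, and the function that does the work.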
type migration struct {
	schemaVersion       int64
	migrationVersion    int64
	minSyncthingVersion string
	migration           func(prevSchema int) error
}

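// databaseDowngradeError is returned when the database was written by a newer
// Syncthing version than the one currently running.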
type databaseDowngradeError struct {
	minSyncthingVersion string
}

func (e *databaseDowngradeError) Error() string {
	if e.minSyncthingVersion == "" {
		return "newer Syncthing required"
	}
	return fmt.Sprintf("Syncthing %s required", e.minSyncthingVersion)
}

// UpdateSchema updates a possibly outdated database to the current schema and
// also does repairs where necessary.
func UpdateSchema(db *Lowlevel) error {
	updater := &schemaUpdater{db}
	return updater.updateSchema()
}

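// schemaUpdater wraps a Lowlevel database with the methods that perform
// schema migrations and repairs.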
type schemaUpdater struct {
	*Lowlevel
}

func (db *schemaUpdater) updateSchema() error {
	// Updating the schema can touch any and all parts of the database. Make
	// sure we do not run GC concurrently with schema migrations.
	db.gcMut.Lock()
	defer db.gcMut.Unlock()

	miscDB := NewMiscDataNamespace(db.Lowlevel)
	prevVersion, _, err := miscDB.Int64("dbVersion")
	if err != nil {
		return err
	}

	if prevVersion > 0 && prevVersion < 14 {
		// This is a database version that is too old to be upgraded directly.
		// The user will have to upgrade via an older Syncthing version first.
		return fmt.Errorf("database version %d is too old to be upgraded directly; step via Syncthing v1.27.0 to upgrade", prevVersion)
	}

	if prevVersion > dbVersion {
		err := &databaseDowngradeError{}
		if minSyncthingVersion, ok, dbErr := miscDB.String("dbMinSyncthingVersion"); dbErr != nil {
			return dbErr
		} else if ok {
			err.minSyncthingVersion = minSyncthingVersion
		}
		return err
	}

	prevMigration, _, err := miscDB.Int64("dbMigrationVersion")
	if err != nil {
		return err
	}
	// Cover versions before adding `dbMigrationVersion` (== 0) and possible future weirdness.
	if prevMigration < prevVersion {
		prevMigration = prevVersion
	}

	if prevVersion == dbVersion && prevMigration >= dbMigrationVersion {
		return nil
	}

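	// Migrations are applied in the order listed; each entry runs only if the
	// previously recorded migration version is below its migrationVersion.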
	migrations := []migration{
		{14, 14, "v1.9.0", db.updateSchemaTo14},
		{14, 16, "v1.9.0", db.checkRepairMigration},
		{14, 17, "v1.9.0", db.migration17},
		{14, 19, "v1.9.0", db.dropAllIndexIDsMigration},
		{14, 20, "v1.9.0", db.dropOutgoingIndexIDsMigration},
	}

	for _, m := range migrations {
		if prevMigration < m.migrationVersion {
			l.Infof("Running database migration %d...", m.migrationVersion)
			if err := m.migration(int(prevVersion)); err != nil {
				return fmt.Errorf("failed to do migration %v: %w", m.migrationVersion, err)
			}
			if err := db.writeVersions(m, miscDB); err != nil {
				return fmt.Errorf("failed to write versions after migration %v: %w", m.migrationVersion, err)
			}
		}
	}

	if err := db.writeVersions(migration{
		schemaVersion:       dbVersion,
		migrationVersion:    dbMigrationVersion,
		minSyncthingVersion: dbMinSyncthingVersion,
	}, miscDB); err != nil {
		return fmt.Errorf("failed to write versions after migrations: %w", err)
	}

	l.Infoln("Compacting database after migration...")
	return db.Compact()
}

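// writeVersions records the schema version, minimum Syncthing version and
// migration version reached by the given migration.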
func (*schemaUpdater) writeVersions(m migration, miscDB *NamespacedKV) error {
	if err := miscDB.PutInt64("dbVersion", m.schemaVersion); err != nil {
		return err
	}
	if err := miscDB.PutString("dbMinSyncthingVersion", m.minSyncthingVersion); err != nil {
		return err
	}
	if err := miscDB.PutInt64("dbMigrationVersion", m.migrationVersion); err != nil {
		return err
	}
	return nil
}

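// updateSchemaTo14 marks local file entries whose block lists are missing
// from the database as requiring a rescan.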
func (db *schemaUpdater) updateSchemaTo14(_ int) error {
	// Checks for missing blocks and marks those entries as requiring a
	// rehash/being invalid. The db is checked/repaired afterwards, i.e.
	// no care is taken to get metadata and sequences right.
	// If the corresponding files changed on disk compared to the global
	// version, this will cause a conflict.

	var key, gk []byte
	for _, folderStr := range db.ListFolders() {
		folder := []byte(folderStr)
		meta := newMetadataTracker(db.keyer, db.evLogger)
		meta.counts.Created = 0 // Recalculate metadata afterwards

		t, err := db.newReadWriteTransaction(meta.CommitHook(folder))
		if err != nil {
			return err
		}
		defer t.close()

		key, err = t.keyer.GenerateDeviceFileKey(key, folder, protocol.LocalDeviceID[:], nil)
		if err != nil {
			return err
		}
		it, err := t.NewPrefixIterator(key)
		if err != nil {
			return err
		}
		defer it.Release()
		for it.Next() {
			var bepf bep.FileInfo
			if err := proto.Unmarshal(it.Value(), &bepf); err != nil {
				return err
			}
			fi := protocol.FileInfoFromDB(&bepf)
			if len(fi.Blocks) > 0 || len(fi.BlocksHash) == 0 {
				continue
			}
			key = t.keyer.GenerateBlockListKey(key, fi.BlocksHash)
			_, err := t.Get(key)
			if err == nil {
				continue
			}

			fi.SetMustRescan()
			if err = t.putFile(it.Key(), fi); err != nil {
				return err
			}

			gk, err = t.keyer.GenerateGlobalVersionKey(gk, folder, []byte(fi.Name))
			if err != nil {
				return err
			}
			key, err = t.updateGlobal(gk, key, folder, protocol.LocalDeviceID[:], fi, meta)
			if err != nil {
				return err
			}
		}
		it.Release()

		if err = t.Commit(); err != nil {
			return err
		}
		t.close()
	}

	return nil
}

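// checkRepairMigration runs the folder metadata check and repair for all
// folders in the database.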
func (db *schemaUpdater) checkRepairMigration(_ int) error {
	for _, folder := range db.ListFolders() {
		_, err := db.getMetaAndCheckGCLocked(folder)
		if err != nil {
			return err
		}
	}
	return nil
}

// migration17 finds all files that were pulled as invalid from an invalid
// global and makes sure they get scanned/pulled again.
func (db *schemaUpdater) migration17(prev int) error {
	if prev < 16 {
		// The issue was introduced in the migration to 16.
		return nil
	}
	t, err := db.newReadOnlyTransaction()
	if err != nil {
		return err
	}
	defer t.close()

	for _, folderStr := range db.ListFolders() {
		folder := []byte(folderStr)
		meta, err := db.loadMetadataTracker(folderStr)
		if err != nil {
			return err
		}
		batch := NewFileInfoBatch(func(fs []protocol.FileInfo) error {
			return db.updateLocalFiles(folder, fs, meta)
		})
		var innerErr error
		err = t.withHave(folder, protocol.LocalDeviceID[:], nil, false, func(fi protocol.FileInfo) bool {
			if fi.IsInvalid() && fi.FileLocalFlags() == 0 {
				fi.SetMustRescan()
				fi.Version = protocol.Vector{}
				batch.Append(fi)
				innerErr = batch.FlushIfFull()
				return innerErr == nil
			}
			return true
		})
		if innerErr != nil {
			return innerErr
		}
		if err != nil {
			return err
		}
		if err := batch.Flush(); err != nil {
			return err
		}
	}
	return nil
}

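// dropAllIndexIDsMigration drops all stored index IDs, forcing index data to
// be exchanged anew.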
func (db *schemaUpdater) dropAllIndexIDsMigration(_ int) error {
	return db.dropIndexIDs()
}

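// dropOutgoingIndexIDsMigration drops only the index IDs recorded for devices
// other than the local one.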
func (db *schemaUpdater) dropOutgoingIndexIDsMigration(_ int) error {
	return db.dropOtherDeviceIndexIDs()
}