2019-12-02 07:18:04 +00:00
|
|
|
// Copyright (C) 2014 The Syncthing Authors.
|
2018-10-10 09:34:24 +00:00
|
|
|
//
|
|
|
|
// This Source Code Form is subject to the terms of the Mozilla Public
|
|
|
|
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
|
|
|
// You can obtain one at https://mozilla.org/MPL/2.0/.
|
|
|
|
|
|
|
|
package db
|
|
|
|
|
|
|
|
import (
|
2019-12-02 07:18:04 +00:00
|
|
|
"bytes"
|
2020-04-12 08:26:57 +00:00
|
|
|
"context"
|
2019-12-02 07:18:04 +00:00
|
|
|
"encoding/binary"
|
2020-12-21 11:59:22 +00:00
|
|
|
"errors"
|
2020-10-06 18:14:09 +00:00
|
|
|
"fmt"
|
2020-07-11 07:36:09 +00:00
|
|
|
"io"
|
2020-09-10 08:54:41 +00:00
|
|
|
"os"
|
2020-10-08 15:37:45 +00:00
|
|
|
"regexp"
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
"time"
|
2019-12-02 07:18:04 +00:00
|
|
|
|
2020-07-11 07:36:09 +00:00
|
|
|
"github.com/dchest/siphash"
|
2020-04-20 07:02:33 +00:00
|
|
|
"github.com/greatroar/blobloom"
|
2019-11-29 08:11:52 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/db/backend"
|
2020-12-21 11:59:22 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/events"
|
2020-09-10 08:54:41 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/fs"
|
2019-12-02 07:18:04 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/protocol"
|
2020-07-11 07:36:09 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/rand"
|
2020-05-13 12:28:42 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/sha256"
|
2020-12-22 19:17:14 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/svcutil"
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/sync"
|
2020-04-12 08:26:57 +00:00
|
|
|
"github.com/syncthing/syncthing/lib/util"
|
2020-11-17 12:19:04 +00:00
|
|
|
"github.com/thejerf/suture/v4"
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
const (
|
2020-02-27 10:19:21 +00:00
|
|
|
// We set the bloom filter capacity to handle 100k individual items with
|
|
|
|
// a false positive probability of 1% for the first pass. Once we know
|
|
|
|
// how many items we have we will use that number instead, if it's more
|
|
|
|
// than 100k. For fewer than 100k items we will just get better false
|
|
|
|
// positive rate instead.
|
|
|
|
indirectGCBloomCapacity = 100000
|
2020-04-20 07:02:33 +00:00
|
|
|
indirectGCBloomFalsePositiveRate = 0.01 // 1%
|
|
|
|
indirectGCBloomMaxBytes = 32 << 20 // Use at most 32MiB memory, which covers our desired FP rate at 27 M items
|
2020-02-27 10:19:21 +00:00
|
|
|
indirectGCDefaultInterval = 13 * time.Hour
|
|
|
|
indirectGCTimeKey = "lastIndirectGCTime"
|
|
|
|
|
|
|
|
// Use indirection for the block list when it exceeds this many entries
|
|
|
|
blocksIndirectionCutoff = 3
|
2020-05-13 12:28:42 +00:00
|
|
|
// Use indirection for the version vector when it exceeds this many entries
|
|
|
|
versionIndirectionCutoff = 10
|
2020-01-26 14:13:28 +00:00
|
|
|
|
2020-03-24 12:53:20 +00:00
|
|
|
recheckDefaultInterval = 30 * 24 * time.Hour
|
2020-09-10 08:54:41 +00:00
|
|
|
|
|
|
|
needsRepairSuffix = ".needsrepair"
|
2020-03-24 12:53:20 +00:00
|
|
|
)
|
2020-01-26 14:13:28 +00:00
|
|
|
|
2018-10-10 09:34:24 +00:00
|
|
|
// Lowlevel is the lowest level database interface. It has a very simple
|
2019-11-29 08:11:52 +00:00
|
|
|
// purpose: hold the actual backend database, and the in-memory state
|
2018-10-10 09:34:24 +00:00
|
|
|
// that belong to that database. In the same way that a single on disk
|
|
|
|
// database can only be opened once, there should be only one Lowlevel for
|
2019-11-29 08:11:52 +00:00
|
|
|
// any given backend.
|
2018-10-10 09:34:24 +00:00
|
|
|
type Lowlevel struct {
|
2020-04-12 08:26:57 +00:00
|
|
|
*suture.Supervisor
|
2019-11-29 08:11:52 +00:00
|
|
|
backend.Backend
|
2020-03-24 12:53:20 +00:00
|
|
|
folderIdx *smallIndex
|
|
|
|
deviceIdx *smallIndex
|
|
|
|
keyer keyer
|
|
|
|
gcMut sync.RWMutex
|
|
|
|
gcKeyCount int
|
|
|
|
indirectGCInterval time.Duration
|
|
|
|
recheckInterval time.Duration
|
2020-12-21 10:32:59 +00:00
|
|
|
oneFileSetCreated chan struct{}
|
2020-12-21 11:59:22 +00:00
|
|
|
evLogger events.Logger
|
2019-08-20 07:41:41 +00:00
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func NewLowlevel(backend backend.Backend, evLogger events.Logger, opts ...Option) (*Lowlevel, error) {
|
2020-11-17 12:19:04 +00:00
|
|
|
// Only log restarts in debug mode.
|
2020-12-22 19:17:14 +00:00
|
|
|
spec := svcutil.SpecWithDebugLogger(l)
|
2019-12-02 07:18:04 +00:00
|
|
|
db := &Lowlevel{
|
2020-11-17 12:19:04 +00:00
|
|
|
Supervisor: suture.New("db.Lowlevel", spec),
|
2020-03-24 12:53:20 +00:00
|
|
|
Backend: backend,
|
|
|
|
folderIdx: newSmallIndex(backend, []byte{KeyTypeFolderIdx}),
|
|
|
|
deviceIdx: newSmallIndex(backend, []byte{KeyTypeDeviceIdx}),
|
|
|
|
gcMut: sync.NewRWMutex(),
|
|
|
|
indirectGCInterval: indirectGCDefaultInterval,
|
|
|
|
recheckInterval: recheckDefaultInterval,
|
2020-12-21 10:32:59 +00:00
|
|
|
oneFileSetCreated: make(chan struct{}),
|
2020-12-21 11:59:22 +00:00
|
|
|
evLogger: evLogger,
|
2020-03-24 12:53:20 +00:00
|
|
|
}
|
|
|
|
for _, opt := range opts {
|
|
|
|
opt(db)
|
2019-02-14 23:15:13 +00:00
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
db.keyer = newDefaultKeyer(db.folderIdx, db.deviceIdx)
|
2020-12-22 19:17:14 +00:00
|
|
|
db.Add(svcutil.AsService(db.gcRunner, "db.Lowlevel/gcRunner"))
|
2020-09-10 08:54:41 +00:00
|
|
|
if path := db.needsRepairPath(); path != "" {
|
|
|
|
if _, err := os.Lstat(path); err == nil {
|
|
|
|
l.Infoln("Database was marked for repair - this may take a while")
|
2020-12-21 11:59:22 +00:00
|
|
|
if err := db.checkRepair(); err != nil {
|
|
|
|
db.handleFailure(err)
|
|
|
|
return nil, err
|
|
|
|
}
|
2020-09-10 08:54:41 +00:00
|
|
|
os.Remove(path)
|
|
|
|
}
|
|
|
|
}
|
2020-12-21 11:59:22 +00:00
|
|
|
return db, nil
|
2019-02-14 23:15:13 +00:00
|
|
|
}
|
2019-05-02 09:15:00 +00:00
|
|
|
|
2020-03-24 12:53:20 +00:00
|
|
|
type Option func(*Lowlevel)
|
|
|
|
|
|
|
|
// WithRecheckInterval sets the time interval in between metadata recalculations
|
|
|
|
// and consistency checks.
|
|
|
|
func WithRecheckInterval(dur time.Duration) Option {
|
|
|
|
return func(db *Lowlevel) {
|
|
|
|
if dur > 0 {
|
|
|
|
db.recheckInterval = dur
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// WithIndirectGCInterval sets the time interval in between GC runs.
|
|
|
|
func WithIndirectGCInterval(dur time.Duration) Option {
|
|
|
|
return func(db *Lowlevel) {
|
|
|
|
if dur > 0 {
|
|
|
|
db.indirectGCInterval = dur
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-11-29 08:11:52 +00:00
|
|
|
// ListFolders returns the list of folders currently in the database
|
|
|
|
func (db *Lowlevel) ListFolders() []string {
|
|
|
|
return db.folderIdx.Values()
|
2019-07-26 20:18:42 +00:00
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
|
|
|
|
// updateRemoteFiles adds a list of fileinfos to the database and updates the
|
|
|
|
// global versionlist and metadata.
|
|
|
|
func (db *Lowlevel) updateRemoteFiles(folder, device []byte, fs []protocol.FileInfo, meta *metadataTracker) error {
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
db.gcMut.RLock()
|
|
|
|
defer db.gcMut.RUnlock()
|
|
|
|
|
2020-07-19 06:55:27 +00:00
|
|
|
t, err := db.newReadWriteTransaction(meta.CommitHook(folder))
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
var dk, gk, keyBuf []byte
|
2020-06-07 08:31:12 +00:00
|
|
|
devID, err := protocol.DeviceIDFromBytes(device)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
for _, f := range fs {
|
|
|
|
name := []byte(f.Name)
|
|
|
|
dk, err = db.keyer.GenerateDeviceFileKey(dk, folder, device, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ef, ok, err := t.getFileTrunc(dk, true)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if ok && unchanged(f, ef) {
|
2020-08-20 14:11:20 +00:00
|
|
|
l.Debugf("not inserting unchanged (remote); folder=%q device=%v %v", folder, devID, f)
|
2019-12-02 07:18:04 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
meta.removeFile(devID, ef)
|
|
|
|
}
|
|
|
|
meta.addFile(devID, f)
|
|
|
|
|
2020-08-20 14:11:20 +00:00
|
|
|
l.Debugf("insert (remote); folder=%q device=%v %v", folder, devID, f)
|
2020-08-18 07:20:12 +00:00
|
|
|
if err := t.putFile(dk, f); err != nil {
|
2019-12-02 07:18:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
gk, err = db.keyer.GenerateGlobalVersionKey(gk, folder, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
keyBuf, _, err = t.updateGlobal(gk, keyBuf, folder, device, f, meta)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-07-19 06:55:27 +00:00
|
|
|
if err := t.Checkpoint(); err != nil {
|
2019-12-02 07:18:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// updateLocalFiles adds fileinfos to the db, and updates the global versionlist,
|
|
|
|
// metadata, sequence and blockmap buckets.
|
|
|
|
func (db *Lowlevel) updateLocalFiles(folder []byte, fs []protocol.FileInfo, meta *metadataTracker) error {
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
db.gcMut.RLock()
|
|
|
|
defer db.gcMut.RUnlock()
|
|
|
|
|
2020-07-19 06:55:27 +00:00
|
|
|
t, err := db.newReadWriteTransaction(meta.CommitHook(folder))
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
var dk, gk, keyBuf []byte
|
|
|
|
blockBuf := make([]byte, 4)
|
|
|
|
for _, f := range fs {
|
|
|
|
name := []byte(f.Name)
|
|
|
|
dk, err = db.keyer.GenerateDeviceFileKey(dk, folder, protocol.LocalDeviceID[:], name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
ef, ok, err := t.getFileByKey(dk)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if ok && unchanged(f, ef) {
|
2020-08-20 14:11:20 +00:00
|
|
|
l.Debugf("not inserting unchanged (local); folder=%q %v", folder, f)
|
2019-12-02 07:18:04 +00:00
|
|
|
continue
|
|
|
|
}
|
2020-05-11 18:15:11 +00:00
|
|
|
blocksHashSame := ok && bytes.Equal(ef.BlocksHash, f.BlocksHash)
|
2019-12-02 07:18:04 +00:00
|
|
|
|
|
|
|
if ok {
|
2020-05-16 12:39:27 +00:00
|
|
|
if len(ef.Blocks) != 0 && !ef.IsInvalid() && ef.Size > 0 {
|
2019-12-02 07:18:04 +00:00
|
|
|
for _, block := range ef.Blocks {
|
|
|
|
keyBuf, err = db.keyer.GenerateBlockMapKey(keyBuf, folder, block.Hash, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Delete(keyBuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-05-11 18:15:11 +00:00
|
|
|
if !blocksHashSame {
|
|
|
|
keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, ef.BlocksHash, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = t.Delete(keyBuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
keyBuf, err = db.keyer.GenerateSequenceKey(keyBuf, folder, ef.SequenceNo())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Delete(keyBuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
l.Debugf("removing sequence; folder=%q sequence=%v %v", folder, ef.SequenceNo(), ef.FileName())
|
|
|
|
}
|
|
|
|
|
|
|
|
f.Sequence = meta.nextLocalSeq()
|
|
|
|
|
|
|
|
if ok {
|
|
|
|
meta.removeFile(protocol.LocalDeviceID, ef)
|
|
|
|
}
|
|
|
|
meta.addFile(protocol.LocalDeviceID, f)
|
|
|
|
|
|
|
|
l.Debugf("insert (local); folder=%q %v", folder, f)
|
2020-08-18 07:20:12 +00:00
|
|
|
if err := t.putFile(dk, f); err != nil {
|
2019-12-02 07:18:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
gk, err = db.keyer.GenerateGlobalVersionKey(gk, folder, []byte(f.Name))
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
keyBuf, _, err = t.updateGlobal(gk, keyBuf, folder, protocol.LocalDeviceID[:], f, meta)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
keyBuf, err = db.keyer.GenerateSequenceKey(keyBuf, folder, f.Sequence)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Put(keyBuf, dk); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
l.Debugf("adding sequence; folder=%q sequence=%v %v", folder, f.Sequence, f.Name)
|
|
|
|
|
2020-05-16 12:39:27 +00:00
|
|
|
if len(f.Blocks) != 0 && !f.IsInvalid() && f.Size > 0 {
|
2019-12-02 07:18:04 +00:00
|
|
|
for i, block := range f.Blocks {
|
|
|
|
binary.BigEndian.PutUint32(blockBuf, uint32(i))
|
|
|
|
keyBuf, err = db.keyer.GenerateBlockMapKey(keyBuf, folder, block.Hash, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Put(keyBuf, blockBuf); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-05-11 18:15:11 +00:00
|
|
|
if !blocksHashSame {
|
|
|
|
keyBuf, err := db.keyer.GenerateBlockListMapKey(keyBuf, folder, f.BlocksHash, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err = t.Put(keyBuf, nil); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-07-19 06:55:27 +00:00
|
|
|
if err := t.Checkpoint(); err != nil {
|
2019-12-02 07:18:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (db *Lowlevel) dropFolder(folder []byte) error {
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
db.gcMut.RLock()
|
|
|
|
defer db.gcMut.RUnlock()
|
|
|
|
|
2019-12-02 07:18:04 +00:00
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
// Remove all items related to the given folder from the device->file bucket
|
|
|
|
k0, err := db.keyer.GenerateDeviceFileKey(nil, folder, nil, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k0.WithoutNameAndDevice()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove all sequences related to the folder
|
2020-05-11 18:15:11 +00:00
|
|
|
k1, err := db.keyer.GenerateSequenceKey(k0, folder, 0)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k1.WithoutSequence()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove all items related to the given folder from the global bucket
|
2020-05-11 18:15:11 +00:00
|
|
|
k2, err := db.keyer.GenerateGlobalVersionKey(k1, folder, nil)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k2.WithoutName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove all needs related to the folder
|
2020-05-11 18:15:11 +00:00
|
|
|
k3, err := db.keyer.GenerateNeedFileKey(k2, folder, nil)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k3.WithoutName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Remove the blockmap of the folder
|
2020-05-11 18:15:11 +00:00
|
|
|
k4, err := db.keyer.GenerateBlockMapKey(k3, folder, nil, nil)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k4.WithoutHashAndName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-05-11 18:15:11 +00:00
|
|
|
k5, err := db.keyer.GenerateBlockListMapKey(k4, folder, nil, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(k5.WithoutHashAndName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (db *Lowlevel) dropDeviceFolder(device, folder []byte, meta *metadataTracker) error {
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
db.gcMut.RLock()
|
|
|
|
defer db.gcMut.RUnlock()
|
|
|
|
|
2020-07-28 14:46:42 +00:00
|
|
|
t, err := db.newReadWriteTransaction(meta.CommitHook(folder))
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
key, err := db.keyer.GenerateDeviceFileKey(nil, folder, device, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dbi, err := t.NewPrefixIterator(key)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-05-29 11:43:02 +00:00
|
|
|
defer dbi.Release()
|
|
|
|
|
2019-12-02 07:18:04 +00:00
|
|
|
var gk, keyBuf []byte
|
|
|
|
for dbi.Next() {
|
|
|
|
name := db.keyer.NameFromDeviceFileKey(dbi.Key())
|
|
|
|
gk, err = db.keyer.GenerateGlobalVersionKey(gk, folder, name)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
keyBuf, err = t.removeFromGlobal(gk, keyBuf, folder, device, name, meta)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Delete(dbi.Key()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.Checkpoint(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
2020-05-29 11:43:02 +00:00
|
|
|
dbi.Release()
|
2019-12-02 07:18:04 +00:00
|
|
|
if err := dbi.Error(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
if bytes.Equal(device, protocol.LocalDeviceID[:]) {
|
|
|
|
key, err := db.keyer.GenerateBlockMapKey(nil, folder, nil, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(key.WithoutHashAndName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-05-11 18:15:11 +00:00
|
|
|
key2, err := db.keyer.GenerateBlockListMapKey(key, folder, nil, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
if err := t.deleteKeyPrefix(key2.WithoutHashAndName()); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-05-30 07:50:23 +00:00
|
|
|
func (db *Lowlevel) checkGlobals(folder []byte) error {
|
2019-12-02 07:18:04 +00:00
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
key, err := db.keyer.GenerateGlobalVersionKey(nil, folder, nil)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
dbi, err := t.NewPrefixIterator(key.WithoutName())
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer dbi.Release()
|
|
|
|
|
|
|
|
var dk []byte
|
2020-05-30 07:50:23 +00:00
|
|
|
ro := t.readOnlyTransaction
|
2019-12-02 07:18:04 +00:00
|
|
|
for dbi.Next() {
|
2020-03-16 09:09:27 +00:00
|
|
|
var vl VersionList
|
2020-05-30 07:50:23 +00:00
|
|
|
if err := vl.Unmarshal(dbi.Value()); err != nil || vl.Empty() {
|
2020-10-19 06:28:53 +00:00
|
|
|
if err := t.Delete(dbi.Key()); err != nil && !backend.IsNotFound(err) {
|
2020-03-16 09:09:27 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-04-07 11:13:18 +00:00
|
|
|
continue
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Check the global version list for consistency. An issue in previous
|
|
|
|
// versions of goleveldb could result in reordered writes so that
|
|
|
|
// there are global entries pointing to no longer existing files. Here
|
|
|
|
// we find those and clear them out.
|
|
|
|
|
|
|
|
name := db.keyer.NameFromGlobalVersionKey(dbi.Key())
|
2020-05-30 07:50:23 +00:00
|
|
|
newVL := &VersionList{}
|
|
|
|
var changed, changedHere bool
|
|
|
|
for _, fv := range vl.RawVersions {
|
|
|
|
changedHere, err = checkGlobalsFilterDevices(dk, folder, name, fv.Devices, newVL, ro)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-05-30 07:50:23 +00:00
|
|
|
changed = changed || changedHere
|
|
|
|
|
|
|
|
changedHere, err = checkGlobalsFilterDevices(dk, folder, name, fv.InvalidDevices, newVL, ro)
|
2019-12-02 07:18:04 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-05-30 07:50:23 +00:00
|
|
|
changed = changed || changedHere
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-05-30 07:50:23 +00:00
|
|
|
if newVL.Empty() {
|
2020-10-19 06:28:53 +00:00
|
|
|
if err := t.Delete(dbi.Key()); err != nil && !backend.IsNotFound(err) {
|
2020-03-19 13:30:20 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-05-30 07:50:23 +00:00
|
|
|
} else if changed {
|
|
|
|
if err := t.Put(dbi.Key(), mustMarshal(newVL)); err != nil {
|
2019-12-02 07:18:04 +00:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2020-05-29 11:43:02 +00:00
|
|
|
dbi.Release()
|
2019-12-02 07:18:04 +00:00
|
|
|
if err := dbi.Error(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
l.Debugf("db check completed for %q", folder)
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-05-30 07:50:23 +00:00
|
|
|
func checkGlobalsFilterDevices(dk, folder, name []byte, devices [][]byte, vl *VersionList, t readOnlyTransaction) (bool, error) {
|
|
|
|
var changed bool
|
|
|
|
var err error
|
|
|
|
for _, device := range devices {
|
|
|
|
dk, err = t.keyer.GenerateDeviceFileKey(dk, folder, device, name)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
2020-07-28 14:25:07 +00:00
|
|
|
f, ok, err := t.getFileTrunc(dk, false)
|
2020-05-30 07:50:23 +00:00
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
if !ok {
|
|
|
|
changed = true
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
_, _, _, _, _, _, err = vl.update(folder, device, f, t)
|
|
|
|
if err != nil {
|
|
|
|
return false, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return changed, nil
|
|
|
|
}
|
|
|
|
|
2019-12-02 07:18:04 +00:00
|
|
|
func (db *Lowlevel) getIndexID(device, folder []byte) (protocol.IndexID, error) {
|
|
|
|
key, err := db.keyer.GenerateIndexIDKey(nil, device, folder)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
cur, err := db.Get(key)
|
|
|
|
if backend.IsNotFound(err) {
|
|
|
|
return 0, nil
|
|
|
|
} else if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
var id protocol.IndexID
|
|
|
|
if err := id.Unmarshal(cur); err != nil {
|
|
|
|
return 0, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
return id, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (db *Lowlevel) setIndexID(device, folder []byte, id protocol.IndexID) error {
|
|
|
|
bs, _ := id.Marshal() // marshalling can't fail
|
|
|
|
key, err := db.keyer.GenerateIndexIDKey(nil, device, folder)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return db.Put(key, bs)
|
|
|
|
}
|
|
|
|
|
2020-12-21 10:10:59 +00:00
|
|
|
func (db *Lowlevel) dropFolderIndexIDs(folder []byte) error {
|
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
if err := t.deleteKeyPrefixMatching([]byte{KeyTypeIndexID}, func(key []byte) bool {
|
|
|
|
keyFolder, ok := t.keyer.FolderFromIndexIDKey(key)
|
|
|
|
if !ok {
|
|
|
|
l.Debugf("Deleting IndexID with missing FolderIdx: %v", key)
|
|
|
|
return true
|
|
|
|
}
|
|
|
|
return bytes.Equal(keyFolder, folder)
|
|
|
|
}); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return t.Commit()
|
|
|
|
}
|
|
|
|
|
2019-12-02 07:18:04 +00:00
|
|
|
func (db *Lowlevel) dropMtimes(folder []byte) error {
|
|
|
|
key, err := db.keyer.GenerateMtimesKey(nil, folder)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return db.dropPrefix(key)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (db *Lowlevel) dropFolderMeta(folder []byte) error {
|
|
|
|
key, err := db.keyer.GenerateFolderMetaKey(nil, folder)
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
return db.dropPrefix(key)
|
|
|
|
}
|
|
|
|
|
|
|
|
func (db *Lowlevel) dropPrefix(prefix []byte) error {
|
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
if err := t.deleteKeyPrefix(prefix); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-12 10:59:55 +00:00
|
|
|
return t.Commit()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-11-17 12:19:04 +00:00
|
|
|
func (db *Lowlevel) gcRunner(ctx context.Context) error {
|
2020-02-29 18:51:32 +00:00
|
|
|
// Calculate the time for the next GC run. Even if we should run GC
|
|
|
|
// directly, give the system a while to get up and running and do other
|
|
|
|
// stuff first. (We might have migrations and stuff which would be
|
|
|
|
// better off running before GC.)
|
2020-03-24 12:53:20 +00:00
|
|
|
next := db.timeUntil(indirectGCTimeKey, db.indirectGCInterval)
|
2020-02-29 18:51:32 +00:00
|
|
|
if next < time.Minute {
|
|
|
|
next = time.Minute
|
|
|
|
}
|
|
|
|
|
|
|
|
t := time.NewTimer(next)
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
defer t.Stop()
|
2020-02-29 18:51:32 +00:00
|
|
|
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
for {
|
|
|
|
select {
|
2020-04-12 08:26:57 +00:00
|
|
|
case <-ctx.Done():
|
2020-11-17 12:19:04 +00:00
|
|
|
return ctx.Err()
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
case <-t.C:
|
2020-04-12 08:26:57 +00:00
|
|
|
if err := db.gcIndirect(ctx); err != nil {
|
2020-02-27 10:19:21 +00:00
|
|
|
l.Warnln("Database indirection GC failed:", err)
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
}
|
2020-02-27 10:19:21 +00:00
|
|
|
db.recordTime(indirectGCTimeKey)
|
2020-03-24 12:53:20 +00:00
|
|
|
t.Reset(db.timeUntil(indirectGCTimeKey, db.indirectGCInterval))
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-01-26 14:13:28 +00:00
|
|
|
// recordTime records the current time under the given key, affecting the
|
|
|
|
// next call to timeUntil with the same key.
|
|
|
|
func (db *Lowlevel) recordTime(key string) {
|
|
|
|
miscDB := NewMiscDataNamespace(db)
|
|
|
|
_ = miscDB.PutInt64(key, time.Now().Unix()) // error wilfully ignored
|
|
|
|
}
|
|
|
|
|
|
|
|
// timeUntil returns how long we should wait until the next interval, or
|
|
|
|
// zero if it should happen directly.
|
|
|
|
func (db *Lowlevel) timeUntil(key string, every time.Duration) time.Duration {
|
|
|
|
miscDB := NewMiscDataNamespace(db)
|
|
|
|
lastTime, _, _ := miscDB.Int64(key) // error wilfully ignored
|
|
|
|
nextTime := time.Unix(lastTime, 0).Add(every)
|
|
|
|
sleepTime := time.Until(nextTime)
|
|
|
|
if sleepTime < 0 {
|
|
|
|
sleepTime = 0
|
|
|
|
}
|
|
|
|
return sleepTime
|
|
|
|
}
|
|
|
|
|
2020-04-12 08:26:57 +00:00
|
|
|
func (db *Lowlevel) gcIndirect(ctx context.Context) error {
|
2020-02-27 10:19:21 +00:00
|
|
|
// The indirection GC uses bloom filters to track used block lists and
|
|
|
|
// versions. This means iterating over all items, adding their hashes to
|
|
|
|
// the filter, then iterating over the indirected items and removing
|
|
|
|
// those that don't match the filter. The filter will give false
|
|
|
|
// positives so we will keep around one percent of things that we don't
|
|
|
|
// really need (at most).
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
//
|
2020-02-27 10:19:21 +00:00
|
|
|
// Indirection GC needs to run when there are no modifications to the
|
|
|
|
// FileInfos or indirected items.
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
|
|
|
|
db.gcMut.Lock()
|
|
|
|
defer db.gcMut.Unlock()
|
|
|
|
|
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
defer t.Release()
|
|
|
|
|
2020-02-27 10:19:21 +00:00
|
|
|
// Set up the bloom filters with the initial capacity and false positive
|
|
|
|
// rate, or higher capacity if we've done this before and seen lots of
|
|
|
|
// items. For simplicity's sake we track just one count, which is the
|
|
|
|
// highest of the various indirected items.
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
|
2020-02-27 10:19:21 +00:00
|
|
|
capacity := indirectGCBloomCapacity
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
if db.gcKeyCount > capacity {
|
|
|
|
capacity = db.gcKeyCount
|
|
|
|
}
|
2020-05-13 12:28:42 +00:00
|
|
|
blockFilter := newBloomFilter(capacity)
|
|
|
|
versionFilter := newBloomFilter(capacity)
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
|
2020-02-27 10:19:21 +00:00
|
|
|
// Iterate the FileInfos, unmarshal the block and version hashes and
|
|
|
|
// add them to the filter.
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
|
2020-02-29 18:51:32 +00:00
|
|
|
it, err := t.NewPrefixIterator([]byte{KeyTypeDevice})
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-29 18:51:32 +00:00
|
|
|
defer it.Release()
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
for it.Next() {
|
2020-04-12 08:26:57 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2020-05-13 12:28:42 +00:00
|
|
|
var hashes IndirectionHashesOnly
|
|
|
|
if err := hashes.Unmarshal(it.Value()); err != nil {
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
return err
|
|
|
|
}
|
2020-05-13 12:28:42 +00:00
|
|
|
if len(hashes.BlocksHash) > 0 {
|
2020-07-11 07:36:09 +00:00
|
|
|
blockFilter.add(hashes.BlocksHash)
|
2020-05-13 12:28:42 +00:00
|
|
|
}
|
|
|
|
if len(hashes.VersionHash) > 0 {
|
2020-07-11 07:36:09 +00:00
|
|
|
versionFilter.add(hashes.VersionHash)
|
2020-02-11 14:37:22 +00:00
|
|
|
}
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
}
|
|
|
|
it.Release()
|
|
|
|
if err := it.Error(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Iterate over block lists, removing keys with hashes that don't match
|
|
|
|
// the filter.
|
|
|
|
|
2020-02-29 18:51:32 +00:00
|
|
|
it, err = t.NewPrefixIterator([]byte{KeyTypeBlockList})
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-02-29 18:51:32 +00:00
|
|
|
defer it.Release()
|
2020-02-27 10:19:21 +00:00
|
|
|
matchedBlocks := 0
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
for it.Next() {
|
2020-04-12 08:26:57 +00:00
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
key := blockListKey(it.Key())
|
2020-07-11 07:36:09 +00:00
|
|
|
if blockFilter.has(key.Hash()) {
|
2020-02-27 10:19:21 +00:00
|
|
|
matchedBlocks++
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := t.Delete(key); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
it.Release()
|
|
|
|
if err := it.Error(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
2020-05-13 12:28:42 +00:00
|
|
|
// Iterate over version lists, removing keys with hashes that don't match
|
|
|
|
// the filter.
|
|
|
|
|
|
|
|
it, err = db.NewPrefixIterator([]byte{KeyTypeVersion})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
matchedVersions := 0
|
|
|
|
for it.Next() {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return ctx.Err()
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
|
|
|
key := versionKey(it.Key())
|
2020-07-11 07:36:09 +00:00
|
|
|
if versionFilter.has(key.Hash()) {
|
2020-05-13 12:28:42 +00:00
|
|
|
matchedVersions++
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
if err := t.Delete(key); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
it.Release()
|
|
|
|
if err := it.Error(); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
// Remember the number of unique keys we kept until the next pass.
|
2020-02-27 10:19:21 +00:00
|
|
|
db.gcKeyCount = matchedBlocks
|
2020-05-13 12:28:42 +00:00
|
|
|
if matchedVersions > matchedBlocks {
|
|
|
|
db.gcKeyCount = matchedVersions
|
|
|
|
}
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
|
|
|
|
if err := t.Commit(); err != nil {
|
|
|
|
return err
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
lib/db: Deduplicate block lists in database (fixes #5898) (#6283)
* lib/db: Deduplicate block lists in database (fixes #5898)
This moves the block list in the database out from being just a field on
the FileInfo to being an object of its own. When putting a FileInfo we
marshal the block list separately and store it keyed by the sha256 of
the marshalled block list. When getting, if we are not doing a
"truncated" get, we do an extra read and unmarshal for the block list.
Old block lists are cleared out by a periodic GC sweep. The alternative
would be to use refcounting, but:
- There is a larger risk of getting that wrong and either dropping a
block list in error or keeping them around forever.
- It's tricky with our current database, as we don't have dirty reads.
This means that if we update two FileInfos with identical block lists in
the same transaction we can't just do read/modify/write for the ref
counters as we wouldn't see our own first update. See above about
tracking this and risks about getting it wrong.
GC uses a bloom filter for keys to avoid heavy RAM usage. GC can't run
concurrently with FileInfo updates so there is a new lock around those
operation at the lowlevel.
The end result is a much more compact database, especially for setups
with many peers where files get duplicated many times.
This is per-key-class stats for a large database I'm currently working
with, under the current schema:
```
0x00: 9138161 items, 870876 KB keys + 7397482 KB data, 95 B + 809 B avg, 1637651 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x08: 1349 items, 12 KB keys + 10 KB data, 9 B + 8 B avg, 17 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 7 B avg, 30 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
Total 10426475 items, 968490 KB keys + 9202925 KB data.
```
Note 7.4 GB of data in class 00, total size 9.2 GB. After running the
migration we get this instead:
```
0x00: 9138161 items, 870876 KB keys + 2611392 KB data, 95 B + 285 B avg, 4788 B max
0x01: 185656 items, 10388 KB keys + 1790909 KB data, 55 B + 9646 B avg, 924525 B max
0x02: 916890 items, 84795 KB keys + 3667 KB data, 92 B + 4 B avg, 192 B max
0x03: 384 items, 27 KB keys + 5 KB data, 72 B + 15 B avg, 87 B max
0x04: 1109 items, 17 KB keys + 17 KB data, 15 B + 15 B avg, 69 B max
0x06: 383 items, 3 KB keys + 0 KB data, 9 B + 2 B avg, 18 B max
0x07: 510 items, 4 KB keys + 12 KB data, 9 B + 24 B avg, 41 B max
0x09: 194 items, 0 KB keys + 123 KB data, 5 B + 634 B avg, 11484 B max
0x0a: 3 items, 0 KB keys + 0 KB data, 14 B + 17 B avg, 51 B max
0x0b: 181836 items, 2363 KB keys + 10694 KB data, 13 B + 58 B avg, 173 B max
0x0d: 44282 items, 1461 KB keys + 61081 KB data, 33 B + 1379 B avg, 1637399 B max
Total 10469408 items, 969939 KB keys + 4477905 KB data.
```
Class 00 is now down to 2.6 GB, with just 61 MB added in class 0d.
There will be some additional reads in some cases which theoretically
hurts performance, but this will be more than compensated for by smaller
writes and better compaction.
On my own home setup which just has three devices and a handful of
folders the difference is smaller in absolute numbers of course, but
still less than half the old size:
```
0x00: 297122 items, 20894 KB keys + 306860 KB data, 70 B + 1032 B avg, 103237 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
Total 1947412 items, 151268 KB keys + 337485 KB data.
```
to:
```
0x00: 297122 items, 20894 KB keys + 37038 KB data, 70 B + 124 B avg, 520 B max
0x01: 115299 items, 7738 KB keys + 17542 KB data, 67 B + 152 B avg, 419 B max
0x02: 1430537 items, 121223 KB keys + 5722 KB data, 84 B + 4 B avg, 253 B max
...
0x0d: 18041 items, 595 KB keys + 71964 KB data, 33 B + 3988 B avg, 101109 B max
Total 1965447 items, 151863 KB keys + 139628 KB data.
```
* wip
* wip
* wip
* wip
2020-01-24 07:35:44 +00:00
|
|
|
return db.Compact()
|
2019-12-02 07:18:04 +00:00
|
|
|
}
|
|
|
|
|
2020-07-11 07:36:09 +00:00
|
|
|
func newBloomFilter(capacity int) bloomFilter {
|
|
|
|
var buf [16]byte
|
|
|
|
io.ReadFull(rand.Reader, buf[:])
|
|
|
|
|
|
|
|
return bloomFilter{
|
|
|
|
f: blobloom.NewOptimized(blobloom.Config{
|
|
|
|
Capacity: uint64(capacity),
|
|
|
|
FPRate: indirectGCBloomFalsePositiveRate,
|
|
|
|
MaxBits: 8 * indirectGCBloomMaxBytes,
|
|
|
|
}),
|
|
|
|
|
|
|
|
k0: binary.LittleEndian.Uint64(buf[:8]),
|
|
|
|
k1: binary.LittleEndian.Uint64(buf[8:]),
|
|
|
|
}
|
2020-05-13 12:28:42 +00:00
|
|
|
}
|
|
|
|
|
2020-07-11 07:36:09 +00:00
|
|
|
type bloomFilter struct {
|
|
|
|
f *blobloom.Filter
|
|
|
|
k0, k1 uint64 // Random key for SipHash.
|
|
|
|
}
|
|
|
|
|
|
|
|
func (b *bloomFilter) add(id []byte) { b.f.Add(b.hash(id)) }
|
|
|
|
func (b *bloomFilter) has(id []byte) bool { return b.f.Has(b.hash(id)) }
|
|
|
|
|
|
|
|
// Hash function for the bloomfilter: SipHash of the SHA-256.
|
|
|
|
//
|
|
|
|
// The randomization in SipHash means we get different collisions across
|
|
|
|
// runs and colliding keys are not kept indefinitely.
|
|
|
|
func (b *bloomFilter) hash(id []byte) uint64 {
|
|
|
|
if len(id) != sha256.Size {
|
|
|
|
panic("bug: bloomFilter.hash passed something not a SHA256 hash")
|
2020-05-13 12:28:42 +00:00
|
|
|
}
|
2020-07-11 07:36:09 +00:00
|
|
|
return siphash.Hash(b.k0, b.k1, id)
|
2020-04-20 07:02:33 +00:00
|
|
|
}
|
|
|
|
|
2020-09-10 08:54:41 +00:00
|
|
|
// checkRepair checks folder metadata and sequences for miscellaneous errors.
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) checkRepair() error {
|
2020-03-19 14:58:32 +00:00
|
|
|
for _, folder := range db.ListFolders() {
|
2020-12-21 11:59:22 +00:00
|
|
|
if _, err := db.getMetaAndCheck(folder); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
2020-12-21 11:59:22 +00:00
|
|
|
return nil
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) getMetaAndCheck(folder string) (*metadataTracker, error) {
|
2020-03-19 14:58:32 +00:00
|
|
|
db.gcMut.RLock()
|
|
|
|
defer db.gcMut.RUnlock()
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
fixed, err := db.checkLocalNeed([]byte(folder))
|
2020-09-04 12:01:46 +00:00
|
|
|
if err != nil {
|
2020-12-21 11:59:22 +00:00
|
|
|
return nil, fmt.Errorf("checking local need: %w", err)
|
2020-09-04 12:01:46 +00:00
|
|
|
}
|
|
|
|
if fixed != 0 {
|
|
|
|
l.Infof("Repaired %d local need entries for folder %v in database", fixed, folder)
|
|
|
|
}
|
|
|
|
|
|
|
|
meta, err := db.recalcMeta(folder)
|
|
|
|
if err != nil {
|
2020-12-21 11:59:22 +00:00
|
|
|
return nil, fmt.Errorf("recalculating metadata: %w", err)
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-09-04 12:01:46 +00:00
|
|
|
fixed, err = db.repairSequenceGCLocked(folder, meta)
|
|
|
|
if err != nil {
|
2020-12-21 11:59:22 +00:00
|
|
|
return nil, fmt.Errorf("repairing sequences: %w", err)
|
2020-09-04 12:01:46 +00:00
|
|
|
}
|
|
|
|
if fixed != 0 {
|
|
|
|
l.Infof("Repaired %d sequence entries for folder %v in database", fixed, folder)
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
return meta, nil
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) loadMetadataTracker(folder string) (*metadataTracker, error) {
|
|
|
|
meta := newMetadataTracker(db.keyer, db.evLogger)
|
2020-03-19 14:58:32 +00:00
|
|
|
if err := meta.fromDB(db, []byte(folder)); err != nil {
|
2020-06-17 08:03:39 +00:00
|
|
|
if err == errMetaInconsistent {
|
|
|
|
l.Infof("Stored folder metadata for %q is inconsistent; recalculating", folder)
|
|
|
|
} else {
|
|
|
|
l.Infof("No stored folder metadata for %q; recalculating", folder)
|
|
|
|
|
|
|
|
}
|
2020-03-19 14:58:32 +00:00
|
|
|
return db.getMetaAndCheck(folder)
|
|
|
|
}
|
|
|
|
|
|
|
|
curSeq := meta.Sequence(protocol.LocalDeviceID)
|
2020-12-21 11:59:22 +00:00
|
|
|
if metaOK, err := db.verifyLocalSequence(curSeq, folder); err != nil {
|
|
|
|
return nil, fmt.Errorf("verifying sequences: %w", err)
|
|
|
|
} else if !metaOK {
|
2020-03-19 14:58:32 +00:00
|
|
|
l.Infof("Stored folder metadata for %q is out of date after crash; recalculating", folder)
|
|
|
|
return db.getMetaAndCheck(folder)
|
|
|
|
}
|
|
|
|
|
2020-03-24 12:53:20 +00:00
|
|
|
if age := time.Since(meta.Created()); age > db.recheckInterval {
|
2020-06-18 08:55:41 +00:00
|
|
|
l.Infof("Stored folder metadata for %q is %v old; recalculating", folder, util.NiceDurationString(age))
|
2020-03-19 14:58:32 +00:00
|
|
|
return db.getMetaAndCheck(folder)
|
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
return meta, nil
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-07-28 14:46:42 +00:00
|
|
|
func (db *Lowlevel) recalcMeta(folderStr string) (*metadataTracker, error) {
|
|
|
|
folder := []byte(folderStr)
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
meta := newMetadataTracker(db.keyer, db.evLogger)
|
2020-07-28 14:46:42 +00:00
|
|
|
if err := db.checkGlobals(folder); err != nil {
|
2020-10-19 06:28:53 +00:00
|
|
|
return nil, fmt.Errorf("checking globals: %w", err)
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-07-28 14:46:42 +00:00
|
|
|
t, err := db.newReadWriteTransaction(meta.CommitHook(folder))
|
2020-03-19 14:58:32 +00:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
var deviceID protocol.DeviceID
|
2020-07-28 14:46:42 +00:00
|
|
|
err = t.withAllFolderTruncated(folder, func(device []byte, f FileInfoTruncated) bool {
|
2020-03-19 14:58:32 +00:00
|
|
|
copy(deviceID[:], device)
|
|
|
|
meta.addFile(deviceID, f)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2020-07-28 14:46:42 +00:00
|
|
|
err = t.withGlobal(folder, nil, true, func(f protocol.FileIntf) bool {
|
2020-05-30 07:50:23 +00:00
|
|
|
meta.addFile(protocol.GlobalDeviceID, f)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
|
2020-05-11 13:07:06 +00:00
|
|
|
meta.emptyNeeded(protocol.LocalDeviceID)
|
2020-07-28 14:46:42 +00:00
|
|
|
err = t.withNeed(folder, protocol.LocalDeviceID[:], true, func(f protocol.FileIntf) bool {
|
2020-05-11 13:07:06 +00:00
|
|
|
meta.addNeeded(protocol.LocalDeviceID, f)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
for _, device := range meta.devices() {
|
|
|
|
meta.emptyNeeded(device)
|
2020-07-28 14:46:42 +00:00
|
|
|
err = t.withNeed(folder, device[:], true, func(f protocol.FileIntf) bool {
|
2020-05-11 13:07:06 +00:00
|
|
|
meta.addNeeded(device, f)
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-19 14:58:32 +00:00
|
|
|
meta.SetCreated()
|
|
|
|
if err := t.Commit(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return meta, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Verify the local sequence number from actual sequence entries. Returns
|
|
|
|
// true if it was all good, or false if a fixup was necessary.
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) verifyLocalSequence(curSeq int64, folder string) (bool, error) {
|
2020-03-19 14:58:32 +00:00
|
|
|
// Walk the sequence index from the current (supposedly) highest
|
|
|
|
// sequence number and raise the alarm if we get anything. This recovers
|
|
|
|
// from the occasion where we have written sequence entries to disk but
|
|
|
|
// not yet written new metadata to disk.
|
|
|
|
//
|
|
|
|
// Note that we can have the same thing happen for remote devices but
|
|
|
|
// there it's not a problem -- we'll simply advertise a lower sequence
|
|
|
|
// number than we've actually seen and receive some duplicate updates
|
|
|
|
// and then be in sync again.
|
|
|
|
|
|
|
|
t, err := db.newReadOnlyTransaction()
|
|
|
|
if err != nil {
|
2020-12-21 11:59:22 +00:00
|
|
|
return false, err
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
ok := true
|
2020-05-30 07:50:23 +00:00
|
|
|
if err := t.withHaveSequence([]byte(folder), curSeq+1, func(fi protocol.FileIntf) bool {
|
2020-03-19 14:58:32 +00:00
|
|
|
ok = false // we got something, which we should not have
|
|
|
|
return false
|
2020-12-21 11:59:22 +00:00
|
|
|
}); err != nil {
|
|
|
|
return false, err
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
t.close()
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
return ok, nil
|
2020-03-19 14:58:32 +00:00
|
|
|
}
|
|
|
|
|
2020-03-18 16:34:46 +00:00
|
|
|
// repairSequenceGCLocked makes sure the sequence numbers in the sequence keys
|
|
|
|
// match those in the corresponding file entries. It returns the amount of fixed
|
|
|
|
// entries.
|
|
|
|
func (db *Lowlevel) repairSequenceGCLocked(folderStr string, meta *metadataTracker) (int, error) {
|
2020-07-19 06:55:27 +00:00
|
|
|
t, err := db.newReadWriteTransaction(meta.CommitHook([]byte(folderStr)))
|
2020-03-18 16:34:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
fixed := 0
|
|
|
|
|
|
|
|
folder := []byte(folderStr)
|
|
|
|
|
|
|
|
// First check that every file entry has a matching sequence entry
|
|
|
|
// (this was previously db schema upgrade to 9).
|
|
|
|
|
|
|
|
dk, err := t.keyer.GenerateDeviceFileKey(nil, folder, protocol.LocalDeviceID[:], nil)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
it, err := t.NewPrefixIterator(dk.WithoutName())
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer it.Release()
|
|
|
|
|
|
|
|
var sk sequenceKey
|
|
|
|
for it.Next() {
|
2020-08-18 07:20:12 +00:00
|
|
|
intf, err := t.unmarshalTrunc(it.Value(), false)
|
2020-03-18 16:34:46 +00:00
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2020-08-18 07:20:12 +00:00
|
|
|
fi := intf.(protocol.FileInfo)
|
2020-03-18 16:34:46 +00:00
|
|
|
if sk, err = t.keyer.GenerateSequenceKey(sk, folder, fi.Sequence); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
switch dk, err = t.Get(sk); {
|
|
|
|
case err != nil:
|
|
|
|
if !backend.IsNotFound(err) {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
fallthrough
|
|
|
|
case !bytes.Equal(it.Key(), dk):
|
|
|
|
fixed++
|
|
|
|
fi.Sequence = meta.nextLocalSeq()
|
|
|
|
if sk, err = t.keyer.GenerateSequenceKey(sk, folder, fi.Sequence); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if err := t.Put(sk, it.Key()); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
2020-08-18 07:20:12 +00:00
|
|
|
if err := t.putFile(it.Key(), fi); err != nil {
|
2020-03-18 16:34:46 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
2020-07-19 06:55:27 +00:00
|
|
|
if err := t.Checkpoint(); err != nil {
|
2020-03-18 16:34:46 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := it.Error(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
it.Release()
|
|
|
|
|
|
|
|
// Secondly check there's no sequence entries pointing at incorrect things.
|
|
|
|
|
|
|
|
sk, err = t.keyer.GenerateSequenceKey(sk, folder, 0)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
it, err = t.NewPrefixIterator(sk.WithoutSequence())
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer it.Release()
|
|
|
|
|
|
|
|
for it.Next() {
|
|
|
|
// Check that the sequence from the key matches the
|
|
|
|
// sequence in the file.
|
|
|
|
fi, ok, err := t.getFileTrunc(it.Value(), true)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
if ok {
|
|
|
|
if seq := t.keyer.SequenceFromSequenceKey(it.Key()); seq == fi.SequenceNo() {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Either the file is missing or has a different sequence number
|
|
|
|
fixed++
|
|
|
|
if err := t.Delete(it.Key()); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if err := it.Error(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
it.Release()
|
|
|
|
|
|
|
|
return fixed, t.Commit()
|
|
|
|
}
|
|
|
|
|
2020-09-04 12:01:46 +00:00
|
|
|
// Does not take care of metadata - if anything is repaired, the need count
|
|
|
|
// needs to be recalculated.
|
|
|
|
func (db *Lowlevel) checkLocalNeed(folder []byte) (int, error) {
|
|
|
|
repaired := 0
|
|
|
|
|
|
|
|
t, err := db.newReadWriteTransaction()
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer t.close()
|
|
|
|
|
|
|
|
key, err := t.keyer.GenerateNeedFileKey(nil, folder, nil)
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
dbi, err := t.NewPrefixIterator(key.WithoutName())
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
defer dbi.Release()
|
|
|
|
|
|
|
|
var needName string
|
|
|
|
var needDone bool
|
|
|
|
next := func() {
|
|
|
|
needDone = !dbi.Next()
|
|
|
|
if !needDone {
|
|
|
|
needName = string(t.keyer.NameFromGlobalVersionKey(dbi.Key()))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
next()
|
|
|
|
t.withNeedIteratingGlobal(folder, protocol.LocalDeviceID[:], true, func(fi protocol.FileIntf) bool {
|
|
|
|
f := fi.(FileInfoTruncated)
|
|
|
|
for !needDone && needName < f.Name {
|
|
|
|
repaired++
|
2020-10-06 18:14:09 +00:00
|
|
|
if err = t.Delete(dbi.Key()); err != nil && !backend.IsNotFound(err) {
|
2020-09-04 12:01:46 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
l.Debugln("check local need: removing", needName)
|
|
|
|
next()
|
|
|
|
}
|
|
|
|
if needName == f.Name {
|
|
|
|
next()
|
|
|
|
} else {
|
|
|
|
repaired++
|
|
|
|
key, err = t.keyer.GenerateNeedFileKey(key, folder, []byte(f.Name))
|
|
|
|
if err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
if err = t.Put(key, nil); err != nil {
|
|
|
|
return false
|
|
|
|
}
|
|
|
|
l.Debugln("check local need: adding", f.Name)
|
|
|
|
}
|
|
|
|
return true
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for !needDone {
|
|
|
|
repaired++
|
2020-10-06 18:14:09 +00:00
|
|
|
if err := t.Delete(dbi.Key()); err != nil && !backend.IsNotFound(err) {
|
2020-09-04 12:01:46 +00:00
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
l.Debugln("check local need: removing", needName)
|
|
|
|
next()
|
|
|
|
}
|
|
|
|
|
|
|
|
if err := dbi.Error(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
dbi.Release()
|
|
|
|
|
|
|
|
if err = t.Commit(); err != nil {
|
|
|
|
return 0, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return repaired, nil
|
|
|
|
}
|
|
|
|
|
2020-09-10 08:54:41 +00:00
|
|
|
func (db *Lowlevel) needsRepairPath() string {
|
|
|
|
path := db.Location()
|
|
|
|
if path == "" {
|
|
|
|
return ""
|
|
|
|
}
|
|
|
|
if path[len(path)-1] == fs.PathSeparator {
|
|
|
|
path = path[:len(path)-1]
|
|
|
|
}
|
|
|
|
return path + needsRepairSuffix
|
|
|
|
}
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) checkErrorForRepair(err error) {
|
|
|
|
if errors.Is(err, errEntryFromGlobalMissing) || errors.Is(err, errEmptyGlobal) {
|
|
|
|
// Inconsistency error, mark db for repair on next start.
|
|
|
|
if path := db.needsRepairPath(); path != "" {
|
|
|
|
if fd, err := os.Create(path); err == nil {
|
|
|
|
fd.Close()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-02 07:18:04 +00:00
|
|
|
// unchanged checks if two files are the same and thus don't need to be updated.
|
|
|
|
// Local flags or the invalid bit might change without the version
|
|
|
|
// being bumped.
|
2020-05-30 07:50:23 +00:00
|
|
|
func unchanged(nf, ef protocol.FileIntf) bool {
|
2019-12-02 07:18:04 +00:00
|
|
|
return ef.FileVersion().Equal(nf.FileVersion()) && ef.IsInvalid() == nf.IsInvalid() && ef.FileLocalFlags() == nf.FileLocalFlags()
|
|
|
|
}
|
2020-10-08 15:37:45 +00:00
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func (db *Lowlevel) handleFailure(err error) {
|
|
|
|
db.checkErrorForRepair(err)
|
|
|
|
if shouldReportFailure(err) {
|
|
|
|
db.evLogger.Log(events.Failure, err)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-10-08 15:37:45 +00:00
|
|
|
var ldbPathRe = regexp.MustCompile(`(open|write|read) .+[\\/].+[\\/]index[^\\/]+[\\/][^\\/]+: `)
|
|
|
|
|
2020-12-21 11:59:22 +00:00
|
|
|
func shouldReportFailure(err error) bool {
|
|
|
|
return !ldbPathRe.MatchString(err.Error())
|
2020-10-08 15:37:45 +00:00
|
|
|
}
|