diff --git a/cmd/stindex/idxck.go b/cmd/stindex/idxck.go
index 7defbe2ae..cf752820e 100644
--- a/cmd/stindex/idxck.go
+++ b/cmd/stindex/idxck.go
@@ -148,7 +148,7 @@ func idxck(ldb backend.Backend) (success bool) {
 			}
 		}
 
-		if fi.BlocksHash != nil {
+		if len(fi.Blocks) == 0 && len(fi.BlocksHash) != 0 {
 			key := string(fi.BlocksHash)
 			if _, ok := blocklists[key]; !ok {
 				fmt.Printf("Missing block list for file %q, block list hash %x\n", fi.Name, fi.BlocksHash)
diff --git a/cmd/syncthing/main.go b/cmd/syncthing/main.go
index 2fe1ba6e5..d5bd24710 100644
--- a/cmd/syncthing/main.go
+++ b/cmd/syncthing/main.go
@@ -119,8 +119,8 @@ are mostly useful for developers. Use with care.
                    "h", "m" and "s" abbreviations for hours minutes and seconds.
                    Valid values are like "720h", "30s", etc.
 
- STGCBLOCKSEVERY   Set to a time interval to override the default database
-                   block GC interval of 13 hours. Same format as the
+ STGCINDIRECTEVERY Set to a time interval to override the default database
+                   indirection GC interval of 13 hours. Same format as the
                    STRECHECKDBEVERY variable.
 
 GOMAXPROCS        Set the maximum number of CPU cores to use. Defaults to all
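STGCINDIRECTEVERY replaces STGCBLOCKSEVERY, and the lowlevel.go hunks below keep honoring the deprecated name. As a minimal standalone sketch of that override pattern (the variable names and the 13-hour default come from the diff; the rest is illustrative, not the actual Syncthing wiring):

```go
package main

import (
	"fmt"
	"os"
	"time"
)

// gcInterval resolves the indirection GC interval the way the init() in the
// diff does: start from the default, then let the deprecated STGCBLOCKSEVERY
// and the current STGCINDIRECTEVERY override it, in that order.
func gcInterval() time.Duration {
	interval := 13 * time.Hour // indirectGCDefaultInterval in the diff
	for _, name := range []string{"STGCBLOCKSEVERY", "STGCINDIRECTEVERY"} {
		if dur, err := time.ParseDuration(os.Getenv(name)); err == nil {
			interval = dur
		}
	}
	return interval
}

func main() {
	fmt.Println("indirection GC every", gcInterval())
}
```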
diff --git a/lib/db/lowlevel.go b/lib/db/lowlevel.go
index 17f4d0413..fe30c56ab 100644
--- a/lib/db/lowlevel.go
+++ b/lib/db/lowlevel.go
@@ -19,22 +19,30 @@ import (
 )
 
 const (
-	// We set the bloom filter capacity to handle 100k individual block lists
-	// with a false positive probability of 1% for the first pass. Once we know
-	// how many block lists we have we will use that number instead, if it's
-	// more than 100k. For fewer than 100k block lists we will just get better
-	// false positive rate instead.
-	blockGCBloomCapacity          = 100000
-	blockGCBloomFalsePositiveRate = 0.01 // 1%
-	blockGCDefaultInterval        = 13 * time.Hour
-	blockGCTimeKey                = "lastBlockGCTime"
+	// We set the bloom filter capacity to handle 100k individual items with
+	// a false positive probability of 1% for the first pass. Once we know
+	// how many items we have we will use that number instead, if it's more
+	// than 100k. For fewer than 100k items we will just get better false
+	// positive rate instead.
+	indirectGCBloomCapacity          = 100000
+	indirectGCBloomFalsePositiveRate = 0.01 // 1%
+	indirectGCDefaultInterval        = 13 * time.Hour
+	indirectGCTimeKey                = "lastIndirectGCTime"
+
+	// Use indirection for the block list when it exceeds this many entries
+	blocksIndirectionCutoff = 3
 )
 
-var blockGCInterval = blockGCDefaultInterval
+var indirectGCInterval = indirectGCDefaultInterval
 
 func init() {
+	// deprecated
 	if dur, err := time.ParseDuration(os.Getenv("STGCBLOCKSEVERY")); err == nil {
-		blockGCInterval = dur
+		indirectGCInterval = dur
+	}
+	// current
+	if dur, err := time.ParseDuration(os.Getenv("STGCINDIRECTEVERY")); err == nil {
+		indirectGCInterval = dur
 	}
 }
@@ -485,18 +493,18 @@ func (db *Lowlevel) dropPrefix(prefix []byte) error {
 }
 
 func (db *Lowlevel) gcRunner() {
-	t := time.NewTimer(db.timeUntil(blockGCTimeKey, blockGCInterval))
+	t := time.NewTimer(db.timeUntil(indirectGCTimeKey, indirectGCInterval))
 	defer t.Stop()
 	for {
 		select {
 		case <-db.gcStop:
 			return
 		case <-t.C:
-			if err := db.gcBlocks(); err != nil {
-				l.Warnln("Database block GC failed:", err)
+			if err := db.gcIndirect(); err != nil {
+				l.Warnln("Database indirection GC failed:", err)
 			}
-			db.recordTime(blockGCTimeKey)
-			t.Reset(db.timeUntil(blockGCTimeKey, blockGCInterval))
+			db.recordTime(indirectGCTimeKey)
+			t.Reset(db.timeUntil(indirectGCTimeKey, indirectGCInterval))
 		}
 	}
 }
@@ -521,15 +529,16 @@ func (db *Lowlevel) timeUntil(key string, every time.Duration) time.Duration {
 	return sleepTime
 }
 
-func (db *Lowlevel) gcBlocks() error {
-	// The block GC uses a bloom filter to track used block lists. This means
-	// iterating over all items, adding their block lists to the filter, then
-	// iterating over the block lists and removing those that don't match the
-	// filter. The filter will give false positives so we will keep around one
-	// percent of block lists that we don't really need (at most).
+func (db *Lowlevel) gcIndirect() error {
+	// The indirection GC uses bloom filters to track used block lists and
+	// versions. This means iterating over all items, adding their hashes to
+	// the filter, then iterating over the indirected items and removing
+	// those that don't match the filter. The filter will give false
+	// positives so we will keep around one percent of things that we don't
+	// really need (at most).
 	//
-	// Block GC needs to run when there are no modifications to the FileInfos or
-	// block lists.
+	// Indirection GC needs to run when there are no modifications to the
+	// FileInfos or indirected items.
 
 	db.gcMut.Lock()
 	defer db.gcMut.Unlock()
@@ -540,18 +549,19 @@
 	}
 	defer t.Release()
 
-	// Set up the bloom filter with the initial capacity and false positive
-	// rate, or higher capacity if we've done this before and seen lots of block
-	// lists.
+	// Set up the bloom filters with the initial capacity and false positive
+	// rate, or higher capacity if we've done this before and seen lots of
+	// items. For simplicity's sake we track just one count, which is the
+	// highest of the various indirected items.
 
-	capacity := blockGCBloomCapacity
+	capacity := indirectGCBloomCapacity
 	if db.gcKeyCount > capacity {
 		capacity = db.gcKeyCount
 	}
-	filter := bloom.NewWithEstimates(uint(capacity), blockGCBloomFalsePositiveRate)
+	blockFilter := bloom.NewWithEstimates(uint(capacity), indirectGCBloomFalsePositiveRate)
 
-	// Iterate the FileInfos, unmarshal the blocks hashes and add them to
-	// the filter.
+	// Iterate the FileInfos, unmarshal the block and version hashes and
+	// add them to the filter.
 
 	it, err := db.NewPrefixIterator([]byte{KeyTypeDevice})
 	if err != nil {
@@ -563,7 +573,7 @@
 			return err
 		}
 		if len(bl.BlocksHash) > 0 {
-			filter.Add(bl.BlocksHash)
+			blockFilter.Add(bl.BlocksHash)
 		}
 	}
 	it.Release()
@@ -578,11 +588,11 @@
 	if err != nil {
 		return err
 	}
-	matched := 0
+	matchedBlocks := 0
 	for it.Next() {
 		key := blockListKey(it.Key())
-		if filter.Test(key.BlocksHash()) {
-			matched++
+		if blockFilter.Test(key.BlocksHash()) {
+			matchedBlocks++
 			continue
 		}
 		if err := t.Delete(key); err != nil {
@@ -595,7 +605,7 @@
 	}
 
 	// Remember the number of unique keys we kept until the next pass.
-	db.gcKeyCount = matched
+	db.gcKeyCount = matchedBlocks
 
 	if err := t.Commit(); err != nil {
 		return err
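gcIndirect is a mark-and-sweep pass with a probabilistic mark set. Here is a self-contained sketch of the same idea; the bloom import is assumed to be github.com/willf/bloom, which matches the NewWithEstimates/Add/Test calls in the hunks above, and the map stands in for the database iterators:

```go
package main

import (
	"fmt"

	"github.com/willf/bloom" // assumed package; provides NewWithEstimates/Add/Test as used above
)

// sweep marks every hash still referenced by a FileInfo, then deletes stored
// block lists whose hash doesn't test positive. The map is illustrative only.
func sweep(referenced [][]byte, stored map[string][]byte) int {
	filter := bloom.NewWithEstimates(100000, 0.01) // same capacity/FP rate defaults as the diff
	for _, h := range referenced {
		filter.Add(h) // mark phase
	}
	kept := 0
	for key, hash := range stored {
		if filter.Test(hash) {
			kept++ // may be a false positive: at most ~1% garbage survives a pass
			continue
		}
		delete(stored, key) // sweep phase
	}
	return kept // the diff remembers this as gcKeyCount to size the next filter
}

func main() {
	live := []byte("hash-of-live-block-list")
	stored := map[string][]byte{"live": live, "dead": []byte("orphaned")}
	fmt.Println("kept:", sweep([][]byte{live}, stored))
}
```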
diff --git a/lib/db/schemaupdater.go b/lib/db/schemaupdater.go
index 29349fe9a..0486adfdc 100644
--- a/lib/db/schemaupdater.go
+++ b/lib/db/schemaupdater.go
@@ -446,10 +446,11 @@ func (db *schemaUpdater) updateSchemato9(prev int) error {
 	}
 	metas := make(map[string]*metadataTracker)
 	for it.Next() {
-		var fi protocol.FileInfo
-		if err := fi.Unmarshal(it.Value()); err != nil {
+		intf, err := t.unmarshalTrunc(it.Value(), false)
+		if err != nil {
 			return err
 		}
+		fi := intf.(protocol.FileInfo)
 		device, ok := t.keyer.DeviceFromDeviceFileKey(it.Key())
 		if !ok {
 			return errDeviceIdxMissing
@@ -510,7 +511,7 @@ func (db *schemaUpdater) updateSchemato9(prev int) error {
 		}
 	}
 
-	db.recordTime(blockGCTimeKey)
+	db.recordTime(indirectGCTimeKey)
 
 	return t.Commit()
 }
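The schema updater now decodes through unmarshalTrunc instead of calling Unmarshal directly, so migrated FileInfos get their indirected block lists filled in. The helper returns the FileIntf interface and callers assert the concrete type they asked for: trunc=false yields protocol.FileInfo, trunc=true yields FileInfoTruncated. A toy sketch of that pattern, with hypothetical stand-in types rather than the real protobuf structs:

```go
package main

import "fmt"

// FileIntf-style pattern: one decode helper returns an interface and the
// caller asserts the concrete type it requested. All types are stand-ins.
type FileIntf interface{ FileName() string }

type FileInfo struct{ Name string }          // full record, blocks filled in
type FileInfoTruncated struct{ Name string } // lighter record, no block list

func (f FileInfo) FileName() string          { return f.Name }
func (f FileInfoTruncated) FileName() string { return f.Name }

// unmarshalTrunc-style helper: trunc selects which concrete type is decoded.
func unmarshalTrunc(name string, trunc bool) (FileIntf, error) {
	if trunc {
		return FileInfoTruncated{Name: name}, nil
	}
	return FileInfo{Name: name}, nil
}

func main() {
	intf, err := unmarshalTrunc("some/file", false)
	if err != nil {
		panic(err)
	}
	fi := intf.(FileInfo) // caller knows trunc=false means FileInfo
	fmt.Println(fi.FileName())
}
```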
diff --git a/lib/db/transactions.go b/lib/db/transactions.go
index e18f872a7..bfacc3495 100644
--- a/lib/db/transactions.go
+++ b/lib/db/transactions.go
@@ -78,30 +78,34 @@ func (t readOnlyTransaction) unmarshalTrunc(bs []byte, trunc bool) (FileIntf, er
 		return tf, nil
 	}
 
-	var tf protocol.FileInfo
-	if err := tf.Unmarshal(bs); err != nil {
+	var fi protocol.FileInfo
+	if err := fi.Unmarshal(bs); err != nil {
 		return nil, err
 	}
-	if err := t.fillBlockList(&tf); err != nil {
+	if err := t.fillFileInfo(&fi); err != nil {
 		return nil, err
 	}
-	return tf, nil
+	return fi, nil
 }
 
-func (t readOnlyTransaction) fillBlockList(fi *protocol.FileInfo) error {
-	if len(fi.BlocksHash) == 0 {
-		return nil
+// fillFileInfo follows the (possible) indirection of blocks and fills it out.
+func (t readOnlyTransaction) fillFileInfo(fi *protocol.FileInfo) error {
+	var key []byte
+
+	if len(fi.Blocks) == 0 && len(fi.BlocksHash) != 0 {
+		// The blocks list is indirected and we need to load it.
+		key = t.keyer.GenerateBlockListKey(key, fi.BlocksHash)
+		bs, err := t.Get(key)
+		if err != nil {
+			return err
+		}
+		var bl BlockList
+		if err := bl.Unmarshal(bs); err != nil {
+			return err
+		}
+		fi.Blocks = bl.Blocks
 	}
-	blocksKey := t.keyer.GenerateBlockListKey(nil, fi.BlocksHash)
-	bs, err := t.Get(blocksKey)
-	if err != nil {
-		return err
-	}
-	var bl BlockList
-	if err := bl.Unmarshal(bs); err != nil {
-		return err
-	}
-	fi.Blocks = bl.Blocks
+
 	return nil
 }
@@ -453,26 +457,33 @@ func (t readWriteTransaction) close() {
 	t.WriteTransaction.Release()
 }
 
-func (t readWriteTransaction) putFile(key []byte, fi protocol.FileInfo) error {
-	if fi.Blocks != nil {
-		if fi.BlocksHash == nil {
-			fi.BlocksHash = protocol.BlocksHash(fi.Blocks)
-		}
-		blocksKey := t.keyer.GenerateBlockListKey(nil, fi.BlocksHash)
-		if _, err := t.Get(blocksKey); backend.IsNotFound(err) {
+func (t readWriteTransaction) putFile(fkey []byte, fi protocol.FileInfo) error {
+	var bkey []byte
+
+	// Always set the blocks hash when there are blocks.
+	if len(fi.Blocks) > 0 {
+		fi.BlocksHash = protocol.BlocksHash(fi.Blocks)
+	} else {
+		fi.BlocksHash = nil
+	}
+
+	// Indirect the blocks if the block list is large enough.
+	if len(fi.Blocks) > blocksIndirectionCutoff {
+		bkey = t.keyer.GenerateBlockListKey(bkey, fi.BlocksHash)
+		if _, err := t.Get(bkey); backend.IsNotFound(err) {
 			// Marshal the block list and save it
 			blocksBs := mustMarshal(&BlockList{Blocks: fi.Blocks})
-			if err := t.Put(blocksKey, blocksBs); err != nil {
+			if err := t.Put(bkey, blocksBs); err != nil {
 				return err
 			}
 		} else if err != nil {
 			return err
 		}
+		fi.Blocks = nil
 	}
-	fi.Blocks = nil
 
 	fiBs := mustMarshal(&fi)
-	return t.Put(key, fiBs)
+	return t.Put(fkey, fiBs)
 }
 
 // updateGlobal adds this device+version to the version list for the given
@@ -723,15 +734,12 @@ func (t *readWriteTransaction) withAllFolderTruncated(folder []byte, fn func(dev
 			}
 			continue
 		}
-		var f FileInfoTruncated
-		// The iterator function may keep a reference to the unmarshalled
-		// struct, which in turn references the buffer it was unmarshalled
-		// from. dbi.Value() just returns an internal slice that it reuses, so
-		// we need to copy it.
-		err := f.Unmarshal(append([]byte{}, dbi.Value()...))
+
+		intf, err := t.unmarshalTrunc(dbi.Value(), true)
 		if err != nil {
 			return err
 		}
+		f := intf.(FileInfoTruncated)
 		switch f.Name {
 		case "", ".", "..", "/": // A few obviously invalid filenames
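Taken together, putFile and fillFileInfo form a write/read round trip: block lists at or below the cutoff of 3 entries stay inline in the FileInfo, larger ones are stored once under their hash and re-attached on read, so identical files share one stored list (the garbage the GC above then has to collect when no file references it anymore). A self-contained sketch of that round trip over a plain map, with hypothetical types standing in for the database and the protobuf structs:

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"strings"
)

// Hypothetical stand-ins for the diff's FileInfo/BlockList and the backend.
type FileInfo struct {
	Name       string
	Blocks     []string // inline block list; empty when indirected
	BlocksHash []byte   // hash of the block list, set whenever blocks exist
}

const blocksIndirectionCutoff = 3 // same cutoff as the diff

var blockLists = map[string][]string{} // stands in for the block list keyspace

// putFile-style write: always set the hash, indirect large block lists.
func putFile(fi FileInfo) FileInfo {
	if len(fi.Blocks) > 0 {
		sum := sha256.Sum256([]byte(strings.Join(fi.Blocks, ",")))
		fi.BlocksHash = sum[:]
	} else {
		fi.BlocksHash = nil
	}
	if len(fi.Blocks) > blocksIndirectionCutoff {
		blockLists[string(fi.BlocksHash)] = fi.Blocks // store once, keyed by hash
		fi.Blocks = nil                               // only the hash stays inline
	}
	return fi
}

// fillFileInfo-style read: follow the indirection if the list was stripped.
func fillFileInfo(fi *FileInfo) {
	if len(fi.Blocks) == 0 && len(fi.BlocksHash) != 0 {
		fi.Blocks = blockLists[string(fi.BlocksHash)]
	}
}

func main() {
	stored := putFile(FileInfo{Name: "big", Blocks: []string{"b1", "b2", "b3", "b4"}})
	fmt.Println("inline after put:", stored.Blocks) // empty: the list was indirected
	fillFileInfo(&stored)
	fmt.Println("after fill:", stored.Blocks) // restored from the block list store
}
```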