mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-10 18:24:44 +00:00
2345 lines
51 KiB
Go
2345 lines
51 KiB
Go
|
// Copyright 2014 The lldb Authors. All rights reserved.
|
||
|
// Use of this source code is governed by a BSD-style
|
||
|
// license that can be found in the LICENSE file.
|
||
|
|
||
|
package lldb
|
||
|
|
||
|
import (
|
||
|
"bytes"
|
||
|
"errors"
|
||
|
"fmt"
|
||
|
"io"
|
||
|
"sort"
|
||
|
"strings"
|
||
|
|
||
|
"github.com/cznic/fileutil"
|
||
|
"github.com/cznic/internal/buffer"
|
||
|
"github.com/cznic/sortutil"
|
||
|
)
|
||
|
|
||
|
// Page geometry and tagging constants of the BTree implementation. The
// permitted ranges of kData, kIndex and kKV are enforced by this file's init
// function.
const (
	// kData is the data page sizing parameter; must stay within [1, 512].
	kData = 256
	// kIndex is the index page sizing parameter; must stay within [2, 2048].
	kIndex = 256
	// kKV is the size of the key/value field in btreeDataPage.
	kKV = 19
	// kSz is the content prefix size.
	kSz = kKV - 1 - 7
	// kH is the content field offset for a handle.
	kH = kKV - 7

	// First byte of a raw page: 1 marks a data page, 0 an index page.
	tagBTreeDataPage  = 1
	tagBTreeIndexPage = 0
)
|
||
|
|
||
|
// BTree is a B+tree[1][2], i.e. a variant which speeds up
|
||
|
// enumeration/iteration of the BTree. According to its origin it can be
|
||
|
// volatile (backed only by memory) or non-volatile (backed by a non-volatile
|
||
|
// Allocator).
|
||
|
//
|
||
|
// The specific implementation of BTrees in this package are B+trees with
|
||
|
// delayed split/concatenation (discussed in e.g. [3]).
|
||
|
//
|
||
|
// Note: No BTree methods returns io.EOF for physical Filer reads/writes. The
|
||
|
// io.EOF is returned only by bTreeEnumerator methods to indicate "no more K-V
|
||
|
// pair".
|
||
|
//
|
||
|
// [1]: http://en.wikipedia.org/wiki/B+tree
|
||
|
// [2]: http://zgking.com:8080/home/donghui/publications/books/dshandbook_BTree.pdf
|
||
|
// [3]: http://people.cs.aau.dk/~simas/aalg06/UbiquitBtree.pdf
|
||
|
type BTree struct {
|
||
|
store btreeStore
|
||
|
root btree
|
||
|
collate func(a, b []byte) int
|
||
|
serial uint64
|
||
|
}
|
||
|
|
||
|
// NewBTree returns a new, memory-only BTree.
|
||
|
func NewBTree(collate func(a, b []byte) int) *BTree {
|
||
|
store := newMemBTreeStore()
|
||
|
root, err := newBTree(store)
|
||
|
if err != nil { // should not happen
|
||
|
panic(err.Error())
|
||
|
}
|
||
|
|
||
|
return &BTree{store, root, collate, 0}
|
||
|
}
|
||
|
|
||
|
// IsMem reports if t is a memory only BTree.
|
||
|
func (t *BTree) IsMem() (r bool) {
|
||
|
_, r = t.store.(*memBTreeStore)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Clear empties the tree.
|
||
|
func (t *BTree) Clear() (err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
return t.root.clear(t.store)
|
||
|
}
|
||
|
|
||
|
// Delete deletes key and its associated value from the tree.
|
||
|
func (t *BTree) Delete(key []byte) (err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
_, err = t.root.extract(t.store, nil, t.collate, key)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// DeleteAny deletes one key and its associated value from the tree. If the
|
||
|
// tree is empty on return then empty is true.
|
||
|
func (t *BTree) DeleteAny() (empty bool, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
return t.root.deleteAny(t.store)
|
||
|
}
|
||
|
|
||
|
// elem renders a single decoded scalar for Dump. Supported dynamic types are
// nil, bool, int64, uint64, float64, complex128, []byte and string; any other
// type panics with "internal error".
//
// Floats that print like integers get a trailing "." so they remain
// distinguishable from integers. Values already rendered in exponent form
// (e.g. 1e+06) and the non-finite values NaN, +Inf and -Inf are returned as
// fmt produced them; appending "." to those would yield malformed text such
// as "1e+06." or "NaN.".
func elem(v interface{}) string {
	switch x := v.(type) {
	case nil:
		return "nil"
	case bool:
		if x {
			return "true"
		}

		return "false"
	case int64:
		return fmt.Sprint(x)
	case uint64:
		return fmt.Sprint(x)
	case float64:
		s := fmt.Sprintf("%g", x)
		// '.' - already has a fraction, 'e' - exponent form,
		// 'I'/'N' - +Inf, -Inf, NaN.
		if !strings.ContainsAny(s, ".eIN") {
			s += "."
		}
		return s
	case complex128:
		// fmt renders complex values as "(re+imi)"; drop the parentheses.
		s := fmt.Sprint(x)
		return s[1 : len(s)-1]
	case []byte:
		return fmt.Sprintf("[]byte{% 02x}", x)
	case string:
		return fmt.Sprintf("%q", x)
	default:
		panic("internal error")
	}
}
|
||
|
|
||
|
// Dump outputs a human readable dump of t to w. It is usable iff t keys and
|
||
|
// values are encoded scalars (see EncodeScalars). Intended use is only for
|
||
|
// examples or debugging. Some type information is lost in the rendering, for
|
||
|
// example a float value '17.' and an integer value '17' may both output as
|
||
|
// '17'.
|
||
|
func (t *BTree) Dump(w io.Writer) (err error) {
|
||
|
enum, err := t.seekFirst()
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
for {
|
||
|
bkey, bval, err := enum.current()
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
key, err := DecodeScalars(bkey)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
val, err := DecodeScalars(bval)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
kk := []string{}
|
||
|
if key == nil {
|
||
|
kk = []string{"null"}
|
||
|
}
|
||
|
for _, v := range key {
|
||
|
kk = append(kk, elem(v))
|
||
|
}
|
||
|
vv := []string{}
|
||
|
if val == nil {
|
||
|
vv = []string{"null"}
|
||
|
}
|
||
|
for _, v := range val {
|
||
|
vv = append(vv, elem(v))
|
||
|
}
|
||
|
skey := strings.Join(kk, ", ")
|
||
|
sval := strings.Join(vv, ", ")
|
||
|
if len(vv) > 1 {
|
||
|
sval = fmt.Sprintf("[]interface{%s}", sval)
|
||
|
}
|
||
|
if _, err = fmt.Fprintf(w, "%s → %s\n", skey, sval); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
err = enum.next()
|
||
|
if err != nil {
|
||
|
if fileutil.IsEOF(err) {
|
||
|
err = nil
|
||
|
break
|
||
|
}
|
||
|
|
||
|
return err
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Extract is a combination of Get and Delete. If the key exists in the tree,
|
||
|
// it is returned (like Get) and also deleted from a tree in a more efficient
|
||
|
// way which doesn't walk it twice. The returned slice may be a sub-slice of
|
||
|
// buf if buf was large enough to hold the entire content. Otherwise, a newly
|
||
|
// allocated slice will be returned. It is valid to pass a nil buf.
|
||
|
func (t *BTree) Extract(buf, key []byte) (value []byte, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
return t.root.extract(t.store, buf, t.collate, key)
|
||
|
}
|
||
|
|
||
|
// First returns the first KV pair of the tree, if it exists. Otherwise key == nil
|
||
|
// and value == nil.
|
||
|
func (t *BTree) First() (key, value []byte, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var p btreeDataPage
|
||
|
if _, p, err = t.root.first(t.store); err != nil || p == nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if key, err = p.key(t.store, 0); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
value, err = p.value(t.store, 0)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Get returns the value associated with key, or nil if no such value exists.
|
||
|
// The returned slice may be a sub-slice of buf if buf was large enough to hold
|
||
|
// the entire content. Otherwise, a newly allocated slice will be returned.
|
||
|
// It is valid to pass a nil buf.
|
||
|
//
|
||
|
// Get is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (t *BTree) Get(buf, key []byte) (value []byte, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
pbuffer := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pbuffer)
|
||
|
buffer := *pbuffer
|
||
|
if buffer, err = t.root.get(t.store, buffer, t.collate, key); buffer == nil || err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if len(buffer) != 0 {
|
||
|
// The buffer cache returns nil for empty buffers, bypass it
|
||
|
value = need(len(buffer), buf)
|
||
|
} else {
|
||
|
value = []byte{}
|
||
|
}
|
||
|
copy(value, buffer)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Handle reports t's handle.
|
||
|
func (t *BTree) Handle() int64 {
|
||
|
return int64(t.root)
|
||
|
}
|
||
|
|
||
|
// Last returns the last KV pair of the tree, if it exists. Otherwise key == nil
|
||
|
// and value == nil.
|
||
|
func (t *BTree) Last() (key, value []byte, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var p btreeDataPage
|
||
|
if _, p, err = t.root.last(t.store); err != nil || p == nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
index := p.len() - 1
|
||
|
if key, err = p.key(t.store, index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
value, err = p.value(t.store, index)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Put combines Get and Set in a more efficient way where the tree is walked
|
||
|
// only once. The upd(ater) receives the current (key, old-value), if that
|
||
|
// exists or (key, nil) otherwise. It can then return a (new-value, true, nil)
|
||
|
// to create or overwrite the existing value in the KV pair, or (whatever,
|
||
|
// false, nil) if it decides not to create or not to update the value of the KV
|
||
|
// pair.
|
||
|
//
|
||
|
// tree.Set(k, v)
|
||
|
//
|
||
|
// conceptually equals
|
||
|
//
|
||
|
// tree.Put(k, func(k, v []byte){ return v, true }([]byte, bool))
|
||
|
//
|
||
|
// modulo the differing return values.
|
||
|
//
|
||
|
// The returned slice may be a sub-slice of buf if buf was large enough to hold
|
||
|
// the entire content. Otherwise, a newly allocated slice will be returned.
|
||
|
// It is valid to pass a nil buf.
|
||
|
func (t *BTree) Put(buf, key []byte, upd func(key, old []byte) (new []byte, write bool, err error)) (old []byte, written bool, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
return t.root.put2(buf, t.store, t.collate, key, upd)
|
||
|
}
|
||
|
|
||
|
// Seek returns an Enumerator with "position" or an error of any. Normally the
|
||
|
// position is on a KV pair such that key >= KV.key. Then hit is key == KV.key.
|
||
|
// The position is possibly "after" the last KV pair, but that is not an error.
|
||
|
//
|
||
|
// Seek is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (t *BTree) Seek(key []byte) (enum *BTreeEnumerator, hit bool, err error) {
|
||
|
enum0, hit, err := t.seek(key)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = &BTreeEnumerator{
|
||
|
enum: enum0,
|
||
|
firstHit: hit,
|
||
|
key: append([]byte(nil), key...),
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (t *BTree) seek(key []byte) (enum *bTreeEnumerator, hit bool, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
r := &bTreeEnumerator{t: t, collate: t.collate, serial: t.serial}
|
||
|
if r.p, r.index, hit, err = t.root.seek(t.store, r.collate, key); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = r
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// IndexSeek returns an Enumerator with "position" or an error of any. Normally
|
||
|
// the position is on a KV pair such that key >= KV.key. Then hit is key ==
|
||
|
// KV.key. The position is possibly "after" the last KV pair, but that is not
|
||
|
// an error. The collate function originally passed to CreateBTree is used for
|
||
|
// enumerating the tree but a custom collate function c is used for IndexSeek.
|
||
|
//
|
||
|
// IndexSeek is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (t *BTree) IndexSeek(key []byte, c func(a, b []byte) int) (enum *BTreeEnumerator, hit bool, err error) { //TODO +test
|
||
|
enum0, hit, err := t.indexSeek(key, c)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = &BTreeEnumerator{
|
||
|
enum: enum0,
|
||
|
firstHit: hit,
|
||
|
key: append([]byte(nil), key...),
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (t *BTree) indexSeek(key []byte, c func(a, b []byte) int) (enum *bTreeEnumerator, hit bool, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
r := &bTreeEnumerator{t: t, collate: t.collate, serial: t.serial}
|
||
|
if r.p, r.index, hit, err = t.root.seek(t.store, c, key); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = r
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// seekFirst returns an enumerator positioned on the first KV pair in the tree,
|
||
|
// if any. For an empty tree, err == io.EOF is returend.
|
||
|
//
|
||
|
// SeekFirst is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (t *BTree) SeekFirst() (enum *BTreeEnumerator, err error) {
|
||
|
enum0, err := t.seekFirst()
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var key []byte
|
||
|
if key, _, err = enum0.current(); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = &BTreeEnumerator{
|
||
|
enum: enum0,
|
||
|
firstHit: true,
|
||
|
key: append([]byte(nil), key...),
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (t *BTree) seekFirst() (enum *bTreeEnumerator, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var p btreeDataPage
|
||
|
if _, p, err = t.root.first(t.store); err == nil && p == nil {
|
||
|
err = io.EOF
|
||
|
}
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return &bTreeEnumerator{t: t, collate: t.collate, p: p, index: 0, serial: t.serial}, nil
|
||
|
}
|
||
|
|
||
|
// seekLast returns an enumerator positioned on the last KV pair in the tree,
|
||
|
// if any. For an empty tree, err == io.EOF is returend.
|
||
|
//
|
||
|
// SeekLast is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (t *BTree) SeekLast() (enum *BTreeEnumerator, err error) {
|
||
|
enum0, err := t.seekLast()
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var key []byte
|
||
|
if key, _, err = enum0.current(); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
enum = &BTreeEnumerator{
|
||
|
enum: enum0,
|
||
|
firstHit: true,
|
||
|
key: append([]byte(nil), key...),
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (t *BTree) seekLast() (enum *bTreeEnumerator, err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var p btreeDataPage
|
||
|
if _, p, err = t.root.last(t.store); err == nil && p == nil {
|
||
|
err = io.EOF
|
||
|
}
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return &bTreeEnumerator{t: t, collate: t.collate, p: p, index: p.len() - 1, serial: t.serial}, nil
|
||
|
}
|
||
|
|
||
|
// Set sets the value associated with key. Any previous value, if existed, is
|
||
|
// overwritten by the new one.
|
||
|
func (t *BTree) Set(key, value []byte) (err error) {
|
||
|
if t == nil {
|
||
|
err = errors.New("BTree method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
t.serial++
|
||
|
pdst := buffer.Get(maxBuf)
|
||
|
dst := *pdst
|
||
|
_, err = t.root.put(dst, t.store, t.collate, key, value, true)
|
||
|
buffer.Put(pdst)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// bTreeEnumerator is a closure of a BTree and a position. It is returned from
|
||
|
// BTree.seek.
|
||
|
//
|
||
|
// NOTE: bTreeEnumerator cannot be used after its BTree was mutated after the
|
||
|
// bTreeEnumerator was acquired from any of the seek, seekFirst, seekLast
|
||
|
// methods.
|
||
|
type bTreeEnumerator struct {
|
||
|
t *BTree
|
||
|
collate func(a, b []byte) int
|
||
|
p btreeDataPage
|
||
|
index int
|
||
|
serial uint64
|
||
|
}
|
||
|
|
||
|
// Current returns the KV pair the enumerator is currently positioned on. If
|
||
|
// the position is before the first KV pair in the tree or after the last KV
|
||
|
// pair in the tree then err == io.EOF is returned.
|
||
|
//
|
||
|
// If the enumerator has been invalidated by updating the tree, ErrINVAL is
|
||
|
// returned.
|
||
|
func (e *bTreeEnumerator) current() (key, value []byte, err error) {
|
||
|
if e == nil {
|
||
|
err = errors.New("bTreeEnumerator method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.serial != e.t.serial {
|
||
|
err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.p == nil || e.index == e.p.len() {
|
||
|
return nil, nil, io.EOF
|
||
|
}
|
||
|
|
||
|
if key, err = e.p.key(e.t.store, e.index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
value, err = e.p.value(e.t.store, e.index)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Next attempts to position the enumerator onto the next KV pair wrt the
|
||
|
// current position. If there is no "next" KV pair, io.EOF is returned.
|
||
|
//
|
||
|
// If the enumerator has been invalidated by updating the tree, ErrINVAL is
|
||
|
// returned.
|
||
|
func (e *bTreeEnumerator) next() (err error) {
|
||
|
if e == nil {
|
||
|
err = errors.New("bTreeEnumerator method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.serial != e.t.serial {
|
||
|
err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.p == nil {
|
||
|
return io.EOF
|
||
|
}
|
||
|
|
||
|
switch {
|
||
|
case e.index < e.p.len()-1:
|
||
|
e.index++
|
||
|
default:
|
||
|
ph := e.p.next()
|
||
|
if ph == 0 {
|
||
|
err = io.EOF
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if e.p, err = e.t.store.Get(e.p, ph); err != nil {
|
||
|
e.p = nil
|
||
|
return
|
||
|
}
|
||
|
e.index = 0
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Prev attempts to position the enumerator onto the previous KV pair wrt the
|
||
|
// current position. If there is no "previous" KV pair, io.EOF is returned.
|
||
|
//
|
||
|
// If the enumerator has been invalidated by updating the tree, ErrINVAL is
|
||
|
// returned.
|
||
|
func (e *bTreeEnumerator) prev() (err error) {
|
||
|
if e == nil {
|
||
|
err = errors.New("bTreeEnumerator method invoked on nil receiver")
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.serial != e.t.serial {
|
||
|
err = &ErrINVAL{Src: "bTreeEnumerator invalidated by updating the tree"}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if e.p == nil {
|
||
|
return io.EOF
|
||
|
}
|
||
|
|
||
|
switch {
|
||
|
case e.index > 0:
|
||
|
e.index--
|
||
|
default:
|
||
|
ph := e.p.prev()
|
||
|
if ph == 0 {
|
||
|
err = io.EOF
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if e.p, err = e.t.store.Get(e.p, ph); err != nil {
|
||
|
e.p = nil
|
||
|
return
|
||
|
}
|
||
|
e.index = e.p.len() - 1
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// BTreeEnumerator captures the state of enumerating a tree. It is returned
|
||
|
// from the Seek* methods. The enumerator is aware of any mutations made to
|
||
|
// the tree in the process of enumerating it and automatically resumes the
|
||
|
// enumeration.
|
||
|
type BTreeEnumerator struct {
|
||
|
enum *bTreeEnumerator
|
||
|
err error
|
||
|
key []byte
|
||
|
firstHit bool
|
||
|
}
|
||
|
|
||
|
// Next returns the currently enumerated KV pair, if it exists and moves to the
|
||
|
// next KV in the key collation order. If there is no KV pair to return, err ==
|
||
|
// io.EOF is returned.
|
||
|
//
|
||
|
// Next is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (e *BTreeEnumerator) Next() (key, value []byte, err error) {
|
||
|
if err = e.err; err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
canRetry := true
|
||
|
retry:
|
||
|
if e.enum.p == nil {
|
||
|
e.err = io.EOF
|
||
|
return nil, nil, e.err
|
||
|
}
|
||
|
|
||
|
if e.enum.index == e.enum.p.len() && e.enum.serial == e.enum.t.serial {
|
||
|
if err := e.enum.next(); err != nil {
|
||
|
e.err = err
|
||
|
return nil, nil, e.err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if key, value, err = e.enum.current(); err != nil {
|
||
|
if _, ok := err.(*ErrINVAL); !ok || !canRetry {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
|
||
|
canRetry = false
|
||
|
var hit bool
|
||
|
if e.enum, hit, err = e.enum.t.seek(e.key); err != nil {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !e.firstHit && hit {
|
||
|
err = e.enum.next()
|
||
|
if err != nil {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
goto retry
|
||
|
}
|
||
|
|
||
|
e.firstHit = false
|
||
|
e.key = append([]byte(nil), key...)
|
||
|
e.err = e.enum.next()
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// Prev returns the currently enumerated KV pair, if it exists and moves to the
|
||
|
// previous KV in the key collation order. If there is no KV pair to return,
|
||
|
// err == io.EOF is returned.
|
||
|
//
|
||
|
// Prev is safe for concurrent access by multiple goroutines iff no other
|
||
|
// goroutine mutates the tree.
|
||
|
func (e *BTreeEnumerator) Prev() (key, value []byte, err error) {
|
||
|
if err = e.err; err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
canRetry := true
|
||
|
retry:
|
||
|
if key, value, err = e.enum.current(); err != nil {
|
||
|
if _, ok := err.(*ErrINVAL); !ok || !canRetry {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
|
||
|
canRetry = false
|
||
|
var hit bool
|
||
|
if e.enum, hit, err = e.enum.t.seek(e.key); err != nil {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !e.firstHit && hit {
|
||
|
err = e.enum.prev()
|
||
|
if err != nil {
|
||
|
e.err = err
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
|
||
|
goto retry
|
||
|
}
|
||
|
|
||
|
e.firstHit = false
|
||
|
e.key = append([]byte(nil), key...)
|
||
|
e.err = e.enum.prev()
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// CreateBTree creates a new BTree in store. It returns the tree, its (freshly
|
||
|
// assigned) handle (for OpenBTree or RemoveBTree) or an error, if any.
|
||
|
func CreateBTree(store *Allocator, collate func(a, b []byte) int) (bt *BTree, handle int64, err error) {
|
||
|
r := &BTree{store: store, collate: collate}
|
||
|
if r.root, err = newBTree(store); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return r, int64(r.root), nil
|
||
|
}
|
||
|
|
||
|
// OpenBTree opens a store's BTree using handle. It returns the tree or an
|
||
|
// error, if any. The same tree may be opened more than once, but operations on
|
||
|
// the separate instances should not ever overlap or void the other instances.
|
||
|
// However, the intended API usage is to open the same tree handle only once
|
||
|
// (handled by some upper layer "dispatcher").
|
||
|
func OpenBTree(store *Allocator, collate func(a, b []byte) int, handle int64) (bt *BTree, err error) {
|
||
|
r := &BTree{store: store, root: btree(handle), collate: collate}
|
||
|
pb := buffer.Get(7)
|
||
|
defer buffer.Put(pb)
|
||
|
b := *pb
|
||
|
if b, err = store.Get(b, handle); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if len(b) != 7 {
|
||
|
return nil, &ErrILSEQ{Off: h2off(handle), More: "btree.go:671"}
|
||
|
}
|
||
|
|
||
|
return r, nil
|
||
|
}
|
||
|
|
||
|
// RemoveBTree removes tree, represented by handle from store. Empty trees are
|
||
|
// cheap, each uses only few bytes of the store. If there's a chance that a
|
||
|
// tree will eventually get reused (non empty again), it's recommended to
|
||
|
// not/never remove it. One advantage of such approach is a stable handle of
|
||
|
// such tree.
|
||
|
func RemoveBTree(store *Allocator, handle int64) (err error) {
|
||
|
tree, err := OpenBTree(store, nil, handle)
|
||
|
if err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if err = tree.Clear(); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return store.Free(handle)
|
||
|
}
|
||
|
|
||
|
// btreeStore is the storage a BTree needs for its pages, which are addressed
// by int64 handles. In this file it is implemented by memBTreeStore, and
// *Allocator values are assigned to it in CreateBTree and OpenBTree.
type btreeStore interface {
	// Alloc stores b and returns the handle assigned to it.
	Alloc(b []byte) (handle int64, err error)
	// Free releases the content stored at handle.
	Free(handle int64) (err error)
	// Get returns the content stored at handle; dst is offered as the
	// destination buffer.
	Get(dst []byte, handle int64) (b []byte, err error)
	// Realloc replaces the content stored at handle by b.
	Realloc(handle int64, b []byte) (err error)
}
|
||
|
|
||
|
// Read only zero bytes
|
||
|
var zeros [2 * kKV]byte
|
||
|
|
||
|
func init() {
|
||
|
if kData < 1 || kData > 512 {
|
||
|
panic(fmt.Errorf("kData %d: out of limits", kData))
|
||
|
}
|
||
|
|
||
|
if kIndex < 2 || kIndex > 2048 {
|
||
|
panic(fmt.Errorf("kIndex %d: out of limits", kIndex))
|
||
|
}
|
||
|
|
||
|
if kKV < 8 || kKV > 23 {
|
||
|
panic(fmt.Errorf("kKV %d: out of limits", kKV))
|
||
|
}
|
||
|
|
||
|
if n := len(zeros); n < 15 {
|
||
|
panic(fmt.Errorf("not enough zeros: %d", n))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// memBTreeStore is the memory-only btreeStore used by NewBTree.
type memBTreeStore struct {
	h int64            // handle handed out last; Alloc increments it before use
	m map[int64][]byte // content by handle
}
|
||
|
|
||
|
func newMemBTreeStore() *memBTreeStore {
|
||
|
return &memBTreeStore{h: 0, m: map[int64][]byte{}}
|
||
|
}
|
||
|
|
||
|
func (s *memBTreeStore) String() string {
|
||
|
var a sortutil.Int64Slice
|
||
|
for k := range s.m {
|
||
|
a = append(a, k)
|
||
|
}
|
||
|
sort.Sort(a)
|
||
|
var sa []string
|
||
|
for _, k := range a {
|
||
|
sa = append(sa, fmt.Sprintf("%#x:|% x|", k, s.m[k]))
|
||
|
}
|
||
|
return strings.Join(sa, "\n")
|
||
|
}
|
||
|
|
||
|
func (s *memBTreeStore) Alloc(b []byte) (handle int64, err error) {
|
||
|
s.h++
|
||
|
handle = s.h
|
||
|
s.m[handle] = bpack(b)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (s *memBTreeStore) Free(handle int64) (err error) {
|
||
|
if _, ok := s.m[handle]; !ok {
|
||
|
return &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:754"}
|
||
|
}
|
||
|
|
||
|
delete(s.m, handle)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (s *memBTreeStore) Get(dst []byte, handle int64) (b []byte, err error) {
|
||
|
r, ok := s.m[handle]
|
||
|
if !ok {
|
||
|
return nil, &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:764"}
|
||
|
}
|
||
|
|
||
|
b = need(len(r), dst)
|
||
|
copy(b, r)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (s *memBTreeStore) Realloc(handle int64, b []byte) (err error) {
|
||
|
if _, ok := s.m[handle]; !ok {
|
||
|
return &ErrILSEQ{Type: ErrOther, Off: h2off(handle), More: "btree.go:774"}
|
||
|
}
|
||
|
|
||
|
s.m[handle] = bpack(b)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
/*

Layout of a raw index page.

0...0 (1 bytes):
	Flag

	  0
	+---+
	| 0 |
	+---+

	0 indicates an index page

1...count*14-1
	"array" of items, 14 bytes each. Count of items in kIndex-1..2*kIndex+2

	Count = (len(raw) - 8) / 14

	  0..6     7..13
	+-------+----------+
	| Child | DataPage |
	+-------+----------+

	Child    == handle of a child index page
	DataPage == handle of a data page

The array is followed by one more 7 byte Child handle, so a page holding
Count items references Count+1 children; an empty page is 8 bytes long: the
flag and the leftmost child.

Offsets into the raw []byte:
	Child[X]    == 1+14*X
	DataPage[X] == 8+14*X

*/
type btreeIndexPage []byte
|
||
|
|
||
|
func newBTreeIndexPage(leftmostChild int64) (p btreeIndexPage) {
|
||
|
p = (*buffer.Get(1 + (kIndex+1)*2*7))[:8]
|
||
|
p[0] = tagBTreeIndexPage
|
||
|
h2b(p[1:], leftmostChild)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) len() int {
|
||
|
return (len(p) - 8) / 14
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) child(index int) int64 {
|
||
|
return b2h(p[1+14*index:])
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) setChild(index int, dp int64) {
|
||
|
h2b(p[1+14*index:], dp)
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) dataPage(index int) int64 {
|
||
|
return b2h(p[8+14*index:])
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) setDataPage(index int, dp int64) {
|
||
|
h2b(p[8+14*index:], dp)
|
||
|
}
|
||
|
|
||
|
func (q btreeIndexPage) insert(index int) btreeIndexPage {
|
||
|
switch len0 := q.len(); {
|
||
|
case index < len0:
|
||
|
has := len(q)
|
||
|
need := has + 14
|
||
|
switch {
|
||
|
case cap(q) >= need:
|
||
|
q = q[:need]
|
||
|
default:
|
||
|
q = append(q, zeros[:14]...)
|
||
|
}
|
||
|
copy(q[8+14*(index+1):8+14*(index+1)+2*(len0-index)*7], q[8+14*index:])
|
||
|
case index == len0:
|
||
|
has := len(q)
|
||
|
need := has + 14
|
||
|
switch {
|
||
|
case cap(q) >= need:
|
||
|
q = q[:need]
|
||
|
default:
|
||
|
q = append(q, zeros[:14]...)
|
||
|
}
|
||
|
}
|
||
|
return q
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) insert3(index int, dataPage, child int64) btreeIndexPage {
|
||
|
p = p.insert(index)
|
||
|
p.setDataPage(index, dataPage)
|
||
|
p.setChild(index+1, child)
|
||
|
return p
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) cmp(a btreeStore, c func(a, b []byte) int, keyA []byte, keyBIndex int) (int, error) {
|
||
|
pb := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pb)
|
||
|
b := *pb
|
||
|
dp, err := a.Get(b, p.dataPage(keyBIndex))
|
||
|
if err != nil {
|
||
|
return 0, err
|
||
|
}
|
||
|
|
||
|
return btreeDataPage(dp).cmp(a, c, keyA, 0)
|
||
|
}
|
||
|
|
||
|
func (q btreeIndexPage) setLen(n int) btreeIndexPage {
|
||
|
q = q[:cap(q)]
|
||
|
need := 8 + 14*n
|
||
|
if need < len(q) {
|
||
|
return q[:need]
|
||
|
}
|
||
|
return append(q, make([]byte, need-len(q))...)
|
||
|
}
|
||
|
|
||
|
func (p btreeIndexPage) split(a btreeStore, root btree, ph *int64, parent int64, parentIndex int, index *int) (btreeIndexPage, error) {
|
||
|
right := newBTreeIndexPage(0)
|
||
|
right = right.setLen(kIndex)
|
||
|
copy(right[1:1+(2*kIndex+1)*7], p[1+14*(kIndex+1):])
|
||
|
p = p.setLen(kIndex)
|
||
|
if err := a.Realloc(*ph, p); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
rh, err := a.Alloc(right)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if parentIndex >= 0 {
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := btreeIndexPage(*ppp)
|
||
|
if pp, err = a.Get(pp, parent); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
pp = pp.insert3(parentIndex, p.dataPage(kIndex), rh)
|
||
|
if err = a.Realloc(parent, pp); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
nr := newBTreeIndexPage(*ph)
|
||
|
nr = nr.insert3(0, p.dataPage(kIndex), rh)
|
||
|
nrh, err := a.Alloc(nr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(int64(root), h2b(make([]byte, 7), nrh)); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if *index > kIndex {
|
||
|
p = right
|
||
|
*ph = rh
|
||
|
*index -= kIndex + 1
|
||
|
}
|
||
|
return p, nil
|
||
|
}
|
||
|
|
||
|
// p is dirty on return
|
||
|
func (p btreeIndexPage) extract(index int) btreeIndexPage {
|
||
|
n := p.len() - 1
|
||
|
if index < n {
|
||
|
sz := (n-index)*14 + 7
|
||
|
copy(p[1+14*index:1+14*index+sz], p[1+14*(index+1):])
|
||
|
}
|
||
|
return p.setLen(n)
|
||
|
}
|
||
|
|
||
|
// must persist all changes made
|
||
|
func (p btreeIndexPage) underflow(a btreeStore, root, iroot, parent int64, ph *int64, parentIndex int, index *int) (btreeIndexPage, error) {
|
||
|
lh, rh, err := checkSiblings(a, parent, parentIndex)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pleft := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pleft)
|
||
|
left := btreeIndexPage(*pleft)
|
||
|
|
||
|
if lh != 0 {
|
||
|
if left, err = a.Get(left, lh); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if lc := btreeIndexPage(left).len(); lc > kIndex {
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := *ppp
|
||
|
if pp, err = a.Get(pp, parent); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pc := p.len()
|
||
|
p = p.setLen(pc + 1)
|
||
|
di, si, sz := 1+1*14, 1+0*14, (2*pc+1)*7
|
||
|
copy(p[di:di+sz], p[si:])
|
||
|
p.setChild(0, btreeIndexPage(left).child(lc))
|
||
|
p.setDataPage(0, btreeIndexPage(pp).dataPage(parentIndex-1))
|
||
|
*index++
|
||
|
btreeIndexPage(pp).setDataPage(parentIndex-1, btreeIndexPage(left).dataPage(lc-1))
|
||
|
left = left.setLen(lc - 1)
|
||
|
if err = a.Realloc(parent, pp); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(*ph, p); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(lh, left)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if rh != 0 {
|
||
|
pright := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pright)
|
||
|
right := *pright
|
||
|
if right, err = a.Get(right, rh); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if rc := btreeIndexPage(right).len(); rc > kIndex {
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := *ppp
|
||
|
if pp, err = a.Get(pp, parent); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pc := p.len()
|
||
|
p = p.setLen(pc + 1)
|
||
|
p.setDataPage(pc, btreeIndexPage(pp).dataPage(parentIndex))
|
||
|
pc++
|
||
|
p.setChild(pc, btreeIndexPage(right).child(0))
|
||
|
btreeIndexPage(pp).setDataPage(parentIndex, btreeIndexPage(right).dataPage(0))
|
||
|
di, si, sz := 1+0*14, 1+1*14, (2*rc+1)*7
|
||
|
copy(right[di:di+sz], right[si:])
|
||
|
right = btreeIndexPage(right).setLen(rc - 1)
|
||
|
if err = a.Realloc(parent, pp); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(*ph, p); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(rh, right)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if lh != 0 {
|
||
|
*index += left.len() + 1
|
||
|
if left, err = left.concat(a, root, iroot, parent, lh, *ph, parentIndex-1); err != nil {
|
||
|
return p, err
|
||
|
}
|
||
|
|
||
|
p, *ph = left, lh
|
||
|
return p, nil
|
||
|
}
|
||
|
|
||
|
return p.concat(a, root, iroot, parent, *ph, rh, parentIndex)
|
||
|
}
|
||
|
|
||
|
// must persist all changes made
|
||
|
func (p btreeIndexPage) concat(a btreeStore, root, iroot, parent, ph, rh int64, parentIndex int) (btreeIndexPage, error) {
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := *ppp
|
||
|
pp, err := a.Get(pp, parent)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pright := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pright)
|
||
|
right := *pright
|
||
|
if right, err = a.Get(right, rh); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pc := p.len()
|
||
|
rc := btreeIndexPage(right).len()
|
||
|
p = p.setLen(pc + rc + 1)
|
||
|
p.setDataPage(pc, btreeIndexPage(pp).dataPage(parentIndex))
|
||
|
di, si, sz := 1+14*(pc+1), 1+0*14, (2*rc+1)*7
|
||
|
copy(p[di:di+sz], right[si:])
|
||
|
if err := a.Realloc(ph, p); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if err := a.Free(rh); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if pc := btreeIndexPage(pp).len(); pc > 1 {
|
||
|
if parentIndex < pc-1 {
|
||
|
di, si, sz := 8+parentIndex*14, 8+(parentIndex+1)*14, 2*(pc-1-parentIndex)*7
|
||
|
copy(pp[di:si+sz], pp[si:])
|
||
|
}
|
||
|
pp = btreeIndexPage(pp).setLen(pc - 1)
|
||
|
return p, a.Realloc(parent, pp)
|
||
|
}
|
||
|
|
||
|
if err := a.Free(iroot); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pb7 := buffer.Get(7)
|
||
|
defer buffer.Put(pb7)
|
||
|
b7 := *pb7
|
||
|
return p, a.Realloc(root, h2b(b7[:7], ph))
|
||
|
}
|
||
|
|
||
|
/*

0...0 (1 bytes):
Flag

	  0
	+---+
	| 1 |
	+---+

1 indicates a data page

1...14 (14 bytes)

	  1..7   8..14
	+------+------+
	| Prev | Next |
	+------+------+

	Prev, Next == Handles of the data pages doubly linked list

	Count = (len(raw) - 15) / (2*kKV)

15...15+count*2*kKV-1
"array" of items, 2*kKV bytes each. Count of items in kData-1..2*kData

Item

	  0..kKV-1   kKV..2*kKV-1
	+----------+--------------+
	|   Key    |    Value     |
	+----------+--------------+

Key/Value encoding

Length 0...kKV-1

	  0    1...N    N+1...kKV-1
	+---+---------+-------------+
	| N |  Data   |   Padding   |
	+---+---------+-------------+

	N       == content length
	Data    == Key or Value content
	Padding == MUST be zero bytes

Length >= kKV

	   0     1...kKV-8   kKV-7...kKV-1
	+------+-----------+--------------+
	| 0xFF |   Data    |      H       |
	+------+-----------+--------------+

	Data == Key or Value content, first kKV-8 (== kSz) bytes
	H    == Handle to THE REST of the content, w/o the first bytes in Data.

Offsets into the raw []byte:

	Key[X]   == 15+2*kKV*X
	Value[X] == 15+kKV+2*kKV*X
*/
|
||
|
// btreeDataPage is the raw byte image of a B+tree data (leaf) page: a tag
// byte, the prev/next page handles and an array of fixed size key/value items.
type btreeDataPage []byte
|
||
|
|
||
|
func newBTreeDataPage() (p btreeDataPage) {
|
||
|
p = (*buffer.CGet(1 + 2*7 + (kData+1)*2*kKV))[:1+2*7]
|
||
|
p[0] = tagBTreeDataPage
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func newBTreeDataPageAlloc(a btreeStore) (p btreeDataPage, h int64, err error) {
|
||
|
p = newBTreeDataPage()
|
||
|
h, err = a.Alloc(p)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) len() int {
|
||
|
return (len(p) - 15) / (2 * kKV)
|
||
|
}
|
||
|
|
||
|
func (q btreeDataPage) setLen(n int) btreeDataPage {
|
||
|
q = q[:cap(q)]
|
||
|
need := 15 + 2*kKV*n
|
||
|
if need < len(q) {
|
||
|
return q[:need]
|
||
|
}
|
||
|
return append(q, make([]byte, need-len(q))...)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) prev() int64 {
|
||
|
return b2h(p[1:])
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) next() int64 {
|
||
|
return b2h(p[8:])
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) setPrev(h int64) {
|
||
|
h2b(p[1:], h)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) setNext(h int64) {
|
||
|
h2b(p[8:], h)
|
||
|
}
|
||
|
|
||
|
func (q btreeDataPage) insert(index int) btreeDataPage {
|
||
|
switch len0 := q.len(); {
|
||
|
case index < len0:
|
||
|
has := len(q)
|
||
|
need := has + 2*kKV
|
||
|
switch {
|
||
|
case cap(q) >= need:
|
||
|
q = q[:need]
|
||
|
default:
|
||
|
q = append(q, zeros[:2*kKV]...)
|
||
|
}
|
||
|
q.copy(q, index+1, index, len0-index)
|
||
|
return q
|
||
|
case index == len0:
|
||
|
has := len(q)
|
||
|
need := has + 2*kKV
|
||
|
switch {
|
||
|
case cap(q) >= need:
|
||
|
return q[:need]
|
||
|
default:
|
||
|
return append(q, zeros[:2*kKV]...)
|
||
|
}
|
||
|
}
|
||
|
panic("internal error")
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) contentField(off int) (b []byte, h int64) {
|
||
|
p = p[off:]
|
||
|
switch n := int(p[0]); {
|
||
|
case n >= kKV: // content has a handle
|
||
|
b = append([]byte(nil), p[1:1+kSz]...)
|
||
|
h = b2h(p[kH:])
|
||
|
default: // content is embedded
|
||
|
b, h = append([]byte(nil), p[1:1+n]...), 0
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) content(a btreeStore, off int) (b []byte, err error) {
|
||
|
b, h := p.contentField(off)
|
||
|
if h == 0 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// content has a handle
|
||
|
b2, err := a.Get(nil, h) //TODO buffers: Later, not a public API
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return append(b, b2...), nil
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) setContent(a btreeStore, off int, b []byte) (err error) {
|
||
|
p = p[off:]
|
||
|
switch {
|
||
|
case p[0] >= kKV: // existing content has a handle
|
||
|
switch n := len(b); {
|
||
|
case n < kKV:
|
||
|
p[0] = byte(n)
|
||
|
if err = a.Free(b2h(p[kH:])); err != nil {
|
||
|
return
|
||
|
}
|
||
|
copy(p[1:], b)
|
||
|
default:
|
||
|
// reuse handle
|
||
|
copy(p[1:1+kSz], b)
|
||
|
return a.Realloc(b2h(p[kH:]), b[kSz:])
|
||
|
}
|
||
|
default: // existing content is embedded
|
||
|
switch n := len(b); {
|
||
|
case n < kKV:
|
||
|
p[0] = byte(n)
|
||
|
copy(p[1:], b)
|
||
|
default:
|
||
|
p[0] = 0xff
|
||
|
copy(p[1:1+kSz], b)
|
||
|
h, err := a.Alloc(b[kSz:])
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
h2b(p[kH:], h)
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) keyField(index int) (b []byte, h int64) {
|
||
|
return p.contentField(15 + 2*kKV*index)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) key(a btreeStore, index int) (b []byte, err error) {
|
||
|
return p.content(a, 15+2*kKV*index)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) valueField(index int) (b []byte, h int64) {
|
||
|
return p.contentField(15 + kKV + 2*kKV*index)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) value(a btreeStore, index int) (b []byte, err error) {
|
||
|
value, err := p.content(a, 15+kKV+2*kKV*index)
|
||
|
if err == nil && value == nil {
|
||
|
// We have a valid page, no fetch error, the key is valid so return
|
||
|
// non-nil data
|
||
|
return []byte{}, nil
|
||
|
}
|
||
|
return value, err
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) valueCopy(a btreeStore, index int) (b []byte, err error) {
|
||
|
if b, err = p.content(a, 15+kKV+2*kKV*index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return append([]byte(nil), b...), nil
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) setKey(a btreeStore, index int, key []byte) (err error) {
|
||
|
return p.setContent(a, 15+2*kKV*index, key)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) setValue(a btreeStore, index int, value []byte) (err error) {
|
||
|
return p.setContent(a, 15+kKV+2*kKV*index, value)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) cmp(a btreeStore, c func(a, b []byte) int, keyA []byte, keyBIndex int) (y int, err error) {
|
||
|
var keyB []byte
|
||
|
if keyB, err = p.content(a, 15+2*kKV*keyBIndex); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
return c(keyA, keyB), nil
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) copy(src btreeDataPage, di, si, n int) {
|
||
|
do, so := 15+2*kKV*di, 15+2*kKV*si
|
||
|
copy(p[do:do+2*kKV*n], src[so:])
|
||
|
}
|
||
|
|
||
|
// {p,left} dirty on exit
|
||
|
func (p btreeDataPage) moveLeft(left btreeDataPage, n int) (btreeDataPage, btreeDataPage) {
|
||
|
nl, np := left.len(), p.len()
|
||
|
left = left.setLen(nl + n)
|
||
|
left.copy(p, nl, 0, n)
|
||
|
p.copy(p, 0, n, np-n)
|
||
|
return p.setLen(np - n), left
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) moveRight(right btreeDataPage, n int) (btreeDataPage, btreeDataPage) {
|
||
|
nr, np := right.len(), p.len()
|
||
|
right = right.setLen(nr + n)
|
||
|
right.copy(right, n, 0, nr)
|
||
|
right.copy(p, 0, np-n, n)
|
||
|
return p.setLen(np - n), right
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) insertItem(a btreeStore, index int, key, value []byte) (btreeDataPage, error) {
|
||
|
p = p.insert(index)
|
||
|
di, sz := 15+2*kKV*index, 2*kKV
|
||
|
copy(p[di:di+sz], zeros[:sz])
|
||
|
if err := p.setKey(a, index, key); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
return p, p.setValue(a, index, value)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) split(a btreeStore, root, ph, parent int64, parentIndex, index int, key, value []byte) (btreeDataPage, error) {
|
||
|
right, rh, err := newBTreeDataPageAlloc(a)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if next := p.next(); next != 0 {
|
||
|
right.setNext(p.next())
|
||
|
nxh := right.next()
|
||
|
pnx := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pnx)
|
||
|
nx := *pnx
|
||
|
if nx, err = a.Get(nx, nxh); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
btreeDataPage(nx).setPrev(rh)
|
||
|
if err = a.Realloc(nxh, nx); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
p.setNext(rh)
|
||
|
right.setPrev(ph)
|
||
|
right = right.setLen(kData)
|
||
|
right.copy(p, 0, kData, kData)
|
||
|
p = p.setLen(kData)
|
||
|
|
||
|
if parentIndex >= 0 {
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := btreeIndexPage(*ppp)
|
||
|
if pp, err = a.Get(pp, parent); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
pp = pp.insert3(parentIndex, rh, rh)
|
||
|
if err = a.Realloc(parent, pp); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
} else {
|
||
|
nr := newBTreeIndexPage(ph)
|
||
|
nr = nr.insert3(0, rh, rh)
|
||
|
nrh, err := a.Alloc(nr)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(root, h2b(make([]byte, 7), nrh)); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
}
|
||
|
if index > kData {
|
||
|
if right, err = right.insertItem(a, index-kData, key, value); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
} else {
|
||
|
if p, err = p.insertItem(a, index, key, value); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
if err = a.Realloc(ph, p); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(rh, right)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) overflow(a btreeStore, root, ph, parent int64, parentIndex, index int, key, value []byte) (btreeDataPage, error) {
|
||
|
leftH, rightH, err := checkSiblings(a, parent, parentIndex)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if leftH != 0 {
|
||
|
pleft := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pleft)
|
||
|
left := btreeDataPage(*pleft)
|
||
|
if left, err = a.Get(left, leftH); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if left.len() < 2*kData && index > 0 {
|
||
|
|
||
|
p, left = p.moveLeft(left, 1)
|
||
|
if err = a.Realloc(leftH, left); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if p, err = p.insertItem(a, index-1, key, value); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(ph, p)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if rightH != 0 {
|
||
|
pright := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pright)
|
||
|
right := btreeDataPage(*pright)
|
||
|
if right, err = a.Get(right, rightH); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if right.len() < 2*kData {
|
||
|
if index < 2*kData {
|
||
|
p, right = p.moveRight(right, 1)
|
||
|
if err = a.Realloc(rightH, right); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if p, err = p.insertItem(a, index, key, value); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(ph, p)
|
||
|
} else {
|
||
|
if right, err = right.insertItem(a, 0, key, value); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return p, a.Realloc(rightH, right)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return p.split(a, root, ph, parent, parentIndex, index, key, value)
|
||
|
}
|
||
|
|
||
|
func (p btreeDataPage) swap(a btreeStore, di int, value []byte, canOverwrite bool) (oldValue []byte, err error) {
|
||
|
if oldValue, err = p.value(a, di); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !canOverwrite {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
oldValue = append([]byte(nil), oldValue...)
|
||
|
err = p.setValue(a, di, value)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// btreePage is the raw byte image of a B+tree page of either kind. Its first
// byte is the tag telling index pages from data pages.
type btreePage []byte
|
||
|
|
||
|
func (p btreePage) isIndex() bool {
|
||
|
return p[0] == tagBTreeIndexPage
|
||
|
}
|
||
|
|
||
|
func (p btreePage) len() int {
|
||
|
if p.isIndex() {
|
||
|
return btreeIndexPage(p).len()
|
||
|
}
|
||
|
|
||
|
return btreeDataPage(p).len()
|
||
|
}
|
||
|
|
||
|
func (p btreePage) find(a btreeStore, c func(a, b []byte) int, key []byte) (index int, ok bool, err error) {
|
||
|
l := 0
|
||
|
h := p.len() - 1
|
||
|
isIndex := p.isIndex()
|
||
|
if c == nil {
|
||
|
c = bytes.Compare
|
||
|
}
|
||
|
for l <= h {
|
||
|
index = (l + h) >> 1
|
||
|
var cmp int
|
||
|
if isIndex {
|
||
|
if cmp, err = btreeIndexPage(p).cmp(a, c, key, index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
} else {
|
||
|
if cmp, err = btreeDataPage(p).cmp(a, c, key, index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
switch ok = cmp == 0; {
|
||
|
case cmp > 0:
|
||
|
l = index + 1
|
||
|
case ok:
|
||
|
return
|
||
|
default:
|
||
|
h = index - 1
|
||
|
}
|
||
|
}
|
||
|
return l, false, nil
|
||
|
}
|
||
|
|
||
|
// p is dirty after extract!
|
||
|
func (p btreeDataPage) extract(a btreeStore, index int) (btreeDataPage, []byte, error) {
|
||
|
value, err := p.valueCopy(a, index)
|
||
|
if err != nil {
|
||
|
return nil, nil, err
|
||
|
}
|
||
|
|
||
|
if _, h := p.keyField(index); h != 0 {
|
||
|
if err = a.Free(h); err != nil {
|
||
|
return nil, nil, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if _, h := p.valueField(index); h != 0 {
|
||
|
if err = a.Free(h); err != nil {
|
||
|
return nil, nil, err
|
||
|
}
|
||
|
}
|
||
|
|
||
|
n := p.len() - 1
|
||
|
if index < n {
|
||
|
p.copy(p, index, index+1, n-index)
|
||
|
}
|
||
|
return p.setLen(n), value, nil
|
||
|
}
|
||
|
|
||
|
func checkSiblings(a btreeStore, parent int64, parentIndex int) (left, right int64, err error) {
|
||
|
if parentIndex >= 0 {
|
||
|
pp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pp)
|
||
|
p := btreeIndexPage(*pp)
|
||
|
if p, err = a.Get(p, parent); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if parentIndex > 0 {
|
||
|
left = p.child(parentIndex - 1)
|
||
|
}
|
||
|
if parentIndex < p.len() {
|
||
|
right = p.child(parentIndex + 1)
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// underflow must persist all changes made.
|
||
|
func (p btreeDataPage) underflow(a btreeStore, root, iroot, parent, ph int64, parentIndex int) (err error) {
|
||
|
lh, rh, err := checkSiblings(a, parent, parentIndex)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if lh != 0 {
|
||
|
pleft := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pleft)
|
||
|
left := *pleft
|
||
|
if left, err = a.Get(left, lh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if btreeDataPage(left).len()+p.len() >= 2*kData {
|
||
|
left, p = btreeDataPage(left).moveRight(p, 1)
|
||
|
if err = a.Realloc(lh, left); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
return a.Realloc(ph, p)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if rh != 0 {
|
||
|
pright := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pright)
|
||
|
right := *pright
|
||
|
if right, err = a.Get(right, rh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if p.len()+btreeDataPage(right).len() > 2*kData {
|
||
|
right, p = btreeDataPage(right).moveLeft(p, 1)
|
||
|
if err = a.Realloc(rh, right); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
return a.Realloc(ph, p)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if lh != 0 {
|
||
|
pleft := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pleft)
|
||
|
left := *pleft
|
||
|
if left, err = a.Get(left, lh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(ph, p); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
return btreeDataPage(left).concat(a, root, iroot, parent, lh, ph, parentIndex-1)
|
||
|
}
|
||
|
|
||
|
return p.concat(a, root, iroot, parent, ph, rh, parentIndex)
|
||
|
}
|
||
|
|
||
|
// concat must persist all changes made.
|
||
|
func (p btreeDataPage) concat(a btreeStore, root, iroot, parent, ph, rh int64, parentIndex int) (err error) {
|
||
|
pright := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pright)
|
||
|
right := *pright
|
||
|
if right, err = a.Get(right, rh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
right, p = btreeDataPage(right).moveLeft(p, btreeDataPage(right).len())
|
||
|
nxh := btreeDataPage(right).next()
|
||
|
if nxh != 0 {
|
||
|
pnx := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pnx)
|
||
|
nx := *pnx
|
||
|
if nx, err = a.Get(nx, nxh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
btreeDataPage(nx).setPrev(ph)
|
||
|
if err = a.Realloc(nxh, nx); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
}
|
||
|
p.setNext(nxh)
|
||
|
if err = a.Free(rh); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
ppp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(ppp)
|
||
|
pp := *ppp
|
||
|
if pp, err = a.Get(pp, parent); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if btreeIndexPage(pp).len() > 1 {
|
||
|
pp = btreeIndexPage(pp).extract(parentIndex)
|
||
|
btreeIndexPage(pp).setChild(parentIndex, ph)
|
||
|
if err = a.Realloc(parent, pp); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
return a.Realloc(ph, p)
|
||
|
}
|
||
|
|
||
|
if err = a.Free(iroot); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
if err = a.Realloc(ph, p); err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
var b7 [7]byte
|
||
|
return a.Realloc(root, h2b(b7[:], ph))
|
||
|
}
|
||
|
|
||
|
// external "root" is stable and contains the real root.
|
||
|
// btree is the handle of the external root block of a tree. That block holds
// the 7 byte handle of the actual root page; a zero handle means the tree is
// empty.
type btree int64
|
||
|
|
||
|
func newBTree(a btreeStore) (btree, error) {
|
||
|
r, err := a.Alloc(zeros[:7])
|
||
|
return btree(r), err
|
||
|
}
|
||
|
|
||
|
func (root btree) String(a btreeStore) string {
|
||
|
pr := buffer.Get(16)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
r, err := a.Get(r, int64(root))
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
m := map[int64]bool{int64(root): true}
|
||
|
|
||
|
s := []string{fmt.Sprintf("tree %#x -> %#x\n====", root, iroot)}
|
||
|
if iroot == 0 {
|
||
|
return s[0]
|
||
|
}
|
||
|
|
||
|
var f func(int64, string)
|
||
|
f = func(h int64, ind string) {
|
||
|
if m[h] {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
m[h] = true
|
||
|
pb := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pb)
|
||
|
b := btreePage(*pb)
|
||
|
var err error
|
||
|
if b, err = a.Get(b, h); err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
|
||
|
s = append(s, fmt.Sprintf("%s@%#x", ind, h))
|
||
|
switch b.isIndex() {
|
||
|
case true:
|
||
|
da := []int64{}
|
||
|
b := btreeIndexPage(b)
|
||
|
for i := 0; i < b.len(); i++ {
|
||
|
c, d := b.child(i), b.dataPage(i)
|
||
|
s = append(s, fmt.Sprintf("%schild[%d] %#x dataPage[%d] %#x", ind, i, c, i, d))
|
||
|
da = append(da, c)
|
||
|
da = append(da, d)
|
||
|
}
|
||
|
i := b.len()
|
||
|
c := b.child(i)
|
||
|
s = append(s, fmt.Sprintf("%schild[%d] %#x", ind, i, c))
|
||
|
for _, c := range da {
|
||
|
f(c, ind+" ")
|
||
|
}
|
||
|
f(c, ind+" ")
|
||
|
case false:
|
||
|
b := btreeDataPage(b)
|
||
|
s = append(s, fmt.Sprintf("%sprev %#x next %#x", ind, b.prev(), b.next()))
|
||
|
for i := 0; i < b.len(); i++ {
|
||
|
k, err := b.key(a, i)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
|
||
|
v, err := b.value(a, i)
|
||
|
if err != nil {
|
||
|
panic(err)
|
||
|
}
|
||
|
|
||
|
s = append(s, fmt.Sprintf("%sK[%d]|% x| V[%d]|% x|", ind, i, k, i, v))
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
f(int64(iroot), "")
|
||
|
return strings.Join(s, "\n")
|
||
|
}
|
||
|
|
||
|
func (root btree) put(dst []byte, a btreeStore, c func(a, b []byte) int, key, value []byte, canOverwrite bool) (prev []byte, err error) {
|
||
|
prev, _, err = root.put2(dst, a, c, key, func(key, old []byte) (new []byte, write bool, err error) {
|
||
|
new, write = value, true
|
||
|
return
|
||
|
})
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (root btree) put2(dst []byte, a btreeStore, c func(a, b []byte) int, key []byte, upd func(key, old []byte) (new []byte, write bool, err error)) (old []byte, written bool, err error) {
|
||
|
var r, value []byte
|
||
|
if r, err = a.Get(dst, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
var h int64
|
||
|
if iroot == 0 {
|
||
|
p := newBTreeDataPage()
|
||
|
if value, written, err = upd(key, nil); err != nil || !written {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if p, err = p.insertItem(a, 0, key, value); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
h, err = a.Alloc(p)
|
||
|
if err != nil {
|
||
|
return nil, true, err
|
||
|
}
|
||
|
|
||
|
err = a.Realloc(int64(root), h2b(r, h)[:7])
|
||
|
return
|
||
|
}
|
||
|
|
||
|
parentIndex := -1
|
||
|
var parent int64
|
||
|
ph := iroot
|
||
|
|
||
|
pp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pp)
|
||
|
p := *pp
|
||
|
|
||
|
for {
|
||
|
if p, err = a.Get(p[:cap(p)], ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var index int
|
||
|
var ok bool
|
||
|
|
||
|
if index, ok, err = btreePage(p).find(a, c, key); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
switch {
|
||
|
case ok: // Key found
|
||
|
if btreePage(p).isIndex() {
|
||
|
ph = btreeIndexPage(p).dataPage(index)
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if old, err = btreeDataPage(p).valueCopy(a, 0); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if value, written, err = upd(key, old); err != nil || !written {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if _, err = btreeDataPage(p).swap(a, 0, value, true); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
err = a.Realloc(ph, p)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if old, err = btreeDataPage(p).valueCopy(a, index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if value, written, err = upd(key, old); err != nil || !written {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if _, err = btreeDataPage(p).swap(a, index, value, true); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
err = a.Realloc(ph, p)
|
||
|
return
|
||
|
case btreePage(p).isIndex():
|
||
|
if btreePage(p).len() > 2*kIndex {
|
||
|
if p, err = btreeIndexPage(p).split(a, root, &ph, parent, parentIndex, &index); err != nil {
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
parentIndex = index
|
||
|
parent = ph
|
||
|
ph = btreeIndexPage(p).child(index)
|
||
|
default:
|
||
|
if value, written, err = upd(key, nil); err != nil || !written {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if btreePage(p).len() < 2*kData { // page is not full
|
||
|
if p, err = btreeDataPage(p).insertItem(a, index, key, value); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
err = a.Realloc(ph, p)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// page is full
|
||
|
p, err = btreeDataPage(p).overflow(a, int64(root), ph, parent, parentIndex, index, key, value)
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//TODO actually use 'dst' to return 'value'
|
||
|
func (root btree) get(a btreeStore, dst []byte, c func(a, b []byte) int, key []byte) (b []byte, err error) {
|
||
|
var r []byte
|
||
|
if r, err = a.Get(dst, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
if iroot == 0 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
ph := iroot
|
||
|
|
||
|
for {
|
||
|
var p btreePage
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var index int
|
||
|
var ok bool
|
||
|
if index, ok, err = p.find(a, c, key); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
switch {
|
||
|
case ok:
|
||
|
if p.isIndex() {
|
||
|
dh := btreeIndexPage(p).dataPage(index)
|
||
|
dp, err := a.Get(dst, dh)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return btreeDataPage(dp).value(a, 0)
|
||
|
}
|
||
|
|
||
|
return btreeDataPage(p).value(a, index)
|
||
|
case p.isIndex():
|
||
|
ph = btreeIndexPage(p).child(index)
|
||
|
default:
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
//TODO actually use 'dst' to return 'value'
|
||
|
func (root btree) extract(a btreeStore, dst []byte, c func(a, b []byte) int, key []byte) (value []byte, err error) {
|
||
|
var r []byte
|
||
|
if r, err = a.Get(dst, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
if iroot == 0 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
ph := iroot
|
||
|
parentIndex := -1
|
||
|
var parent int64
|
||
|
|
||
|
pp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pp)
|
||
|
p := *pp
|
||
|
|
||
|
for {
|
||
|
if p, err = a.Get(p[:cap(p)], ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var index int
|
||
|
var ok bool
|
||
|
if index, ok, err = btreePage(p).find(a, c, key); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if ok {
|
||
|
if btreePage(p).isIndex() {
|
||
|
dph := btreeIndexPage(p).dataPage(index)
|
||
|
dp, err := a.Get(dst, dph)
|
||
|
if err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
if btreeDataPage(dp).len() > kData {
|
||
|
if dp, value, err = btreeDataPage(dp).extract(a, 0); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
|
||
|
return value, a.Realloc(dph, dp)
|
||
|
}
|
||
|
|
||
|
if btreeIndexPage(p).len() < kIndex && ph != iroot {
|
||
|
var err error
|
||
|
if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
parentIndex = index + 1
|
||
|
parent = ph
|
||
|
ph = btreeIndexPage(p).child(parentIndex)
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
p, value, err = btreeDataPage(p).extract(a, index)
|
||
|
if btreePage(p).len() >= kData {
|
||
|
err = a.Realloc(ph, p)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if ph != iroot {
|
||
|
err = btreeDataPage(p).underflow(a, int64(root), iroot, parent, ph, parentIndex)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if btreePage(p).len() == 0 {
|
||
|
if err = a.Free(ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
err = a.Realloc(int64(root), zeros[:7])
|
||
|
return
|
||
|
}
|
||
|
err = a.Realloc(ph, p)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !btreePage(p).isIndex() {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if btreePage(p).len() < kIndex && ph != iroot {
|
||
|
if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
}
|
||
|
parentIndex = index
|
||
|
parent = ph
|
||
|
ph = btreeIndexPage(p).child(index)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (root btree) deleteAny(a btreeStore) (bool, error) {
|
||
|
pr := buffer.Get(7)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
var err error
|
||
|
if r, err = a.Get(r, int64(root)); err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
if iroot == 0 {
|
||
|
return true, nil
|
||
|
}
|
||
|
|
||
|
ph := iroot
|
||
|
parentIndex := -1
|
||
|
var parent int64
|
||
|
pp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pp)
|
||
|
p := *pp
|
||
|
|
||
|
for {
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
index := btreePage(p).len() / 2
|
||
|
if btreePage(p).isIndex() {
|
||
|
dph := btreeIndexPage(p).dataPage(index)
|
||
|
pdp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pdp)
|
||
|
dp := *pdp
|
||
|
if dp, err = a.Get(dp, dph); err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if btreeDataPage(dp).len() > kData {
|
||
|
if dp, _, err = btreeDataPage(dp).extract(a, 0); err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
return false, a.Realloc(dph, dp)
|
||
|
}
|
||
|
|
||
|
if btreeIndexPage(p).len() < kIndex && ph != iroot {
|
||
|
if p, err = btreeIndexPage(p).underflow(a, int64(root), iroot, parent, &ph, parentIndex, &index); err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
}
|
||
|
parentIndex = index + 1
|
||
|
parent = ph
|
||
|
ph = btreeIndexPage(p).child(parentIndex)
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
p, _, err = btreeDataPage(p).extract(a, index)
|
||
|
if btreePage(p).len() >= kData {
|
||
|
err = a.Realloc(ph, p)
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if ph != iroot {
|
||
|
err = btreeDataPage(p).underflow(a, int64(root), iroot, parent, ph, parentIndex)
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if btreePage(p).len() == 0 {
|
||
|
if err = a.Free(ph); err != nil {
|
||
|
return true, err
|
||
|
}
|
||
|
|
||
|
return true, a.Realloc(int64(root), zeros[:7])
|
||
|
}
|
||
|
|
||
|
return false, a.Realloc(ph, p)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (root btree) first(a btreeStore) (ph int64, p btreeDataPage, err error) {
|
||
|
pr := buffer.Get(7)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
if r, err = a.Get(r, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
for ph = b2h(r); ph != 0; ph = btreeIndexPage(p).child(0) {
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !btreePage(p).isIndex() {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (root btree) last(a btreeStore) (ph int64, p btreeDataPage, err error) {
|
||
|
pr := buffer.Get(7)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
if r, err = a.Get(r, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
for ph = b2h(r); ph != 0; ph = btreeIndexPage(p).child(btreeIndexPage(p).len()) {
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if !btreePage(p).isIndex() {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// key >= p[index].key
|
||
|
func (root btree) seek(a btreeStore, c func(a, b []byte) int, key []byte) (p btreeDataPage, index int, equal bool, err error) {
|
||
|
pr := buffer.Get(7)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
if r, err = a.Get(r, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
for ph := b2h(r); ph != 0; ph = btreeIndexPage(p).child(index) {
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if index, equal, err = btreePage(p).find(a, c, key); err != nil {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if equal {
|
||
|
if !btreePage(p).isIndex() {
|
||
|
break
|
||
|
}
|
||
|
|
||
|
p, err = a.Get(p, btreeIndexPage(p).dataPage(index))
|
||
|
index = 0
|
||
|
break
|
||
|
}
|
||
|
|
||
|
if !btreePage(p).isIndex() {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (root btree) clear(a btreeStore) (err error) {
|
||
|
pr := buffer.Get(7)
|
||
|
defer buffer.Put(pr)
|
||
|
r := *pr
|
||
|
if r, err = a.Get(r, int64(root)); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
iroot := b2h(r)
|
||
|
if iroot == 0 {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if err = root.clear2(a, iroot); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
var b [7]byte
|
||
|
return a.Realloc(int64(root), b[:])
|
||
|
}
|
||
|
|
||
|
func (root btree) clear2(a btreeStore, ph int64) (err error) {
|
||
|
pp := buffer.Get(maxBuf)
|
||
|
defer buffer.Put(pp)
|
||
|
p := *pp
|
||
|
if p, err = a.Get(p, ph); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
switch btreePage(p).isIndex() {
|
||
|
case true:
|
||
|
ip := btreeIndexPage(p)
|
||
|
for i := 0; i <= ip.len(); i++ {
|
||
|
root.clear2(a, ip.child(i))
|
||
|
|
||
|
}
|
||
|
case false:
|
||
|
dp := btreeDataPage(p)
|
||
|
for i := 0; i < dp.len(); i++ {
|
||
|
if err = dp.setKey(a, i, nil); err != nil {
|
||
|
return
|
||
|
}
|
||
|
|
||
|
if err = dp.setValue(a, i, nil); err != nil {
|
||
|
return
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
return a.Free(ph)
|
||
|
}
|