syncthing/cmd/stdiscosrv/replication.go

// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.

package main

import (
	"crypto/tls"
	"encoding/binary"
	"fmt"
	io "io"
	"log"
	"net"
	"time"

	"github.com/syncthing/syncthing/lib/protocol"
)

// A replicator accepts database updates that should be passed on to one or
// more replication peers.
type replicator interface {
	// send hands over the addresses and the seen value recorded for key.
	send(key string, addrs []DatabaseAddress, seen int64)
}

// A replicationSender tries to connect to the remote address and provides
// it with a feed of replication updates.
//
// dst is the address that Serve dials, cert is the certificate presented to
// the remote side and allowedIDs is the set of device IDs the remote side
// must belong to. Records queued by send wait in outbox until Serve writes
// them to the connection. Closing stop makes the send loop in Serve return.
type replicationSender struct {
	dst        string
	cert       tls.Certificate // our certificate
	allowedIDs []protocol.DeviceID
	outbox     chan ReplicationRecord
	stop       chan struct{}
}

// newReplicationSender returns a sender that delivers records to dst. It
// presents cert to the peer and only talks to peers whose device ID is in
// allowedIDs. The outbox is buffered with room for replicationOutboxSize
// records. Nothing is dialed until Serve is called.
func newReplicationSender(dst string, cert tls.Certificate, allowedIDs []protocol.DeviceID) *replicationSender {
	s := new(replicationSender)
	s.dst = dst
	s.cert = cert
	s.allowedIDs = allowedIDs
	s.outbox = make(chan ReplicationRecord, replicationOutboxSize)
	s.stop = make(chan struct{})
	return s
}

// Serve dials the destination, checks that the peer presents one of the
// allowed device IDs and then writes queued records to the connection
// until a write fails or Stop is called. Every record goes out as a four
// byte big endian length followed by the marshalled record.
func (s *replicationSender) Serve() {
	// Hold off a little before connecting. Peers often restart at the same
	// time, and this avoids the service failing and entering backoff state
	// unnecessarily, while also reducing the reconnect rate to something
	// reasonable by default.
	time.Sleep(2 * time.Second)

	// Certificate chain verification is turned off; the peer is instead
	// authenticated below by comparing its device ID to the allowed set.
	conn, err := tls.Dial("tcp", s.dst, &tls.Config{
		Certificates:       []tls.Certificate{s.cert},
		MinVersion:         tls.VersionTLS12,
		InsecureSkipVerify: true,
	})
	if err != nil {
		log.Println("Replication connect:", err)
		return
	}
	defer func() {
		// Bound the time the closing write may take.
		conn.SetWriteDeadline(time.Now().Add(time.Second))
		conn.Close()
	}()

	// Who did we reach, and are they someone we should be talking to?
	remoteID, err := deviceID(conn)
	switch {
	case err != nil:
		log.Println("Replication connect:", err)
		return
	case !deviceIDIn(remoteID, s.allowedIDs):
		log.Println("Replication connect: unexpected device ID:", remoteID)
		return
	}

	// The frame buffer is reused between records and grown on demand.
	const hdrLen = 4
	frame := make([]byte, 1024)
	for {
		var rec ReplicationRecord
		select {
		case <-s.stop:
			return
		case rec = <-s.outbox:
		}

		// Make room for the length header plus the marshalled record.
		if need := rec.Size() + hdrLen; len(frame) < need {
			frame = make([]byte, need)
		}

		// The record is marshalled directly after the header bytes.
		n, err := rec.MarshalTo(frame[hdrLen:])
		if err != nil {
			// Nothing has been written for this record yet, so the
			// stream is still intact; skip the record and carry on.
			replicationSendsTotal.WithLabelValues("error").Inc()
			log.Println("Replication marshal:", err)
			continue
		}
		binary.BigEndian.PutUint32(frame, uint32(n))

		// Header and record go out in a single write with a deadline.
		conn.SetWriteDeadline(time.Now().Add(5 * time.Second))
		if _, err := conn.Write(frame[:hdrLen+n]); err != nil {
			replicationSendsTotal.WithLabelValues("error").Inc()
			log.Println("Replication write:", err)
			return
		}
		replicationSendsTotal.WithLabelValues("success").Inc()
	}
}

// Stop asks the send loop in Serve to return by closing the stop channel.
// It must be called at most once, as closing a closed channel panics.
func (s *replicationSender) Stop() {
	close(s.stop)
}

// String returns a description of the sender that includes the quoted
// destination address.
func (s *replicationSender) String() string {
	return fmt.Sprintf("replicationSender(%q)", s.dst)
}

// send queues a replication record for key, carrying the given addresses
// and seen value, for later delivery by Serve. It never blocks: when the
// outbox is full the record is dropped and counted under the "drop" label.
func (s *replicationSender) send(key string, ps []DatabaseAddress, seen int64) {
	// The seen value has to travel with the record. The receiving side
	// hands rec.Seen to the database merge, so leaving it out would make
	// every replicated entry arrive with a zero seen value.
	item := ReplicationRecord{
		Key:       key,
		Addresses: ps,
		Seen:      seen,
	}

	// The send should never block. The outbox is suitably buffered for at
	// least a few seconds of stalls, which shouldn't happen in practice.
	select {
	case s.outbox <- item:
	default:
		replicationSendsTotal.WithLabelValues("drop").Inc()
	}
}

// A replicationMultiplexer fans every update out to each of the
// replicators it contains.
type replicationMultiplexer []replicator

// send passes the update on to all member replicators, in slice order.
func (m replicationMultiplexer) send(key string, ps []DatabaseAddress, seen int64) {
	for i := range m {
		// every member's send is expected to be nonblocking
		m[i].send(key, ps, seen)
	}
}

// replicationListener accepts incoming connections and reads replication
// items from them. Incoming items are applied to the KV store.
//
// addr is the listen address, cert is the certificate presented to
// connecting peers and allowedIDs is the set of device IDs a peer must
// belong to. Received records are merged into db. Closing stop makes Serve
// and the per connection handlers return once they next check it.
type replicationListener struct {
	addr       string
	cert       tls.Certificate
	allowedIDs []protocol.DeviceID
	db         database
	stop       chan struct{}
}

// newReplicationListener returns a listener for addr that presents cert,
// accepts only peers whose device ID is in allowedIDs and merges what they
// send into db. Nothing is bound until Serve is called.
func newReplicationListener(addr string, cert tls.Certificate, allowedIDs []protocol.DeviceID, db database) *replicationListener {
	l := new(replicationListener)
	l.addr = addr
	l.cert = cert
	l.allowedIDs = allowedIDs
	l.db = db
	l.stop = make(chan struct{})
	return l
}

// Serve listens for TLS connections on the configured address and starts a
// handler goroutine for every peer that presents an allowed device ID. It
// returns when the listener cannot be created, when accepting fails, or
// when Stop is called.
func (l *replicationListener) Serve() {
	// Upper bound on how long a new connection may take to complete the
	// TLS handshake. The handshake runs inline in the accept loop, so
	// without a limit a single stalled client would block all other peers.
	const handshakeTimeout = 10 * time.Second

	// The client certificate is requested but not verified by crypto/tls;
	// peers are authenticated below by their device ID.
	tlsCfg := &tls.Config{
		Certificates:       []tls.Certificate{l.cert},
		ClientAuth:         tls.RequestClientCert,
		MinVersion:         tls.VersionTLS12,
		InsecureSkipVerify: true,
	}

	lst, err := tls.Listen("tcp", l.addr, tlsCfg)
	if err != nil {
		log.Println("Replication listen:", err)
		return
	}
	defer lst.Close()

	// Accept blocks until a connection arrives, so checking l.stop between
	// accepts is not enough for Stop to take effect on an idle listener.
	// Close the listener when stop fires, which unblocks Accept. The done
	// channel lets the watcher exit when Serve returns for another reason.
	done := make(chan struct{})
	defer close(done)
	go func() {
		select {
		case <-l.stop:
			lst.Close()
		case <-done:
		}
	}()

	for {
		select {
		case <-l.stop:
			return
		default:
		}

		// Accept a connection
		conn, err := lst.Accept()
		if err != nil {
			select {
			case <-l.stop:
				// The listener was closed because we are stopping; this
				// is not an error worth reporting.
				return
			default:
			}
			log.Println("Replication accept:", err)
			return
		}

		// Figure out the other side device ID. This performs the TLS
		// handshake, which we bound in time.
		conn.SetDeadline(time.Now().Add(handshakeTimeout))
		remoteID, err := deviceID(conn.(*tls.Conn))
		if err != nil {
			log.Println("Replication accept:", err)
			conn.SetWriteDeadline(time.Now().Add(time.Second))
			conn.Close()
			continue
		}

		// Verify it is in the set of allowed device IDs
		if !deviceIDIn(remoteID, l.allowedIDs) {
			log.Println("Replication accept: unexpected device ID:", remoteID)
			conn.SetWriteDeadline(time.Now().Add(time.Second))
			conn.Close()
			continue
		}

		// The handshake deadline no longer applies; the handler sets its
		// own deadlines from here on.
		conn.SetDeadline(time.Time{})

		go l.handle(conn)
	}
}

// Stop closes the stop channel, which Serve and the connection handlers
// check in order to know when to return. It must be called at most once,
// as closing a closed channel panics.
func (l *replicationListener) Stop() {
	close(l.stop)
}

// String returns a description of the listener that includes the quoted
// listen address.
func (l *replicationListener) String() string {
	return fmt.Sprintf("replicationListener(%q)", l.addr)
}

// handle reads length prefixed replication records from conn and merges
// them into the database. It returns, closing the connection, when a read
// fails, when the peer announces an oversized record, or when the listener
// has been stopped (checked between records).
func (l *replicationListener) handle(conn net.Conn) {
	// Largest record we are willing to buffer. The length prefix comes
	// straight off the wire, so without a limit a corrupt or hostile
	// stream could make us allocate up to 4 GiB, and on platforms with a
	// 32 bit int the converted size could even become negative.
	const maxRecordSize = 16 << 20 // 16 MiB

	defer func() {
		conn.SetWriteDeadline(time.Now().Add(time.Second))
		conn.Close()
	}()

	// Reused between records and grown on demand.
	buf := make([]byte, 1024)

	for {
		select {
		case <-l.stop:
			return
		default:
		}

		// One deadline covers both the size and the record read below.
		conn.SetReadDeadline(time.Now().Add(time.Minute))

		// First four bytes are the size
		if _, err := io.ReadFull(conn, buf[:4]); err != nil {
			log.Println("Replication read size:", err)
			replicationRecvsTotal.WithLabelValues("error").Inc()
			return
		}

		// Check the announced size while it is still unsigned, before
		// converting it or allocating anything based on it.
		announced := binary.BigEndian.Uint32(buf[:4])
		if announced > maxRecordSize {
			log.Println("Replication read size: record too large:", announced)
			replicationRecvsTotal.WithLabelValues("error").Inc()
			return
		}

		// Read the rest of the record
		size := int(announced)
		if len(buf) < size {
			buf = make([]byte, size)
		}
		if _, err := io.ReadFull(conn, buf[:size]); err != nil {
			log.Println("Replication read record:", err)
			replicationRecvsTotal.WithLabelValues("error").Inc()
			return
		}

		// Unmarshal
		var rec ReplicationRecord
		if err := rec.Unmarshal(buf[:size]); err != nil {
			// The whole frame was consumed, so the stream is still in
			// sync; skip this record and keep reading.
			log.Println("Replication unmarshal:", err)
			replicationRecvsTotal.WithLabelValues("error").Inc()
			continue
		}

		// Store
		l.db.merge(rec.Key, rec.Addresses, rec.Seen)
		replicationRecvsTotal.WithLabelValues("success").Inc()
	}
}

// deviceID returns the device ID derived from the raw bytes of the single
// certificate presented by the peer on conn, completing the TLS handshake
// first when that has not already happened. An error is returned when the
// handshake fails or when the peer did not present exactly one certificate.
func deviceID(conn *tls.Conn) (protocol.DeviceID, error) {
	var none protocol.DeviceID

	// On the server side the handshake may still be pending, and the
	// client certificate is not available until it has run.
	if state := conn.ConnectionState(); !state.HandshakeComplete {
		if err := conn.Handshake(); err != nil {
			return none, err
		}
	}

	// Exactly one certificate is expected from the peer.
	peerCerts := conn.ConnectionState().PeerCertificates
	if n := len(peerCerts); n != 1 {
		return none, fmt.Errorf("unexpected number of certificates (%d != 1)", n)
	}

	id := protocol.NewDeviceID(peerCerts[0].Raw)
	return id, nil
}

// deviceIDIn reports whether id is equal to any element of ids. It returns
// false for an empty or nil slice.
func deviceIDIn(id protocol.DeviceID, ids []protocol.DeviceID) bool {
	for i := range ids {
		if ids[i] == id {
			return true
		}
	}
	return false
}
cmd/stdiscosrv: New discovery server (fixes #4618) This is a new revision of the discovery server. Relevant changes and non-changes: - Protocol towards clients is unchanged. - Recommended large scale design is still to be deployed behind nginx (I tested, and it's still a lot faster at terminating TLS). - Database backend is leveldb again, only. It scales enough, is easy to set up, and we don't need any backend to take care of. - Server supports replication. This is a simple TCP channel - protect it with a firewall when deploying over the internet. (We deploy this within the same datacenter, and with a firewall.) Any incoming client announces are sent over the replication channel(s) to other peer discosrvs. Incoming replication changes are applied to the database as if they came from clients, but without the TLS/certificate overhead. - Metrics are exposed using the prometheus library, when enabled. - The database values and replication protocol are protobuf, because JSON was quite CPU intensive when I tried that and benchmarked it. - The "Retry-After" value for failed lookups gets slowly increased from a default of 120 seconds, by 5 seconds for each failed lookup, independently by each discosrv. This lowers the query load over time for clients that are never seen. The Retry-After maxes out at 3600 after a couple of weeks of this increase. The number of failed lookups is stored in the database, now and then (avoiding making each lookup a database put). All in all this means clients can be pointed towards a cluster using just multiple A / AAAA records to gain both load sharing and redundancy (if one is down, clients will talk to the remaining ones). GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648 2018-01-14 08:52:31 +00:00			`// Copyright (C) 2018 The Syncthing Authors.`
			`//`
			`// This Source Code Form is subject to the terms of the Mozilla Public`
			`// License, v. 2.0. If a copy of the MPL was not distributed with this file,`
			`// You can obtain one at https://mozilla.org/MPL/2.0/.`

			`package main`

			`import (`
			`"crypto/tls"`
			`"encoding/binary"`
			`"fmt"`
			`io "io"`
			`"log"`
			`"net"`
			`"time"`

			`"github.com/syncthing/syncthing/lib/protocol"`
			`)`

			`type replicator interface {`
			`send(key string, addrs []DatabaseAddress, seen int64)`
			`}`

			`// a replicationSender tries to connect to the remote address and provide`
			`// them with a feed of replication updates.`
			`type replicationSender struct {`
			`dst string`
			`cert tls.Certificate // our certificate`
			`allowedIDs []protocol.DeviceID`
			`outbox chan ReplicationRecord`
			`stop chan struct{}`
			`}`

			`func newReplicationSender(dst string, cert tls.Certificate, allowedIDs []protocol.DeviceID) *replicationSender {`
			`return &replicationSender{`
			`dst: dst,`
			`cert: cert,`
			`allowedIDs: allowedIDs,`
			`outbox: make(chan ReplicationRecord, replicationOutboxSize),`
			`stop: make(chan struct{}),`
			`}`
			`}`

			`func (s *replicationSender) Serve() {`
			`// Sleep a little at startup. Peers often restart at the same time, and`
			`// this avoid the service failing and entering backoff state`
			`// unnecessarily, while also reducing the reconnect rate to something`
			`// reasonable by default.`
			`time.Sleep(2 * time.Second)`

			`tlsCfg := &tls.Config{`
			`Certificates: []tls.Certificate{s.cert},`
			`MinVersion: tls.VersionTLS12,`
			`InsecureSkipVerify: true,`
			`}`

			`// Dial the TLS connection.`
			`conn, err := tls.Dial("tcp", s.dst, tlsCfg)`
			`if err != nil {`
			`log.Println("Replication connect:", err)`
			`return`
			`}`
			`defer func() {`
			`conn.SetWriteDeadline(time.Now().Add(time.Second))`
			`conn.Close()`
			`}()`

			`// Get the other side device ID.`
			`remoteID, err := deviceID(conn)`
			`if err != nil {`
			`log.Println("Replication connect:", err)`
			`return`
			`}`

			`// Verify it's in the set of allowed device IDs.`
			`if !deviceIDIn(remoteID, s.allowedIDs) {`
			`log.Println("Replication connect: unexpected device ID:", remoteID)`
			`return`
			`}`

			`// Send records.`
			`buf := make([]byte, 1024)`
			`for {`
			`select {`
			`case rec := <-s.outbox:`
			`// Buffer must hold record plus four bytes for size`
			`size := rec.Size()`
			`if len(buf) < size+4 {`
			`buf = make([]byte, size+4)`
			`}`

			`// Record comes after the four bytes size`
			`n, err := rec.MarshalTo(buf[4:])`
			`if err != nil {`
			`// odd to get an error here, but we haven't sent anything`
			`// yet so it's not fatal`
			`replicationSendsTotal.WithLabelValues("error").Inc()`
			`log.Println("Replication marshal:", err)`
			`continue`
			`}`
			`binary.BigEndian.PutUint32(buf, uint32(n))`

			`// Send`
			`conn.SetWriteDeadline(time.Now().Add(5 * time.Second))`
			`if _, err := conn.Write(buf[:4+n]); err != nil {`
			`replicationSendsTotal.WithLabelValues("error").Inc()`
			`log.Println("Replication write:", err)`
			`return`
			`}`
			`replicationSendsTotal.WithLabelValues("success").Inc()`

			`case <-s.stop:`
			`return`
			`}`
			`}`
			`}`

			`func (s *replicationSender) Stop() {`
			`close(s.stop)`
			`}`

			`func (s *replicationSender) String() string {`
			`return fmt.Sprintf("replicationSender(%q)", s.dst)`
			`}`

			`func (s *replicationSender) send(key string, ps []DatabaseAddress, seen int64) {`
			`item := ReplicationRecord{`
			`Key: key,`
			`Addresses: ps,`
			`}`

			`// The send should never block. The inbox is suitably buffered for at`
			`// least a few seconds of stalls, which shouldn't happen in practice.`
			`select {`
			`case s.outbox <- item:`
			`default:`
			`replicationSendsTotal.WithLabelValues("drop").Inc()`
			`}`
			`}`

			`// a replicationMultiplexer sends to multiple replicators`
			`type replicationMultiplexer []replicator`

			`func (m replicationMultiplexer) send(key string, ps []DatabaseAddress, seen int64) {`
			`for _, s := range m {`
			`// each send is nonblocking`
			`s.send(key, ps, seen)`
			`}`
			`}`

all: Fix typos (#4772) Skip-check: authors 2018-02-24 07:51:29 +00:00			`// replicationListener accepts incoming connections and reads replication`
cmd/stdiscosrv: New discovery server (fixes #4618) This is a new revision of the discovery server. Relevant changes and non-changes: - Protocol towards clients is unchanged. - Recommended large scale design is still to be deployed behind nginx (I tested, and it's still a lot faster at terminating TLS). - Database backend is leveldb again, only. It scales enough, is easy to set up, and we don't need any backend to take care of. - Server supports replication. This is a simple TCP channel - protect it with a firewall when deploying over the internet. (We deploy this within the same datacenter, and with a firewall.) Any incoming client announces are sent over the replication channel(s) to other peer discosrvs. Incoming replication changes are applied to the database as if they came from clients, but without the TLS/certificate overhead. - Metrics are exposed using the prometheus library, when enabled. - The database values and replication protocol are protobuf, because JSON was quite CPU intensive when I tried that and benchmarked it. - The "Retry-After" value for failed lookups gets slowly increased from a default of 120 seconds, by 5 seconds for each failed lookup, independently by each discosrv. This lowers the query load over time for clients that are never seen. The Retry-After maxes out at 3600 after a couple of weeks of this increase. The number of failed lookups is stored in the database, now and then (avoiding making each lookup a database put). All in all this means clients can be pointed towards a cluster using just multiple A / AAAA records to gain both load sharing and redundancy (if one is down, clients will talk to the remaining ones). GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648 2018-01-14 08:52:31 +00:00			`// items from them. Incoming items are applied to the KV store.`
			`type replicationListener struct {`
			`addr string`
			`cert tls.Certificate`
			`allowedIDs []protocol.DeviceID`
			`db database`
			`stop chan struct{}`
			`}`

			`func newReplicationListener(addr string, cert tls.Certificate, allowedIDs []protocol.DeviceID, db database) *replicationListener {`
			`return &replicationListener{`
			`addr: addr,`
			`cert: cert,`
			`allowedIDs: allowedIDs,`
			`db: db,`
			`stop: make(chan struct{}),`
			`}`
			`}`

			`func (l *replicationListener) Serve() {`
			`tlsCfg := &tls.Config{`
			`Certificates: []tls.Certificate{l.cert},`
			`ClientAuth: tls.RequestClientCert,`
			`MinVersion: tls.VersionTLS12,`
			`InsecureSkipVerify: true,`
			`}`

			`lst, err := tls.Listen("tcp", l.addr, tlsCfg)`
			`if err != nil {`
			`log.Println("Replication listen:", err)`
			`return`
			`}`
			`defer lst.Close()`

			`for {`
			`select {`
			`case <-l.stop:`
			`return`
			`default:`
			`}`

			`// Accept a connection`
			`conn, err := lst.Accept()`
			`if err != nil {`
			`log.Println("Replication accept:", err)`
			`return`
			`}`

			`// Figure out the other side device ID`
			`remoteID, err := deviceID(conn.(*tls.Conn))`
			`if err != nil {`
			`log.Println("Replication accept:", err)`
			`conn.SetWriteDeadline(time.Now().Add(time.Second))`
			`conn.Close()`
			`continue`
			`}`

			`// Verify it is in the set of allowed device IDs`
			`if !deviceIDIn(remoteID, l.allowedIDs) {`
			`log.Println("Replication accept: unexpected device ID:", remoteID)`
			`conn.SetWriteDeadline(time.Now().Add(time.Second))`
			`conn.Close()`
			`continue`
			`}`

			`go l.handle(conn)`
			`}`
			`}`

			`func (l *replicationListener) Stop() {`
			`close(l.stop)`
			`}`

			`func (l *replicationListener) String() string {`
			`return fmt.Sprintf("replicationListener(%q)", l.addr)`
			`}`

			`func (l *replicationListener) handle(conn net.Conn) {`
			`defer func() {`
			`conn.SetWriteDeadline(time.Now().Add(time.Second))`
			`conn.Close()`
			`}()`

			`buf := make([]byte, 1024)`

			`for {`
			`select {`
			`case <-l.stop:`
			`return`
			`default:`
			`}`

			`conn.SetReadDeadline(time.Now().Add(time.Minute))`

			`// First four bytes are the size`
			`if _, err := io.ReadFull(conn, buf[:4]); err != nil {`
			`log.Println("Replication read size:", err)`
			`replicationRecvsTotal.WithLabelValues("error").Inc()`
			`return`
			`}`

			`// Read the rest of the record`
			`size := int(binary.BigEndian.Uint32(buf[:4]))`
			`if len(buf) < size {`
			`buf = make([]byte, size)`
			`}`
			`if _, err := io.ReadFull(conn, buf[:size]); err != nil {`
			`log.Println("Replication read record:", err)`
			`replicationRecvsTotal.WithLabelValues("error").Inc()`
			`return`
			`}`

			`// Unmarshal`
			`var rec ReplicationRecord`
			`if err := rec.Unmarshal(buf[:size]); err != nil {`
			`log.Println("Replication unmarshal:", err)`
			`replicationRecvsTotal.WithLabelValues("error").Inc()`
			`continue`
			`}`

			`// Store`
			`l.db.merge(rec.Key, rec.Addresses, rec.Seen)`
			`replicationRecvsTotal.WithLabelValues("success").Inc()`
			`}`
			`}`

			`func deviceID(conn *tls.Conn) (protocol.DeviceID, error) {`
			`// Handshake may not be complete on the server side yet, which we need`
			`// to get the client certificate.`
			`if !conn.ConnectionState().HandshakeComplete {`
			`if err := conn.Handshake(); err != nil {`
			`return protocol.DeviceID{}, err`
			`}`
			`}`

			`// We expect exactly one certificate.`
			`certs := conn.ConnectionState().PeerCertificates`
			`if len(certs) != 1 {`
			`return protocol.DeviceID{}, fmt.Errorf("unexpected number of certificates (%d != 1)", len(certs))`
			`}`

			`return protocol.NewDeviceID(certs[0].Raw), nil`
			`}`

			`func deviceIDIn(id protocol.DeviceID, ids []protocol.DeviceID) bool {`
			`for _, candidate := range ids {`
			`if id == candidate {`
			`return true`
			`}`
			`}`
			`return false`
			`}`