mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-08 17:24:08 +00:00
916ec63af6
This is a new revision of the discovery server. Relevant changes and non-changes: - Protocol towards clients is unchanged. - Recommended large scale design is still to be deployed behind nginx (I tested, and it's still a lot faster at terminating TLS). - Database backend is leveldb again, only. It scales enough, is easy to set up, and we don't need any backend to take care of. - Server supports replication. This is a simple TCP channel - protect it with a firewall when deploying over the internet. (We deploy this within the same datacenter, and with firewall.) Any incoming client announces are sent over the replication channel(s) to other peer discosrvs. Incoming replication changes are applied to the database as if they came from clients, but without the TLS/certificate overhead. - Metrics are exposed using the prometheus library, when enabled. - The database values and replication protocol are protobuf, because JSON was quite CPU intensive when I tried that and benchmarked it. - The "Retry-After" value for failed lookups gets slowly increased from a default of 120 seconds, by 5 seconds for each failed lookup, independently by each discosrv. This lowers the query load over time for clients that are never seen. The Retry-After maxes out at 3600 after a couple of weeks of this increase. The number of failed lookups is stored in the database, now and then (avoiding making each lookup a database put). All in all this means clients can be pointed towards a cluster using just multiple A / AAAA records to gain both load sharing and redundancy (if one is down, clients will talk to the remaining ones). GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4648
220 lines
6.1 KiB
Go
220 lines
6.1 KiB
Go
package procfs
|
|
|
|
import (
|
|
"bufio"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
)
|
|
|
|
// CPUStat holds the amount of time a CPU has spent in each of the states
// listed on a cpu line of the procfs stat file. When filled in by
// parseCPUStat, every value is the raw counter divided by userHZ.
type CPUStat struct {
	User, Nice, System, Idle float64
	Iowait, IRQ, SoftIRQ     float64
	Steal, Guest, GuestNice  float64
}
|
|
|
|
// SoftIRQStat holds the individual softirq counters that follow the total on
// the softirq line of the procfs stat file.
// An introduction to softirqs is available at https://0xax.gitbooks.io/linux-insides/content/interrupts/interrupts-9.html
// Per-CPU numbers can be obtained by reading /proc/softirqs instead.
type SoftIRQStat struct {
	Hi, Timer          uint64
	NetTx, NetRx       uint64
	Block, BlockIoPoll uint64
	Tasklet, Sched     uint64
	Hrtimer, Rcu       uint64
}
|
|
|
|
// Stat represents kernel/system statistics.
|
|
type Stat struct {
|
|
// Boot time in seconds since the Epoch.
|
|
BootTime uint64
|
|
// Summed up cpu statistics.
|
|
CPUTotal CPUStat
|
|
// Per-CPU statistics.
|
|
CPU []CPUStat
|
|
// Number of times interrupts were handled, which contains numbered and unnumbered IRQs.
|
|
IRQTotal uint64
|
|
// Number of times a numbered IRQ was triggered.
|
|
IRQ []uint64
|
|
// Number of times a context switch happened.
|
|
ContextSwitches uint64
|
|
// Number of times a process was created.
|
|
ProcessCreated uint64
|
|
// Number of processes currently running.
|
|
ProcessesRunning uint64
|
|
// Number of processes currently blocked (waiting for IO).
|
|
ProcessesBlocked uint64
|
|
// Number of times a softirq was scheduled.
|
|
SoftIRQTotal uint64
|
|
// Detailed softirq statistics.
|
|
SoftIRQ SoftIRQStat
|
|
}
|
|
|
|
// NewStat returns kernel/system statistics read from /proc/stat.
|
|
func NewStat() (Stat, error) {
|
|
fs, err := NewFS(DefaultMountPoint)
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
|
|
return fs.NewStat()
|
|
}
|
|
|
|
// Parse a cpu statistics line and returns the CPUStat struct plus the cpu id (or -1 for the overall sum).
|
|
func parseCPUStat(line string) (CPUStat, int64, error) {
|
|
cpuStat := CPUStat{}
|
|
var cpu string
|
|
|
|
count, err := fmt.Sscanf(line, "%s %f %f %f %f %f %f %f %f %f %f",
|
|
&cpu,
|
|
&cpuStat.User, &cpuStat.Nice, &cpuStat.System, &cpuStat.Idle,
|
|
&cpuStat.Iowait, &cpuStat.IRQ, &cpuStat.SoftIRQ, &cpuStat.Steal,
|
|
&cpuStat.Guest, &cpuStat.GuestNice)
|
|
|
|
if err != nil && err != io.EOF {
|
|
return CPUStat{}, -1, fmt.Errorf("couldn't parse %s (cpu): %s", line, err)
|
|
}
|
|
if count == 0 {
|
|
return CPUStat{}, -1, fmt.Errorf("couldn't parse %s (cpu): 0 elements parsed", line)
|
|
}
|
|
|
|
cpuStat.User /= userHZ
|
|
cpuStat.Nice /= userHZ
|
|
cpuStat.System /= userHZ
|
|
cpuStat.Idle /= userHZ
|
|
cpuStat.Iowait /= userHZ
|
|
cpuStat.IRQ /= userHZ
|
|
cpuStat.SoftIRQ /= userHZ
|
|
cpuStat.Steal /= userHZ
|
|
cpuStat.Guest /= userHZ
|
|
cpuStat.GuestNice /= userHZ
|
|
|
|
if cpu == "cpu" {
|
|
return cpuStat, -1, nil
|
|
}
|
|
|
|
cpuID, err := strconv.ParseInt(cpu[3:], 10, 64)
|
|
if err != nil {
|
|
return CPUStat{}, -1, fmt.Errorf("couldn't parse %s (cpu/cpuid): %s", line, err)
|
|
}
|
|
|
|
return cpuStat, cpuID, nil
|
|
}
|
|
|
|
// Parse a softirq line.
|
|
func parseSoftIRQStat(line string) (SoftIRQStat, uint64, error) {
|
|
softIRQStat := SoftIRQStat{}
|
|
var total uint64
|
|
var prefix string
|
|
|
|
_, err := fmt.Sscanf(line, "%s %d %d %d %d %d %d %d %d %d %d %d",
|
|
&prefix, &total,
|
|
&softIRQStat.Hi, &softIRQStat.Timer, &softIRQStat.NetTx, &softIRQStat.NetRx,
|
|
&softIRQStat.Block, &softIRQStat.BlockIoPoll,
|
|
&softIRQStat.Tasklet, &softIRQStat.Sched,
|
|
&softIRQStat.Hrtimer, &softIRQStat.Rcu)
|
|
|
|
if err != nil {
|
|
return SoftIRQStat{}, 0, fmt.Errorf("couldn't parse %s (softirq): %s", line, err)
|
|
}
|
|
|
|
return softIRQStat, total, nil
|
|
}
|
|
|
|
// NewStat returns an information about current kernel/system statistics.
|
|
func (fs FS) NewStat() (Stat, error) {
|
|
// See https://www.kernel.org/doc/Documentation/filesystems/proc.txt
|
|
|
|
f, err := os.Open(fs.Path("stat"))
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
defer f.Close()
|
|
|
|
stat := Stat{}
|
|
|
|
scanner := bufio.NewScanner(f)
|
|
for scanner.Scan() {
|
|
line := scanner.Text()
|
|
parts := strings.Fields(scanner.Text())
|
|
// require at least <key> <value>
|
|
if len(parts) < 2 {
|
|
continue
|
|
}
|
|
switch {
|
|
case parts[0] == "btime":
|
|
if stat.BootTime, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (btime): %s", parts[1], err)
|
|
}
|
|
case parts[0] == "intr":
|
|
if stat.IRQTotal, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (intr): %s", parts[1], err)
|
|
}
|
|
numberedIRQs := parts[2:]
|
|
stat.IRQ = make([]uint64, len(numberedIRQs))
|
|
for i, count := range numberedIRQs {
|
|
if stat.IRQ[i], err = strconv.ParseUint(count, 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (intr%d): %s", count, i, err)
|
|
}
|
|
}
|
|
case parts[0] == "ctxt":
|
|
if stat.ContextSwitches, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (ctxt): %s", parts[1], err)
|
|
}
|
|
case parts[0] == "processes":
|
|
if stat.ProcessCreated, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (processes): %s", parts[1], err)
|
|
}
|
|
case parts[0] == "procs_running":
|
|
if stat.ProcessesRunning, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (procs_running): %s", parts[1], err)
|
|
}
|
|
case parts[0] == "procs_blocked":
|
|
if stat.ProcessesBlocked, err = strconv.ParseUint(parts[1], 10, 64); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s (procs_blocked): %s", parts[1], err)
|
|
}
|
|
case parts[0] == "softirq":
|
|
softIRQStats, total, err := parseSoftIRQStat(line)
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
stat.SoftIRQTotal = total
|
|
stat.SoftIRQ = softIRQStats
|
|
case strings.HasPrefix(parts[0], "cpu"):
|
|
cpuStat, cpuID, err := parseCPUStat(line)
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
if cpuID == -1 {
|
|
stat.CPUTotal = cpuStat
|
|
} else {
|
|
for int64(len(stat.CPU)) <= cpuID {
|
|
stat.CPU = append(stat.CPU, CPUStat{})
|
|
}
|
|
stat.CPU[cpuID] = cpuStat
|
|
}
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return Stat{}, fmt.Errorf("couldn't parse %s: %s", f.Name(), err)
|
|
}
|
|
|
|
return stat, nil
|
|
}
|