syncthing/cmd/ursrv/serve/analytics.go

277 lines
5.4 KiB
Go
Raw Permalink Normal View History

// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package serve
2014-06-28 09:24:25 +00:00
2016-05-30 07:52:38 +00:00
import (
"regexp"
2016-05-30 07:52:38 +00:00
"sort"
2018-02-25 16:55:08 +00:00
"strconv"
2016-05-30 07:52:38 +00:00
"strings"
)
2014-06-28 09:24:25 +00:00
// analytic is one row of a frequency breakdown.
type analytic struct {
	// Key is the label being counted.
	Key string
	// Count is the number of occurrences attributed to Key.
	Count int
	// Percentage is the share of the total, on a 0-100 scale.
	Percentage float64
	// Items optionally holds the individual rows that were merged into this
	// one; it is omitted from JSON when empty.
	Items []analytic `json:",omitempty"`
}
type analyticList []analytic
func (l analyticList) Less(a, b int) bool {
2016-05-30 07:52:38 +00:00
if l[a].Key == "Others" {
return false
2016-05-30 07:52:38 +00:00
}
if l[b].Key == "Others" {
return true
2016-05-30 07:52:38 +00:00
}
2014-06-28 09:24:25 +00:00
return l[b].Count < l[a].Count // inverse
}
func (l analyticList) Swap(a, b int) {
l[a], l[b] = l[b], l[a]
}
func (l analyticList) Len() int {
return len(l)
}
// Returns a list of frequency analytics for a given list of strings.
2014-12-09 15:52:02 +00:00
func analyticsFor(ss []string, cutoff int) []analytic {
2014-06-28 09:24:25 +00:00
m := make(map[string]int)
t := 0
for _, s := range ss {
m[s]++
t++
}
l := make([]analytic, 0, len(m))
for k, c := range m {
2016-05-30 07:52:38 +00:00
l = append(l, analytic{
Key: k,
Count: c,
Percentage: 100 * float64(c) / float64(t),
})
2014-06-28 09:24:25 +00:00
}
sort.Sort(analyticList(l))
2014-12-09 15:52:02 +00:00
if cutoff > 0 && len(l) > cutoff {
c := 0
for _, i := range l[cutoff:] {
c += i.Count
}
2016-05-30 07:52:38 +00:00
l = append(l[:cutoff], analytic{
Key: "Others",
Count: c,
Percentage: 100 * float64(c) / float64(t),
})
2014-12-09 15:52:02 +00:00
}
2014-06-28 09:24:25 +00:00
return l
}
2018-02-25 16:55:08 +00:00
// penetrationLevels finds the points at which certain penetration levels are
// met.
//
// as is sorted in place so that the entry whose Key is greatest according to
// versionLess comes first. Percentages are then accumulated in that order,
// and each time the running sum reaches the next value in points, a copy of
// the current entry is emitted with Count set to that point (truncated to an
// int) and Percentage set to the running sum. At most one point is emitted
// per entry. points is presumably in ascending order; verify against callers.
//
// An empty points slice yields a nil result.
func penetrationLevels(as []analytic, points []float64) []analytic {
	sort.Slice(as, func(a, b int) bool {
		return versionLess(as[b].Key, as[a].Key)
	})

	// Without this guard, points[idx] below would panic for a non-empty as.
	if len(points) == 0 {
		return nil
	}

	var res []analytic

	idx := 0
	sum := 0.0
	for _, a := range as {
		sum += a.Percentage
		if sum < points[idx] {
			continue
		}
		a.Count = int(points[idx])
		a.Percentage = sum
		res = append(res, a)
		idx++
		if idx == len(points) {
			break
		}
	}
	return res
}
2014-06-28 09:24:25 +00:00
// statsForInts sorts data in place and returns, in order, the values found
// at the 5% position, the midpoint, the 95% position and the last position
// of the sorted slice. An empty slice yields all zeroes.
func statsForInts(data []int) [4]float64 {
	n := len(data)
	if n == 0 {
		return [4]float64{}
	}

	sort.Ints(data)

	low := int(float64(n) * 0.05)
	high := int(float64(n) * 0.95)
	return [4]float64{
		float64(data[low]),
		float64(data[n/2]),
		float64(data[high]),
		float64(data[n-1]),
	}
}
// statsForInt64s sorts data in place and returns, in order, the values found
// at the 5% position, the midpoint, the 95% position and the last position
// of the sorted slice. An empty slice yields all zeroes.
func statsForInt64s(data []int64) [4]float64 {
	n := len(data)
	if n == 0 {
		return [4]float64{}
	}

	sort.Slice(data, func(i, j int) bool { return data[i] < data[j] })

	low := int(float64(n) * 0.05)
	high := int(float64(n) * 0.95)
	return [4]float64{
		float64(data[low]),
		float64(data[n/2]),
		float64(data[high]),
		float64(data[n-1]),
	}
}
2014-06-28 09:24:25 +00:00
// statsForFloats sorts data in place and returns, in order, the values found
// at the 5% position, the midpoint, the 95% position and the last position
// of the sorted slice. An empty slice yields all zeroes.
func statsForFloats(data []float64) [4]float64 {
	n := len(data)
	if n == 0 {
		return [4]float64{}
	}

	sort.Float64s(data)

	low := int(float64(n) * 0.05)
	high := int(float64(n) * 0.95)
	return [4]float64{
		data[low],
		data[n/2],
		data[high],
		data[n-1],
	}
}
2016-05-30 07:52:38 +00:00
func group(by func(string) string, as []analytic, perGroup int, otherPct float64) []analytic {
2016-05-30 07:52:38 +00:00
var res []analytic
next:
for _, a := range as {
group := by(a.Key)
for i := range res {
if res[i].Key == group {
res[i].Count += a.Count
res[i].Percentage += a.Percentage
if len(res[i].Items) < perGroup {
res[i].Items = append(res[i].Items, a)
}
continue next
}
}
res = append(res, analytic{
Key: group,
Count: a.Count,
Percentage: a.Percentage,
Items: []analytic{a},
})
}
sort.Sort(analyticList(res))
if otherPct > 0 {
// Groups with less than otherPCt go into "Other"
other := analytic{
Key: "Other",
}
for i := 0; i < len(res); i++ {
if res[i].Percentage < otherPct || res[i].Key == "Other" {
other.Count += res[i].Count
other.Percentage += res[i].Percentage
res = append(res[:i], res[i+1:]...)
i--
}
}
if other.Count > 0 {
res = append(res, other)
}
}
2016-05-30 07:52:38 +00:00
return res
}
// byVersion reduces a dotted version string to its first two dot-separated
// components, e.g. "v0.14.40" becomes "v0.14". Strings without a dot are
// returned unchanged.
func byVersion(s string) string {
	// Anything after the second dot ends up in the third piece and is
	// dropped.
	pieces := strings.SplitN(s, ".", 3)
	if len(pieces) < 2 {
		return s
	}
	return pieces[0] + "." + pieces[1]
}
// byPlatform returns the part of s that precedes the first "-", e.g.
// "linux-amd64" becomes "linux". Strings without a dash are returned
// unchanged.
func byPlatform(s string) string {
	dash := strings.Index(s, "-")
	if dash < 0 {
		return s
	}
	return s[:dash]
}
2016-06-07 06:12:32 +00:00
// numericGoVersion matches a leading "go<digit>.<digits>" prefix, such as the
// "go1.10" in "go1.10.3".
var numericGoVersion = regexp.MustCompile(`^go[0-9]\.[0-9]+`)

// byCompiler returns the "go<major>.<minor>" prefix of s, or "Other" when s
// does not start with one.
func byCompiler(s string) string {
	if m := numericGoVersion.FindString(s); m != "" {
		return m
	}
	return "Other"
}
2018-02-25 16:55:08 +00:00
func versionLess(a, b string) bool {
arel, apre := versionParts(a)
brel, bpre := versionParts(b)
minlen := len(arel)
if l := len(brel); l < minlen {
minlen = l
}
for i := 0; i < minlen; i++ {
if arel[i] != brel[i] {
return arel[i] < brel[i]
}
}
// Longer version is newer, when the preceding parts are equal
2018-02-25 16:55:08 +00:00
if len(arel) != len(brel) {
return len(arel) < len(brel)
}
if apre != bpre {
// "(+dev)" versions are ahead
if apre == plusStr {
return false
}
if bpre == plusStr {
return true
}
return apre < bpre
}
2018-02-25 16:55:08 +00:00
// don't actually care how the prerelease stuff compares for our purposes
return false
}
// versionParts splits a version, as returned from transformVersion, into its
// numeric release fields and the qualifier that follows them.
//
//	"v1.2.3-beta.2" -> []int{1, 2, 3}, "beta.2"
//	"v1.2.3 (+dev)" -> []int{1, 2, 3}, "(+dev)"
//
// The first byte of v is dropped unconditionally; it is presumably always the
// "v" prefix (verify against transformVersion). The remainder is split at the
// first space or, if there is none, at the first dash. A release field that
// is not a number becomes 0. An empty v yields []int{0} and an empty
// qualifier instead of panicking.
func versionParts(v string) ([]int, string) {
	if v != "" {
		v = v[1:]
	}

	parts := strings.SplitN(v, " ", 2) // " (+dev)" versions
	if len(parts) == 1 {
		parts = strings.SplitN(parts[0], "-", 2) // "-rc.1" type versions
	}

	fields := strings.Split(parts[0], ".")
	release := make([]int, len(fields))
	for i, f := range fields {
		// The error is deliberately ignored: a non-numeric field simply
		// ends up as 0.
		release[i], _ = strconv.Atoi(f)
	}

	var prerelease string
	if len(parts) > 1 {
		prerelease = parts[1]
	}

	return release, prerelease
}