diff --git a/cmd/aggregate/main.go b/cmd/aggregate/main.go index 06cb51767..54e34d22a 100644 --- a/cmd/aggregate/main.go +++ b/cmd/aggregate/main.go @@ -2,10 +2,11 @@ package main import ( "database/sql" - _ "github.com/lib/pq" "log" "os" "time" + + _ "github.com/lib/pq" ) var dbConn = getEnvDefault("UR_DB_URL", "postgres://user:password@localhost/ur?sslmode=disable") @@ -38,9 +39,16 @@ func main() { } func runAggregation(db *sql.DB) { - since := maxIndexedDay(db) - log.Println("Aggregating data since", since) - rows, err := aggregate(db, since) + since := maxIndexedDay(db, "VersionSummary") + log.Println("Aggregating VersionSummary data since", since) + rows, err := aggregateVersionSummary(db, since) + if err != nil { + log.Fatalln("aggregate:", err) + } + log.Println("Inserted", rows, "rows") + + log.Println("Aggregating UserMovement data") + rows, err = aggregateUserMovement(db) if err != nil { log.Fatalln("aggregate:", err) } @@ -64,6 +72,15 @@ func setupDB(db *sql.DB) error { return err } + _, err = db.Exec(`CREATE TABLE IF NOT EXISTS UserMovement ( + Day TIMESTAMP NOT NULL, + Added INTEGER NOT NULL, + Removed INTEGER NOT NULL + )`) + if err != nil { + return err + } + row := db.QueryRow(`SELECT 'UniqueDayVersionIndex'::regclass`) if err := row.Scan(nil); err != nil { _, err = db.Exec(`CREATE UNIQUE INDEX UniqueDayVersionIndex ON VersionSummary (Day, Version)`) @@ -74,12 +91,17 @@ func setupDB(db *sql.DB) error { _, err = db.Exec(`CREATE INDEX DayIndex ON VerionSummary (Day)`) } + row = db.QueryRow(`SELECT 'MovementDayIndex'::regclass`) + if err := row.Scan(nil); err != nil { + _, err = db.Exec(`CREATE INDEX MovementDayIndex ON UserMovement (Day)`) + } + return err } -func maxIndexedDay(db *sql.DB) time.Time { +func maxIndexedDay(db *sql.DB, table string) time.Time { var t time.Time - row := db.QueryRow("SELECT MAX(Day) FROM VersionSummary") + row := db.QueryRow("SELECT MAX(Day) FROM " + table) err := row.Scan(&t) if err != nil { return time.Time{} @@ -87,7 +109,7 @@ func maxIndexedDay(db *sql.DB) time.Time { return t } -func aggregate(db *sql.DB, since time.Time) (int64, error) { +func aggregateVersionSummary(db *sql.DB, since time.Time) (int64, error) { res, err := db.Exec(`INSERT INTO VersionSummary ( SELECT DATE_TRUNC('day', Received) AS Day, @@ -107,3 +129,76 @@ func aggregate(db *sql.DB, since time.Time) (int64, error) { return res.RowsAffected() } + +func aggregateUserMovement(db *sql.DB) (int64, error) { + rows, err := db.Query(`SELECT + DATE_TRUNC('day', Received) AS Day, + UniqueID + FROM Reports + WHERE + DATE_TRUNC('day', Received) < DATE_TRUNC('day', NOW()) + AND Version like 'v0.%' + ORDER BY Day + `) + if err != nil { + return 0, err + } + defer rows.Close() + + firstSeen := make(map[string]time.Time) + lastSeen := make(map[string]time.Time) + var minTs time.Time + + for rows.Next() { + var ts time.Time + var id string + if err := rows.Scan(&ts, &id); err != nil { + return 0, err + } + + if minTs.IsZero() { + minTs = ts + } + if _, ok := firstSeen[id]; !ok { + firstSeen[id] = ts + } + lastSeen[id] = ts + } + + type sumRow struct { + day time.Time + added int + removed int + } + var sumRows []sumRow + for t := minTs; t.Before(time.Now().Truncate(24 * time.Hour)); t = t.AddDate(0, 0, 1) { + var added, removed int + for id, first := range firstSeen { + last := lastSeen[id] + if first.Equal(t) { + added++ + } + if last == t && t.Before(time.Now().AddDate(0, 0, -14)) { + removed++ + } + } + sumRows = append(sumRows, sumRow{t, added, removed}) + } + + tx, err := db.Begin() + if err != nil { + return 0, err + } + if _, err := tx.Exec("DELETE FROM UserMovement"); err != nil { + tx.Rollback() + return 0, err + } + for _, r := range sumRows { + if _, err := tx.Exec("INSERT INTO UserMovement (Day, Added, Removed) VALUES ($1, $2, $3)", r.day, r.added, r.removed); err != nil { + tx.Rollback() + return 0, err + } + } + + return int64(len(sumRows)), tx.Commit() +} diff --git a/cmd/ursrv/main.go b/cmd/ursrv/main.go index b6946d6eb..11f3d020a 100644 --- a/cmd/ursrv/main.go +++ b/cmd/ursrv/main.go @@ -180,6 +180,7 @@ func main() { http.HandleFunc("/", withDB(db, rootHandler)) http.HandleFunc("/newdata", withDB(db, newDataHandler)) http.HandleFunc("/summary.json", withDB(db, summaryHandler)) + http.HandleFunc("/movement.json", withDB(db, movementHandler)) http.Handle("/static/", http.StripPrefix("/static/", http.FileServer(http.Dir("static")))) err = srv.Serve(listener) @@ -269,6 +270,25 @@ func summaryHandler(db *sql.DB, w http.ResponseWriter, r *http.Request) { w.Write(bs) } +func movementHandler(db *sql.DB, w http.ResponseWriter, r *http.Request) { + s, err := getMovement(db) + if err != nil { + log.Println("movementHandler:", err) + http.Error(w, "Database Error", http.StatusInternalServerError) + return + } + + bs, err := json.Marshal(s) + if err != nil { + log.Println("movementHandler:", err) + http.Error(w, "JSON Encode Error", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + w.Write(bs) +} + type category struct { Values [4]float64 Key string @@ -554,3 +574,33 @@ func getSummary(db *sql.DB) (summary, error) { return s, nil } + +func getMovement(db *sql.DB) ([][]interface{}, error) { + rows, err := db.Query(`SELECT Day, Added, Removed FROM UserMovement WHERE Day > now() - '1 year'::INTERVAL ORDER BY Day`) + if err != nil { + return nil, err + } + defer rows.Close() + + res := [][]interface{}{ + {"Day", "Joined", "Left"}, + } + + for rows.Next() { + var day time.Time + var added, removed int + err := rows.Scan(&day, &added, &removed) + if err != nil { + return nil, err + } + + row := []interface{}{day.Format("2006-01-02"), added, -removed} + if removed == 0 { + row[2] = nil + } + + res = append(res, row) + } + + return res, nil +} diff --git a/static/index.html b/static/index.html index ec511792f..d13ec9433 100644 --- a/static/index.html +++ b/static/index.html @@ -15,14 +15,14 @@ found in the LICENSE file.
This is the total number of unique users with reporting enabled, per day. Area color represents the major version.
- + + ++ This is the total number of unique users joining and leaving per day. A user is counted as "joined" on first the day their unique ID is seen, and as "left" on the last day the unique ID was seen before a two weeks or longer absence. +
+ ++ Reappearance of users cause the "left" data to shrink retroactively. + Spikes in December 2014 were due to the unique ID format changing and have been partly removed to avoid skewing the graph scale. +