db_postgres: report network metrics with hostname label
This helps determine when it is necessary to contact a network's operators to request a connection limit bump.
This commit is contained in:
parent
dac003960c
commit
421d3f258a
@ -177,6 +177,9 @@ func (db *PostgresDB) Close() error {
|
||||
}
|
||||
|
||||
func (db *PostgresDB) RegisterMetrics(r prometheus.Registerer) error {
|
||||
if err := r.Register(&postgresMetricsCollector{db}); err != nil {
|
||||
return err
|
||||
}
|
||||
return r.Register(promcollectors.NewDBStatsCollector(db.db, "main"))
|
||||
}
|
||||
|
||||
@ -558,3 +561,78 @@ func (db *PostgresDB) StoreReadReceipt(ctx context.Context, networkID int64, rec
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (db *PostgresDB) listTopNetworkAddrs(ctx context.Context) (map[string]int, error) {
|
||||
ctx, cancel := context.WithTimeout(ctx, postgresQueryTimeout)
|
||||
defer cancel()
|
||||
|
||||
addrs := make(map[string]int)
|
||||
|
||||
rows, err := db.db.QueryContext(ctx, `
|
||||
SELECT addr, COUNT(addr) AS n
|
||||
FROM "Network"
|
||||
GROUP BY addr
|
||||
ORDER BY n DESC`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer rows.Close()
|
||||
|
||||
for rows.Next() {
|
||||
var (
|
||||
addr string
|
||||
n int
|
||||
)
|
||||
if err := rows.Scan(&addr, &n); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
addrs[addr] = n
|
||||
}
|
||||
|
||||
return addrs, rows.Err()
|
||||
}
|
||||
|
||||
// postgresNetworksTotalDesc describes the soju_networks_total metric, which
// carries a single variable label, "hostname".
var postgresNetworksTotalDesc = prometheus.NewDesc(
	"soju_networks_total",
	"Number of networks",
	[]string{"hostname"},
	nil,
)
|
||||
|
||||
type postgresMetricsCollector struct {
|
||||
db *PostgresDB
|
||||
}
|
||||
|
||||
var _ prometheus.Collector = (*postgresMetricsCollector)(nil)
|
||||
|
||||
func (c *postgresMetricsCollector) Describe(ch chan<- *prometheus.Desc) {
|
||||
ch <- postgresNetworksTotalDesc
|
||||
}
|
||||
|
||||
func (c *postgresMetricsCollector) Collect(ch chan<- prometheus.Metric) {
|
||||
addrs, err := c.db.listTopNetworkAddrs(context.TODO())
|
||||
if err != nil {
|
||||
ch <- prometheus.NewInvalidMetric(postgresNetworksTotalDesc, err)
|
||||
return
|
||||
}
|
||||
|
||||
// Group by hostname
|
||||
hostnames := make(map[string]int)
|
||||
for addr, n := range addrs {
|
||||
hostname := addr
|
||||
network := Network{Addr: addr}
|
||||
if u, err := network.URL(); err == nil {
|
||||
hostname = u.Hostname()
|
||||
}
|
||||
hostnames[hostname] += n
|
||||
}
|
||||
|
||||
// Group networks with low counts for privacy
|
||||
watermark := 10
|
||||
grouped := 0
|
||||
for hostname, n := range hostnames {
|
||||
if n >= watermark && hostname != "" && hostname != "*" {
|
||||
ch <- prometheus.MustNewConstMetric(postgresNetworksTotalDesc, prometheus.GaugeValue, float64(n), hostname)
|
||||
} else {
|
||||
grouped += n
|
||||
}
|
||||
}
|
||||
if grouped > 0 {
|
||||
ch <- prometheus.MustNewConstMetric(postgresNetworksTotalDesc, prometheus.GaugeValue, float64(grouped), "*")
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user