From 9ad4dbb8e9852d529d13e212c03cd53f320f7261 Mon Sep 17 00:00:00 2001
From: Ignas Kiela
Date: Sat, 3 Jun 2023 00:46:35 +0300
Subject: [PATCH] Track number of builds currently running and export number
 of build slots

The duration histogram already tracks the number of builds finished,
and given the number of builds currently running, the number of builds
started is trivial to derive, so the separate started counter is
redundant. A gauge for currently running builds also makes it easier
to determine current utilization.

Additionally, a status label is added to the duration metric, to
preserve the per-status distribution previously tracked by the
finished-builds counter.
---
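Notes: the PromQL below is a rough sketch of how these metrics could
be consumed, assuming a standard Prometheus scrape of the worker; the
queries are illustrative and not part of the patch itself.

    # Overall slot utilization across all workers.
    sum(buildsrht_builds_running) / sum(buildsrht_build_workers)

    # Builds started, derived from builds finished (the histogram's
    # _count series summed over statuses) plus builds still running.
    sum(buildsrht_build_duration_count) + sum(buildsrht_builds_running)

    # Finished builds by status over the past hour.
    sum by (status) (increase(buildsrht_build_duration_count[1h]))
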
"github.com/prometheus/client_golang/prometheus/promauto" ) var ( @@ -25,6 +28,11 @@ var ( jobs map[int]*JobContext jobsMutex sync.Mutex + + build_workers = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "buildsrht_build_workers", + Help: "The number of build workers configured", + }) ) func main() { @@ -35,6 +43,8 @@ func main() { "path to config.ini file") flag.Parse() + build_workers.Set(float64(workers)) + var err error for _, path := range []string{configPath, "/etc/sr.ht/config.ini"} { config, err = ini.LoadFile(path) -- 2.38.5