feat: add metrics to backend client

tommyknows · tommyknows · commit eee49a545883 · 2023-03-31T08:33:34.000+02:00
This commit adds two simple prometheus metrics to the http client that is being used by the backend: "requests_total" and "request_duration_histogram_seconds". With that we should get some initial visibility into backend failures, response times and client requests per second as well. I decided to register everything with `metrics.Registry` in an `init` function. Not perfect, but simple and probably good enough for a long time. I got to that registry by following the metrics-code of `controller-runtime`; I'm not sure if there's a better way to register metrics to that Registry, or if using a different registry would be fine as well and they'd simply get interlaced. Additionally, controller-runtime also has a [`rest_client_requests_total` metric](https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/metrics#pkg-constants) that it registers. As far as I can tell, this is for the default `http.Client` and comes from `client-go`. We could probably also make use of that, but would be missing a latency bucket. That latency bucket also exists, but is [disabled by default](kubernetes-sigs/controller-runtime#1587) because it created a cardinality explosion for some users, so I'm wary to enable it as well. By using a completely separate code-path and metrics-handler, we get metrics for only our backend, instead of them being interlaced with potential metrics from `client-go`. Additionally, we can start off with both latency and count-metrics, as I don't think we'll have issues with cardinality (we're only registering two labels - `client-go` also registered a "url" label, which is not optimal).
diff --git a/go.mod b/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/go-git/go-git/v5 v5.6.0
 	github.com/go-logr/logr v1.2.3
 	github.com/google/go-github/v49 v49.0.0
+	github.com/prometheus/client_golang v1.14.0
 	github.com/stretchr/testify v1.8.1
 	golang.org/x/exp v0.0.0-20230213192124-5e25df0256eb
 	golang.org/x/oauth2 v0.0.0-20221014153046-6fdb5e3db783
@@ -109,7 +110,6 @@ require (
 	github.com/pjbgf/sha1cd v0.3.0 // indirect
 	github.com/pkg/errors v0.9.1 // indirect
 	github.com/pmezard/go-difflib v1.0.0 // indirect
-	github.com/prometheus/client_golang v1.14.0 // indirect
 	github.com/prometheus/client_model v0.3.0 // indirect
 	github.com/prometheus/common v0.37.0 // indirect
 	github.com/prometheus/procfs v0.8.0 // indirect
diff --git a/internal/backend/backend.go b/internal/backend/backend.go
@@ -26,10 +26,52 @@ import (
 
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
+
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 
 	"github.com/snyk/kubernetes-scanner/internal/config"
 )
 
+// transport is the http.Client transport set by New; the default transport honors HTTP_PROXY settings.
+// init replaces this value with a round tripper that wraps it and records prometheus metrics.
+var transport = http.DefaultTransport
+
+func init() {
+	commonLabels := []string{"code", "method"} // label names shared by both metrics below
+	// the two metrics that get registered and attached to the transport:
+	var (
+		requests = prometheus.NewCounterVec(
+			prometheus.CounterOpts{
+				Subsystem: "http_outgoing",
+				Name:      "requests_total",
+				Help:      "A counter for outgoing requests.",
+			},
+			commonLabels,
+		)
+
+		durations = prometheus.NewHistogramVec(
+			prometheus.HistogramOpts{
+				Subsystem: "http_outgoing",
+				Name:      "request_duration_histogram_seconds",
+				Help:      "Request time duration.",
+				Buckets:   prometheus.DefBuckets,
+			},
+			commonLabels,
+		)
+	)
+
+	metrics.Registry.MustRegister(&httpMetricsCollector{ // register both metrics as one collector
+		metrics: []prometheus.Collector{requests, durations},
+	})
+
+	transport = promhttp.InstrumentRoundTripperDuration( // wrap the previous transport with both instrumentations
+		durations,
+		promhttp.InstrumentRoundTripperCounter(requests, transport),
+	)
+}
+
 type Backend struct {
 	apiEndpoint      string
 	clusterName      string
@@ -45,8 +87,7 @@ func New(clusterName string, cfg *config.Egress) *Backend {
 		authorizationKey: cfg.SnykServiceAccountToken,
 
 		client: &http.Client{
-			// the default transport automatically honors HTTP_PROXY settings.
-			Transport: http.DefaultTransport,
+			Transport: transport,
 			Timeout:   cfg.HTTPClientTimeout.Duration,
 		},
 	}
@@ -131,3 +172,57 @@ type resource struct {
 	ScannedAt        metav1.Time   `json:"scanned_at"`
 	DeletedAt        *metav1.Time  `json:"deleted_at,omitempty"`
 }
+
+//func instrumentClient(rt http.RoundTripper, reg prometheus.Registerer) (http.RoundTripper, error) {
+//commonLabels := []string{"code", "method"}
+//// all supported metrics:
+//var (
+//requests = prometheus.NewCounterVec(
+//prometheus.CounterOpts{
+//Subsystem: "http_outgoing",
+//Name:      "requests_total",
+//Help:      "A counter for outgoing requests.",
+//},
+//commonLabels,
+//)
+
+//durations = prometheus.NewHistogramVec(
+//prometheus.HistogramOpts{
+//Subsystem: "http_outgoing",
+//Name:      "request_duration_histogram_seconds",
+//Help:      "Request time duration.",
+//Buckets:   prometheus.DefBuckets,
+//},
+//commonLabels,
+//)
+//)
+
+//h := &httpMetricsCollector{
+//metrics: []prometheus.Collector{requests, durations},
+//}
+//// unregister the handler to make sure it's never registered twice.
+//_ = reg.Unregister(h)
+
+//return promhttp.InstrumentRoundTripperDuration(
+//durations,
+//promhttp.InstrumentRoundTripperCounter(requests, rt),
+//), reg.Register(h)
+//}
+
+type httpMetricsCollector struct { // groups several collectors so they can be registered as a single prometheus.Collector
+	metrics []prometheus.Collector // collectors that Describe and Collect delegate to, in slice order
+}
+
+// Describe implements the prometheus.Collector interface by passing the channel to each wrapped collector's Describe.
+func (h *httpMetricsCollector) Describe(in chan<- *prometheus.Desc) {
+	for _, m := range h.metrics {
+		m.Describe(in)
+	}
+}
+
+// Collect implements the prometheus.Collector interface by passing the channel to each wrapped collector's Collect.
+func (h *httpMetricsCollector) Collect(in chan<- prometheus.Metric) {
+	for _, m := range h.metrics {
+		m.Collect(in)
+	}
+}
diff --git a/internal/backend/backend_test.go b/internal/backend/backend_test.go
@@ -32,6 +32,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/metrics"
 
 	"github.com/snyk/kubernetes-scanner/internal/config"
 )
@@ -71,6 +72,21 @@ func TestBackend(t *testing.T) {
 	err = b.Upsert(ctx, pod, "v1", orgID, &metav1.Time{Time: now().Local()})
 	require.NoError(t, err)
 
+	// some simple checks to make sure that the metrics show up.
+	metrics, err := metrics.Registry.Gather()
+	require.NoError(t, err)
+
+	var customMetrics int
+	for _, metric := range metrics {
+		switch *metric.Name {
+		case "http_outgoing_requests_total":
+			require.Equal(t, metric.GetMetric()[0].Counter.GetValue(), 2.0)
+			customMetrics++
+		case "http_outgoing_request_duration_histogram_seconds":
+			customMetrics++
+		}
+	}
+	require.Equal(t, customMetrics, 2)
 }
 
 func TestBackendErrorHandling(t *testing.T) {