Skip to content

Commit 25ea723

Browse files
authored
Add telemetry collection of deployment replica count (#1551)
Problem: Want to collect deployment replica count as a telemetry datapoint. Solution: Collect deployment replica count.
1 parent dca4d64 commit 25ea723

File tree

7 files changed

+295
-12
lines changed

7 files changed

+295
-12
lines changed

cmd/gateway/commands.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,7 @@ func createStaticModeCommand() *cobra.Command {
153153
PodIP: podIP,
154154
ServiceName: serviceName.value,
155155
Namespace: namespace,
156+
Name: podName,
156157
},
157158
HealthConfig: config.HealthConfig{
158159
Enabled: !disableHealth,

deploy/helm-chart/templates/rbac.yaml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,36 @@ rules:
2121
- namespaces
2222
- services
2323
- secrets
24-
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
25-
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
26-
- nodes
2724
verbs:
2825
- list
2926
- watch
27+
# FIXME(bjee19): make nodes, pods, and replicasets permissions dependent on telemetry being enabled.
28+
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
29+
- apiGroups:
30+
- ""
31+
resources:
32+
- pods
33+
verbs:
34+
- get
35+
- apiGroups:
36+
- ""
37+
resources:
38+
- nodes
39+
verbs:
40+
- list
3041
- apiGroups:
3142
- ""
3243
resources:
3344
- events
3445
verbs:
3546
- create
3647
- patch
48+
- apiGroups:
49+
- apps
50+
resources:
51+
- replicasets
52+
verbs:
53+
- get
3754
- apiGroups:
3855
- discovery.k8s.io
3956
resources:

deploy/manifests/nginx-gateway.yaml

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,36 @@ rules:
3232
- namespaces
3333
- services
3434
- secrets
35-
# FIXME(bjee19): make nodes permission dependent on telemetry being enabled.
36-
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
37-
- nodes
3835
verbs:
3936
- list
4037
- watch
38+
# FIXME(bjee19): make nodes, pods, and replicasets permissions dependent on telemetry being enabled.
39+
# https://github.com/nginxinc/nginx-gateway-fabric/issues/1317.
40+
- apiGroups:
41+
- ""
42+
resources:
43+
- pods
44+
verbs:
45+
- get
46+
- apiGroups:
47+
- ""
48+
resources:
49+
- nodes
50+
verbs:
51+
- list
4152
- apiGroups:
4253
- ""
4354
resources:
4455
- events
4556
verbs:
4657
- create
4758
- patch
59+
- apiGroups:
60+
- apps
61+
resources:
62+
- replicasets
63+
verbs:
64+
- get
4865
- apiGroups:
4966
- discovery.k8s.io
5067
resources:

internal/mode/static/config/config.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ type GatewayPodConfig struct {
4848
ServiceName string
4949
// Namespace is the namespace of this Pod.
5050
Namespace string
51+
// Name is the name of the Pod.
52+
Name string
5153
}
5254

5355
// MetricsConfig specifies the metrics config.

internal/mode/static/manager.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"github.com/go-logr/logr"
99
ngxclient "github.com/nginxinc/nginx-plus-go-client/client"
1010
"github.com/prometheus/client_golang/prometheus"
11+
appsv1 "k8s.io/api/apps/v1"
1112
apiv1 "k8s.io/api/core/v1"
1213
discoveryV1 "k8s.io/api/discovery/v1"
1314
apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
@@ -65,6 +66,7 @@ func init() {
6566
utilruntime.Must(discoveryV1.AddToScheme(scheme))
6667
utilruntime.Must(ngfAPI.AddToScheme(scheme))
6768
utilruntime.Must(apiext.AddToScheme(scheme))
69+
utilruntime.Must(appsv1.AddToScheme(scheme))
6870
}
6971

7072
// nolint:gocyclo
@@ -214,10 +216,14 @@ func StartManager(cfg config.Config) error {
214216
}
215217

216218
dataCollector := telemetry.NewDataCollectorImpl(telemetry.DataCollectorConfig{
217-
K8sClientReader: mgr.GetClient(),
219+
K8sClientReader: mgr.GetAPIReader(),
218220
GraphGetter: processor,
219221
ConfigurationGetter: eventHandler,
220222
Version: cfg.Version,
223+
PodNSName: types.NamespacedName{
224+
Namespace: cfg.GatewayPodConfig.Namespace,
225+
Name: cfg.GatewayPodConfig.Name,
226+
},
221227
})
222228
if err = mgr.Add(createTelemetryJob(cfg, dataCollector, nginxChecker.getReadyCh())); err != nil {
223229
return fmt.Errorf("cannot register telemetry job: %w", err)

internal/mode/static/telemetry/collector.go

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ import (
55
"errors"
66
"fmt"
77

8+
appsv1 "k8s.io/api/apps/v1"
89
v1 "k8s.io/api/core/v1"
10+
"k8s.io/apimachinery/pkg/types"
911
"sigs.k8s.io/controller-runtime/pkg/client"
1012

1113
"github.com/nginxinc/nginx-gateway-fabric/internal/mode/static/state/dataplane"
@@ -49,6 +51,7 @@ type Data struct {
4951
ProjectMetadata ProjectMetadata
5052
NodeCount int
5153
NGFResourceCounts NGFResourceCounts
54+
NGFReplicaCount int
5255
}
5356

5457
// DataCollectorConfig holds configuration parameters for DataCollectorImpl.
@@ -61,6 +64,8 @@ type DataCollectorConfig struct {
6164
ConfigurationGetter ConfigurationGetter
6265
// Version is the NGF version.
6366
Version string
67+
// PodNSName is the NamespacedName of the NGF Pod.
68+
PodNSName types.NamespacedName
6469
}
6570

6671
// DataCollectorImpl is an implementation of DataCollector.
@@ -89,13 +94,19 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
8994
return Data{}, fmt.Errorf("failed to collect NGF resource counts: %w", err)
9095
}
9196

97+
ngfReplicaCount, err := collectNGFReplicaCount(ctx, c.cfg.K8sClientReader, c.cfg.PodNSName)
98+
if err != nil {
99+
return Data{}, fmt.Errorf("failed to collect NGF replica count: %w", err)
100+
}
101+
92102
data := Data{
93103
NodeCount: nodeCount,
94104
NGFResourceCounts: graphResourceCount,
95105
ProjectMetadata: ProjectMetadata{
96106
Name: "NGF",
97107
Version: c.cfg.Version,
98108
},
109+
NGFReplicaCount: ngfReplicaCount,
99110
}
100111

101112
return data, nil
@@ -104,7 +115,7 @@ func (c DataCollectorImpl) Collect(ctx context.Context) (Data, error) {
104115
func collectNodeCount(ctx context.Context, k8sClient client.Reader) (int, error) {
105116
var nodes v1.NodeList
106117
if err := k8sClient.List(ctx, &nodes); err != nil {
107-
return 0, err
118+
return 0, fmt.Errorf("failed to get NodeList: %w", err)
108119
}
109120

110121
return len(nodes.Items), nil
@@ -147,3 +158,38 @@ func collectGraphResourceCount(
147158

148159
return ngfResourceCounts, nil
149160
}
161+
162+
func collectNGFReplicaCount(ctx context.Context, k8sClient client.Reader, podNSName types.NamespacedName) (int, error) {
163+
var pod v1.Pod
164+
if err := k8sClient.Get(
165+
ctx,
166+
types.NamespacedName{Namespace: podNSName.Namespace, Name: podNSName.Name},
167+
&pod,
168+
); err != nil {
169+
return 0, fmt.Errorf("failed to get NGF Pod: %w", err)
170+
}
171+
172+
podOwnerRefs := pod.GetOwnerReferences()
173+
if len(podOwnerRefs) != 1 {
174+
return 0, fmt.Errorf("expected one owner reference of the NGF Pod, got %d", len(podOwnerRefs))
175+
}
176+
177+
if podOwnerRefs[0].Kind != "ReplicaSet" {
178+
return 0, fmt.Errorf("expected pod owner reference to be ReplicaSet, got %s", podOwnerRefs[0].Kind)
179+
}
180+
181+
var replicaSet appsv1.ReplicaSet
182+
if err := k8sClient.Get(
183+
ctx,
184+
types.NamespacedName{Namespace: podNSName.Namespace, Name: podOwnerRefs[0].Name},
185+
&replicaSet,
186+
); err != nil {
187+
return 0, fmt.Errorf("failed to get NGF Pod's ReplicaSet: %w", err)
188+
}
189+
190+
if replicaSet.Spec.Replicas == nil {
191+
return 0, errors.New("replica set replicas was nil")
192+
}
193+
194+
return int(*replicaSet.Spec.Replicas), nil
195+
}

0 commit comments

Comments
 (0)