Skip to content

Commit 3e0f90f

Browse files
authored
[Bugfix] Wait for ImageStatus (#1602)
1 parent b085209 commit 3e0f90f

File tree

4 files changed

+52
-8
lines changed

4 files changed

+52
-8
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
- (Feature) Extract Scheduler API
55
- (Bugfix) Fix Image Discovery
66
- (Bugfix) Fix Resources Copy mechanism to prevent invalid pod creation
7+
- (Bugfix) Wait for ImageStatus in Image Discovery
78

89
## [1.2.38](https://github.com/arangodb/kube-arangodb/tree/1.2.38) (2024-02-22)
910
- (Feature) Extract GRPC Server

README.md

+4-1
Original file line numberDiff line numberDiff line change
@@ -161,12 +161,15 @@ Flags:
161161
--deployment.feature.upgrade-version-check-v2 Enable initContainer with pre version check based by Operator - Required ArangoDB 3.8.0 or higher
162162
--features-config-map-name string Name of the Feature Map ConfigMap (default "arangodb-operator-feature-config-map")
163163
-h, --help help for arangodb_operator
164+
--image.discovery.status Discover Operator Image from Pod Status by default. When disabled Pod Spec is used. (default true)
165+
--image.discovery.timeout duration Timeout for image discovery process (default 1m0s)
164166
--internal.scaling-integration Enable Scaling Integration
165167
--kubernetes.burst int Burst for the k8s API (default 30)
166168
--kubernetes.max-batch-size int Size of batch during objects read (default 256)
167169
--kubernetes.qps float32 Number of queries per second for k8s API (default 15)
168170
--log.format string Set log format. Allowed values: 'pretty', 'JSON'. If empty, default format is used (default "pretty")
169-
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [debug])
171+
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [info])
172+
--log.sampling If true, operator will try to minimize duplication of logging events (default true)
170173
--memory-limit uint Define memory limit for hard shutdown and the dump of goroutines. Used for testing
171174
--metrics.excluded-prefixes stringArray List of the excluded metrics prefixes
172175
--mode.single Enable single mode in Operator. WARNING: There should be only one replica of Operator, otherwise Operator can take unexpected actions

cmd/cmd.go

+35-4
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ var (
161161
backupArangoD time.Duration
162162
backupUploadArangoD time.Duration
163163
}
164+
operatorImageDiscovery struct {
165+
timeout time.Duration
166+
defaultStatusDiscovery bool
167+
}
164168
operatorReconciliationRetry struct {
165169
delay time.Duration
166170
count int
@@ -241,6 +245,8 @@ func init() {
241245
f.IntVar(&operatorBackup.concurrentUploads, "backup-concurrent-uploads", globals.DefaultBackupConcurrentUploads, "Number of concurrent uploads per deployment")
242246
f.Uint64Var(&memoryLimit.hardLimit, "memory-limit", 0, "Define memory limit for hard shutdown and the dump of goroutines. Used for testing")
243247
f.StringArrayVar(&metricsOptions.excludedMetricPrefixes, "metrics.excluded-prefixes", nil, "List of the excluded metrics prefixes")
248+
f.BoolVar(&operatorImageDiscovery.defaultStatusDiscovery, "image.discovery.status", true, "Discover Operator Image from Pod Status by default. When disabled Pod Spec is used.")
249+
f.DurationVar(&operatorImageDiscovery.timeout, "image.discovery.timeout", time.Minute, "Timeout for image discovery process")
244250
if err := features.Init(&cmdMain); err != nil {
245251
panic(err.Error())
246252
}
@@ -584,6 +590,20 @@ func newOperatorConfigAndDeps(id, namespace, name string) (operator.Config, oper
584590
// getMyPodInfo looks up the image and service account of the pod with the given name in the given namespace.
585591
// Returns image, serviceAccount, error.
586592
func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string, string, error) {
593+
if image, sa, ok := getMyPodInfoWrap(kubecli, namespace, name, getMyImageInfoFunc(operatorImageDiscovery.defaultStatusDiscovery)); ok {
594+
return image, sa, nil
595+
}
596+
597+
logger.Warn("Unable to discover image, fallback to second method")
598+
599+
if image, sa, ok := getMyPodInfoWrap(kubecli, namespace, name, getMyImageInfoFunc(!operatorImageDiscovery.defaultStatusDiscovery)); ok {
600+
return image, sa, nil
601+
}
602+
603+
return "", "", errors.Errorf("Unable to discover image")
604+
}
605+
606+
func getMyPodInfoWrap(kubecli kubernetes.Interface, namespace, name string, imageFunc func(in *core.Pod) (string, bool)) (string, string, bool) {
587607
var image, sa string
588608
op := func() error {
589609
pod, err := kubecli.CoreV1().Pods(namespace).Get(context.Background(), name, meta.GetOptions{})
@@ -595,15 +615,26 @@ func getMyPodInfo(kubecli kubernetes.Interface, namespace, name string) (string,
595615
return errors.WithStack(err)
596616
}
597617
sa = pod.Spec.ServiceAccountName
598-
if image, err = k8sutil.GetArangoDBImageIDFromPod(pod, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName)); err != nil {
618+
if i, ok := imageFunc(pod); !ok {
599619
return errors.Wrap(err, "failed to get image ID from pod")
620+
} else {
621+
image = i
600622
}
601623
return nil
602624
}
603-
if err := retry.Retry(op, time.Minute*5); err != nil {
604-
return "", "", errors.WithStack(err)
625+
if err := retry.Retry(op, operatorImageDiscovery.timeout/2); err == nil {
626+
return image, sa, true
627+
}
628+
return "", "", false
629+
}
630+
631+
func getMyImageInfoFunc(status bool) func(pod *core.Pod) (string, bool) {
632+
return func(pod *core.Pod) (string, bool) {
633+
if status {
634+
return k8sutil.GetArangoDBImageIDFromContainerStatuses(pod.Status.ContainerStatuses, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName))
635+
}
636+
return k8sutil.GetArangoDBImageIDFromContainers(pod.Spec.Containers, shared.ServerContainerName, shared.OperatorContainerName, constants.MyContainerNameEnv.GetOrDefault(shared.OperatorContainerName))
605637
}
606-
return image, sa, nil
607638
}
608639

609640
func createRecorder(kubecli kubernetes.Interface, name, namespace string) record.EventRecorder {

pkg/util/k8sutil/images.go

+12-3
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,16 @@ func GetArangoDBImageIDFromPod(pod *core.Pod, names ...string) (string, error) {
5353
if image, ok := GetArangoDBImageIDFromContainerStatuses(pod.Status.ContainerStatuses, names...); ok {
5454
return image, nil
5555
}
56+
5657
if image, ok := GetArangoDBImageIDFromContainers(pod.Spec.Containers, names...); ok {
5758
return image, nil
5859
}
5960

6061
if cs := pod.Status.ContainerStatuses; len(cs) > 0 {
6162
if image := cs[0].ImageID; image != "" {
62-
return ConvertImageID2Image(image), nil
63+
if disc := ConvertImageID2Image(image); disc != "" {
64+
return disc, nil
65+
}
6366
}
6467
}
6568
if cs := pod.Spec.Containers; len(cs) > 0 {
@@ -75,7 +78,11 @@ func GetArangoDBImageIDFromPod(pod *core.Pod, names ...string) (string, error) {
7578
func GetArangoDBImageIDFromContainerStatuses(containers []core.ContainerStatus, names ...string) (string, bool) {
7679
for _, name := range names {
7780
if id := container.GetContainerStatusIDByName(containers, name); id != -1 {
78-
return ConvertImageID2Image(containers[id].ImageID), true
81+
if image := containers[id].ImageID; image != "" {
82+
if disc := ConvertImageID2Image(image); disc != "" {
83+
return disc, true
84+
}
85+
}
7986
}
8087
}
8188

@@ -86,7 +93,9 @@ func GetArangoDBImageIDFromContainerStatuses(containers []core.ContainerStatus,
8693
func GetArangoDBImageIDFromContainers(containers []core.Container, names ...string) (string, bool) {
8794
for _, name := range names {
8895
if id := container.GetContainerIDByName(containers, name); id != -1 {
89-
return containers[id].Image, true
96+
if image := containers[id].Image; image != "" {
97+
return image, true
98+
}
9099
}
91100
}
92101

0 commit comments

Comments
 (0)