Skip to content

Commit d5dcd80

Browse files
committed
[Feature] Parametrize ForceDelete timeout
1 parent 386efa1 commit d5dcd80

File tree

5 files changed

+48
-33
lines changed

5 files changed

+48
-33
lines changed

CHANGELOG.md

+1
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
- (Bugfix) Remove ImagePullSecrets Reference from Container
1414
- (Feature) DebugPackage ArangoProfiles
1515
- (Feature) Scheduler CLI
16+
- (Feature) Parametrize ForceDelete timeout
1617

1718
## [1.2.39](https://github.com/arangodb/kube-arangodb/tree/1.2.39) (2024-03-11)
1819
- (Feature) Extract Scheduler API

README.md

+2-1
Original file line number | Diff line number | Diff line change
@@ -168,7 +168,7 @@ Flags:
168168
--kubernetes.max-batch-size int Size of batch during objects read (default 256)
169169
--kubernetes.qps float32 Number of queries per second for k8s API (default 15)
170170
--log.format string Set log format. Allowed values: 'pretty', 'JSON'. If empty, default format is used (default "pretty")
171-
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, server, server-authentication (default [info])
171+
--log.level stringArray Set log levels in format <level> or <logger>=<level>. Possible loggers: action, agency, api-server, assertion, backup-operator, chaos-monkey, crd, deployment, deployment-ci, deployment-reconcile, deployment-replication, deployment-resilience, deployment-resources, deployment-storage, deployment-storage-pc, deployment-storage-service, http, inspector, integrations, k8s-client, ml-batchjob-operator, ml-cronjob-operator, ml-extension-operator, ml-extension-shutdown, ml-storage-operator, monitor, operator, operator-arangojob-handler, operator-v2, operator-v2-event, operator-v2-worker, panics, pod_compare, root, root-event-recorder, scheduler, server, server-authentication (default [info])
172172
--log.sampling If true, operator will try to minimize duplication of logging events (default true)
173173
--memory-limit uint Define memory limit for hard shutdown and the dump of goroutines. Used for testing
174174
--metrics.excluded-prefixes stringArray List of the excluded metrics prefixes
@@ -196,6 +196,7 @@ Flags:
196196
--timeout.arangod-check duration The version check request timeout to the ArangoDB (default 2s)
197197
--timeout.backup-arangod duration The request timeout to the ArangoDB during backup calls (default 30s)
198198
--timeout.backup-upload duration The request timeout to the ArangoDB during uploading files (default 5m0s)
199+
--timeout.force-delete-pod-grace-period duration Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals (default 15m0s)
199200
--timeout.k8s duration The request timeout to the kubernetes (default 2s)
200201
--timeout.reconciliation duration The reconciliation timeout to the ArangoDB CR (default 1m0s)
201202
--timeout.shard-rebuild duration Timeout after which particular out-synced shard is considered as failed and rebuild is triggered (default 1h0m0s)

cmd/cmd.go

+12-9
Original file line number | Diff line number | Diff line change
@@ -147,15 +147,16 @@ var (
147147
concurrentUploads int
148148
}
149149
operatorTimeouts struct {
150-
k8s time.Duration
151-
arangoD time.Duration
152-
arangoDCheck time.Duration
153-
reconciliation time.Duration
154-
agency time.Duration
155-
shardRebuild time.Duration
156-
shardRebuildRetry time.Duration
157-
backupArangoD time.Duration
158-
backupUploadArangoD time.Duration
150+
k8s time.Duration
151+
arangoD time.Duration
152+
arangoDCheck time.Duration
153+
reconciliation time.Duration
154+
agency time.Duration
155+
shardRebuild time.Duration
156+
shardRebuildRetry time.Duration
157+
backupArangoD time.Duration
158+
backupUploadArangoD time.Duration
159+
forcePodDeletionGracePeriod time.Duration
159160
}
160161
operatorImageDiscovery struct {
161162
timeout time.Duration
@@ -224,6 +225,7 @@ func init() {
224225
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
225226
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
226227
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
228+
f.DurationVar(&operatorTimeouts.forcePodDeletionGracePeriod, "timeout.force-delete-pod-grace-period", globals.DefaultForcePodDeletionGracePeriodTimeout, "Default period when ArangoDB Pod should be forcefully removed after all containers were stopped - set to 0 to disable forceful removals")
227229
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
228230
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
229231
f.DurationVar(&operatorReconciliationRetry.delay, "operator.reconciliation.retry.delay", globals.DefaultOperatorUpdateRetryDelay, "Delay between Object Update operations in the Reconciliation loop")
@@ -291,6 +293,7 @@ func executeMain(cmd *cobra.Command, args []string) {
291293
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
292294
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
293295
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
296+
globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Set(operatorTimeouts.forcePodDeletionGracePeriod)
294297

295298
globals.GetGlobals().Retry().OperatorUpdateRetryDelay().Set(operatorReconciliationRetry.delay)
296299
globals.GetGlobals().Retry().OperatorUpdateRetryCount().Set(operatorReconciliationRetry.count)

pkg/deployment/resources/pod_inspector.go

+9-8
Original file line number | Diff line number | Diff line change
@@ -57,7 +57,6 @@ const (
5757
// we will mark the pod as scheduled for termination
5858
recheckSoonPodInspectorInterval = util.Interval(time.Second) // Time between Pod inspection if we think something will change soon
5959
maxPodInspectorInterval = util.Interval(time.Hour) // Maximum time between Pod inspection (if nothing else happens)
60-
forcePodDeletionGracePeriod = 15 * time.Minute
6160
)
6261

6362
func (r *Resources) handleRestartedPod(pod *core.Pod, memberStatus *api.MemberStatus, wasTerminated, markAsTerminated *bool) {
@@ -426,13 +425,15 @@ func (r *Resources) InspectPods(ctx context.Context, cachedStatus inspectorInter
426425
var gps int64 = 10
427426

428427
forceDelete := false
429-
if t := k8sutil.PodStopTime(pod); !t.IsZero() {
430-
if time.Since(t) > forcePodDeletionGracePeriod {
431-
forceDelete = true
432-
}
433-
} else if t := pod.DeletionTimestamp; t != nil {
434-
if time.Since(t.Time) > forcePodDeletionGracePeriod {
435-
forceDelete = true
428+
if gracePeriod := globals.GetGlobalTimeouts().ForcePodDeletionGracePeriodTimeout().Get(); gracePeriod > 0 {
429+
if t := k8sutil.PodStopTime(pod); !t.IsZero() {
430+
if time.Since(t) > gracePeriod {
431+
forceDelete = true
432+
}
433+
} else if t := pod.DeletionTimestamp; t != nil {
434+
if time.Since(t.Time) > gracePeriod {
435+
forceDelete = true
436+
}
436437
}
437438
}
438439

pkg/util/globals/global.go

+24-15
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
//
22
// DISCLAIMER
33
//
4-
// Copyright 2016-2023 ArangoDB GmbH, Cologne, Germany
4+
// Copyright 2016-2024 ArangoDB GmbH, Cologne, Germany
55
//
66
// Licensed under the Apache License, Version 2.0 (the "License");
77
// you may not use this file except in compliance with the License.
@@ -23,11 +23,12 @@ package globals
2323
import "time"
2424

2525
const (
26-
DefaultKubernetesTimeout = 2 * time.Second
27-
DefaultArangoDTimeout = time.Second * 5
28-
DefaultArangoDAgencyTimeout = time.Second * 10
29-
DefaultArangoDCheckTimeout = time.Second * 2
30-
DefaultReconciliationTimeout = time.Minute
26+
DefaultKubernetesTimeout = 2 * time.Second
27+
DefaultArangoDTimeout = time.Second * 5
28+
DefaultArangoDAgencyTimeout = time.Second * 10
29+
DefaultArangoDCheckTimeout = time.Second * 2
30+
DefaultReconciliationTimeout = time.Minute
31+
DefaultForcePodDeletionGracePeriodTimeout = 15 * time.Minute
3132

3233
BackupDefaultArangoClientTimeout = 30 * time.Second
3334
BackupUploadArangoClientTimeout = 300 * time.Second
@@ -50,15 +51,16 @@ const (
5051

5152
var globalObj = &globals{
5253
timeouts: &globalTimeouts{
53-
requests: NewTimeout(DefaultKubernetesTimeout),
54-
arangod: NewTimeout(DefaultArangoDTimeout),
55-
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
56-
reconciliation: NewTimeout(DefaultReconciliationTimeout),
57-
agency: NewTimeout(DefaultArangoDAgencyTimeout),
58-
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
59-
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
60-
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
61-
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
54+
requests: NewTimeout(DefaultKubernetesTimeout),
55+
arangod: NewTimeout(DefaultArangoDTimeout),
56+
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
57+
reconciliation: NewTimeout(DefaultReconciliationTimeout),
58+
agency: NewTimeout(DefaultArangoDAgencyTimeout),
59+
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
60+
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
61+
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
62+
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
63+
forcePodDeletionGracePeriodTimeout: NewTimeout(DefaultForcePodDeletionGracePeriodTimeout),
6264
},
6365
kubernetes: &globalKubernetes{
6466
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
@@ -144,6 +146,8 @@ type GlobalTimeouts interface {
144146
ArangoDCheck() Timeout
145147
Agency() Timeout
146148

149+
ForcePodDeletionGracePeriodTimeout() Timeout
150+
147151
BackupArangoClientTimeout() Timeout
148152
BackupArangoClientUploadTimeout() Timeout
149153
}
@@ -152,6 +156,11 @@ type globalTimeouts struct {
152156
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
153157
backupArangoClientTimeout Timeout
154158
backupArangoClientUploadTimeout Timeout
159+
forcePodDeletionGracePeriodTimeout Timeout
160+
}
161+
162+
func (g *globalTimeouts) ForcePodDeletionGracePeriodTimeout() Timeout {
163+
return g.forcePodDeletionGracePeriodTimeout
155164
}
156165

157166
func (g *globalTimeouts) Agency() Timeout {

0 commit comments

Comments
 (0)