Skip to content

Commit 095dc19

Browse files
authored
Merge pull request #151 from arangodb/bugfix/cleanup-long-terminating-stateless-pods
Cleanup stateless pods that are in terminating state for a long time
2 parents efb5f6d + 9a12a97 commit 095dc19

File tree

3 files changed

+40
-9
lines changed

3 files changed

+40
-9
lines changed

pkg/apis/deployment/v1alpha/server_group.go

+10
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,16 @@ func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration {
101101
}
102102
}
103103

104+
// IsStateless returns true when the group runs servers without a persistent volume.
105+
func (g ServerGroup) IsStateless() bool {
106+
switch g {
107+
case ServerGroupCoordinators, ServerGroupSyncMasters, ServerGroupSyncWorkers:
108+
return true
109+
default:
110+
return false
111+
}
112+
}
113+
104114
// IsArangod returns true when the group runs servers of type `arangod`.
105115
func (g ServerGroup) IsArangod() bool {
106116
switch g {

pkg/deployment/resources/pod_cleanup.go

+24-9
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@
2323
package resources
2424

2525
import (
26+
"time"
27+
2628
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
2729

2830
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2931
)
3032

33+
const (
34+
statelessTerminationPeriod = time.Minute // We wait this long for a stateless server to terminate on its own. Afterwards we kill it.
35+
)
36+
3137
// CleanupTerminatedPods removes all pods in Terminated state that belong to a member in Created state.
3238
func (r *Resources) CleanupTerminatedPods() error {
3339
log := r.log
@@ -47,20 +53,29 @@ func (r *Resources) CleanupTerminatedPods() error {
4753
}
4854

4955
// Check pod state
50-
if !(k8sutil.IsPodSucceeded(&p) || k8sutil.IsPodFailed(&p)) {
56+
if !(k8sutil.IsPodSucceeded(&p) || k8sutil.IsPodFailed(&p) || k8sutil.IsPodTerminating(&p)) {
5157
continue
5258
}
5359

5460
// Find member status
55-
memberStatus, _, found := status.Members.MemberStatusByPodName(p.GetName())
61+
memberStatus, group, found := status.Members.MemberStatusByPodName(p.GetName())
5662
if !found {
57-
log.Debug().Str("pod", p.GetName()).Msg("no memberstatus found for pod")
58-
continue
59-
}
60-
61-
// Check member termination condition
62-
if !memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
63-
continue
63+
log.Debug().Str("pod", p.GetName()).Msg("no memberstatus found for pod. Performing cleanup")
64+
} else {
65+
// Check member termination condition
66+
if !memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
67+
if !group.IsStateless() {
68+
// For stateful members, we have to wait for confirmed termination
69+
continue
70+
} else {
71+
// If a stateless server does not terminate within a reasonable amount of time, we kill it.
72+
t := p.GetDeletionTimestamp()
73+
if t == nil || t.Add(statelessTerminationPeriod).After(time.Now()) {
74+
// Either delete timestamp is not set, or not yet waiting long enough
75+
continue
76+
}
77+
}
78+
}
6479
}
6580

6681
// Ok, we can delete the pod

pkg/util/k8sutil/pods.go

+6
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,12 @@ func IsPodMarkedForDeletion(pod *v1.Pod) bool {
123123
return pod.DeletionTimestamp != nil
124124
}
125125

126+
// IsPodTerminating returns true if the pod has been marked for deletion
127+
// but is still running.
128+
func IsPodTerminating(pod *v1.Pod) bool {
129+
return IsPodMarkedForDeletion(pod) && pod.Status.Phase == v1.PodRunning
130+
}
131+
126132
// IsArangoDBImageIDAndVersionPod returns true if the given pod is used for fetching image ID and ArangoDB version of an image
127133
func IsArangoDBImageIDAndVersionPod(p v1.Pod) bool {
128134
role, found := p.GetLabels()[LabelKeyRole]

0 commit comments

Comments
 (0)