Skip to content

Commit 43ffc34

Browse files
committed
Add timeout to plan actions
1 parent 4ab7a63 commit 43ffc34

11 files changed

+101
-1
lines changed

pkg/deployment/reconcile/action.go

+3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728
)
2829

2930
// Action executes a single Plan item.
@@ -35,4 +36,6 @@ type Action interface {
3536
// CheckProgress checks the progress of the action.
3637
// Returns true if the action is completely finished, false otherwise.
3738
CheckProgress(ctx context.Context) (bool, error)
39+
// Timeout returns the amount of time after which this action will time out.
40+
Timeout() time.Duration
3841
}

pkg/deployment/reconcile/action_add_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2930
"github.com/rs/zerolog"
@@ -64,3 +65,8 @@ func (a *actionAddMember) CheckProgress(ctx context.Context) (bool, error) {
6465
// Nothing to do
6566
return true, nil
6667
}
68+
69+
// Timeout returns the amount of time after which this action will timeout.
70+
func (a *actionAddMember) Timeout() time.Duration {
71+
return addMemberTimeout
72+
}

pkg/deployment/reconcile/action_cleanout_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2930
"github.com/rs/zerolog"
@@ -114,3 +115,8 @@ func (a *actionCleanoutMember) CheckProgress(ctx context.Context) (bool, error)
114115
// Cleanout completed
115116
return true, nil
116117
}
118+
119+
// Timeout returns the amount of time after which this action will timeout.
120+
func (a *actionCleanoutMember) Timeout() time.Duration {
121+
return cleanoutMemberTimeout
122+
}

pkg/deployment/reconcile/action_remove_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
"github.com/pkg/errors"
2930
"github.com/rs/zerolog"
@@ -94,3 +95,8 @@ func (a *actionRemoveMember) CheckProgress(ctx context.Context) (bool, error) {
9495
// Nothing to do
9596
return true, nil
9697
}
98+
99+
// Timeout returns the amount of time after which this action will timeout.
100+
func (a *actionRemoveMember) Timeout() time.Duration {
101+
return removeMemberTimeout
102+
}

pkg/deployment/reconcile/action_renew_tls_certificate.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2930
"github.com/rs/zerolog"
@@ -69,3 +70,8 @@ func (a *renewTLSCertificateAction) Start(ctx context.Context) (bool, error) {
6970
func (a *renewTLSCertificateAction) CheckProgress(ctx context.Context) (bool, error) {
7071
return true, nil
7172
}
73+
74+
// Timeout returns the amount of time after which this action will timeout.
75+
func (a *renewTLSCertificateAction) Timeout() time.Duration {
76+
return renewTLSCertificateTimeout
77+
}

pkg/deployment/reconcile/action_rotate_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2930
"github.com/rs/zerolog"
@@ -116,3 +117,8 @@ func (a *actionRotateMember) CheckProgress(ctx context.Context) (bool, error) {
116117
}
117118
return true, nil
118119
}
120+
121+
// Timeout returns the amount of time after which this action will timeout.
122+
func (a *actionRotateMember) Timeout() time.Duration {
123+
return rotateMemberTimeout
124+
}

pkg/deployment/reconcile/action_shutdown_member.go

+5
Original file line numberDiff line numberDiff line change
@@ -111,3 +111,8 @@ func (a *actionShutdownMember) CheckProgress(ctx context.Context) (bool, error)
111111
// Member still not shutdown, retry soon
112112
return false, nil
113113
}
114+
115+
// Timeout returns the amount of time after which this action will timeout.
116+
func (a *actionShutdownMember) Timeout() time.Duration {
117+
return shutdownMemberTimeout
118+
}

pkg/deployment/reconcile/action_upgrade_member.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
2930
"github.com/rs/zerolog"
@@ -126,3 +127,8 @@ func (a *actionUpgradeMember) CheckProgress(ctx context.Context) (bool, error) {
126127
}
127128
return isUpgrading, nil
128129
}
130+
131+
// Timeout returns the amount of time after which this action will timeout.
132+
func (a *actionUpgradeMember) Timeout() time.Duration {
133+
return upgradeMemberTimeout
134+
}

pkg/deployment/reconcile/action_wait_for_member_up.go

+6
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ package reconcile
2424

2525
import (
2626
"context"
27+
"time"
2728

2829
driver "github.com/arangodb/go-driver"
2930
"github.com/arangodb/go-driver/agency"
@@ -164,3 +165,8 @@ func (a *actionWaitForMemberUp) checkProgressArangoSync(ctx context.Context) (bo
164165
}
165166
return true, nil
166167
}
168+
169+
// Timeout returns the amount of time after which this action will timeout.
170+
func (a *actionWaitForMemberUp) Timeout() time.Duration {
171+
return waitForMemberUpTimeout
172+
}

pkg/deployment/reconcile/plan_executor.go

+15-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ package reconcile
2525
import (
2626
"context"
2727
"fmt"
28+
"time"
2829

2930
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3031

@@ -106,7 +107,20 @@ func (d *Reconciler) ExecutePlan(ctx context.Context) (bool, error) {
106107
}
107108
log.Debug().Bool("ready", ready).Msg("Action CheckProgress completed")
108109
if !ready {
109-
// Not ready check, come back soon
110+
// Not ready yet, check timeout
111+
deadline := planAction.CreationTime.Add(action.Timeout())
112+
if time.Now().After(deadline) {
113+
// Timeout has expired
114+
log.Warn().Msg("Action not finished in time. Removing the entire plan")
115+
status.Plan = api.Plan{}
116+
// Save plan update
117+
if err := d.context.UpdateStatus(status); err != nil {
118+
log.Debug().Err(err).Msg("Failed to update CR status")
119+
return false, maskAny(err)
120+
}
121+
return true, nil
122+
}
123+
// Timeout not yet expired, come back soon
110124
return true, nil
111125
}
112126
// Continue with next action

pkg/deployment/reconcile/timeouts.go

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
//
2+
// DISCLAIMER
3+
//
4+
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
5+
//
6+
// Licensed under the Apache License, Version 2.0 (the "License");
7+
// you may not use this file except in compliance with the License.
8+
// You may obtain a copy of the License at
9+
//
10+
// http://www.apache.org/licenses/LICENSE-2.0
11+
//
12+
// Unless required by applicable law or agreed to in writing, software
13+
// distributed under the License is distributed on an "AS IS" BASIS,
14+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
// See the License for the specific language governing permissions and
16+
// limitations under the License.
17+
//
18+
// Copyright holder is ArangoDB GmbH, Cologne, Germany
19+
//
20+
// Author Ewout Prangsma
21+
//
22+
23+
package reconcile
24+
25+
import "time"
26+
27+
// Time limits for the individual plan actions. Each constant is the value
// returned by the Timeout method of the corresponding action type.
const (
	// addMemberTimeout is the time limit for the add-member action.
	addMemberTimeout = time.Minute * 5
	// cleanoutMemberTimeout is the time limit for the cleanout-member action.
	cleanoutMemberTimeout = time.Hour * 12
	// removeMemberTimeout is the time limit for the remove-member action.
	removeMemberTimeout = time.Minute * 15
	// renewTLSCertificateTimeout is the time limit for the
	// renew-TLS-certificate action.
	renewTLSCertificateTimeout = time.Minute * 30
	// rotateMemberTimeout is the time limit for the rotate-member action.
	rotateMemberTimeout = time.Minute * 30
	// shutdownMemberTimeout is the time limit for the shutdown-member action.
	shutdownMemberTimeout = time.Minute * 30
	// upgradeMemberTimeout is the time limit for the upgrade-member action.
	upgradeMemberTimeout = time.Hour * 6
	// waitForMemberUpTimeout is the time limit for the wait-for-member-up
	// action.
	waitForMemberUpTimeout = time.Minute * 15
)

0 commit comments

Comments
 (0)