5
5
"errors"
6
6
"fmt"
7
7
"net/http"
8
+ "os/exec"
8
9
"strings"
9
10
"time"
10
11
@@ -15,6 +16,7 @@ import (
15
16
core "k8s.io/api/core/v1"
16
17
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
17
18
"k8s.io/apimachinery/pkg/types"
19
+ ctlr "sigs.k8s.io/controller-runtime"
18
20
"sigs.k8s.io/controller-runtime/pkg/client"
19
21
"sigs.k8s.io/yaml"
20
22
@@ -100,8 +102,126 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
100
102
Skip ("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108" )
101
103
runRecoveryTest (teaURL , coffeeURL , ngfPodName , nginxContainerName , files , & ns )
102
104
})
105
+
106
+ It ("recovers when drained node is restarted" , func () {
107
+ runRestartNodeTest (teaURL , coffeeURL , files , & ns , true )
108
+ })
109
+
110
+ It ("recovers when node is restarted abruptly" , func () {
111
+ // FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed.
112
+ Skip ("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108" )
113
+ runRestartNodeTest (teaURL , coffeeURL , files , & ns , false )
114
+ })
103
115
})
104
116
117
+ func runRestartNodeTest (teaURL , coffeeURL string , files []string , ns * core.Namespace , drain bool ) {
118
+ nodeNames , err := getNodeNames ()
119
+ Expect (err ).ToNot (HaveOccurred ())
120
+ Expect (nodeNames ).To (HaveLen (1 ))
121
+
122
+ kindNodeName := nodeNames [0 ]
123
+
124
+ if portFwdPort != 0 {
125
+ close (portForwardStopCh )
126
+ }
127
+
128
+ if drain {
129
+ _ , err := exec .Command (
130
+ "kubectl" ,
131
+ "drain" ,
132
+ kindNodeName ,
133
+ "--ignore-daemonsets" ,
134
+ "--delete-local-data" ,
135
+ ).CombinedOutput ()
136
+ if err != nil {
137
+ Expect (err ).ToNot (HaveOccurred ())
138
+ }
139
+
140
+ _ , err = exec .Command (
141
+ "kubectl" ,
142
+ "delete" ,
143
+ "node" ,
144
+ kindNodeName ,
145
+ ).CombinedOutput ()
146
+ if err != nil {
147
+ Expect (err ).ToNot (HaveOccurred ())
148
+ }
149
+ }
150
+
151
+ containerOutput , err := exec .Command (
152
+ "docker" ,
153
+ "container" ,
154
+ "ls" ,
155
+ ).CombinedOutput ()
156
+ if err != nil {
157
+ Expect (err ).ToNot (HaveOccurred ())
158
+ }
159
+ fmt .Println (string (containerOutput ))
160
+
161
+ var containerName string
162
+ for _ , line := range strings .Split (string (containerOutput ), "\n " ) {
163
+ for _ , word := range strings .Split (line , " " ) {
164
+ // This is a potential weak spot in the code where we rely on the container which NGF
165
+ // is running on to contain "control-plane" in the name and for no other container to have that either.
166
+ // This is currently working in our test framework may break in the future.
167
+ if strings .Contains (word , "control-plane" ) {
168
+ containerName = strings .TrimSpace (word )
169
+ break
170
+ }
171
+ }
172
+ }
173
+ Expect (containerName ).ToNot (Equal ("" ))
174
+
175
+ // really jank - get the string that contains "control-plane"
176
+ fmt .Println ("This is our container name: " + containerName )
177
+
178
+ _ , err = exec .Command (
179
+ "docker" ,
180
+ "restart" ,
181
+ containerName ,
182
+ ).CombinedOutput ()
183
+ if err != nil {
184
+ fmt .Println (fmt .Sprint (err .Error ()))
185
+ Expect (err ).ToNot (HaveOccurred ())
186
+ }
187
+
188
+ // need to wait for docker container to restart and be running before polling for ready NGF Pods or else we will error
189
+ Eventually (
190
+ func () bool {
191
+ output , err := exec .Command (
192
+ "docker" ,
193
+ "container" ,
194
+ "inspect" ,
195
+ containerName ,
196
+ ).CombinedOutput ()
197
+ return strings .Contains (string (output ), "\" Running\" : true" ) && err == nil
198
+ }).
199
+ WithTimeout (timeoutConfig .CreateTimeout ).
200
+ WithPolling (500 * time .Millisecond ).
201
+ Should (BeTrue ())
202
+
203
+ var podNames []string
204
+ Eventually (
205
+ func () bool {
206
+ podNames , err = framework .GetReadyNGFPodNames (k8sClient , ngfNamespace , releaseName , timeoutConfig .GetStatusTimeout )
207
+ return len (podNames ) == 1 && err == nil
208
+ }).
209
+ WithTimeout (timeoutConfig .CreateTimeout ).
210
+ WithPolling (500 * time .Millisecond ).
211
+ Should (BeTrue ())
212
+ ngfPodName := podNames [0 ]
213
+ Expect (ngfPodName ).ToNot (Equal ("" ))
214
+
215
+ if portFwdPort != 0 {
216
+ ports := []string {fmt .Sprintf ("%d:80" , ngfHTTPForwardedPort ), fmt .Sprintf ("%d:443" , ngfHTTPSForwardedPort )}
217
+ portForwardStopCh = make (chan struct {})
218
+ err = framework .PortForward (ctlr .GetConfigOrDie (), ngfNamespace , ngfPodName , ports , portForwardStopCh )
219
+ Expect (err ).ToNot (HaveOccurred ())
220
+ }
221
+
222
+ checkNGFFunctionality (teaURL , coffeeURL , ngfPodName , files , ns )
223
+ }
224
+
105
225
func runRecoveryTest (teaURL , coffeeURL , ngfPodName , containerName string , files []string , ns * core.Namespace ) {
106
226
var (
107
227
err error
@@ -128,37 +248,7 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files
128
248
WithPolling (500 * time .Millisecond ).
129
249
Should (Succeed ())
130
250
}
131
-
132
- Eventually (
133
- func () error {
134
- return checkForWorkingTraffic (teaURL , coffeeURL )
135
- }).
136
- WithTimeout (timeoutConfig .RequestTimeout ).
137
- WithPolling (500 * time .Millisecond ).
138
- Should (Succeed ())
139
-
140
- Expect (resourceManager .DeleteFromFiles (files , ns .Name )).To (Succeed ())
141
-
142
- Eventually (
143
- func () error {
144
- return checkForFailingTraffic (teaURL , coffeeURL )
145
- }).
146
- WithTimeout (timeoutConfig .RequestTimeout ).
147
- WithPolling (500 * time .Millisecond ).
148
- Should (Succeed ())
149
-
150
- Expect (resourceManager .ApplyFromFiles (files , ns .Name )).To (Succeed ())
151
- Expect (resourceManager .WaitForAppsToBeReadyWithPodCount (ns .Name , 2 )).To (Succeed ())
152
-
153
- Eventually (
154
- func () error {
155
- return checkForWorkingTraffic (teaURL , coffeeURL )
156
- }).
157
- WithTimeout (timeoutConfig .RequestTimeout ).
158
- WithPolling (500 * time .Millisecond ).
159
- Should (Succeed ())
160
-
161
- checkContainerLogsForErrors (ngfPodName )
251
+ checkNGFFunctionality (teaURL , coffeeURL , ngfPodName , files , ns )
162
252
}
163
253
164
254
func restartContainer (ngfPodName , containerName string ) {
@@ -256,6 +346,39 @@ func expectRequestToFail(appURL, address string) error {
256
346
return nil
257
347
}
258
348
349
+ func checkNGFFunctionality (teaURL , coffeeURL , ngfPodName string , files []string , ns * core.Namespace ) {
350
+ Eventually (
351
+ func () error {
352
+ return checkForWorkingTraffic (teaURL , coffeeURL )
353
+ }).
354
+ WithTimeout (timeoutConfig .RequestTimeout ).
355
+ WithPolling (500 * time .Millisecond ).
356
+ Should (Succeed ())
357
+
358
+ Expect (resourceManager .DeleteFromFiles (files , ns .Name )).To (Succeed ())
359
+
360
+ Eventually (
361
+ func () error {
362
+ return checkForFailingTraffic (teaURL , coffeeURL )
363
+ }).
364
+ WithTimeout (timeoutConfig .RequestTimeout ).
365
+ WithPolling (500 * time .Millisecond ).
366
+ Should (Succeed ())
367
+
368
+ Expect (resourceManager .ApplyFromFiles (files , ns .Name )).To (Succeed ())
369
+ Expect (resourceManager .WaitForAppsToBeReadyWithPodCount (ns .Name , 2 )).To (Succeed ())
370
+
371
+ Eventually (
372
+ func () error {
373
+ return checkForWorkingTraffic (teaURL , coffeeURL )
374
+ }).
375
+ WithTimeout (timeoutConfig .RequestTimeout ).
376
+ WithPolling (500 * time .Millisecond ).
377
+ Should (Succeed ())
378
+
379
+ checkContainerLogsForErrors (ngfPodName )
380
+ }
381
+
259
382
// checkContainerLogsForErrors checks both nginx and ngf container's logs for any possible errors.
260
383
// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests,
261
384
// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests
@@ -347,6 +470,24 @@ func getContainerRestartCount(ngfPodName, containerName string) (int, error) {
347
470
return restartCount , nil
348
471
}
349
472
473
+ func getNodeNames () ([]string , error ) {
474
+ ctx , cancel := context .WithTimeout (context .Background (), timeoutConfig .GetTimeout )
475
+ defer cancel ()
476
+ var nodes core.NodeList
477
+
478
+ if err := k8sClient .List (ctx , & nodes ); err != nil {
479
+ return nil , fmt .Errorf ("error getting nodes: %w" , err )
480
+ }
481
+
482
+ names := make ([]string , 0 , len (nodes .Items ))
483
+
484
+ for _ , node := range nodes .Items {
485
+ names = append (names , node .Name )
486
+ }
487
+
488
+ return names , nil
489
+ }
490
+
350
491
func runNodeDebuggerJob (ngfPodName , jobScript string ) (* v1.Job , error ) {
351
492
ctx , cancel := context .WithTimeout (context .Background (), timeoutConfig .GetTimeout )
352
493
defer cancel ()
0 commit comments