Skip to content

Commit 330d50b

Browse files
committed
Automate remaining graceful recovery tests
1 parent 7654cb6 commit 330d50b

File tree

1 file changed

+172
-31
lines changed

1 file changed

+172
-31
lines changed

tests/suite/graceful_recovery_test.go

Lines changed: 172 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"errors"
66
"fmt"
77
"net/http"
8+
"os/exec"
89
"strings"
910
"time"
1011

@@ -15,6 +16,7 @@ import (
1516
core "k8s.io/api/core/v1"
1617
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1718
"k8s.io/apimachinery/pkg/types"
19+
ctlr "sigs.k8s.io/controller-runtime"
1820
"sigs.k8s.io/controller-runtime/pkg/client"
1921
"sigs.k8s.io/yaml"
2022

@@ -100,8 +102,126 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
100102
Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108")
101103
runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, &ns)
102104
})
105+
106+
It("recovers when drained node is restarted", func() {
107+
runRestartNodeTest(teaURL, coffeeURL, files, &ns, true)
108+
})
109+
110+
It("recovers when node is restarted abruptly", func() {
111+
// FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed.
112+
Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108")
113+
runRestartNodeTest(teaURL, coffeeURL, files, &ns, false)
114+
})
103115
})
104116

117+
func runRestartNodeTest(teaURL, coffeeURL string, files []string, ns *core.Namespace, drain bool) {
118+
nodeNames, err := getNodeNames()
119+
Expect(err).ToNot(HaveOccurred())
120+
Expect(nodeNames).To(HaveLen(1))
121+
122+
kindNodeName := nodeNames[0]
123+
124+
if portFwdPort != 0 {
125+
close(portForwardStopCh)
126+
}
127+
128+
if drain {
129+
_, err := exec.Command(
130+
"kubectl",
131+
"drain",
132+
kindNodeName,
133+
"--ignore-daemonsets",
134+
"--delete-local-data",
135+
).CombinedOutput()
136+
if err != nil {
137+
Expect(err).ToNot(HaveOccurred())
138+
}
139+
140+
_, err = exec.Command(
141+
"kubectl",
142+
"delete",
143+
"node",
144+
kindNodeName,
145+
).CombinedOutput()
146+
if err != nil {
147+
Expect(err).ToNot(HaveOccurred())
148+
}
149+
}
150+
151+
containerOutput, err := exec.Command(
152+
"docker",
153+
"container",
154+
"ls",
155+
).CombinedOutput()
156+
if err != nil {
157+
Expect(err).ToNot(HaveOccurred())
158+
}
159+
fmt.Println(string(containerOutput))
160+
161+
var containerName string
162+
for _, line := range strings.Split(string(containerOutput), "\n") {
163+
for _, word := range strings.Split(line, " ") {
164+
// This is a potential weak spot in the code where we rely on the container which NGF
165+
// is running on to contain "control-plane" in the name and for no other container to have that either.
166+
// This is currently working in our test framework may break in the future.
167+
if strings.Contains(word, "control-plane") {
168+
containerName = strings.TrimSpace(word)
169+
break
170+
}
171+
}
172+
}
173+
Expect(containerName).ToNot(Equal(""))
174+
175+
// really jank - get the string that contains "control-plane"
176+
fmt.Println("This is our container name: " + containerName)
177+
178+
_, err = exec.Command(
179+
"docker",
180+
"restart",
181+
containerName,
182+
).CombinedOutput()
183+
if err != nil {
184+
fmt.Println(fmt.Sprint(err.Error()))
185+
Expect(err).ToNot(HaveOccurred())
186+
}
187+
188+
// need to wait for docker container to restart and be running before polling for ready NGF Pods or else we will error
189+
Eventually(
190+
func() bool {
191+
output, err := exec.Command(
192+
"docker",
193+
"container",
194+
"inspect",
195+
containerName,
196+
).CombinedOutput()
197+
return strings.Contains(string(output), "\"Running\": true") && err == nil
198+
}).
199+
WithTimeout(timeoutConfig.CreateTimeout).
200+
WithPolling(500 * time.Millisecond).
201+
Should(BeTrue())
202+
203+
var podNames []string
204+
Eventually(
205+
func() bool {
206+
podNames, err = framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetStatusTimeout)
207+
return len(podNames) == 1 && err == nil
208+
}).
209+
WithTimeout(timeoutConfig.CreateTimeout).
210+
WithPolling(500 * time.Millisecond).
211+
Should(BeTrue())
212+
ngfPodName := podNames[0]
213+
Expect(ngfPodName).ToNot(Equal(""))
214+
215+
if portFwdPort != 0 {
216+
ports := []string{fmt.Sprintf("%d:80", ngfHTTPForwardedPort), fmt.Sprintf("%d:443", ngfHTTPSForwardedPort)}
217+
portForwardStopCh = make(chan struct{})
218+
err = framework.PortForward(ctlr.GetConfigOrDie(), ngfNamespace, ngfPodName, ports, portForwardStopCh)
219+
Expect(err).ToNot(HaveOccurred())
220+
}
221+
222+
checkNGFFunctionality(teaURL, coffeeURL, ngfPodName, files, ns)
223+
}
224+
105225
func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files []string, ns *core.Namespace) {
106226
var (
107227
err error
@@ -128,37 +248,7 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files
128248
WithPolling(500 * time.Millisecond).
129249
Should(Succeed())
130250
}
131-
132-
Eventually(
133-
func() error {
134-
return checkForWorkingTraffic(teaURL, coffeeURL)
135-
}).
136-
WithTimeout(timeoutConfig.RequestTimeout).
137-
WithPolling(500 * time.Millisecond).
138-
Should(Succeed())
139-
140-
Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
141-
142-
Eventually(
143-
func() error {
144-
return checkForFailingTraffic(teaURL, coffeeURL)
145-
}).
146-
WithTimeout(timeoutConfig.RequestTimeout).
147-
WithPolling(500 * time.Millisecond).
148-
Should(Succeed())
149-
150-
Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed())
151-
Expect(resourceManager.WaitForAppsToBeReadyWithPodCount(ns.Name, 2)).To(Succeed())
152-
153-
Eventually(
154-
func() error {
155-
return checkForWorkingTraffic(teaURL, coffeeURL)
156-
}).
157-
WithTimeout(timeoutConfig.RequestTimeout).
158-
WithPolling(500 * time.Millisecond).
159-
Should(Succeed())
160-
161-
checkContainerLogsForErrors(ngfPodName)
251+
checkNGFFunctionality(teaURL, coffeeURL, ngfPodName, files, ns)
162252
}
163253

164254
func restartContainer(ngfPodName, containerName string) {
@@ -256,6 +346,39 @@ func expectRequestToFail(appURL, address string) error {
256346
return nil
257347
}
258348

349+
func checkNGFFunctionality(teaURL, coffeeURL, ngfPodName string, files []string, ns *core.Namespace) {
350+
Eventually(
351+
func() error {
352+
return checkForWorkingTraffic(teaURL, coffeeURL)
353+
}).
354+
WithTimeout(timeoutConfig.RequestTimeout).
355+
WithPolling(500 * time.Millisecond).
356+
Should(Succeed())
357+
358+
Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed())
359+
360+
Eventually(
361+
func() error {
362+
return checkForFailingTraffic(teaURL, coffeeURL)
363+
}).
364+
WithTimeout(timeoutConfig.RequestTimeout).
365+
WithPolling(500 * time.Millisecond).
366+
Should(Succeed())
367+
368+
Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed())
369+
Expect(resourceManager.WaitForAppsToBeReadyWithPodCount(ns.Name, 2)).To(Succeed())
370+
371+
Eventually(
372+
func() error {
373+
return checkForWorkingTraffic(teaURL, coffeeURL)
374+
}).
375+
WithTimeout(timeoutConfig.RequestTimeout).
376+
WithPolling(500 * time.Millisecond).
377+
Should(Succeed())
378+
379+
checkContainerLogsForErrors(ngfPodName)
380+
}
381+
259382
// checkContainerLogsForErrors checks both nginx and ngf container's logs for any possible errors.
260383
// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests,
261384
// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests
@@ -347,6 +470,24 @@ func getContainerRestartCount(ngfPodName, containerName string) (int, error) {
347470
return restartCount, nil
348471
}
349472

473+
func getNodeNames() ([]string, error) {
474+
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
475+
defer cancel()
476+
var nodes core.NodeList
477+
478+
if err := k8sClient.List(ctx, &nodes); err != nil {
479+
return nil, fmt.Errorf("error getting nodes: %w", err)
480+
}
481+
482+
names := make([]string, 0, len(nodes.Items))
483+
484+
for _, node := range nodes.Items {
485+
names = append(names, node.Name)
486+
}
487+
488+
return names, nil
489+
}
490+
350491
func runNodeDebuggerJob(ngfPodName, jobScript string) (*v1.Job, error) {
351492
ctx, cancel := context.WithTimeout(context.Background(), timeoutConfig.GetTimeout)
352493
defer cancel()

0 commit comments

Comments
 (0)