Skip to content

Commit 0021cec

Browse files
authored
Remove sock files on nginx startup (#2131)
Problem: NGF Pod cannot recover if NGINX master process fails without cleaning up Solution: Update the nginx Dockerfile CMD to clean up sock files on start-up
1 parent 747bf0a commit 0021cec

File tree

13 files changed

+31
-51
lines changed

13 files changed

+31
-51
lines changed

build/Dockerfile.nginx

+2
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@ RUN chown -R 101:1001 /etc/nginx /var/cache/nginx /var/lib/nginx
2121
LABEL org.nginx.ngf.image.build.agent="${BUILD_AGENT}"
2222

2323
USER 101:1001
24+
25+
CMD ["sh", "-c", "rm -rf /var/run/nginx/*.sock && /docker-entrypoint.sh nginx -g 'daemon off;'"]

build/Dockerfile.nginxplus

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,4 @@ USER 101:1001
3838

3939
LABEL org.nginx.ngf.image.build.agent="${BUILD_AGENT}"
4040

41-
CMD ["nginx", "-g", "daemon off;"]
41+
CMD ["sh", "-c", "rm -rf /var/run/nginx/*.sock && nginx -g 'daemon off;'"]

charts/nginx-gateway-fabric/templates/deployment.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -161,8 +161,6 @@ spec:
161161
mountPath: /var/run/nginx
162162
- name: nginx-cache
163163
mountPath: /var/cache/nginx
164-
- name: nginx-lib
165-
mountPath: /var/lib/nginx
166164
- name: nginx-includes
167165
mountPath: /etc/nginx/includes
168166
{{- with .Values.nginx.extraVolumeMounts -}}
@@ -197,8 +195,6 @@ spec:
197195
emptyDir: {}
198196
- name: nginx-cache
199197
emptyDir: {}
200-
- name: nginx-lib
201-
emptyDir: {}
202198
- name: nginx-includes
203199
emptyDir: {}
204200
{{- with .Values.extraVolumes -}}

config/tests/static-deployment.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,6 @@ spec:
106106
mountPath: /var/run/nginx
107107
- name: nginx-cache
108108
mountPath: /var/cache/nginx
109-
- name: nginx-lib
110-
mountPath: /var/lib/nginx
111109
- name: nginx-includes
112110
mountPath: /etc/nginx/includes
113111
terminationGracePeriodSeconds: 30
@@ -127,7 +125,5 @@ spec:
127125
emptyDir: {}
128126
- name: nginx-cache
129127
emptyDir: {}
130-
- name: nginx-lib
131-
emptyDir: {}
132128
- name: nginx-includes
133129
emptyDir: {}

deploy/manifests/nginx-gateway-experimental.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -262,8 +262,6 @@ spec:
262262
mountPath: /var/run/nginx
263263
- name: nginx-cache
264264
mountPath: /var/cache/nginx
265-
- name: nginx-lib
266-
mountPath: /var/lib/nginx
267265
- name: nginx-includes
268266
mountPath: /etc/nginx/includes
269267
terminationGracePeriodSeconds: 30
@@ -283,8 +281,6 @@ spec:
283281
emptyDir: {}
284282
- name: nginx-cache
285283
emptyDir: {}
286-
- name: nginx-lib
287-
emptyDir: {}
288284
- name: nginx-includes
289285
emptyDir: {}
290286
---

deploy/manifests/nginx-gateway.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,6 @@ spec:
258258
mountPath: /var/run/nginx
259259
- name: nginx-cache
260260
mountPath: /var/cache/nginx
261-
- name: nginx-lib
262-
mountPath: /var/lib/nginx
263261
- name: nginx-includes
264262
mountPath: /etc/nginx/includes
265263
terminationGracePeriodSeconds: 30
@@ -279,8 +277,6 @@ spec:
279277
emptyDir: {}
280278
- name: nginx-cache
281279
emptyDir: {}
282-
- name: nginx-lib
283-
emptyDir: {}
284280
- name: nginx-includes
285281
emptyDir: {}
286282
---

deploy/manifests/nginx-plus-gateway-experimental.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,6 @@ spec:
269269
mountPath: /var/run/nginx
270270
- name: nginx-cache
271271
mountPath: /var/cache/nginx
272-
- name: nginx-lib
273-
mountPath: /var/lib/nginx
274272
- name: nginx-includes
275273
mountPath: /etc/nginx/includes
276274
terminationGracePeriodSeconds: 30
@@ -290,8 +288,6 @@ spec:
290288
emptyDir: {}
291289
- name: nginx-cache
292290
emptyDir: {}
293-
- name: nginx-lib
294-
emptyDir: {}
295291
- name: nginx-includes
296292
emptyDir: {}
297293
---

deploy/manifests/nginx-plus-gateway.yaml

-4
Original file line numberDiff line numberDiff line change
@@ -265,8 +265,6 @@ spec:
265265
mountPath: /var/run/nginx
266266
- name: nginx-cache
267267
mountPath: /var/cache/nginx
268-
- name: nginx-lib
269-
mountPath: /var/lib/nginx
270268
- name: nginx-includes
271269
mountPath: /etc/nginx/includes
272270
terminationGracePeriodSeconds: 30
@@ -286,8 +284,6 @@ spec:
286284
emptyDir: {}
287285
- name: nginx-cache
288286
emptyDir: {}
289-
- name: nginx-lib
290-
emptyDir: {}
291287
- name: nginx-includes
292288
emptyDir: {}
293289
---

internal/mode/static/nginx/config/servers_template.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -94,14 +94,14 @@ server {
9494
{{- end }}
9595
{{ end }}
9696
server {
97-
listen unix:/var/lib/nginx/nginx-502-server.sock;
97+
listen unix:/var/run/nginx/nginx-502-server.sock;
9898
access_log off;
9999
100100
return 502;
101101
}
102102
103103
server {
104-
listen unix:/var/lib/nginx/nginx-500-server.sock;
104+
listen unix:/var/run/nginx/nginx-500-server.sock;
105105
access_log off;
106106
107107
return 500;

internal/mode/static/nginx/config/upstreams.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ var upstreamsTemplate = gotemplate.Must(gotemplate.New("upstreams").Parse(upstre
1313

1414
const (
1515
// nginx502Server is used as a backend for services that cannot be resolved (have no IP address).
16-
nginx502Server = "unix:/var/lib/nginx/nginx-502-server.sock"
16+
nginx502Server = "unix:/var/run/nginx/nginx-502-server.sock"
1717
// nginx500Server is used as a server for the invalid backend ref upstream.
18-
nginx500Server = "unix:/var/lib/nginx/nginx-500-server.sock"
18+
nginx500Server = "unix:/var/run/nginx/nginx-500-server.sock"
1919
// invalidBackendRef is used as an upstream name for invalid backend references.
2020
invalidBackendRef = "invalid-backend-ref"
2121
// ossZoneSize is the upstream zone size for nginx open source.

internal/mode/static/nginx/config/upstreams_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func TestExecuteUpstreams(t *testing.T) {
4444
"upstream invalid-backend-ref",
4545
"server 10.0.0.0:80;",
4646
"server 11.0.0.0:80;",
47-
"server unix:/var/lib/nginx/nginx-502-server.sock;",
47+
"server unix:/var/run/nginx/nginx-502-server.sock;",
4848
}
4949

5050
upstreamResults := gen.executeUpstreams(dataplane.Configuration{Upstreams: stateUpstreams})

site/content/overview/gateway-architecture.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ The following list describes the connections, preceeded by their types in parent
8282
- Write: The _NGINX master_ writes its PID to the `nginx.pid` file stored in the `nginx-run` volume.
8383
- Read: The _NGINX master_ reads _configuration files_ and the _TLS cert and keys_ referenced in the configuration when it starts or during a reload. These files, certificates, and keys are stored in the `nginx-conf` and `nginx-secrets` volumes that are mounted to both the `nginx-gateway` and `nginx` containers.
8484
1. (File I/O)
85-
- Write: The _NGINX master_ writes to the auxiliary Unix sockets folder, which is located in the `/var/lib/nginx`
85+
- Write: The _NGINX master_ writes to the auxiliary Unix sockets folder, which is located in the `/var/run/nginx`
8686
directory.
8787
- Read: The _NGINX master_ reads the `nginx.conf` file from the `/etc/nginx` directory. This [file](https://github.com/nginxinc/nginx-gateway-fabric/blob/v1.3.0/internal/mode/static/nginx/conf/nginx.conf) contains the global and http configuration settings for NGINX. In addition, _NGINX master_ reads the NJS modules referenced in the configuration when it starts or during a reload. NJS modules are stored in the `/usr/lib/nginx/modules` directory.
8888
1. (File I/O) The _NGINX master_ sends logs to its _stdout_ and _stderr_, which are collected by the container runtime.

tests/suite/graceful_recovery_test.go

+22-20
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ const (
2727
)
2828

2929
// Since checkContainerLogsForErrors may experience interference from previous tests (as explained in the function
30-
// documentation), this test is recommended to be run separate from other nfr tests.
30+
// documentation), this test is recommended to be run separate from other tests.
3131
var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "graceful-recovery"), func() {
3232
files := []string{
3333
"graceful-recovery/cafe.yaml",
@@ -81,7 +81,7 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
8181
func() error {
8282
return checkForWorkingTraffic(teaURL, coffeeURL)
8383
}).
84-
WithTimeout(timeoutConfig.RequestTimeout).
84+
WithTimeout(timeoutConfig.RequestTimeout * 2).
8585
WithPolling(500 * time.Millisecond).
8686
Should(Succeed())
8787
})
@@ -96,8 +96,6 @@ var _ = Describe("Graceful Recovery test", Ordered, Label("functional", "gracefu
9696
})
9797

9898
It("recovers when nginx container is restarted", func() {
99-
// FIXME(bjee19) remove Skip() when https://github.com/nginxinc/nginx-gateway-fabric/issues/1108 is completed.
100-
Skip("Test currently fails due to this issue: https://github.com/nginxinc/nginx-gateway-fabric/issues/1108")
10199
runRecoveryTest(teaURL, coffeeURL, ngfPodName, nginxContainerName, files, &ns)
102100
})
103101
})
@@ -154,11 +152,11 @@ func runRecoveryTest(teaURL, coffeeURL, ngfPodName, containerName string, files
154152
func() error {
155153
return checkForWorkingTraffic(teaURL, coffeeURL)
156154
}).
157-
WithTimeout(timeoutConfig.RequestTimeout).
155+
WithTimeout(timeoutConfig.RequestTimeout * 2).
158156
WithPolling(500 * time.Millisecond).
159157
Should(Succeed())
160158

161-
checkContainerLogsForErrors(ngfPodName)
159+
checkContainerLogsForErrors(ngfPodName, containerName == nginxContainerName)
162160
}
163161

164162
func restartContainer(ngfPodName, containerName string) {
@@ -260,15 +258,17 @@ func expectRequestToFail(appURL, address string) error {
260258
// Since this function retrieves all the logs from both containers and the NGF pod may be shared between tests,
261259
// the logs retrieved may contain log messages from previous tests, thus any errors in the logs from previous tests
262260
// may cause an interference with this test and cause this test to fail.
263-
func checkContainerLogsForErrors(ngfPodName string) {
264-
logs, err := resourceManager.GetPodLogs(
261+
// Additionally, when the NGINX process is killed, some errors are expected in the NGF logs while we wait for the
262+
// NGINX container to be restarted.
263+
func checkContainerLogsForErrors(ngfPodName string, checkNginxLogsOnly bool) {
264+
nginxLogs, err := resourceManager.GetPodLogs(
265265
ngfNamespace,
266266
ngfPodName,
267267
&core.PodLogOptions{Container: nginxContainerName},
268268
)
269269
Expect(err).ToNot(HaveOccurred())
270270

271-
for _, line := range strings.Split(logs, "\n") {
271+
for _, line := range strings.Split(nginxLogs, "\n") {
272272
Expect(line).ToNot(ContainSubstring("[crit]"), line)
273273
Expect(line).ToNot(ContainSubstring("[alert]"), line)
274274
Expect(line).ToNot(ContainSubstring("[emerg]"), line)
@@ -281,18 +281,20 @@ func checkContainerLogsForErrors(ngfPodName string) {
281281
}
282282
}
283283

284-
logs, err = resourceManager.GetPodLogs(
285-
ngfNamespace,
286-
ngfPodName,
287-
&core.PodLogOptions{Container: ngfContainerName},
288-
)
289-
Expect(err).ToNot(HaveOccurred())
284+
if !checkNginxLogsOnly {
285+
ngfLogs, err := resourceManager.GetPodLogs(
286+
ngfNamespace,
287+
ngfPodName,
288+
&core.PodLogOptions{Container: ngfContainerName},
289+
)
290+
Expect(err).ToNot(HaveOccurred())
290291

291-
for _, line := range strings.Split(logs, "\n") {
292-
if *plusEnabled && strings.Contains(line, "\"level\":\"error\"") {
293-
Expect(line).To(ContainSubstring("Usage reporting must be enabled when using NGINX Plus"), line)
294-
} else {
295-
Expect(line).ToNot(ContainSubstring("\"level\":\"error\""), line)
292+
for _, line := range strings.Split(ngfLogs, "\n") {
293+
if *plusEnabled && strings.Contains(line, "\"level\":\"error\"") {
294+
Expect(line).To(ContainSubstring("Usage reporting must be enabled when using NGINX Plus"), line)
295+
} else {
296+
Expect(line).ToNot(ContainSubstring("\"level\":\"error\""), line)
297+
}
296298
}
297299
}
298300
}

0 commit comments

Comments
 (0)