Skip to content

Commit ed4146d

Browse files
committed
Ensure NGINX reload occurs (nginx#1033)
* Ensure reload occurs
1 parent 787b7c8 commit ed4146d

19 files changed

+438
-60
lines changed

docs/architecture.md

+7-4
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ API to update the handled resources' statuses and emit events.
106106
- Read: *NKG* reads the PID file `nginx.pid` from the `nginx-run` volume, located at `/var/run/nginx`. *NKG*
107107
extracts the PID of the nginx process from this file in order to send reload signals to *NGINX master*.
108108
4. (File I/O) *NKG* writes logs to its *stdout* and *stderr*, which are collected by the container runtime.
109-
5. (HTTP) *NKG* fetches the NGINX metrics via the unix:/var/lib/nginx/nginx-status.sock UNIX socket and converts it to
109+
5. (HTTP) *NKG* fetches the NGINX metrics via the unix:/var/run/nginx/nginx-status.sock UNIX socket and converts it to
110110
*Prometheus* format used in #2.
111111
6. (Signal) To reload NGINX, *NKG* sends the [reload signal][reload] to the **NGINX master**.
112112
7. (File I/O)
@@ -124,9 +124,12 @@ API to update the handled resources' statuses and emit events.
124124
9. (File I/O) The *NGINX master* sends logs to its *stdout* and *stderr*, which are collected by the container runtime.
125125
10. (File I/O) An *NGINX worker* writes logs to its *stdout* and *stderr*, which are collected by the container runtime.
126126
11. (Signal) The *NGINX master* controls the [lifecycle of *NGINX workers*][lifecycle] it creates workers with the new
127-
configuration and shutdowns workers with the old configuration.
128-
12. (HTTP,HTTPS) A *client* sends traffic to and receives traffic from any of the *NGINX workers* on ports 80 and 443.
129-
13. (HTTP,HTTPS) An *NGINX worker* sends traffic to and receives traffic from the *backends*.
127+
configuration and shutdowns workers with the old configuration.
128+
12. (HTTP) To consider a configuration reload a success, *NKG* ensures that at least one NGINX worker has the new
129+
configuration. To do that, *NKG* checks a particular endpoint via the unix:/var/run/nginx/nginx-config-version.sock
130+
UNIX socket.
131+
13. (HTTP,HTTPS) A *client* sends traffic to and receives traffic from any of the *NGINX workers* on ports 80 and 443.
132+
14. (HTTP,HTTPS) An *NGINX worker* sends traffic to and receives traffic from the *backends*.
130133

131134
[controller]: https://kubernetes.io/docs/concepts/architecture/controller/
132135

docs/images/nkg-pod.png

6.15 KB
Loading

internal/mode/static/handler.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ type eventHandlerConfig struct {
4545
logger logr.Logger
4646
// controlConfigNSName is the NamespacedName of the NginxGateway config for this controller.
4747
controlConfigNSName types.NamespacedName
48+
// version is the current version number of the nginx config.
49+
version int
4850
}
4951

5052
// eventHandlerImpl implements EventHandler.
@@ -90,7 +92,11 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
9092
}
9193

9294
var nginxReloadRes nginxReloadResult
93-
if err := h.updateNginx(ctx, dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver)); err != nil {
95+
h.cfg.version++
96+
if err := h.updateNginx(
97+
ctx,
98+
dataplane.BuildConfiguration(ctx, graph, h.cfg.serviceResolver, h.cfg.version),
99+
); err != nil {
94100
h.cfg.logger.Error(err, "Failed to update NGINX configuration")
95101
nginxReloadRes.error = err
96102
} else {
@@ -107,7 +113,7 @@ func (h *eventHandlerImpl) updateNginx(ctx context.Context, conf dataplane.Confi
107113
return fmt.Errorf("failed to replace NGINX configuration files: %w", err)
108114
}
109115

110-
if err := h.cfg.nginxRuntimeMgr.Reload(ctx); err != nil {
116+
if err := h.cfg.nginxRuntimeMgr.Reload(ctx, conf.Version); err != nil {
111117
return fmt.Errorf("failed to reload NGINX: %w", err)
112118
}
113119

internal/mode/static/handler_test.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ var _ = Describe("eventHandler", func() {
111111
handler.HandleEventBatch(context.Background(), batch)
112112

113113
checkUpsertEventExpectations(e)
114-
expectReconfig(dataplane.Configuration{}, fakeCfgFiles)
114+
expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles)
115115
})
116116

117117
It("should process Delete", func() {
@@ -124,7 +124,7 @@ var _ = Describe("eventHandler", func() {
124124
handler.HandleEventBatch(context.Background(), batch)
125125

126126
checkDeleteEventExpectations(e)
127-
expectReconfig(dataplane.Configuration{}, fakeCfgFiles)
127+
expectReconfig(dataplane.Configuration{Version: 1}, fakeCfgFiles)
128128
})
129129
})
130130

internal/mode/static/nginx/config/generator.go

+16
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ const (
2020

2121
// httpConfigFile is the path to the configuration file with HTTP configuration.
2222
httpConfigFile = httpFolder + "/http.conf"
23+
24+
// configVersionFile is the path to the config version configuration file.
25+
configVersionFile = httpFolder + "/config-version.conf"
2326
)
2427

2528
// ConfigFolders is a list of folders where NGINX configuration files are stored.
@@ -63,6 +66,8 @@ func (g GeneratorImpl) Generate(conf dataplane.Configuration) []file.File {
6366

6467
files = append(files, generateHTTPConfig(conf))
6568

69+
files = append(files, generateConfigVersion(conf.Version))
70+
6671
return files
6772
}
6873

@@ -104,3 +109,14 @@ func getExecuteFuncs() []executeFunc {
104109
executeMaps,
105110
}
106111
}
112+
113+
// generateConfigVersion writes the config version file.
114+
func generateConfigVersion(configVersion int) file.File {
115+
c := executeVersion(configVersion)
116+
117+
return file.File{
118+
Content: c,
119+
Path: configVersionFile,
120+
Type: file.TypeRegular,
121+
}
122+
}

internal/mode/static/nginx/config/generator_test.go

+7-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package config_test
22

33
import (
4+
"fmt"
45
"testing"
56

67
. "github.com/onsi/gomega"
@@ -65,7 +66,7 @@ func TestGenerate(t *testing.T) {
6566

6667
files := generator.Generate(conf)
6768

68-
g.Expect(files).To(HaveLen(2))
69+
g.Expect(files).To(HaveLen(3))
6970

7071
g.Expect(files[0]).To(Equal(file.File{
7172
Type: file.TypeSecret,
@@ -82,4 +83,9 @@ func TestGenerate(t *testing.T) {
8283
g.Expect(httpCfg).To(ContainSubstring("listen 443"))
8384
g.Expect(httpCfg).To(ContainSubstring("upstream"))
8485
g.Expect(httpCfg).To(ContainSubstring("split_clients"))
86+
87+
g.Expect(files[2].Type).To(Equal(file.TypeRegular))
88+
g.Expect(files[2].Path).To(Equal("/etc/nginx/conf.d/config-version.conf"))
89+
configVersion := string(files[2].Content)
90+
g.Expect(configVersion).To(ContainSubstring(fmt.Sprintf("return 200 %d", conf.Version)))
8591
}

internal/mode/static/nginx/config/servers_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -79,9 +79,9 @@ func TestExecuteServers(t *testing.T) {
7979
func TestExecuteForDefaultServers(t *testing.T) {
8080
testcases := []struct {
8181
msg string
82-
conf dataplane.Configuration
8382
httpPorts []int
8483
sslPorts []int
84+
conf dataplane.Configuration
8585
}{
8686
{
8787
conf: dataplane.Configuration{},
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
package config
2+
3+
import (
4+
gotemplate "text/template"
5+
)
6+
7+
var versionTemplate = gotemplate.Must(gotemplate.New("version").Parse(versionTemplateText))
8+
9+
func executeVersion(version int) []byte {
10+
return execute(versionTemplate, version)
11+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package config
2+
3+
var versionTemplateText = `
4+
server {
5+
listen unix:/var/run/nginx/nginx-config-version.sock;
6+
access_log off;
7+
8+
location /version {
9+
return 200 {{.}};
10+
}
11+
}
12+
`
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
package config
2+
3+
import (
4+
"strings"
5+
"testing"
6+
7+
. "github.com/onsi/gomega"
8+
)
9+
10+
func TestExecuteVersion(t *testing.T) {
11+
g := NewWithT(t)
12+
expSubStrings := map[string]int{
13+
"return 200 42;": 1,
14+
}
15+
16+
maps := string(executeVersion(42))
17+
for expSubStr, expCount := range expSubStrings {
18+
g.Expect(expCount).To(Equal(strings.Count(maps, expSubStr)))
19+
}
20+
}

internal/mode/static/nginx/runtime/manager.go

+61-18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package runtime
22

33
import (
4+
"bytes"
45
"context"
56
"errors"
67
"fmt"
@@ -15,58 +16,73 @@ import (
1516
)
1617

1718
const (
18-
pidFile = "/var/run/nginx/nginx.pid"
19-
pidFileTimeout = 10 * time.Second
19+
pidFile = "/var/run/nginx/nginx.pid"
20+
pidFileTimeout = 10000 * time.Millisecond
21+
childProcsTimeout = 1000 * time.Millisecond
22+
nginxReloadTimeout = 60000 * time.Millisecond
2023
)
2124

2225
type (
2326
readFileFunc func(string) ([]byte, error)
2427
checkFileFunc func(string) (fs.FileInfo, error)
2528
)
2629

30+
var (
31+
noNewWorkersErrFmt = "reload unsuccessful: no new NGINX worker processes started for config version %d." +
32+
" Please check the NGINX container logs for possible configuration issues: %w"
33+
childProcPathFmt = "/proc/%[1]v/task/%[1]v/children"
34+
)
35+
2736
//go:generate go run github.com/maxbrunsfeld/counterfeiter/v6 . Manager
2837

2938
// Manager manages the runtime of NGINX.
3039
type Manager interface {
3140
// Reload reloads NGINX configuration. It is a blocking operation.
32-
Reload(ctx context.Context) error
41+
Reload(ctx context.Context, configVersion int) error
3342
}
3443

3544
// ManagerImpl implements Manager.
36-
type ManagerImpl struct{}
45+
type ManagerImpl struct {
46+
verifyClient *verifyClient
47+
}
3748

3849
// NewManagerImpl creates a new ManagerImpl.
3950
func NewManagerImpl() *ManagerImpl {
40-
return &ManagerImpl{}
51+
return &ManagerImpl{
52+
verifyClient: newVerifyClient(nginxReloadTimeout),
53+
}
4154
}
4255

43-
func (m *ManagerImpl) Reload(ctx context.Context) error {
56+
func (m *ManagerImpl) Reload(ctx context.Context, configVersion int) error {
4457
// We find the main NGINX PID on every reload because it will change if the NGINX container is restarted.
4558
pid, err := findMainProcess(ctx, os.Stat, os.ReadFile, pidFileTimeout)
4659
if err != nil {
4760
return fmt.Errorf("failed to find NGINX main process: %w", err)
4861
}
4962

63+
childProcFile := fmt.Sprintf(childProcPathFmt, pid)
64+
previousChildProcesses, err := os.ReadFile(childProcFile)
65+
if err != nil {
66+
return err
67+
}
68+
5069
// send HUP signal to the NGINX main process reload configuration
5170
// See https://nginx.org/en/docs/control.html
5271
if err := syscall.Kill(pid, syscall.SIGHUP); err != nil {
5372
return fmt.Errorf("failed to send the HUP signal to NGINX main: %w", err)
5473
}
5574

56-
// FIXME(pleshakov)
57-
// (1) ensure the reload actually happens.
58-
// https://github.com/nginxinc/nginx-kubernetes-gateway/issues/664
59-
60-
// for now, to prevent a subsequent reload starting before the in-flight reload finishes, we simply sleep.
61-
// Fixing (1) will make the sleep unnecessary.
62-
63-
select {
64-
case <-ctx.Done():
65-
return nil
66-
case <-time.After(1 * time.Second):
75+
if err := ensureNewNginxWorkers(
76+
ctx,
77+
childProcFile,
78+
previousChildProcesses,
79+
os.ReadFile,
80+
childProcsTimeout,
81+
); err != nil {
82+
return fmt.Errorf(noNewWorkersErrFmt, configVersion, err)
6783
}
6884

69-
return nil
85+
return m.verifyClient.waitForCorrectVersion(ctx, configVersion)
7086
}
7187

7288
// EnsureNginxRunning ensures NGINX is running by locating the main process.
@@ -116,3 +132,30 @@ func findMainProcess(
116132

117133
return pid, nil
118134
}
135+
136+
func ensureNewNginxWorkers(
137+
ctx context.Context,
138+
childProcFile string,
139+
previousContents []byte,
140+
readFile readFileFunc,
141+
timeout time.Duration,
142+
) error {
143+
ctx, cancel := context.WithTimeout(ctx, timeout)
144+
defer cancel()
145+
146+
return wait.PollUntilContextCancel(
147+
ctx,
148+
25*time.Millisecond,
149+
true, /* poll immediately */
150+
func(ctx context.Context) (bool, error) {
151+
content, err := readFile(childProcFile)
152+
if err != nil {
153+
return false, err
154+
}
155+
if !bytes.Equal(previousContents, content) {
156+
return true, nil
157+
}
158+
return false, nil
159+
},
160+
)
161+
}

0 commit comments

Comments
 (0)