Skip to content

Commit 7565e5c

Browse files
zeripath, clarfonthey, GiteaBot
authored
Implement systemd-notify protocol (#21151)
This PR adds support for the systemd notify protocol. Several status messages are provided. We should likely add a common notify/status message for graceful. Replaces #21140 Signed-off-by: Andrew Thornton <[email protected]> --------- Signed-off-by: Andrew Thornton <[email protected]> Co-authored-by: ltdk <[email protected]> Co-authored-by: Giteabot <[email protected]>
1 parent a5be7f3 commit 7565e5c

File tree

4 files changed

+161
-13
lines changed

4 files changed

+161
-13
lines changed

contrib/systemd/gitea.service

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ After=network.target
5252
# Uncomment the next line if you have repos with lots of files and get a HTTP 500 error because of that
5353
# LimitNOFILE=524288:524288
5454
RestartSec=2s
55-
Type=simple
55+
Type=notify
5656
User=git
5757
Group=git
5858
WorkingDirectory=/var/lib/gitea/
@@ -62,6 +62,7 @@ WorkingDirectory=/var/lib/gitea/
6262
ExecStart=/usr/local/bin/gitea web --config /etc/gitea/app.ini
6363
Restart=always
6464
Environment=USER=git HOME=/home/git GITEA_WORK_DIR=/var/lib/gitea
65+
WatchdogSec=30s
6566
# If you install Git to directory prefix other than default PATH (which happens
6667
# for example if you install other versions of Git side-to-side with
6768
# distribution version), uncomment below line and add that prefix to PATH

modules/graceful/manager_unix.go

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"os"
1212
"os/signal"
1313
"runtime/pprof"
14+
"strconv"
1415
"sync"
1516
"syscall"
1617
"time"
@@ -45,14 +46,49 @@ type Manager struct {
4546

4647
func newGracefulManager(ctx context.Context) *Manager {
4748
manager := &Manager{
48-
isChild: len(os.Getenv(listenFDs)) > 0 && os.Getppid() > 1,
49+
isChild: len(os.Getenv(listenFDsEnv)) > 0 && os.Getppid() > 1,
4950
lock: &sync.RWMutex{},
5051
}
5152
manager.createServerWaitGroup.Add(numberOfServersToCreate)
5253
manager.start(ctx)
5354
return manager
5455
}
5556

57+
// systemdNotifyMsg is a single "KEY=value" assignment sent to systemd over
// the notify socket.
type systemdNotifyMsg string

// Fixed notify messages (names and meanings per sd_notify(3)).
const (
	readyMsg     = systemdNotifyMsg("READY=1")     // start-up has finished
	stoppingMsg  = systemdNotifyMsg("STOPPING=1")  // the service is shutting down
	reloadingMsg = systemdNotifyMsg("RELOADING=1") // the service is reloading
	watchdogMsg  = systemdNotifyMsg("WATCHDOG=1")  // watchdog keep-alive
)

// statusMsg wraps a free-form, human-readable status text in a STATUS=
// assignment.
func statusMsg(msg string) systemdNotifyMsg {
	const prefix = "STATUS="
	return systemdNotifyMsg(prefix + msg)
}

// pidMsg builds a MAINPID= assignment carrying the PID of the current process.
func pidMsg() systemdNotifyMsg {
	pid := strconv.Itoa(os.Getpid())
	return systemdNotifyMsg("MAINPID=" + pid)
}
73+
74+
// Notify systemd of status via the notify protocol
75+
func (g *Manager) notify(msg systemdNotifyMsg) {
76+
conn, err := getNotifySocket()
77+
if err != nil {
78+
// the err is logged in getNotifySocket
79+
return
80+
}
81+
if conn == nil {
82+
return
83+
}
84+
defer conn.Close()
85+
86+
if _, err = conn.Write([]byte(msg)); err != nil {
87+
log.Warn("Failed to notify NOTIFY_SOCKET: %v", err)
88+
return
89+
}
90+
}
91+
5692
func (g *Manager) start(ctx context.Context) {
5793
// Make contexts
5894
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx)
@@ -72,6 +108,8 @@ func (g *Manager) start(ctx context.Context) {
72108

73109
// Set the running state & handle signals
74110
g.setState(stateRunning)
111+
g.notify(statusMsg("Starting Gitea"))
112+
g.notify(pidMsg())
75113
go g.handleSignals(g.managerCtx)
76114

77115
// Handle clean up of unused provided listeners and delayed start-up
@@ -84,6 +122,7 @@ func (g *Manager) start(ctx context.Context) {
84122
// Ignore the error here there's not much we can do with it
85123
// They're logged in the CloseProvidedListeners function
86124
_ = CloseProvidedListeners()
125+
g.notify(readyMsg)
87126
}()
88127
if setting.StartupTimeout > 0 {
89128
go func() {
@@ -104,6 +143,8 @@ func (g *Manager) start(ctx context.Context) {
104143
return
105144
case <-time.After(setting.StartupTimeout):
106145
log.Error("Startup took too long! Shutting down")
146+
g.notify(statusMsg("Startup took too long! Shutting down"))
147+
g.notify(stoppingMsg)
107148
g.doShutdown()
108149
}
109150
}()
@@ -126,6 +167,13 @@ func (g *Manager) handleSignals(ctx context.Context) {
126167
syscall.SIGTSTP,
127168
)
128169

170+
watchdogTimeout := getWatchdogTimeout()
171+
t := &time.Ticker{}
172+
if watchdogTimeout != 0 {
173+
g.notify(watchdogMsg)
174+
t = time.NewTicker(watchdogTimeout / 2)
175+
}
176+
129177
pid := syscall.Getpid()
130178
for {
131179
select {
@@ -136,6 +184,7 @@ func (g *Manager) handleSignals(ctx context.Context) {
136184
g.DoGracefulRestart()
137185
case syscall.SIGUSR1:
138186
log.Warn("PID %d. Received SIGUSR1. Releasing and reopening logs", pid)
187+
g.notify(statusMsg("Releasing and reopening logs"))
139188
if err := log.ReleaseReopen(); err != nil {
140189
log.Error("Error whilst releasing and reopening logs: %v", err)
141190
}
@@ -153,6 +202,8 @@ func (g *Manager) handleSignals(ctx context.Context) {
153202
default:
154203
log.Info("PID %d. Received %v.", pid, sig)
155204
}
205+
case <-t.C:
206+
g.notify(watchdogMsg)
156207
case <-ctx.Done():
157208
log.Warn("PID: %d. Background context for manager closed - %v - Shutting down...", pid, ctx.Err())
158209
g.DoGracefulShutdown()
@@ -169,6 +220,9 @@ func (g *Manager) doFork() error {
169220
}
170221
g.forked = true
171222
g.lock.Unlock()
223+
224+
g.notify(reloadingMsg)
225+
172226
// We need to move the file logs to append pids
173227
setting.RestartLogsWithPIDSuffix()
174228

@@ -191,18 +245,27 @@ func (g *Manager) DoGracefulRestart() {
191245
}
192246
} else {
193247
log.Info("PID: %d. Not set restartable. Shutting down...", os.Getpid())
194-
248+
g.notify(stoppingMsg)
195249
g.doShutdown()
196250
}
197251
}
198252

199253
// DoImmediateHammer causes an immediate hammer
200254
func (g *Manager) DoImmediateHammer() {
255+
g.notify(statusMsg("Sending immediate hammer"))
201256
g.doHammerTime(0 * time.Second)
202257
}
203258

204259
// DoGracefulShutdown causes a graceful shutdown
205260
func (g *Manager) DoGracefulShutdown() {
261+
g.lock.Lock()
262+
if !g.forked {
263+
g.lock.Unlock()
264+
g.notify(stoppingMsg)
265+
} else {
266+
g.lock.Unlock()
267+
g.notify(statusMsg("Shutting down after fork"))
268+
}
206269
g.doShutdown()
207270
}
208271

modules/graceful/net_unix.go

Lines changed: 81 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,20 @@ import (
1414
"strconv"
1515
"strings"
1616
"sync"
17+
"time"
1718

1819
"code.gitea.io/gitea/modules/log"
1920
"code.gitea.io/gitea/modules/setting"
2021
"code.gitea.io/gitea/modules/util"
2122
)
2223

2324
// Environment variables and constants used to hand listeners over to a
// restarted process.
const (
	// listenFDsEnv names the variable carrying the number of inherited
	// listener file descriptors.
	listenFDsEnv = "LISTEN_FDS"
	// startFD is the first inherited listener descriptor (0-2 are
	// stdin/stdout/stderr).
	startFD = 3
	// unlinkFDsEnv names the variable carrying a comma-separated list of
	// listener indexes that should be unlinked.
	unlinkFDsEnv = "GITEA_UNLINK_FDS"
)

// Environment variables provided by systemd for the notify protocol.
const (
	// notifySocketEnv names the variable holding the notify socket address.
	notifySocketEnv = "NOTIFY_SOCKET"
	// watchdogTimeoutEnv names the variable holding the watchdog timeout in
	// microseconds.
	watchdogTimeoutEnv = "WATCHDOG_USEC"
)
2832

2933
// In order to keep the working directory the same as when we started we record
@@ -38,25 +42,62 @@ var (
3842
activeListenersToUnlink = []bool{}
3943
providedListeners = []net.Listener{}
4044
activeListeners = []net.Listener{}
45+
46+
notifySocketAddr string
47+
watchdogTimeout time.Duration
4148
)
4249

4350
func getProvidedFDs() (savedErr error) {
4451
// Only inherit the provided FDS once but we will save the error so that repeated calls to this function will return the same error
4552
once.Do(func() {
4653
mutex.Lock()
4754
defer mutex.Unlock()
55+
// now handle some additional systemd provided things
56+
notifySocketAddr = os.Getenv(notifySocketEnv)
57+
if notifySocketAddr != "" {
58+
log.Debug("Systemd Notify Socket provided: %s", notifySocketAddr)
59+
savedErr = os.Unsetenv(notifySocketEnv)
60+
if savedErr != nil {
61+
log.Warn("Unable to Unset the NOTIFY_SOCKET environment variable: %v", savedErr)
62+
return
63+
}
64+
// FIXME: We don't handle WATCHDOG_PID
65+
timeoutStr := os.Getenv(watchdogTimeoutEnv)
66+
if timeoutStr != "" {
67+
savedErr = os.Unsetenv(watchdogTimeoutEnv)
68+
if savedErr != nil {
69+
log.Warn("Unable to Unset the WATCHDOG_USEC environment variable: %v", savedErr)
70+
return
71+
}
4872

49-
numFDs := os.Getenv(listenFDs)
73+
s, err := strconv.ParseInt(timeoutStr, 10, 64)
74+
if err != nil {
75+
log.Error("Unable to parse the provided WATCHDOG_USEC: %v", err)
76+
savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %w", err)
77+
return
78+
}
79+
if s <= 0 {
80+
log.Error("Unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr)
81+
savedErr = fmt.Errorf("unable to parse the provided WATCHDOG_USEC: %s should be a positive number", timeoutStr)
82+
return
83+
}
84+
watchdogTimeout = time.Duration(s) * time.Microsecond
85+
}
86+
} else {
87+
log.Trace("No Systemd Notify Socket provided")
88+
}
89+
90+
numFDs := os.Getenv(listenFDsEnv)
5091
if numFDs == "" {
5192
return
5293
}
5394
n, err := strconv.Atoi(numFDs)
5495
if err != nil {
55-
savedErr = fmt.Errorf("%s is not a number: %s. Err: %w", listenFDs, numFDs, err)
96+
savedErr = fmt.Errorf("%s is not a number: %s. Err: %w", listenFDsEnv, numFDs, err)
5697
return
5798
}
5899

59-
fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDs), ",")
100+
fdsToUnlinkStr := strings.Split(os.Getenv(unlinkFDsEnv), ",")
60101
providedListenersToUnlink = make([]bool, n)
61102
for _, fdStr := range fdsToUnlinkStr {
62103
i, err := strconv.Atoi(fdStr)
@@ -73,7 +114,7 @@ func getProvidedFDs() (savedErr error) {
73114
if err == nil {
74115
// Close the inherited file if it's a listener
75116
if err = file.Close(); err != nil {
76-
savedErr = fmt.Errorf("error closing provided socket fd %d: %s", i, err)
117+
savedErr = fmt.Errorf("error closing provided socket fd %d: %w", i, err)
77118
return
78119
}
79120
providedListeners = append(providedListeners, l)
@@ -255,3 +296,36 @@ func getActiveListenersToUnlink() []bool {
255296
copy(listenersToUnlink, activeListenersToUnlink)
256297
return listenersToUnlink
257298
}
299+
300+
func getNotifySocket() (*net.UnixConn, error) {
301+
if err := getProvidedFDs(); err != nil {
302+
// This error will be logged elsewhere
303+
return nil, nil
304+
}
305+
306+
if notifySocketAddr == "" {
307+
return nil, nil
308+
}
309+
310+
socketAddr := &net.UnixAddr{
311+
Name: notifySocketAddr,
312+
Net: "unixgram",
313+
}
314+
315+
notifySocket, err := net.DialUnix(socketAddr.Net, nil, socketAddr)
316+
if err != nil {
317+
log.Warn("failed to dial NOTIFY_SOCKET %s: %v", socketAddr, err)
318+
return nil, err
319+
}
320+
321+
return notifySocket, nil
322+
}
323+
324+
func getWatchdogTimeout() time.Duration {
325+
if err := getProvidedFDs(); err != nil {
326+
// This error will be logged elsewhere
327+
return 0
328+
}
329+
330+
return watchdogTimeout
331+
}

modules/graceful/restart_unix.go

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ import (
1616
"strings"
1717
"sync"
1818
"syscall"
19+
"time"
1920
)
2021

2122
var killParent sync.Once
@@ -70,11 +71,20 @@ func RestartProcess() (int, error) {
7071
// Pass on the environment and replace the old count key with the new one.
7172
var env []string
7273
for _, v := range os.Environ() {
73-
if !strings.HasPrefix(v, listenFDs+"=") {
74+
if !strings.HasPrefix(v, listenFDsEnv+"=") {
7475
env = append(env, v)
7576
}
7677
}
77-
env = append(env, fmt.Sprintf("%s=%d", listenFDs, len(listeners)))
78+
env = append(env, fmt.Sprintf("%s=%d", listenFDsEnv, len(listeners)))
79+
80+
if notifySocketAddr != "" {
81+
env = append(env, fmt.Sprintf("%s=%s", notifySocketEnv, notifySocketAddr))
82+
}
83+
84+
if watchdogTimeout != 0 {
	// WATCHDOG_USEC is expressed in microseconds and getProvidedFDs parses
	// it as such, so hand the timeout to the child in microseconds too.
	// Dividing by time.Millisecond here would make the child see a timeout
	// 1000 times shorter than the one systemd configured.
	watchdogStr := strconv.FormatInt(int64(watchdogTimeout/time.Microsecond), 10)
	env = append(env, fmt.Sprintf("%s=%s", watchdogTimeoutEnv, watchdogStr))
}
7888

7989
sb := &strings.Builder{}
8090
for i, unlink := range getActiveListenersToUnlink() {
@@ -87,7 +97,7 @@ func RestartProcess() (int, error) {
8797
unlinkStr := sb.String()
8898
if len(unlinkStr) > 0 {
8999
unlinkStr = unlinkStr[:len(unlinkStr)-1]
90-
env = append(env, fmt.Sprintf("%s=%s", unlinkFDs, unlinkStr))
100+
env = append(env, fmt.Sprintf("%s=%s", unlinkFDsEnv, unlinkStr))
91101
}
92102

93103
allFiles := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, files...)

0 commit comments

Comments
 (0)