Skip to content

Add pprof labels in processes and for lifecycles #19202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 16 additions & 34 deletions modules/graceful/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package graceful

import (
"context"
"runtime/pprof"
"sync"
"time"

Expand Down Expand Up @@ -62,15 +63,13 @@ type WithCallback func(callback func())
// Similarly the callback function provided to atTerminate must return once termination is complete.
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
// - users must therefore be careful to only call these as necessary.
// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate.
type RunnableWithShutdownFns func(atShutdown, atTerminate func(func()))

// RunWithShutdownFns takes a function that has both atShutdown and atTerminate callbacks
// After the callback to atShutdown is called and is complete, the main function must return.
// Similarly the callback function provided to atTerminate must return once termination is complete.
// Please note that use of the atShutdown and atTerminate callbacks will create go-routines that will wait till their respective signals
// - users must therefore be careful to only call these as necessary.
// If run is not expected to run indefinitely RunWithShutdownChan is likely to be more appropriate.
func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) {
g.runningServerWaitGroup.Add(1)
defer g.runningServerWaitGroup.Done()
Expand Down Expand Up @@ -98,32 +97,6 @@ func (g *Manager) RunWithShutdownFns(run RunnableWithShutdownFns) {
})
}

// RunnableWithShutdownChan is a runnable that receives a channel to watch for shutdown and a function with which to register a terminate callback.
// After the atShutdown channel is closed, the main function must return once shutdown is complete.
// (Optionally IsHammer may be waited for instead; however, this should be avoided if possible.)
// The callback function provided to atTerminate must return once termination is complete.
// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary.
type RunnableWithShutdownChan func(atShutdown <-chan struct{}, atTerminate WithCallback)

// RunWithShutdownChan runs the provided function, passing it a channel to watch for shutdown and a function with which to register atTerminate callbacks.
// After the atShutdown channel is closed, the main function must return once shutdown is complete.
// (Optionally IsHammer may be waited for instead; however, this should be avoided if possible.)
// The callback function provided to atTerminate must return once termination is complete.
// Please note that use of the atTerminate function will create a go-routine that will wait till terminate - users must therefore be careful to only call this as necessary.
func (g *Manager) RunWithShutdownChan(run RunnableWithShutdownChan) {
// Count this runnable in the running-server wait group for as long as run is executing.
g.runningServerWaitGroup.Add(1)
defer g.runningServerWaitGroup.Done()
// If run panics, log the panic with a stacktrace at critical level and call g.doShutdown().
defer func() {
if err := recover(); err != nil {
log.Critical("PANIC during RunWithShutdownChan: %v\nStacktrace: %s", err, log.Stack(2))
g.doShutdown()
}
}()
// run is given the result of g.IsShutdown() as its shutdown channel; any callback it registers is handed on to g.RunAtTerminate.
run(g.IsShutdown(), func(atTerminate func()) {
g.RunAtTerminate(atTerminate)
})
}

// RunWithShutdownContext takes a function that has a context to watch for shutdown.
// After the provided context is Done(), the main function must return once shutdown is complete.
// (Optionally the HammerContext may be obtained and waited for however, this should be avoided if possible.)
Expand All @@ -136,7 +109,9 @@ func (g *Manager) RunWithShutdownContext(run func(context.Context)) {
g.doShutdown()
}
}()
run(g.ShutdownContext())
ctx := g.ShutdownContext()
pprof.SetGoroutineLabels(ctx) // We don't have a label to restore back to but I think this is fine
run(ctx)
}

// RunAtTerminate adds to the terminate wait group and creates a go-routine to run the provided function at termination
Expand Down Expand Up @@ -198,6 +173,8 @@ func (g *Manager) doShutdown() {
}
g.lock.Lock()
g.shutdownCtxCancel()
atShutdownCtx := pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "post-shutdown"))
pprof.SetGoroutineLabels(atShutdownCtx)
for _, fn := range g.toRunAtShutdown {
go fn()
}
Expand All @@ -214,7 +191,7 @@ func (g *Manager) doShutdown() {
g.doTerminate()
g.WaitForTerminate()
g.lock.Lock()
g.doneCtxCancel()
g.managerCtxCancel()
g.lock.Unlock()
}()
}
Expand All @@ -227,6 +204,8 @@ func (g *Manager) doHammerTime(d time.Duration) {
default:
log.Warn("Setting Hammer condition")
g.hammerCtxCancel()
atHammerCtx := pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "post-hammer"))
pprof.SetGoroutineLabels(atHammerCtx)
for _, fn := range g.toRunAtHammer {
go fn()
}
Expand All @@ -244,6 +223,9 @@ func (g *Manager) doTerminate() {
default:
log.Warn("Terminating")
g.terminateCtxCancel()
atTerminateCtx := pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "post-terminate"))
pprof.SetGoroutineLabels(atTerminateCtx)

for _, fn := range g.toRunAtTerminate {
go fn()
}
Expand Down Expand Up @@ -331,20 +313,20 @@ func (g *Manager) InformCleanup() {

// Done allows the manager to be viewed as a context.Context; it returns a channel that is closed when the server is finished terminating.
func (g *Manager) Done() <-chan struct{} {
return g.doneCtx.Done()
return g.managerCtx.Done()
}

// Err allows the manager to be viewed as a context.Context done at Terminate; it returns the Err() of the underlying context.
func (g *Manager) Err() error {
return g.doneCtx.Err()
return g.managerCtx.Err()
}

// Value allows the manager to be viewed as a context.Context done at Terminate; it looks the key up in the underlying context.
func (g *Manager) Value(key interface{}) interface{} {
return g.doneCtx.Value(key)
return g.managerCtx.Value(key)
}

// Deadline allows the manager to be viewed as a context.Context; it reports the deadline (if any) of the underlying context.
// The manager itself sets no fixed Deadline.
func (g *Manager) Deadline() (deadline time.Time, ok bool) {
return g.doneCtx.Deadline()
return g.managerCtx.Deadline()
}
17 changes: 14 additions & 3 deletions modules/graceful/manager_unix.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"errors"
"os"
"os/signal"
"runtime/pprof"
"sync"
"syscall"
"time"
Expand All @@ -29,11 +30,11 @@ type Manager struct {
shutdownCtx context.Context
hammerCtx context.Context
terminateCtx context.Context
doneCtx context.Context
managerCtx context.Context
shutdownCtxCancel context.CancelFunc
hammerCtxCancel context.CancelFunc
terminateCtxCancel context.CancelFunc
doneCtxCancel context.CancelFunc
managerCtxCancel context.CancelFunc
runningServerWaitGroup sync.WaitGroup
createServerWaitGroup sync.WaitGroup
terminateWaitGroup sync.WaitGroup
Expand All @@ -58,7 +59,17 @@ func (g *Manager) start(ctx context.Context) {
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(ctx)
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(ctx)
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(ctx)
g.doneCtx, g.doneCtxCancel = context.WithCancel(ctx)
g.managerCtx, g.managerCtxCancel = context.WithCancel(ctx)

// Next add pprof labels to these contexts
g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate"))
g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown"))
g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer"))
g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager"))

// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
pprof.SetGoroutineLabels(g.managerCtx)
defer pprof.SetGoroutineLabels(ctx)

// Set the running state & handle signals
g.setState(stateRunning)
Expand Down
17 changes: 14 additions & 3 deletions modules/graceful/manager_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package graceful
import (
"context"
"os"
"runtime/pprof"
"strconv"
"sync"
"time"
Expand Down Expand Up @@ -40,11 +41,11 @@ type Manager struct {
shutdownCtx context.Context
hammerCtx context.Context
terminateCtx context.Context
doneCtx context.Context
managerCtx context.Context
shutdownCtxCancel context.CancelFunc
hammerCtxCancel context.CancelFunc
terminateCtxCancel context.CancelFunc
doneCtxCancel context.CancelFunc
managerCtxCancel context.CancelFunc
runningServerWaitGroup sync.WaitGroup
createServerWaitGroup sync.WaitGroup
terminateWaitGroup sync.WaitGroup
Expand All @@ -71,7 +72,17 @@ func (g *Manager) start() {
g.terminateCtx, g.terminateCtxCancel = context.WithCancel(g.ctx)
g.shutdownCtx, g.shutdownCtxCancel = context.WithCancel(g.ctx)
g.hammerCtx, g.hammerCtxCancel = context.WithCancel(g.ctx)
g.doneCtx, g.doneCtxCancel = context.WithCancel(g.ctx)
g.managerCtx, g.managerCtxCancel = context.WithCancel(g.ctx)

// Next add pprof labels to these contexts
g.terminateCtx = pprof.WithLabels(g.terminateCtx, pprof.Labels("graceful-lifecycle", "with-terminate"))
g.shutdownCtx = pprof.WithLabels(g.shutdownCtx, pprof.Labels("graceful-lifecycle", "with-shutdown"))
g.hammerCtx = pprof.WithLabels(g.hammerCtx, pprof.Labels("graceful-lifecycle", "with-hammer"))
g.managerCtx = pprof.WithLabels(g.managerCtx, pprof.Labels("graceful-lifecycle", "with-manager"))

// Now label this and all goroutines created by this goroutine with the graceful-lifecycle manager
pprof.SetGoroutineLabels(g.managerCtx)
defer pprof.SetGoroutineLabels(g.ctx)

// Make channels
g.shutdownRequested = make(chan struct{})
Expand Down
19 changes: 11 additions & 8 deletions modules/process/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"fmt"
"io"
"os/exec"
"runtime/pprof"
"sort"
"strconv"
"sync"
Expand Down Expand Up @@ -66,11 +67,9 @@ func GetManager() *Manager {
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
// process table.
func (pm *Manager) AddContext(parent context.Context, description string) (ctx context.Context, cancel context.CancelFunc, finished FinishedFunc) {
parentPID := GetParentPID(parent)

ctx, cancel = context.WithCancel(parent)

pid, finished := pm.Add(parentPID, description, cancel)
ctx, pid, finished := pm.Add(ctx, description, cancel)

return &Context{
Context: ctx,
Expand All @@ -87,11 +86,9 @@ func (pm *Manager) AddContext(parent context.Context, description string) (ctx c
// Most processes will not need to use the cancel function but there will be cases whereby you want to cancel the process but not immediately remove it from the
// process table.
func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Duration, description string) (ctx context.Context, cancel context.CancelFunc, finshed FinishedFunc) {
parentPID := GetParentPID(parent)

ctx, cancel = context.WithTimeout(parent, timeout)

pid, finshed := pm.Add(parentPID, description, cancel)
ctx, pid, finshed := pm.Add(ctx, description, cancel)

return &Context{
Context: ctx,
Expand All @@ -100,7 +97,9 @@ func (pm *Manager) AddContextTimeout(parent context.Context, timeout time.Durati
}

// Add creates a new process
func (pm *Manager) Add(parentPID IDType, description string, cancel context.CancelFunc) (IDType, FinishedFunc) {
func (pm *Manager) Add(ctx context.Context, description string, cancel context.CancelFunc) (context.Context, IDType, FinishedFunc) {
parentPID := GetParentPID(ctx)

pm.mutex.Lock()
start, pid := pm.nextPID()

Expand All @@ -120,6 +119,7 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc
finished := func() {
cancel()
pm.remove(process)
pprof.SetGoroutineLabels(ctx)
}

if parent != nil {
Expand All @@ -128,7 +128,10 @@ func (pm *Manager) Add(parentPID IDType, description string, cancel context.Canc
pm.processes[pid] = process
pm.mutex.Unlock()

return pid, finished
pprofCtx := pprof.WithLabels(ctx, pprof.Labels("process-description", description, "ppid", string(parentPID), "pid", string(pid)))
pprof.SetGoroutineLabels(pprofCtx)

return pprofCtx, pid, finished
}

// nextPID will return the next available PID. pm.mutex should already be locked.
Expand Down