1
0
Fork 0
forked from forgejo/forgejo

Restore Graceful Restarting & Socket Activation (#7274)

* Prevent deadlock in indexer initialisation during graceful restart

* Move from gracehttp to our own service to add graceful ssh

* Add timeout for start of indexers and make hammer time configurable

* Fix issue with re-initialization in indexer during tests

* move the code to detect use of closed to graceful

* Handle logs gracefully - add a pid suffix just before restart

* Move to using a cond and a holder for indexers

* use time.Since

* Add some comments and attribution

* update modules.txt

* Use zero to disable timeout

* Move RestartProcess to its own file

* Add cleanup routine
This commit is contained in:
zeripath 2019-10-15 14:39:51 +01:00 committed by GitHub
parent 4a290bd64c
commit 167e8f18da
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
45 changed files with 1202 additions and 2009 deletions

View file

@ -0,0 +1,38 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package graceful
import "sync"
var cleanupWaitGroup sync.WaitGroup
func init() {
cleanupWaitGroup = sync.WaitGroup{}
// There are three places that could inherit sockets:
//
// * HTTP or HTTPS main listener
// * HTTP redirection fallback
// * SSH
//
// If you add an additional place you must increment this number
// and add a function to call InformCleanup if it's not going to be used
cleanupWaitGroup.Add(3)
// Wait till we're done getting all of the listeners and then close
// the unused ones
go func() {
cleanupWaitGroup.Wait()
// Ignore the error here there's not much we can do with it
// They're logged in the CloseProvidedListeners function
_ = CloseProvidedListeners()
}()
}
// InformCleanup tells the cleanup wait group that we have either taken a listener
// or will not be taking a listener
func InformCleanup() {
cleanupWaitGroup.Done()
}

209
modules/graceful/net.go Normal file
View file

@ -0,0 +1,209 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This code is heavily inspired by the archived gofacebook/gracenet/net.go handler
package graceful
import (
"fmt"
"net"
"os"
"strconv"
"strings"
"sync"
"code.gitea.io/gitea/modules/log"
)
const (
listenFDs = "LISTEN_FDS"
startFD = 3
)
// In order to keep the working directory the same as when we started we record
// it at startup.
var originalWD, _ = os.Getwd()
var (
once = sync.Once{}
mutex = sync.Mutex{}
providedListeners = []net.Listener{}
activeListeners = []net.Listener{}
)
func getProvidedFDs() (savedErr error) {
// Only inherit the provided FDS once but we will save the error so that repeated calls to this function will return the same error
once.Do(func() {
mutex.Lock()
defer mutex.Unlock()
numFDs := os.Getenv(listenFDs)
if numFDs == "" {
return
}
n, err := strconv.Atoi(numFDs)
if err != nil {
savedErr = fmt.Errorf("%s is not a number: %s. Err: %v", listenFDs, numFDs, err)
return
}
for i := startFD; i < n+startFD; i++ {
file := os.NewFile(uintptr(i), fmt.Sprintf("listener_FD%d", i))
l, err := net.FileListener(file)
if err == nil {
// Close the inherited file if it's a listener
if err = file.Close(); err != nil {
savedErr = fmt.Errorf("error closing provided socket fd %d: %s", i, err)
return
}
providedListeners = append(providedListeners, l)
continue
}
// If needed we can handle packetconns here.
savedErr = fmt.Errorf("Error getting provided socket fd %d: %v", i, err)
return
}
})
return savedErr
}
// CloseProvidedListeners closes all unused provided listeners.
func CloseProvidedListeners() error {
mutex.Lock()
defer mutex.Unlock()
var returnableError error
for _, l := range providedListeners {
err := l.Close()
if err != nil {
log.Error("Error in closing unused provided listener: %v", err)
if returnableError != nil {
returnableError = fmt.Errorf("%v & %v", returnableError, err)
} else {
returnableError = err
}
}
}
providedListeners = []net.Listener{}
return returnableError
}
// GetListener obtains a listener for the local network address. The network must be
// a stream-oriented network: "tcp", "tcp4", "tcp6", "unix" or "unixpacket". It
// returns an provided net.Listener for the matching network and address, or
// creates a new one using net.Listen.
func GetListener(network, address string) (net.Listener, error) {
// Add a deferral to say that we've tried to grab a listener
defer InformCleanup()
switch network {
case "tcp", "tcp4", "tcp6":
tcpAddr, err := net.ResolveTCPAddr(network, address)
if err != nil {
return nil, err
}
return GetListenerTCP(network, tcpAddr)
case "unix", "unixpacket":
unixAddr, err := net.ResolveUnixAddr(network, address)
if err != nil {
return nil, err
}
return GetListenerUnix(network, unixAddr)
default:
return nil, net.UnknownNetworkError(network)
}
}
// GetListenerTCP announces on the local network address. The network must be:
// "tcp", "tcp4" or "tcp6". It returns a provided net.Listener for the
// matching network and address, or creates a new one using net.ListenTCP.
func GetListenerTCP(network string, address *net.TCPAddr) (*net.TCPListener, error) {
if err := getProvidedFDs(); err != nil {
return nil, err
}
mutex.Lock()
defer mutex.Unlock()
// look for a provided listener
for i, l := range providedListeners {
if isSameAddr(l.Addr(), address) {
providedListeners = append(providedListeners[:i], providedListeners[i+1:]...)
activeListeners = append(activeListeners, l)
return l.(*net.TCPListener), nil
}
}
// no provided listener for this address -> make a fresh listener
l, err := net.ListenTCP(network, address)
if err != nil {
return nil, err
}
activeListeners = append(activeListeners, l)
return l, nil
}
// GetListenerUnix announces on the local network address. The network must be:
// "unix" or "unixpacket". It returns a provided net.Listener for the
// matching network and address, or creates a new one using net.ListenUnix.
func GetListenerUnix(network string, address *net.UnixAddr) (*net.UnixListener, error) {
if err := getProvidedFDs(); err != nil {
return nil, err
}
mutex.Lock()
defer mutex.Unlock()
// look for a provided listener
for i, l := range providedListeners {
if isSameAddr(l.Addr(), address) {
providedListeners = append(providedListeners[:i], providedListeners[i+1:]...)
activeListeners = append(activeListeners, l)
return l.(*net.UnixListener), nil
}
}
// make a fresh listener
l, err := net.ListenUnix(network, address)
if err != nil {
return nil, err
}
activeListeners = append(activeListeners, l)
return l, nil
}
func isSameAddr(a1, a2 net.Addr) bool {
// If the addresses are not on the same network fail.
if a1.Network() != a2.Network() {
return false
}
// If the two addresses have the same string representation they're equal
a1s := a1.String()
a2s := a2.String()
if a1s == a2s {
return true
}
// This allows for ipv6 vs ipv4 local addresses to compare as equal. This
// scenario is common when listening on localhost.
const ipv6prefix = "[::]"
a1s = strings.TrimPrefix(a1s, ipv6prefix)
a2s = strings.TrimPrefix(a2s, ipv6prefix)
const ipv4prefix = "0.0.0.0"
a1s = strings.TrimPrefix(a1s, ipv4prefix)
a2s = strings.TrimPrefix(a2s, ipv4prefix)
return a1s == a2s
}
func getActiveListeners() []net.Listener {
mutex.Lock()
defer mutex.Unlock()
listeners := make([]net.Listener, len(activeListeners))
copy(listeners, activeListeners)
return listeners
}

View file

@ -0,0 +1,67 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This code is heavily inspired by the archived gofacebook/gracenet/net.go handler
package graceful
import (
"fmt"
"os"
"os/exec"
"strings"
)
// RestartProcess starts a new process passing it the active listeners. It
// doesn't fork, but starts a new process using the same environment and
// arguments as when it was originally started. This allows for a newly
// deployed binary to be started. It returns the pid of the newly started
// process when successful.
func RestartProcess() (int, error) {
listeners := getActiveListeners()
// Extract the fds from the listeners.
files := make([]*os.File, len(listeners))
for i, l := range listeners {
var err error
// Now, all our listeners actually have File() functions so instead of
// individually casting we just use a hacky interface
files[i], err = l.(filer).File()
if err != nil {
return 0, err
}
// Remember to close these at the end.
defer files[i].Close()
}
// Use the original binary location. This works with symlinks such that if
// the file it points to has been changed we will use the updated symlink.
argv0, err := exec.LookPath(os.Args[0])
if err != nil {
return 0, err
}
// Pass on the environment and replace the old count key with the new one.
var env []string
for _, v := range os.Environ() {
if !strings.HasPrefix(v, listenFDs+"=") {
env = append(env, v)
}
}
env = append(env, fmt.Sprintf("%s=%d", listenFDs, len(listeners)))
allFiles := append([]*os.File{os.Stdin, os.Stdout, os.Stderr}, files...)
process, err := os.StartProcess(argv0, os.Args, &os.ProcAttr{
Dir: originalWD,
Env: env,
Files: allFiles,
})
if err != nil {
return 0, err
}
return process.Pid, nil
}
type filer interface {
File() (*os.File, error)
}

267
modules/graceful/server.go Normal file
View file

@ -0,0 +1,267 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
// This code is highly inspired by endless go
package graceful
import (
"crypto/tls"
"net"
"os"
"strings"
"sync"
"syscall"
"time"
"code.gitea.io/gitea/modules/log"
)
type state uint8
const (
stateInit state = iota
stateRunning
stateShuttingDown
stateTerminate
)
var (
// RWMutex for when adding servers or shutting down
runningServerReg sync.RWMutex
// ensure we only fork once
runningServersForked bool
// DefaultReadTimeOut default read timeout
DefaultReadTimeOut time.Duration
// DefaultWriteTimeOut default write timeout
DefaultWriteTimeOut time.Duration
// DefaultMaxHeaderBytes default max header bytes
DefaultMaxHeaderBytes int
// IsChild reports if we are a fork iff LISTEN_FDS is set and our parent PID is not 1
IsChild = len(os.Getenv(listenFDs)) > 0 && os.Getppid() > 1
)
func init() {
runningServerReg = sync.RWMutex{}
DefaultMaxHeaderBytes = 0 // use http.DefaultMaxHeaderBytes - which currently is 1 << 20 (1MB)
}
// ServeFunction represents a listen.Accept loop
type ServeFunction = func(net.Listener) error
// Server represents our graceful server
type Server struct {
network string
address string
listener net.Listener
PreSignalHooks map[os.Signal][]func()
PostSignalHooks map[os.Signal][]func()
wg sync.WaitGroup
sigChan chan os.Signal
state state
lock *sync.RWMutex
BeforeBegin func(network, address string)
OnShutdown func()
}
// NewServer creates a server on network at provided address
func NewServer(network, address string) *Server {
runningServerReg.Lock()
defer runningServerReg.Unlock()
if IsChild {
log.Info("Restarting new server: %s:%s on PID: %d", network, address, os.Getpid())
} else {
log.Info("Starting new server: %s:%s on PID: %d", network, address, os.Getpid())
}
srv := &Server{
wg: sync.WaitGroup{},
sigChan: make(chan os.Signal),
PreSignalHooks: map[os.Signal][]func(){},
PostSignalHooks: map[os.Signal][]func(){},
state: stateInit,
lock: &sync.RWMutex{},
network: network,
address: address,
}
srv.BeforeBegin = func(network, addr string) {
log.Debug("Starting server on %s:%s (PID: %d)", network, addr, syscall.Getpid())
}
return srv
}
// ListenAndServe listens on the provided network address and then calls Serve
// to handle requests on incoming connections.
func (srv *Server) ListenAndServe(serve ServeFunction) error {
go srv.handleSignals()
l, err := GetListener(srv.network, srv.address)
if err != nil {
log.Error("Unable to GetListener: %v", err)
return err
}
srv.listener = newWrappedListener(l, srv)
if IsChild {
_ = syscall.Kill(syscall.Getppid(), syscall.SIGTERM)
}
srv.BeforeBegin(srv.network, srv.address)
return srv.Serve(serve)
}
// ListenAndServeTLS listens on the provided network address and then calls
// Serve to handle requests on incoming TLS connections.
//
// Filenames containing a certificate and matching private key for the server must
// be provided. If the certificate is signed by a certificate authority, the
// certFile should be the concatenation of the server's certificate followed by the
// CA's certificate.
func (srv *Server) ListenAndServeTLS(certFile, keyFile string, serve ServeFunction) error {
config := &tls.Config{}
if config.NextProtos == nil {
config.NextProtos = []string{"http/1.1"}
}
config.Certificates = make([]tls.Certificate, 1)
var err error
config.Certificates[0], err = tls.LoadX509KeyPair(certFile, keyFile)
if err != nil {
log.Error("Failed to load https cert file %s for %s:%s: %v", certFile, srv.network, srv.address, err)
return err
}
return srv.ListenAndServeTLSConfig(config, serve)
}
// ListenAndServeTLSConfig listens on the provided network address and then calls
// Serve to handle requests on incoming TLS connections.
func (srv *Server) ListenAndServeTLSConfig(tlsConfig *tls.Config, serve ServeFunction) error {
go srv.handleSignals()
l, err := GetListener(srv.network, srv.address)
if err != nil {
log.Error("Unable to get Listener: %v", err)
return err
}
wl := newWrappedListener(l, srv)
srv.listener = tls.NewListener(wl, tlsConfig)
if IsChild {
_ = syscall.Kill(syscall.Getppid(), syscall.SIGTERM)
}
srv.BeforeBegin(srv.network, srv.address)
return srv.Serve(serve)
}
// Serve accepts incoming HTTP connections on the wrapped listener l, creating a new
// service goroutine for each. The service goroutines read requests and then call
// handler to reply to them. Handler is typically nil, in which case the
// DefaultServeMux is used.
//
// In addition to the standard Serve behaviour each connection is added to a
// sync.Waitgroup so that all outstanding connections can be served before shutting
// down the server.
func (srv *Server) Serve(serve ServeFunction) error {
defer log.Debug("Serve() returning... (PID: %d)", syscall.Getpid())
srv.setState(stateRunning)
err := serve(srv.listener)
log.Debug("Waiting for connections to finish... (PID: %d)", syscall.Getpid())
srv.wg.Wait()
srv.setState(stateTerminate)
// use of closed means that the listeners are closed - i.e. we should be shutting down - return nil
if err != nil && strings.Contains(err.Error(), "use of closed") {
return nil
}
return err
}
func (srv *Server) getState() state {
srv.lock.RLock()
defer srv.lock.RUnlock()
return srv.state
}
func (srv *Server) setState(st state) {
srv.lock.Lock()
defer srv.lock.Unlock()
srv.state = st
}
type wrappedListener struct {
net.Listener
stopped bool
server *Server
}
func newWrappedListener(l net.Listener, srv *Server) *wrappedListener {
return &wrappedListener{
Listener: l,
server: srv,
}
}
func (wl *wrappedListener) Accept() (net.Conn, error) {
var c net.Conn
// Set keepalive on TCPListeners connections.
if tcl, ok := wl.Listener.(*net.TCPListener); ok {
tc, err := tcl.AcceptTCP()
if err != nil {
return nil, err
}
_ = tc.SetKeepAlive(true) // see http.tcpKeepAliveListener
_ = tc.SetKeepAlivePeriod(3 * time.Minute) // see http.tcpKeepAliveListener
c = tc
} else {
var err error
c, err = wl.Listener.Accept()
if err != nil {
return nil, err
}
}
c = wrappedConn{
Conn: c,
server: wl.server,
}
wl.server.wg.Add(1)
return c, nil
}
func (wl *wrappedListener) Close() error {
if wl.stopped {
return syscall.EINVAL
}
wl.stopped = true
return wl.Listener.Close()
}
func (wl *wrappedListener) File() (*os.File, error) {
// returns a dup(2) - FD_CLOEXEC flag *not* set so the listening socket can be passed to child processes
return wl.Listener.(filer).File()
}
type wrappedConn struct {
net.Conn
server *Server
}
func (w wrappedConn) Close() error {
err := w.Conn.Close()
if err == nil {
w.server.wg.Done()
}
return err
}

View file

@ -0,0 +1,119 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package graceful
import (
"errors"
"fmt"
"os"
"runtime"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
// shutdown closes the listener so that no new connections are accepted
// and starts a goroutine that will hammer (stop all running requests) the server
// after setting.GracefulHammerTime.
func (srv *Server) shutdown() {
// only shutdown if we're running.
if srv.getState() != stateRunning {
return
}
srv.setState(stateShuttingDown)
if setting.GracefulHammerTime >= 0 {
go srv.hammerTime(setting.GracefulHammerTime)
}
if srv.OnShutdown != nil {
srv.OnShutdown()
}
err := srv.listener.Close()
if err != nil {
log.Error("PID: %d Listener.Close() error: %v", os.Getpid(), err)
} else {
log.Info("PID: %d Listener (%s) closed.", os.Getpid(), srv.listener.Addr())
}
}
// hammerTime forces the server to shutdown in a given timeout - whether it
// finished outstanding requests or not. if Read/WriteTimeout are not set or the
// max header size is very big a connection could hang...
//
// srv.Serve() will not return until all connections are served. this will
// unblock the srv.wg.Wait() in Serve() thus causing ListenAndServe* functions to
// return.
func (srv *Server) hammerTime(d time.Duration) {
defer func() {
// We call srv.wg.Done() until it panics.
// This happens if we call Done() when the WaitGroup counter is already at 0
// So if it panics -> we're done, Serve() will return and the
// parent will goroutine will exit.
if r := recover(); r != nil {
log.Error("WaitGroup at 0: Error: %v", r)
}
}()
if srv.getState() != stateShuttingDown {
return
}
time.Sleep(d)
log.Warn("Forcefully shutting down parent")
for {
if srv.getState() == stateTerminate {
break
}
srv.wg.Done()
// Give other goroutines a chance to finish before we forcibly stop them.
runtime.Gosched()
}
}
func (srv *Server) fork() error {
runningServerReg.Lock()
defer runningServerReg.Unlock()
// only one server instance should fork!
if runningServersForked {
return errors.New("another process already forked. Ignoring this one")
}
runningServersForked = true
// We need to move the file logs to append pids
setting.RestartLogsWithPIDSuffix()
_, err := RestartProcess()
return err
}
// RegisterPreSignalHook registers a function to be run before the signal handler for
// a given signal. These are not mutex locked and should therefore be only called before Serve.
func (srv *Server) RegisterPreSignalHook(sig os.Signal, f func()) (err error) {
for _, s := range hookableSignals {
if s == sig {
srv.PreSignalHooks[sig] = append(srv.PreSignalHooks[sig], f)
return
}
}
err = fmt.Errorf("Signal %v is not supported", sig)
return
}
// RegisterPostSignalHook registers a function to be run after the signal handler for
// a given signal. These are not mutex locked and should therefore be only called before Serve.
func (srv *Server) RegisterPostSignalHook(sig os.Signal, f func()) (err error) {
for _, s := range hookableSignals {
if s == sig {
srv.PostSignalHooks[sig] = append(srv.PostSignalHooks[sig], f)
return
}
}
err = fmt.Errorf("Signal %v is not supported", sig)
return
}

View file

@ -0,0 +1,45 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package graceful
import (
"crypto/tls"
"net/http"
)
func newHTTPServer(network, address string, handler http.Handler) (*Server, ServeFunction) {
server := NewServer(network, address)
httpServer := http.Server{
ReadTimeout: DefaultReadTimeOut,
WriteTimeout: DefaultWriteTimeOut,
MaxHeaderBytes: DefaultMaxHeaderBytes,
Handler: handler,
}
server.OnShutdown = func() {
httpServer.SetKeepAlivesEnabled(false)
}
return server, httpServer.Serve
}
// HTTPListenAndServe listens on the provided network address and then calls Serve
// to handle requests on incoming connections.
func HTTPListenAndServe(network, address string, handler http.Handler) error {
server, lHandler := newHTTPServer(network, address, handler)
return server.ListenAndServe(lHandler)
}
// HTTPListenAndServeTLS listens on the provided network address and then calls Serve
// to handle requests on incoming connections.
func HTTPListenAndServeTLS(network, address, certFile, keyFile string, handler http.Handler) error {
server, lHandler := newHTTPServer(network, address, handler)
return server.ListenAndServeTLS(certFile, keyFile, lHandler)
}
// HTTPListenAndServeTLSConfig listens on the provided network address and then calls Serve
// to handle requests on incoming connections.
func HTTPListenAndServeTLSConfig(network, address string, tlsConfig *tls.Config, handler http.Handler) error {
server, lHandler := newHTTPServer(network, address, handler)
return server.ListenAndServeTLSConfig(tlsConfig, lHandler)
}

View file

@ -0,0 +1,93 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package graceful
import (
"os"
"os/signal"
"syscall"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
var hookableSignals []os.Signal
func init() {
hookableSignals = []os.Signal{
syscall.SIGHUP,
syscall.SIGUSR1,
syscall.SIGUSR2,
syscall.SIGINT,
syscall.SIGTERM,
syscall.SIGTSTP,
}
}
// handleSignals listens for os Signals and calls any hooked in function that the
// user had registered with the signal.
func (srv *Server) handleSignals() {
var sig os.Signal
signal.Notify(
srv.sigChan,
hookableSignals...,
)
pid := syscall.Getpid()
for {
sig = <-srv.sigChan
srv.preSignalHooks(sig)
switch sig {
case syscall.SIGHUP:
if setting.GracefulRestartable {
log.Info("PID: %d. Received SIGHUP. Forking...", pid)
err := srv.fork()
if err != nil {
log.Error("Error whilst forking from PID: %d : %v", pid, err)
}
} else {
log.Info("PID: %d. Received SIGHUP. Not set restartable. Shutting down...", pid)
srv.shutdown()
}
case syscall.SIGUSR1:
log.Info("PID %d. Received SIGUSR1.", pid)
case syscall.SIGUSR2:
log.Warn("PID %d. Received SIGUSR2. Hammering...", pid)
srv.hammerTime(0 * time.Second)
case syscall.SIGINT:
log.Warn("PID %d. Received SIGINT. Shutting down...", pid)
srv.shutdown()
case syscall.SIGTERM:
log.Warn("PID %d. Received SIGTERM. Shutting down...", pid)
srv.shutdown()
case syscall.SIGTSTP:
log.Info("PID %d. Received SIGTSTP.")
default:
log.Info("PID %d. Received %v.", sig)
}
srv.postSignalHooks(sig)
}
}
func (srv *Server) preSignalHooks(sig os.Signal) {
if _, notSet := srv.PreSignalHooks[sig]; !notSet {
return
}
for _, f := range srv.PreSignalHooks[sig] {
f()
}
}
func (srv *Server) postSignalHooks(sig os.Signal) {
if _, notSet := srv.PostSignalHooks[sig]; !notSet {
return
}
for _, f := range srv.PostSignalHooks[sig] {
f()
}
}