427 lines
13 KiB
Python
427 lines
13 KiB
Python
# -*- test-case-name: twisted.runner.test.test_procmon -*-
|
|
# Copyright (c) Twisted Matrix Laboratories.
|
|
# See LICENSE for details.
|
|
|
|
"""
|
|
Support for starting, monitoring, and restarting child process.
|
|
"""
|
|
import attr
|
|
import incremental
|
|
|
|
from twisted.python import deprecate
|
|
from twisted.internet import error, protocol, reactor as _reactor
|
|
from twisted.application import service
|
|
from twisted.protocols import basic
|
|
from twisted.logger import Logger
|
|
|
|
|
|
|
|
@attr.s(frozen=True)
|
|
class _Process(object):
|
|
"""
|
|
The parameters of a process to be restarted.
|
|
|
|
@ivar args: command-line arguments (including name of command as first one)
|
|
@type args: C{list}
|
|
|
|
@ivar uid: user-id to run process as, or None (which means inherit uid)
|
|
@type uid: C{int}
|
|
|
|
@ivar gid: group-id to run process as, or None (which means inherit gid)
|
|
@type gid: C{int}
|
|
|
|
@ivar env: environment for process
|
|
@type env: C{dict}
|
|
|
|
@ivar cwd: initial working directory for process or None
|
|
(which means inherit cwd)
|
|
@type cwd: C{str}
|
|
"""
|
|
|
|
args = attr.ib()
|
|
uid = attr.ib(default=None)
|
|
gid = attr.ib(default=None)
|
|
env = attr.ib(default=attr.Factory(dict))
|
|
cwd = attr.ib(default=None)
|
|
|
|
@deprecate.deprecated(incremental.Version('Twisted', 18, 7, 0))
|
|
def toTuple(self):
|
|
"""
|
|
Convert process to tuple.
|
|
|
|
Convert process to tuple that looks like the legacy structure
|
|
of processes, for potential users who inspected processes
|
|
directly.
|
|
|
|
This was only an accidental feature, and will be removed. If
|
|
you need to remember what processes were added to a process monitor,
|
|
keep track of that when they are added. The process list
|
|
inside the process monitor is no longer a public API.
|
|
|
|
This allows changing the internal structure of the process list,
|
|
when warranted by bug fixes or additional features.
|
|
|
|
@return: tuple representation of process
|
|
"""
|
|
return (self.args, self.uid, self.gid, self.env)
|
|
|
|
|
|
|
|
class DummyTransport:
|
|
|
|
disconnecting = 0
|
|
|
|
|
|
|
|
transport = DummyTransport()
|
|
|
|
|
|
|
|
class LineLogger(basic.LineReceiver):
|
|
|
|
tag = None
|
|
stream = None
|
|
delimiter = b'\n'
|
|
service = None
|
|
|
|
def lineReceived(self, line):
|
|
try:
|
|
line = line.decode('utf-8')
|
|
except UnicodeDecodeError:
|
|
line = repr(line)
|
|
|
|
self.service.log.info(u'[{tag}] {line}',
|
|
tag=self.tag,
|
|
line=line,
|
|
stream=self.stream)
|
|
|
|
|
|
|
|
class LoggingProtocol(protocol.ProcessProtocol):
|
|
|
|
service = None
|
|
name = None
|
|
|
|
def connectionMade(self):
|
|
self._output = LineLogger()
|
|
self._output.tag = self.name
|
|
self._output.stream = 'stdout'
|
|
self._output.service = self.service
|
|
self._outputEmpty = True
|
|
|
|
self._error = LineLogger()
|
|
self._error.tag = self.name
|
|
self._error.stream = 'stderr'
|
|
self._error.service = self.service
|
|
self._errorEmpty = True
|
|
|
|
self._output.makeConnection(transport)
|
|
self._error.makeConnection(transport)
|
|
|
|
|
|
def outReceived(self, data):
|
|
self._output.dataReceived(data)
|
|
self._outputEmpty = data[-1] == b'\n'
|
|
|
|
def errReceived(self, data):
|
|
self._error.dataReceived(data)
|
|
self._errorEmpty = data[-1] == b'\n'
|
|
|
|
def processEnded(self, reason):
|
|
if not self._outputEmpty:
|
|
self._output.dataReceived(b'\n')
|
|
if not self._errorEmpty:
|
|
self._error.dataReceived(b'\n')
|
|
self.service.connectionLost(self.name)
|
|
|
|
@property
|
|
def output(self):
|
|
return self._output
|
|
|
|
@property
|
|
def empty(self):
|
|
return self._outputEmpty
|
|
|
|
|
|
|
|
class ProcessMonitor(service.Service):
|
|
"""
|
|
ProcessMonitor runs processes, monitors their progress, and restarts
|
|
them when they die.
|
|
|
|
The ProcessMonitor will not attempt to restart a process that appears to
|
|
die instantly -- with each "instant" death (less than 1 second, by
|
|
default), it will delay approximately twice as long before restarting
|
|
it. A successful run will reset the counter.
|
|
|
|
The primary interface is L{addProcess} and L{removeProcess}. When the
|
|
service is running (that is, when the application it is attached to is
|
|
running), adding a process automatically starts it.
|
|
|
|
Each process has a name. This name string must uniquely identify the
|
|
process. In particular, attempting to add two processes with the same
|
|
name will result in a C{KeyError}.
|
|
|
|
@type threshold: C{float}
|
|
@ivar threshold: How long a process has to live before the death is
|
|
considered instant, in seconds. The default value is 1 second.
|
|
|
|
@type killTime: C{float}
|
|
@ivar killTime: How long a process being killed has to get its affairs
|
|
in order before it gets killed with an unmaskable signal. The
|
|
default value is 5 seconds.
|
|
|
|
@type minRestartDelay: C{float}
|
|
@ivar minRestartDelay: The minimum time (in seconds) to wait before
|
|
attempting to restart a process. Default 1s.
|
|
|
|
@type maxRestartDelay: C{float}
|
|
@ivar maxRestartDelay: The maximum time (in seconds) to wait before
|
|
attempting to restart a process. Default 3600s (1h).
|
|
|
|
@type _reactor: L{IReactorProcess} provider
|
|
@ivar _reactor: A provider of L{IReactorProcess} and L{IReactorTime}
|
|
which will be used to spawn processes and register delayed calls.
|
|
|
|
@type log: L{Logger}
|
|
@ivar log: The logger used to propagate log messages from spawned
|
|
processes.
|
|
|
|
"""
|
|
threshold = 1
|
|
killTime = 5
|
|
minRestartDelay = 1
|
|
maxRestartDelay = 3600
|
|
log = Logger()
|
|
|
|
|
|
def __init__(self, reactor=_reactor):
|
|
self._reactor = reactor
|
|
|
|
self._processes = {}
|
|
self.protocols = {}
|
|
self.delay = {}
|
|
self.timeStarted = {}
|
|
self.murder = {}
|
|
self.restart = {}
|
|
|
|
|
|
@deprecate.deprecatedProperty(incremental.Version('Twisted', 18, 7, 0))
|
|
def processes(self):
|
|
"""
|
|
Processes as dict of tuples
|
|
|
|
@return: Dict of process name to monitored processes as tuples
|
|
"""
|
|
return {name: process.toTuple()
|
|
for name, process in self._processes.items()}
|
|
|
|
|
|
@deprecate.deprecated(incremental.Version('Twisted', 18, 7, 0))
|
|
def __getstate__(self):
|
|
dct = service.Service.__getstate__(self)
|
|
del dct['_reactor']
|
|
dct['protocols'] = {}
|
|
dct['delay'] = {}
|
|
dct['timeStarted'] = {}
|
|
dct['murder'] = {}
|
|
dct['restart'] = {}
|
|
del dct['_processes']
|
|
dct['processes'] = self.processes
|
|
return dct
|
|
|
|
|
|
def addProcess(self, name, args, uid=None, gid=None, env={}, cwd=None):
|
|
"""
|
|
Add a new monitored process and start it immediately if the
|
|
L{ProcessMonitor} service is running.
|
|
|
|
Note that args are passed to the system call, not to the shell. If
|
|
running the shell is desired, the common idiom is to use
|
|
C{ProcessMonitor.addProcess("name", ['/bin/sh', '-c', shell_script])}
|
|
|
|
@param name: A name for this process. This value must be
|
|
unique across all processes added to this monitor.
|
|
@type name: C{str}
|
|
@param args: The argv sequence for the process to launch.
|
|
@param uid: The user ID to use to run the process. If L{None},
|
|
the current UID is used.
|
|
@type uid: C{int}
|
|
@param gid: The group ID to use to run the process. If L{None},
|
|
the current GID is used.
|
|
@type uid: C{int}
|
|
@param env: The environment to give to the launched process. See
|
|
L{IReactorProcess.spawnProcess}'s C{env} parameter.
|
|
@type env: C{dict}
|
|
@param cwd: The initial working directory of the launched process.
|
|
The default of C{None} means inheriting the laucnhing process's
|
|
working directory.
|
|
@type env: C{dict}
|
|
@raises: C{KeyError} if a process with the given name already
|
|
exists
|
|
"""
|
|
if name in self._processes:
|
|
raise KeyError("remove %s first" % (name,))
|
|
self._processes[name] = _Process(args, uid, gid, env, cwd)
|
|
self.delay[name] = self.minRestartDelay
|
|
if self.running:
|
|
self.startProcess(name)
|
|
|
|
|
|
def removeProcess(self, name):
|
|
"""
|
|
Stop the named process and remove it from the list of monitored
|
|
processes.
|
|
|
|
@type name: C{str}
|
|
@param name: A string that uniquely identifies the process.
|
|
"""
|
|
self.stopProcess(name)
|
|
del self._processes[name]
|
|
|
|
|
|
def startService(self):
|
|
"""
|
|
Start all monitored processes.
|
|
"""
|
|
service.Service.startService(self)
|
|
for name in list(self._processes):
|
|
self.startProcess(name)
|
|
|
|
|
|
def stopService(self):
|
|
"""
|
|
Stop all monitored processes and cancel all scheduled process restarts.
|
|
"""
|
|
service.Service.stopService(self)
|
|
|
|
# Cancel any outstanding restarts
|
|
for name, delayedCall in list(self.restart.items()):
|
|
if delayedCall.active():
|
|
delayedCall.cancel()
|
|
|
|
for name in list(self._processes):
|
|
self.stopProcess(name)
|
|
|
|
|
|
def connectionLost(self, name):
|
|
"""
|
|
Called when a monitored processes exits. If
|
|
L{service.IService.running} is L{True} (ie the service is started), the
|
|
process will be restarted.
|
|
If the process had been running for more than
|
|
L{ProcessMonitor.threshold} seconds it will be restarted immediately.
|
|
If the process had been running for less than
|
|
L{ProcessMonitor.threshold} seconds, the restart will be delayed and
|
|
each time the process dies before the configured threshold, the restart
|
|
delay will be doubled - up to a maximum delay of maxRestartDelay sec.
|
|
|
|
@type name: C{str}
|
|
@param name: A string that uniquely identifies the process
|
|
which exited.
|
|
"""
|
|
# Cancel the scheduled _forceStopProcess function if the process
|
|
# dies naturally
|
|
if name in self.murder:
|
|
if self.murder[name].active():
|
|
self.murder[name].cancel()
|
|
del self.murder[name]
|
|
|
|
del self.protocols[name]
|
|
|
|
if self._reactor.seconds() - self.timeStarted[name] < self.threshold:
|
|
# The process died too fast - backoff
|
|
nextDelay = self.delay[name]
|
|
self.delay[name] = min(self.delay[name] * 2, self.maxRestartDelay)
|
|
|
|
else:
|
|
# Process had been running for a significant amount of time
|
|
# restart immediately
|
|
nextDelay = 0
|
|
self.delay[name] = self.minRestartDelay
|
|
|
|
# Schedule a process restart if the service is running
|
|
if self.running and name in self._processes:
|
|
self.restart[name] = self._reactor.callLater(nextDelay,
|
|
self.startProcess,
|
|
name)
|
|
|
|
|
|
def startProcess(self, name):
|
|
"""
|
|
@param name: The name of the process to be started
|
|
"""
|
|
# If a protocol instance already exists, it means the process is
|
|
# already running
|
|
if name in self.protocols:
|
|
return
|
|
|
|
process = self._processes[name]
|
|
|
|
proto = LoggingProtocol()
|
|
proto.service = self
|
|
proto.name = name
|
|
self.protocols[name] = proto
|
|
self.timeStarted[name] = self._reactor.seconds()
|
|
self._reactor.spawnProcess(proto, process.args[0], process.args,
|
|
uid=process.uid, gid=process.gid,
|
|
env=process.env, path=process.cwd)
|
|
|
|
|
|
def _forceStopProcess(self, proc):
|
|
"""
|
|
@param proc: An L{IProcessTransport} provider
|
|
"""
|
|
try:
|
|
proc.signalProcess('KILL')
|
|
except error.ProcessExitedAlready:
|
|
pass
|
|
|
|
|
|
def stopProcess(self, name):
|
|
"""
|
|
@param name: The name of the process to be stopped
|
|
"""
|
|
if name not in self._processes:
|
|
raise KeyError('Unrecognized process name: %s' % (name,))
|
|
|
|
proto = self.protocols.get(name, None)
|
|
if proto is not None:
|
|
proc = proto.transport
|
|
try:
|
|
proc.signalProcess('TERM')
|
|
except error.ProcessExitedAlready:
|
|
pass
|
|
else:
|
|
self.murder[name] = self._reactor.callLater(
|
|
self.killTime,
|
|
self._forceStopProcess, proc)
|
|
|
|
|
|
def restartAll(self):
|
|
"""
|
|
Restart all processes. This is useful for third party management
|
|
services to allow a user to restart servers because of an outside change
|
|
in circumstances -- for example, a new version of a library is
|
|
installed.
|
|
"""
|
|
for name in self._processes:
|
|
self.stopProcess(name)
|
|
|
|
|
|
def __repr__(self):
|
|
l = []
|
|
for name, proc in self._processes.items():
|
|
uidgid = ''
|
|
if proc.uid is not None:
|
|
uidgid = str(proc.uid)
|
|
if proc.gid is not None:
|
|
uidgid += ':'+str(proc.gid)
|
|
|
|
if uidgid:
|
|
uidgid = '(' + uidgid + ')'
|
|
l.append('%r%s: %r' % (name, uidgid, proc.args))
|
|
return ('<' + self.__class__.__name__ + ' '
|
|
+ ' '.join(l)
|
|
+ '>')
|