- 论坛徽章:
- 0
|
多谢LS。我后来自己看了一下:父子进程之间有些东西是继承(复制)的,比如信号及信号处理函数,正是这一点导致了死锁现象。程序的本意是:父进程收到终止信号后设置 Event 变量,子进程检测到该变量被设置后退出。但由于处理不当(其实是在修别人的 bug),子进程也会同样处理此信号。现改为在启动子进程之前忽略信号、启动之后再恢复,代码如下。如果还有问题,欢迎斧正。
- #!/usr/bin/python
- import os, sys, time, atexit, errno
- import multiprocessing as mp
- from signal import signal
- from signal import getsignal
- from signal import SIGTERM
- from signal import SIGINT
- from signal import SIG_IGN
# Shared shutdown flag: set by the signal handler, polled by the workers.
exit_flag = mp.Event()

# Files the daemon's standard streams are redirected to once it has detached.
stdin = "/dev/null"
stdout = "/dev/null"
# Use "/dev/null" for stderr as well to discard the daemon's diagnostics.
stderr = "/tmp/test.log"

# File in which the daemon records its PID.
pidfile = "/tmp/test.pid"
def _fork_or_die(attempt):
    """Fork once; the parent exits with status 0 and only the child returns.

    If the fork itself fails, the error is reported on stderr and the
    process exits with status 1. ``attempt`` is the ordinal shown in that
    error message.
    """
    try:
        pid = os.fork()
    except OSError as e:
        sys.stderr.write(
            "fork #%d failed: %d (%s)\n" % (attempt, e.errno, e.strerror))
        sys.exit(1)
    if pid > 0:
        sys.exit(0)


def daemonize():
    """Detach the current process and turn it into a daemon.

    Forks twice (each parent exits), and between the forks changes the
    working directory to "/", starts a new session and clears the umask.
    The standard streams are then redirected to the module-level
    ``stdin``/``stdout``/``stderr`` paths, ``delpid`` is registered to run
    at exit, and the daemon's PID is written to ``pidfile``.
    """
    _fork_or_die(1)
    os.chdir("/")
    os.setsid()
    os.umask(0)
    _fork_or_die(2)

    sys.stdout.flush()
    sys.stderr.flush()
    si = open(stdin, 'r')
    so = open(stdout, 'a+')
    # Binary mode: only the descriptor is used, and unbuffered text mode is
    # rejected by Python 3.
    se = open(stderr, 'ab+', 0)
    try:
        os.dup2(si.fileno(), sys.stdin.fileno())
        os.dup2(so.fileno(), sys.stdout.fileno())
        os.dup2(se.fileno(), sys.stderr.fileno())
    finally:
        # The duplicated descriptors stay open; the temporary handles are
        # no longer needed.
        for handle in (si, so, se):
            handle.close()

    atexit.register(delpid)
    with open(pidfile, 'w+') as pf:
        pf.write("%s\n" % os.getpid())
def delpid():
    """Delete the daemon's pidfile (registered as an exit hook by daemonize)."""
    os.unlink(pidfile)
def handle_signal():
    """Install ``handler`` for SIGINT and SIGTERM in the current process."""
    for signum in (SIGINT, SIGTERM):
        signal(signum, handler)
def handler(signum, frame):
    """Signal callback: log the signal number to stderr and set ``exit_flag``.

    ``frame`` is accepted because the signal API passes it; it is not used.
    """
    note = "get a signal: %s" % signum
    sys.stderr.write(note)
    exit_flag.set()
def start(run):
    """Daemonize the process, install the signal handlers and call ``run``.

    If ``pidfile`` already holds a PID, a message is written to stderr and
    the process exits with status 1 instead of starting a second daemon.

    Args:
        run: zero-argument callable executed inside the daemon.
    """
    try:
        # The context manager closes the file even when parsing fails.
        with open(pidfile, 'r') as pf:
            pid = int(pf.read().strip())
    except (IOError, ValueError):
        # No readable pidfile, or one without a valid number in it: treat
        # both as "no daemon recorded" rather than crashing.
        pid = None

    if pid:
        message = "pidfile %s already exists. Daemon already running!\n"
        sys.stderr.write(message % pidfile)
        sys.exit(1)

    daemonize()
    handle_signal()
    run()
def stop():
    """Terminate the daemon recorded in ``pidfile``.

    Sends SIGTERM once per second until the process no longer exists, then
    removes the pidfile if it is still present. When no PID can be read
    from the pidfile, a message is written to stderr and the function
    returns. Any other failure to signal the process is reported on stderr
    and exits with status 1.
    """
    try:
        # The context manager closes the file even when parsing fails.
        with open(pidfile, 'r') as pf:
            pid = int(pf.read().strip())
    except (IOError, ValueError):
        # No readable pidfile, or one without a valid number in it.
        pid = None

    if not pid:
        message = "pidfile %s does not exist. Daemon not running!\n"
        sys.stderr.write(message % pidfile)
        return

    try:
        # Keep signalling until kill() fails; ESRCH means the process is gone.
        while True:
            os.kill(pid, SIGTERM)
            time.sleep(1)
    except OSError as err:
        if err.errno != errno.ESRCH:
            sys.stderr.write("failed to signal pid %d: %s\n" % (pid, err))
            sys.exit(1)
        if os.path.exists(pidfile):
            os.remove(pidfile)
def restart(run):
    """Stop the recorded daemon, if any, then start a new one running ``run``."""
    stop()
    start(run)
def worker():
    """Child-process body: report name and PID on stderr, then idle.

    Returns once ``exit_flag`` has been set.
    """
    name = mp.current_process().name
    message = "my name:%s,mypid is:%s\n"
    sys.stderr.write(message % (name, os.getpid()))
    # Sleep between polls: spinning on is_set() would keep one CPU core
    # fully busy per worker while it has nothing to do.
    while not exit_flag.is_set():
        time.sleep(0.1)
def _spawn_worker_ignoring_sigterm(restore_handler):
    """Start one ``worker`` process while SIGTERM is ignored.

    SIGTERM is set to SIG_IGN before the child is started so that the child
    does not inherit the parent's handler, and ``restore_handler`` is
    reinstalled in this process afterwards, even if starting the child
    raises.

    Returns the started ``multiprocessing.Process``.
    """
    signal(SIGTERM, SIG_IGN)
    try:
        proc = mp.Process(target=worker)
        proc.start()
    finally:
        signal(SIGTERM, restore_handler)
    return proc


def test_start():
    """Run three ``worker`` processes and supervise them until shutdown.

    A worker that exits while ``exit_flag`` is clear is replaced by a new
    one that takes over its task number. The function returns when a full
    pass over the pool finds no live worker and starts no replacement.
    """
    proc_pool = {}  # pid -> Process
    proc_info = {}  # pid -> task number
    parent_handler = getsignal(SIGTERM)

    for task in range(3):
        print(task)
        proc = _spawn_worker_ignoring_sigterm(parent_handler)
        proc_pool[proc.pid] = proc
        # Debug info.
        sys.stderr.write('===pid %s: task %s===\n' % (proc.pid, proc.name))
        proc_info[proc.pid] = task

    while True:
        is_alive = False
        # Iterate over a snapshot: the loop body removes and adds entries.
        for pid, proc in list(proc_pool.items()):
            proc.join(timeout=1)
            if proc.is_alive():
                is_alive = True
                continue

            proc_pool.pop(pid)
            sys.stderr.write('===pid %s exit===\n' % (pid))
            if exit_flag.is_set():
                # Shutdown requested: do not replace the worker.
                continue

            replacement = _spawn_worker_ignoring_sigterm(parent_handler)
            proc_pool[replacement.pid] = replacement
            task = proc_info.pop(pid)
            sys.stderr.write(
                '===-pid %s: task %s-===\n' % (replacement.pid, task))
            proc_info[replacement.pid] = task
            is_alive = True

        if not is_alive:
            break
def main():
    """Command-line entry point: ``start``, ``stop`` or ``restart`` the daemon.

    Exits with status 0 after a recognised command returns, and with
    status 2 after printing a message when the command is unknown or the
    argument count is wrong.
    """
    if len(sys.argv) != 2:
        print("usage: %s start|stop|restart" % sys.argv[0])
        sys.exit(2)

    command = sys.argv[1]
    if command == "start":
        start(test_start)
    elif command == "stop":
        stop()
    elif command == "restart":
        # Pass the same entry point as "start"; no name ``run`` exists at
        # module level, so using it here would raise NameError.
        restart(test_start)
    else:
        print("Unknown Command")
        sys.exit(2)
    sys.exit(0)


if __name__ == "__main__":
    main()
复制代码 |
|