- 论坛徽章:
- 0
|
本帖最后由 KevinLee39 于 2010-10-03 17:07 编辑
我写的一个调度程序, 测试时有时候会产生僵尸进程(大概1/5的概率吧), 而且碰到这种情况, 主进程也停住不动了, 不知道停在哪里, 实在找不出原因了.
测试运行情况:
perl diaodu.pl cs 4
开始时间: 2010-10-03 15:19:11 pid: 7324 Version: 1.2.r33 2010-09-26
child 00 branch: 650100 pid: 7325 2010-10-03 15:19:11
child 01 branch: 650200 pid: 7326 2010-10-03 15:19:11
child 02 branch: 652100 pid: 7327 2010-10-03 15:19:11
另一个窗口里看进程的情况:
xjv8js 7327 7324 1 15:19:11 pts/ts 0:00 <defunct>
xjv8js 7325 7324 1 15:19:11 pts/ts 0:00 <defunct>
xjv8js 7527 11176 0 15:24:15 pts/tk 0:00 grep 7324
xjv8js 7328 7324 1 15:19:11 pts/ts 0:00 <defunct>
xjv8js 7324 8015 0 15:19:11 pts/ts 0:00 perl diaodu.pl cs 4
xjv8js 7326 7324 1 15:19:11 pts/ts 0:00 <defunct>
源码:
- #! /usr/bin/perl
- #
- my @branch = qw(
- 650100 650200 652100 652200 652300 652700 652800 652900
- 653000 653100 653200 654000 654100 654200 654300 659000
- );
- my $startdate = '20100101'; # 采集开始日期
- my $enddate = '20100930'; # 采集结束日期
- (our $REV = '$LastChangedRevision:: 33 ) =~ s/\D+(\d+)\D+/$1/g;
- (our $REVDATE='$LastChangedDate:: 2010-09-26#)=~s/.+(\d{4}-\d\d-\d\d).+/$1/g;
- (our $VERSION = '1.2.r'.$REV);
- use warnings;
- #use strict;
- use Cwd;
- use FindBin qw($Bin);
- use POSIX;
- use POSIX ":sys_wait_h";
- sub usage {
- print "
- Usage: $0 cj|sj|xz 并行进程个数
- cj 采集
- sj 审计
- xz 下载
- Ex: $0 cj 4
- Version: $VERSION $REVDATE
- ";
- }
- # 测试
- my $cs =q#"date; sleep " . int(rand(40))#;
- # 采集
- my $cj =q#"sh collectdata.sh 1 $branch \\\\\# \\\\\# $startdate $enddate C O"#;
- # 审计
- my $sj =q#"cbps_4003_bat_AuditDataConsistency 1 $branch "#;
- # 下载
- my $xz =q#"cbps_4004_bat_DataDownLoad 1 $branch "#;
- #
- # main
- #
- if (@ARGV != 2 or ($ARGV[0] ne 'cs' and $ARGV[0] ne 'cj'
- and $ARGV[0] ne 'sj' and $ARGV[0] ne 'xz')) {
- &usage();
- exit(-1);
- }
- $|++;
- my $mode = $ARGV[0]; # 运行模式: 采集,下载等
- my $binxindu = $ARGV[1]; # 希望的并行进程个数
- my $child = 0; # 当前并行进程个数(正在跑的有几个)
- my $position = -1; # 正在/已经采集的个数(采集到了哪个机构)
- my $done_branch = 0; # 已经采集完毕的个数
- my $child_pid;
- my %pid_branch;
- print "开始时间: ".&get_time()." pid: $ Version: $VERSION $REVDATE\n";
- my $sigset=POSIX::SigSet->new(&POSIX::SIGCHLD,&POSIX::SIGUSR1,&POSIX::SIGUSR2);
- my $action = POSIX::SigAction->new('reaper',
- $sigset,
- 0);
- POSIX::sigaction(&POSIX::SIGCHLD, $action);
- $action = POSIX::SigAction->new('add_binxindu',
- $sigset,
- 0);
- POSIX::sigaction(&POSIX::SIGUSR1, $action);
- $action = POSIX::SigAction->new('minus_binxindu',
- $sigset,
- 0);
- POSIX::sigaction(&POSIX::SIGUSR2, $action);
- # 启动
- while(1) {
- &run();
- if ($done_branch == @branch) {
- print "全部机构采集完毕\n";
- print "结束时间: ".&get_time()."\n";
- exit(0);
- }
- if ($binxindu == 0 && $child == 0) {
- print "当前并行度为0, 完成的数量: $done_branch\n";
- print "退出时间: ".&get_time()."\n";
- exit(0);
- }
- pause();
- }
- # end main
- #
- # 按照当前并行度设定启动适当数量的子进程
- #
- sub run {
- while($child < $binxindu && $position+1 < @branch) {
- &add_child();
- }
- }
- #
- # 处理子进程终止
- #
- sub reaper {
- while (($child_pid = waitpid(-1,WNOHANG)) > 0) {
- print "child $pid_branch{$child_pid} branch: "
- ."$branch[$pid_branch{$child_pid}] pid: $child_pid "
- .&get_time()." done\n";
- $child--;
- $done_branch++;
- }
- }
- #
- # 增加一个并行度
- #
- sub add_binxindu {
- $binxindu++;
- print "add process to $binxindu\n";
- &run();
- print "child: $child\n";
- print "done_branch: $done_branch\n";
- }
- #
- # 减少一个并行度
- #
- sub minus_binxindu {
- if ($binxindu > 0) {
- $binxindu--;
- }
- print "minus binxindu to $binxindu\n";
- print "child: $child\n";
- print "done_branch: $done_branch\n";
- }
- #
- # 增加一个并行进程
- #
- sub add_child {
- my $pid = fork;
- if (!defined($pid)) {
- print "child branch: $branch[$position+1] fork failed!\n";
- } elsif ($pid > 0) {
- # father process
- $position++;
- $pid_branch{"$pid"} = sprintf("%02d", $position);
- print "child $pid_branch{$pid} branch: $branch[$position] "
- ."pid: $pid ".&get_time()."\n";
- $child++;
- } else {
- # child process
- open STDOUT, ">>$mode$branch[$position].log";
- open STDERR, ">>&STDOUT";
- my $commander = eval qq(eval "\$mode"); # mode前面是连续2个$符号, 帖子里发表不出来.
- print "$commander\n"; exit(0);
- #exec $commander;
- }
- }
- #
- # 获取当前时间
- #
- sub get_time {
- my $time_str = strftime("%Y-%m-%d %H:%M:%S", localtime);
- return $time_str;
- }
复制代码 |
|