- 论坛徽章:
- 145
|
本帖最后由 jason680 于 2014-01-23 20:24 编辑
回复 26# felix0608
$ perl remove_over.pl
scaffold1 245 290
scaffold1 676 718
scaffold1 1027 1112
...
scaffold1 260446 260450
scaffold1 260660 261439
$ cat remove_over.pl- use strict;
- use warnings;
- my $DEBUG = 0;
- my $sFile_data = "file1";
- my $sFile_over = "file2";
- open( my $FHdata, "<", $sFile_data) or die "cannot open $sFile_data\n";
- open( my $FHover, "<", $sFile_over) or die "cannot open $sFile_over\n";
- sub DBGprint{ print " >>> DEBUG: @_" if $DEBUG};
- sub get_next{
- my ($FH) = @_;
- my $sLine = <$FH>;
- if(! defined $sLine){
- DBGprint "cannot read file\n";
- return;
- exit;
- }
- chomp $sLine;
- my @aRet = split(/\s+/,$sLine);
- DBGprint "get_next, @aRet\n";
- return @aRet;
- }
- sub Xprint{
- my( $sId, $sStart, $sEndof) = @_;
- print "$sId\t$sStart\t$sEndof\n";
- }
- sub remove_over(++){
- my ($raData, $raOver) = @_;
- DBGprint "data=@{$raData}, Over=@{$raOver} \n";
-
- exit if(! defined $raData->[0]);
- if(! defined $raOver->[0]){
- Xprint @{$raData};
- return(1,0);
- }
- # ID different
- # Data: scaffold1 ss xx
- # Over: scaffold2 tt yy
- if( $raData->[0] ne $raOver->[0]){
- Xprint @{$raData};
- return (1,0);
- }
-
- # ID same
- # Data: scaffold1 1 15
- if($raData->[1] == $raOver->[1]){
- # Over: scaffold1 1 15
- return(1,1) if ($raData->[2] == $raOver->[2]);
- # Over: scaffold1 1 20
- return(1,0) if ($raData->[2] < $raOver->[2]);
-
- # Data: scaffold1 1 15
- # Over: scaffold1 1 13
- # change Data start to 14 (Note: 13 + 1)
- $raData->[1] = $raOver->[2]+1;
- # Data: scaffold1 14 15
- # and read Over file
- return(0,1);
- }
- # Over start is more Data start
- # Data: scaffold1 1 xx
- # Over: scaffold1 10 yy
- if($raData->[1] < $raOver->[1]){
-
- # Data: scaffold1 1 15
- if($raData->[2] < $raOver->[1]){
- # Over: scaffold1 20 yy
- # Output: scaffold1 1 15
- Xprint($raData->[0], $raData->[1], $raData->[2]);
- return(1,0);
- }
- if($raData->[2] == $raOver->[1]){
- # Over: scaffold1 15 yy
- # Output: scaffold1 1 14
- Xprint($raData->[0], $raData->[1], $raData->[2] -1);
- return(1,0);
- }
- # Data: scaffold1 1 15
- # Over: scaffold1 10 yy
- # Output: scaffold1 1 9
- Xprint($raData->[0], $raData->[1], $raOver->[1]-1);
- # Over: scaffold1 10 15
- return(1,1) if ($raData->[2] == $raOver->[2]);
- # Over: scaffold1 10 20
- return(1,0) if ($raData->[2] < $raOver->[2]);
- # Data: scaffold1 1 15
- # Over: scaffold1 10 13
- # change Data start to 14 (Note: 13 + 1)
- $raData->[1] = $raOver->[2]+1;
- # Data: scaffold1 14 15
- # and read Over file
- return(0,1);
- }
-
- # Data: scaffold1 3 xx
- # Over: scaffold1 1 3
- if($raData->[1] == $raOver->[2]){
- $raData->[1] = $raOver->[2] + 1;
- return(0,1);
- }
- if($raData->[1] < $raOver->[2]){
- # Data: scaffold1 3 10
- # Over: scaffold1 1 13
- if($raData->[2] < $raOver->[2]){
- return(1,0);
- }
- # Data: scaffold1 3 10
- # Over: scaffold1 1 5
- $raData->[1] = $raOver->[2] + 1;
- }
- return(0,1);
- }
- sub FLGprint{
- my ($sFlg, $sMesg) = @_;
- DBGprint "$sMesg" if $sFlg;
- }
- $DEBUG=1 if(@ARGV >=1 && $ARGV[0] eq '-d');
- my $sRead_data = 1;
- my $sRead_over = 1;
- my (@aData, @aOver);
- my $sCnt = 0;
- while(1){
- $sCnt++;
- DBGprint "Cycle: $sCnt\n";
- FLGprint $sRead_data, "Read Data\n";
- @aData = get_next($FHdata) if($sRead_data == 1);
- FLGprint $sRead_over, "Read Over\n";
- @aOver = get_next($FHover) if($sRead_over == 1);
- ($sRead_data, $sRead_over) = remove_over(@aData, @aOver);
- #exit if($sCnt > 500);
- }
复制代码 |
|