- 论坛徽章:
- 0
|
不耗内存版,行处理:
use strict;
use warnings;

# Line-by-line (low-memory) FASTA splitter.
#
# Usage: perl script.pl <repeat.fa> <out.fa>
#
# For every record of the input file the header line (starting with '>')
# is copied to the output.  The sequence is cut wherever a line contains a
# lowercase a/t/c/g (presumably soft-masked repeats -- confirm against the
# producer of the input).  The uppercase stretches between those cuts
# ("segments") are written out, re-wrapped to $out_line_length characters
# per line, but only when a segment is at least $min_seq_length long.
#
# Memory stays small because a segment is buffered only until it reaches
# $min_seq_length; from then on complete output lines are written as soon
# as they are available ("streaming").

my $repeat = $ARGV[0];    # input FASTA file, e.g. repeat.fa
my $out    = $ARGV[1];    # output file receiving the split sequences
defined $repeat && defined $out
    or die "Usage: $0 <repeat.fa> <out.fa>\n";

my $min_seq_length  = 500;    # segments shorter than this are dropped
my $out_line_length = 50;     # width of the sequence lines written out

# Write every complete $width-character line from the front of $$seq_ref
# to $fh, leaving the final 1..$width characters (or an empty string) in
# the buffer so that later input can still be appended to it.
sub print_full_lines {
    my ($fh, $seq_ref, $width) = @_;
    while (length($$seq_ref) > $width) {
        print {$fh} substr($$seq_ref, 0, $width, ''), "\n";
    }
    return;
}

# Write the whole buffer wrapped to $width characters and empty it.
# Nothing is printed for an empty buffer, so no blank lines are produced.
sub print_wrapped {
    my ($fh, $seq_ref, $width) = @_;
    print_full_lines($fh, $seq_ref, $width);
    print {$fh} $$seq_ref, "\n" if length $$seq_ref;
    $$seq_ref = '';
    return;
}

open my $in_fh, '<', $repeat
    or die "Cannot open '$repeat' for reading: $!";
open my $out_fh, '>', $out
    or die "Cannot open '$out' for writing: $!";

my $segment   = '';    # uppercase segment collected so far (not yet written)
my $streaming = 0;     # true once the current segment reached the minimum
                       # length and its lines are already being written

# Close the current segment: write what is still buffered if the segment
# qualifies (already streaming, or long enough now), then reset the state.
my $finish_segment = sub {
    if ($streaming || length($segment) >= $min_seq_length) {
        print_wrapped($out_fh, \$segment, $out_line_length);
    }
    $segment   = '';
    $streaming = 0;
    return;
};

while (my $line = <$in_fh>) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate CRLF line endings
    next unless $line;    # skip empty lines (and a lone "0", which is false)

    if ($line =~ /^>/) {
        # New record: finish the previous record's last segment first.
        $finish_segment->();
        print {$out_fh} "$line\n";
    }
    elsif ($line =~ /[atcg]/) {
        # The line contains lowercase bases: its leading uppercase run ends
        # the current segment, its trailing uppercase run starts the next.
        # NOTE(review): uppercase runs strictly inside the line (between two
        # lowercase runs) are discarded, as in the original script.  That is
        # harmless while input lines are shorter than $min_seq_length;
        # confirm if single-line FASTA must be supported.
        $segment .= $1 if $line =~ /^([ATCG]+)/;
        $finish_segment->();
        $segment = $1 if $line =~ /([ATCG]+)$/;
    }
    else {
        # No lowercase a/t/c/g on this line: it extends the current segment.
        $segment .= $line;
        $streaming = 1 if length($segment) >= $min_seq_length;
        print_full_lines($out_fh, \$segment, $out_line_length) if $streaming;
    }
}

# End of input: the last segment has no following header to trigger its
# flush, so write its remaining characters here.
$finish_segment->();

close $in_fh;
close $out_fh or die "Cannot close '$out': $!";
复制代码 |
|