- 论坛徽章:
- 307
|
本帖最后由 sunzhiguolu 于 2016-07-28 19:14 编辑
#!/usr/bin/perl
# Group input lines by their first quoted attribute value (the text matched
# by "...";) and summarise the coordinate pair found on each line.
#
# Usage: perl ab.pl file
#
# Output files (created in the current directory):
#   ./1.txt  groups holding exactly one line, written as "<line> <sum>"
#   ./2.txt  groups holding several lines; every line is echoed and the
#            last one is followed by "<line count> <sum> <span>"
use strict;
use warnings;

# First pair of integers separated by horizontal whitespace: (start, end).
my $regex = qr/\b(\d+)\h+(\d+)\b/;

# Key (first quoted attribute, including the trailing ';') => list of
# [ chomped line, start, end ] records in input order.
my %hData;
while (my $line = <>) {
    chomp $line;
    next unless $line =~ /("[^"]+";)/;
    my $key = $1;

    # Parse the coordinates once, here, so that a line without a pair can
    # never pick up stale capture values from an earlier match.
    my ($start, $end) = $line =~ $regex;
    unless (defined $end) {
        warn "skipping input line $.: no coordinate pair found\n";
        next;
    }
    push @{ $hData{$key} }, [ $line, $start, $end ];
}

open(my $fhF1, '>', './1.txt') or die "cannot open ./1.txt for writing: $!";
open(my $fhF2, '>', './2.txt') or die "cannot open ./2.txt for writing: $!";

# Sorted keys keep the output order stable between runs (plain hash order
# is randomised).
foreach my $key (sort keys %hData) {
    my @records = @{ $hData{$key} };
    my @lines   = map { $_->[0] } @records;

    # NOTE(review): the first record contributes end - start while every
    # later record contributes end - start + 1. This is kept exactly as in
    # the original script because the posted sample output depends on it;
    # confirm that the asymmetry is intended.
    my $sum = 0;
    foreach my $offset (0 .. $#records) {
        my (undef, $start, $end) = @{ $records[$offset] };
        $sum += $end - $start + ($offset ? 1 : 0);
    }

    if (@records == 1) {
        print {$fhF1} "$lines[0] $sum\n"
            or die "cannot write to ./1.txt: $!";
        next;
    }

    # Span = end of the last record minus start of the first record.
    # NOTE(review): this assumes the records of a group arrive in ascending
    # coordinate order - verify against the real input.
    my $span = $records[-1][2] - $records[0][1];

    print {$fhF2} (map { "$_\n" } @lines[0 .. $#lines - 1]),
        "$lines[-1] ", scalar(@lines), " $sum $span\n"
        or die "cannot write to ./2.txt: $!";
}

# Buffered write errors only surface at close time, so check both handles.
close($fhF1) or die "cannot close ./1.txt: $!";
close($fhF2) or die "cannot close ./2.txt: $!";
复制代码
perl ab.pl file
cat 1.txt
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
chrX hg19_refGene exon 13336768 13338518 0 - . gene_id "NM_001135995"; transcript_id "NM_001135995"; 1750
chrX hg19_refGene exon 13608411 13608465 0 + . gene_id "NR_106734"; transcript_id "NR_106734"; 54
cat 2.txt
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
chrX hg19_refGene exon 13707240 13707407 0 + . gene_id "NM_001195328"; transcript_id "NM_001195328";
chrX hg19_refGene exon 13726840 13727944 0 + . gene_id "NM_001195328"; transcript_id "NM_001195328"; 2 1272 20704
chrX hg19_refGene exon 13707240 13707407 0 + . gene_id "NM_004251"; transcript_id "NM_004251";
chrX hg19_refGene exon 13721933 13722021 0 + . gene_id "NM_004251"; transcript_id "NM_004251";
chrX hg19_refGene exon 13726840 13727944 0 + . gene_id "NM_004251"; transcript_id "NM_004251"; 3 1361 20704
chrX hg19_refGene exon 13353360 13353431 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13353601 13353664 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13353778 13353816 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13354498 13354579 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13356014 13356067 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13358045 13358218 0 + . gene_id "NR_045260"; transcript_id "NR_045260";
chrX hg19_refGene exon 13359850 13359944 0 + . gene_id "NR_045260"; transcript_id "NR_045260"; 7 579 6584
|
|