- 论坛徽章:
- 307
|
本帖最后由 sunzhiguolu 于 2016-07-29 14:17 编辑
#!/usr/bin/perl
#
# Group whitespace-separated records by "<first field> <last field>" and
# report per-group statistics.
#
# Usage: perl abc.pl file   (reads the named files, or STDIN if none given)
#
# For every input line the first pair of adjacent integers is taken as the
# record's (start, end) coordinates.  Lines sharing the same first and last
# field form one group.
#
#   ./1.txt  groups with exactly one record:
#              "<line> <sum>"
#   ./2.txt  groups with several records: every line is echoed, and the last
#            line of the group is suffixed with
#              " <record count> <sum> <span>"
#
# <sum>  is (end - start) for the first record of a group plus
#        (end - start + 1) for each following record.
# <span> is the largest coordinate in the group minus the smallest one.
#
# Groups are written in order of first appearance in the input, so repeated
# runs produce identical files.
use strict;
use warnings;
use List::Util qw(min max);

# First two integers separated only by horizontal whitespace: (start, end).
my $regex = qr/\b(\d+)\h+(\d+)\b/;

my %records_for;    # group key => list of [ line, start, end ]
my @key_order;      # group keys in order of first appearance

while (my $line = <>) {
    chomp $line;

    my @fields = split ' ', $line;
    next unless @fields;    # nothing to group on a blank line

    my ($start, $end) = $line =~ $regex;
    unless (defined $end) {
        # Without coordinates the record cannot contribute to any statistic.
        warn "input line $.: no start/end coordinate pair found, skipped\n";
        next;
    }

    my $key = "$fields[0] $fields[-1]";
    push @key_order, $key unless exists $records_for{$key};
    push @{ $records_for{$key} }, [ $line, $start, $end ];
}

open my $fhF1, '>', './1.txt' or die "Cannot open ./1.txt for writing: $!\n";
open my $fhF2, '>', './2.txt' or die "Cannot open ./2.txt for writing: $!\n";

for my $key (@key_order) {
    my @records = @{ $records_for{$key} };
    my @lines   = map { $_->[0] } @records;

    # Every record after the first contributes one extra unit on top of
    # (end - start); $#records is exactly that number of extra units.
    my $sum = $#records;
    $sum += $_->[2] - $_->[1] for @records;

    if (@records == 1) {
        print {$fhF1} "$lines[0] $sum\n";
        next;
    }

    my @coords = map { @{$_}[1, 2] } @records;
    my $span   = max(@coords) - min(@coords);

    # Only the final line of the group carries the statistics.
    my $last_line = pop @lines;
    print {$fhF2} join("\n", @lines, "$last_line " . scalar(@records) . " $sum $span"), "\n";
}

# Buffered write errors only surface at close time, so check it.
close $fhF1 or die "Error closing ./1.txt: $!\n";
close $fhF2 or die "Error closing ./2.txt: $!\n";
复制代码
perl abc.pl file
cat 1.txt
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
chr4 hg19_refGene exon 9331637 9333229 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup4"; 1592
chrX hg19_refGene exon 13608411 13608465 0 + . gene_id "NR_106734"; transcript_id "NR_106734"; 54
chr6_cox_hap2 hg19_refGene exon 3314427 3314490 0 + . gene_id "NR_002742"; transcript_id "NR_002742"; 63
chr4 hg19_refGene exon 9326891 9328483 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup3"; 1592
chr4 hg19_refGene exon 9336384 9337976 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup5"; 1592
chr6_dbb_hap3 hg19_refGene exon 3090439 3090502 0 + . gene_id "NR_002742"; transcript_id "NR_002742"; 63
chr4 hg19_refGene exon 9364855 9366447 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup2"; 1592
chr4 hg19_refGene exon 9355364 9356956 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup1"; 1592
chr4 hg19_refGene exon 9350619 9352211 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331"; 1592
chr6_apd_hap1 hg19_refGene exon 3119608 3119671 0 + . gene_id "NR_002742"; transcript_id "NR_002742"; 63
chr4 hg19_refGene exon 9345874 9347466 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup7"; 1592
chr6_qbl_hap6 hg19_refGene exon 3098509 3098572 0 + . gene_id "NR_002742"; transcript_id "NR_002742"; 63
chr4 hg19_refGene exon 9341129 9342721 0 + . gene_id "NM_001242331"; transcript_id "NM_001242331_dup6"; 1592
chrX hg19_refGene exon 13336768 13338518 0 - . gene_id "NM_001135995"; transcript_id "NM_001135995"; 1750
chr6 hg19_refGene exon 31804853 31804916 0 + . gene_id "NR_002742"; transcript_id "NR_002742"; 63
cat 2.txt
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
chrX hg19_refGene exon 119206241 119206810 0 - . gene_id "NM_032498"; transcript_id "NM_032498";
chrX hg19_refGene exon 119209865 119209910 0 - . gene_id "NM_032498"; transcript_id "NM_032498";
chrX hg19_refGene exon 119210842 119211253 0 - . gene_id "NM_032498"; transcript_id "NM_032498";
chrX hg19_refGene exon 119211439 119211707 0 - . gene_id "NM_032498"; transcript_id "NM_032498"; 4 1296 5466
chr6_cox_hap2 hg19_refGene exon 3143277 3143605 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3144218 3144300 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3145265 3145367 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3145936 3146051 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3146494 3146569 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3146725 3146905 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6_cox_hap2 hg19_refGene exon 3147233 3147467 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385"; 7 1122 4190
chrX hg19_refGene exon 13707240 13707407 0 + . gene_id "NM_001195328"; transcript_id "NM_001195328";
chrX hg19_refGene exon 13726840 13727944 0 + . gene_id "NM_001195328"; transcript_id "NM_001195328"; 2 1272 20704
chrX hg19_refGene exon 119292467 119292735 0 + . gene_id "NM_032498"; transcript_id "NM_032498_dup1";
chrX hg19_refGene exon 119292921 119293332 0 + . gene_id "NM_032498"; transcript_id "NM_032498_dup1";
chrX hg19_refGene exon 119294264 119294309 0 + . gene_id "NM_032498"; transcript_id "NM_032498_dup1";
chrX hg19_refGene exon 119297364 119297933 0 + . gene_id "NM_032498"; transcript_id "NM_032498_dup1"; 4 1296 5466
chr6 hg19_refGene exon 31633657 31633985 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31634598 31634680 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31635645 31635747 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31636316 31636431 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31636874 31636949 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31637105 31637285 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385";
chr6 hg19_refGene exon 31637613 31637847 0 + . gene_id "NM_001282385"; transcript_id "NM_001282385"; 7 1122 4190
chrX hg19_refGene exon 13707240 13707407 0 + . gene_id "NM_004251"; transcript_id "NM_004251";
chrX hg19_refGene exon 13721933 13722021 0 + . gene_id "NM_004251"; transcript_id "NM_004251";
chrX hg19_refGene exon 13726840 13727944 0 + . gene_id "NM_004251"; transcript_id "NM_004251"; 3 1361 20704
|
评分
-
查看全部评分
|