- 论坛徽章:
- 0
|
本帖最后由 yanglc2013 于 2013-04-03 18:15 编辑
因为他输出的格式是(只有lnc中的数据):
lnc-C1orf227-1:2 chr1 213028819 213031452 -
但我想在同行加上对应的refGene中的数据(附件中,下面的程序也是基于附件中的两个文件的比较。)
lnc-C1orf227-1:2 chr1 213028819 213031452 - NM_014053 chr1 213031596 213072705 + 2210 (overlap关系)
原帖地址:http://bbs.chinaunix.net/thread-4073924-1-1.html(一下程序已经解决原帖的问题,现在想输出结果把refGene中对应的数据输出)- #!/usr/bin/perl -w
- use strict;
- use Data::Dumper;
- my($ref,$lnc,$contain,$overlap,$gapped,$reverse);
- open($ref,$ARGV[0]) or die $!;
- open($lnc,$ARGV[1]) or die $!;
- open($contain,'>','contain.txt') or die $!;
- open($overlap,'>','overlap.txt') or die $!;
- open($gapped ,'>','gapped.txt') or die $!;
- open($reverse,'>','reverse.txt') or die $!;
- my %hRefDB;
- while (<$ref>){
- my ($chr, $beg, $end) = (split)[1,2,3]; #Tab拆分,并取第二,第三,第四个元素。
- push @{ $hRefDB{$chr}}, [$beg,$end]; #存入哈希数组。
- }
- while (<$lnc>) {
- my ($chr,$beg,$end) = (split)[1,2,3]; #Tab拆分,并取第二,第三,第四个元素。
- if (!exists $hRefDB{$chr}) {
- print "not exists in refDB\n";
- next;
- }
- my $lnc = [$beg,$end];
- my ($is_contain,$is_overlap,$is_reverse) = (0,0,0);
- foreach my $ref (@{$hRefDB{$chr}}){ #遍历哈希数组的每个元素。
- last if $is_contain and $is_overlap and $is_reverse;
- unless ($is_contain){ #除非$is_contain的值为0,否则,将is_contain($lnc,$ref)的返回值赋值给 $is_contain。
- $is_contain = is_contain($lnc,$ref)
- and next;
- }
- unless ($is_reverse){
- $is_reverse = is_reverse($lnc,$ref)
- and next;
- }
- unless ($is_overlap){
- $is_overlap = is_overlap($lnc,$ref)
- and next;
- }
- }
- my $is_gapped = $is_contain || $is_reverse || $is_overlap ? 0:1;
- print $contain $_ if $is_contain;
- print $overlap $_ if $is_overlap;
- print $gapped $_ if $is_gapped;
- print $reverse $_ if $is_reverse;
- }
- sub is_ln {
- my($num,$ref) = @_;
- return $num >= $ref->[0] && $num <=$ref->[1] ? 1:0;
- }
- sub is_contain {
- my ($lnc,$ref) = @_;
- return (is_ln($lnc->[0],$ref) && is_ln($lnc->[1],$ref)) ? 1:0;
- }
- sub is_reverse {
- my ($lnc,$ref) = @_;
- return is_contain($ref,$lnc);
- }
- sub is_overlap {
- my ($lnc,$ref) = @_;
- return (is_ln($lnc->[0],$ref) && ! is_ln($lnc->[1],$ref)) ||
- (is_ln($lnc->[1],$ref) && ! is_ln($lnc->[0],$ref)) ? 1:0;
- }
- sub is_gapped {
- my ($lnc,$ref) = @_;
- return (! is_ln($lnc->[0],$ref) && ! is_ln($lnc->[1],$ref)) ? 1:0;
- }
复制代码 |
|