- 论坛徽章:
- 6
|
本帖最后由 stanley_tam 于 2016-06-18 16:36 编辑
感觉我的结果不一样 - #!perl
- use strict;
- use warnings;
- package Sequence;
- sub new {
- my ($class, $id) = @_;
- my $self = +{};
- $self->{'id'} = $id;
- $self->{'members'} = [];
- bless $self, $class;
- return $self;
- }
- sub add_member {
- my ($self, $member) = @_;
- push @{ $self->{'members'} }, $member;
- }
- sub get_members {
- my ($self) = @_;
- return @{ $self->{'members'} };
- }
- sub get_id {
- my ($self) = @_;
- return $self->{'id'};
- }
- 1;
- package SequenceCollection;
- sub new {
- my ($class) = @_;
- my $self = +{};
- $self->{'sequences'} = [];
- $self->{'_id_seq'} = +{};
- $self->{'last_seen_id'} = q{};
- $self->{'is_duplicate_id'} = 0;
- bless $self, $class;
- return $self;
- }
- sub process_line {
- my ($self, $line) = @_;
- $line =~ s{\s}{}gmix;
- return if not $line;
- if ($line =~ m{^ \d+ $}mix) {
- return if $self->{'is_duplicate_id'};
- my $last_seen_id = $self->{'last_seen_id'};
- my $sequence = $self->{'_id_seq'}->{$last_seen_id};
- $sequence->add_member($line);
- }
- else {
- $self->{'last_seen_id'} = $line;
- if (exists $self->{'_id_seq'}->{$line}) {
- $self->{'is_duplicate_id'} = 1;
- }
- else {
- # new record
- $self->{'is_duplicate_id'} = 0;
- my $sequence = Sequence->new($line);
- $self->{'_id_seq'}->{$line} = $sequence;
- push @{$self->{'sequences'}}, $sequence;
- }
- }
- }
- sub get_sequences {
- my ($self) = @_;
- return @{ $self->{'sequences'} };
- }
- 1;
- package Query;
- sub new {
- my ($class, $id) = @_;
-
- my $self = +{};
- $self->{'id'} = $id;
- $self->{'query_list'} = [];
- $self->{'subject_list'} = [];
- $self->{'last_seen_start_number'} = undef;
- bless $self, $class;
- return $self;
- }
- sub populate_list {
- my ($self, $string_type, $string, $start_number) = @_;
- my @letters = split //, $string;
- if ($string_type eq 'Query') {
- my $query_list = $self->{'query_list'};
- $self->{'last_seen_start_number'} = $start_number;
- for my $letter (@letters){
- $query_list->[$start_number] = $letter;
- ++$start_number;
- }
- $self->{'query_list'} = $query_list;
- }
- elsif ($string_type eq 'Sbjct') {
- my $subject_list = $self->{'subject_list'};
- $start_number = $self->{'last_seen_start_number'};
- for my $letter (@letters){
- $subject_list->[$start_number] = $letter;
- ++$start_number;
- }
- $self->{'subject_list'} = $subject_list;
- }
- else {
- die "This shouldn't happen...$/";
- }
- }
- sub get_subject_character {
- my ($self, $number) = @_;
- my $subject_list = $self->{'subject_list'};
- my $character = $subject_list->[$number] // q{};
- return $character
- }
- 1;
- package QueryCollection;
- sub new {
- my ($class, $id) = @_;
-
- my $self = +{};
- $self->{'queries'} = [];
- $self->{'id_query'} = +{};
- $self->{'last_seen_id'} = undef;
- bless $self, $class;
- return $self;
- }
- sub process_line {
- my ($self, $line) = @_;
- $line =~ s{^\s+ | \s+$}{}gmix;
- return if not $line;
- if ($line =~ m{Query=}) {
- my ($id) = $line =~ m{^Query= \s* (.*)$}mx;
- $self->{'last_seen_id'} = $id;
- my $query = Query->new($id);
- push @{ $self->{'queries'} }, $query;
- $self->{'id_query'}->{$id} = $query;
- }
- elsif ($line =~ m{Query \s+ \d+ \s+}mix){
- my $last_seen_id = $self->{'last_seen_id'};
- my $query = $self->{'id_query'}->{$last_seen_id};
- my ($start_number, $string) = $line =~ m{^Query \s+ (\d+) \s+ (\S+) \s+}mx;
- my $string_type = 'Query';
- $query->populate_list($string_type, $string, $start_number);
- }
- elsif ($line =~ m{Sbjct \s+ \d+ \s+}mix){
- my $last_seen_id = $self->{'last_seen_id'};
- my $query = $self->{'id_query'}->{$last_seen_id};
- my ($start_number, $string) = $line =~ m{^Sbjct \s+ (\d+) \s+ (\S+) \s+}mx;
- my $string_type = 'Sbjct';
- $query->populate_list($string_type, $string, $start_number);
- }
- }
- sub get_query_by_id {
- my ($self, $id) = @_;
- my $query = $self->{'id_query'}->{$id} || q{};
- return $query;
- }
- sub get_subject_character {
- my ($self, $id, $number) = @_;
- my $character = q{};
-
- my $query = $self->get_query_by_id($id);
- if ($query) {
- $character = $query->get_subject_character($number);
- }
- return $character;
- }
- 1;
- package main;
- sub main {
- my $sequence_collection = SequenceCollection->new();
- open my $a_fh, '<', 'A.word';
- while (defined(my $line = readline $a_fh)) {
- $sequence_collection->process_line($line);
- }
- close $a_fh;
- my $query_collection = QueryCollection->new();
- open my $b_fh, '<', 'B.word';
- while (defined(my $line = readline $b_fh)) {
- $query_collection->process_line($line);
- }
- close $b_fh;
- for my $sequence ( $sequence_collection->get_sequences() ){
- my @members = $sequence->get_members();
- my $id = $sequence->get_id();
- my %count = ();
- for my $number ( @members ){
- my $character = $query_collection->get_subject_character($id, $number);
- ++$count{$character};
- }
- print "$/id => [$id]$/";
- print "members => [@members]$/";
- for my $character (keys %count){
- my $number = $count{$character};
- print "character => [$character]$/";
- print "number => [$number]$/";
- }
- }
- }
- main();
- __END__
复制代码 输出:- id => [NP_414894.2-1]
- members => [77]
- character => [-]
- number => [1]
- id => [NP_415088.1-1]
- members => [134]
- character => [W]
- number => [1]
- id => [NP_415560.1-1]
- members => [137]
- character => [S]
- number => [1]
- id => [NP_415921.2-1]
- members => [77]
- character => [-]
- number => [1]
- id => [YP_025310.1-1]
- members => [73 78]
- character => [T]
- number => [1]
- character => [V]
- number => [1]
- id => [YP_026163.2-1]
- members => [77]
- character => [-]
- number => [1]
- id => [YP_588459.1-1]
- members => [62 63]
- character => [N]
- number => [1]
- character => [K]
- number => [1]
- id => [YP_002791252.1-1]
- members => [26]
- character => [S]
- number => [1]
- id => [NP_417338.3-1]
- members => [77]
- character => [-]
- number => [1]
- id => [NP_417516.3-1]
- members => [77]
- character => [-]
- number => [1]
- id => [NP_418692.2-1]
- members => [77]
- character => [-]
- number => [1]
- id => [NP_418697.1-1]
- members => [81 85]
- character => [W]
- number => [1]
- character => [E]
- number => [1]
复制代码 |
|