- 论坛徽章:
- 0
|
下面的过程是做序列的最佳局域匹配,我现在是想做序列的次最佳匹配并输出结果,程序编来很是困难。下面是我最佳匹配的C++代码,那位大神帮我修改下,做出次最佳匹配。perl代码和C++代码都可以。
1序列结构及数据
rps-0,B0393.1
I001CAATCAAATTTTTTTTAAGTTAAGTAAAAAGAACACTTAATAAATTATTTTAATATTGTTAATAGTAATGTC
E012CAATCTTAACTTCCAAATGCAGCAATACGTCTACAAGAGACGTTTTGACGGACCAAATATCATCAACGTCAAGAAGACCTGGGAGAAGCTTCTTCTCGCT
I002CATTCTGAAAAGATTTATCAACAAAAAAGATTAATAAATTTTAAATC
E034TCAAGGAGCCAAGACTTCTCGTCATCTCTGACCCACGTATCGATCATCAGGCTGTCACTGAGGCTTCCTACGTCGGAGTTCCAGTCATCTCCTTCGTCAA
I003CATATATAAATTAAATATTTTTTACATTTATACATAAACTTAACATC
E056CCGAGTCTCCACTCAAGTTGATCGACATCGGAGTTCCATGCAACAACAAGGGAGAACGCTCAATCGGACTTATGTGGTGGATGCTCGCTCGCGAGATCCT
rps-2
I001CATCAATAAAATAAGAATTTGGAAGTCTAAAACTTATAAATTTAAAGTC
E012GGACGTGGAGGACGTGGTGGCCGCGGAGGACGTGGTGGACGCGGAGGAAGAGCCGGACGCGGAGGAGAGAAGGAGACCGAATGGACTCCAGTCACCAAGC
I002CATTCATCAAATAAAAAAATCAAAATAATACCCCAATATTAAAGGTC
E034AACCTTAAAGACGAGGTCCTCAAGATCTCCCCAGTCCAGAAGCAGACCACTGCCGGACAACGTACCCGCTTCAAGGCTTTCGTTGCTATCGGAGATCACG
2 以下是我的C++代码
#include "stdio.h"
#include "stdlib.h"
#include "math.h"
#include "string.h"
#include"ctype.h"
#include"conio.h"
void main()
{
char a[100000], b[100000], c[100000],d[5],d2[5], e[10],f[200],h[200],h1[200],f1[200],g[11]={ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9','\0'};
long int i=0,j=0,k=0,k2=0,z1=0,z2=0,z3=0,m=0,m1=0,m2=0,m3=0,m4=0,n=0,n1=0,n2=0,n3=0,p=0,p1=0,q=0,q1=0;
FILE *fp1, *fp2;
for(i=0;i<200;i++)
{ h[i]='\0';
f[i]='\0';
h1[i]='\0';
f1[i]='\0';
}
fp1=fopen("序列结构数据.txt","r");
fp2=fopen("局域比对结果.txt","w");
do
{
fgets(a,100000,fp1);
if(a[0]=='@')
break;
// fprintf(fp2,"%s", a);
if((a[0]!='I')&&(a[0]!='E')&&(a[0]!='S'))
{
fprintf(fp2,"%s", a);
}
if(a[0]=='I')
{
//fprintf(fp2,"%s", a);
strcpy(b,a);
z1=strlen(b);
fprintf(fp2,"%s",a);
}
if((a[0]=='E')||(a[0]=='S'))
{
strcpy(c,a);
z2=strlen(c);
if((z1<z2)||(z1==z2))
z3=z1;
if(z1>z2)
z3=z2;
fprintf(fp2,"%s", c);
if(z3>13)
{ p=0;
for(m=5;m<z3;m++)
for(m1=0;m1<100000;m1++)
{ if(m>100)
break;
if((b[m1+m-1]=='\n')||(b[m1+m-1]=='\0'))
break;
for(m2=m1;m2<m1+m;m2++)
{
//fprintf(fp2,"%c", b[m2]);
f[n1]=b[m2];
n1++;
}
// fprintf(fp2,"%s\n", f);
//fprintf(fp2,"\n");
for(m3=0;m3<100000;m3++)
{
if((c[m3+m-1]=='\n')||(c[m3+m-1]=='\0'))
break;
for(m4=m3;m4<m3+m;m4++)
{
// fprintf(fp2,"%c", c[m4]);
h[n2]=c[m4];
n2++;
}
//fprintf(fp2,"%s\n%s\n",f,h);
for(n3=0;n3<m;n3++)
{ if((f[n3]!='0')&&(f[n3]!='1')&&(f[n3]!='2')&&(f[n3]!='3')&&(f[n3]!='4')&&(f[n3]!='5')&&(f[n3]!='6')&&(f[n3]!='7')&&(f[n3]!='8')&&(f[n3]!='9'))
{ if(f[n3]==h[n3])
q=q+5;
if(f[n3]!=h[n3])
q=q-4;
}
}
// fprintf(fp2,"%d\n",q);
if(q>p)
{
p=q;
strcpy(f1,f);
strcpy(h1,h);
k=m1-3;k2=m1+m-4;
q1=m3-3;p1=m3+m-4;
}
// fprintf(fp2,"%d\n\n",p);
//fprintf(fp2,"%s\n%s\n",f1,h1);
q=0;
n2=0;
}
n1=0;
for(i=0;i<200;i++)
{ h[i]='\0';
f[i]='\0';
}
}
}
for(i=0;i<5;i++)
{
d[i]='\0';
d2[i]='\0';
}
for(j=0;j<4;j++)
{
d[j]=b[j];
d2[j]=c[j];
}
d[4]='\0';
d2[4]='\0';
if(z3>13)
fprintf(fp2,"%s\t%d\t%s\t%d\t%d\n%s\t%d\t%s\t%d\t%d\n%d\t\t%d\n",d,k,f1,k2,z1-4,d2,q1,h1,p1,z2-5,k2-k+1,p);
for(i=0;i<100000;i++)
{
b[i]='\0';
c[i]='\0';
}
z1=0;
z2=0;
z3=0;
}
}
while(a[0]!='@');
for(i=0;i<=8;i++)
e[i]='@';
e[9]='\0';
fprintf(fp2,"%s\n", e);
fclose(fp1);
fclose(fp2);
}
3 上面程序输出的最佳局域比对结果
rps-0,B0393.1
I001CAATCAAATTTTTTTTAAGTTAAGTAAAAAGAACACTTAATAAATTATTTTAATATTGTTAATAGTAATGTC
E012CAATCTTAACTTCCAAATGCAGCAATACGTCTACAAGAGACGTTTTGACGGACCAAATATCATCAACGTCAAGAAGACCTGGGAGAAGCTTCTTCTCGCT
I001 3 ATCAAATTTTTTTTAAGTTAAGTAAA 28 73
E012 52 ACCAAATATCATCAACGTCAAGAAGA 77 100
26 40
I002CATTCTGAAAAGATTTATCAACAAAAAAGATTAATAAATTTTAAATC
E034TCAAGGAGCCAAGACTTCTCGTCATCTCTGACCCACGTATCGATCATCAGGCTGTCACTGAGGCTTCCTACGTCGGAGTTCCAGTCATCTCCTTCGTCAA
I002 10 AAGATTTATCAACA 23 48
E034 11 AAGACTTCTCGTCA 24 100
14 34
I003CATATATAAATTAAATATTTTTTACATTTATACATAAACTTAACATC
E056CCGAGTCTCCACTCAAGTTGATCGACATCGGAGTTCCATGCAACAACAAGGGAGAACGCTCAATCGGACTTATGTGGTGGATGCTCGCTCGCGAGATCCT
I003 38 ACTTA 42 48
E056 68 ACTTA 72 100
5 25
rps-2
I001CATCAATAAAATAAGAATTTGGAAGTCTAAAACTTATAAATTTAAAGTC
E012GGACGTGGAGGACGTGGTGGCCGCGGAGGACGTGGTGGACGCGGAGGAAGAGCCGGACGCGGAGGAGAGAAGGAGACCGAATGGACTCCAGTCACCAAGC
I001 21 GGAAG 25 50
E012 46 GGAAG 50 100
5 25
I002CATTCATCAAATAAAAAAATCAAAATAATACCCCAATATTAAAGGTC
E034AACCTTAAAGACGAGGTCCTCAAGATCTCCCCAGTCCAGAAGCAGACCACTGCCGGACAACGTACCCGCTTCAAGGCTTTCGTTGCTATCGGAGATCACG
I002 35 AATATTAAAG 44 48
E034 1 AACCTTAAAG 10 100
10 32
|
|