- 论坛徽章:
- 0
|
原帖由 duanjigang 于 2008-12-19 09:27 发表 ![]()
用二叉树存储,每次读取一行遍历树查找,读完时遍历一遍树,打印信息,这样的方法试过没有?
正好这几天用二叉树,顺便做一个给你,具体时间我没测试,感觉还挺快的,你可以试试
不过看你的email都是sina的,所以做了个特殊处理。希望能对你有用。
//duanjigang@2008-12-19 for test
#include <stdio.h>
#define NAME_LEN 128
#define MAX_MAIL 100000
typedef struct _mail
{
char name[128];
int num;
int len;
unsigned int left;
unsigned int right;
}mail;
mail mail_list[MAX_MAIL];
int m_index = 0;
int insert_tree(mail* tree, mail *node);
int main(int argc, char* argv[])
{
if(argc != 2)
{
printf("usuage:%s file\n", argv[0]);
return 0;
}
else
{
FILE * fp = NULL;
int i = 0;
if((fp = fopen(argv[1], "r"))== NULL)
{
printf("open file %s fail\n", argv[1]);
return 0;
}
while(fgets(mail_list[m_index].name, NAME_LEN - 1, fp))
{
char* p = mail_list[m_index].name;
while((*p != '\n') &&(*p != '@'))p++;
if(*p == '@')
{
*p = 0;
mail_list[m_index].len = p - mail_list[m_index].name;
mail_list[m_index].num = 1;
m_index++;
break;
}
}
while(fgets(mail_list[m_index].name, NAME_LEN - 1, fp))
{
char* p = mail_list[m_index].name;
while((*p != '\n') &&(*p != '@'))p++;
if(*p == '@')
{
*p = 0;
mail_list[m_index].num = 1;
mail_list[m_index].len = p - mail_list[m_index].name;
if(!insert_tree(mail_list, &mail_list[m_index]))
{
m_index++;
}
}
}
fclose(fp);
for(i = 0; i < m_index; i++)
{
if(mail_list[i].num > 1)
{
printf("%d: %s@sina.com\n", mail_list[i].num, mail_list[i].name);
}
}
}
}
int insert_tree(mail* tree, mail* node)
{
mail* p = tree, *par = tree;
int len = tree->len;
if(len < node->len)
len = node->len;
while(p)
{
if(strncmp(p->name, node->name, len) < 0)
{
par = p;
p = p->left;
}else
if(strncmp(p->name, node->name, len) > 0)
{
par = p;
p = p->right;
}else
{
p->num++;
//printf("%d %s\n", p->num, p->name);
return 1;
}
}
if(strncmp(par->name, node->name, len) < 0)
{
par->left = node;
}else
{
par->right = node;
}
return 0;
}
|
gcc test.c -o test
./test fileName > out |
|