/* C语言词频统计 (word-frequency counter in C) - Chinaunix - Powered by Discuz! */

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX_WORD_TYPE 1024
#define FILE_NAME "cntword.c"
/*
 * One entry of the word-frequency table.
 * The table is scanned front to back; the first entry whose `word` is NULL
 * marks the end of the used part.
 */
struct word
{
/* First byte of the word. count_word() passes a pointer into the file
 * buffer, so the text is NOT NUL-terminated; use word_len. */
const char *word;
/* Length of the word in bytes. */
int word_len;
/* Number of occurrences counted so far. */
int count;
};
/*
 * Read the whole of `file` into a freshly allocated buffer.
 *
 * file:   path of the file to read (opened in binary mode).
 * o_len:  receives the number of bytes read on success; left untouched on
 *         failure.
 *
 * Returns the buffer, which the caller must free(), or NULL when the file
 * cannot be opened, its size cannot be determined or does not fit in an int,
 * memory runs out, or the read comes up short.
 *
 * One extra NUL byte, not counted in *o_len, is stored after the data.  This
 * also means an empty file yields a valid zero-length buffer rather than
 * being reported as a failure.
 */
char *read_file(const char *file, int *o_len)
{
    FILE *fp;
    char *buf = NULL;
    long len;

    fp = fopen(file, "rb");
    if (fp == NULL) {
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0) {
        goto done;
    }
    /* ftell() returns -1 on error; the size must also fit in *o_len. */
    len = ftell(fp);
    if (len < 0 || len > INT_MAX) {
        goto done;
    }
    if (fseek(fp, 0, SEEK_SET) != 0) {
        goto done;
    }

    buf = malloc((size_t)len + 1);
    if (buf == NULL) {
        goto done;
    }
    if (fread(buf, 1, (size_t)len, fp) != (size_t)len) {
        free(buf);
        buf = NULL;
        goto done;
    }
    buf[len] = '\0';
    *o_len = (int)len;

done:
    fclose(fp);
    return buf;
}
/*
 * Skip whitespace in s starting at index `off`, never looking at or beyond
 * index `lmt`.
 *
 * Returns the index of the first non-whitespace byte found, or the index at
 * which scanning stopped because `lmt` was reached (which is `off` itself
 * when off >= lmt).
 */
int nstr_ltrim(const char *s, int off, int lmt)
{
    int i = off;

    /* <ctype.h> classifiers require a value representable as unsigned char
     * (or EOF); a plain char holding a byte >= 0x80 may be negative. */
    while (i < lmt && isspace((unsigned char)s[i])) {
        ++i;
    }
    return i;
}
/*
 * Find the next whitespace byte in s starting at index `off`, never looking
 * at or beyond index `lmt`.
 *
 * Returns the index of the first whitespace byte found, or the index at
 * which scanning stopped because `lmt` was reached (which is `off` itself
 * when off >= lmt).
 */
int nstr_get_next_space(const char *s, int off, int lmt)
{
    int i = off;

    /* <ctype.h> classifiers require a value representable as unsigned char
     * (or EOF); a plain char holding a byte >= 0x80 may be negative. */
    while (i < lmt && !isspace((unsigned char)s[i])) {
        ++i;
    }
    return i;
}
/*
 * Extract the next whitespace-delimited word from s, scanning from *io_off
 * and never past index lmt.
 *
 * On success the word occupies the half-open index range
 * [*o_word_off, *o_word_lmt), *io_off is advanced to the end of the word,
 * and 0 is returned.  When nothing but whitespace remains, -1 is returned
 * and none of the output parameters are written.
 */
int get_word(const char *s, int *io_off, int lmt, int *o_word_off, int *o_word_lmt)
{
    const int start = nstr_ltrim(s, *io_off, lmt);
    int end;

    if (start >= lmt) {
        return -1;
    }

    end = nstr_get_next_space(s, start, lmt);
    *io_off = end;
    *o_word_off = start;
    *o_word_lmt = end;
    return 0;
}
/*
 * Record one occurrence of the word word[0..word_len) in `table`.
 *
 * The used part of the table ends at the first entry whose `word` is NULL.
 * If an entry with the same length and the same bytes already exists, its
 * count is incremented; otherwise the word is stored in the first unused
 * entry with a count of 1.  Only the pointer is stored, not a copy, so the
 * memory `word` points into must stay valid for as long as the table is used.
 *
 * Returns 0 on success, or -1 when the word is new and the table has no
 * unused entry left, or when `word` is NULL or `word_len` is negative.
 */
int add_word(struct word *table, int table_len, const char *word, int word_len)
{
    int i;

    /* A NULL word would be indistinguishable from an unused entry. */
    if (word == NULL || word_len < 0) {
        return -1;
    }

    for (i = 0; i < table_len && table[i].word != NULL; ++i) {
        /* Words are raw byte ranges and may contain NUL bytes, so compare
         * with memcmp; strncmp would stop at the first NUL and treat
         * different words as equal. */
        if (table[i].word_len == word_len &&
            memcmp(table[i].word, word, (size_t)word_len) == 0) {
            ++table[i].count;
            return 0;
        }
    }

    if (i >= table_len) {
        return -1;
    }

    table[i].word = word;
    table[i].word_len = word_len;
    table[i].count = 1;
    return 0;
}
/*
 * Count every whitespace-delimited word in file[0..file_len) into `table`.
 *
 * file:       buffer to scan; table entries point into it, so it must
 *             outlive the table.
 * file_len:   number of bytes in `file`.
 * table:      word table with `table_len` entries.
 *
 * Returns 0 when every word was recorded, or -1 when add_word() reported a
 * failure for at least one word.  Scanning continues after such a failure,
 * so later occurrences of words already in the table are still counted.
 */
int count_word(char *file, int file_len, struct word *table, int table_len)
{
    int file_off = 0;
    int word_off, word_lmt;
    int status = 0;

    while (get_word(file, &file_off, file_len, &word_off, &word_lmt) == 0) {
        if (add_word(table, table_len, file + word_off, word_lmt - word_off) != 0) {
            status = -1;
        }
    }
    return status;
}
int print_word(struct word *table, int table_len)
{
int i;
for (i = 0; i < table_len; ++i) {
if (table[i].word == NULL) {
break;
}
fwrite(table[i].word, 1, table[i].word_len, stdout);
printf(": %d\n", table[i].count);
}
return 0;
}
/*
 * Return the number of used entries in `table`: the index of the first
 * entry whose `word` is NULL, or `table_len` when every entry is in use.
 */
int get_table_len(struct word *table, int table_len)
{
    int used = 0;

    while (used < table_len && table[used].word != NULL) {
        ++used;
    }
    return used;
}
/*
 * qsort() comparator that orders table entries by descending count.
 *
 * The parameters are `const void *` so the function has exactly the type
 * qsort() requires; each must point to a `struct word`.  Callers that pass
 * `const struct word *` directly still work, since that converts to
 * `const void *` implicitly.
 *
 * Returns 1 when word1 has the smaller count, -1 when it has the larger
 * count, and 0 when the counts are equal (so the relative order of entries
 * with equal counts after qsort() is unspecified).
 */
int compare_word(const void *word1, const void *word2)
{
    const struct word *lhs = word1;
    const struct word *rhs = word2;

    /* Each comparison yields 0 or 1, so the difference is 1, -1 or 0
     * without subtracting the counts themselves. */
    return (lhs->count < rhs->count) - (lhs->count > rhs->count);
}
/*
 * Count the whitespace-delimited words of FILE_NAME and print them to
 * stdout, most frequent first, as "<word>: <count>" lines.
 *
 * Returns EXIT_SUCCESS after printing, or EXIT_FAILURE (with a message on
 * stderr) when the file cannot be read.
 */
int main(void)
{
    /* {0} gives every entry a null `word`, which marks it as unused. */
    struct word table[MAX_WORD_TYPE] = {0};
    char *file;
    int len;
    int table_len;

    file = read_file(FILE_NAME, &len);
    if (file == NULL) {
        fprintf(stderr, "cannot read %s\n", FILE_NAME);
        return EXIT_FAILURE;
    }

    /* A non-zero status is reported but is not fatal: whatever was counted
     * is still printed. */
    if (count_word(file, len, table, MAX_WORD_TYPE) != 0) {
        fprintf(stderr, "warning: word counting reported an error; results may be incomplete\n");
    }

    table_len = get_table_len(table, MAX_WORD_TYPE);
    qsort(table, (size_t)table_len, sizeof table[0], compare_word);
    print_word(table, table_len);

    /* Table entries point into `file`, so free it only after printing. */
    free(file);
    return EXIT_SUCCESS;
}

/* 复制代码 ("copy code": forum page widget label, not part of the program) */