- 论坛徽章:
- 2
|
下面的算法按空格分词。
- #include <ctype.h>
- #include <limits.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- /* Number of slots in the word table that main() allocates, i.e. the maximum number of distinct words tracked. */
- #define MAX_WORD_TYPE 1024
- /* Path of the file that main() reads and counts words in. */
- #define FILE_NAME "cntword.c"
/*
 * One slot of the word-frequency table.
 *
 * The word text is referenced, not copied: `word` points at the first byte
 * of the word and `word_len` gives its length, so the text is not expected
 * to be NUL-terminated. A slot whose `word` is NULL is unused and marks the
 * end of the occupied part of the table.
 */
struct word {
    const char *word;   /* first byte of the word, or NULL for an unused slot */
    int word_len;       /* length of the word in bytes */
    int count;          /* number of times the word has been seen */
};
/*
 * Read the whole content of a file into a freshly allocated buffer.
 *
 * file:  path of the file to read; it is opened in binary mode.
 * o_len: receives the number of bytes read on success; it is left untouched
 *        on failure.
 *
 * Returns a buffer obtained from malloc() that the caller must free(). The
 * buffer holds the file bytes followed by one extra '\0' byte that is not
 * counted in *o_len. An empty file yields a valid buffer with *o_len == 0.
 *
 * Returns NULL if an argument is NULL, the file cannot be opened, its size
 * cannot be determined or does not fit in an int, memory cannot be
 * allocated, or the read comes up short.
 */
char *read_file(const char *file, int *o_len)
{
    FILE *fp;
    char *buf = NULL;
    long size;

    if (file == NULL || o_len == NULL) {
        return NULL;
    }

    fp = fopen(file, "rb");
    if (fp == NULL) {
        return NULL;
    }

    if (fseek(fp, 0L, SEEK_END) != 0) {
        goto fail;
    }
    size = ftell(fp);
    /* ftell() reports failure as -1; the length must also fit in *o_len. */
    if (size < 0 || size > INT_MAX) {
        goto fail;
    }
    if (fseek(fp, 0L, SEEK_SET) != 0) {
        goto fail;
    }

    /* The extra byte holds the terminator and keeps the request non-zero. */
    buf = malloc((size_t)size + 1);
    if (buf == NULL) {
        goto fail;
    }
    if (size > 0 && fread(buf, 1, (size_t)size, fp) != (size_t)size) {
        goto fail;
    }
    buf[size] = '\0';

    fclose(fp);
    *o_len = (int)size;
    return buf;

fail:
    free(buf);
    fclose(fp);
    return NULL;
}
/*
 * Skip leading whitespace in s[off..lmt).
 *
 * s:   buffer to scan; it does not need to be NUL-terminated.
 * off: index at which scanning starts.
 * lmt: exclusive upper bound of the scan.
 *
 * Returns the index of the first non-whitespace byte at or after off, or
 * lmt when only whitespace remains. When off >= lmt nothing is scanned and
 * off is returned unchanged.
 */
int nstr_ltrim(const char *s, int off, int lmt)
{
    int i = off;

    /* isspace() is only defined for unsigned char values (and EOF), so
     * bytes >= 0x80 must not reach it as negative plain-char values. */
    while (i < lmt && isspace((unsigned char)s[i])) {
        ++i;
    }
    return i;
}
/*
 * Find the next whitespace byte in s[off..lmt).
 *
 * s:   buffer to scan; it does not need to be NUL-terminated.
 * off: index at which scanning starts.
 * lmt: exclusive upper bound of the scan.
 *
 * Returns the index of the first whitespace byte at or after off, or lmt
 * when there is none. When off >= lmt nothing is scanned and off is
 * returned unchanged.
 */
int nstr_get_next_space(const char *s, int off, int lmt)
{
    int i = off;

    /* isspace() is only defined for unsigned char values (and EOF), so
     * bytes >= 0x80 must not reach it as negative plain-char values. */
    while (i < lmt && !isspace((unsigned char)s[i])) {
        ++i;
    }
    return i;
}
/*
 * Extract the next whitespace-delimited word from s[*io_off..lmt).
 *
 * s:          buffer being tokenized.
 * io_off:     in: index to start scanning from; out: index just past the
 *             word that was found. Not modified when no word is found.
 * lmt:        exclusive upper bound of the buffer.
 * o_word_off: receives the index of the word's first byte.
 * o_word_lmt: receives the index one past the word's last byte.
 *
 * Returns 0 when a word was found and -1 when only whitespace (or nothing)
 * remains; in the latter case none of the output parameters are written.
 */
int get_word(const char *s, int *io_off, int lmt, int *o_word_off, int *o_word_lmt)
{
    const int begin = nstr_ltrim(s, *io_off, lmt);
    int end;

    if (begin >= lmt) {
        return -1;
    }

    end = nstr_get_next_space(s, begin, lmt);
    *io_off = end;
    *o_word_off = begin;
    *o_word_lmt = end;
    return 0;
}
- int add_word(struct word *table, int table_len, const char *word, int word_len)
- {
- int i;
- for (i = 0; i < table_len; ++i) {
- if (table[i].word == NULL) {
- break;
- }
- if (table[i].word_len == word_len && strncmp(table[i].word, word, word_len) == 0) {
- ++table[i].count;
- return 0;
- }
- }
- if (i < table_len) {
- table[i].word = word;
- table[i].word_len = word_len;
- table[i].count = 1;
- return 0;
- }
- return -1;
- }
/*
 * Split a buffer into whitespace-delimited words and tally them in table.
 *
 * file:      buffer holding the text to tokenize.
 * file_len:  number of bytes in file.
 * table:     word table that receives the counts; its entries point into
 *            file, so file must outlive the table's use.
 * table_len: total number of slots in table.
 *
 * Returns 0 when every word was recorded, or -1 when add_word() rejected at
 * least one word. Scanning continues after a rejection so that words
 * already present in the table are still counted.
 */
int count_word(char *file, int file_len, struct word *table, int table_len)
{
    int pos = 0;
    int begin;
    int end;
    int status = 0;

    while (get_word(file, &pos, file_len, &begin, &end) == 0) {
        if (add_word(table, table_len, file + begin, end - begin) != 0) {
            status = -1;
        }
    }
    return status;
}
- int print_word(struct word *table, int table_len)
- {
- int i;
- for (i = 0; i < table_len; ++i) {
- if (table[i].word == NULL) {
- break;
- }
- fwrite(table[i].word, 1, table[i].word_len, stdout);
- printf(": %d\n", table[i].count);
- }
- return 0;
- }
- int get_table_len(struct word *table, int table_len)
- {
- int i;
- for (i = 0; i < table_len; ++i) {
- if (table[i].word == NULL) {
- break;
- }
- }
- return i;
- }
- int compare_word(const struct word *word1, const struct word *word2)
- {
- if (word1->count < word2->count) {
- return 1;
- }else if (word1->count > word2->count) {
- return -1;
- }
- return 0;
- }
- int main(void)
- {
- struct word table[MAX_WORD_TYPE];
- char *file;
- int len;
- int table_len;
- file = read_file(FILE_NAME, &len);
- if (file != NULL) {
- memset(table, 0, sizeof(table));
- count_word(file, len, table, MAX_WORD_TYPE);
- table_len = get_table_len(table, MAX_WORD_TYPE);
- qsort(table, table_len, sizeof(struct word), compare_word);
- print_word(table, table_len);
- free(file);
- return 0;
- }
- return -1;
- }
复制代码 |
|