- 论坛徽章:
- 0
|
原帖由 bbpet 于 2008-6-27 22:29 发表 ![]()
我做了一个Bloom Filter的实现来测了下,结论是插入和查询的性能都好到没话说。
查询快到我设的1k个数据一瞬间就查完了,以至于偶实在没什么办法贴数据出来
开始看了两天大家讨论,还疑惑会不会插入慢,查 ...
我把几个程序合成一个了,好贴些,待检查的列表也弄到程序里去了,省掉io那段,时间也从rdtsc改成time了
有兴趣的自己改吧,数据可以适当放大,别越界,算清楚内存,就行了,友情提醒下,设太大要等久
另,代码很丑,不用批了,只是用来看效果,平时的程序不是这样的~
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #define HASHSIZE 300000000
- #define NAMESIZE 126934395
- #define STARTPOS 69343957
- static int initab();
- static void freetab();
- static int fillname();
- static int check(char *name);
- static int testSet1(unsigned int code);
- static int testSet2(unsigned int code);
- static unsigned int hash_larbin(char *name);
- static unsigned int hash_php(char *name);
- char nametab[] = {
- 'a', 'b', 'c', 'd', 'e', 'f', 'g',
- 'h', 'i', 'j', 'k', 'l', 'm', 'n',
- 'o', 'p', 'q', 'r', 's', 't', 'u',
- 'v', 'w', 'x', 'y', 'z',
- '0', '1', '2', '3', '4', '5', '6',
- '7', '8', '9',
- '_'
- };
- char *chklist[] = {
- "2",
- "to",
- "nice",
- "great",
- "aaaaaa",
- "aaaaab",
- "aaaaac",
- "aaaaad",
- "baaaaa",
- "caaaaa",
- "daaaaa",
- "aaaaca",
- "bbbbba",
- "bbbbbb",
- "bbbbbc",
- "bbbbbd",
- "abbbbb",
- "bbbbbb",
- "cbbbbb",
- "dbbbbb",
- "fi3a87",
- "fi3a8asfasvaf7",
- NULL
- };
- static char *table1;
- static char *table2;
- int
- main(int argc, char **argv)
- {
- char **p;
- if (initab()) {
- printf("omg!\n");
- return 0;
- }
- fillname();
- printf("%d\n", time(NULL));
- for (p = chklist; *p != NULL; p++) {
- printf("%s ", *p);
- if (check(*p))
- printf("known!\n");
- else
- printf("ignore!\n");
- }
- printf("%d\n", time(NULL));
- freetab();
- return 0;
- }
- static int
- initab()
- {
- ssize_t total;
- ssize_t i;
- total = HASHSIZE / 8;
- table1 = (char *) malloc(total);
- table2 = (char *) malloc(total);
- if (!table1 || !table2)
- return -1;
- for (i = 0; i < HASHSIZE / 8; i++) {
- table1[i] = 0;
- table2[i] = 0;
- }
- return 0;
- }
- static void
- freetab()
- {
- free(table1);
- free(table2);
- }
- static int
- fillname()
- {
- int i;
- int j;
- int x;
- unsigned int code;
- char username[10];
- for (i = STARTPOS; i < NAMESIZE; i++) {
- memset(username, 0, sizeof(username));
- for (x = i, j = 0; x > 0; j++) {
- username[j] = nametab[x % 37];
- x /= 37;
- }
- code = hash_larbin(username);
- testSet1(code);
- code = hash_php(username);
- testSet2(code);
- }
- }
- static int
- check(char *name)
- {
- int x;
- int y;
- unsigned int code;
- code = hash_larbin(name);
- x = testSet1(code);
- code = hash_php(name);
- y = testSet2(code);
- return (x > 0) && (y > 0);
- }
- static int
- testSet1(unsigned int code)
- {
- unsigned int pos;
- unsigned int bits;
- int res;
- pos = code / 8;
- bits = 1 << (code % 8);
- res = table1[pos] & bits;
- table1[pos] |= bits;
- return res;
- }
- static int
- testSet2(unsigned int code)
- {
- unsigned int pos;
- unsigned int bits;
- int res;
- pos = code / 8;
- bits = 1 << (code % 8);
- res = table2[pos] & bits;
- table2[pos] |= bits;
- return res;
- }
- static unsigned int
- hash_larbin(char *name)
- {
- unsigned int h;
- unsigned int i;
- h = 0;
- i = 0;
- while (name[i] != 0) {
- h = 31 * h + name[i];
- i++;
- }
- return h % HASHSIZE;
- }
- static unsigned int
- hash_php(char *name)
- {
- unsigned int h;
- unsigned int g;
- h = 0;
- g = 0;
- while (*name != 0) {
- h = (h << 4) + *name++;
- if ((g = (h & 0xF0000000))) {
- h = h ^ (g >> 24);
- h = h ^ g;
- }
- }
- return h;
- }
复制代码 |
|