赞
踩
在实现BloomFilter,首先实现一个位图;
在位图中,每个元素为“0”或“1”,表示其对应的元素不存在或者存在。
- typedef struct BitMap
- {
- size_t* _bits;
- size_t _range;
- }BitMap;
-
- void BitMapInit(BitMap* bm, size_t range) ;
- void BitMapSet(BitMap* bm, size_t x) ;
- void BitMapReset(BitMap* bm, size_t x) ;
- int BitMapTest(BitMap* bm, size_t x) ;
-
- void BitMapInit(BitMap* bm, size_t range) {
- assert(bm);
- bm->_bits=(size_t*)malloc(sizeof(size_t)*((range>>5)+1));
- bm->_range=(range>>5)+1;
- memset(bm->_bits,0,sizeof(size_t)*((range>>5)+1));
- }
-
- void BitMapSet(BitMap* bm, size_t x) {
- size_t index,num;
- index=x>>5;
- num=x%32;
- bm->_bits[index]|=(1<<num);
- }
-
- void BitMapReset(BitMap* bm, size_t x) {
- size_t index,num;
- index=x>>5;
- num=x%32;
- bm->_bits[index]&=(~(1<<num));
- }
- void BitMapDestroy(BitMap* bm){
- free(bm->_bits);
- }
-
- // x存在返回0,不存在返回-1
- int BitMapTest(BitMap* bm, size_t x){
- size_t index,num;
- index=x>>5;
- num=x%32;
- if(bm->_bits[index]&(1<<num))
- return 0;
- return -1;
- }
-
free(bm->_bits);
}
// x存在返回0,不存在返回-1
int BitMapTest(BitMap* bm, size_t x){
size_t index,num;
index=x>>5;
num=x%32;
if(bm->_bits[index]&(1<<num))
return 0;
return -1;
}
写好位图后,可以快速的在海量数据中查找一个数据是否在其中
笔试题:
给40亿个不重复的无符号整数,没排过序。给一个无符号整数,如何快速判断一个数是否在这40亿个数中。 【腾讯】
- #include"BitMap.h"
-
- int main(){
- BitMap bm;
- BitMapInit(&bm,-1);//-1 无符号整型中为最大值
- BitMapSet(&bm, 2);
- BitMapSet(&bm, 33);
- BitMapSet(&bm, 120);
- BitMapSet(&bm, 9);
-
- printf("1? %d \n",BitMapTest(&bm,1));
- printf("2? %d \n",BitMapTest(&bm,2));
- printf("33? %d \n",BitMapTest(&bm,33));
- printf("120? %d \n",BitMapTest(&bm,120));
- printf("9? %d \n",BitMapTest(&bm,9));
-
- BitMapReset(&bm, 33) ;
- BitMapReset(&bm, 120) ;
- printf("33? %d \n",BitMapTest(&bm,33));
- printf("120? %d \n",BitMapTest(&bm,120));
- system("pause");
- return 0;
- }
如果我们要存入的不是一个整数,而是字符串,那么引起冲突的可能性将大大增加,为了解决这个问题,就需要使用的布隆过滤器,但布隆过滤器并不是完全准确的,他只能只能保证不在是准确的,在则有可能是误判;
为降低存在的误判可能,采取多个HashFunc,这样,只要查找多次找到都为1,那么可以近似认为他就是存在;
反之,只要有一个为0,那么他必定不存在;
- #include"BitMap.h"
-
- typedef const char* KeyValue;
- typedef size_t(*HASH_FUNC)(const char* str);
-
- typedef struct{
- BitMap _bm;
- HASH_FUNC hashfunc1;
- HASH_FUNC hashfunc2;
- HASH_FUNC hashfunc3;
- }BloomFilter;
-
- void BloomFilterInit(BloomFilter* bf,size_t range);
- void BloomFilterSet(BloomFilter* bf, KeyValue x) ;
- void BloomFilterReset(BloomFilter* bf, KeyValue x) ;
- void BloomFilterTest() ;
- void BloomFilterDestory(BloomFilter* bf);
-
- static size_t BKDRHash(KeyValue str)
- {
- size_t seed = 131; // 31 131 1313 13131 131313
- size_t hash = 0;
- while (*str )
- {
- hash = hash * seed + (*str++);
- }
- return (hash & 0x7FFFFFFF);
- }
-
- size_t SDBMHash(KeyValue str)
- {
- size_t ch;
- size_t hash = 0;
- while (ch = (size_t)*str++)
- {
- hash = 65599 * hash + ch;
- //hash = (size_t)ch + (hash << 6) + (hash << 16) - hash;
- }
- return hash;
- }
-
- size_t RSHash(KeyValue str)
- {
- size_t ch;
- size_t hash = 0;
- size_t magic = 63689;
- while (ch = (size_t)*str++)
- {
- hash = hash * magic + ch;
- magic *= 378551;
- }
- return hash;
- }
-
- void BloomFilterInit(BloomFilter* bf,size_t range){
- assert(bf);
- BitMapInit(&bf->_bm,range);
- bf->hashfunc1=BKDRHash;
- bf->hashfunc2=SDBMHash;
- bf->hashfunc3=RSHash;
- }
-
- void BloomFilterSet(BloomFilter* bf, KeyValue x){
- size_t hash1,hash2,hash3;
- hash1=bf->hashfunc1(x)%bf->_bm._range;
- hash2=bf->hashfunc2(x)%bf->_bm._range;
- hash3=bf->hashfunc3(x)%bf->_bm._range;
- BitMapSet(&bf->_bm,hash1);
- BitMapSet(&bf->_bm,hash2);
- BitMapSet(&bf->_bm,hash3);
- }
-
- void BloomFilterReset(BitMap* bm, KeyValue x) ;
-
- int BloomFilterFind(BloomFilter* bf,KeyValue x){
-
- size_t hash1,hash2,hash3;
- hash1=bf->hashfunc1(x)%bf->_bm._range;
- hash2=bf->hashfunc2(x)%bf->_bm._range;
- hash3=bf->hashfunc3(x)%bf->_bm._range;
-
- if(BitMapTest(&bf->_bm,hash1)==-1)
- return -1;
- if(BitMapTest(&bf->_bm,hash2)==-1)
- return -1;
- if(BitMapTest(&bf->_bm,hash3)==-1)
- return -1;
-
- return 0;
- }
-
- void BloomFilterDestory(BloomFilter* bf){
- assert(bf);
- BitMapDestroy(&bf->_bm);
- }
-
-
- void BloomFilterTest(){
- BloomFilter bf;
- BloomFilterInit(&bf,10000);
- BloomFilterSet(&bf,"sort");
- BloomFilterSet(&bf,"srot");
- BloomFilterSet(&bf,"return");
- BloomFilterSet(&bf,"find");
- BloomFilterSet(&bf,"sort123456789");
-
- printf("sort? %d\n",BloomFilterFind(&bf,"sort"));
- printf("srot? %d\n",BloomFilterFind(&bf,"srot"));
- printf("return? %d\n",BloomFilterFind(&bf,"return"));
- printf("sort123456789? %d\n",BloomFilterFind(&bf,"sort123456789"));
- printf("find? %d\n",BloomFilterFind(&bf,"find"));
- printf("find1? %d\n",BloomFilterFind(&bf,"find1"));
- printf("asd? %d\n",BloomFilterFind(&bf,"asd"));
- BloomFilterDestory(&bf);
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。