当前位置:   article > 正文

C语言布隆过滤器BloomFilter_布隆过滤器c语言实现

布隆过滤器c语言实现

在实现BloomFilter,首先实现一个位图;

BitMap

在位图中,每个元素为“0”或“1”,表示其对应的元素不存在或者存在。

  1. typedef struct BitMap
  2. {
  3. size_t* _bits;
  4. size_t _range;
  5. }BitMap;
  6. void BitMapInit(BitMap* bm, size_t range) ;
  7. void BitMapSet(BitMap* bm, size_t x) ;
  8. void BitMapReset(BitMap* bm, size_t x) ;
  9. int BitMapTest(BitMap* bm, size_t x) ;
  10. void BitMapInit(BitMap* bm, size_t range) {
  11. assert(bm);
  12. bm->_bits=(size_t*)malloc(sizeof(size_t)*((range>>5)+1));
  13. bm->_range=(range>>5)+1;
  14. memset(bm->_bits,0,sizeof(size_t)*((range>>5)+1));
  15. }
  16. void BitMapSet(BitMap* bm, size_t x) {
  17. size_t index,num;
  18. index=x>>5;
  19. num=x%32;
  20. bm->_bits[index]|=(1<<num);
  21. }
  22. void BitMapReset(BitMap* bm, size_t x) {
  23. size_t index,num;
  24. index=x>>5;
  25. num=x%32;
  26. bm->_bits[index]&=(~(1<<num));
  27. }
  28. void BitMapDestroy(BitMap* bm){
  29. free(bm->_bits);
  30. }
  31. // x存在返回0,不存在返回-1
  32. int BitMapTest(BitMap* bm, size_t x){
  33. size_t index,num;
  34. index=x>>5;
  35. num=x%32;
  36. if(bm->_bits[index]&(1<<num))
  37. return 0;
  38. return -1;
  39. }
free(bm->_bits); } // x存在返回0,不存在返回-1 int BitMapTest(BitMap* bm, size_t x){ size_t index,num; index=x>>5; num=x%32; if(bm->_bits[index]&(1<<num)) return 0; return -1; }

写好位图后,可以快速的在海量数据中查找一个数据是否在其中

笔试题: 
给40亿个不重复的无符号整数,没排过序。给一个无符号整数,如何快速判断一个数是否在这40亿个数中。 【腾讯】

  1. #include"BitMap.h"
  2. int main(){
  3. BitMap bm;
  4. BitMapInit(&bm,-1);//-1 无符号整型中为最大值
  5. BitMapSet(&bm, 2);
  6. BitMapSet(&bm, 33);
  7. BitMapSet(&bm, 120);
  8. BitMapSet(&bm, 9);
  9. printf("1? %d \n",BitMapTest(&bm,1));
  10. printf("2? %d \n",BitMapTest(&bm,2));
  11. printf("33? %d \n",BitMapTest(&bm,33));
  12. printf("120? %d \n",BitMapTest(&bm,120));
  13. printf("9? %d \n",BitMapTest(&bm,9));
  14. BitMapReset(&bm, 33) ;
  15. BitMapReset(&bm, 120) ;
  16. printf("33? %d \n",BitMapTest(&bm,33));
  17. printf("120? %d \n",BitMapTest(&bm,120));
  18. system("pause");
  19. return 0;
  20. }

 

BloomFilter

如果我们要存入的不是一个整数,而是字符串,那么引起冲突的可能性将大大增加,为了解决这个问题,就需要使用的布隆过滤器,但布隆过滤器并不是完全准确的,他只能只能保证不在是准确的,在则有可能是误判;

为降低存在的误判可能,采取多个HashFunc,这样,只要查找多次找到都为1,那么可以近似认为他就是存在;

反之,只要有一个为0,那么他必定不存在;

  1. #include"BitMap.h"
  2. typedef const char* KeyValue;
  3. typedef size_t(*HASH_FUNC)(const char* str);
  4. typedef struct{
  5. BitMap _bm;
  6. HASH_FUNC hashfunc1;
  7. HASH_FUNC hashfunc2;
  8. HASH_FUNC hashfunc3;
  9. }BloomFilter;
  10. void BloomFilterInit(BloomFilter* bf,size_t range);
  11. void BloomFilterSet(BloomFilter* bf, KeyValue x) ;
  12. void BloomFilterReset(BloomFilter* bf, KeyValue x) ;
  13. void BloomFilterTest() ;
  14. void BloomFilterDestory(BloomFilter* bf);
  15. static size_t BKDRHash(KeyValue str)
  16. {
  17. size_t seed = 131; // 31 131 1313 13131 131313
  18. size_t hash = 0;
  19. while (*str )
  20. {
  21. hash = hash * seed + (*str++);
  22. }
  23. return (hash & 0x7FFFFFFF);
  24. }
  25. size_t SDBMHash(KeyValue str)
  26. {
  27. size_t ch;
  28. size_t hash = 0;
  29. while (ch = (size_t)*str++)
  30. {
  31. hash = 65599 * hash + ch;
  32. //hash = (size_t)ch + (hash << 6) + (hash << 16) - hash;
  33. }
  34. return hash;
  35. }
  36. size_t RSHash(KeyValue str)
  37. {
  38. size_t ch;
  39. size_t hash = 0;
  40. size_t magic = 63689;
  41. while (ch = (size_t)*str++)
  42. {
  43. hash = hash * magic + ch;
  44. magic *= 378551;
  45. }
  46. return hash;
  47. }
  48. void BloomFilterInit(BloomFilter* bf,size_t range){
  49. assert(bf);
  50. BitMapInit(&bf->_bm,range);
  51. bf->hashfunc1=BKDRHash;
  52. bf->hashfunc2=SDBMHash;
  53. bf->hashfunc3=RSHash;
  54. }
  55. void BloomFilterSet(BloomFilter* bf, KeyValue x){
  56. size_t hash1,hash2,hash3;
  57. hash1=bf->hashfunc1(x)%bf->_bm._range;
  58. hash2=bf->hashfunc2(x)%bf->_bm._range;
  59. hash3=bf->hashfunc3(x)%bf->_bm._range;
  60. BitMapSet(&bf->_bm,hash1);
  61. BitMapSet(&bf->_bm,hash2);
  62. BitMapSet(&bf->_bm,hash3);
  63. }
  64. void BloomFilterReset(BitMap* bm, KeyValue x) ;
  65. int BloomFilterFind(BloomFilter* bf,KeyValue x){
  66. size_t hash1,hash2,hash3;
  67. hash1=bf->hashfunc1(x)%bf->_bm._range;
  68. hash2=bf->hashfunc2(x)%bf->_bm._range;
  69. hash3=bf->hashfunc3(x)%bf->_bm._range;
  70. if(BitMapTest(&bf->_bm,hash1)==-1)
  71. return -1;
  72. if(BitMapTest(&bf->_bm,hash2)==-1)
  73. return -1;
  74. if(BitMapTest(&bf->_bm,hash3)==-1)
  75. return -1;
  76. return 0;
  77. }
  78. void BloomFilterDestory(BloomFilter* bf){
  79. assert(bf);
  80. BitMapDestroy(&bf->_bm);
  81. }
  82. void BloomFilterTest(){
  83. BloomFilter bf;
  84. BloomFilterInit(&bf,10000);
  85. BloomFilterSet(&bf,"sort");
  86. BloomFilterSet(&bf,"srot");
  87. BloomFilterSet(&bf,"return");
  88. BloomFilterSet(&bf,"find");
  89. BloomFilterSet(&bf,"sort123456789");
  90. printf("sort? %d\n",BloomFilterFind(&bf,"sort"));
  91. printf("srot? %d\n",BloomFilterFind(&bf,"srot"));
  92. printf("return? %d\n",BloomFilterFind(&bf,"return"));
  93. printf("sort123456789? %d\n",BloomFilterFind(&bf,"sort123456789"));
  94. printf("find? %d\n",BloomFilterFind(&bf,"find"));
  95. printf("find1? %d\n",BloomFilterFind(&bf,"find1"));
  96. printf("asd? %d\n",BloomFilterFind(&bf,"asd"));
  97. BloomFilterDestory(&bf);
  98. }

 

 

 

声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/477886
推荐阅读
相关标签
  

闽ICP备14008679号