赞
踩
判断某一元素是否在某个集合内?
一般使用HashMap/Set,但是当集合非常庞大时,需要极大的空间,现实状况不允许。此时,可以考虑使用 Bloom Filter。
具体步骤:
它实际上是一个很长的二进制向量和一系列随机映射函数。用于检索一个元素是否在一个集合中。
优点:提高效率,缩小内存的使用。
缺点:有误识别率(false positive)和删除困难。
判断一个整数是否在100w个整数集合中。
public class HashJudge { /** * 4Bytes * 1 M = 4MB * 底层使用HahsMap 实现 DEFAULT_LOAD_FACTOR = 0.75f; * 内存申请空间 = 4/0.75f */ Set<Integer> numberSet = new HashSet<>(CommonConstant.SIZE_M); public HashJudge() { prepareSet(); } /** * 初始化 */ public void prepareSet() { for (int i = 0; i < CommonConstant.SIZE_M; i++) { numberSet.add(i); } } /** * 验证测试 */ public void judgeTest() { // 准确无误 int falseNegative = 0; int falsePositive = 0; for (int i = 0; i < CommonConstant.SIZE_M; i++) { if (!numberSet.contains(i)) { falseNegative++; } } long start = System.currentTimeMillis(); int judgeNum = CommonConstant.SIZE_M * 10; for (int i = CommonConstant.SIZE_M; i < CommonConstant.SIZE_M + judgeNum; i++) { if (numberSet.contains(i)) { System.out.println("false positive"); } } long interval = System.currentTimeMillis() - start; PrintUtil.prettyPrint("BloomJudge done! \n falseNegative:{}\n falsePositive:{} \n judgeNum:{} interval:{} ms ", falseNegative, falsePositive, judgeNum, interval); } }
public class BloomJudge { /** * 使用 Guava 内的 BloomFilter: * numBits = (-n * Math.log(p) / (Math.log(2) * Math.log(2))); * numBits ≈ -2.08 * ln(p) * default fpp 0.03 * numBits ≈ 7.3 * n = 7.3M bits ≈ 0.91MB */ private static BloomFilter<Integer> bloomFilter = BloomFilter.create(Funnels.integerFunnel(), CommonConstant.SIZE_M); public BloomJudge() { initFilter(); } /** * 初始化 */ public void initFilter() { for (int i = 0; i < CommonConstant.SIZE_M; i++) { bloomFilter.put(i); } } /** * 验证 */ public void judge() { int falseNegative = 0; int falsePositive = 0; for (int i = 0; i < CommonConstant.SIZE_M; i++) { if (!bloomFilter.mightContain(i)) { falseNegative++; } } long start = System.currentTimeMillis(); int judgeNum = CommonConstant.SIZE_M * 10; for (int i = CommonConstant.SIZE_M; i < CommonConstant.SIZE_M + judgeNum; i++) { if (bloomFilter.mightContain(i)) { falsePositive++; } } long interval = System.currentTimeMillis() - start; PrintUtil.prettyPrint("BloomJudge done! \n falseNegative:{}\n falsePositive:{} \n judgeNum:{} interval:{} ms ", falseNegative, falsePositive, judgeNum, interval); } }
// 常量 public class CommonConstant { public static int SIZE_K = 1 << 10; public static int SIZE_M = 1 << 20; public static int SIZE_G = 1 << 30; } // 测试入口 public static void main(String[] args) { /** * hash 4MB */ HashJudge hashJudge = new HashJudge(); hashJudge.judgeTest(); PrintUtil.blankRow(); /** * < 1MB * with fpp = 0.03 */ BloomJudge bloomJudge = new BloomJudge(); bloomJudge.judge(); }
result:
BloomJudge done!
falseNegative:0
falsePositive:0
judgeNum:10485760 interval:985 ms
BloomJudge done!
falseNegative:0
falsePositive:315312
judgeNum:10485760 interval:1563 ms
总结:
使用HashSet
使用BloomFilter
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。