赞
踩
现代大数据环境下。对于大数据的处理很平常。
对于给定的1亿或者无上限的数据。排序出top 50的数据。
对此有人给出了一个解决方案:堆排序。
public class ObjectTopK<E> { //要排序的数量 private int k; //数据存储 private List<E> list; //比较器 private Comparator comparator; //是否初始化堆 private boolean init=false; public ObjectTopK(int k){ this.k = k; //初始化大小 list = new ArrayList<E>(k); } public ObjectTopK(int k,Comparator<? super E> comparator){ this.k = k; //初始化大小 list = new ArrayList<E>(k); this.comparator = comparator; } /** * 添加元素 * @param e */ public void add(E e) { if( list.size() < k ) { list.add(e); }else if( !init ){ buildMinHeap(); init=true; }else { if( comparator != null && comparator.compare(e, list.get(0)) > 0) { list.set(0, e); buildMinHeap(0, list.size()); }else if(((Comparable)e).compareTo((Comparable)list.get(0)) > 0) { list.set(0, e); buildMinHeap(0, list.size()); } } } /** * 构造最小堆 */ private void buildMinHeap(){ for(int i=list.size()/2 -1 ; i>=0 ; i--) { buildMinHeap(i, list.size()); // print(); // System.out.println(); } } /** * 堆化 */ private void buildMinHeap(int start, int end) { if( comparator != null ) { int left = 2*start+1; while(left < end) { int min = left; if( left+1 < end && comparator.compare(list.get(left+1), list.get(left))<0){ min = left+1; } if(comparator.compare(list.get(start), list.get(min))<0) { break; } else { E temp = list.get(min); list.set(min, list.get(start)); list.set(start, temp); start = min; left = 2*start + 1; } } }else if( list.get(0) instanceof Comparable){ int left = 2*start+1; while(left < end) { int min = left; if( left+1 < end && ((Comparable)list.get(left+1)).compareTo((Comparable)list.get(left)) < 0){ min = left+1; } if( ((Comparable)list.get(start)).compareTo((Comparable)list.get(min)) < 0 ) { break; } else { E temp = list.get(min); list.set(min, list.get(start)); list.set(start, temp); start = min; left = 2*start + 1; } } } else { throw new RuntimeException("数据不能比较"); } } /** * 堆排序 */ public void sort() { for(int i=list.size()-1; i>=0; i--) { E temp = list.get(0); list.set(0, list.get(i)); list.set(i, temp); buildMinHeap(0, i); } } /** * 输出结果 */ public void print() { for(E e: list) { System.out.print(e+" "); } } public static void main(String[] args) throws IOException { ObjectTopK<Integer> obj = new ObjectTopK<Integer>(50); Random random = new Random(); long start = System.currentTimeMillis(); for(int i=0;i<Integer.MAX_VALUE;i++) { obj.add(random.nextInt(Integer.MAX_VALUE)); } obj.sort(); obj.print(); System.out.println("============================="); System.out.println("21亿数据耗时:"+(System.currentTimeMillis()-start)/1000); } }
2147483644 2147483643 2147483641 2147483639 2147483637 2147483637 2147483636 2147483633 2147483633 2147483632 2147483630 2147483629 2147483627 2147483627 2147483627 2147483626 2147483625 2147483625 2147483625 2147483619 2147483618 2147483618 2147483617 2147483612 2147483611 2147483611 2147483611 2147483609 2147483608 2147483607 2147483606 2147483606 2147483606 2147483605 2147483603 2147483602 2147483602 2147483599 2147483598 2147483597 2147483597 2147483597 2147483597 2147483592 2147483592 2147483590 2147483589 2147483589 2147483588 2147483588 =============================
21亿数据耗时:37
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。