```scala
import java.util.Date
import java.text.SimpleDateFormat

import org.apache.spark.{Partitioner, SparkConf, SparkContext}

object partitioner {

  def main(args: Array[String]): Unit = {
    val time = new SimpleDateFormat("MMddHHmm").format(new Date())
    val sparkConf = new SparkConf().setAppName("wordcount_" + time)
    val sc = new SparkContext(sparkConf)

    val textFile = sc.textFile(
      "hdfs://namenode:9000/data/mapreduce/chuping/test_in_1/new5", 1).cache()

    // reduceByKey takes an explicit Partitioner, which decides which
    // partition (i.e. which reduce task) each key is routed to
    val result = textFile.flatMap(line => line.split("\t"))
      .map(word => (word, 1))
      .reduceByKey(new testPartitioner, _ + _)

    result.saveAsTextFile("hdfs://namenode:9000/data/zk/test/partitioner" + time)
    sc.stop()
  }
}

class testPartitioner extends Partitioner {
  // total number of output partitions
  val numPartitions: Int = 3
  // route every key to partition 1, i.e. the second reduce task
  def getPartition(key: Any): Int = 1
}
```
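
A quick way to see what getPartition did is to tag each record with its partition index; the snippet below is a minimal sketch that could be dropped in just before saveAsTextFile, reusing the result RDD from the program above. Because getPartition always returns 1 while numPartitions is 3, only one of the three output files (part-00001) should end up non-empty.

```scala
// Illustrative check: print each (word, count) pair together with the
// index of the partition it was routed to by testPartitioner.
result.mapPartitionsWithIndex { (idx, iter) =>
  iter.map { case (word, count) => s"partition $idx: $word -> $count" }
}.collect().foreach(println)
```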
The program here is only a test, and a plain word count at that, so it cannot show what a partitioner is really good for; in actual production work, however, custom partitioners are used everywhere.
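
One common production pattern is to route related keys to the same partition so that later per-group work avoids another shuffle. The sketch below is illustrative rather than part of the original program (DomainPartitioner, the URL-string keys, and the partition count are all assumptions): it partitions pair-RDD records by the domain of their URL key, and implements equals/hashCode so Spark can recognise two RDDs partitioned the same way and skip re-shuffling.

```scala
import java.net.URL
import org.apache.spark.Partitioner

// Hypothetical partitioner: all records whose URL key shares a domain
// land in the same partition, e.g. rdd.partitionBy(new DomainPartitioner(8))
class DomainPartitioner(override val numPartitions: Int) extends Partitioner {
  def getPartition(key: Any): Int = {
    val domain = new URL(key.toString).getHost
    val code = domain.hashCode % numPartitions
    // hashCode can be negative, but a partition index must be in [0, numPartitions)
    if (code < 0) code + numPartitions else code
  }

  // equal partitioners let Spark skip a shuffle when data is already laid out this way
  override def equals(other: Any): Boolean = other match {
    case p: DomainPartitioner => p.numPartitions == numPartitions
    case _                    => false
  }
  override def hashCode: Int = numPartitions
}
```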