赞
踩
一、Java代码
- // Joins two small in-memory JSON datasets on "name" using Spark SQL
- // (SQLContext / DataFrame API) and prints the joined rows.
- SparkConf conf = new SparkConf();
- conf.setMaster("local").setAppName("jsonRDD");
- JavaSparkContext sc = new JavaSparkContext(conf);
- try {
-     SQLContext sqlContext = new SQLContext(sc);
-
-     // Each RDD element is one JSON document given as a string.
-     // Note: "age" and "score" are JSON strings here, not numbers.
-     JavaRDD<String> nameRDD = sc.parallelize(Arrays.asList(
-         "{\"name\":\"zhangsan\",\"age\":\"18\"}",
-         "{\"name\":\"lisi\",\"age\":\"19\"}",
-         "{\"name\":\"wangwu\",\"age\":\"20\"}"
-     ));
-     JavaRDD<String> scoreRDD = sc.parallelize(Arrays.asList(
-         "{\"name\":\"zhangsan\",\"score\":\"100\"}",
-         "{\"name\":\"lisi\",\"score\":\"200\"}",
-         "{\"name\":\"wangwu\",\"score\":\"300\"}"
-     ));
-
-     // Turn each RDD of JSON strings into a DataFrame.
-     DataFrame namedf = sqlContext.read().json(nameRDD);
-     DataFrame scoredf = sqlContext.read().json(scoreRDD);
-
-     // Register both DataFrames under table names so the SQL text below can refer to them.
-     namedf.registerTempTable("name");
-     scoredf.registerTempTable("score");
-
-     // Join the two tables on the shared "name" column and print the result.
-     DataFrame result = sqlContext.sql("select name.name,name.age,score.score from name,score where name.name = score.name");
-     result.show();
- } finally {
-     // Stop the context even when a step above throws, so it is never left running.
-     sc.stop();
- }
二、Scala代码
- // Joins two small in-memory JSON datasets on "name" using Spark SQL
- // (SQLContext / DataFrame API) and prints the joined rows.
- val conf = new SparkConf()
- conf.setMaster("local").setAppName("jsonrdd")
- val sc = new SparkContext(conf)
- try {
-   val sqlContext = new SQLContext(sc)
-
-   // Each RDD element is one JSON document given as a string.
-   // Note: "age" and "score" are JSON numbers here.
-   val nameRDD = sc.makeRDD(Array(
-     "{\"name\":\"zhangsan\",\"age\":18}",
-     "{\"name\":\"lisi\",\"age\":19}",
-     "{\"name\":\"wangwu\",\"age\":20}"
-   ))
-   val scoreRDD = sc.makeRDD(Array(
-     "{\"name\":\"zhangsan\",\"score\":100}",
-     "{\"name\":\"lisi\",\"score\":200}",
-     "{\"name\":\"wangwu\",\"score\":300}"
-   ))
-
-   // Turn each RDD of JSON strings into a DataFrame.
-   val nameDF = sqlContext.read.json(nameRDD)
-   val scoreDF = sqlContext.read.json(scoreRDD)
-
-   // Register both DataFrames under table names so the SQL text below can refer to them.
-   nameDF.registerTempTable("name")
-   scoreDF.registerTempTable("score")
-
-   // Join the two tables on the shared "name" column and print the result.
-   val result = sqlContext.sql("select name.name,name.age,score.score from name,score where name.name = score.name")
-   result.show()
- } finally {
-   // Stop the context even when a step above throws, so it is never left running.
-   sc.stop()
- }
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。