Problem and data:

computer,huangxiaoming,85,86,41,75,93,42,85
computer,xuzheng,54,52,86,91,42
computer,huangbo,85,42,96,38
english,zhaobenshan,54,52,86,91,42,85,75
english,liuyifei,85,41,75,21,85,96,14
algorithm,liuyifei,75,85,62,48,54,96,15
computer,huangjiaju,85,75,86,85,85
english,liuyifei,76,95,86,74,68,74,48
english,huangdatou,48,58,67,86,15,33,85
algorithm,huanglei,76,95,86,74,68,74,48
algorithm,huangjiaju,85,75,86,85,85,74,86
computer,huangdatou,48,58,67,86,15,33,85
english,zhouqi,85,86,41,75,93,42,85,75,55,47,22
english,huangbo,85,42,96,38,55,47,22
algorithm,liutao,85,75,85,99,66
computer,huangzitao,85,86,41,75,93,42,85
math,wangbaoqiang,85,86,41,75,93,42,85
computer,liujialing,85,41,75,21,85,96,14,74,86
computer,liuyifei,75,85,62,48,54,96,15
computer,liutao,85,75,85,99,66,88,75,91
computer,huanglei,76,95,86,74,68,74,48
english,liujialing,75,85,62,48,54,96,15
math,huanglei,76,95,86,74,68,74,48
math,huangjiaju,85,75,86,85,85,74,86
math,liutao,48,58,67,86,15,33,85
english,huanglei,85,75,85,99,66,88,75,91
math,xuzheng,54,52,86,91,42,85,75
math,huangxiaoming,85,75,85,99,66,88,75,91
math,liujialing,85,86,41,75,93,42,85,75
english,huangxiaoming,85,86,41,75,93,42,85
algorithm,huangdatou,48,58,67,86,15,33,85
algorithm,huangzitao,85,86,41,75,93,42,85,75

1. Data description
The number of fields per record is not fixed. The first field is the course name (four courses in total: computer, math, english, algorithm), the second is the student name, and every following field is the score of one exam.

2. Requirements
(1) Count the number of participating students and the average score of each course.
(2) Compute each participating student's average score per course and write the results into separate files, one file per course, sorted by average score from high to low, with scores kept to one decimal place.
(3) For each course, find the student with the highest average score: course, name, and average score.
Problem analysis: 1. For the course average, the map first computes each student's average for the course, and the reduce then averages those per-student averages to get the course-level figure.
/**
 * @author: lpj
 * @date: 2018-03-16 19:16:47
 * @Description: Task 1 - participant count and average score per course
 */
package lpj.reduceWork;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class StudentScore3MR {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // conf.addResource("hdfs-site.xml"); // load cluster config from file
        // System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system

        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3MR.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        // String inputpath = args[0];
        // String outpath = args[1];

        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6");
        if (fs.exists(outputPath)) { // delete a stale output directory, or the job fails
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    // 1. Count the participants and the average score of each course
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Text, Text> {
        Text kout = new Text();
        Text valueout = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            String course = reads[0];
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) { // scores start at index 2
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            double avg = 1.0 * sum / count; // this student's average for the course
            kout.set(course);
            valueout.set(String.valueOf(avg));
            context.write(kout, valueout);
        }
    }

    public static class StudentScore3MR_Reducer extends Reducer<Text, Text, Text, Text> {
        Text valueout = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            double sum = 0;
            int count = 0;
            for (Text text : values) { // one value per participating student
                sum += Double.parseDouble(text.toString());
                count++;
            }
            double avg = sum / count; // average of the per-student averages
            valueout.set(count + "\t" + avg);
            context.write(key, valueout);
        }
    }
}
Result:
algorithm	6	71.60119047619047
computer	10	69.79896825396825
english	9	66.22655122655122
math	7	72.88265306122449
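Note that this reducer averages the per-student averages, so every student carries equal weight no matter how many exams they took; that is exactly what the numbers above reflect. If the intended statistic were instead the mean over all raw scores, the mapper could emit a "sum,count" pair and the reducer could total both before dividing once. A minimal sketch of the two changed method bodies, reusing the classes above:

// In the mapper, emit the raw totals instead of a pre-computed average:
valueout.set(sum + "," + count);
context.write(kout, valueout);

// In the reducer, accumulate totals and divide once:
double totalScore = 0;
int totalExams = 0;
int students = 0; // one "sum,count" value arrives per student record
for (Text text : values) {
    String[] parts = text.toString().split(",");
    totalScore += Double.parseDouble(parts[0]);
    totalExams += Integer.parseInt(parts[1]);
    students++;
}
valueout.set(students + "\t" + totalScore / totalExams);
context.write(key, valueout);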
2. To store the output in different result files, you must call setNumReduceTasks and define the routing rule with a custom Partitioner. To sort by average score, wrap each record in a key object and define the sort order by implementing the WritableComparable interface.
Entity class definition:
/**
 * @author: lpj
 * @date: 2018-03-14 21:46:02
 * @Description: composite key: course + name + average score
 */
package lpj.day2.homeworkbean;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.DecimalFormat;

import org.apache.hadoop.io.WritableComparable;

public class Student implements WritableComparable<Student> {
    private String name;
    private double score;
    private String course;

    public String getName() {
        return name;
    }
    public void setName(String name) {
        this.name = name;
    }
    public double getScore() {
        return score;
    }
    public void setScore(double score) {
        this.score = score;
    }
    public String getCourse() {
        return course;
    }
    public void setCourse(String course) {
        this.course = course;
    }

    @Override
    public String toString() {
        // "0.0" always keeps exactly one decimal place, as the task requires;
        // "#.#" would print 75.0 as "75"
        DecimalFormat fs = new DecimalFormat("0.0");
        return course + "\t" + name + "\t" + fs.format(score);
    }

    public Student() {
    }

    public Student(String name, double score, String course) {
        super();
        this.name = name;
        this.score = score;
        this.course = course;
    }

    @Override
    public int compareTo(Student o) {
        // sort by course ascending, then by score descending.
        // Double.compare avoids the truncation bug of (int)(o.score - this.score),
        // which maps any difference smaller than 1.0 to "equal".
        int diff = this.course.compareTo(o.course);
        if (diff != 0) {
            return diff > 0 ? 1 : -1;
        }
        int byScore = Double.compare(o.score, this.score);
        // tie-break on name so two students with identical averages are not
        // collapsed into one reduce group when no grouping comparator is set
        return byScore != 0 ? byScore : this.name.compareTo(o.name);
    }

    // deserialize in the same field order as write()
    @Override
    public void readFields(DataInput in) throws IOException {
        name = in.readUTF();
        score = in.readDouble();
        course = in.readUTF();
    }

    // serialize all fields for the shuffle
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(name);
        out.writeDouble(score);
        out.writeUTF(course);
    }
}
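A quick way to sanity-check the sort order locally is a throwaway main (not part of any job, and assumed to sit in the same package as Student) that sorts a few instances and prints them:

import java.util.Arrays;
import java.util.Collections;
import java.util.List;

public class StudentSortDemo {
    public static void main(String[] args) {
        List<Student> list = Arrays.asList(
                new Student("a", 80.5, "math"),
                new Student("b", 91.2, "math"),
                new Student("c", 70.0, "english"));
        Collections.sort(list); // uses Student.compareTo
        // prints english/c/70.0 first, then math/b/91.2 before math/a/80.5
        for (Student s : list) {
            System.out.println(s);
        }
    }
}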
Partitioner definition:
/**
 * @author: lpj
 * @date: 2018-03-16 22:13:24
 * @Description: route each course to its own reduce task
 */
package lpj.reduceWorkbean;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

public class MyPatitioner extends Partitioner<Student, NullWritable> {

    @Override
    public int getPartition(Student key, NullWritable value, int numPartitions) {
        // one partition per course; must match job.setNumReduceTasks(4)
        if ("math".equals(key.getCourse())) {
            return 0;
        } else if ("english".equals(key.getCourse())) {
            return 1;
        } else if ("computer".equals(key.getCourse())) {
            return 2;
        } else { // algorithm
            return 3;
        }
    }
}
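The four hard-coded branches must match job.setNumReduceTasks(4) exactly. If the course list ever grows, one alternative (illustrative only, not from the original post) is to hash the course name modulo numPartitions; the trade-off is that two courses may then share an output file, although each course still stays together:

package lpj.reduceWorkbean;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical variant: derive the partition from the course name itself,
// so adding a course needs no code change.
public class CoursePartitioner extends Partitioner<Student, NullWritable> {
    @Override
    public int getPartition(Student key, NullWritable value, int numPartitions) {
        // mask the sign bit so the result is never negative
        return (key.getCourse().hashCode() & Integer.MAX_VALUE) % numPartitions;
    }
}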
Main program:
/**
 * @author: lpj
 * @date: 2018-03-16 19:16:47
 * @Description: Task 2 - per-student course averages, one sorted file per course
 */
package lpj.reduceWork;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import lpj.reduceWorkbean.MyPatitioner;
import lpj.reduceWorkbean.Student;

public class StudentScore3_2MR2 {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // conf.addResource("hdfs-site.xml"); // load cluster config from file
        // System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system

        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3_2MR2.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);

        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);

        job.setPartitionerClass(MyPatitioner.class); // set the partitioner: one course per partition
        job.setNumReduceTasks(4); // one reduce task (and output file) per course

        // String inputpath = args[0];
        // String outpath = args[1];

        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6_2");
        if (fs.exists(outputPath)) { // delete a stale output directory, or the job fails
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    // 2. Average score of each participating student per course, one result file
    //    per course, sorted by average descending, one decimal place
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Student, NullWritable> {
        Student stu = new Student();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) { // scores start at index 2
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            double avg = 1.0 * sum / count;

            stu.setCourse(reads[0]);
            stu.setName(reads[1]);
            stu.setScore(avg);
            context.write(stu, NullWritable.get()); // Student.compareTo drives the sorting
        }
    }

    public static class StudentScore3MR_Reducer extends Reducer<Student, NullWritable, Student, NullWritable> {
        @Override
        protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // keys arrive already sorted (course asc, score desc); just write them out
            context.write(key, NullWritable.get());
        }
    }
}
3. This task involves both sorting and grouping. Grouping is configured through a WritableComparator that compares only the grouping fields. Note the relationship between grouping fields and sort fields: the grouping fields must be a leading prefix of the sort fields.
For example, if the sort order is a, b, c, d, e, then the grouping can only be one of:
a / a,b / a,b,c / a,b,c,d / a,b,c,d,e — no field may be skipped.
In other words, the sort fields must include, and be at least as many as, the grouping fields; a short sketch illustrating the prefix rule follows.
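As an illustration (AbcKey, getA, and getB are hypothetical names, not from this post): with a key sorted on fields (a, b, c), grouping on (a, b) is safe because records with equal (a, b) are adjacent in the sorted stream, whereas grouping on b alone would scatter one logical group across non-adjacent runs:

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

// Hypothetical grouping comparator over the first two of three sort fields.
public class AbGroupComparator extends WritableComparator {
    protected AbGroupComparator() {
        super(AbcKey.class, true); // AbcKey: a hypothetical WritableComparable
    }

    @Override
    public int compare(WritableComparable x, WritableComparable y) {
        AbcKey k1 = (AbcKey) x;
        AbcKey k2 = (AbcKey) y;
        int cmp = k1.getA().compareTo(k2.getA()); // first sort field
        return cmp != 0 ? cmp : Integer.compare(k1.getB(), k2.getB()); // second
    }
}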
The entity class is the same as in Task 2.
Grouping class code:
/**
 * @author: lpj
 * @date: 2018-03-16 22:36:55
 * @Description: group reduce input by course only
 */
package lpj.reduceWorkbean;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;

public class MyGroup extends WritableComparator {

    public MyGroup() {
        super(Student.class, true); // true: create key instances for comparison
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        Student s1 = (Student) a;
        Student s2 = (Student) b;
        // compare by course only, so all students of one course form one reduce group
        return s1.getCourse().compareTo(s2.getCourse());
    }
}
Main program:
/**
 * @author: lpj
 * @date: 2018-03-16 19:16:47
 * @Description: Task 3 - for each course, the student with the highest average
 */
package lpj.reduceWork;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import lpj.reduceWorkbean.MyGroup;
import lpj.reduceWorkbean.Student;

public class StudentScore3_3MR3 {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // conf.addResource("hdfs-site.xml"); // load cluster config from file
        // System.setProperty("HADOOP_USER_NAME", "hadoop"); // run against the cluster
        FileSystem fs = FileSystem.get(conf); // defaults to the local file system

        Job job = Job.getInstance(conf);
        job.setJarByClass(StudentScore3_3MR3.class);
        job.setMapperClass(StudentScore3MR_Mapper.class);
        job.setReducerClass(StudentScore3MR_Reducer.class);

        job.setMapOutputKeyClass(Student.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Student.class);
        job.setOutputValueClass(NullWritable.class);
        job.setGroupingComparatorClass(MyGroup.class); // group reduce input by course
        // default single reduce task: all four courses flow into one output file

        Path inputPath = new Path("d:/a/homework6.txt");
        Path outputPath = new Path("d:/a/homework6_3");
        if (fs.exists(outputPath)) { // delete a stale output directory, or the job fails
            fs.delete(outputPath, true);
        }

        FileInputFormat.setInputPaths(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);
        boolean isdone = job.waitForCompletion(true);
        System.exit(isdone ? 0 : 1);
    }

    // 3. For each course, find the student with the highest average:
    //    course, name, and average score
    public static class StudentScore3MR_Mapper extends Mapper<LongWritable, Text, Student, NullWritable> {
        Student stu = new Student();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // e.g. algorithm,huangzitao,85,86,41,75,93,42,85,75
            String[] reads = value.toString().trim().split(",");
            int sum = 0;
            int count = 0;
            for (int i = 2; i < reads.length; i++) { // scores start at index 2
                sum += Integer.parseInt(reads[i]);
                count++;
            }
            double avg = 1.0 * sum / count;

            stu.setCourse(reads[0]);
            stu.setName(reads[1]);
            stu.setScore(avg);
            context.write(stu, NullWritable.get());
        }
    }

    public static class StudentScore3MR_Reducer extends Reducer<Student, NullWritable, Student, NullWritable> {
        @Override
        protected void reduce(Student key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // keys within a course group arrive score-descending, so the group's
            // first key is the course maximum; write it and ignore the rest
            context.write(key, NullWritable.get());
        }
    }
}
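One last extension worth sketching (an assumption about intent, not part of the original homework): Hadoop refreshes the reduce key object as you iterate over a group's values, so the same job can report the top N students per course by walking the group and stopping after N writes. A hypothetical top-3 reducer:

// Hypothetical drop-in replacement for the reducer above.
public static class Top3Reducer extends Reducer<Student, NullWritable, Student, NullWritable> {
    private static final int N = 3;

    @Override
    protected void reduce(Student key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        int written = 0;
        for (NullWritable v : values) {
            // each iteration deserializes the next record into `key`,
            // so `key` now holds the next-highest student of this course
            context.write(key, NullWritable.get());
            if (++written == N) {
                break; // skip the rest of the group
            }
        }
    }
}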