MapReduce: Average, Highest, and Lowest Scores, and the Pass Rate
Compute each student's weighted average score and their highest and lowest single-subject scores, plus each class's pass rate.
First, let's look at the data.
The columns are, in order: date, class, student name, subject, and score.

```
20180501 1708a1 li bishi 80
20180501 1708a1 li jishi 55
20180501 1708a1 li project 90
20180501 1708a1 li2 bishi 80
20180501 1708a1 li2 jishi 20
20180501 1708a1 li2 project 90
20180501 1708a1 li3 bishi 50
20180501 1708a1 li3 jishi 70
20180501 1708a1 li3 project 60
20180501 1708a1 zhangsan bishi 88
20180501 1708a1 zhangsan jishi 55
20180501 1708a1 zhangsan project 98
20180501 1708a1 lishi bishi 18
20180501 1708a1 lishi jishi 15
20180501 1708a1 lishi project 15
20180501 1708a1 wangwu bishi 88
20180501 1708a1 wangwu jishi 76
20180501 1708a1 wangwu project 70
20180501 1708a2 li1 bishi 80
20180501 1708a2 li1 jishi 71
20180501 1708a2 li1 project 96
20180501 1708a2 li2 bishi 80
20180501 1708a2 li2 jishi 26
20180501 1708a2 li2 project 90
20180501 1708a2 li3 bishi 80
20180501 1708a2 li3 jishi 55
20180501 1708a2 li3 project 90
20180501 1708a2 zhangliang bishi 81
20180501 1708a2 zhangliang jishi 55
20180501 1708a2 zhangliang project 98
20180501 1708a2 liuli bishi 70
20180501 1708a2 liuli jishi 95
20180501 1708a2 liuli project 75
20180501 1708a2 wangwu bishi 80
20180501 1708a2 wangwu jishi 76
20180501 1708a2 wangwu project 70
20180501 1708a2 zhangxi bishi 18
20180501 1708a2 zhangxi jishi 16
20180501 1708a2 zhangxi project 10
```
Fields are separated by single spaces.
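Both mappers below rely on that layout: they call `split(" ")` and index into the resulting array. Here is a minimal sketch of the field positions, using one record from the sample data (the class name `SplitDemo` is just for illustration):

```java
public class SplitDemo {
	public static void main(String[] args) {
		String record = "20180501 1708a1 li bishi 80";
		String[] f = record.split(" ");
		// f[0] = date, f[1] = class, f[2] = name, f[3] = subject, f[4] = score
		int score = Integer.parseInt(f[4]);
		System.out.println(f[2] + " (" + f[1] + ") scored " + score + " on " + f[3]);
	}
}
```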
Now for the code.

1. Average, highest, and lowest scores
```java
package com.huhu.day01;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Per-student weighted average, highest, and lowest scores.
 *
 * @author huhu_k
 */
public class HomeWork2 {

	// map
	public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
		Text keys = new Text();
		Text values = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Each input line: date class name subject score, space-separated.
			String[] line = value.toString().split(" ");
			// Key on date:class:name. The class must be part of the key:
			// the same name (e.g. wangwu) appears in both classes, and
			// keying on the name alone would merge those two students.
			keys.set(line[0] + ":" + line[1] + ":" + line[2]);
			values.set(line[3] + ":" + line[4]);
			context.write(keys, values);
		}
	}

	// reduce
	public static class MyReducer extends Reducer<Text, Text, Text, Text> {

		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			int max = Integer.MIN_VALUE;
			int min = Integer.MAX_VALUE;
			// weighted sum of this student's scores
			double sum = 0;
			for (Text t : value) {
				String[] parts = t.toString().split(":");
				String subject = parts[0];
				int score = Integer.parseInt(parts[1]);
				if (score > max)
					max = score;
				if (score < min)
					min = score;
				// Weights: bishi 40%, jishi 30%, project 30%. The weights
				// sum to 1, so the weighted sum is already the weighted
				// average; no further division is needed.
				switch (subject) {
				case "bishi":
					sum += score * 0.4;
					break;
				case "jishi":
				case "project":
					sum += score * 0.3;
					break;
				}
			}
			int avg = (int) sum;
			String[] student = key.toString().split(":");
			Text ky = new Text(student[0] + "\t" + student[1] + "\t" + student[2]);
			context.write(ky, new Text("average " + avg));
			context.write(ky, new Text("highest " + max));
			context.write(ky, new Text("lowest " + min));
		}
	}

	public static void main(String[] args) throws Exception {
		// configuration
		Configuration conf = new Configuration();
		// create a job (Job.getInstance replaces the deprecated constructor)
		Job job = Job.getInstance(conf, "MyMapReduce Two");
		// configure the job
		job.setJarByClass(HomeWork2.class);
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// input and output paths
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// run the job
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
```
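To try it, package the class into a jar and submit it with something like `hadoop jar day01.jar com.huhu.day01.HomeWork2 /scores/input /scores/output` (the jar name and HDFS paths here are placeholders); args[0] and args[1] become the input and output paths.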
Output:
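As a sanity check on the weighting: li in 1708a1 scored 80, 55, and 90, so the weighted average is 80 × 0.4 + 55 × 0.3 + 90 × 0.3 = 75.5, truncated to 75; the highest score is 90 and the lowest 55. The reducer emits one line for each of the three values under the key 20180501 1708a1 li.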
2. Pass rate
```java
package com.huhu.day01;

import java.io.IOException;
import java.text.DecimalFormat;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Per-class pass rate.
 *
 * @author huhu_k
 */
public class HomeWork3 {

	// map
	public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
		Text keys = new Text();

		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			// Key on date:class so each reduce group holds exactly one
			// class's records; pass the whole line through as the value.
			String[] line = value.toString().split(" ");
			keys.set(line[0] + ":" + line[1]);
			context.write(keys, value);
		}
	}

	// reduce
	public static class MyReducer extends Reducer<Text, Text, Text, Text> {
		// per-class counters, kept across reduce() calls: {total, passed}
		Map<String, int[]> counts = new HashMap<>();

		@Override
		protected void reduce(Text key, Iterable<Text> value, Context context)
				throws IOException, InterruptedException {
			// Weighted total per student. This map must be local to the
			// call: as an instance field it would carry the previous
			// class's students into the next group and count them twice.
			Map<String, Double> totals = new HashMap<>();
			for (Text t : value) {
				String[] values = t.toString().split(" ");
				String student = values[2] + ":" + values[0] + ":" + values[1];
				double score = Integer.parseInt(values[4]);
				// bishi counts 40%; jishi and project count 30% each
				if ("bishi".equals(values[3])) {
					score *= 0.4;
				} else {
					score *= 0.3;
				}
				// accumulate this student's weighted scores across subjects
				totals.merge(student, score, Double::sum);
			}

			// A student passes when the weighted total (= weighted average,
			// since the weights sum to 1) is at least 60.
			String classname = key.toString().split(":")[1];
			int[] c = counts.computeIfAbsent(classname, k -> new int[2]);
			for (double total : totals.values()) {
				c[0]++;
				if (total >= 60) {
					c[1]++;
				}
			}
		}

		@Override
		protected void cleanup(Context context) throws IOException, InterruptedException {
			DecimalFormat d = new DecimalFormat("0.00%");
			for (Map.Entry<String, int[]> m : counts.entrySet()) {
				double pass = (double) m.getValue()[1] / m.getValue()[0];
				context.write(new Text(m.getKey()), new Text("pass rate: " + d.format(pass)));
			}
		}
	}

	public static void main(String[] args) throws Exception {
		// configuration
		Configuration conf = new Configuration();
		// create a job
		Job job = Job.getInstance(conf, "MyMapReduce Count");
		// configure the job
		job.setJarByClass(HomeWork3.class);
		job.setMapperClass(MyMapper.class);
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);

		job.setReducerClass(MyReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// input and output paths
		FileInputFormat.addInputPath(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		// run the job
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
```
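One design note on the reducer: the per-student totals map has to be created inside reduce() rather than kept as a field, or students from the previous class's group would be counted again in the next group. The per-class counters, by contrast, can safely live on the reducer instance, because every record for a given class arrives in a single reduce group; cleanup() then writes one pass-rate line for each class that reducer handled.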
MapReduce is a distributed, parallel, offline computing framework. All we have to supply is map(), reduce(), and the input and output; the framework handles everything else.
The YARN-based workflow:
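In outline (a brief summary in place of the original diagram): the client submits the job to the ResourceManager, which allocates a container on a NodeManager and starts the MRAppMaster in it; the AppMaster then requests containers for the map and reduce tasks, the NodeManagers launch them, and the AppMaster tracks their progress and reports the final status back to the client.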