pom.xml dependency:

```xml
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
</dependency>
```
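Running Hadoop locally on Windows usually also requires winutils.exe, which the client looks up under %HADOOP_HOME%\bin. If HADOOP_HOME is not set as an environment variable, a common workaround is to set the hadoop.home.dir system property before any Hadoop call; the path below is an assumption, adjust it to wherever winutils.exe is unpacked:

```java
// Assumed location: C:\hadoop\bin\winutils.exe must exist under this directory.
// Place this before any Hadoop class is used, e.g. as the first line of main().
System.setProperty("hadoop.home.dir", "C:\\hadoop");
```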
core-site.xml:

```xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration xmlns:xi="http://www.w3.org/2001/XInclude">
    <!--
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:8020</value>
        <description>Point at an HDFS cluster; if unset, the local Windows disk is used by default</description>
    </property>
    -->
    <property>
        <name>fs.defaultFS</name>
        <value>file:///</value>
        <description>Use the local Windows disk</description>
    </property>
</configuration>
```
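To confirm which filesystem the job will actually use, it helps to print fs.defaultFS from a freshly loaded Configuration. A minimal sketch, assuming core-site.xml is on the classpath (e.g. under src/main/resources):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class CheckFs {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();          // loads core-site.xml from the classpath
        System.out.println(conf.get("fs.defaultFS"));      // expect file:/// with the config above
        System.out.println(FileSystem.get(conf).getUri()); // the filesystem actually resolved
    }
}
```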

log4j.properties:

```properties
# Default values, overridable via system properties
hadoop.root.logger=INFO,console
hadoop.log.dir=.
hadoop.log.file=hadoop.log

# Route all logging through the root logger
log4j.rootLogger=${hadoop.root.logger}

log4j.threshold=ALL

# Null appender
log4j.appender.NullAppender=org.apache.log4j.varia.NullAppender

# Rolling file appender
hadoop.log.maxfilesize=256MB
hadoop.log.maxbackupindex=20
log4j.appender.RFA=org.apache.log4j.RollingFileAppender
log4j.appender.RFA.File=${hadoop.log.dir}/${hadoop.log.file}
log4j.appender.RFA.MaxFileSize=${hadoop.log.maxfilesize}
log4j.appender.RFA.MaxBackupIndex=${hadoop.log.maxbackupindex}
log4j.appender.RFA.layout=org.apache.log4j.PatternLayout
log4j.appender.RFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

# Daily rolling file appender
log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
# Roll over at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

log4j.logger.org.apache.hadoop.conf.Configuration=ERROR

# Console appender
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n

# TaskLog appender: default values
hadoop.tasklog.taskid=null
hadoop.tasklog.iscleanup=false
hadoop.tasklog.noKeepSplits=4
hadoop.tasklog.totalLogFileSize=100
hadoop.tasklog.purgeLogSplits=true
hadoop.tasklog.logsRetainHours=12

log4j.appender.TLA=org.apache.hadoop.mapred.TaskLogAppender
log4j.appender.TLA.taskId=${hadoop.tasklog.taskid}
log4j.appender.TLA.isCleanup=${hadoop.tasklog.iscleanup}
log4j.appender.TLA.totalLogFileSize=${hadoop.tasklog.totalLogFileSize}
log4j.appender.TLA.layout=org.apache.log4j.PatternLayout
log4j.appender.TLA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n

# Security audit appenders
hadoop.security.logger=INFO,NullAppender
hadoop.security.log.maxfilesize=256MB
hadoop.security.log.maxbackupindex=20
log4j.category.SecurityLogger=${hadoop.security.logger}
hadoop.security.log.file=SecurityAuth-${user.name}.audit
log4j.appender.RFAS=org.apache.log4j.RollingFileAppender
log4j.appender.RFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
log4j.appender.RFAS.layout=org.apache.log4j.PatternLayout
log4j.appender.RFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.RFAS.MaxFileSize=${hadoop.security.log.maxfilesize}
log4j.appender.RFAS.MaxBackupIndex=${hadoop.security.log.maxbackupindex}

log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
log4j.appender.DRFAS.File=${hadoop.log.dir}/${hadoop.security.log.file}
log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd

# HDFS audit logging
hdfs.audit.logger=INFO,NullAppender
hdfs.audit.log.maxfilesize=256MB
hdfs.audit.log.maxbackupindex=20
log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
log4j.appender.RFAAUDIT=org.apache.log4j.RollingFileAppender
log4j.appender.RFAAUDIT.File=${hadoop.log.dir}/hdfs-audit.log
log4j.appender.RFAAUDIT.layout=org.apache.log4j.PatternLayout
log4j.appender.RFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.RFAAUDIT.MaxFileSize=${hdfs.audit.log.maxfilesize}
log4j.appender.RFAAUDIT.MaxBackupIndex=${hdfs.audit.log.maxbackupindex}

# MapReduce audit logging
mapred.audit.logger=INFO,NullAppender
mapred.audit.log.maxfilesize=256MB
mapred.audit.log.maxbackupindex=20
log4j.logger.org.apache.hadoop.mapred.AuditLogger=${mapred.audit.logger}
log4j.additivity.org.apache.hadoop.mapred.AuditLogger=false
log4j.appender.MRAUDIT=org.apache.log4j.RollingFileAppender
log4j.appender.MRAUDIT.File=${hadoop.log.dir}/mapred-audit.log
log4j.appender.MRAUDIT.layout=org.apache.log4j.PatternLayout
log4j.appender.MRAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
log4j.appender.MRAUDIT.MaxFileSize=${mapred.audit.log.maxfilesize}
log4j.appender.MRAUDIT.MaxBackupIndex=${mapred.audit.log.maxbackupindex}

# Job summary appender
hadoop.mapreduce.jobsummary.logger=${hadoop.root.logger}
hadoop.mapreduce.jobsummary.log.file=hadoop-mapreduce.jobsummary.log
hadoop.mapreduce.jobsummary.log.maxfilesize=256MB
hadoop.mapreduce.jobsummary.log.maxbackupindex=20
log4j.appender.JSA=org.apache.log4j.RollingFileAppender
log4j.appender.JSA.File=${hadoop.log.dir}/${hadoop.mapreduce.jobsummary.log.file}
log4j.appender.JSA.MaxFileSize=${hadoop.mapreduce.jobsummary.log.maxfilesize}
log4j.appender.JSA.MaxBackupIndex=${hadoop.mapreduce.jobsummary.log.maxbackupindex}
log4j.appender.JSA.layout=org.apache.log4j.PatternLayout
log4j.appender.JSA.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
log4j.logger.org.apache.hadoop.mapred.JobInProgress$JobSummary=${hadoop.mapreduce.jobsummary.logger}
log4j.additivity.org.apache.hadoop.mapred.JobInProgress$JobSummary=false

# ResourceManager application summary log
# Defaults referenced by the RMSUMMARY appender below
yarn.server.resourcemanager.appsummary.logger=${hadoop.root.logger}
yarn.server.resourcemanager.appsummary.log.file=rm-appsummary.log
log4j.logger.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=${yarn.server.resourcemanager.appsummary.logger}
log4j.additivity.org.apache.hadoop.yarn.server.resourcemanager.RMAppManager$ApplicationSummary=false
log4j.appender.RMSUMMARY=org.apache.log4j.RollingFileAppender
log4j.appender.RMSUMMARY.File=${hadoop.log.dir}/${yarn.server.resourcemanager.appsummary.log.file}
log4j.appender.RMSUMMARY.MaxFileSize=256MB
log4j.appender.RMSUMMARY.MaxBackupIndex=20
log4j.appender.RMSUMMARY.layout=org.apache.log4j.PatternLayout
log4j.appender.RMSUMMARY.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
```
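For these settings to take effect in a local run, log4j.properties must be on the classpath; in a Maven project that means src/main/resources. A minimal sanity check (assuming the file is picked up):

```java
import org.apache.log4j.Logger;

public class LogCheck {
    private static final Logger LOG = Logger.getLogger(LogCheck.class);

    public static void main(String[] args) {
        // With hadoop.root.logger=INFO,console this line should appear on stderr
        LOG.info("log4j configuration loaded");
    }
}
```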

Top5.java:

```java
package com.gxwz.mapreduce;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that reads a text file, counts word occurrences,
 * and writes the counts sorted in descending order.
 * @author com
 * @date 2019-09-28
 */
public class Top5 extends Configured implements Tool {

    public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private final Text outkey = new Text();
        private final IntWritable outval = new IntWritable(1);

        @Override
        protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Split each line on whitespace (tabs or spaces) and emit (word, 1)
            // for every non-empty token
            String[] line = value.toString().split("\\s+");
            for (String s : line) {
                if (!s.trim().isEmpty()) {
                    outkey.set(s.trim());
                    context.write(outkey, outval);
                }
            }
        }
    }

    public static class MyReduce extends Reducer<Text, IntWritable, Text, LongWritable> {

        private final Text outkey = new Text();
        private final LongWritable outval = new LongWritable();
        // Accumulate all per-word totals here so they can be sorted in cleanup()
        private final Map<String, Long> map = new HashMap<String, Long>();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, Text, LongWritable>.Context context) throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            map.put(key.toString(), (long) sum);
        }

        @Override
        protected void cleanup(Reducer<Text, IntWritable, Text, LongWritable>.Context context)
                throws IOException, InterruptedException {
            // Sort the accumulated counts in descending order before writing
            List<Map.Entry<String, Long>> list = new LinkedList<Map.Entry<String, Long>>(map.entrySet());
            Collections.sort(list, new Comparator<Map.Entry<String, Long>>() {
                @Override
                public int compare(Entry<String, Long> o1, Entry<String, Long> o2) {
                    return Long.compare(o2.getValue(), o1.getValue()); // avoids int-cast overflow
                }
            });
            for (Entry<String, Long> entry : list) {
                System.out.println(entry.getKey() + ":" + entry.getValue());
                outkey.set(entry.getKey());
                outval.set(entry.getValue());
                context.write(outkey, outval);
            }
        }
    }

    @Override
    public int run(String[] args) throws Exception {
        // 1. Get the configuration supplied by ToolRunner
        Configuration conf = this.getConf();
        // 2. Get a handle on the target filesystem
        FileSystem fs = FileSystem.get(conf);
        // 3. Define the job's input and output paths
        Path inpath = new Path(args[0]);
        Path outpath = new Path(args[1]);
        // 4. Delete the output path if it already exists
        if (fs.exists(outpath)) {
            fs.delete(outpath, true);
            System.out.println("The old path has been deleted!");
        }
        // 5. Get a job instance (pass conf so the Tool's configuration is used)
        Job job = Job.getInstance(conf);
        // 6. Set the jar by class
        job.setJarByClass(Top5.class);
        // 7. Set the Mapper and Reducer classes
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReduce.class);
        // 8. Set the input and output formats
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        // 9. The map output types differ from the reduce output types,
        //    so both pairs must be declared explicitly
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 10. Wire up the input and output paths
        FileInputFormat.addInputPath(job, inpath);
        FileOutputFormat.setOutputPath(job, outpath);
        // 11. Submit the job and wait for completion
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) {
        String[] path = new String[2];
        path[0] = "C:\\Users\\com\\Desktop\\mr\\top10";         // input path
        path[1] = "C:\\Users\\com\\Desktop\\mr\\top10\\output"; // output path
        try {
            int result = ToolRunner.run(new Top5(), path);
            System.out.println(result == 0 ? "job finish!" : "job fail!");
            System.exit(result);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
```
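As written, the reducer sorts and emits every word, so the class name Top5 is aspirational. To emit only the five most frequent words, one option is a bounded min-heap over the accumulated map. A minimal sketch (TopNHelper and its names are illustrative, not part of the original code):

```java
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;

public class TopNHelper {

    /** Returns the n entries with the largest values, in descending order. */
    public static List<Map.Entry<String, Long>> topN(Map<String, Long> counts, int n) {
        // Min-heap ordered by count: the head is always the smallest kept entry
        PriorityQueue<Map.Entry<String, Long>> heap =
                new PriorityQueue<Map.Entry<String, Long>>(n, new Comparator<Map.Entry<String, Long>>() {
                    @Override
                    public int compare(Map.Entry<String, Long> a, Map.Entry<String, Long> b) {
                        return Long.compare(a.getValue(), b.getValue());
                    }
                });
        for (Map.Entry<String, Long> e : counts.entrySet()) {
            heap.offer(e);
            if (heap.size() > n) {
                heap.poll(); // evict the current minimum, keeping only the top n
            }
        }
        // Sort the survivors in descending order for output
        List<Map.Entry<String, Long>> result = new ArrayList<Map.Entry<String, Long>>(heap);
        Collections.sort(result, new Comparator<Map.Entry<String, Long>>() {
            @Override
            public int compare(Map.Entry<String, Long> a, Map.Entry<String, Long> b) {
                return Long.compare(b.getValue(), a.getValue());
            }
        });
        return result;
    }
}
```

In cleanup(), the write loop would then iterate over TopNHelper.topN(map, 5) instead of the fully sorted list.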

Sample input:

```
小明 小绿 小黑 小红 小红 小白 小蓝 小蓝 小蓝 小黑 小白 小黑 小红 小红 小黄 小黑 小白 小绿 小红 小蓝 小蓝 小红 小红 小黄 小绿 小蓝 小蓝 小黑 小白 小蓝
```

Output (descending by count):

```
小蓝 8
小红 7
小黑 5
小白 4
小绿 3
小黄 2
小明 1
```
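With TextOutputFormat, these counts land tab-separated in part-r-00000 under the output directory; the 小蓝:8-style lines printed by System.out.println in cleanup() go to the console instead.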