MapReduce job submission methods
1. Package into a JAR, upload it to Linux, and run it with hadoop jar
2. Embed it in an application: submit directly from the IDE
3. Local mode: run locally on Windows (a configuration sketch follows below)
The code below is first submitted directly from IDEA, then packaged into a JAR, uploaded to Linux, and executed there.
Note: the pom.xml declaring the dependencies to download is at the end of this post.
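For option 3 (local mode), here is a minimal sketch of the extra driver configuration; the keys are the standard Hadoop 2.x property names, and setting them explicitly is only needed when the core-site.xml/mapred-site.xml on the classpath point at a cluster:

Configuration conf = new Configuration();
conf.set("mapreduce.framework.name", "local"); // run MapReduce in-process instead of on YARN
conf.set("fs.defaultFS", "file:///");          // use the local file system instead of HDFS
Job job = Job.getInstance(conf);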
1. Driver class: WordCountDriver.java
package cn.kgc.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class WordCountDriver {
    public static void main(String[] args) throws InterruptedException, IOException, ClassNotFoundException {
        // 1. Get the configuration and create the job
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // 2. Set the jar loading path
        job.setJarByClass(WordCountDriver.class);
        // 3. Set the Mapper and Reducer classes
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 4. Set the map output types
        //    (the original mistakenly called setMapOutputKeyClass twice)
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 5. Set the final (reduce) output types; LongWritable matches
        //    WordCountReducer's output value type below
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);
        // 6. Set the input and output paths (hardcoded for the local IDEA run;
        //    switch to new Path(args[0]) / new Path(args[1]) before packaging for the cluster)
        FileInputFormat.setInputPaths(job, new Path("D:\\IDEA_project\\Hadoop_day1201\\data\\hello.txt"));
        FileOutputFormat.setOutputPath(job, new Path("D:\\IDEA_project\\Hadoop_day1201\\data\\output"));
        // 7. Submit the job and wait for completion
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1); // 0: normal exit; 1 (or -1): abnormal exit
    }
}
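One pitfall when re-running the driver: FileOutputFormat refuses an existing output directory, and the job fails with FileAlreadyExistsException. A common workaround, sketched below for insertion before step 7 (it additionally needs import org.apache.hadoop.fs.FileSystem), deletes the directory first:

// delete the output directory if it already exists, so the job can be re-run
Path outPath = new Path("D:\\IDEA_project\\Hadoop_day1201\\data\\output");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outPath)) {
    fs.delete(outPath, true); // true = delete recursively
}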
2. Map class: WordCountMapper.java
package cn.kgc.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
 * input key:    byte offset of the line, e.g. 0 for "hello world"
 * input value:  the line itself, e.g. "hello world"
 * output key:   a word, e.g. "hello"
 * output value: the count 1, giving pairs like (hello, 1)
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    Text k = new Text();
    IntWritable v = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Get one line and convert it to a String
        String line = value.toString();
        // 2. Split on spaces
        String[] words = line.split(" ");
        // 3. Emit (word, 1) for each word
        for (String word : words) {
            k.set(word);
            context.write(k, v);
        }
    }
}
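One detail worth knowing: split(" ") yields empty strings for consecutive spaces, and each empty string would then be counted as a "word". A tiny standalone check (plain Java, no Hadoop required) demonstrates this; split("\\s+") is the usual fix:

public class SplitCheck {
    public static void main(String[] args) {
        String line = "hello  world";                  // two spaces between the words
        System.out.println(line.split(" ").length);    // prints 3: "hello", "", "world"
        System.out.println(line.split("\\s+").length); // prints 2: "hello", "world"
    }
}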
3. Reducer class: WordCountReducer.java
package cn.kgc.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, LongWritable> {
    int sum;
    LongWritable v = new LongWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // 1. Sum the counts in the input Iterable
        sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        // 2. Emit (word, total count)
        v.set(sum);
        context.write(key, v);
    }
}
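A word count usually benefits from a combiner to shrink the shuffle, but this Reducer cannot be registered as one: a combiner's output types must equal the map output types, and here the output value is LongWritable while the map emits IntWritable. A hypothetical WordCountCombiner (not in the original post) that would be compatible:

package cn.kgc.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

// Same summing logic as the reducer, but IntWritable in and out so the output
// matches the map output types. Register in the driver with
// job.setCombinerClass(WordCountCombiner.class).
public class WordCountCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
    IntWritable v = new IntWritable();

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        v.set(sum);
        context.write(key, v);
    }
}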
4. Run the code
Run the Driver class in IDEA.
[Figure: screenshot of the run steps]
1. Start HDFS and YARN
cd /opt/install/hadoop
sbin/start-dfs.sh
sbin/start-yarn.sh
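Before submitting, it is worth confirming that the daemons started and that the input actually exists in HDFS. The directory and file names below match the example (/wcinput, hello.txt) but are assumptions about the local setup:

jps                               # should list NameNode, DataNode, ResourceManager, NodeManager
hdfs dfs -mkdir -p /wcinput
hdfs dfs -put hello.txt /wcinput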
2. Run the JAR
hadoop jar hadoop_day1202-1.0-SNAPSHOT.jar cn.kgc.wordcount.WordCountDriver /wcinput /wcoutput2
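Note the fully qualified main class after the jar name (the original had a space instead of the last dot). Also, the trailing /wcinput /wcoutput2 arguments only take effect if the driver reads them, so before packaging, the hardcoded paths in step 6 of the driver are switched to:

FileInputFormat.setInputPaths(job, new Path(args[0]));  // /wcinput
FileOutputFormat.setOutputPath(job, new Path(args[1])); // /wcoutput2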
3. View in the file system
4. View from the virtual machine
hdfs dfs -cat /wcoutput2/part-r-00000
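A successful run also writes an empty _SUCCESS marker next to the part files, so listing the output directory first is a quick check:

hdfs dfs -ls /wcoutput2    # expect _SUCCESS and part-r-00000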
Configuration file: pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.kgc</groupId>
    <artifactId>Hadoop</artifactId>
    <version>1.0-SNAPSHOT</version>

    <dependencies>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>RELEASE</version>
        </dependency>
        <dependency>
            <groupId>org.apache.logging.log4j</groupId>
            <artifactId>log4j-core</artifactId>
            <version>2.8.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>2.6.0-cdh5.14.2</version>
        </dependency>
    </dependencies>

    <build>
        <pluginManagement><!-- lock down plugins versions to avoid using Maven defaults (may be moved to parent pom) -->
            <plugins>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                    </configuration>
                </plugin>
            </plugins>
        </pluginManagement>
    </build>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
</project>
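Note on building: the maven-assembly-plugin sits only under pluginManagement, which pins its configuration but does not bind it into this module's build, so mvn clean package produces just the plain project jar (which is what is executed above; the Hadoop dependencies are provided on the cluster by hadoop jar). A sketch of what would additionally be needed to actually emit a jar-with-dependencies:

<build>
    <plugins>
        <!-- activates the managed maven-assembly-plugin configuration above -->
        <plugin>
            <artifactId>maven-assembly-plugin</artifactId>
        </plugin>
    </plugins>
</build>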