1. Download Hadoop
Download Hadoop 2.10.1 from the download link, then extract the archive to a local folder.
2. Download winutils
Download winutils from the download link. After extracting it, copy hadoop.dll and winutils.exe from the folder matching your Hadoop version (or the closest available version) into Hadoop's bin directory.
3. Configure environment variables
Create a new environment variable HADOOP_HOME whose value is the location of the Hadoop folder, then add %HADOOP_HOME%\bin to PATH. For example, if Hadoop was extracted to C:\hadoop-2.10.1, HADOOP_HOME would be C:\hadoop-2.10.1.
4. It is best to restart the computer so that the environment variables and executables take effect.
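To check whether the variable is visible to newly started programs, here is a minimal Java sketch (the class name EnvCheck is made up for illustration):

public class EnvCheck {
    public static void main(String[] args) {
        // Prints null if HADOOP_HOME is not visible to this JVM yet
        // (e.g. before restarting, or in a terminal opened earlier)
        System.out.println("HADOOP_HOME = " + System.getenv("HADOOP_HOME"));
    }
}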
1. Create an empty Maven project
2. Since the program uses some of Hadoop's APIs, the corresponding dependencies must be added. The complete pom file is given below; set the hadoop.version property to match your Hadoop version.
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>cn.javayuli</groupId>
    <artifactId>MapReduceTest</artifactId>
    <version>1.0</version>

    <properties>
        <hadoop.version>2.10.1</hadoop.version>
    </properties>

    <repositories>
        <repository>
            <id>nexus-aliyun</id>
            <name>nexus-aliyun</name>
            <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
            <releases>
                <enabled>true</enabled>
            </releases>
            <snapshots>
                <enabled>false</enabled>
            </snapshots>
        </repository>
    </repositories>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-auth</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-common</artifactId>
            <version>7.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-core</artifactId>
            <version>7.3.0</version>
        </dependency>
        <dependency>
            <groupId>org.apache.lucene</groupId>
            <artifactId>lucene-analyzers-icu</artifactId>
            <version>7.3.0</version>
        </dependency>
        <dependency>
            <groupId>jfree</groupId>
            <artifactId>jfreechart</artifactId>
            <version>1.0.13</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <configuration>
                    <excludeTransitive>false</excludeTransitive>
                    <stripVersion>true</stripVersion>
                    <outputDirectory>./lib</outputDirectory>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
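A note on the maven-dependency-plugin configured above: it only runs when one of its goals is invoked explicitly. For example, running mvn dependency:copy-dependencies copies the project's dependency jars (with version numbers stripped, per stripVersion) into the ./lib folder.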
3. Write the Mapper class
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * @author 14516
 */
public class WordCountMap extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // split("") breaks the line into individual characters,
        // so this job counts character frequencies rather than words
        String[] split = line.split("");
        for (String s : split) {
            // Emit one (character, 1) pair per character
            context.write(new Text(s), new IntWritable(1));
        }
    }
}
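To see what the map step emits without running Hadoop, here is a minimal plain-Java sketch of the same split("") logic (the class name MapLogicDemo is made up for illustration):

public class MapLogicDemo {
    public static void main(String[] args) {
        // Illustration only: mirrors WordCountMap's map() logic
        String line = "hadoop";
        for (String s : line.split("")) {
            System.out.println("(" + s + ", 1)");
        }
    }
}

For the line "hadoop" this prints six pairs, one per character, with (o, 1) appearing twice; the framework then groups these pairs by key before calling the reducer.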
4. Write the Reducer class
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * @author 14516
 */
public class WordCountReduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        // Sum the values rather than just counting them, so the logic
        // stays correct even if a combiner pre-aggregates pairs
        for (IntWritable val : values) {
            count += val.get();
        }
        context.write(key, new IntWritable(count));
    }
}
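Conceptually, the shuffle phase groups the mapper's (character, 1) pairs by key, and the reducer sums each group. A minimal plain-Java sketch of that combined behavior, with no Hadoop required (ReduceLogicDemo is a made-up name):

import java.util.Map;
import java.util.TreeMap;

public class ReduceLogicDemo {
    public static void main(String[] args) {
        // Illustration only: simulates shuffle + reduce for one input line
        String line = "hadoop";
        // TreeMap keeps keys sorted, like the sorted input a reducer sees
        Map<String, Integer> counts = new TreeMap<>();
        for (String s : line.split("")) {
            // merge() groups by key and sums the values, as the reducer does
            counts.merge(s, 1, Integer::sum);
        }
        counts.forEach((k, v) -> System.out.println(k + "\t" + v));
    }
}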
5. Write the entry point (driver)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.log4j.BasicConfigurator;

import java.io.IOException;

/**
 * @author 14516
 */
public class WordCount {

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Quickly set up a default Log4j configuration so log output is visible
        BasicConfigurator.configure();

        Configuration configuration = new Configuration();
        String[] otherArgs = new GenericOptionsParser(configuration, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("An input path and an output path must be supplied");
            System.exit(2);
        }

        Job job = Job.getInstance(configuration);
        job.setJarByClass(WordCount.class);
        job.setJobName("Word Count");

        // Input path (read from HDFS when run on a cluster), passed in as the first argument
        FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
        // Output path; MapReduce writes its results to files in this directory
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // The class implementing the map function
        job.setMapperClass(WordCountMap.class);
        // The class implementing the reduce function
        job.setReducerClass(WordCountReduce.class);
        // Key type of the reducer output
        job.setOutputKeyClass(Text.class);
        // Value type of the reducer output
        job.setOutputValueClass(IntWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
6. Configure packaging
In IDEA, open File -> Project Structure and add a JAR artifact for the project.
Select WordCount as the main class.
Click Apply to save the configuration.
7. Create a new run configuration for WordCount, passing the input and output paths (e.g. input output) as program arguments.
8. Create an input folder, and create a test file A.txt inside it.
9. Run the program
After the program runs, an output folder is created automatically; the file part-r-00000 inside it contains the result, i.e. the number of times each character occurs.
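For example, if A.txt contained only the line "hadoop" (a made-up sample), part-r-00000 would look like the following; the keys come out sorted because MapReduce sorts reducer input by key:

a	1
d	1
h	1
o	2
p	1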
10. Build the jar
After completing step 6 above, you can build the package via Build -> Build Artifacts.
Once packaged, the jar can be uploaded to a server and run there.
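For example, assuming the built artifact is named MapReduceTest.jar (the actual name depends on your artifact configuration) and the input files have already been uploaded to HDFS, the job could be launched with:

hadoop jar MapReduceTest.jar WordCount /input /output

Here /input and /output are illustrative HDFS paths; note that the output directory must not exist before the job runs, or the job will fail to start.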