1. Create a new Maven project.
2. Add the following dependencies to the pom file:
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.3</version>
    </dependency>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.3</version>
    </dependency>
</dependencies>
3. Optionally, install the hadoop-eclipse plugin. It is not required, but once it is installed you can view the Hadoop cluster's address in Eclipse's resource view.
4. Create the mapper class WordCountMapper.java:
package com.niwodai.hadoop.mapper;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // 1. Convert the line of text the map task hands us into a String
        String line = value.toString();
        // 2. Split the line into words on spaces
        String[] words = line.split(" ");
        // 3. Emit each word as <word, 1>
        for (String word : words) {
            // Use the word as the key and the count 1 as the value, so the shuffle
            // can partition by word and every occurrence of the same word
            // reaches the same reduce task
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
5. Create the reducer class WordCountReducer.java:
package com.niwodai.hadoop.reducer;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int count = 0;
        // 1. Sum up the occurrences of this key
        for (IntWritable value : values) {
            count += value.get();
        }
        // 2. Emit the total count for this key
        context.write(key, new IntWritable(count));
    }
}
6. Create the job driver class WordCountJob.java:
package com.niwodai.hadoop.main;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.niwodai.hadoop.mapper.WordCountMapper;
import com.niwodai.hadoop.reducer.WordCountReducer;
public class WordCountJob {
    public static void main(String[] args) throws Exception {
        // 1. Obtain the configuration and a Job instance
        Configuration configuration = new Configuration();
        // 8. Configuration for submitting to YARN; note that the values differ between
        //    Windows and Linux (see the mapreduce.framework.name setting in mapred-site.xml)
        //configuration.set("mapreduce.framework.name", "yarn");
        //configuration.set("yarn.resourcemanager.hostname", "yarn-rm-cluster");
        Job job = Job.getInstance(configuration);
        // 6. Specify the local path of the jar containing this program
        job.setJarByClass(WordCountJob.class);
        // 2. Specify the Mapper/Reducer classes this job uses
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // 3. Specify the key/value types of the mapper output
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // 4. Specify the key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        // 5. Specify the directory of the job's raw input files and the output directory
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // 7. Submit the job's configuration, and the jar containing its classes, to YARN
        // job.submit();
        boolean result = job.waitForCompletion(true);
        System.exit(result ? 0 : 1);
    }
}
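As an optional tweak that is not part of the original steps, the reducer can also be registered as a combiner: because summing counts is associative and commutative, WordCountReducer can safely pre-aggregate each map task's output and shrink the shuffle. One extra line in main(), before waitForCompletion, is enough:

// Optional: pre-aggregate map output locally; safe because addition is
// associative and commutative, so combining does not change the result.
job.setCombinerClass(WordCountReducer.class);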
7. Package the Maven project as a jar, copy it to the Hadoop cluster, and run:
hadoop jar hadoop-wordcount.jar com.niwodai.hadoop.main.WordCountJob /input /output88
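For completeness, a typical build-and-submit sequence might look like the following; words.txt and the directory layout are illustrative assumptions, not from the original post:

# Build the jar from the project root
mvn clean package
# Upload a sample input file to HDFS (words.txt is a placeholder)
hdfs dfs -mkdir -p /input
hdfs dfs -put words.txt /input
# Submit the job; the output directory must not exist yet
hadoop jar hadoop-wordcount.jar com.niwodai.hadoop.main.WordCountJob /input /output88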
8. View the output by running: hdfs dfs -cat /output88/part-r-00000
China	1
hello	3
wangwei	1
world	1
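For reference, an input file along these lines would produce exactly the counts above (the actual input is not shown in the original post, so this is a reconstruction):

hello world
hello China
hello wangwei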
9. Local testing
(1) Configure the HADOOP_HOME environment variable on Windows.
(2) Run the program from Eclipse.
(3) Note: if Eclipse does not print any logs and the console only shows…
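The note above is cut off in the original; it most likely refers to the standard log4j warning ("log4j:WARN No appenders could be found for logger"). Under that assumption, a minimal log4j.properties placed in src/main/resources restores console logging (a sketch, assuming log4j 1.x, which Hadoop 2.7 uses):

# src/main/resources/log4j.properties -- minimal console logging
log4j.rootLogger=INFO, stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %p [%c] %m%n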
10. Troubleshooting other errors:
The error shown at the end of this section appears when the NameNode and DataNode system clocks are out of sync.
Fix:
Run the following two commands on every node:
1) cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
2) ntpdate pool.ntp.org
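If you have several nodes, a loop such as this applies the fix everywhere; node1 through node3 are hypothetical hostnames, so substitute your own:

# Run on a machine with ssh access to all nodes (hostnames are placeholders)
for node in node1 node2 node3; do
  ssh root@$node "cp /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && ntpdate pool.ntp.org"
done

The failing job output looks like this: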
2020-05-07 16:57:19,233 INFO mapreduce.Job: Task Id : attempt_1588835901055_0001_r_000000_1000, Status : FAILED
Container launch failed for container_e09_1588835901055_0001_02_000006 : org.apache.hadoop.yarn.exceptions.YarnException: Unauthorized request to start container.
This token is expired. current time is 1588870485646 found 1588842438408
Note: System times on machines may be out of sync. Check system time and time zones.
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
at org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl.instantiateExceptionImpl(SerializedExceptionPBImpl.java:171)
at org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl.instantiateException(SerializedExceptionPBImpl.java:182)
at org.apache.hadoop.yarn.api.records.impl.pb.SerializedExceptionPBImpl.deSerialize(SerializedExceptionPBImpl.java:106)
at org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl$Container.launch(ContainerLauncherImpl.java:163)
at org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl$EventProcessor.run(ContainerLauncherImpl.java:394)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
2020-05-07 16:57:22,265 INFO mapreduce.Job: Task Id : attempt_1588835901055_0001_r_000000_1001, Status : FAILED
Container launch failed for container_e09_1588835901055_0001_02_000007 : org.apache.hadoop.yarn.exceptions.YarnException: Unauthorized request to start container.
This token is expired. current time is 1588870488602 found 1588842441430
Note: System times on machines may be out of sync. Check system time and time zones.
(stack trace identical to the one above)
2020-05-07 16:57:25,308 INFO mapreduce.Job: Task Id : attempt_1588835901055_0001_r_000000_1002, Status : FAILED
Container launch failed for container_e09_1588835901055_0001_02_000008 : org.apache.hadoop.yarn.exceptions.YarnException: Unauthorized request to start container.
This token is expired. current time is 1588870491638 found 1588842444455
Note: System times on machines may be out of sync. Check system time and time zones.
(stack trace identical to the one above)