当前位置:   article > 正文

在linux环境下搭建hadoop_linux搭建mapreduce

linux搭建mapreduce

本人是搭建比较新版本的hadoop,版本号是hadoop-2.6.0.

1.搭建伪分布式(单机模拟)集群环境

linux环境上安装ssh,

安装jdk,配置jdk环境变量。

配置hadoop参数

配置mapred-site.xml(注意:mapred.job.tracker 是 Hadoop 1.x 的旧属性名;在 Hadoop 2.x/YARN 上应配置 mapreduce.framework.name 为 yarn)

  1. <configuration>
  2. <property>
  3. <name>mapred.job.tracker</name>
  4. <value>localhost:9001</value>
  5. </property>
  6. </configuration>

配置core-site.xml(注意:fs.default.name 在 Hadoop 2.x 中已被 fs.defaultFS 取代,旧名仍可用但已废弃)

  1. <!-- Put site-specific property overrides in this file. -->
  2. <configuration>
  3. <property>
  4. <name>hadoop.tmp.dir</name>
  5. <value>/usr/local/hadoop-2.6.0/tmp</value>
  6. <description>A base for other temporary directories.</description>
  7. </property>
  8. <!-- file system properties -->
  9. <property>
  10. <name>fs.default.name</name>
  11. <value>hdfs://localhost:9000</value>
  12. </property>
  13. </configuration>

以上配置完成后,首次启动前还需执行 bin/hdfs namenode -format 格式化 NameNode,然后即可启动。

启动hadoop

进入sbin 执行命令   ./start-all.sh

完成后 用jps指令查看对应的应用是否正常启动。

2.hadoop进阶(使用eclipse在linux环境开发)

下面贴出代码,用 MapReduce 简单实现一个求学生平均分的功能

  1. package mapreduce;
  2. import java.io.IOException;
  3. import java.util.Iterator;
  4. import java.util.StringTokenizer;
  5. import org.apache.hadoop.conf.Configuration;
  6. import org.apache.hadoop.fs.Path;
  7. import org.apache.hadoop.io.IntWritable;
  8. import org.apache.hadoop.io.LongWritable;
  9. import org.apache.hadoop.io.Text;
  10. import org.apache.hadoop.mapreduce.Job;
  11. import org.apache.hadoop.mapreduce.Mapper;
  12. import org.apache.hadoop.mapreduce.Reducer;
  13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  14. import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
  15. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  16. import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  17. import org.apache.hadoop.util.Tool;
  18. import org.apache.hadoop.util.ToolRunner;
  19. import org.slf4j.Logger;
  20. import org.slf4j.LoggerFactory;
  21. /***
  22. * 定义一个AvgScore 求学生的平均值 要实现一个Tool 工具类,是为了初始化一个hadoop配置实例
  23. */
  24. public class AvgScore implements Tool{
  25. public static final Logger log=LoggerFactory.getLogger(AvgScore.class);
  26. Configuration configuration;
  27. // 是版本 0.20.2的实现
  28. public static class MyMap extends Mapper<Object, Text, Text, IntWritable>{
  29. @Override
  30. protected void map(Object key, Text value, Context context) throws IOException, InterruptedException {
  31. String stuInfo = value.toString();//将输入的纯文本的数据转换成String
  32. System.out.println("studentInfo:"+stuInfo);
  33. log.info("MapSudentInfo:"+stuInfo);
  34. //将输入的数据先按行进行分割
  35. StringTokenizer tokenizerArticle = new StringTokenizer(stuInfo, "\n");
  36. //分别对每一行进行处理
  37. while(tokenizerArticle.hasMoreTokens()){
  38. // 每行按空格划分
  39. StringTokenizer tokenizer = new StringTokenizer(tokenizerArticle.nextToken());
  40. String name = tokenizer.nextToken();//学生姓名
  41. String score = tokenizer.nextToken();//学生成绩
  42. Text stu = new Text(name);
  43. int intscore = Integer.parseInt(score);
  44. log.info("MapStu:"+stu.toString()+" "+intscore);
  45. context.write(stu,new IntWritable(intscore));//输出学生姓名和成绩
  46. }
  47. }
  48. }
  49. public static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable>{
  50. @Override
  51. protected void reduce(Text key, Iterable<IntWritable> values,Context context)
  52. throws IOException, InterruptedException {
  53. int sum=0;
  54. int count=0;
  55. Iterator<IntWritable> iterator= values.iterator();
  56. while(iterator.hasNext()){
  57. sum+=iterator.next().get();//计算总分
  58. count++;//统计总科目
  59. }
  60. int avg= (int)sum/count;
  61. context.write(key,new IntWritable(avg));//输出学生姓名和平均值
  62. }
  63. }
  64. public int run(String [] args) throws Exception{
  65. Job job = new Job(getConf());
  66. job.setJarByClass(AvgScore.class);
  67. job.setJobName("avgscore");
  68. job.setOutputKeyClass(Text.class);
  69. job.setOutputValueClass(IntWritable.class);
  70. job.setMapperClass(MyMap.class);
  71. job.setCombinerClass(MyReduce.class);
  72. job.setReducerClass(MyReduce.class);
  73. job.setInputFormatClass(TextInputFormat.class);
  74. job.setOutputFormatClass(TextOutputFormat.class);
  75. FileInputFormat.addInputPath(job, new Path(args[0]));//设置输入文件路径
  76. FileOutputFormat.setOutputPath(job, new Path(args[1]));//设置输出文件路径
  77. boolean success= job.waitForCompletion(true);
  78. return success ? 0 : 1;
  79. }
  80. public static void main(String[] args) throws Exception {
  81. //在eclipse 工具上配置输入和输出参数
  82. int ret = ToolRunner.run(new AvgScore(), args);
  83. System.exit(ret);
  84. }
  85. @Override
  86. public Configuration getConf() {
  87. return configuration;
  88. }
  89. @Override
  90. public void setConf(Configuration conf) {
  91. conf = new Configuration();
  92. configuration=conf;
  93. }
  94. }



声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/你好赵伟/article/detail/1002468
推荐阅读
相关标签
  

闽ICP备14008679号