赞
踩
通俗地讲,倒排索引就是根据单词找到包含这个单词的所有文档,并统计该单词在每个文档中出现的次数。
1、首先要确定map、reduce、combiner中的key和value是什么类型
2、然后确定key和value具体是什么?
Map : key为 单词+文件名 value为空
combiner : key为单词 value为“文件名=次数”(如 a.txt=2)
reduce: key为单词 value为相同单词的“文件名=次数”用逗号拼接而成
| 文件名 | 文件内容 |
| --- | --- |
| a.txt | i love beijing and love china |
| b.txt | i love beijing and not like New York |
| c.txt | i dot like anycity |
| d.txt | you like where |
| e.txt | love familiy and love china |
package com.qyl.master; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import java.io.IOException; public class MyMapReduce { public static class MyMapper extends Mapper<LongWritable,Text,Text,Text>{ private Text okey=new Text(); private Text ovalue=new Text(); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String filename = ((FileSplit) context.getInputSplit()).getPath().getName(); String[] strs = value.toString().split(" "); for(String s:strs){ okey.set(s+"-"+filename); context.write(okey,ovalue); } } } public static class MyCombiner extends Reducer<Text,Text,Text,Text>{ private Text okey=new Text(); private Text ovalue=new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { int count=0; for(Text text:values){ count++; } String strs[]=key.toString().split("-"); okey.set(strs[0]); ovalue.set(strs[1]+"="+count); context.write(okey,ovalue); } } public static class MyReduce extends Reducer<Text,Text,Text,Text>{ private Text ovalue=new Text(); @Override protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException { StringBuilder sb=new StringBuilder(); for(Text text:values){ sb.append(text.toString()).append(","); } sb.delete(sb.length()-1,sb.length()); ovalue.set(sb.toString()); context.write(key,ovalue); } } public static void main(String[] args) { Configuration conf=new Configuration(); try { Job 
job=Job.getInstance(conf); job.setJarByClass(MyMapReduce.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReduce.class); job.setCombinerClass(MyCombiner.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); Path inPath =new Path("C:\\data"); FileInputFormat.addInputPath(job, inPath); Path outpath=new Path("C:\\data\\result"); if(outpath.getFileSystem(conf).exists(outpath)){ outpath.getFileSystem(conf).delete(outpath, true); } FileOutputFormat.setOutputPath(job, outpath); job.waitForCompletion(true); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } } }
New b.txt=1
York b.txt=1
and b.txt=1,e.txt=1,a.txt=1
anycity c.txt=1
beijing b.txt=1,a.txt=1
china a.txt=1,e.txt=1
dot c.txt=1
familiy e.txt=1
i c.txt=1,a.txt=1,b.txt=1
like b.txt=1,c.txt=1,d.txt=1
love e.txt=2,b.txt=1,a.txt=2
not b.txt=1
where d.txt=1
you d.txt=1
Copyright © 2003-2013 www.wpsshop.cn 版权所有,并保留所有权利。