MapReduce原理深入理解(二)

1.MapReduce操作不需要reduce阶段

 

1 import org.apache.hadoop.conf.Configuration; 2 import org.apache.hadoop.fs.FileSystem; 3 import org.apache.hadoop.fs.Path; 4 import org.apache.hadoop.io.LongWritable; 5 import org.apache.hadoop.io.NullWritable; 6 import org.apache.hadoop.io.Text; 7 import org.apache.hadoop.mapreduce.Job; 8 import org.apache.hadoop.mapreduce.Mapper; 9 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 10 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 11 12 import java.io.IOException; 13 14 public class WordCount03 { 15 public static class MyMapper extends Mapper<LongWritable, Text,Text, NullWritable>{ 16 @Override 17 protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 18 String line = value.toString(); 19 String s = line.split(",")[3]; 20 if(s.equals("男")){ 21 context.write(new Text(s),NullWritable.get()); 22 } 23 } 24 } 25 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException { 26 Job job= Job.getInstance(); 27 job.setNumReduceTasks(0); 28 /** 29 * 有些情况下,不需要reduce(聚合程序), 30 * 在不需要聚合操作的时候,可以不需要reduce 31 * 而reduce默认为1,需要手动设置为0, 32 * 如果没有设置为0,会产生默认的reduce,只不过reduce不处理任何数据 33 */ 34 job.setJobName("mr03程序"); 35 job.setJarByClass(WordCount03.class); 36 job.setMapOutputKeyClass(Text.class); 37 job.setMapOutputValueClass(NullWritable.class); 38 Path in = new Path("/word"); 39 FileInputFormat.addInputPath(job,in); 40 Path out = new Path("/output"); 41 FileSystem fs = FileSystem.get(new Configuration()); 42 if(fs.exists(out)){ 43 fs.delete(out); 44 } 45 FileOutputFormat.setOutputPath(job,out); 46 job.waitForCompletion(true); 47 } 48 }

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/zwjxsw.html