Hadoop：mapreduce程序reduce输出控制(3)

日期：2021-04-06 栏目：程序人生浏览：次

2. 如果同一行数据需要同时输出到多个文件，可以使用 MultipleOutputs 类：

public class MultiFile extends Confi gured implements Tool { public static class MapClass extends MapReduceBase implements Mapper<LongWritable, Text, NullWritable, Text> { private MultipleOutputs mos; private OutputCollector<NullWritable, Text> collector; public void confi gure(JobConf conf) { mos = new MultipleOutputs(conf); } public void map(LongWritable key, Text value, OutputCollector<NullWritable, Text> output, Reporter reporter) throws IOException { String[] arr = value.toString().split(",", -1); String chrono = arr[0] + "," + arr[1] + "," + arr[2]; String geo = arr[0] + "," + arr[4] + "," + arr[5]; collector = mos.getCollector("chrono", reporter); collector.collect(NullWritable.get(), new Text(chrono)); collector = mos.getCollector("geo", reporter); collector.collect(NullWritable.get(), new Text(geo)); } public void close() throws IOException { mos.close(); } } public int run(String[] args) throws Exception { Confi guration conf = getConf(); JobConf job = new JobConf(conf, MultiFile.class); Path in = new Path(args[0]); Path out = new Path(args[1]); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("MultiFile"); job.setMapperClass(MapClass.class); job.setInputFormat(TextInputFormat.class); job.setOutputKeyClass(NullWritable.class); job.setOutputValueClass(Text.class); job.setNumReduceTasks(0); MultipleOutputs.addNamedOutput(job, "chrono", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(job, "geo", TextOutputFormat.class, NullWritable.class, Text.class); JobClient.runJob(job); return 0; } }

转载注明出处：http://www.heiqu.com/560e462818f4132993e4df7b47a2ec31.html

Hadoop：mapreduce程序reduce输出控制(3)

相关推荐