Configuring an Eclipse Environment for Hadoop Application Development on Ubuntu (4)

Step 7: Create a new WordCount.java. Here we use the TokenCountMapper and LongSumReducer classes that ship with Hadoop; the code is as follows:

package com.baison.action;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.TokenCountMapper;
import org.apache.hadoop.mapred.lib.LongSumReducer;

public class WordCount {
    public static void main(String[] args) {
        JobClient client = new JobClient();
        JobConf conf = new JobConf(WordCount.class);

        // Input and output directories on HDFS
        String[] arg = { "hdfs://localhost:9100/user/tanglg1987/input",
                "hdfs://localhost:9100/user/tanglg1987/output" };
        FileInputFormat.addInputPath(conf, new Path(arg[0]));
        FileOutputFormat.setOutputPath(conf, new Path(arg[1]));

        // Output key/value types: word -> count
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(LongWritable.class);

        // Mapper and reducer shipped with Hadoop:
        // TokenCountMapper emits (token, 1); LongSumReducer sums the counts
        // and is also used as the combiner.
        conf.setMapperClass(TokenCountMapper.class);
        conf.setCombinerClass(LongSumReducer.class);
        conf.setReducerClass(LongSumReducer.class);

        client.setConf(conf);
        try {
            JobClient.runJob(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
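
Before running the job, the input directory must already exist on HDFS and the output directory must not, since MapReduce refuses to write into an existing output path. Below is a minimal sketch of such a pre-flight cleanup, assuming the same paths as in WordCount above; the OutputCleaner class name is a hypothetical helper, not part of the original tutorial.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

// Hypothetical helper: remove a stale output directory before re-running the job.
public class OutputCleaner {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Same output path as used by WordCount above.
        Path output = new Path("hdfs://localhost:9100/user/tanglg1987/output");
        FileSystem fs = FileSystem.get(output.toUri(), conf);
        if (fs.exists(output)) {
            fs.delete(output, true); // true = delete recursively
            System.out.println("Removed stale output directory: " + output);
        }
    }
}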

