使用Cygwin模拟Linux环境安装配置运行基于单机的Hadoop(5)

日期：2020-06-07 栏目：程序人生浏览：次

上面是根据给定的输入数据执行任务的过程，结果已经输出到output-dir目录中，可以直接查看：在目录G:\hadoop-0.16.4\output-dir下面生成了两个文件：.part-00000.crc和part-00000。

查看处理结果，如下所示：

使用Cygwin模拟Linux环境安装配置运行基于单机的

或者，直接到G:\hadoop-0.16.4\output-dir目录下面打开part-00000文件查看即可，内容如下所示：

actor 1
add 2
after 2
apache 1
append 1
as 6
background 1
be 2
believe 1
bench 3
block 1
cafe 2
cat 4
communications 1
connection 1
cust 1

与上面查看到的处理结果是一样的。

这是一个非常简单的例子，而且Hadoop在其中实现了Google的MapReduce算法，用以处理数据。

我们可以简单看一下关于WordCount类的实现，在Hadoop的发行包中也附带了例子的源代码，WordCount.java类实现如下所示：

package org.apache.hadoop.examples;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Example Hadoop Map/Reduce application that counts words.
 *
 * <p>Each input line is split into tokens and every token is counted; the
 * job output is the list of tokens together with their frequencies.
 *
 * <p>Run with:
 * <pre>
 * bin/hadoop jar build/hadoop-examples.jar wordcount
 *     [-m maps] [-r reduces] in-dir out-dir
 * </pre>
 */
public class WordCount extends Configured implements Tool {

  /**
   * Mapper: splits each input line into tokens and emits
   * {@code (token, 1)} for every token found.
   */
  public static class MapClass extends MapReduceBase
      implements Mapper<LongWritable, Text, Text, IntWritable> {

    /** Constant count emitted for every token. */
    private static final IntWritable ONE = new IntWritable(1);

    /** Reused output key; overwritten with each token before it is collected. */
    private final Text word = new Text();

    /**
     * Tokenizes one line and collects a count of one per token.
     *
     * @param key      input key (not read by this mapper)
     * @param value    the line of text to tokenize
     * @param output   collector receiving {@code (token, 1)} pairs
     * @param reporter progress reporter (not used here)
     * @throws IOException if the collector fails
     */
    public void map(LongWritable key, Text value,
                    OutputCollector<Text, IntWritable> output,
                    Reporter reporter) throws IOException {
      // StringTokenizer with no delimiter argument splits on its default
      // whitespace delimiter set.
      StringTokenizer tokens = new StringTokenizer(value.toString());
      while (tokens.hasMoreTokens()) {
        word.set(tokens.nextToken());
        output.collect(word, ONE);
      }
    }
  }

  /**
   * Reducer: sums all counts received for a key and emits
   * {@code (key, sum)}. {@link WordCount#run} registers this class as both
   * the combiner and the reducer of the job.
   */
  public static class Reduce extends MapReduceBase
      implements Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Adds up the counts for one key.
     *
     * @param key      the word being counted
     * @param values   counts collected for {@code key}
     * @param output   collector receiving the {@code (key, total)} pair
     * @param reporter progress reporter (not used here)
     * @throws IOException if the collector fails
     */
    public void reduce(Text key, Iterator<IntWritable> values,
                       OutputCollector<Text, IntWritable> output,
                       Reporter reporter) throws IOException {
      int sum = 0;
      while (values.hasNext()) {
        sum += values.next().get();
      }
      output.collect(key, new IntWritable(sum));
    }
  }

  /**
   * Prints the command-line usage, followed by the generic Hadoop options.
   *
   * @return always {@code -1}, so callers can {@code return printUsage();}
   */
  static int printUsage() {
    System.out.println("wordcount [-m <maps>] [-r <reduces>] <input> <output>");
    ToolRunner.printGenericCommandUsage(System.out);
    return -1;
  }

  /**
   * Driver of the map/reduce program: configures the job from the command
   * line and submits it.
   *
   * @param args {@code [-m <maps>] [-r <reduces>] <input> <output>}
   * @return {@code 0} once {@code JobClient.runJob} has returned,
   *         {@code -1} if the arguments are invalid
   * @throws Exception propagated from job configuration or submission
   */
  public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), WordCount.class);
    conf.setJobName("wordcount");

    // the keys are words (strings)
    conf.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(MapClass.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
      String arg = args[i];
      boolean isMapFlag = "-m".equals(arg);
      if (!isMapFlag && !"-r".equals(arg)) {
        otherArgs.add(arg);
        continue;
      }

      // Check explicitly that the flag has a value instead of relying on
      // an ArrayIndexOutOfBoundsException to detect the missing argument.
      if (i + 1 >= args.length) {
        System.out.println("ERROR: Required parameter missing from " + arg);
        return printUsage();
      }

      String rawCount = args[++i];
      int count;
      try {
        count = Integer.parseInt(rawCount);
      } catch (NumberFormatException except) {
        System.out.println("ERROR: Integer expected instead of " + rawCount);
        return printUsage();
      }

      if (isMapFlag) {
        conf.setNumMapTasks(count);
      } else {
        conf.setNumReduceTasks(count);
      }
    }

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
      System.out.println("ERROR: Wrong number of parameters: "
          + otherArgs.size() + " instead of 2.");
      return printUsage();
    }
    conf.setInputPath(new Path(otherArgs.get(0)));
    conf.setOutputPath(new Path(otherArgs.get(1)));

    JobClient.runJob(conf);
    return 0;
  }

  /**
   * Command-line entry point: runs the tool through {@link ToolRunner} and
   * exits the JVM with the tool's return code.
   *
   * @param args command-line arguments, passed to {@code ToolRunner.run}
   * @throws Exception propagated from {@code ToolRunner.run}
   */
  public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new WordCount(), args);
    System.exit(res);
  }

}

通过对这个例子进行简单的说明，可以大致了解MapReduce算法的思想及其实现。

转载注明出处：https://www.heiqu.com/wywxwd.html

使用Cygwin模拟Linux环境安装配置运行基于单机的(5)

相关推荐