通过单机的Hadoop伪分布式运行理解Hadoop运行过程(4)

任务开始启动,到执行完成以后,会导出一些便于查看的文件,比如:任务配置和任务执行情况,通过读这些日志文件,能加深对Hadoop的认识。

在logs\history目录下可以看到,当前执行完成的任务的详细配置信息和任务执行情况信息。

其中,job_200809211811_0001_conf.xml文件,是此次任务执行,包括Hadoop配置信息在内的任务配置信息:

<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>
<property><name>dfs.replication.interval</name><value>3</value></property>
<property><name>mapred.mapper.class</name><value>org.apache.hadoop.examples.WordCount$MapClass</value></property>
<property><name>ipc.client.maxidletime</name><value>120000</value></property>
<property><name>mapred.input.dir</name><value>hdfs://localhost:9000/user/SHIYANJUN/input</value></property>
<property><name>mapred.submit.replication</name><value>10</value></property>
<property><name>dfs.safemode.extension</name><value>30000</value></property>
<property><name>mapred.working.dir</name><value>hdfs://localhost:9000/user/SHIYANJUN</value></property>
<property><name>ipc.server.tcpnodelay</name><value>false</value></property>
<property><name>dfs.web.ugi</name><value>webuser,webgroup</value></property>
<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>
<property><name>mapred.job.split.file</name><value>/tmp/hadoop-SHIYANJUN/mapred/system/job_200809211811_0001/job.split</value></property>
<property><name>dfs.permissions.supergroup</name><value>supergroup</value></property>
<property><name>dfs.datanode.http.address</name><value>0.0.0.0:50075</value></property>
<property><name>mapred.output.compression.type</name><value>RECORD</value></property>
<property><name>dfs.replication.min</name><value>1</value></property>
<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>
<property><name>dfs.datanode.dns.nameserver</name><value>default</value></property>
<property><name>keep.failed.task.files</name><value>false</value></property>
<property><name>dfs.http.address</name><value>0.0.0.0:50070</value></property>
<property><name>mapred.output.value.class</name><value>org.apache.hadoop.io.IntWritable</value></property>
<property><name>io.bytes.per.checksum</name><value>512</value></property>
<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
<property><name>dfs.blockreport.intervalMsec</name><value>3600000</value></property>
<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.dfs.DistributedFileSystem</value></property>
<property><name>dfs.data.dir</name><value>${hadoop.tmp.dir}/dfs/data</value></property>
<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>
<property><name>dfs.block.size</name><value>67108864</value></property>
<property><name>fs.hftp.impl</name><value>org.apache.hadoop.dfs.HftpFileSystem</value></property>
<property><name>fs.checkpoint.period</name><value>3600</value></property>
<property><name>user.name</name><value>SHIYANJUN</value></property>
<property><name>mapred.child.tmp</name><value>./tmp</value></property>
<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>
<property><name>map.sort.class</name><value>org.apache.hadoop.mapred.MergeSorter</value></property>
<property><name>hadoop.logfile.count</name><value>10</value></property>
<property><name>ipc.client.connection.maxidletime</name><value>1000</value></property>
<property><name>mapred.output.dir</name><value>hdfs://localhost:9000/user/SHIYANJUN/output</value></property>
<property><name>io.map.index.skip</name><value>0</value></property>
<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>
<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
<property><name>mapred.output.compress</name><value>false</value></property>
<property><name>dfs.datanode.dns.interface</name><value>default</value></property>
<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
<property><name>fs.checkpoint.size</name><value>67108864</value></property>
<property><name>mapred.job.name</name><value>wordcount</value></property>
<property><name>dfs.max.objects</name><value>0</value></property>
<property><name>local.cache.size</name><value>10737418240</value></property>
<property><name>mapred.userlog.limit.kb</name><value>0</value></property>
<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>
<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
<property><name>mapred.task.timeout</name><value>600000</value></property>
<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>
<property><name>mapred.reducer.class</name><value>org.apache.hadoop.examples.WordCount$Reduce</value></property>
<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>
<property><name>hadoop.job.ugi</name><value>SHIYANJUN,None,root,Administrators,Users</value></property>
<property><name>ipc.client.kill.max</name><value>10</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>
<property><name>dfs.client.block.write.retries</name><value>3</value></property>
<property><name>dfs.datanode.du.reserved</name><value>0</value></property>
<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>
<property><name>dfs.replication.max</name><value>512</value></property>
<property><name>dfs.balance.bandwidthPerSec</name><value>1048576</value></property>
<property><name>mapred.max.tracker.failures</name><value>4</value></property>
<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>
<property><name>mapred.map.tasks</name><value>7</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>
<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>
<property><name>fs.default.name</name><value>localhost:9000</value></property>
<property><name>mapred.output.key.class</name><value>org.apache.hadoop.io.Text</value></property>
<property><name>ipc.client.timeout</name><value>60000</value></property>
<property><name>tasktracker.http.threads</name><value>40</value></property>
<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>
<property><name>mapred.reduce.tasks</name><value>1</value></property>
<property><name>dfs.datanode.address</name><value>0.0.0.0:50010</value></property>
<property><name>dfs.heartbeat.interval</name><value>3</value></property>
<property><name>dfs.replication.considerLoad</name><value>true</value></property>
<property><name>dfs.default.chunk.view.size</name><value>32768</value></property>
<property><name>io.file.buffer.size</name><value>4096</value></property>
<property><name>dfs.client.buffer.dir</name><value>${hadoop.tmp.dir}/dfs/tmp</value></property>
<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
<property><name>mapred.task.profile.maps</name><value>0-2</value></property>
<property><name>hadoop.native.lib</name><value>true</value></property>
<property><name>fs.s3.block.size</name><value>67108864</value></property>
<property><name>dfs.replication</name><value>1</value></property>
<property><name>mapred.jar</name><value>/tmp/hadoop-SHIYANJUN/mapred/local/jobTracker/job_200809211811_0001.jar</value></property>
<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
<property><name>fs.inmemory.size.mb</name><value>75</value></property>
<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>
<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec</value></property>
<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
<property><name>dfs.namenode.decommission.interval</name><value>300</value></property>
<property><name>mapred.job.tracker</name><value>localhost:9001</value></property>
<property><name>io.skip.checksum.errors</name><value>false</value></property>
<property><name>mapred.combiner.class</name><value>org.apache.hadoop.examples.WordCount$Reduce</value></property>
<property><name>ipc.server.listen.queue.size</name><value>128</value></property>
<property><name>mapred.reduce.max.attempts</name><value>4</value></property>
<property><name>fs.s3.maxRetries</name><value>4</value></property>
<property><name>fs.trash.interval</name><value>0</value></property>
<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>
<property><name>dfs.datanode.du.pct</name><value>0.98f</value></property>
<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>
<property><name>io.sort.mb</name><value>100</value></property>
<property><name>mapred.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value></property>
<property><name>io.sort.factor</name><value>10</value></property>
<property><name>mapred.task.profile</name><value>false</value></property>
<property><name>job.end.retry.interval</name><value>30000</value></property>
<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>
<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>
<property><name>webinterface.private.actions</name><value>false</value></property>
<property><name>hadoop.tmp.dir</name><value>/tmp/hadoop-${user.name}</value></property>
<property><name>mapred.map.output.compression.type</name><value>RECORD</value></property>
<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>
<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>
<property><name>mapred.compress.map.output</name><value>false</value></property>
<property><name>mapred.userlog.retain.hours</name><value>24</value></property>
<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>
<property><name>dfs.safemode.threshold.pct</name><value>0.999f</value></property>
<property><name>job.end.retry.attempts</name><value>0</value></property>
<property><name>ipc.client.idlethreshold</name><value>4000</value></property>
<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>
<property><name>dfs.permissions</name><value>true</value></property>
<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>
<property><name>dfs.namenode.handler.count</name><value>10</value></property>
<property><name>hadoop.logfile.size</name><value>10000000</value></property>
<property><name>dfs.namenode.logging.level</name><value>info</value></property>
<property><name>dfs.secondary.http.address</name><value>0.0.0.0:50090</value></property>
<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>
<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>
<property><name>dfs.name.dir</name><value>${hadoop.tmp.dir}/dfs/name</value></property>
<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
<property><name>mapred.min.split.size</name><value>0</value></property>
<property><name>mapred.map.max.attempts</name><value>4</value></property>
<property><name>ipc.client.tcpnodelay</name><value>false</value></property>
<property><name>jobclient.output.filter</name><value>FAILED</value></property>
<property><name>dfs.df.interval</name><value>60000</value></property>
</configuration>
 

从上面这个文件中,首先,可以了解到Hadoop基本配置中的默认配置信息,从而进一步了解并学习如何配置一个Hadoop执行任务。

另外一个文件,1221994453046_job_200809211811_0001文件就是记录整个任务的详细信息的,如下所示:

Job JOBID="job_200809211811_0001" JOBNAME="wordcount" USER="SHIYANJUN" SUBMIT_TIME="1221994453031" JOBCONF="/tmp/hadoop-SHIYANJUN/mapred/system/job_200809211811_0001/job.xml"
Job JOBID="job_200809211811_0001" LAUNCH_TIME="1221994453375" TOTAL_MAPS="7" TOTAL_REDUCES="1"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000000" TASK_ATTEMPT_ID="task_200809211811_0001_m_000000_0" START_TIME="1221994457390" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000000" TASK_ATTEMPT_ID="task_200809211811_0001_m_000000_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994473281" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000000" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994473281" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:1684,Map-Reduce Framework.Map input bytes:10109,Map-Reduce Framework.Map output bytes:16845,Map-Reduce Framework.Combine input records:1684,Map-Reduce Framework.Combine output records:3"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000001" TASK_ATTEMPT_ID="task_200809211811_0001_m_000001_0" START_TIME="1221994457750" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000001" TASK_ATTEMPT_ID="task_200809211811_0001_m_000001_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994472015" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000001" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994472015" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:331,Map-Reduce Framework.Map input bytes:1987,Map-Reduce Framework.Map output bytes:3312,Map-Reduce Framework.Combine input records:331,Map-Reduce Framework.Combine output records:6"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000002" TASK_ATTEMPT_ID="task_200809211811_0001_m_000002_0" START_TIME="1221994473843" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000002" TASK_ATTEMPT_ID="task_200809211811_0001_m_000002_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994485937" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000002" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994485937" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:330,Map-Reduce Framework.Map input bytes:1985,Map-Reduce Framework.Map output bytes:3306,Map-Reduce Framework.Combine input records:330,Map-Reduce Framework.Combine output records:4"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000003" TASK_ATTEMPT_ID="task_200809211811_0001_m_000003_0" START_TIME="1221994474500" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000003" TASK_ATTEMPT_ID="task_200809211811_0001_m_000003_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994485343" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000003" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994485343" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:326,Map-Reduce Framework.Map input bytes:1957,Map-Reduce Framework.Map output bytes:3262,Map-Reduce Framework.Combine input records:326,Map-Reduce Framework.Combine output records:2"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000004" TASK_ATTEMPT_ID="task_200809211811_0001_m_000004_0" START_TIME="1221994487078" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000004" TASK_ATTEMPT_ID="task_200809211811_0001_m_000004_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994496453" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000004" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994496453" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:326,Map-Reduce Framework.Map input bytes:1957,Map-Reduce Framework.Map output bytes:3262,Map-Reduce Framework.Combine input records:326,Map-Reduce Framework.Combine output records:2"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000005" TASK_ATTEMPT_ID="task_200809211811_0001_m_000005_0" START_TIME="1221994487781" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000005" TASK_ATTEMPT_ID="task_200809211811_0001_m_000005_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994497078" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000005" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994497078" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:326,Map-Reduce Framework.Map input bytes:1957,Map-Reduce Framework.Map output bytes:3262,Map-Reduce Framework.Combine input records:326,Map-Reduce Framework.Combine output records:2"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000006" TASK_ATTEMPT_ID="task_200809211811_0001_m_000006_0" START_TIME="1221994497375" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
MapAttempt TASK_TYPE="MAP" TASKID="tip_200809211811_0001_m_000006" TASK_ATTEMPT_ID="task_200809211811_0001_m_000006_0" TASK_STATUS="SUCCESS" FINISH_TIME="1221994501765" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_m_000006" TASK_TYPE="MAP" TASK_STATUS="SUCCESS" FINISH_TIME="1221994501765" COUNTERS="Map-Reduce Framework.Map input records:1,Map-Reduce Framework.Map output records:326,Map-Reduce Framework.Map input bytes:1957,Map-Reduce Framework.Map output bytes:3262,Map-Reduce Framework.Combine input records:326,Map-Reduce Framework.Combine output records:2"
ReduceAttempt TASK_TYPE="REDUCE" TASKID="tip_200809211811_0001_r_000000" TASK_ATTEMPT_ID="task_200809211811_0001_r_000000_0" START_TIME="1221994464125" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
ReduceAttempt TASK_TYPE="REDUCE" TASKID="tip_200809211811_0001_r_000000" TASK_ATTEMPT_ID="task_200809211811_0001_r_000000_0" TASK_STATUS="SUCCESS" SHUFFLE_FINISHED="1221994509828" SORT_FINISHED="1221994509843" FINISH_TIME="1221994511421" HOSTNAME="tracker_cbbd2ce9428e48b:localhost/127.0.0.1:4881"
Task TASKID="tip_200809211811_0001_r_000000" TASK_TYPE="REDUCE" TASK_STATUS="SUCCESS" FINISH_TIME="1221994511421" COUNTERS="Map-Reduce Framework.Reduce input groups:7,Map-Reduce Framework.Reduce input records:21,Map-Reduce Framework.Reduce output records:7"
Job JOBID="job_200809211811_0001" FINISH_TIME="1221994511484" JOB_STATUS="SUCCESS" FINISHED_MAPS="7" FINISHED_REDUCES="1" FAILED_MAPS="0" FAILED_REDUCES="0" COUNTERS="Job Counters .Launched map tasks:7,Job Counters .Launched reduce tasks:1,Job Counters .Data-local map tasks:7,Map-Reduce Framework.Map input records:7,Map-Reduce Framework.Map output records:3649,Map-Reduce Framework.Map input bytes:21909,Map-Reduce Framework.Map output bytes:36511,Map-Reduce Framework.Combine input records:3649,Map-Reduce Framework.Combine output records:21,Map-Reduce Framework.Reduce input groups:7,Map-Reduce Framework.Reduce input records:21,Map-Reduce Framework.Reduce output records:7"
 

上面主要描述了:一个Job是如何进行分割的;一个Job是由哪些Task完成的;以及这些Task执行任务的信息,比如执行时间、任务完成状况等等。

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:http://www.heiqu.com/pxpfd.html