对于一些周期性运行的线上任务，如果上一周期的任务在一定时间内没有结束，而下一周期的任务已经启动，那么上一周期的任务就变得没有意义，但仍然会占用 Hadoop 资源，所以需要由程序检测超时并自动结束该任务。
示例:
# Launch the Hadoop streaming job in the background.
# stdout and stderr are both redirected to ./log, which the rest of the script
# scans for the job's kill command.
# $mapper, $reducer, $INPUT, $OUTPUT, $mapper_file and $reducer_file are not
# set here and must be provided by the surrounding environment.
hadoop jar /opt/hadoop/mapred/contrib/streaming/hadoop-0.21.0-streaming.jar \
  -D mapreduce.job.name="jobname" \
  -D mapreduce.job.reduces="100" \
  -D mapreduce.job.map.capacity="500" \
  -D mapreduce.job.reduce.capacity="100" \
  -mapper "$mapper" \
  -reducer "$reducer" \
  -input "${INPUT}" \
  -output "${OUTPUT}" \
  -file "${mapper_file}" \
  -file "${reducer_file}" \
  -jobconf mapred.min.split.size=536870912 \
  -partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner 1>log 2>&1 &
# PID of the background hadoop client process; polled later to detect completion.
PID=$!
# Report whether the background hadoop client ($PID) is alive: 1 = alive, 0 = gone.
# `kill -0` delivers no signal; it only checks that the process exists, which
# avoids the false matches of grepping `ps -ef` output for the PID digits.
if kill -0 "$PID" 2>/dev/null; then
  ps_status=1
else
  ps_status=0
fi
echo "$ps_status"

# Print fields 5-9 of the line in ./log that contains the
# -Dmapreduce.jobtracker.address=<tracker>:<port> option.
# NOTE(review): presumably those fields form the "hadoop job ... -kill <job id>"
# command that the streaming client logs -- confirm against the real log format.
extract_kill_cmd() {
  sed -n "/-Dmapreduce.jobtracker.address=${tracker_name}:${port}/p" log \
    | awk '{print $5" "$6" "$7" "$8" "$9}'
}

# Poll the log once per second until the kill command shows up.
KILL_CMD=$(extract_kill_cmd)
while [ "X$KILL_CMD" = "X" ]; do
  if ! kill -0 "$PID" 2>/dev/null; then
    # The client is gone; look one last time in case the line was written just
    # before it exited, then stop instead of waiting forever.
    KILL_CMD=$(extract_kill_cmd)
    if [ "X$KILL_CMD" = "X" ]; then
      echo "hadoop exited before a kill command appeared in the log." >&2
    fi
    break
  fi
  echo "wait for kill command for a while."
  sleep 1s
  KILL_CMD=$(extract_kill_cmd)
done
echo "$KILL_CMD"
# Watch the background hadoop client ($PID) and kill the job if it runs too long.
# The loop polls every 5 seconds. wait_time starts at 1 and grows by one on each
# poll that finds the process alive, so the limit of 60 is reached after roughly
# 5 minutes. KILL_CMD is the kill command extracted from the log earlier in the
# script.
wait_time=1
while true; do
  # `kill -0` only tests for existence of the process; no signal is delivered.
  if kill -0 "$PID" 2>/dev/null; then
    echo "hadoop is running"
    wait_time=$((wait_time + 1))
  else
    echo "hadoop job finished."
    break
  fi
  # Wait about 5 minutes; if the job has not finished by then, kill it.
  if [ "$wait_time" -ge 60 ]; then
    echo "timeout..."
    echo "[exec]$KILL_CMD"
    if [ -n "$KILL_CMD" ]; then
      # Intentionally unquoted: KILL_CMD holds a command followed by its
      # arguments and must be word-split to be executed.
      $KILL_CMD
    else
      echo "no kill command available; job was not killed." >&2
    fi
    break
  fi
  sleep 5s
done
exit 0