Hadoop启动脚本全面详解(2)

/usr/lib/Hadoop/libexec/hadoop-config.sh

#1.加载/usr/lib/hadoop/libexec/hadoop-layout.sh
 hadoop-layout.sh主要描述了hadoop的lib的文件夹结构,主要内容如下

HADOOP_COMMON_DIR="./"
HADOOP_COMMON_LIB_JARS_DIR="lib"
HADOOP_COMMON_LIB_NATIVE_DIR="lib/native"
HDFS_DIR="./"
HDFS_LIB_JARS_DIR="lib"
YARN_DIR="./"
YARN_LIB_JARS_DIR="lib"
MAPRED_DIR="./"
MAPRED_LIB_JARS_DIR="lib"

HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-"/usr/lib/hadoop/libexec"}
HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop/conf"}
HADOOP_COMMON_HOME=${HADOOP_COMMON_HOME:-"/usr/lib/hadoop"}
HADOOP_HDFS_HOME=${HADOOP_HDFS_HOME:-"/usr/lib/hadoop-hdfs"}
HADOOP_MAPRED_HOME=${HADOOP_MAPRED_HOME:-"/usr/lib/hadoop-0.20-mapreduce"}
YARN_HOME=${YARN_HOME:-"/usr/lib/hadoop-yarn"}

#2.指定HDFS和YARN的lib

HADOOP_COMMON_DIR=${HADOOP_COMMON_DIR:-"share/hadoop/common"}
HADOOP_COMMON_LIB_JARS_DIR=${HADOOP_COMMON_LIB_JARS_DIR:-"share/hadoop/common/lib"}
HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_COMMON_LIB_NATIVE_DIR:-"lib/native"}
HDFS_DIR=${HDFS_DIR:-"share/hadoop/hdfs"}
HDFS_LIB_JARS_DIR=${HDFS_LIB_JARS_DIR:-"share/hadoop/hdfs/lib"}
YARN_DIR=${YARN_DIR:-"share/hadoop/yarn"}
YARN_LIB_JARS_DIR=${YARN_LIB_JARS_DIR:-"share/hadoop/yarn/lib"}
MAPRED_DIR=${MAPRED_DIR:-"share/hadoop/mapreduce"}
MAPRED_LIB_JARS_DIR=${MAPRED_LIB_JARS_DIR:-"share/hadoop/mapreduce/lib"}

# the root of the Hadoop installation
# See HADOOP-6255 for directory structure layout
HADOOP_DEFAULT_PREFIX=$(cd -P -- "$common_bin"/.. && pwd -P)
HADOOP_PREFIX=${HADOOP_PREFIX:-$HADOOP_DEFAULT_PREFIX}
export HADOOP_PREFIX

#3.对slave文件判断。但cdh的hadoop不是依靠slave来启动集群的,而是要用户自己写集群启动脚本(也许是为了逼用户用他的CloudManager。。。)
 
#4.再次指定env文件

if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
fi

#5.指定java home

# Attempt to set JAVA_HOME if it is not set
if [[ -z $JAVA_HOME ]]; then
  # On OSX use java_home (or /Library for older versions)
  if [ "Darwin" == "$(uname -s)" ]; then
    if [ -x /usr/libexec/java_home ]; then
      export JAVA_HOME=($(/usr/libexec/java_home))
    else
      export JAVA_HOME=(/Library/Java/Home)
    fi
  fi

# Bail if we did not detect it
  if [[ -z $JAVA_HOME ]]; then
    echo "Error: JAVA_HOME is not set and could not be found." 1>&2
    exit 1
  fi
fi

#6.指定Java程序启动的heapsize,如果用户在hadoop-env.sh中指定了HADOOP_HEAPSIZE字段则会覆盖默认值1000m

# some Java parameters
JAVA_HEAP_MAX=-Xmx1000m

# check envvars which might override default args
if [ "$HADOOP_HEAPSIZE" != "" ]; then
  #echo "run with heapsize $HADOOP_HEAPSIZE"
  JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
  #echo $JAVA_HEAP_MAX
fi

#7.指定程序的classpath,一大串代码,总结下就是
 HADOOP_CONF_DIR+HADOOP_CLASSPATH+HADOOP_COMMON_DIR+HADOOP_COMMON_LIB_JARS_DIR+
 HADOOP_COMMON_LIB_JARS_DIR+HADOOP_COMMON_LIB_NATIVE_DIR+HDFS_DIR+HDFS_LIB_JARS_DIR
 +YARN_DIR+YARN_LIB_JARS_DIR+MAPRED_DIR+MAPRED_LIB_JARS_DIR
 
有一个要注意的,hadoop比较贴心的提供了HADOOP_USER_CLASSPATH_FIRST属性,如何设置了,
 则HADOOP_CLASSPATH(用户自定义classpath)会在hadoop自身的jar包前加载,用来解决用户
 想最先加载自定义的jar包情况。
 
#8.指定HADOOP_OPTS,-Dhadoop.log.dir这些类似参数会在conf下的log4j配置中用到

HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
  HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
  export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$JAVA_LIBRARY_PATH
fi
HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"

# Disable ipv6 as it can cause issues
HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:http://www.heiqu.com/27dea5bfb551815ab20432c31aab93ba.html