Nagios安装部署全攻略(3)

贴出一个新增加的配置:
# Host object for the monitored Linux machine "linhost" (192.168.1.101).
# Every directive not listed here is inherited from the "linux-server" template.
# NOTE(review): that template is not part of this snippet - presumably it comes
# from the stock templates.cfg; confirm it is loaded by nagios.cfg.
# host_name is the key the service definitions for this machine refer to.
define host{
        use            linux-server
        host_name      linhost
        alias          My Linux Host
        address        192.168.1.101
        }
# Service checks attached to host "linhost". All of them inherit from the
# "generic-service" template and run through the check_nrpe command; the text
# after "!" is handed to that command as its first argument.
# NOTE(review): presumably each argument names a command[...] entry in the
# remote host's nrpe.cfg - confirm that every name used below is defined there
# (check_sd1 / check_sd2 in particular are not stock NRPE sample commands).

# Remote check "check_users".
define service{
        use                    generic-service
        host_name              linhost
        service_description    CHECK USERS
        check_command          check_nrpe!check_users
        }
# Remote check "check_load".
define service{
        use                    generic-service
        host_name              linhost
        service_description    Load
        check_command          check_nrpe!check_load
        }
# Remote check "check_sd1" (presumably disk usage of /dev/sda1 - TODO confirm).
define service{
        use                    generic-service
        host_name              linhost
        service_description    SDA1
        check_command          check_nrpe!check_sd1
        }
# Remote check "check_sd2" (presumably disk usage of /dev/sda2 - TODO confirm).
define service{
        use                    generic-service
        host_name              linhost
        service_description    SDA2
        check_command          check_nrpe!check_sd2
        }
# Remote check "check_zombie_procs".
define service{
        use                    generic-service
        host_name              linhost
        service_description    Zombie
        check_command          check_nrpe!check_zombie_procs
        }
# Remote check "check_total_procs".
define service{
        use                    generic-service
        host_name              linhost
        service_description    total procs
        check_command          check_nrpe!check_total_procs
        }

五、增加监控脚本
比如 CPU、内存、LVS 等监控项,需要自己写脚本来实现。其实很简单,只要注意两点:控制输入(参数等)和格式化输出。只要脚本的输出符合 Nagios 插件的约定即可:向标准输出打印一行状态文本(可在“|”之后附带性能数据),并用退出码 0、1、2、3 分别表示 OK、WARNING、CRITICAL、UNKNOWN。
1、CPU监控
vi check_cpu.sh

#!/bin/bash
# Filename: check_cpu.sh
#
# Nagios plugin: reports the percentage of CPU in use, read from procinfo or
# sar (whichever is installed, procinfo preferred), and compares it with the
# warning / critical thresholds given on the command line.
#
# Usage:
#   check_cpu.sh -w <integer> -c <integer>
#
# Exit status (Nagios plugin convention):
#   0  OK        used% is below the warning threshold
#   1  WARNING   used% >= warning and < critical
#   2  CRITICAL  used% >= critical
#   3  UNKNOWN   bad arguments, no usable tool, or unparsable tool output
#
# bash is required: the script relies on `function`, `[[ ]]` and `echo -e`,
# none of which are guaranteed by a plain POSIX /bin/sh.

# Absolute paths of the candidate tools; empty when a tool is not installed.
procinfo=$(command -v procinfo 2>/dev/null)
sar=$(command -v sar 2>/dev/null)

# Print the usage text and leave with UNKNOWN (3); a negative exit status is
# not a valid Nagios plugin result.
function help {
 echo -e "\n\tThis plugin shows the % of used CPU, using either procinfo or sar (whichever is available)\n\n\t$0:\n\t\t-c <integer>\tIf the % of used CPU is above <integer>, returns CRITICAL state\n\t\t-w <integer>\tIf the % of used CPU is below CRITICAL and above <integer>, returns WARNING state\n"
 exit 3
}

# Getting parameters:
while getopts "w:c:h" OPT; do
 case $OPT in
  w) warning=$OPTARG;;
  c) critical=$OPTARG;;
  *) help;;   # -h, and any option getopts does not recognise
 esac
done

# Checking parameters:
if [ -z "$warning" ] || [ -z "$critical" ]; then
 echo "ERROR: You must specify warning and critical levels"
 help
fi
# Reject anything that is not a plain integer before it reaches an arithmetic
# comparison ([[ -ge ]] would otherwise evaluate it as an expression).
if ! [[ "$warning" =~ ^[0-9]+$ && "$critical" =~ ^[0-9]+$ ]]; then
 echo "ERROR: warning and critical levels must be non-negative integers"
 help
fi
# Force base 10 so that values such as "08" are not parsed as octal.
warning=$((10#$warning))
critical=$((10#$critical))
if [[ $warning -ge $critical ]]; then
 echo "ERROR: critical level must be higher than warning level"
 help
fi

# Assuring that the needed tools exist.
# This is done with if/elif in the current shell: assignments or `exit` placed
# inside a ( ... ) subshell would not affect the script itself.
if [ -n "$procinfo" ] && [ -x "$procinfo" ]; then
 tool="procinfo"
elif [ -n "$sar" ] && [ -x "$sar" ]; then
 tool="sar"
else
 echo "ERROR: You must have either procinfo or sar installed in order to run this plugin"
 exit 3
fi

# Doing the actual check.
# LC_ALL=C keeps the decimal separator a "." so `cut -d.` yields the integer part.
if [ "$tool" = "procinfo" ]; then
 idle=$(LC_ALL=C "$procinfo" | grep idle | cut -d% -f1 | awk '{print $NF}' | cut -d. -f1)
else
 # %idle is the last column of sar's CPU report; $NF stays correct when the
 # number of columns differs between sysstat versions.
 idle=$(LC_ALL=C "$sar" | tail -1 | awk '{print $NF}' | cut -d. -f1)
fi

# The tool may have produced nothing usable (e.g. sar without collected data).
if ! [[ "$idle" =~ ^[0-9]+$ ]]; then
 echo "UNKNOWN - could not read the CPU idle percentage from $tool"
 exit 3
fi
used=$((100 - 10#$idle))

# Comparing the result and setting the correct level:
if [[ $used -ge $critical ]]; then
        msg="CRITICAL"
        status=2
elif [[ $used -ge $warning ]]; then
        msg="WARNING"
        status=1
else
        msg="OK"
        status=0
fi

# Printing the results (status text | performance data):
echo "$msg - CPU used=$used% idle=$idle% | 'CPU Usage'=$used%;$warning;$critical;"
# Bye!
exit $status


修改脚本的属主、属组并添加可执行权限(后面的脚本也都要做同样的操作):
#chown nagios:nagios check_cpu.sh
#chmod +x check_cpu.sh
#./check_cpu.sh -w 60 -c 80

内容版权声明:除非注明,否则皆为本站原创文章。

转载注明出处:https://www.heiqu.com/2182f8f0aa99ea86b2db6cbcc2b1c5f1.html