MySQL主从同步不一致实现邮件报警
方法一:
环境:由于我们采用腾讯云数据库做的主从,不能登录到云数据库上面,只能从其他一台云服务器上面进行监控。
定期验证MYSQL主从数据一致性在某些时候是一项很值得做的事情,google还提供了MYSQL补丁来自动实现该功能。但某些时候我们能够定期检查到复制的状态也足够了,这里借鉴网上一位网友自动检查mysql主从复制状态的脚本,该脚本实现功能:
1.通过mysql命令判断mysql从服务器三个主要的复制状态值是否正常,检测到有问题后,根据设定时间间隔值,再重复检测两遍,其中Seconds_Behind_Master值因为不能准确地描述复制延迟,所以在3600秒以下都判断为正常。
2.检测到有问题后记录日志并发送邮件通知状态值内容,问题持续则根据设定时间间隔值再发邮件通知,恢复正常也邮件通知。另外,无论正常与否,每天都发送一次邮件通知,由计划任务和时间相关的变量值决定。
该脚本可以监控主从复制状态,如果要验证主从MYSQL数据一致性,可以研究下google提供的MYSQL补丁。
第一步:首先安装mailx组件并配置好能够通过三方邮箱发送邮件
yum -y install mailx
然后编辑mailx的配置文件
vi /etc/mail.rc
在最末尾添加如下信息
set from=xxxxxx@qq.com smtp=smtp.qq.com
set smtp-auth-user=xxxxxx@qq.com smtp-auth-password=xxxxxx
set smtp-auth=login
保存退出后测试邮件是否能够正常发送出去
echo"zabbix test mail" |mail -s "zabbix" xxxxxx@qq.com
[root@monitor scripts]# cat check_replication_status.sh
#/bin/bash
#############################################
# author zhaoyanan
# date 2013/01/25 create
# update 2013/01/28 Adding duplicate detection
#
# Execution:
# touch /root/sh/mysql_slave_status.sh; chmod 700 /root/sh/mysql_slave_status.sh
# vi /etc/crontab
# 5,15,25,35,45,55 * * * * root /root/sh/mysql_slave_status.sh >> /root/sh/mysql_slave_status.log 2>&1
#############################################
######### set variables ############
tmpdir=/tmp
mysqlhost="x.x.x.x"
mysqlport="3306"
mysqlsocket="/var/lib/mysql/mysql.sock"
mysqlbinpath="/usr/bin/"
mysqluser="test"
mysqlpw="xxxx"
servername="123"
normal_status="$servername slave status ok!"
problem_status="$servername slave status problems!"
problem_many_status="$servername slave status problems! (too many times)"
returm_to_nomal="$servername slave status return to normal from the question!"
repeat_alarm_time=12 # Repeat alarm time interval ( About *10 minute. value of 12, about two hours)
failure_interval=10 # Interval after a problem is detected, the unit: seconds ( < 25 seconds)
reporting_time=0855 # Must be notified of the time, even if normal.
current_time=$(date +%H%M) # Current time
export PATH="$mysqlbinpath":/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin
###### do date ####################################
echo ""
echo "`date` Start monitoring."
echo ""
#### function ####
function mailto1() {
echo "$servername slave status: $slaveiostatus $slavesqlstatus $slavebehind" | mail -s "$normal_status" linuxidc@linuxidc.com
#/bin/mail -s "$normal_status" xxx@qq.com < /tmp/normal_status.txt
}
function mailto2() {
echo "$servername slave status: $slaveiostatus $lastioerror $slavesqlstatus $lastsqlerror $slavebehind " | mail -s "$problem_status" linuxidc@linuxidc.com
#/bin/mail -s "$problem_status" xxx@qq.com < /tmp/problem_status.txt
}
function mailto3() {
echo "$servername slave status: $slaveiostatus $lastioerror $slavesqlstatus $lastsqlerror $slavebehind" | mail -s "$problem_many_status" linuxidc@linuxidc.com
#/bin/mail -s "$problem_many_status" xxxx@qq.com < /tmp/problem_status.txt
}
function mailto4() {
echo "$servername slave status: $slaveiostatus $slavesqlstatus $slavebehind" | mail -s "$returm_to_nomal" linuxidc@linuxidc.com
#/bin/mail -s "$returm_to_nomal" xxxx@qq.com < /tmp/return_to_nomail.txt
}
function showstatus() {
/usr/bin/mysql -S $mysqlsocket -u"$mysqluser" -h"$mysqlhost" -p"$mysqlpw" -e "show slave status\G" > "$tmpdir"/"$servername"_status.txt
slaveiostatus=`cat "$tmpdir"/"$servername"_status.txt | grep "Slave_IO_Running" | sed 's/^[ \t]*//g'`
lastioerror=`cat "$tmpdir"/"$servername"_status.txt | grep "Last_IO_Error" | sed 's/^[ \t]*//g'`
slavesqlstatus=`cat "$tmpdir"/"$servername"_status.txt | grep "Slave_SQL_Running" | sed 's/^[ \t]*//g'`
lastsqlerror=`cat "$tmpdir"/"$servername"_status.txt | grep "Last_SQL_Error" | sed 's/^[ \t]*//g'`
slavebehind=`cat "$tmpdir"/"$servername"_status.txt | grep "Seconds_Behind_Master" | sed 's/^[ \t]*//g'`
slaveiovalue=`cat "$tmpdir"/"$servername"_status.txt | grep "Slave_IO_Running" | sed 's/^[ \t]*//g' | awk -F ': ' '{print $2}'`
slavesqlvalue=`cat "$tmpdir"/"$servername"_status.txt | grep "Slave_SQL_Running" | sed 's/^[ \t]*//g' | awk -F ': ' '{print $2}'`
slavebehindvalue=`echo "$slavebehind" | awk -F ': ' '{print $2}' | grep '[0-9]'`
}
### check ####
test -f "$tmpdir"/"$servername"_m_value || echo "0" > "$tmpdir"/"$servername"_m_value
test -f "$tmpdir"/"$servername"_n_value || echo "0" > "$tmpdir"/"$servername"_n_value
m=`cat "$tmpdir"/"$servername"_m_value`
n=`cat "$tmpdir"/"$servername"_n_value`
if showstatus;sync;sleep 1
[ "$slaveiovalue" == "Yes" ] && [ "$slavesqlvalue" == "Yes" ] && [ "$slavebehindvalue" -le 3600 ];then
echo "`date` First detected, $servername slave status ok!"
m=0; echo $m > "$tmpdir"/"$servername"_m_value
elif sleep $failure_interval
showstatus;sync;sleep 1
[ "$slaveiovalue" == "Yes" ] && [ "$slavesqlvalue" == "Yes" ] && [ "$slavebehindvalue" -le 3600 ];then
echo "`date` Second detection, $servername slave status ok!"
m=0; echo $m > "$tmpdir"/"$servername"_m_value
elif sleep $failure_interval
showstatus;sync;sleep 1
[ "$slaveiovalue" == "Yes" ] && [ "$slavesqlvalue" == "Yes" ] && [ "$slavebehindvalue" -le 3600 ];then
echo "`date` Third detection, $servername slave status ok!"
m=0; echo $m > "$tmpdir"/"$servername"_m_value
else
echo "`date` After three detection, $servername slave problems!"
m=$(($m+1)); echo $m > "$tmpdir"/"$servername"_m_value
fi
#### log and mail ####
if [ "$reporting_time" -eq "$current_time" ] && [ "$m" -eq 0 ] && [ "$n" -eq 0 ]; then
echo "mailto."
mailto1
elif [ "$m" -eq 1 ] && [ "$n" -eq 0 ]; then
echo "`date` $servername slave status problems! mailto2."
echo "$servername slave status: "
echo "$slaveiostatus"
echo "$lastioerror"
echo "$slavesqlstatus"
echo "$lastsqlerror"
echo "$slavebehind"
mailto2
n=1; echo $n > "$tmpdir"/"$servername"_n_value
elif [ "$m" -eq "$repeat_alarm_time" ] && [ "$n" -eq 1 ]; then
mailto3
echo "`date` $servername slave status problems! too many times, mailto3."
m=0; echo $m > "$tmpdir"/"$servername"_m_value
elif [ "$m" -eq 0 ] && [ "$n" -eq 1 ]; then
mailto4
echo "$servername slave status return to normal from the question. mailto4."
n=0; echo $n > "$tmpdir"/"$servername"_n_value
elif [ "$m" -ne 0 ] && [ "$n" -eq 1 ]; then
echo "`date` $servername slave status problems! too many times, nomailto"
fi
[root@monitor scripts]#
收到报警邮件
方法二:
下载并解压sendEmail
tar -zxvf sendEmail-v1.56.tar.gz
cp sendEmail-v1.56/sendEmail /usr/local/bin/
拷贝之后就能用了
发邮件命令:
/usr/local/bin/sendEmail -f zhang_peicheng@163.com -t 756475064@qq.com -s smtp.163.com -u "test" -xu zhang_peicheng -xp xxxxxxxxxxxxx -m "this is a test email" -a /tmp/123.txt
简要说明 :
–f 发送方地址
–t 发给谁
–s 发送方smtp服务器
–u 主题
-xu 发送邮件账号
–xp 发送方邮箱的密码
-m 内容
–a 邮件附件
2.编写mysql主从监控脚本
# cat mysql_check_replication_status.sh
#!/bin/bash
SLAVE_IO_S=`/usr/bin/mysql -uroot -h 10.10.10.10 -p123456 -e "show slave status\G" | grep "Slave_IO_Running" | awk -F ':' '{print $2}'`
SLAVE_SQL_S=`/usr/bin/mysql -uroot -h 10.10.10.10 -p123456 -e "show slave status\G" | grep "Slave_SQL_Running" | awk -F ':' '{print $2}'`
date1=`date +%Y%m%d`
if [ $SLAVE_IO_S == "Yes" ] && [ $SLAVE_SQL_S == "Yes" ];then
echo "the mysql-master-slave-status is OK"
else
echo "the mysql-master-slave-status is falied"
if [ ! -d /tmp/$data1 ];then
mkdir -p /tmp/$data1
fi
/usr/bin/mysql -uroot -h 10.10.10.10 -p123456 -e "show slave status\G" > /tmp/$data1/mysql-master-slave-status.txt
/usr/local/bin/sendEmail -f linuxidc@linuxidc.com -t linuxidc@linuxidc.com -s smtp.exmail.qq.com -u "mysql-master-slave-status" -xu linuxidc@linuxidc.com -xp 123456 -m "the mysql-master-slave status is failed" -a /tmp/$data1/mysql-master-slave-status.txt
fi
#!/bin/bash
mysql_user="root"
mysql_pass="123456"
email_addr="slave@linuxidc.com"
mysql_status=`netstat -nl | awk 'NR>2{if ($4 ~ /.*:3306/) {print "Yes";exit 0}}'`
if [ "$mysql_status" == "Yes" ];then
slave_status=`mysql -u${mysql_user} -p${mysql_pass} -e"show slave status\G" | grep "Running" | awk '{if ($2 != "Yes") {print "No";exit 1}}'`
if [ "$slave_status" == "No" ];then
echo "slave is not working!"
[ ! -f "/tmp/slave" ] && echo "Slave is not working!" | mail -s "Warn!MySQL Slave is not working" ${email_addr}
touch /tmp/slave
else
echo "slave is working."
[ -f "/tmp/slave" ] && rm -f /tmp/slave
fi
[ -f "/tmp/mysql_down" ] && rm -f /tmp/mysql_down
else
[ ! -f "/tmp/mysql_down" ] && echo "Mysql Server is down!" | mail -s "Warn!MySQL server is down!" ${email_addr}
touch /tmp/mysql_down
fi
监控检测redis主从状态的脚本
# cat check_replication_status_redis.sh
#!/bin/bash
STATUS=`/usr/local/bin/redis-cli -h 127.0.0.1 -p 16379 -a 123456 info | grep -v "^#" | grep 'master_link_status' | awk -F: '{print $2}'`
DATE=`date +%Y%m%d`
if [ $STATUS != 'down' ];then
echo "The redis-master-slave-satus is ok"
else
echo "The redis-master-slave-status is down, please to check redis-slave service status !!!"
if [ ! -d /tmp/$DATE ];then
mkdir -p /tmp/$DATE
fi
/usr/local/bin/redis-cli -h 127.0.0.1 -p 16379 -a 123456 info > /tmp/$DATE/redis-master-slave-status.txt
/usr/local/bin/sendEmail -f 123456@qq.com.com -t 123456@qq.com -s smtp.exmail.qq.com -u "redis-master-slave-status" -xu 123456@qq.com -xp 123456 -m "The redis-master-slave status is down" -a /tmp/$DATE/redis-master-slave-status.txt
fi