304 lines
8.6 KiB
Plaintext
304 lines
8.6 KiB
Plaintext
#!/bin/bash
|
|
|
|
# This is a modified version of the original plugin: https://github.com/fridim/nagios-plugin-check_galera_cluster/blob/master/check_galera_cluster
|
|
|
|
|
|
# Plugin identity, printed by print_version and print_help.
# PROGNAME is the file name of this script without its directory part; "$0" is
# quoted and preceded by "--" so paths containing spaces or starting with a
# dash are handled correctly.
PROGNAME=$(basename -- "$0")
VERSION="Version 1.1.5"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>, Claudio Kuenzler <claudiokuenzler.com>"
|
|
|
|
# Exit codes used by this plugin: OK, WARNING, CRITICAL and UNKNOWN.
ST_OK=0 ST_WR=1 ST_CR=2 ST_UK=3

# Number of findings collected per severity while the checks run.
warnAlerts=0 critAlerts=0 unknAlerts=0

# Message buffers; the checks append to these and they are printed on exit.
warnText="" critText=""
|
|
|
|
# Print the plugin version followed by the author list on a single line.
# Globals: VERSION (read), AUTHOR (read)
# Arguments passed by callers are ignored.
print_version() {
  printf '%s %s\n' "$VERSION" "$AUTHOR"
}
|
|
|
|
# Print the version line, a short description, the usage synopsis and the
# option list, then terminate the script with the UNKNOWN status.
# Globals: PROGNAME (read), VERSION (read), ST_UK (read)
# Outputs: help text on stdout
print_help() {
  print_version "$PROGNAME" "$VERSION"
  # One argument per output line; printf repeats the format for each of them.
  printf '%s\n' \
    "" \
    "$PROGNAME is a monitoring plugin to monitor Galera cluster status." \
    "" \
    "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-0]" \
    "" \
    "Options:" \
    " u)" \
    " MySQL user." \
    " p)" \
    " MySQL password." \
    " H)" \
    " MySQL host." \
    " P)" \
    " MySQL port." \
    " m)" \
    " MySQL extra my.cnf configuration file." \
    " w)" \
    " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)." \
    " c)" \
    " Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2)." \
    " 0)" \
    " Rise CRITICAL if the node is not primary" \
    " s)" \
    " Create state file, detect disconnected nodes"
  # The -f option (critical value of wsrep_flow_control_paused, default 0.1)
  # is disabled in this version and therefore not listed.
  exit $ST_UK
}
|
|
|
|
# Defaults applied before the command line is parsed.
# crit: critical threshold for the cluster size; -c overrides it.
crit=2
# Threshold for wsrep_flow_control_paused; disabled in this version.
#fcp=0.1
|
|
|
|
# Ensure that a required command can be found by the shell.
# Globals:   ST_UK (read)
# Arguments: $1 - name of the command to look up
# Outputs:   a message on stdout when the argument is missing or the
#            command cannot be found
# Exits the script with ST_UK in both failure cases; returns normally otherwise.
check_executable() {
  local tool=$1

  [ -n "$tool" ] || {
    echo "check_executable: no parameter given!"
    exit $ST_UK
  }

  command -v "$tool" >/dev/null 2>&1 || {
    echo "UNKNOWN: Cannot find $tool"
    exit $ST_UK
  }
}
|
|
|
|
# Verify the external tools this plugin needs before doing any work.
# Every database query in this file is run through "docker exec", so docker is
# mandatory. bc was only needed by the wsrep_flow_control_paused comparison,
# which is commented out in this version; requiring it made the plugin return
# UNKNOWN on hosts without bc for no reason, so it is no longer checked.
check_executable docker
|
|
|
|
# Parse the command line.
#   -h            print help and exit       -v  print version and exit
#   -u USER       mysqluser                 -p PASSWORD  password
#   -H HOST       mysqlhost                 -P PORT      port
#   -m FILE       myconfig                  -s FILE      stateFile
#   -w SIZE       warn                      -c SIZE      crit
#   -0            primary='TRUE' (the node must be in the Primary component)
# The option string uses plain ASCII quotes; typographic quotes would become
# part of the option string itself. The leading ':' selects getopts' silent
# mode so that the offending option letter is available in OPTARG and can be
# reported accurately.
while getopts ":hvu:p:H:P:w:c:m:s:0" OPTION; do
  case "$OPTION" in
    h)
      print_help
      exit $ST_UK
      ;;
    v)
      print_version "$PROGNAME" "$VERSION"
      exit $ST_UK
      ;;
    u)
      mysqluser=$OPTARG
      ;;
    p)
      password=$OPTARG
      ;;
    H)
      mysqlhost=$OPTARG
      ;;
    P)
      port=$OPTARG
      ;;
    m)
      myconfig=$OPTARG
      ;;
    w)
      warn=$OPTARG
      ;;
    c)
      crit=$OPTARG
      ;;
    0)
      primary='TRUE'
      ;;
    s)
      stateFile=$OPTARG
      ;;
    :)
      # Silent mode reports a missing option argument as ':'.
      echo "Option -$OPTARG requires an argument"
      print_help
      exit $ST_UK
      ;;
    *)
      # Silent mode reports an unknown option as '?' with the letter in OPTARG.
      echo "Unknown argument: -$OPTARG"
      print_help
      exit $ST_UK
      ;;
  esac
done
|
|
|
|
# Without -w (or with an empty value) the warning threshold follows the
# critical threshold.
warn=${warn:-$crit}
|
|
|
|
# Build one command-line parameter by joining an option prefix and its value.
# Arguments: $1 - option prefix, e.g. -h or --defaults-extra-file=
#            $2 - option value
# Outputs:   "$1$2" followed by a newline on stdout when $2 is non-empty;
#            nothing when $2 is empty.
create_param() {
  if [ -n "$2" ]; then
    # printf with a quoted argument emits the text verbatim: no word splitting,
    # no glob expansion, and no risk of echo treating the text as an option.
    printf '%s\n' "$1$2"
  fi
}
|
|
|
|
# Turn every connection option that was supplied into its mysql client flag;
# options that were not supplied yield an empty string.
param_configfile=$(create_param --defaults-extra-file= "$myconfig")
param_password=$(create_param -p "$password")
param_mysqluser=$(create_param -u "$mysqluser")
param_port=$(create_param -P "$port")
param_mysqlhost=$(create_param -h "$mysqlhost")

# Also expose the password through the environment.
MYSQL_PWD=$password
export MYSQL_PWD

# Space-separated parameter string in the order host, port, user, password,
# config file.
# NOTE(review): no docker exec call in this file references param_mysql; they
# pass --defaults-group-suffix=_backup instead - confirm whether this string
# is still needed.
printf -v param_mysql '%s %s %s %s %s' \
  "$param_mysqlhost" "$param_port" "$param_mysqluser" "$param_password" "$param_configfile"
|
|
|
|
#
|
|
# verify the database connection
|
|
#
|
|
|
|
# Run the mysql client's \s command inside the mariadb_db_1 container and
# discard all output; only the exit status matters. A failure ends the check
# with CRITICAL.
if ! /usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e '\s;' >/dev/null 2>&1; then
  echo "CRITICAL: mysql connection check failed"
  exit $ST_CR
fi
|
|
|
|
#
|
|
# retrieve the mysql status
|
|
#
|
|
|
|
#rMysqlStatus=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "show status like 'wsrep_%';")
|
|
|
|
#
|
|
# verify that the node is part of a cluster
|
|
#
|
|
|
|
# Read wsrep_cluster_state_uuid from the node; an empty result is reported as
# the node not being part of a cluster and ends the check with CRITICAL.
rClusterStateUuid=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_state_uuid'")

# The expansion is quoted so the test always sees exactly one operand, whatever
# the query printed.
if [ -z "$rClusterStateUuid" ]; then
  echo "CRITICAL: node is not part of a cluster."
  exit $ST_CR
fi
|
|
|
|
#rFlowControl=$(echo "$rMysqlStatus" | awk '/wsrep_flow_control_paused\t/ {print $2}') # < 0.1
|
|
#rFlowControl=$(printf "%.14f" $rFlowControl) # issue #4
|
|
|
|
|
|
# Fetch a single value from an information_schema table inside the container.
# Arguments: $1 - table name (GLOBAL_STATUS or GLOBAL_VARIABLES)
#            $2 - variable name
# Outputs:   whatever the mysql client prints for the query (batch mode,
#            column names suppressed)
galera_lookup() {
  /usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.$1 where VARIABLE_NAME = '$2'"
}

# The trailing comments show the value the later checks expect.
rClusterSize=$(galera_lookup GLOBAL_STATUS wsrep_cluster_size)
rClusterStatus=$(galera_lookup GLOBAL_STATUS wsrep_cluster_status)            # Primary
rReady=$(galera_lookup GLOBAL_STATUS wsrep_ready)                             # ON
rConnected=$(galera_lookup GLOBAL_STATUS wsrep_connected)                     # ON
rLocalStateComment=$(galera_lookup GLOBAL_STATUS wsrep_local_state_comment)   # Synced

# The wsrep_incoming_addresses lookup is disabled in this version:
#rIncommingAddresses=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_incoming_addresses'")

rSSTMethod=$(galera_lookup GLOBAL_VARIABLES wsrep_sst_method)                 # mariabackup
|
|
|
|
|
|
#if [ -z "$rFlowControl" ]; then
|
|
# echo "UNKNOWN: wsrep_flow_control_paused is empty"
|
|
# unknAlerts=$(($unknAlerts+1))
|
|
#fi
|
|
|
|
#if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
|
|
# echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
|
|
# critAlerts=$(($criticalAlerts+1))
|
|
#fi
|
|
|
|
# Evaluate the retrieved wsrep values. Every failed check appends a message to
# critText or warnText and bumps the matching counter; the final exit status
# is decided at the end of the script.
#
# All expansions are quoted: with an unquoted empty value the test command
# itself fails, and a failing test would silently skip the alert.
# The counters are incremented through their own name (critAlerts), so they
# reflect the real number of findings.

# Only enforced when -0 was given.
if [ "$primary" = 'TRUE' ] && [ "$rClusterStatus" != 'Primary' ]; then
  critText+="CRITICAL: node is not primary (wsrep_cluster_status). "
  critAlerts=$((critAlerts + 1))
fi

if [ "$rReady" != 'ON' ]; then
  critText+="CRITICAL: node is not ready (wsrep_ready). "
  critAlerts=$((critAlerts + 1))
fi

if [ "$rConnected" != 'ON' ]; then
  critText+="CRITICAL: node is not connected (wsrep_connected). "
  critAlerts=$((critAlerts + 1))
fi

if [ "$rLocalStateComment" != 'Synced' ]; then
  critText+="CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment). "
  critAlerts=$((critAlerts + 1))
fi

if [ "$rSSTMethod" != 'mariabackup' ]; then
  critText+="CRITICAL: node is not backed up - actual state is: $rSSTMethod (wsrep_sst_method). "
  critAlerts=$((critAlerts + 1))
fi

# Cluster size: above warn is OK, at or below crit is CRITICAL, otherwise at
# or below warn is WARNING.
if [ "$rClusterSize" -gt "$warn" ]; then
  # only display the ok message if the state check is not enabled
  if [ -z "$stateFile" ]; then
    echo "OK: number of NODES = $rClusterSize"
  fi
elif [ "$rClusterSize" -le "$crit" ]; then
  critText+="CRITICAL: number of NODES = $rClusterSize. "
  critAlerts=$((critAlerts + 1))
elif [ "$rClusterSize" -le "$warn" ]; then
  warnText+="WARNING: number of NODES = $rClusterSize."
  warnAlerts=$((warnAlerts + 1))
else
  # Reached only when one of the operands is not an integer, which makes all
  # comparisons above fail. Say so instead of exiting without any output.
  echo "UNKNOWN: cannot compare cluster size '$rClusterSize' with warning '$warn' / critical '$crit'"
  exit $ST_UK
fi
|
|
|
|
#
|
|
# automatically detect whether the connection to other nodes was lost
|
|
#
|
|
|
|
# Track cluster membership in $stateFile and report peers that disappeared.
#
# The state file holds one node address per line: every address ever seen in
# wsrep_incoming_addresses. The number of remembered addresses is compared
# with the current cluster size; a mismatch raises a WARNING that lists the
# remembered addresses which are no longer present.
#
# Globals:
#   stateFile, rConnected, rClusterSize, crit, warn (read)
#   rIncommingAddresses (read; fetched here when empty, because the query that
#     used to fill it is commented out elsewhere in this version)
#   warnAlerts, unknAlerts (written)
# Outputs: OK / WARNING / UNKNOWN message on stdout
check_state_file() {
  local currentNodes knownNodes allNodes missingNodes maxClusterSize

  if ! touch "$stateFile"; then
    echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
    unknAlerts=$((unknAlerts + 1))
    return
  fi

  # Membership is only evaluated while the node reports itself as connected.
  if [ "$rConnected" != "ON" ]; then
    return
  fi

  if [ -z "$rIncommingAddresses" ]; then
    rIncommingAddresses=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_incoming_addresses'")
  fi

  # One address per line, blank entries dropped. sort and comm both run with
  # LC_ALL=C because comm requires its inputs in the collation order it uses.
  currentNodes=$(printf '%s\n' "$rIncommingAddresses" | tr ', ' '\n\n' | sed '/^$/d' | LC_ALL=C sort -u)
  knownNodes=$(sed '/^$/d' "$stateFile" | LC_ALL=C sort -u)
  allNodes=$(printf '%s\n%s\n' "$knownNodes" "$currentNodes" | sed '/^$/d' | LC_ALL=C sort -u)

  # nodes that were part of the cluster before but are not listed any more
  missingNodes=$(LC_ALL=C comm -13 <(printf '%s\n' "$currentNodes") <(printf '%s\n' "$knownNodes") | sed '/^$/d')

  # Rewrite the state file only when a new node showed up; it stays sorted.
  if [ "$allNodes" != "$knownNodes" ]; then
    printf '%s\n' "$allNodes" > "$stateFile"
  fi

  # number of nodes that have ever been part of the cluster
  maxClusterSize=$(printf '%s\n' "$allNodes" | grep -c .)

  if [ "$maxClusterSize" -eq "$rClusterSize" ]; then
    if [ "$maxClusterSize" -eq 1 ]; then
      # A single node is only reported as OK when both thresholds are 0.
      if [ "$crit" -eq 0 ] && [ "$warn" -eq 0 ]; then
        echo "OK: running single-node database cluster"
      fi
    else
      echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
    fi
  else
    # Join the missing peers with spaces to keep the message on one line.
    echo "WARNING: redundant $rClusterSize online / $maxClusterSize total, missing peers: ${missingNodes//$'\n'/ }"
    warnAlerts=$((warnAlerts + 1))
  fi
}

# The state check is optional and only runs when -s was given.
if [ -n "$stateFile" ]; then
  check_state_file
fi
|
|
|
|
|
|
#
|
|
# exit
#
# Pick the final status by severity: CRITICAL first, then UNKNOWN, then
# WARNING, otherwise OK. The collected message is printed as literal text via
# printf with a quoted argument, so values taken from the database cannot be
# word-split or glob-expanded. The trailing separator blank of critText is
# trimmed, which matches what was printed before.
if [ "$critAlerts" -gt 0 ]; then
  printf '%s\n' "${critText% }"
  exit $ST_CR
fi

# UNKNOWN messages were already printed where they were detected.
if [ "$unknAlerts" -gt 0 ]; then
  exit $ST_UK
fi

if [ "$warnAlerts" -gt 0 ]; then
  printf '%s\n' "$warnText"
  exit $ST_WR
fi

exit 0
|