sunetdrive/templates/mariadb/check_galera_cluster.erb

304 lines
8.6 KiB
Plaintext
Raw Permalink Normal View History

2023-02-13 09:44:56 +00:00
#!/bin/bash
#It is a changed version of the original https://github.com/fridim/nagios-plugin-check_galera_cluster/blob/master/check_galera_cluster
PROGNAME=`basename $0`
VERSION="Version 1.1.5"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>, Claudio Kuenzler <claudiokuenzler.com>"
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3
warnAlerts=0
critAlerts=0
unknAlerts=0
warnText=""
critText=""
print_version() {
echo "$VERSION $AUTHOR"
}
print_help() {
print_version $PROGNAME $VERSION
echo ""
echo "$PROGNAME is a monitoring plugin to monitor Galera cluster status."
echo ""
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-0]"
echo ""
echo "Options:"
echo " u)"
echo " MySQL user."
echo " p)"
echo " MySQL password."
echo " H)"
echo " MySQL host."
echo " P)"
echo " MySQL port."
echo " m)"
echo " MySQL extra my.cnf configuration file."
echo " w)"
echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
echo " c)"
echo " Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2)."
#echo " f)"
#echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
echo " 0)"
echo " Rise CRITICAL if the node is not primary"
echo " s)"
echo " Create state file, detect disconnected nodes"
exit $ST_UK
}
# default values
crit=2
#fcp=0.1
check_executable() {
if [ -z "$1" ]; then
echo "check_executable: no parameter given!"
exit $ST_UK
fi
if ! command -v "$1" &>/dev/null; then
echo "UNKNOWN: Cannot find $1"
exit $ST_UK
fi
}
check_executable docker
check_executable bc
while getopts “hvu:p:H:P:w:c:m:s:0” OPTION; do
case $OPTION in
h)
print_help
exit $ST_UK
;;
v)
print_version $PROGNAME $VERSION
exit $ST_UK
;;
u)
mysqluser=$OPTARG
;;
p)
password=$OPTARG
;;
H)
mysqlhost=$OPTARG
;;
P)
port=$OPTARG
;;
m)
myconfig=$OPTARG
;;
w)
warn=$OPTARG
;;
c)
crit=$OPTARG
;;
0)
primary='TRUE'
;;
s)
stateFile=$OPTARG
;;
?)
echo "Unknown argument: $1"
print_help
exit $ST_UK
;;
esac
done
if [ -z "$warn" ]; then
warn=$crit
fi
create_param() {
if [ -n "$2" ]; then
echo $1$2
fi
}
param_mysqlhost=$(create_param -h "$mysqlhost")
param_port=$(create_param -P "$port")
param_mysqluser=$(create_param -u "$mysqluser")
param_password=$(create_param -p "$password")
param_configfile=$(create_param --defaults-extra-file= "$myconfig")
export MYSQL_PWD=$password
param_mysql="$param_mysqlhost $param_port $param_mysqluser $param_password $param_configfile"
#
# verify the database connection
#
/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e '\s;' >/dev/null 2>&1 || {
echo "CRITICAL: mysql connection check failed"
exit $ST_CR
}
#
# retrieve the mysql status
#
#rMysqlStatus=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "show status like 'wsrep_%';")
#
# verify that the node is part of a cluster
#
rClusterStateUuid=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_state_uuid'")
if [ -z $rClusterStateUuid ]; then
echo "CRITICAL: node is not part of a cluster."
exit $ST_CR
fi
#rFlowControl=$(echo "$rMysqlStatus" | awk '/wsrep_flow_control_paused\t/ {print $2}') # < 0.1
#rFlowControl=$(printf "%.14f" $rFlowControl) # issue #4
rClusterSize=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_size'")
rClusterStatus=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_status'") # Primary
rReady=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_ready'") # ON
rConnected=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_connected'") # ON
rLocalStateComment=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_local_state_comment'") # Synced
#rIncommingAddresses=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_incoming_addresses'")
rSSTMethod=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_VARIABLES where VARIABLE_NAME = 'wsrep_sst_method'") # mariabackup
#if [ -z "$rFlowControl" ]; then
# echo "UNKNOWN: wsrep_flow_control_paused is empty"
# unknAlerts=$(($unknAlerts+1))
#fi
#if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
# echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
# critAlerts=$(($criticalAlerts+1))
#fi
if [ "$primary" = 'TRUE' ]; then
if [ $rClusterStatus != 'Primary' ]; then
critText+="CRITICAL: node is not primary (wsrep_cluster_status). "
critAlerts=$(($criticalAlerts+1))
fi
fi
if [ $rReady != 'ON' ]; then
critText+="CRITICAL: node is not ready (wsrep_ready). "
critAlerts=$(($criticalAlerts+1))
fi
if [ $rConnected != 'ON' ]; then
critText+="CRITICAL: node is not connected (wsrep_connected). "
critAlerts=$(($criticalAlerts+1))
fi
if [ $rLocalStateComment != 'Synced' ]; then
critText+="CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment). "
critAlerts=$(($criticalAlerts+1))
fi
if [ $rSSTMethod != 'mariabackup' ]; then
critText+="CRITICAL: node is not backed up - actual state is: $rSSTMethod (wsrep_sst_method). "
critAlerts=$(($criticalAlerts+1))
fi
if [ $rClusterSize -gt $warn ]; then
# only display the ok message if the state check not enabled
if [ -z "$stateFile" ]; then
echo "OK: number of NODES = $rClusterSize"
fi
elif [ $rClusterSize -le $crit ]; then
critText+="CRITICAL: number of NODES = $rClusterSize. "
critAlerts=$(($criticalAlerts+1))
elif [ $rClusterSize -le $warn ]; then
warnText+="WARNING: number of NODES = $rClusterSize."
warnAlerts=$(($warnAlerts+1))
else
exit $ST_UK
fi
#
# detect is the connection is lost automatically
#
if [ ! -z "$stateFile" ]; then
touch $stateFile
if [ $? != "0" ]; then
echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
unknAlerts=$(($unknAlerts+1))
else
if [ $rConnected = "ON" ]; then
# get the current connected Nodes
currentNodes=$(echo $rIncommingAddresses} | tr "," "\n" | sort -u)
if [ -f "$stateFile" ]; then
# get the nodes added to the cluster
newNodes=$(echo $currentNodes | tr " " "\n" | comm -2 -3 - $stateFile)
# get the nodes that were removed from the cluster
missingNodes=$(echo $currentNodes | tr " " "\n" | comm -1 -3 - $stateFile)
if [ ! -z "$newNodes" ]; then
# add the new nodes to the cluster to the state file
echo $newNodes | tr " " "\n" >> $stateFile
fi
else
# there is no state file yet, creating new one.
echo $currentNodes | tr " " "\n" > $stateFile
fi # -f stateFile
# get the numeber of nodes that were part of the cluster before
maxClusterSize=$(cat $stateFile | wc -l)
if [ $maxClusterSize -eq $rClusterSize ]; then
if [ $maxClusterSize -eq 1 ]; then
if [ $crit -eq 0 -a $warn -eq 0 ]; then
echo "OK: running single-node database cluster"
fi
else
echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
fi
else
echo "WARNING: redundant $rClusterSize online / $maxClusterSize total, missing peers: $missingNodes"
warnAlerts=$(($warnAlerts+1))
fi
fi # rConnected
fi # -w stateFile
fi # -z stateFile
#
# exit
#
[ "$critAlerts" -gt "0" ] && echo $critText && exit $ST_CR
[ "$unknAlerts" -gt "0" ] && exit $ST_UK
[ "$warnAlerts" -gt "0" ] && echo $warnText && exit $ST_WR
exit 0