#!/bin/bash
#
# Monitoring plugin that checks the Galera cluster status of a MariaDB server
# running inside a Docker container.
#
# It is a changed version of the original
# https://github.com/fridim/nagios-plugin-check_galera_cluster/blob/master/check_galera_cluster
#
# Every query is executed through
#   docker exec <container> mysql --defaults-group-suffix=_backup ...
# so the mysql client inside the container takes its connection settings from
# its own option files. The -u/-p/-H/-P/-m options are still accepted so that
# existing command definitions keep working, but they are not passed to the
# client.
#
# Exit status: 0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN.
# Exactly one status line is printed; the most severe finding wins
# (CRITICAL > UNKNOWN > WARNING > OK).

PROGNAME=$(basename -- "$0")
VERSION="Version 1.1.5"
AUTHOR="Guillaume Coré , Ales Nosek , Staf Wagemakers , Claudio Kuenzler "

readonly ST_OK=0
readonly ST_WR=1
readonly ST_CR=2
readonly ST_UK=3

# Docker binary and container the mysql client is executed in.
readonly DOCKER_BIN=/usr/bin/docker
readonly DB_CONTAINER=mariadb_db_1

warnAlerts=0
critAlerts=0
unknAlerts=0
okText=""
warnText=""
critText=""
unknText=""

# default values
crit=2
#fcp=0.1

#######################################
# Print the plugin version and authors.
#######################################
print_version() {
  echo "$VERSION $AUTHOR"
}

#######################################
# Print the usage text and exit with the UNKNOWN status.
#######################################
print_help() {
  print_version
  cat <<EOF

$PROGNAME is a monitoring plugin to monitor Galera cluster status.

$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-0]

Options:
 u)
 MySQL user.
 p)
 MySQL password.
 H)
 MySQL host.
 P)
 MySQL port.
 m)
 MySQL extra my.cnf configuration file.
 w)
 Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical).
 c)
 Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2).
 0)
 Rise CRITICAL if the node is not primary
 s)
 Create state file, detect disconnected nodes
EOF
  exit "$ST_UK"
}

#######################################
# Exit with UNKNOWN unless the given command can be executed.
# Arguments: $1 - command name or path
#######################################
check_executable() {
  if [ -z "$1" ]; then
    echo "check_executable: no parameter given!"
    exit "$ST_UK"
  fi
  if ! command -v "$1" &>/dev/null; then
    echo "UNKNOWN: Cannot find $1"
    exit "$ST_UK"
  fi
}

# Helpers that record one finding of the given severity.
# Arguments: $1 - message text without the severity prefix
add_ok()       { okText+="OK: $1 "; }
add_warning()  { warnText+="WARNING: $1 ";  warnAlerts=$((warnAlerts + 1)); }
add_critical() { critText+="CRITICAL: $1 "; critAlerts=$((critAlerts + 1)); }
add_unknown()  { unknText+="UNKNOWN: $1 ";  unknAlerts=$((unknAlerts + 1)); }

#######################################
# Print the collected messages of the most severe level and exit with the
# matching status code.
#######################################
finish() {
  if [ "$critAlerts" -gt 0 ]; then
    printf '%s\n' "${critText% }"
    exit "$ST_CR"
  fi
  if [ "$unknAlerts" -gt 0 ]; then
    printf '%s\n' "${unknText% }"
    exit "$ST_UK"
  fi
  if [ "$warnAlerts" -gt 0 ]; then
    printf '%s\n' "${warnText% }"
    exit "$ST_WR"
  fi
  # Always emit a status line, even when no check produced an OK message.
  if [ -z "$okText" ]; then
    add_ok "number of NODES = $rClusterSize"
  fi
  printf '%s\n' "${okText% }"
  exit "$ST_OK"
}

#######################################
# Run one SQL statement with the mysql client inside the container.
# Arguments: $1 - SQL statement
# Outputs:   raw batch-mode result (no column names) on stdout
#######################################
mysql_query() {
  "$DOCKER_BIN" exec "$DB_CONTAINER" mysql --defaults-group-suffix=_backup -B -N -e "$1"
}

#######################################
# Read one value from an information_schema name/value table.
# Arguments: $1 - table (GLOBAL_STATUS or GLOBAL_VARIABLES)
#            $2 - variable name
#######################################
get_global() {
  mysql_query "select variable_value from information_schema.$1 where VARIABLE_NAME = '$2'"
}

# The check is executed with the same binary path that is used for the queries.
# (bc is no longer required: only the disabled flow-control check used it.)
check_executable "$DOCKER_BIN"

# Leading ':' selects silent error reporting, so the offending option letter is
# available in OPTARG for our own messages.
while getopts ':hvu:p:H:P:w:c:m:s:0' OPTION; do
  case "$OPTION" in
    h)
      print_help
      exit "$ST_UK"
      ;;
    v)
      print_version
      exit "$ST_UK"
      ;;
    u | p | H | P | m)
      # Accepted for command-line compatibility; the mysql client runs inside
      # the container and does not receive these values.
      ;;
    w)
      warn=$OPTARG
      ;;
    c)
      crit=$OPTARG
      ;;
    0)
      primary='TRUE'
      ;;
    s)
      stateFile=$OPTARG
      ;;
    :)
      echo "Option -$OPTARG requires an argument"
      print_help
      exit "$ST_UK"
      ;;
    \?)
      echo "Unknown argument: -$OPTARG"
      print_help
      exit "$ST_UK"
      ;;
  esac
done

if [ -z "$warn" ]; then
  warn=$crit
fi

# The thresholds are used in integer comparisons below.
if ! [[ "$warn" =~ ^[0-9]+$ && "$crit" =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: -w and -c must be non-negative integers"
  exit "$ST_UK"
fi

#
# verify the database connection
#
mysql_query '\s;' >/dev/null 2>&1 || {
  echo "CRITICAL: mysql connection check failed"
  exit "$ST_CR"
}

#
# verify that the node is part of a cluster
#
rClusterStateUuid=$(get_global GLOBAL_STATUS wsrep_cluster_state_uuid)

if [ -z "$rClusterStateUuid" ]; then
  echo "CRITICAL: node is not part of a cluster."
  exit "$ST_CR"
fi

#
# retrieve the wsrep status values (expected value in the trailing comment)
#
rClusterSize=$(get_global GLOBAL_STATUS wsrep_cluster_size)
rClusterStatus=$(get_global GLOBAL_STATUS wsrep_cluster_status)          # Primary
rReady=$(get_global GLOBAL_STATUS wsrep_ready)                           # ON
rConnected=$(get_global GLOBAL_STATUS wsrep_connected)                   # ON
rLocalStateComment=$(get_global GLOBAL_STATUS wsrep_local_state_comment) # Synced
rSSTMethod=$(get_global GLOBAL_VARIABLES wsrep_sst_method)               # mariabackup

# All comparisons are quoted so that an empty query result is reported as a
# failure instead of producing a test syntax error.
if [ "$primary" = 'TRUE' ] && [ "$rClusterStatus" != 'Primary' ]; then
  add_critical "node is not primary (wsrep_cluster_status)."
fi

if [ "$rReady" != 'ON' ]; then
  add_critical "node is not ready (wsrep_ready)."
fi

if [ "$rConnected" != 'ON' ]; then
  add_critical "node is not connected (wsrep_connected)."
fi

if [ "$rLocalStateComment" != 'Synced' ]; then
  add_critical "node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)."
fi

if [ "$rSSTMethod" != 'mariabackup' ]; then
  add_critical "node is not backed up - actual state is: $rSSTMethod (wsrep_sst_method)."
fi

#
# compare the cluster size with the thresholds
#
if ! [[ "$rClusterSize" =~ ^[0-9]+$ ]]; then
  add_unknown "wsrep_cluster_size is not a number: '$rClusterSize'."
  finish
fi

if [ "$rClusterSize" -gt "$warn" ]; then
  # only display the ok message if the state check is not enabled
  if [ -z "$stateFile" ]; then
    add_ok "number of NODES = $rClusterSize"
  fi
elif [ "$rClusterSize" -le "$crit" ]; then
  add_critical "number of NODES = $rClusterSize."
else
  add_warning "number of NODES = $rClusterSize."
fi

#
# detect lost peers by remembering every node address ever seen
#
if [ -n "$stateFile" ]; then
  # touch doubles as the writability test and creates the file on first use
  if ! touch -- "$stateFile" 2>/dev/null; then
    add_unknown "stateFile \"$stateFile\" is not writeable"
  elif [ "$rConnected" = "ON" ]; then
    rIncomingAddresses=$(get_global GLOBAL_STATUS wsrep_incoming_addresses)

    # get the currently connected nodes, one per line, sorted for comm
    currentNodes=$(printf '%s\n' "$rIncomingAddresses" \
      | tr ',' '\n' \
      | sed '/^[[:space:]]*$/d' \
      | LC_ALL=C sort -u)

    if [ -z "$currentNodes" ]; then
      add_unknown "wsrep_incoming_addresses is empty"
    else
      # nodes recorded by earlier runs (empty on the first run)
      knownNodes=$(sed '/^[[:space:]]*$/d' < "$stateFile" | LC_ALL=C sort -u)

      # get the nodes that were removed from the cluster
      missingNodes=$(LC_ALL=C comm -13 \
        <(printf '%s\n' "$currentNodes") \
        <(printf '%s\n' "$knownNodes"))

      # Rewrite the state file as the sorted union of known and current nodes;
      # appending would leave it unsorted and break comm on later runs.
      allNodes=$(printf '%s\n' "$knownNodes" "$currentNodes" \
        | sed '/^$/d' \
        | LC_ALL=C sort -u)
      printf '%s\n' "$allNodes" > "$stateFile"

      # get the number of nodes that were ever part of the cluster
      maxClusterSize=$(printf '%s\n' "$allNodes" | grep -c .)

      if [ "$maxClusterSize" -eq "$rClusterSize" ]; then
        if [ "$maxClusterSize" -eq 1 ]; then
          if [ "$crit" -eq 0 ] && [ "$warn" -eq 0 ]; then
            add_ok "running single-node database cluster"
          fi
        else
          add_ok "running redundant $rClusterSize online / $maxClusterSize total"
        fi
      else
        # keep the status on one line: join the missing peers with spaces
        add_warning "redundant $rClusterSize online / $maxClusterSize total, missing peers: ${missingNodes//$'\n'/ }"
      fi
    fi
  fi # touch / rConnected
fi # -n stateFile

#
# exit
#
finish