#!/bin/bash

# This is a modified version of the original plugin: https://github.com/fridim/nagios-plugin-check_galera_cluster/blob/master/check_galera_cluster


# Script name used in the help/usage output. "$0" is quoted (and guarded with
# --) so that a path containing whitespace or a leading dash cannot break
# basename.
PROGNAME=$(basename -- "$0")
VERSION="Version 1.1.5"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>, Claudio Kuenzler <claudiokuenzler.com>"

# Plugin exit codes: OK, WARNING, CRITICAL, UNKNOWN.
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3

# Alert counters; they are inspected at the end of the script to pick the
# final exit status.
warnAlerts=0
critAlerts=0
unknAlerts=0

# Message text collected for WARNING and CRITICAL findings.
warnText=""
critText=""

# Write the plugin version followed by the author list on one line.
# Arguments handed in by callers are not used.
print_version() {
  printf '%s %s\n' "$VERSION" "$AUTHOR"
}

# Print the version banner followed by the usage text, then terminate the
# script with the UNKNOWN status code ($ST_UK).
print_help() {
  print_version "$PROGNAME" "$VERSION"
  # The f) option (critical value of wsrep_flow_control_paused, default 0.1)
  # is disabled and therefore not listed below.
  cat <<EOF

$PROGNAME is a monitoring plugin to monitor Galera cluster status.

$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-0]

Options:
  u)
    MySQL user.
  p)
    MySQL password.
  H)
    MySQL host.
  P)
    MySQL port.
  m)
    MySQL extra my.cnf configuration file.
  w)
    Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical).
  c)
    Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2).
  0)
    Rise CRITICAL if the node is not primary
  s)
    Create state file, detect disconnected nodes
EOF
  exit $ST_UK
}

# default values
# Cluster size threshold for CRITICAL; replaced by the -c option when given.
# It also becomes the WARNING threshold when -w is not supplied.
crit=2
# Critical value for wsrep_flow_control_paused (that check is disabled).
#fcp=0.1

# Ensure that the command named in $1 can be resolved by the shell.
# Prints a message and exits with $ST_UK when no name was passed or the
# command cannot be found; otherwise returns silently.
check_executable() {
    [ -n "$1" ] || {
        echo "check_executable: no parameter given!"
        exit $ST_UK
    }

    # Nothing more to do once the lookup succeeds.
    command -v "$1" >/dev/null 2>&1 && return

    echo "UNKNOWN: Cannot find $1"
    exit $ST_UK
}

# Verify the docker client before doing any work. The absolute path is checked
# because that is exactly what the plugin executes further down.
# bc is no longer demanded: the only check that used it
# (wsrep_flow_control_paused) is disabled, so requiring it merely produced
# needless UNKNOWN results on hosts without bc.
check_executable /usr/bin/docker

# Parse the command line options into the global configuration variables
# mysqluser, password, mysqlhost, port, myconfig, warn, crit, primary and
# stateFile.
# -h prints the help, -v prints the version; both end the script with $ST_UK.
# An unknown option or an option lacking its argument is reported, followed by
# the help text, and also ends the script with $ST_UK.
parse_options() {
  # Keep the getopts bookkeeping local so the function can be called again.
  local OPTION OPTARG OPTIND=1

  # The option string uses plain ASCII quotes (typographic quotes would become
  # part of the string). The leading ':' selects silent error reporting, so
  # getopts passes the offending option letter in OPTARG.
  while getopts ":hvu:p:H:P:w:c:m:s:0" OPTION; do
    case "$OPTION" in
      h)
        print_help
        exit "$ST_UK"
        ;;
      v)
        print_version "$PROGNAME" "$VERSION"
        exit "$ST_UK"
        ;;
      u)
        mysqluser=$OPTARG
        ;;
      p)
        password=$OPTARG
        ;;
      H)
        mysqlhost=$OPTARG
        ;;
      P)
        port=$OPTARG
        ;;
      m)
        myconfig=$OPTARG
        ;;
      w)
        warn=$OPTARG
        ;;
      c)
        crit=$OPTARG
        ;;
      0)
        primary='TRUE'
        ;;
      s)
        stateFile=$OPTARG
        ;;
      :)
        echo "Missing argument for option: -$OPTARG"
        print_help
        exit "$ST_UK"
        ;;
      \?)
        # Report the option that was actually rejected, not merely "$1".
        echo "Unknown argument: -$OPTARG"
        print_help
        exit "$ST_UK"
        ;;
    esac
  done
}

parse_options "$@"

# Without an explicit -w value the WARNING threshold follows the CRITICAL one.
warn=${warn:-$crit}

# Build one command line parameter by joining a flag with its value.
# Arguments: $1 - flag or prefix (for example -h)
#            $2 - value
# Outputs:   "$1$2" on stdout when the value is non-empty, nothing otherwise.
create_param() {
  if [ -n "$2" ]; then
    # printf with quoted arguments keeps the value verbatim; the previous
    # unquoted echo collapsed whitespace and glob-expanded characters like '*'.
    printf '%s%s\n' "$1" "$2"
  fi
}

# Publish the password through the environment.
MYSQL_PWD=${password}
export MYSQL_PWD

# Render every connection setting as a client argument; create_param yields an
# empty string for settings that were not supplied.
param_mysqlhost="$(create_param -h "${mysqlhost}")"
param_port="$(create_param -P "${port}")"
param_mysqluser="$(create_param -u "${mysqluser}")"
param_password="$(create_param -p "${password}")"
param_configfile="$(create_param --defaults-extra-file= "${myconfig}")"

# Space separated argument string assembled from the pieces above.
# NOTE(review): none of the mysql invocations visible in this file consume
# $param_mysql — confirm whether it is still needed.
param_mysql="${param_mysqlhost} ${param_port} ${param_mysqluser} ${param_password} ${param_configfile}"

#
# verify the database connection
#

# Run the client command '\s;' inside the mariadb_db_1 container and discard
# all output; only the exit status decides whether the connection works.
if ! /usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup \
    -B -N -e '\s;' >/dev/null 2>&1; then
  echo "CRITICAL: mysql connection check failed"
  exit $ST_CR
fi

#
# retrieve the mysql status
#

#rMysqlStatus=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup  -B -N -e "show status like 'wsrep_%';")

#
# verify that the node is part of a cluster
#

# Read wsrep_cluster_state_uuid; an empty answer is treated as "this node does
# not belong to a cluster".
rClusterStateUuid=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup  -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_cluster_state_uuid'")

# The expansion is quoted so that an answer containing whitespace cannot turn
# the test into a syntax error.
if [ -z "$rClusterStateUuid" ]; then
  echo "CRITICAL: node is not part of a cluster."
  exit $ST_CR
fi

#rFlowControl=$(echo "$rMysqlStatus" | awk '/wsrep_flow_control_paused\t/ {print $2}') # < 0.1
#rFlowControl=$(printf "%.14f" $rFlowControl) # issue #4


# Look up a single value through the mysql client inside the container.
#   $1 - information_schema table to read (GLOBAL_STATUS or GLOBAL_VARIABLES)
#   $2 - name of the variable
# Whatever the client prints on stdout is passed through to the caller.
fetch_global_value() {
  /usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N \
    -e "select variable_value from information_schema.$1 where VARIABLE_NAME = '$2'"
}

# The trailing comments name the value the checks further down expect.
rClusterSize=$(fetch_global_value GLOBAL_STATUS wsrep_cluster_size)
rClusterStatus=$(fetch_global_value GLOBAL_STATUS wsrep_cluster_status)           # Primary
rReady=$(fetch_global_value GLOBAL_STATUS wsrep_ready)                            # ON
rConnected=$(fetch_global_value GLOBAL_STATUS wsrep_connected)                    # ON
rLocalStateComment=$(fetch_global_value GLOBAL_STATUS wsrep_local_state_comment)  # Synced

#rIncommingAddresses=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup  -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_incoming_addresses'")

rSSTMethod=$(fetch_global_value GLOBAL_VARIABLES wsrep_sst_method)                # mariabackup


#if [ -z "$rFlowControl" ]; then
#  echo "UNKNOWN: wsrep_flow_control_paused is empty"
#  unknAlerts=$(($unknAlerts+1))
#fi

#if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
#  echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
#  critAlerts=$(($criticalAlerts+1))
#fi

# Record one CRITICAL finding: append the message to critText and increase the
# critical alert counter.
#   $1 - message body without the "CRITICAL: " prefix and the trailing ". "
# (The counter used to be derived from the undefined variable criticalAlerts,
# so it never went beyond 1.)
add_critical() {
  critText+="CRITICAL: $1. "
  critAlerts=$((critAlerts + 1))
}

# Every operand below is quoted: with unquoted operands an empty query result
# made the test itself fail with a syntax error, which silently skipped the
# CRITICAL it was supposed to raise.
if [ "$primary" = 'TRUE' ] && [ "$rClusterStatus" != 'Primary' ]; then
  add_critical "node is not primary (wsrep_cluster_status)"
fi

if [ "$rReady" != 'ON' ]; then
  add_critical "node is not ready (wsrep_ready)"
fi

if [ "$rConnected" != 'ON' ]; then
  add_critical "node is not connected (wsrep_connected)"
fi

if [ "$rLocalStateComment" != 'Synced' ]; then
  add_critical "node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)"
fi

if [ "$rSSTMethod" != 'mariabackup' ]; then
  add_critical "node is not backed up - actual state is: $rSSTMethod (wsrep_sst_method)"
fi

# The size comparison needs plain integers. Anything else ends the check with
# UNKNOWN and an explanation instead of exiting without any output.
if ! [[ $rClusterSize =~ ^[0-9]+$ && $warn =~ ^[0-9]+$ && $crit =~ ^[0-9]+$ ]]; then
  echo "UNKNOWN: cluster size ($rClusterSize) or thresholds (warn=$warn, crit=$crit) are not numeric"
  exit "$ST_UK"
fi

if [ "$rClusterSize" -gt "$warn" ]; then
  # Only display the OK message if the state check is not enabled and no
  # CRITICAL finding is pending; otherwise "OK" would precede the CRITICAL
  # text that is printed at the end of the script.
  if [ -z "$stateFile" ] && [ "$critAlerts" -eq 0 ]; then
    echo "OK: number of NODES = $rClusterSize"
  fi
elif [ "$rClusterSize" -le "$crit" ]; then
  add_critical "number of NODES = $rClusterSize"
else
  # Not above warn and not at/below crit: the size lies in the warning range.
  warnText+="WARNING: number of NODES = $rClusterSize."
  warnAlerts=$((warnAlerts + 1))
fi

#
# automatically detect whether the connection to a node is lost
#

# Print the entries of the newline separated list $1 that do not occur in the
# newline separated list $2. Both lists are sorted and de-duplicated first,
# because comm only works on sorted input; blank lines are ignored.
nodes_missing_from() {
  comm -23 \
    <(printf '%s\n' "$1" | sed '/^$/d' | sort -u) \
    <(printf '%s\n' "$2" | sed '/^$/d' | sort -u)
}

# State file handling: remember every node address ever seen and warn when
# fewer nodes are online than the state file knows about.
# Note: state files written by earlier versions may contain a stray "}" entry
# (the address list was expanded as "$rIncommingAddresses}" while the variable
# was empty); such files should be deleted once.
if [ -n "$stateFile" ]; then

  if ! touch -- "$stateFile"; then

    echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
    unknAlerts=$((unknAlerts + 1))

  elif [ "$rConnected" = "ON" ]; then

    # The address list is only needed here, so it is fetched on demand unless
    # an earlier part of the script already provided it.
    if [ -z "$rIncommingAddresses" ]; then
      rIncommingAddresses=$(/usr/bin/docker exec mariadb_db_1 mysql --defaults-group-suffix=_backup -B -N -e "select variable_value from information_schema.GLOBAL_STATUS where VARIABLE_NAME = 'wsrep_incoming_addresses'")
    fi

    # nodes that are connected right now, one per line
    currentNodes=$(printf '%s\n' "$rIncommingAddresses" | tr ',' '\n')
    # nodes seen on earlier runs (the touch above guarantees the file exists)
    knownNodes=$(cat -- "$stateFile")

    # nodes added to the cluster / nodes that left the cluster
    newNodes=$(nodes_missing_from "$currentNodes" "$knownNodes")
    missingNodes=$(nodes_missing_from "$knownNodes" "$currentNodes")

    if [ -n "$newNodes" ]; then
      # remember the new nodes in the state file
      printf '%s\n' "$newNodes" >> "$stateFile"
    fi

    # number of nodes that have been part of the cluster so far
    maxClusterSize=$(( $(wc -l < "$stateFile") ))

    if [ "$maxClusterSize" -eq "$rClusterSize" ]; then
      if [ "$maxClusterSize" -eq 1 ]; then
        if [ "$crit" -eq 0 ] && [ "$warn" -eq 0 ]; then
          echo "OK: running single-node database cluster"
        fi
      else
        echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
      fi
    else
      # keep the peer list on the same output line as the status text
      missingPeers=$(printf '%s' "$missingNodes" | tr '\n' ' ')
      echo "WARNING: redundant  $rClusterSize online / $maxClusterSize  total, missing peers: $missingPeers"
      warnAlerts=$((warnAlerts + 1))
    fi

  fi # touch / rConnected

fi # -n stateFile


#
# exit
#

# Choose the final plugin status: CRITICAL outranks UNKNOWN, which outranks
# WARNING. The collected message text is printed for CRITICAL and WARNING.
if [ "$critAlerts" -gt 0 ]; then
  # Quoted so the text is emitted verbatim (no globbing or word splitting);
  # the separator space left behind the last message is dropped.
  printf '%s\n' "${critText% }"
  exit "$ST_CR"
fi

if [ "$unknAlerts" -gt 0 ]; then
  exit "$ST_UK"
fi

if [ "$warnAlerts" -gt 0 ]; then
  printf '%s\n' "$warnText"
  exit "$ST_WR"
fi

exit "$ST_OK"