diff --git a/global/overlay/etc/puppet/modules/rut/manifests/controller_nrpe.pp b/global/overlay/etc/puppet/modules/rut/manifests/controller_nrpe.pp
index 7fa2317..0da41ed 100644
--- a/global/overlay/etc/puppet/modules/rut/manifests/controller_nrpe.pp
+++ b/global/overlay/etc/puppet/modules/rut/manifests/controller_nrpe.pp
@@ -2,13 +2,20 @@ class rut::controller_nrpe {
   sunet::nagios::nrpe_command {'check_rut_pods':
     command_line => '/usr/lib/nagios/plugins/check_rut_pods.sh'
   }
-
   file { "/usr/lib/nagios/plugins/check_rut_pods.sh":
     ensure => "file",
     content => template("rut/check_rut_pods.sh.erb"),
     mode => '0755',
   }
-
+  # NRPE command for the Postgres check, and the plugin script it runs (rendered from a template).
+  sunet::nagios::nrpe_command {'check_rut_postgres':
+    command_line => '/usr/lib/nagios/plugins/check_rut_postgres.sh'
+  }
+  file { "/usr/lib/nagios/plugins/check_rut_postgres.sh":
+    ensure  => "file",
+    content => template("rut/check_rut_postgres.sh.erb"),
+    mode    => '0755',
+  }
   user { 'nagios':
     ensure => present,
     groups => ['microk8s'],
diff --git a/global/overlay/etc/puppet/modules/rut/manifests/rut_mon.pp b/global/overlay/etc/puppet/modules/rut/manifests/rut_mon.pp
index 5d8fdaa..6dc136c 100644
--- a/global/overlay/etc/puppet/modules/rut/manifests/rut_mon.pp
+++ b/global/overlay/etc/puppet/modules/rut/manifests/rut_mon.pp
@@ -4,4 +4,14 @@ class rut::rut_mon {
     check_command => 'check_nrpe!check_rut_pods',
     description => 'Microk8s cluster health',
   }
+  # Nagios service that calls the check_rut_postgres NRPE command on each listed host.
+  nagioscfg::service {'check_rut_postgres':
+    host_name     => [
+      'internal-sto4-prod-k8sc-0.rut.sunet.se',
+      'internal-sto4-prod-k8sc-1.rut.sunet.se',
+      'internal-sto4-prod-k8sc-2.rut.sunet.se',
+    ],
+    check_command => 'check_nrpe!check_rut_postgres',
+    description   => 'Postgres cluster health',
+  }
 }
diff --git a/global/overlay/etc/puppet/modules/rut/templates/check_rut_postgres.sh.erb b/global/overlay/etc/puppet/modules/rut/templates/check_rut_postgres.sh.erb
new file mode 100644
index 0000000..86264bd
--- /dev/null
+++ b/global/overlay/etc/puppet/modules/rut/templates/check_rut_postgres.sh.erb
@@ -0,0 +1,29 @@
+#!/bin/bash
+# This file is managed by puppet.
+# NRPE check for the Postgres pods in namespace sunet-cnpg: expects exactly one
+# Running primary and two Running replicas. Exit codes follow the Nagios plugin
+# convention: 0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (kubectl or jq failed).
+
+# Fetch the pod list once so both counts come from the same snapshot.
+if ! pods=$(kubectl get pods -n sunet-cnpg -o json); then
+  echo "UNKNOWN: kubectl get pods failed"
+  exit 3
+fi
+
+# Print how many Running pods carry the "role" label value given as $1.
+count_role() {
+  jq --arg role "$1" \
+    '[.items[] | select(.metadata.labels.role == $role and .status.phase == "Running")] | length' <<<"$pods"
+}
+
+if ! num_primary=$(count_role primary) || ! num_replica=$(count_role replica); then
+  echo "UNKNOWN: could not parse kubectl output"
+  exit 3
+elif [ "$num_primary" -ne 1 ]; then
+  echo "CRITICAL: Expected 1 primary, found $num_primary"
+  exit 2
+elif [ "$num_replica" -ne 2 ]; then
+  echo "WARNING: Expected 2 replicas, found $num_replica"
+  exit 1
+fi
+echo "OK: $num_primary primary and $num_replica replicas found."
diff --git a/scripts/change_back_ttl_on_host.sh b/scripts/change_back_ttl_on_host.sh
new file mode 100755
index 0000000..710944f
--- /dev/null
+++ b/scripts/change_back_ttl_on_host.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Runs "knotctl update ... -a ttl=3600" for the A and AAAA records of one host in the rut.sunet.se zone.
+# Usage: change_back_ttl_on_host.sh <fqdn>
+
+if [ $# -ne 1 ] || [ -z "$1" ]; then
+  echo "Usage: $0 <fqdn>" >&2
+  exit 1
+fi
+
+# Drop a trailing dot so the "${SERVERNAME}." below never ends in "..".
+SERVERNAME=${1%.}
+DOMAIN=rut.sunet.se
+
+# Update the TTL of every record of type $1 (A or AAAA) that dig returns for the host.
+update_ttl() {
+  local rtype=$1 data
+  local -a records
+  mapfile -t records < <(dig -t "$rtype" +short "$SERVERNAME")
+  if [ "${#records[@]}" -eq 0 ]; then
+    # Without data there is nothing to pass to -d; skip instead of sending a broken command.
+    echo "No $rtype record found for $SERVERNAME, skipping" >&2
+    return 0
+  fi
+  for data in "${records[@]}"; do
+    knotctl update -z "$DOMAIN" -n "${SERVERNAME}." -a ttl=3600 -r "$rtype" -d "$data" || return 1
+  done
+}
+
+status=0
+update_ttl AAAA || status=1
+update_ttl A || status=1
+exit "$status"