added postgres monitoring

This commit is contained in:
Rasmus Thorslund 2025-04-22 12:43:04 +02:00
parent 8406109c38
commit 184f680fd6
No known key found for this signature in database
GPG key ID: 502D33332E9E305D
4 changed files with 53 additions and 2 deletions

View file

@ -2,13 +2,19 @@ class rut::controller_nrpe {
sunet::nagios::nrpe_command {'check_rut_pods': sunet::nagios::nrpe_command {'check_rut_pods':
command_line => '/usr/lib/nagios/plugins/check_rut_pods.sh' command_line => '/usr/lib/nagios/plugins/check_rut_pods.sh'
} }
file { "/usr/lib/nagios/plugins/check_rut_pods.sh": file { "/usr/lib/nagios/plugins/check_rut_pods.sh":
ensure => "file", ensure => "file",
content => template("rut/check_rut_pods.sh.erb"), content => template("rut/check_rut_pods.sh.erb"),
mode => '0755', mode => '0755',
} }
# Register the NRPE command 'check_rut_postgres', which runs the check script
# installed by the file resource below.
sunet::nagios::nrpe_command { 'check_rut_postgres':
  command_line => '/usr/lib/nagios/plugins/check_rut_postgres.sh',
}
# Install the check script, rendered from the rut module's ERB template, as an
# executable regular file.
file { '/usr/lib/nagios/plugins/check_rut_postgres.sh':
  ensure  => 'file',
  mode    => '0755',
  content => template('rut/check_rut_postgres.sh.erb'),
}
user { 'nagios': user { 'nagios':
ensure => present, ensure => present,
groups => ['microk8s'], groups => ['microk8s'],

View file

@ -4,4 +4,9 @@ class rut::rut_mon {
check_command => 'check_nrpe!check_rut_pods', check_command => 'check_nrpe!check_rut_pods',
description => 'Microk8s cluster health', description => 'Microk8s cluster health',
} }
# Nagios service that invokes the 'check_rut_postgres' NRPE command on each of
# the listed k8sc hosts.
nagioscfg::service { 'check_rut_postgres':
  description   => 'Postgres cluster health',
  check_command => 'check_nrpe!check_rut_postgres',
  host_name     => [
    'internal-sto4-prod-k8sc-0.rut.sunet.se',
    'internal-sto4-prod-k8sc-1.rut.sunet.se',
    'internal-sto4-prod-k8sc-2.rut.sunet.se',
  ],
}
} }

View file

@ -0,0 +1,29 @@
#!/bin/bash
# This file is managed by puppet.
#
# Nagios/NRPE check for the Postgres pods in the sunet-cnpg namespace.
# Pods are counted by their "role" label. Exit codes:
#   0 (OK)       - exactly 1 primary and exactly 2 replicas
#   1 (WARNING)  - exactly 1 primary, but the replica count is not 2
#   2 (CRITICAL) - primary count is not 1, or the pod list could not be read
#
# NOTE(review): only the role label is inspected; .status.phase is not, so a
# pod that is not running is still counted. Confirm this is intended.

NAMESPACE=sunet-cnpg

# Fetch the pod list once so both counts come from the same snapshot, and so a
# kubectl failure is reported as such instead of as "No primary".
if ! pods_json=$(kubectl get pods -n "$NAMESPACE" -o json); then
  echo "CRITICAL: Unable to list pods in namespace $NAMESPACE"
  exit 2
fi

# Print the number of pods whose "role" label equals $1.
count_role() {
  jq -r --arg role "$1" \
    '[.items[] | select(.metadata.labels.role == $role)] | length' \
    <<<"$pods_json"
}

num_primary=$(count_role primary)
num_replica=$(count_role replica)

# jq prints nothing on empty input and fails on invalid JSON; either way the
# counts would not be usable numbers, so bail out before comparing them.
if ! [[ $num_primary =~ ^[0-9]+$ && $num_replica =~ ^[0-9]+$ ]]; then
  echo "CRITICAL: Unable to parse pod list from namespace $NAMESPACE"
  exit 2
fi

if [ "$num_primary" -eq 0 ]; then
  echo "CRITICAL: No primary"
  exit 2
elif [ "$num_primary" -ne 1 ]; then
  echo "CRITICAL: Expected 1 primary, found $num_primary"
  exit 2
fi

if [ "$num_replica" -lt 2 ]; then
  echo "WARNING: Not enough replicas ($num_replica found, expected 2)"
  exit 1
elif [ "$num_replica" -gt 2 ]; then
  echo "WARNING: Too many replicas ($num_replica found, expected 2)"
  exit 1
fi

echo "OK: $num_primary primary and $num_replica replicas found."
exit 0

View file

@ -0,0 +1,11 @@
#!/bin/bash
# Update the AAAA and A records of a host in the rut.sunet.se zone.
#
# Usage: <this script> SERVERNAME
#
# SERVERNAME is resolved with dig; each address found is passed to
# `knotctl update` (AAAA first, then A) with ttl=3600. A record type with no
# resolved address is skipped. Exits non-zero when SERVERNAME is missing,
# when nothing resolves, or when any knotctl call fails.

SERVERNAME=$1
DOMAIN=rut.sunet.se

if [ -z "$SERVERNAME" ]; then
  echo "Usage: $0 SERVERNAME" >&2
  exit 1
fi

ipv4=$(dig -t a +short "$SERVERNAME")
ipv6=$(dig -t aaaa +short "$SERVERNAME")

if [ -z "$ipv4" ] && [ -z "$ipv6" ]; then
  echo "No A or AAAA address found for $SERVERNAME" >&2
  exit 1
fi

# Run knotctl for record type $1 with data $2; skip when $2 is empty.
update_record() {
  local rtype=$1 data=$2
  if [ -z "$data" ]; then
    # Without this guard knotctl would be called with a dangling -d option.
    echo "No $rtype address found for $SERVERNAME, skipping" >&2
    return 0
  fi
  case $data in
    *[[:space:]]*)
      # dig +short printed more than one value (several addresses or a CNAME
      # chain); -d takes a single value here, so refuse instead of guessing.
      echo "Expected a single $rtype value for $SERVERNAME, got: $data" >&2
      return 1
      ;;
  esac
  knotctl update -z "$DOMAIN" -n "${SERVERNAME}." -a ttl=3600 -r "$rtype" -d "$data"
}

rc=0
update_record AAAA "$ipv6" || rc=1
update_record A "$ipv4" || rc=1
exit $rc