Compare commits
12 commits
cosmos-ops
...
main
Author | SHA1 | Date | |
---|---|---|---|
70acff31bd | |||
fd836c8480 | |||
a092cefca6 | |||
f5404ec114 | |||
8b88f929dc | |||
e96d41c899 | |||
681c004a8a | |||
c9709e1509 | |||
6ea773035e | |||
82a29b6abf | |||
4f7a796383 | |||
5eaa1f6189 |
7 changed files with 99 additions and 1 deletions
|
@ -21,6 +21,7 @@
|
||||||
|
|
||||||
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
|
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
|
||||||
rut::infra_ca_rp:
|
rut::infra_ca_rp:
|
||||||
|
rut::controller_nrpe:
|
||||||
sunet::microk8s::node:
|
sunet::microk8s::node:
|
||||||
channel: 1.31/stable
|
channel: 1.31/stable
|
||||||
drain_reboot_cron: true
|
drain_reboot_cron: true
|
||||||
|
@ -30,7 +31,7 @@
|
||||||
frontends:
|
frontends:
|
||||||
- sthb-lb-1.sunet.se
|
- sthb-lb-1.sunet.se
|
||||||
- tug-lb-1.sunet.se
|
- tug-lb-1.sunet.se
|
||||||
port: '30443'
|
port: '443'
|
||||||
sunet::otel::alloy:
|
sunet::otel::alloy:
|
||||||
otel_receiver: monitor-prod.rut.sunet.se
|
otel_receiver: monitor-prod.rut.sunet.se
|
||||||
sunet::fleetlock_client:
|
sunet::fleetlock_client:
|
||||||
|
@ -46,6 +47,7 @@
|
||||||
|
|
||||||
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
|
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
|
||||||
sunet::dockerhost2:
|
sunet::dockerhost2:
|
||||||
|
rut::rut_mon:
|
||||||
sunet::naemon_monitor:
|
sunet::naemon_monitor:
|
||||||
domain: monitor-prod.rut.sunet.se
|
domain: monitor-prod.rut.sunet.se
|
||||||
naemon_tag: latest
|
naemon_tag: latest
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
# Sets up the check_rut_pods NRPE check on the node this class is applied to.
#
# Declares the NRPE command, installs the plugin script rendered from the
# rut module's ERB template, and makes sure the nagios user is a member of
# the microk8s group (presumably so the plugin may run kubectl - confirm).
class rut::controller_nrpe {
  # Single source of truth for the plugin location; used by both the NRPE
  # command definition and the file resource below.
  $plugin_path = '/usr/lib/nagios/plugins/check_rut_pods.sh'

  sunet::nagios::nrpe_command { 'check_rut_pods':
    command_line => $plugin_path,
  }

  file { $plugin_path:
    ensure  => 'file',
    mode    => '0755',
    content => template('rut/check_rut_pods.sh.erb'),
  }

  # membership => minimum only adds the listed group; any other groups the
  # nagios user already belongs to are left untouched.
  user { 'nagios':
    ensure     => 'present',
    groups     => ['microk8s'],
    membership => 'minimum',
  }
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
# Registers the "Microk8s cluster health" service check on the monitoring
# host. The check is executed through NRPE (check_rut_pods) against each of
# the hosts listed below.
class rut::rut_mon {
  # Hosts that the check_rut_pods NRPE command is run against.
  $checked_hosts = [
    'internal-sto4-prod-k8sc-0.rut.sunet.se',
    'internal-sto4-prod-k8sc-1.rut.sunet.se',
    'internal-sto4-prod-k8sc-2.rut.sunet.se',
  ]

  nagioscfg::service { 'check_rut_pods':
    description   => 'Microk8s cluster health',
    check_command => 'check_nrpe!check_rut_pods',
    host_name     => $checked_hosts,
  }
}
|
|
@ -0,0 +1,69 @@
|
||||||
|
#!/bin/bash
# This file is managed by puppet.
#
# Nagios/NRPE plugin: summarises the events reported by
# "kubectl get events --all-namespaces" on this host.
#
# Exit codes follow the Nagios plugin convention:
#   0 OK       - no Warning or Critical events
#   1 WARNING  - at least one Warning event, but fewer than the threshold
#   2 CRITICAL - any Critical event, or the Warning count reached the threshold
#   3 UNKNOWN  - the events could not be fetched or parsed

# Number of Warning events required to raise the status to CRITICAL
# (any single Warning gives WARNING, any single Critical gives CRITICAL).
critical_warning_num_threshold=3

STATUS=$(/snap/bin/kubectl get events --all-namespaces -o json)
kubectl_rc=$?

# Without this guard a failing kubectl leaves STATUS empty, every counter
# below becomes an empty string, and the check would wrongly report OK.
if [[ $kubectl_rc -ne 0 ]]; then
    echo "UNKNOWN - kubectl get events failed (exit code $kubectl_rc)"
    exit 3
fi

# Print the number of events in STATUS whose .type equals $1.
function count_events {
    echo "$STATUS" | jq --arg type "$1" '[.items[] | select(.type == $type)] | length'
}

# Print a separator-delimited detail record for every event of type $1.
function print_events {
    echo "----------------------------------------"
    echo "$STATUS" | jq -r --arg type "$1" '
        .items[] | select(.type == $type) |
        "Host: " + .source.host +
        "\nType: " + .type +
        "\nPod: " + .involvedObject.name +
        "\nMessage: " + .message +
        "\n----------------------------------------"
    '
}

num_warnings=$(count_events "Warning")
# NOTE(review): the Kubernetes Event API documents "Normal" and "Warning" as
# the event types; "Critical" is kept in case other types show up - confirm.
num_critical=$(count_events "Critical")

# jq missing or unparsable JSON leaves the counters empty or non-numeric.
if ! [[ "$num_warnings" =~ ^[0-9]+$ && "$num_critical" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN - could not parse kubectl output with jq"
    exit 3
fi

# Print the status line for label $1 followed by details of every Critical
# and Warning event and a hint on how to investigate further.
function print_info {
    local label="$1"
    local parts=()
    local summary=""
    local part

    [[ "$num_critical" -gt 0 ]] && parts+=("Criticals: $num_critical")
    [[ "$num_warnings" -gt 0 ]] && parts+=("Warnings: $num_warnings")

    if [[ ${#parts[@]} -eq 0 ]]; then
        summary="no Warning or Critical events"
    else
        for part in "${parts[@]}"; do
            # Join with ", " only between entries.
            summary+="${summary:+, }$part"
        done
    fi

    echo "$label - $summary"

    if [[ "$num_critical" -gt 0 ]]; then
        print_events "Critical"
    fi

    if [[ "$num_warnings" -gt 0 ]]; then
        print_events "Warning"
    fi

    echo "run \"kubectl get events --all-namespaces\" on $HOSTNAME to get more info"
}

if [[ "$num_critical" -gt 0 || "$num_warnings" -ge "$critical_warning_num_threshold" ]]; then
    print_info "CRITICAL"
    exit 2
fi

if [[ "$num_warnings" -gt 0 ]]; then
    print_info "WARNING"
    exit 1
fi

print_info "OK"
exit 0
|
|
@ -0,0 +1 @@
|
||||||
|
# Accept inbound TCP traffic on ports 80, 443, 30080 and 30443.
add rule inet filter input tcp dport { 80, 443, 30080, 30443 } counter accept comment "nft_public"
|
|
@ -0,0 +1 @@
|
||||||
|
# Accept inbound TCP traffic on ports 80, 443, 30080 and 30443.
add rule inet filter input tcp dport { 80, 443, 30080, 30443 } counter accept comment "nft_public"
|
|
@ -0,0 +1 @@
|
||||||
|
# Accept inbound TCP traffic on ports 80, 443, 30080 and 30443.
add rule inet filter input tcp dport { 80, 443, 30080, 30443 } counter accept comment "nft_public"
|
Loading…
Add table
Reference in a new issue