Compare commits
10 commits
cosmos-ops
...
main
Author | SHA1 | Date | |
---|---|---|---|
70acff31bd | |||
fd836c8480 | |||
a092cefca6 | |||
f5404ec114 | |||
8b88f929dc | |||
e96d41c899 | |||
681c004a8a | |||
c9709e1509 | |||
6ea773035e | |||
82a29b6abf |
7 changed files with 99 additions and 4 deletions
global/overlay/etc/puppet
internal-sto4-prod-k8sc-0.rut.sunet.se/overlay/etc/nftables/conf.d
internal-sto4-prod-k8sc-1.rut.sunet.se/overlay/etc/nftables/conf.d
internal-sto4-prod-k8sc-2.rut.sunet.se/overlay/etc/nftables/conf.d
|
@ -21,6 +21,7 @@
|
|||
|
||||
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
|
||||
rut::infra_ca_rp:
|
||||
rut::controller_nrpe:
|
||||
sunet::microk8s::node:
|
||||
channel: 1.31/stable
|
||||
drain_reboot_cron: true
|
||||
|
@ -30,7 +31,7 @@
|
|||
frontends:
|
||||
- sthb-lb-1.sunet.se
|
||||
- tug-lb-1.sunet.se
|
||||
port: '30443'
|
||||
port: '443'
|
||||
sunet::otel::alloy:
|
||||
otel_receiver: monitor-prod.rut.sunet.se
|
||||
sunet::fleetlock_client:
|
||||
|
@ -46,6 +47,7 @@
|
|||
|
||||
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
|
||||
sunet::dockerhost2:
|
||||
rut::rut_mon:
|
||||
sunet::naemon_monitor:
|
||||
domain: monitor-prod.rut.sunet.se
|
||||
naemon_tag: latest
|
||||
|
|
|
@ -0,0 +1,17 @@
|
|||
# @summary Sets up the check_rut_pods NRPE check on a node.
#
# Declares the `check_rut_pods` NRPE command, installs the plugin script that
# the command runs (rendered from the rut/check_rut_pods.sh.erb template) and
# makes sure the nagios user is a member of the microk8s group.
class rut::controller_nrpe {
  # NRPE command entry; it points at the plugin file managed below.
  sunet::nagios::nrpe_command { 'check_rut_pods':
    command_line => '/usr/lib/nagios/plugins/check_rut_pods.sh',
  }

  # The plugin itself, executable by everyone.
  file { '/usr/lib/nagios/plugins/check_rut_pods.sh':
    ensure  => 'file',
    mode    => '0755',
    content => template('rut/check_rut_pods.sh.erb'),
  }

  # NOTE(review): presumably microk8s group membership is what allows the
  # plugin to run kubectl as the nagios user - confirm.
  # `membership => minimum` only adds the listed group; other groups the user
  # already belongs to are left alone.
  user { 'nagios':
    ensure     => present,
    membership => minimum,
    groups     => ['microk8s'],
  }
}
|
|
@ -0,0 +1,7 @@
|
|||
# @summary Declares the monitoring service that runs check_rut_pods over NRPE.
#
# Registers one `nagioscfg::service` named `check_rut_pods` for the three
# internal-sto4-prod-k8sc hosts, using the `check_nrpe!check_rut_pods` check
# command.
class rut::rut_mon {
  nagioscfg::service { 'check_rut_pods':
    # One entry per host the check is run against.
    host_name     => [
      'internal-sto4-prod-k8sc-0.rut.sunet.se',
      'internal-sto4-prod-k8sc-1.rut.sunet.se',
      'internal-sto4-prod-k8sc-2.rut.sunet.se',
    ],
    check_command => 'check_nrpe!check_rut_pods',
    description   => 'Microk8s cluster health',
  }
}
|
|
@ -0,0 +1,69 @@
|
|||
#!/bin/bash
|
||||
# This file is managed by puppet.
|
||||
|
||||
# Fetch every event in the cluster as JSON. When kubectl itself fails the
# check cannot say anything about the cluster, so report UNKNOWN (exit 3)
# instead of falling through to an "OK" result with empty counters.
if ! STATUS=$(/snap/bin/kubectl get events --all-namespaces -o json); then
    echo "UNKNOWN - failed to fetch events with kubectl"
    exit 3
fi

# Number of warnings required to make the status critical
# (any warning makes warning and any critical makes critical).
critical_warning_num_threshold=3

# Count the events of each type that influences the result.
# NOTE(review): the Kubernetes Event API documents only the types "Normal"
# and "Warning", so the "Critical" counter is expected to stay at 0 unless a
# cluster component emits that type - confirm whether it is still wanted.
num_warnings=$(echo "$STATUS" | jq '[.items[] | select(.type == "Warning")] | length')
num_critical=$(echo "$STATUS" | jq '[.items[] | select(.type == "Critical")] | length')

# jq prints nothing usable when it is missing or cannot parse the input.
# Without this guard the numeric tests further down would treat the empty
# counters as 0 and the check would wrongly report OK.
if ! [[ "$num_warnings" =~ ^[0-9]+$ && "$num_critical" =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN - could not parse the event list returned by kubectl"
    exit 3
fi
|
||||
|
||||
# Print every event of one type as a Host/Type/Pod/Message record, each
# record followed by a separator line.
#   $1 - event type to select, e.g. "Warning"
# Reads the global STATUS (JSON event list).
function print_events_of_type {
    echo "----------------------------------------"
    echo "$STATUS" | jq -r --arg wanted "$1" '
        .items[] | select(.type == $wanted) |
        "Host: " + .source.host +
        "\nType: " + .type +
        "\nPod: " + .involvedObject.name +
        "\nMessage: " + .message +
        "\n----------------------------------------"
    '
}

# Print the plugin output: a one-line summary followed by the details of all
# critical and warning events and a hint on how to investigate further.
# Reads the globals msg, num_critical, num_warnings and STATUS.
function print_info {
    # Build "Criticals: N, Warnings: M" from the non-zero counters only, so
    # the summary has no stray space before the comma, no trailing space and
    # no dangling " - " when there is nothing to report.
    local details=""

    if [[ "$num_critical" -gt 0 ]]; then
        details="Criticals: $num_critical"
    fi

    if [[ "$num_warnings" -gt 0 ]]; then
        # ${details:+, } adds the separator only when criticals came first.
        details+="${details:+, }Warnings: $num_warnings"
    fi

    echo "${msg}${details:+ - $details}"

    if [[ "$num_critical" -gt 0 ]]; then
        print_events_of_type "Critical"
    fi

    if [[ "$num_warnings" -gt 0 ]]; then
        print_events_of_type "Warning"
    fi

    echo "run \"kubectl get events --all-namespaces\" on $HOSTNAME to get more info"
}
|
||||
|
||||
# Map the event counters to a plugin state, then report once and exit with
# the matching code: 2 = CRITICAL, 1 = WARNING, 0 = OK.
# Any critical event, or at least critical_warning_num_threshold warnings,
# is critical; fewer warnings (but at least one) is a warning.
rc=0
msg="OK"

if [[ "$num_critical" -gt 0 || "$num_warnings" -ge "$critical_warning_num_threshold" ]]; then
    rc=2
    msg="CRITICAL"
elif [[ "$num_warnings" -gt 0 ]]; then
    rc=1
    msg="WARNING"
fi

print_info
exit "$rc"
|
|
@ -1 +1 @@
|
|||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443} counter accept comment "nft_public" }
|
||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
||||
|
|
|
@ -1 +1 @@
|
|||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443} counter accept comment "nft_public" }
|
||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
||||
|
|
|
@ -1 +1 @@
|
|||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443} counter accept comment "nft_public" }
|
||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
||||
|
|
Loading…
Add table
Reference in a new issue