Compare commits

..

No commits in common. "main" and "cosmos-ops-2025-02-17-v01" have entirely different histories.

8 changed files with 2 additions and 102 deletions

View file

@ -21,7 +21,6 @@
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$': '^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
rut::infra_ca_rp: rut::infra_ca_rp:
rut::controller_nrpe:
sunet::microk8s::node: sunet::microk8s::node:
channel: 1.31/stable channel: 1.31/stable
drain_reboot_cron: true drain_reboot_cron: true
@ -31,7 +30,7 @@
frontends: frontends:
- sthb-lb-1.sunet.se - sthb-lb-1.sunet.se
- tug-lb-1.sunet.se - tug-lb-1.sunet.se
port: '443' port: '30443'
sunet::otel::alloy: sunet::otel::alloy:
otel_receiver: monitor-prod.rut.sunet.se otel_receiver: monitor-prod.rut.sunet.se
sunet::fleetlock_client: sunet::fleetlock_client:
@ -40,14 +39,12 @@
rut::infra_ca_rp: rut::infra_ca_rp:
sunet::microk8s::node: sunet::microk8s::node:
channel: 1.31/stable channel: 1.31/stable
drain_reboot_cron: true
sunet::otel::alloy: sunet::otel::alloy:
otel_receiver: monitor-prod.rut.sunet.se otel_receiver: monitor-prod.rut.sunet.se
sunet::fleetlock_client: sunet::fleetlock_client:
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$': '^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
sunet::dockerhost2: sunet::dockerhost2:
rut::rut_mon:
sunet::naemon_monitor: sunet::naemon_monitor:
domain: monitor-prod.rut.sunet.se domain: monitor-prod.rut.sunet.se
naemon_tag: latest naemon_tag: latest
@ -88,7 +85,6 @@ internal-sto4-prod-satosa-1.rut.sunet.se:
rut::infra_ca_rp: rut::infra_ca_rp:
sunet::microk8s::node: sunet::microk8s::node:
channel: 1.31/stable channel: 1.31/stable
drain_reboot_cron: true
sunet::otel::alloy: sunet::otel::alloy:
otel_receiver: monitor-prod.rut.sunet.se otel_receiver: monitor-prod.rut.sunet.se
sunet::fleetlock_client: sunet::fleetlock_client:

View file

@ -1,17 +0,0 @@
# @summary Sets up the check_rut_pods NRPE check on a host.
#
# Declares the NRPE command "check_rut_pods", installs the plugin script it
# runs from the rut/check_rut_pods.sh.erb template, and makes sure the
# nagios user is a member of the microk8s group.
class rut::controller_nrpe {
  # Single source of truth for the plugin location; used both by the NRPE
  # command definition and by the file resource that installs the script.
  $plugin_path = '/usr/lib/nagios/plugins/check_rut_pods.sh'

  sunet::nagios::nrpe_command { 'check_rut_pods':
    command_line => $plugin_path,
  }

  file { $plugin_path:
    ensure  => 'file',
    mode    => '0755',
    content => template('rut/check_rut_pods.sh.erb'),
  }

  # "minimum" membership: microk8s is added to whatever groups the user
  # already has instead of replacing them.
  # NOTE(review): presumably needed so the plugin can run kubectl as nagios - confirm.
  user { 'nagios':
    ensure     => present,
    membership => minimum,
    groups     => ['microk8s'],
  }
}

View file

@ -1,7 +0,0 @@
# @summary Declares the "Microk8s cluster health" service check.
#
# Attaches the NRPE-backed check_rut_pods check to the three
# internal-sto4-prod-k8sc controller hosts listed below.
class rut::rut_mon {
  # Hosts the service check is attached to.
  $k8s_controllers = [
    'internal-sto4-prod-k8sc-0.rut.sunet.se',
    'internal-sto4-prod-k8sc-1.rut.sunet.se',
    'internal-sto4-prod-k8sc-2.rut.sunet.se',
  ]

  nagioscfg::service { 'check_rut_pods':
    description   => 'Microk8s cluster health',
    check_command => 'check_nrpe!check_rut_pods',
    host_name     => $k8s_controllers,
  }
}

View file

@ -1,69 +0,0 @@
#!/bin/bash
# This file is managed by puppet.
#
# Nagios/NRPE plugin: summarises the events reported by
# "kubectl get events --all-namespaces" and maps them to a plugin status.
#
# Exit codes:
#   0 OK       - no Warning and no Critical events
#   1 WARNING  - at least one Warning event (but below the threshold)
#   2 CRITICAL - any Critical event, or at least
#                $critical_warning_num_threshold Warning events
#   3 UNKNOWN  - the events could not be fetched or parsed

# number warnings required to make critical status (any warning makes warning and any critical makes critical)
critical_warning_num_threshold=3

# Fetch all events once. A kubectl failure must not be reported as OK, which
# is what happens if the empty output is fed into the numeric tests below.
if ! STATUS=$(/snap/bin/kubectl get events --all-namespaces -o json); then
    echo "UNKNOWN - \"kubectl get events --all-namespaces\" failed on $HOSTNAME"
    exit 3
fi

# Print the number of events in $STATUS whose .type equals $1.
count_events() {
    jq --arg type "$1" '[.items[] | select(.type == $type)] | length' <<<"$STATUS"
}

# NOTE(review): Kubernetes itself appears to emit only "Normal" and "Warning"
# event types, so the "Critical" count is presumably always 0 - confirm
# whether anything in this cluster produces such events.
if ! num_warnings=$(count_events "Warning") || ! num_critical=$(count_events "Critical"); then
    echo "UNKNOWN - could not parse kubectl output with jq on $HOSTNAME"
    exit 3
fi

# jq exits 0 without printing anything when its input is empty; guard against
# non-numeric counters so they are never silently treated as zero.
if ! [[ $num_warnings =~ ^[0-9]+$ && $num_critical =~ ^[0-9]+$ ]]; then
    echo "UNKNOWN - unexpected event counts from kubectl output on $HOSTNAME"
    exit 3
fi

# Print a separator followed by host, type, involved object and message of
# every event in $STATUS whose .type equals $1.
print_events_of_type() {
    echo "----------------------------------------"
    jq -r --arg type "$1" '
        .items[] | select(.type == $type) |
        "Host: " + .source.host +
        "\nType: " + .type +
        "\nPod: " + .involvedObject.name +
        "\nMessage: " + .message +
        "\n----------------------------------------"
    ' <<<"$STATUS"
}

# Print the status line (prefixed with $1, e.g. "WARNING"), then the details
# of all Critical and Warning events, then a hint for further investigation.
print_info() {
    local msg=$1
    local output="$msg - "

    if (( num_critical > 0 )); then
        output+="Criticals: $num_critical "
    fi
    if (( num_warnings > 0 )); then
        # Add a comma if both exist
        (( num_critical > 0 )) && output+=", "
        output+="Warnings: $num_warnings "
    fi
    echo "$output"

    if (( num_critical > 0 )); then
        print_events_of_type "Critical"
    fi
    if (( num_warnings > 0 )); then
        print_events_of_type "Warning"
    fi

    echo "run \"kubectl get events --all-namespaces\" on $HOSTNAME to get more info"
}

if (( num_critical > 0 || num_warnings >= critical_warning_num_threshold )); then
    print_info "CRITICAL"
    exit 2
fi

if (( num_warnings > 0 )); then
    print_info "WARNING"
    exit 1
fi

print_info "OK"
exit 0

View file

@ -1 +0,0 @@
# Accept (and count) TCP traffic to ports 80, 443, 30080 and 30443 in the inet filter input chain; rule is tagged "nft_public".
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"

View file

@ -1 +0,0 @@
# Accept (and count) TCP traffic to ports 80, 443, 30080 and 30443 in the inet filter input chain; rule is tagged "nft_public".
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"

View file

@ -1 +0,0 @@
# Accept (and count) TCP traffic to ports 80, 443, 30080 and 30443 in the inet filter input chain; rule is tagged "nft_public".
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"