Compare commits
No commits in common. "main" and "cosmos-ops-2025-02-17-v01" have entirely different histories.
main
...
cosmos-ops
8 changed files with 2 additions and 102 deletions
|
@ -21,7 +21,6 @@
|
||||||
|
|
||||||
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
|
'^internal-sto4-prod-k8sc-[0-9].rut.sunet.se$':
|
||||||
rut::infra_ca_rp:
|
rut::infra_ca_rp:
|
||||||
rut::controller_nrpe:
|
|
||||||
sunet::microk8s::node:
|
sunet::microk8s::node:
|
||||||
channel: 1.31/stable
|
channel: 1.31/stable
|
||||||
drain_reboot_cron: true
|
drain_reboot_cron: true
|
||||||
|
@ -31,7 +30,7 @@
|
||||||
frontends:
|
frontends:
|
||||||
- sthb-lb-1.sunet.se
|
- sthb-lb-1.sunet.se
|
||||||
- tug-lb-1.sunet.se
|
- tug-lb-1.sunet.se
|
||||||
port: '443'
|
port: '30443'
|
||||||
sunet::otel::alloy:
|
sunet::otel::alloy:
|
||||||
otel_receiver: monitor-prod.rut.sunet.se
|
otel_receiver: monitor-prod.rut.sunet.se
|
||||||
sunet::fleetlock_client:
|
sunet::fleetlock_client:
|
||||||
|
@ -40,14 +39,12 @@
|
||||||
rut::infra_ca_rp:
|
rut::infra_ca_rp:
|
||||||
sunet::microk8s::node:
|
sunet::microk8s::node:
|
||||||
channel: 1.31/stable
|
channel: 1.31/stable
|
||||||
drain_reboot_cron: true
|
|
||||||
sunet::otel::alloy:
|
sunet::otel::alloy:
|
||||||
otel_receiver: monitor-prod.rut.sunet.se
|
otel_receiver: monitor-prod.rut.sunet.se
|
||||||
sunet::fleetlock_client:
|
sunet::fleetlock_client:
|
||||||
|
|
||||||
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
|
'^internal-sto4-prod-monitor-[0-9].rut.sunet.se$':
|
||||||
sunet::dockerhost2:
|
sunet::dockerhost2:
|
||||||
rut::rut_mon:
|
|
||||||
sunet::naemon_monitor:
|
sunet::naemon_monitor:
|
||||||
domain: monitor-prod.rut.sunet.se
|
domain: monitor-prod.rut.sunet.se
|
||||||
naemon_tag: latest
|
naemon_tag: latest
|
||||||
|
@ -88,7 +85,6 @@ internal-sto4-prod-satosa-1.rut.sunet.se:
|
||||||
rut::infra_ca_rp:
|
rut::infra_ca_rp:
|
||||||
sunet::microk8s::node:
|
sunet::microk8s::node:
|
||||||
channel: 1.31/stable
|
channel: 1.31/stable
|
||||||
drain_reboot_cron: true
|
|
||||||
sunet::otel::alloy:
|
sunet::otel::alloy:
|
||||||
otel_receiver: monitor-prod.rut.sunet.se
|
otel_receiver: monitor-prod.rut.sunet.se
|
||||||
sunet::fleetlock_client:
|
sunet::fleetlock_client:
|
||||||
|
|
|
@ -1,17 +0,0 @@
|
||||||
class rut::controller_nrpe {
|
|
||||||
sunet::nagios::nrpe_command {'check_rut_pods':
|
|
||||||
command_line => '/usr/lib/nagios/plugins/check_rut_pods.sh'
|
|
||||||
}
|
|
||||||
|
|
||||||
file { "/usr/lib/nagios/plugins/check_rut_pods.sh":
|
|
||||||
ensure => "file",
|
|
||||||
content => template("rut/check_rut_pods.sh.erb"),
|
|
||||||
mode => '0755',
|
|
||||||
}
|
|
||||||
|
|
||||||
user { 'nagios':
|
|
||||||
ensure => present,
|
|
||||||
groups => ['microk8s'],
|
|
||||||
membership => minimum,
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,7 +0,0 @@
|
||||||
class rut::rut_mon {
|
|
||||||
nagioscfg::service {'check_rut_pods':
|
|
||||||
host_name => ['internal-sto4-prod-k8sc-0.rut.sunet.se', 'internal-sto4-prod-k8sc-1.rut.sunet.se', 'internal-sto4-prod-k8sc-2.rut.sunet.se'],
|
|
||||||
check_command => 'check_nrpe!check_rut_pods',
|
|
||||||
description => 'Microk8s cluster health',
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -1,69 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
# This file is managed by puppet.
|
|
||||||
|
|
||||||
STATUS=$(/snap/bin/kubectl get events --all-namespaces -o json)
|
|
||||||
|
|
||||||
# number warnings required to make critical status (any warning makes warning and any critical makes critical)
|
|
||||||
critical_warning_num_threshold=3
|
|
||||||
|
|
||||||
num_warnings=$(echo "$STATUS" | jq '[.items[] | select(.type == "Warning")] | length')
|
|
||||||
num_normal=$(echo "$STATUS" | jq '[.items[] | select(.type == "Normal")] | length')
|
|
||||||
num_critical=$(echo "$STATUS" | jq '[.items[] | select(.type == "Critical")] | length')
|
|
||||||
|
|
||||||
function print_info {
|
|
||||||
# echo "$msg: Criticals: $num_critical", "Warnings: $num_warnings"
|
|
||||||
output="$msg - "
|
|
||||||
|
|
||||||
if [[ $num_critical -gt 0 ]]; then
|
|
||||||
output+="Criticals: $num_critical "
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ $num_warnings -gt 0 ]]; then
|
|
||||||
[[ $num_critical -gt 0 ]] && output+=", " # Add a comma if both exist
|
|
||||||
output+="Warnings: $num_warnings "
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "$output"
|
|
||||||
if [[ "$num_critical" -gt 0 ]]; then
|
|
||||||
echo "----------------------------------------"
|
|
||||||
echo "$STATUS" | jq -r '
|
|
||||||
.items[] | select(.type == "Critical") |
|
|
||||||
"Host: " + .source.host +
|
|
||||||
"\nType: " + .type +
|
|
||||||
"\nPod: " + .involvedObject.name +
|
|
||||||
"\nMessage: " + .message +
|
|
||||||
"\n----------------------------------------"
|
|
||||||
'
|
|
||||||
fi
|
|
||||||
|
|
||||||
|
|
||||||
if [[ "$num_warnings" -gt 0 ]]; then
|
|
||||||
echo "----------------------------------------"
|
|
||||||
echo "$STATUS" | jq -r '
|
|
||||||
.items[] | select(.type == "Warning") |
|
|
||||||
"Host: " + .source.host +
|
|
||||||
"\nType: " + .type +
|
|
||||||
"\nPod: " + .involvedObject.name +
|
|
||||||
"\nMessage: " + .message +
|
|
||||||
"\n----------------------------------------"
|
|
||||||
'
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "run \"kubectl get events --all-namespaces\" on $HOSTNAME to get more info"
|
|
||||||
}
|
|
||||||
|
|
||||||
if [[ "$num_critical" -gt 0 || "$num_warnings" -ge "$critical_warning_num_threshold" ]]; then
|
|
||||||
msg="CRITICAL"
|
|
||||||
print_info
|
|
||||||
exit 2
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "$num_warnings" -gt 0 ]]; then
|
|
||||||
msg="WARNING"
|
|
||||||
print_info
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
msg="OK"
|
|
||||||
print_info
|
|
||||||
exit 0
|
|
|
@ -1 +0,0 @@
|
||||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
|
|
@ -1 +0,0 @@
|
||||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
|
|
@ -1 +0,0 @@
|
||||||
add rule inet filter input tcp dport { 80, 443 , 30080, 30443 } counter accept comment "nft_public"
|
|
Loading…
Add table
Reference in a new issue