From 449fc872cee7c8b0fde34361956b212161245a9c Mon Sep 17 00:00:00 2001 From: Mikael Frykholm Date: Tue, 4 Feb 2025 10:18:57 +0100 Subject: [PATCH] Add taints and debug info. --- README.md | 8 +++++++- k8s/cnpg-cluster.yaml | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 1f70514..cf43611 100644 --- a/README.md +++ b/README.md @@ -48,6 +48,7 @@ * Add all other _Controller_ nodes with `microk8s join 89.46.21.119:25000/12345678987654345678976543/1234565` * Add all other _Worker_ nodes with `microk8s join 89.46.21.119:25000/12345678987654345678976543/1234565 --worker` * Taint controller nodes so they wont get workload:` microk8s.kubectl taint nodes --selector=node.kubernetes.io/microk8s-controlplane=microk8s-controlplane cp-node=true:NoExecute` +* Taint Postgres nodes so that only Postgres workloads are scheduled on them: `microk8s.kubectl taint nodes --selector=sunet.se/role=cnpg pg-node=true:NoExecute` * `kubectl get nodes` should show something like: ``` @@ -78,9 +79,14 @@ internal-sto4-test-k8sc-1.rut.sunet.se Ready 16d v1.28.7 ## Day 2 operations: -Rolling upgrade: +### Rolling upgrade: On controllers: kubectl drain internal-sto4-test-k8sc-0.rut.sunet.se --ignore-daemonset On workers: kubectl drain internal-sto4-test-k8sw-0.rut.sunet.se --force --ignore-daemonsets --delete-emptydir-data --disable-eviction + +After upgrade: monitor that calico has working access to the cluster and look for problems like `Candidate IP leak handle` and `too old resource version` in the calico-kube-controllers pod. 
If these are found, calico can be restarted with: +kubectl rollout restart deployment calico-kube-controllers -n kube-system +kubectl rollout restart daemonset calico-node -n kube-system + diff --git a/k8s/cnpg-cluster.yaml b/k8s/cnpg-cluster.yaml index 6df8646..0638322 100644 --- a/k8s/cnpg-cluster.yaml +++ b/k8s/cnpg-cluster.yaml @@ -12,3 +12,8 @@ spec: topologyKey: failure-domain.beta.kubernetes.io/zone nodeSelector: sunet.se/role: cnpg + tolerations: + - effect: NoExecute + key: pg-node + operator: Equal + value: "true"