diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/apiserver.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/apiserver.json new file mode 100644 index 0000000..f78083f --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/apiserver.json @@ -0,0 +1,870 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only.", + "gridPos": { + "h": 2, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "content": "The SLO (service level objective) and other metrics displayed on this dashboard are for informational purposes only." 
+ }, + "pluginVersion": "v11.1.0", + "title": "Notice", + "type": "text" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many percent of requests (both read and write) in 30 days have been answered successfully and fast enough?", + "fieldConfig": { + "defaults": { + "decimals": 3, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 2 + }, + "id": 2, + "interval": "1m", + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"}" + } + ], + "title": "Availability (30d) > 99.000%", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How much error budget is left looking at our 99.000% availability guarantees?", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100 + }, + "decimals": 3, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 2 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "100 * (apiserver_request:availability30d{verb=\"all\", cluster=\"$cluster\"} - 0.990000)", + "legendFormat": "errorbudget" + } + ], + "title": "ErrorBudget (30d) > 99.000%", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many percent of read requests (LIST,GET) in 30 days have been answered successfully and fast enough?", + "fieldConfig": { + "defaults": { + "decimals": 3, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 9 + }, + "id": 4, + "interval": "1m", + "pluginVersion": 
"v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "apiserver_request:availability30d{verb=\"read\", cluster=\"$cluster\"}" + } + ], + "title": "Read Availability (30d)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many read requests (LIST,GET) per second do the apiservers get by code?", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "stacking": { + "mode": "normal" + } + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/2../i" + }, + "properties": [ + { + "id": "color", + "value": "#56A64B" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/3../i" + }, + "properties": [ + { + "id": "color", + "value": "#F2CC0C" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/4../i" + }, + "properties": [ + { + "id": "color", + "value": "#3274D9" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/5../i" + }, + "properties": [ + { + "id": "color", + "value": "#E02F44" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 9 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", + "legendFormat": "{{ code }}" + } + ], + "title": "Read SLI - Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many percent of read requests (LIST,GET) per second are returned with errors (5xx)?", + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + 
"w": 6, + "x": 12, + "y": 9 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"read\", cluster=\"$cluster\"})", + "legendFormat": "{{ resource }}" + } + ], + "title": "Read SLI - Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many seconds is the 99th percentile for reading (LIST|GET) a given resource?", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 9 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"read\", cluster=\"$cluster\"}", + "legendFormat": "{{ resource }}" + } + ], + "title": "Read SLI - Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) in 30 days have been answered successfully and fast enough?", + "fieldConfig": { + "defaults": { + "decimals": 3, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 16 + }, + "id": 8, + "interval": "1m", + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"apiserver_request:availability30d{verb=\"write\", cluster=\"$cluster\"}" + } + ], + "title": "Write Availability (30d)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many write requests (POST|PUT|PATCH|DELETE) per second do the apiservers get by code?", + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 100, + "stacking": { + "mode": "normal" + } + }, + "unit": "reqps" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/2../i" + }, + "properties": [ + { + "id": "color", + "value": "#56A64B" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/3../i" + }, + "properties": [ + { + "id": "color", + "value": "#F2CC0C" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/4../i" + }, + "properties": [ + { + "id": "color", + "value": "#3274D9" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/5../i" + }, + "properties": [ + { + "id": "color", + "value": "#E02F44" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 6, + "y": 16 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (code) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", + "legendFormat": "{{ code }}" + } + ], + "title": "Write SLI - Requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many percent of write requests (POST|PUT|PATCH|DELETE) per second are returned with errors (5xx)?", + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 12, + "y": 16 + }, + "id": 10, + "interval": "1m", + "options": { 
+ "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\",code=~\"5..\", cluster=\"$cluster\"}) / sum by (resource) (code_resource:apiserver_request_total:rate5m{verb=\"write\", cluster=\"$cluster\"})", + "legendFormat": "{{ resource }}" + } + ], + "title": "Write SLI - Errors", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "description": "How many seconds is the 99th percentile for writing (POST|PUT|PATCH|DELETE) a given resource?", + "fieldConfig": { + "defaults": { + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 16 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "cluster_quantile:apiserver_request_sli_duration_seconds:histogram_quantile{verb=\"write\", cluster=\"$cluster\"}", + "legendFormat": "{{ resource }}" + } + ], + "title": "Write SLI - Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 23 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(workqueue_adds_total{job=\"kube-apiserver\", 
instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", + "legendFormat": "{{instance}} {{name}}" + } + ], + "title": "Work Queue Add Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 23 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": false + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(workqueue_depth{job=\"kube-apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name)", + "legendFormat": "{{instance}} {{name}}" + } + ], + "title": "Work Queue Depth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 14, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{job=\"kube-apiserver\", instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])) by (instance, name, le))", + "legendFormat": "{{instance}} {{name}}" + } + ], + "title": "Work Queue Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + 
"y": 37 + }, + "id": 15, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "process_resident_memory_bytes{job=\"kube-apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "min": 0, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 37 + }, + "id": 16, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(process_cpu_seconds_total{job=\"kube-apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}[$__rate_interval])", + "legendFormat": "{{instance}}" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 37 + }, + "id": 17, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "go_goroutines{job=\"kube-apiserver\",instance=~\"$instance\", cluster=\"$cluster\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Goroutines", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + 
"kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-apiserver\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "name": "instance", + "query": "label_values(up{job=\"kube-apiserver\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / API server", + "uid": "09ec8aa1e996d6ffcd6817bbaff4db1b" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/cluster-total.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/cluster-total.json new file mode 100644 index 0000000..45ad32f --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/cluster-total.json @@ -0,0 +1,802 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": 
"table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bytes/" + }, + "properties": [ + { + "id": "unit", + "value": "binBps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + 
{ + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down", + "url": "/d/8b7a8b326d7a6f1f04244066368c67af/kubernetes-networking-namespace-pods?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${__data.fields.Namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 3, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg by (namespace) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg by (namespace) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk 
by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + } + ], + "title": "Current Status", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": 
"namespace", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Time 8": 7, + "Value #A": 9, + "Value #B": 10, + "Value #C": 11, + "Value #D": 12, + "Value #E": 13, + "Value #F": 14, + "Value #G": 15, + "Value #H": 16, + "namespace": 8 + }, + "renameByName": { + "Value #A": "Rx Bytes", + "Value #B": "Tx Bytes", + "Value #C": "Rx Bytes (Avg)", + "Value #D": "Tx Bytes (Avg)", + "Value #E": "Rx Packets", + "Value #F": "Tx Packets", + "Value #G": "Rx Packets Dropped", + "Value #H": "Tx Packets Dropped", + "namespace": "Namespace" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg by (namespace) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Average Rate of Bytes Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + 
"showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg by (namespace) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Average Rate of Bytes Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + 
"gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 9, + 
"interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": 
{ + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (namespace) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace!=\"\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 54 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (instance) (\n rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval]) / rate(node_netstat_Tcp_OutSegs{cluster=\"$cluster\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of TCP Retransmits out of all sent segments", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "percentunit" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 54 + }, + "id": 13, + 
"interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (instance) (\n rate(node_netstat_TcpExt_TCPSynRetrans{cluster=\"$cluster\"}[$__rate_interval]) / rate(node_netstat_Tcp_RetransSegs{cluster=\"$cluster\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of TCP SYN Retransmits out of all retransmits", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"cadvisor\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Cluster", + "uid": "ff635a025bcfea7bc3dd4f508990a3e9" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/controller-manager.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/controller-manager.json new file mode 100644 index 0000000..eaa4fc8 --- /dev/null +++ 
b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/controller-manager.json @@ -0,0 +1,592 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-controller-manager\"})", + "instant": true + } + ], + "title": "Up", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 20, + "x": 4, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(workqueue_adds_total{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name)", + "legendFormat": "{{cluster}} {{instance}} {{name}}" + } + ], + "title": "Work Queue Add Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + 
"showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(workqueue_depth{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}) by (cluster, instance, name)", + "legendFormat": "{{cluster}} {{instance}} {{name}}" + } + ], + "title": "Work Queue Depth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, name, le))", + "legendFormat": "{{cluster}} {{instance}} {{name}}" + } + ], + "title": "Work Queue Latency", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, +
"w": 8, + "x": 0, + "y": 21 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "legendFormat": "2xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "legendFormat": "3xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "legendFormat": "4xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{job=\"kube-controller-manager\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "legendFormat": "5xx" + } + ], + "title": "Kube API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 21 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + 
"expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Post Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-controller-manager\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Get Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 35 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 35 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}[$__rate_interval])", + "legendFormat": "{{instance}}" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 35 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-controller-manager\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Goroutines", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + 
"templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-controller-manager\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "instance", + "name": "instance", + "query": "label_values(up{cluster=\"$cluster\", job=\"kube-controller-manager\"}, instance)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Controller Manager", + "uid": "72e0e05bef5099e5f049b05fdc429ed4" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-cluster.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-cluster.json new file mode 100644 index 0000000..5891ed7 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-cluster.json @@ -0,0 +1,1575 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + 
"pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "cluster:node_cpu:ratio_rate5m{cluster=\"$cluster\"}", + "instant": true + } + ], + "title": "CPU Utilisation", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "instant": true + } + ], + "title": "CPU Requests Commitment", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"cpu\",cluster=\"$cluster\"})", + "instant": true + } + ], + "title": "CPU Limits Commitment", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 4, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "expr": "1 - sum(:node_memory_MemAvailable_bytes:sum{cluster=\"$cluster\"}) / sum(node_memory_MemTotal_bytes{job=\"node-exporter\",cluster=\"$cluster\"})", + "instant": true + } + ], + "title": "Memory Utilisation", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", + "instant": true + } + ], + "title": "Memory Requests Commitment", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 4, + "x": 20, + "y": 0 + }, + "id": 6, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",resource=\"memory\",cluster=\"$cluster\"})", + "instant": true + } + ], + "title": "Memory Limits Commitment", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + } + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + 
"lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 8, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\"}) by (namespace) / sum(namespace_cpu:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "namespace", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Value #A": 8, + "Value #B": 9, + "Value #C": 10, + "Value #D": 11, + "Value #E": 12, + "Value #F": 13, + "Value #G": 14, + "namespace": 7 + }, + "renameByName": { + "Value #A": "Pods", + "Value #B": "Workloads", + "Value #C": "CPU Usage", + "Value #D": "CPU Requests", + "Value #E": "CPU Requests %", + "Value #F": "CPU Limits", + "Value #G": "CPU Limits %", + "namespace": "Namespace" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + 
"type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Usage" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Requests" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Memory Limits" + }, + "properties": [ + { + "id": "unit", + "value": "bytes" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 24 + }, + "id": 10, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "expr": "sum(kube_pod_owner{job=\"kube-state-metrics\", cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(avg(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\"}) by (workload, namespace)) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_requests:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", container!=\"\"}) by (namespace) / sum(namespace_memory:kube_pod_container_resource_limits:sum{cluster=\"$cluster\"}) by (namespace)", + "format": "table", + "instant": true + } + ], + "title": "Memory Requests by Namespace", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "namespace", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { 
+ "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Value #A": 8, + "Value #B": 9, + "Value #C": 10, + "Value #D": 11, + "Value #E": 12, + "Value #F": 13, + "Value #G": 14, + "namespace": 7 + }, + "renameByName": { + "Value #A": "Pods", + "Value #B": "Workloads", + "Value #C": "Memory Usage", + "Value #D": "Memory Requests", + "Value #E": "Memory Requests %", + "Value #F": "Memory Limits", + "Value #G": "Memory Limits %", + "namespace": "Namespace" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bandwidth/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 30 + }, + "id": 11, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", 
cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "format": "table", + "instant": true + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "namespace", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "namespace": 6 + }, + "renameByName": { + "Value #A": "Current Receive Bandwidth", + "Value #B": "Current Transmit Bandwidth", + "Value #C": "Rate of Received Packets", + "Value #D": "Rate of 
Transmitted Packets", + "Value #E": "Rate of Received Packets Dropped", + "Value #F": "Rate of Transmitted Packets Dropped", + "namespace": "Namespace" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 36 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 42 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": 
"datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 14, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Namespace: Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 54 + }, + "id": 15, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "avg(irate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Namespace: Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + 
}, + "unit": "pps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 60 + }, + "id": 16, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 66 + }, + "id": 17, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 72 + }, + "id": 18, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + 
"displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 78 + }, + "id": 19, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=~\".+\"}[$__rate_interval])) by (namespace)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "iops" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 84 + }, + "id": 20, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ceil(sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval])))", + "legendFormat": "__auto" + } + ], + "title": "IOPS(Reads+Writes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 90 + }, + "id": 21, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "legendFormat": "__auto" + } + ], + "title": "Throughput(Read+Write)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/IOPS/" + }, + "properties": [ + { + "id": "unit", + "value": "iops" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Throughput/" + }, + "properties": [ + { + "id": "unit", + "value": 
"Bps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Namespace" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/85a562078cdf77779eaa1add43ccec1e/k8s-resources-namespace?${datasource:queryparam}&var-cluster=$cluster&var-namespace=${__data.fields.Namespace}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 96 + }, + "id": 22, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", 
cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(namespace) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace!=\"\"}[$__rate_interval]))", + "format": "table", + "instant": true + } + ], + "title": "Current Storage IO", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "namespace", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "namespace": 6 + }, + "renameByName": { + "Value #A": "IOPS(Reads)", + "Value #B": "IOPS(Writes)", + "Value #C": "IOPS(Reads + Writes)", + "Value #D": "Throughput(Read)", + "Value #E": "Throughput(Write)", + "Value #F": "Throughput(Read + Write)", + "namespace": "Namespace" + } + } + } + ], + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + 
"templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"cadvisor\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Cluster", + "uid": "efa86fd1d0c121a26444b636a3f509a8" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-namespace.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-namespace.json new file mode 100644 index 0000000..c2a6a4f --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-namespace.json @@ -0,0 +1,1506 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / 
sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "instant": true + } + ], + "title": "CPU Utilisation (from requests)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 6, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"})", + "instant": true + } + ], + "title": "CPU Utilisation (from limits)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 12, + "y": 0 + }, + "id": 3, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "instant": true + } + ], + "title": "Memory Utilisation (from requests)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "percentunit" + } + }, + "gridPos": { + "h": 3, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 4, + 
"interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})", + "instant": true + } + ], + "title": "Memory Utilisation (from limits)", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "legendFormat": "__auto" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"}))", + "legendFormat": "quota - requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"}))", + "legendFormat": "quota - limits" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 6, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Value #A": 6, + "Value #B": 7, + "Value #C": 8, + "Value #D": 9, + "Value #E": 10, + "pod": 5 + }, + "renameByName": { + "Value #A": "CPU Usage", + "Value #B": "CPU Requests", + "Value #C": "CPU Requests %", + "Value #D": "CPU Limits", + "Value #E": "CPU Limits %", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 
2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}) by (pod)", + "legendFormat": "__auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"}))", + "legendFormat": "quota - requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"}))", + "legendFormat": "quota - limits" + } + ], + "title": "Memory Usage (w/o cache)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + 
"title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 8, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\", image!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": 
{ + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Time 8": 7, + "Value #A": 9, + "Value #B": 10, + "Value #C": 11, + "Value #D": 12, + "Value #E": 13, + "Value #F": 14, + "Value #G": 15, + "Value #H": 16, + "pod": 8 + }, + "renameByName": { + "Value #A": "Memory Usage", + "Value #B": "Memory Requests", + "Value #C": "Memory Requests %", + "Value #D": "Memory Limits", + "Value #E": "Memory Limits %", + "Value #F": "Memory Usage (RSS)", + "Value #G": "Memory Usage (Cache)", + "Value #H": "Memory Usage (Swap)", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bandwidth/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { 
+ "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 9, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + }, + { + 
"datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "format": "table", + "instant": true + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "pod": 6 + }, + "renameByName": { + "Value #A": "Current Receive Bandwidth", + "Value #B": "Current Transmit Bandwidth", + "Value #C": "Rate of Received Packets", + "Value #D": "Rate of Transmitted Packets", + "Value #E": "Rate of Received Packets Dropped", + "Value #F": "Rate of Transmitted Packets Dropped", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" 
+ } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_receive_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": 
true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_transmit_packets_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 14, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_receive_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 15, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + 
"displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "iops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 16, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])))", + "legendFormat": "__auto" + } + ], + "title": "IOPS(Reads+Writes)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 17, + "interval": "1m", + "options": { + "legend": { + 
"asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{container!=\"\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "legendFormat": "__auto" + } + ], + "title": "ThroughPut(Read+Write)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/IOPS/" + }, + "properties": [ + { + "id": "unit", + "value": "iops" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Throughput/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 18, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + 
"instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]) + 
rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval]))", + "format": "table", + "instant": true + } + ], + "title": "Current Storage IO", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "pod": 6 + }, + "renameByName": { + "Value #A": "IOPS(Reads)", + "Value #B": "IOPS(Writes)", + "Value #C": "IOPS(Reads + Writes)", + "Value #D": "Throughput(Read)", + "Value #E": "Throughput(Write)", + "Value #F": "Throughput(Read + Write)", + "pod": "Pod" + } + } + } + ], + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "namespace", + "name": "namespace", + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + } 
+ ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Namespace (Pods)", + "uid": "85a562078cdf77779eaa1add43ccec1e" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-node.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-node.json new file mode 100644 index 0000000..9fbdf51 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-node.json @@ -0,0 +1,566 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "mode": "normal" + } + } + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.stacking", + "value": { + "mode": "none" + } + }, + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": false + } + }, + { + "id": "custom.lineStyle", + "value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"cpu\"})", + "legendFormat": "max capacity" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "legendFormat": "{{pod}}" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 6 + }, + "id": 2, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / 
sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", node=~\"$node\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "renameByName": { + "Value #A": "CPU Usage", + "Value #B": "CPU Requests", + "Value #C": "CPU Requests %", + "Value #D": "CPU Limits", + "Value #E": "CPU Limits %", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true, + "stacking": { + "mode": "normal" + } + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "max capacity" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + }, + { + "id": "custom.stacking", + "value": { + "mode": "none" + } + }, + { + "id": "custom.hideFrom", + "value": { + "legend": false, + "tooltip": true, + "viz": false + } + }, + { + "id": "custom.lineStyle", + 
"value": { + "dash": [ + 10, + 10 + ], + "fill": "dash" + } + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 12 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kube_node_status_capacity{cluster=\"$cluster\", job=\"kube-state-metrics\", node=~\"$node\", resource=\"memory\"})", + "legendFormat": "max capacity" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\", container!=\"\"}) by (pod)", + "legendFormat": "{{pod}}" + } + ], + "title": "Memory Usage (w/o cache)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 4, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": 
true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_working_set_bytes{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", node=~\"$node\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_rss{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_cache{cluster=\"$cluster\", node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_memory_swap{cluster=\"$cluster\", 
node=~\"$node\",container!=\"\"}) by (pod)", + "format": "table", + "instant": true + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true + }, + "renameByName": { + "Value #A": "Memory Usage", + "Value #B": "Memory Requests", + "Value #C": "Memory Requests %", + "Value #D": "Memory Limits", + "Value #E": "Memory Limits %", + "Value #F": "Memory Usage (RSS)", + "Value #G": "Memory Usage (Cache)", + "Value #H": "Memory Usage (Swap)", + "pod": "Pod" + } + } + } + ], + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "node", + "multi": true, + "name": "node", + "query": "label_values(kube_node_info{cluster=\"$cluster\"}, node)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Node (Pods)", + "uid": "200ac8fdbfbb74b39aff88118e4d1c2c" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-pod.json 
b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-pod.json new file mode 100644 index 0000000..030e1b3 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-pod.json @@ -0,0 +1,1372 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{namespace=\"$namespace\", pod=\"$pod\", cluster=\"$cluster\", container!=\"\"}) by (container)", + 
"legendFormat": "__auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "legendFormat": "requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"cpu\"}\n)\n", + "legendFormat": "limits" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "axisColorMode": "thresholds", + "axisSoftMax": 1, + "axisSoftMin": 0, + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true, + "thresholdsStyle": { + "mode": "dashed+area" + } + }, + "unit": "percentunit" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "A" + }, + "properties": [ + { + "id": "thresholds", + "value": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.25 + } + ] + } + }, + { + "id": "color", + "value": { + "mode": "thresholds", + "seriesBy": "lastNotNull" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(increase(container_cpu_cfs_throttled_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container) 
/sum(increase(container_cpu_cfs_periods_total{job=\"cadvisor\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", cluster=\"$cluster\"}[$__rate_interval])) by (container)", + "legendFormat": "__auto" + } + ], + "title": "CPU Throttling", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 3, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": 
"table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\"}) by (container)", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "container", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Value #A": 6, + "Value #B": 7, + "Value #C": 8, + "Value #D": 9, + "Value #E": 10, + "container": 5 + }, + "renameByName": { + "Value #A": "CPU Usage", + "Value #B": "CPU Requests", + "Value #C": "CPU Requests %", + "Value #D": "CPU Limits", + "Value #E": "CPU Limits %", + "container": "Container" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + 
"id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container)", + "legendFormat": "__auto" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "legendFormat": "requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", resource=\"memory\"}\n)\n", + "legendFormat": "limits" + } + ], + "title": "Memory Usage (WSS)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 5, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) 
by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container!=\"\", image!=\"\"}) by (container) / sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_limits{cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_rss{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_cache{job=\"cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(container_memory_swap{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\", container != \"\", container != \"POD\"}) by (container)", + "format": "table", + "instant": true + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "container", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Time 8": 7, + "Value #A": 9, + "Value #B": 10, + "Value #C": 11, + "Value #D": 12, + "Value #E": 13, + "Value #F": 14, + "Value #G": 15, + "Value #H": 16, + "container": 8 + }, + "renameByName": { + "Value #A": "Memory Usage", + "Value #B": "Memory Requests", + "Value #C": "Memory Requests %", + "Value #D": "Memory Limits", + "Value #E": "Memory Limits %", + "Value #F": "Memory Usage (RSS)", + "Value #G": "Memory Usage (Cache)", + "Value #H": "Memory Usage (Swap)", + "container": "Container" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + 
"pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(irate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_total{job=\"cadvisor\", 
cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets 
Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "iops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ceil(sum by(pod) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "legendFormat": "Reads" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ceil(sum by(pod) 
(rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval])))", + "legendFormat": "Writes" + } + ], + "title": "IOPS (Pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "legendFormat": "Reads" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(pod) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "legendFormat": "Writes" + } + ], + "title": "Throughput (Pod)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "iops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 14, + "interval": "1m", + "options": { + 
"legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "ceil(sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval])))", + "legendFormat": "__auto" + } + ], + "title": "IOPS (Containers)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 15, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "legendFormat": "__auto" + } + ], + "title": "ThroughPut (Containers)", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/IOPS/" + }, + 
"properties": [ + { + "id": "unit", + "value": "iops" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Throughput/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 16, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_writes_total{job=\"cadvisor\",device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_reads_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", 
pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by(container) (rate(container_fs_reads_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]) + rate(container_fs_writes_bytes_total{job=\"cadvisor\", device=~\"(/dev.+)|mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|dasd.+\", container!=\"\", cluster=\"$cluster\", namespace=\"$namespace\", pod=\"$pod\"}[$__rate_interval]))", + "format": "table", + "instant": true + } + ], + "title": "Current Storage IO", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "container", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "container": 6 + }, + "renameByName": { + "Value #A": "IOPS(Reads)", + "Value #B": "IOPS(Writes)", + "Value #C": "IOPS(Reads + Writes)", + "Value #D": "Throughput(Read)", + "Value #E": "Throughput(Write)", + "Value #F": "Throughput(Read + Write)", + "container": "Container" + } + } + } + ], + "type": "table" + } + ], + "refresh": "10s", + "schemaVersion": 39, + 
"tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "namespace", + "name": "namespace", + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "pod", + "name": "pod", + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\"}, pod)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Pod", + "uid": "6581e46e4e5c7ba40a07646395ef7b23" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workload.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workload.json new file mode 100644 index 0000000..03f4de8 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workload.json @@ -0,0 +1,1054 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": 
"Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + } + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "legendFormat": "__auto" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 2, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n 
node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Value #A": 6, + "Value #B": 7, + "Value #C": 8, + "Value #D": 9, + "Value #E": 10, + "pod": 5 + }, + "renameByName": { + "Value #A": "CPU Usage", + "Value #B": "CPU Requests", + "Value #C": "CPU Requests %", + "Value #D": "CPU Limits", + "Value #E": "CPU Limits %", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": 
{ + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "legendFormat": "__auto" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 4, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", 
workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n 
container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=\"$workload\", workload_type=~\"$type\"}\n) by (pod)\n", + "format": "table", + "instant": true + } + ], + "title": "Memory Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Value #A": 9, + "Value #B": 10, + "Value #C": 11, + "Value #D": 12, + "Value #E": 13, + "pod": 8 + }, + "renameByName": { + "Value #A": "Memory Usage", + "Value #B": "Memory Requests", + "Value #C": "Memory Requests %", + "Value #D": "Memory Limits", + "Value #E": "Memory Limits %", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bandwidth/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": 
"Drill down to pods", + "url": "/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 5, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", 
namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "format": "table", + "instant": true + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "pod": 6 + }, + "renameByName": { + "Value #A": "Current Receive Bandwidth", + "Value #B": "Current Transmit Bandwidth", + "Value #C": "Rate of 
Received Packets", + "Value #D": "Rate of Transmitted Packets", + "Value #E": "Rate of Received Packets Dropped", + "Value #F": "Rate of Transmitted Packets Dropped", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(avg(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Pod: Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + 
"placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(avg(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Pod: Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + 
"unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", 
+ "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "namespace", + "name": "namespace", + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 
2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "workload_type", + "name": "type", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\"}, workload_type)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "workload", + "name": "workload", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}, workload)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Workload", + "uid": "a164a7f0339f99e89cea5cb47e9be617" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workloads-namespace.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workloads-namespace.json new file mode 100644 index 0000000..39bb9b7 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/k8s-resources-workloads-namespace.json @@ -0,0 +1,1251 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + } + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": 
"custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "legendFormat": "{{workload}} - {{workload_type}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"}))", + "legendFormat": "quota - requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"}))", + "legendFormat": "quota - limits" + } + ], + "title": "CPU Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + 
"overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workload" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to workloads", + "url": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running Pods" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 7 + }, + "id": 2, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, 
workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\", namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"cpu\"}\n* on(namespace,pod)\n group_left(workload, workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + } + ], + "title": "CPU Quota", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "workload_type 2": true, + "workload_type 3": true, + "workload_type 4": true, + "workload_type 5": true, + "workload_type 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 8, + "Value #B": 9, + "Value #C": 10, + "Value #D": 11, + "Value #E": 12, + "Value #F": 13, + "workload": 6, + "workload_type 1": 7, + "workload_type 2": 14, + "workload_type 3": 15, + "workload_type 4": 16, + "workload_type 5": 17, + "workload_type 6": 18 + }, + "renameByName": { + "Value #A": "Running Pods", + "Value #B": "CPU Usage", + "Value #C": "CPU Requests", + "Value #D": "CPU Requests %", + "Value #E": "CPU Limits", + "Value #F": "CPU Limits %", + "workload": "Workload", + "workload_type 1": "Type" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byFrameRefID", + "options": "B" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": { + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byFrameRefID", + "options": "C" + }, + "properties": [ + { + "id": "custom.lineStyle", + "value": 
{ + "fill": "dash" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "color", + "value": { + "fixedColor": "orange", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "legendFormat": "{{workload}} - {{workload_type}}" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"}))", + "legendFormat": "quota - requests" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "scalar(max(kube_resourcequota{cluster=\"$cluster\", namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"}))", + "legendFormat": "quota - limits" + } + ], + "title": "Memory Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/%/" + }, + "properties": [ + { + "id": "unit", + "value": "percentunit" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workload" + }, + "properties": [ + { + "id": "links", + "value": [ + 
{ + "title": "Drill down to workloads", + "url": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Running Pods" + }, + "properties": [ + { + "id": "unit", + "value": "none" + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 4, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", 
container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_requests{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(\n container_memory_working_set_bytes{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\", container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n/sum(\n kube_pod_container_resource_limits{job=\"kube-state-metrics\", cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"}\n* on(namespace,pod)\n group_left(workload, workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n", + "format": "table", + "instant": true + } + ], + "title": "Memory Quota", + 
"transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "workload_type 2": true, + "workload_type 3": true, + "workload_type 4": true, + "workload_type 5": true, + "workload_type 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 8, + "Value #B": 9, + "Value #C": 10, + "Value #D": 11, + "Value #E": 12, + "Value #F": 13, + "workload": 6, + "workload_type 1": 7, + "workload_type 2": 14, + "workload_type 3": 15, + "workload_type 4": 16, + "workload_type 5": 17, + "workload_type 6": 18 + }, + "renameByName": { + "Value #A": "Running Pods", + "Value #B": "Memory Usage", + "Value #C": "Memory Requests", + "Value #D": "Memory Requests %", + "Value #E": "Memory Limits", + "Value #F": "Memory Limits %", + "workload": "Workload", + "workload_type 1": "Type" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bandwidth/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workload" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down to workloads", + "url": "/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 28 + }, + "id": 5, + 
"pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"(sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload))\n", + "format": "table", + "instant": true + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "workload": 6 + }, + "renameByName": { + "Value #A": "Current Receive Bandwidth", + "Value #B": "Current Transmit Bandwidth", + "Value #C": "Rate of Received Packets", + "Value #D": "Rate of Transmitted Packets", + "Value #E": "Rate of Received Packets Dropped", + "Value #F": "Rate of Transmitted Packets Dropped", + "workload": "Workload" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + 
"unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 35 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + 
"datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(avg(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Workload: Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(avg(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Workload: Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + 
"uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + 
"lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "namespace", + "name": "namespace", + "query": "label_values(kube_namespace_status_phase{job=\"kube-state-metrics\", cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "workload_type", + "name": "type", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] 
+ }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Compute Resources / Namespace (Workloads)", + "uid": "a87fb0d919ec0ea5f6543124e16c42a5" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/kubelet.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/kubelet.json new file mode 100644 index 0000000..def79f0 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/kubelet.json @@ -0,0 +1,1241 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kubelet_node_name{cluster=\"$cluster\", job=\"kubelet\"})", + "instant": true + } + ], + "title": "Running Kubelets", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 4, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kubelet_running_pods{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "instant": true + } + ], + "title": "Running Pods", + "type": "stat" + }, + { + 
"datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 8, + "y": 0 + }, + "id": 3, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(kubelet_running_containers{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"})", + "instant": true + } + ], + "title": "Running Containers", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 12, + "y": 0 + }, + "id": 4, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\", state=\"actual_state_of_world\"})", + "instant": true + } + ], + "title": "Actual Volume Count", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 16, + "y": 0 + }, + "id": 5, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(volume_manager_total_volumes{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\",state=\"desired_state_of_world\"})", + "instant": true + } + ], + "title": "Desired Volume Count", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 20, + "y": 0 + 
}, + "id": 6, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubelet_node_config_error{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval]))", + "instant": true + } + ], + "title": "Config Error Count", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubelet_runtime_operations_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (operation_type, instance)", + "legendFormat": "{{instance}} {{operation_type}}" + } + ], + "title": "Operation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"sum(rate(kubelet_runtime_operations_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "legendFormat": "{{instance}} {{operation_type}}" + } + ], + "title": "Operation Error Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_runtime_operations_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "legendFormat": "{{instance}} {{operation_type}}" + } + ], + "title": "Operation Duration 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"sum(rate(kubelet_pod_start_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "legendFormat": "{{instance}} pod" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubelet_pod_worker_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "legendFormat": "{{instance}} worker" + } + ], + "title": "Pod Start Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_start_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "legendFormat": "{{instance}} pod" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "legendFormat": "{{instance}} worker" + } + ], + "title": "Pod Start Duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + 
"gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 12, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(storage_operation_duration_seconds_count{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}" + } + ], + "title": "Storage Operation Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 13, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(storage_operation_errors_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin)", + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}" + } + ], + "title": "Storage Operation Error Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 
14, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(storage_operation_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_name, volume_plugin, le))", + "legendFormat": "{{instance}} {{operation_name}} {{volume_plugin}}" + } + ], + "title": "Storage Operation Duration 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 42 + }, + "id": 15, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubelet_cgroup_manager_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type)", + "legendFormat": "{{operation_type}}" + } + ], + "title": "Cgroup manager operation rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 42 + }, + "id": 16, + "interval": "1m", + "options": { + 
"legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_cgroup_manager_duration_seconds_bucket{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, operation_type, le))", + "legendFormat": "{{instance}} {{operation_type}}" + } + ], + "title": "Cgroup manager 99th quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 17, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubelet_pleg_relist_duration_seconds_count{cluster=\"$cluster\", job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance)", + "legendFormat": "{{instance}}" + } + ], + "title": "PLEG relist rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 18, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + 
"showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_interval_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "legendFormat": "{{instance}}" + } + ], + "title": "PLEG relist interval", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 56 + }, + "id": 19, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "legendFormat": "{{instance}}" + } + ], + "title": "PLEG relist duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 63 + }, + "id": 20, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "legendFormat": "2xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "legendFormat": "3xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "legendFormat": "4xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "legendFormat": "5xx" + } + ], + "title": "RPC rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 70 + }, + "id": 21, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\",job=\"kubelet\", instance=~\"$instance\"}[$__rate_interval])) by (instance, verb, url, le))", + "legendFormat": "{{instance}} {{verb}} {{url}}" + } + ], + "title": "Request duration 99th quantile", + "type": "timeseries" + 
}, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 77 + }, + "id": 22, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "process_resident_memory_bytes{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 77 + }, + "id": 23, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}[$__rate_interval])", + "legendFormat": "{{instance}}" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 77 + }, + "id": 24, + "interval": 
"1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "go_goroutines{cluster=\"$cluster\",job=\"kubelet\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Goroutines", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kubelet\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "instance", + "name": "instance", + "query": "label_values(up{job=\"kubelet\",cluster=\"$cluster\"}, instance)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Kubelet", + "uid": "3138fa155d5915769fbded898ac09fd9" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-pod.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-pod.json new file mode 100644 index 0000000..9c76406 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-pod.json @@ -0,0 +1,626 @@ 
+{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "displayName": "$namespace", + "max": 10000000000, + "min": 0, + "thresholds": { + "steps": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Received", + "type": "gauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "displayName": "$namespace", + "max": 10000000000, + "min": 0, + "thresholds": { + "steps": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum (\n 
rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "gauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bandwidth/" + }, + "properties": [ + { + "id": "unit", + "value": "Bps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Pod" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down", + "url": "/d/7a18067ce943a40ae25454675c19ff5c/kubernetes-networking-pod?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-pod=${__data.fields.Pod}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 3, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) 
(kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "format": "table", + "instant": true + } + ], + "title": "Current Network Usage", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "pod", + "mode": "outer" + } + }, + { + "id": 
"organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Value #A": 7, + "Value #B": 8, + "Value #C": 9, + "Value #D": 10, + "Value #E": 11, + "Value #F": 12, + "pod": 6 + }, + "renameByName": { + "Value #A": "Current Receive Bandwidth", + "Value #B": "Current Transmit Bandwidth", + "Value #C": "Rate of Received Packets", + "Value #D": "Rate of Transmitted Packets", + "Value #E": "Rate of Received Packets Dropped", + "Value #F": "Rate of Transmitted Packets Dropped", + "pod": "Pod" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 5, + "interval": "1m", + 
"options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": 
"table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend":
true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum by (pod) (\n rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n * on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n)\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"cadvisor\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "kube-system", + "value": "kube-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "namespace", + "name": "namespace", + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Namespace (Pods)", + "uid": "8b7a8b326d7a6f1f04244066368c67af" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-workload.json 
b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-workload.json new file mode 100644 index 0000000..931786a --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/namespace-by-workload.json @@ -0,0 +1,784 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "displayMode": "basic", + "showUnfilled": false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Received", + "type": "bargauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "displayMode": "basic", + "showUnfilled": 
false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "bargauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/Bytes/" + }, + "properties": [ + { + "id": "unit", + "value": "binBps" + } + ] + }, + { + "matcher": { + "id": "byRegexp", + "options": "/Packets/" + }, + "properties": [ + { + "id": "unit", + "value": "pps" + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "Workload" + }, + "properties": [ + { + "id": "links", + "value": [ + { + "title": "Drill down", + "url": "/d/728bf77cc1166d2f3133bf25846876cc/kubernetes-networking-workload?${datasource:queryparam}&var-cluster=${cluster}&var-namespace=${namespace}&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 3, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on 
(namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + 
"format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"sort_desc(sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (namespace,pod) kube_pod_info{cluster=\"$cluster\",namespace=\"$namespace\",host_network=\"false\"}\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload, workload_type))\n", + "format": "table", + "instant": true + } + ], + "title": "Current Status", + "transformations": [ + { + "id": "joinByField", + "options": { + "byField": "workload", + "mode": "outer" + } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true, + "Time 7": true, + "Time 8": true, + "workload_type 2": true, + "workload_type 3": true, + "workload_type 4": true, + "workload_type 5": true, + "workload_type 6": true, + "workload_type 7": true, + "workload_type 8": true + }, + "indexByName": { + "Time 1": 0, + "Time 2": 1, + "Time 3": 2, + "Time 4": 3, + "Time 5": 4, + "Time 6": 5, + "Time 7": 6, + "Time 8": 7, + "Value #A": 10, + "Value #B": 11, + "Value #C": 12, + "Value #D": 13, + "Value #E": 14, + "Value #F": 15, + "Value #G": 16, + "Value #H": 17, + "workload": 8, + "workload_type 1": 9, + "workload_type 2": 18, + "workload_type 3": 19, + "workload_type 4": 20, + "workload_type 5": 21, + "workload_type 6": 22, + "workload_type 7": 23, + "workload_type 8": 24 + }, + "renameByName": { + "Value #A": "Rx Bytes", + "Value #B": "Tx Bytes", + "Value #C": "Rx Bytes (Avg)", + "Value #D": "Tx Bytes (Avg)", + "Value #E": "Rx Packets", + "Value #F": "Tx Packets", + "Value #G": "Rx Packets Dropped", + "Value #H": "Tx Packets Dropped", + "workload": "Workload", + "workload_type 1": "Type" + } + } + } + ], + "type": "table" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- 
Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left 
()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Workload: Received", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + 
}, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Container Bandwidth by Workload: Transmitted", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on 
(cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 45 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": 
"table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 45 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=\"$namespace\"}[$__rate_interval])\n* on (cluster,namespace,pod) group_left ()\n topk by (cluster,namespace,pod) (\n 1,\n max by (cluster,namespace,pod) (kube_pod_info{host_network=\"false\"})\n )\n* on (cluster,namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=\"$namespace\", workload=~\".+\", 
workload_type=~\"$type\"}) by (workload))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"cadvisor\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "kube-system", + "value": "kube-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "namespace", + "name": "namespace", + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "workload_type", + "name": "type", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\", workload=~\".+\"}, workload_type)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Namespace (Workload)", + "uid": "bbb2a765a623ae38130206c7d94a160f" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/persistentvolumesusage.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/persistentvolumesusage.json new file mode 100644 index 0000000..db3d4c8 --- 
/dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/persistentvolumesusage.json @@ -0,0 +1,310 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 18, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(\n sum without(instance, node) (topk(1, (kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "legendFormat": "Used Space" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n", + "legendFormat": "Free Space" + } + ], + "title": "Volume Space Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "max": 100, + "min": 0, + 
"thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 0 + }, + "id": 2, + "interval": "1m", + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "max without(instance,node) (\n(\n topk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n -\n topk(1, kubelet_volume_stats_available_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n)\n/\ntopk(1, kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "instant": true + } + ], + "title": "Volume Space Usage", + "type": "gauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 18, + "y": 7 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))", + "legendFormat": "Used inodes" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "(\n sum without(instance, node) (topk(1, 
(kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n -\n sum without(instance, node) (topk(1, (kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})))\n)\n", + "legendFormat": "Free inodes" + } + ], + "title": "Volume inodes Usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "max": 100, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "orange", + "value": 80 + }, + { + "color": "red", + "value": 90 + } + ] + }, + "unit": "percent" + } + }, + "gridPos": { + "h": 7, + "w": 6, + "x": 18, + "y": 7 + }, + "id": 4, + "interval": "1m", + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "max without(instance,node) (\ntopk(1, kubelet_volume_stats_inodes_used{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n/\ntopk(1, kubelet_volume_stats_inodes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\", persistentvolumeclaim=\"$volume\"})\n* 100)\n", + "instant": true + } + ], + "title": "Volume inodes Usage", + "type": "gauge" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(kubelet_volume_stats_capacity_bytes{job=\"kubelet\"}, 
cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "Namespace", + "name": "namespace", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "PersistentVolumeClaim", + "name": "volume", + "query": "label_values(kubelet_volume_stats_capacity_bytes{cluster=\"$cluster\", job=\"kubelet\", namespace=\"$namespace\"}, persistentvolumeclaim)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Persistent Volumes", + "uid": "919b92a8e8041bd567af9edab12c840c" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/pod-total.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/pod-total.json new file mode 100644 index 0000000..55dbce6 --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/pod-total.json @@ -0,0 +1,480 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "displayName": "$pod", + "max": 10000000000, + "min": 0, + "thresholds": { + "steps": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ] + }, + 
"unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Received", + "type": "gauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "displayName": "$pod", + "max": 10000000000, + "min": 0, + "thresholds": { + "steps": [ + { + "color": "dark-green", + "index": 0, + "value": null + }, + { + "color": "dark-yellow", + "index": 1, + "value": 5000000000 + }, + { + "color": "dark-red", + "index": 2, + "value": 7000000000 + } + ] + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval]))", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "gauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": 
"sum(rate(container_network_receive_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Receive Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_bytes_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + 
"showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_receive_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "showPoints": "never" + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + 
"targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(container_network_transmit_packets_dropped_total{cluster=\"$cluster\",namespace=~\"$namespace\", pod=~\"$pod\"}[$__rate_interval])) by (pod)", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"cadvisor\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "kube-system", + "value": "kube-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "namespace", + "name": "namespace", + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "current": { + "selected": false, + "text": "kube-system", + "value": "kube-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "pod", + "name": "pod", + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\",namespace=~\"$namespace\"}, pod)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Pod", + "uid": "7a18067ce943a40ae25454675c19ff5c" +} diff --git 
a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/proxy.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/proxy.json new file mode 100644 index 0000000..145340d --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/proxy.json @@ -0,0 +1,643 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-proxy\"})", + "instant": true + } + ], + "title": "Up", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 4, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubeproxy_sync_proxy_rules_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", + "legendFormat": 
"rate" + } + ], + "title": "Rules Sync Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99,rate(kubeproxy_sync_proxy_rules_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", + "legendFormat": "{{instance}}" + } + ], + "title": "Rules Sync Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 7 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(kubeproxy_network_programming_duration_seconds_count{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval]))", + "legendFormat": "rate" + } + ], + "title": "Network Programming Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + 
"defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 7 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(kubeproxy_network_programming_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\"}[$__rate_interval])) by (instance, le))", + "legendFormat": "{{instance}}" + } + ], + "title": "Network Programming Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 14 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "legendFormat": "2xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "legendFormat": "3xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "legendFormat": "4xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\",job=\"kube-proxy\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "legendFormat": "5xx" + } + ], + "title": "Kube API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 14 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\",verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Post Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + 
"pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-proxy\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Get Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 28 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 28 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", 
job=\"kube-proxy\",instance=~\"$instance\"}[$__rate_interval])", + "legendFormat": "{{instance}}" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 28 + }, + "id": 11, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-proxy\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Goroutines", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-proxy\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "instance", + "name": "instance", + "query": "label_values(up{job=\"kube-proxy\", cluster=\"$cluster\", job=\"kube-proxy\"}, instance)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Proxy", + "uid": 
"632e265de029684c40b21cb76bca4f94" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/scheduler.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/scheduler.json new file mode 100644 index 0000000..a8a57fa --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/scheduler.json @@ -0,0 +1,589 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "unit": "none" + } + }, + "gridPos": { + "h": 7, + "w": 4, + "x": 0, + "y": 0 + }, + "id": 1, + "interval": "1m", + "options": { + "colorMode": "none" + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(up{cluster=\"$cluster\", job=\"kube-scheduler\"})", + "instant": true + } + ], + "title": "Up", + "type": "stat" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 4, + "y": 0 + }, + "id": 2, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(scheduler_e2e_scheduling_duration_seconds_count{cluster=\"$cluster\", 
job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "legendFormat": "{{cluster}} {{instance}} e2e" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(scheduler_binding_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "legendFormat": "{{cluster}} {{instance}} binding" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(scheduler_scheduling_algorithm_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "legendFormat": "{{cluster}} {{instance}} scheduling algorithm" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(scheduler_volume_scheduling_duration_seconds_count{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance)", + "legendFormat": "{{cluster}} {{instance}} volume" + } + ], + "title": "Scheduling Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 10, + "x": 14, + "y": 0 + }, + "id": 3, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(scheduler_e2e_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by 
(cluster, instance, le))", + "legendFormat": "{{cluster}} {{instance}} e2e" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(scheduler_binding_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "legendFormat": "{{cluster}} {{instance}} binding" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(scheduler_scheduling_algorithm_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "legendFormat": "{{cluster}} {{instance}} scheduling algorithm" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(scheduler_volume_scheduling_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}[$__rate_interval])) by (cluster, instance, le))", + "legendFormat": "{{cluster}} {{instance}} volume" + } + ], + "title": "Scheduling latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 7 + }, + "id": 4, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", 
instance=~\"$instance\",code=~\"2..\"}[$__rate_interval]))", + "legendFormat": "2xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"3..\"}[$__rate_interval]))", + "legendFormat": "3xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"4..\"}[$__rate_interval]))", + "legendFormat": "4xx" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sum(rate(rest_client_requests_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\",code=~\"5..\"}[$__rate_interval]))", + "legendFormat": "5xx" + } + ], + "title": "Kube API Request Rate", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "ops" + } + }, + "gridPos": { + "h": 7, + "w": 16, + "x": 8, + "y": 7 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"POST\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Post Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { 
+ "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "s" + } + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_duration_seconds_bucket{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\", verb=\"GET\"}[$__rate_interval])) by (verb, url, le))", + "legendFormat": "{{verb}} {{url}}" + } + ], + "title": "Get Request Latency 99th Quantile", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "bytes" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 0, + "y": 21 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "process_resident_memory_bytes{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Memory", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 8, + "y": 21 + }, + 
"id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "rate(process_cpu_seconds_total{cluster=\"$cluster\", job=\"kube-scheduler\", instance=~\"$instance\"}[$__rate_interval])", + "legendFormat": "{{instance}}" + } + ], + "title": "CPU usage", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "short" + } + }, + "gridPos": { + "h": 7, + "w": 8, + "x": 16, + "y": 21 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "go_goroutines{cluster=\"$cluster\", job=\"kube-scheduler\",instance=~\"$instance\"}", + "legendFormat": "{{instance}}" + } + ], + "title": "Goroutines", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(up{job=\"kube-scheduler\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + 
"allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "instance", + "name": "instance", + "query": "label_values(up{job=\"kube-scheduler\", cluster=\"$cluster\"}, instance)", + "refresh": 2, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Scheduler", + "uid": "2e6b6a3b4bddf1427b3a55aa1311c656" +} diff --git a/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/workload-total.json b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/workload-total.json new file mode 100644 index 0000000..b0be78f --- /dev/null +++ b/internal-sto4-test-monitor-1.rut.sunet.se/overlay/opt/naemon_monitor/grafana-provisioning/dashboards/k8s/workload-total.json @@ -0,0 +1,570 @@ +{ + "editable": false, + "links": [ + { + "asDropdown": true, + "includeVars": true, + "keepTime": true, + "tags": [ + "kubernetes-mixin" + ], + "targetBlank": false, + "title": "Kubernetes", + "type": "dashboards" + } + ], + "panels": [ + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 1, + "options": { + "displayMode": "basic", + "showUnfilled": false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", 
+ "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Received", + "type": "bargauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 2, + "options": { + "displayMode": "basic", + "showUnfilled": false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Current Rate of Bytes Transmitted", + "type": "bargauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 9 + }, + "id": 3, + "options": { + "displayMode": "basic", + "showUnfilled": false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Rate of Bytes Received", + "type": "bargauge" + }, + { + 
"datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "green", + "mode": "fixed" + }, + "unit": "Bps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 9 + }, + "id": 4, + "options": { + "displayMode": "basic", + "showUnfilled": false + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(avg(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Average Rate of Bytes Transmitted", + "type": "bargauge" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 18 + }, + "id": 5, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Receive 
Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "binBps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 6, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_bytes_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Transmit Bandwidth", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 27 + }, + "id": 7, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) 
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 27 + }, + "id": 8, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_packets_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 9, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": 
"${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_receive_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Received Packets Dropped", + "type": "timeseries" + }, + { + "datasource": { + "type": "datasource", + "uid": "-- Mixed --" + }, + "fieldConfig": { + "defaults": { + "custom": { + "fillOpacity": 10, + "showPoints": "never", + "spanNulls": true + }, + "unit": "pps" + } + }, + "gridPos": { + "h": 9, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 10, + "interval": "1m", + "options": { + "legend": { + "asTable": true, + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single" + } + }, + "pluginVersion": "v11.1.0", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "expr": "sort_desc(sum(rate(container_network_transmit_packets_dropped_total{job=\"cadvisor\", cluster=\"$cluster\",namespace=~\"$namespace\"}[$__rate_interval])\n* on (namespace,pod)\ngroup_left(workload,workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",namespace=~\"$namespace\", workload=~\"$workload\", workload_type=~\"$type\"}) by (pod))\n", + "legendFormat": "__auto" + } + ], + "title": "Rate of Transmitted Packets Dropped", + "type": "timeseries" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "tags": [ + "kubernetes-mixin" + ], + "templating": { + "list": [ + { + "current": { + "selected": true, + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "query": "prometheus", + "regex": "", + "type": "datasource" + }, + { + "datasource": { + 
"type": "prometheus", + "uid": "${datasource}" + }, + "hide": 2, + "label": "cluster", + "name": "cluster", + "query": "label_values(kube_pod_info{job=\"kube-state-metrics\"}, cluster)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "current": { + "selected": false, + "text": "kube-system", + "value": "kube-system" + }, + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "namespace", + "name": "namespace", + "query": "label_values(container_network_receive_packets_total{cluster=\"$cluster\"}, namespace)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "label": "workload", + "name": "workload", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\".+\"}, workload)", + "refresh": 2, + "sort": 1, + "type": "query" + }, + { + "allValue": ".+", + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "hide": 0, + "includeAll": true, + "label": "workload_type", + "name": "type", + "query": "label_values(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=~\"$namespace\", workload=~\"$workload\"}, workload_type)", + "refresh": 2, + "sort": 1, + "type": "query" + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timezone": "UTC", + "title": "Kubernetes / Networking / Workload", + "uid": "728bf77cc1166d2f3133bf25846876cc" +}