forked from SUNET/soc-ops
Copy class sunet::naemon_monitor to soc as we use acme-d for certs.
This commit is contained in:
parent
66b4b1783d
commit
f01b8efa63
24 changed files with 25857 additions and 2 deletions
|
@ -46,8 +46,7 @@ intelmq-dev.cert.sunet.se:
|
|||
|
||||
monitor-dev.cert.sunet.se:
|
||||
sunet::dockerhost2:
|
||||
sunet::certbot::acmed:
|
||||
sunet::naemon_monitor:
|
||||
soc::naemon_monitor:
|
||||
domain: monitor-dev.cert.sunet.se
|
||||
thruk_admins:
|
||||
- bjorklund@sunet.se
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
do '/usr/share/thruk/menu.conf';
|
||||
insert_item('General', { 'href' => '/grafana', 'name' => 'Grafana', target => '_self' });
|
|
@ -0,0 +1,496 @@
|
|||
# @summary Run naemon with Thruk.
|
||||
# @param receive_otel Feature flag to enable the LGTM stack
|
||||
# @param otel_retention Number of hours to keep logs, metrics and traces, defaults to 3 months
|
||||
#
|
||||
class soc::naemon_monitor (
|
||||
String $domain,
|
||||
String $influx_password = lookup('influx_password', String, undef, ''),
|
||||
String $naemon_tag = 'latest',
|
||||
Array $naemon_extra_volumes = [],
|
||||
Array $thruk_extra_volumes = [],
|
||||
Array $resolvers = [],
|
||||
String $thruk_tag = 'latest',
|
||||
Array $thruk_admins = ['placeholder'],
|
||||
Array $thruk_users = [],
|
||||
String $influxdb_tag = '1.8',
|
||||
String $histou_tag = 'latest',
|
||||
String $nagflux_tag = 'latest',
|
||||
String $grafana_tag = '11.1.4',
|
||||
String $grafana_default_role = 'Viewer',
|
||||
String $loki_tag = '3.1.1',
|
||||
String $mimir_tag = '2.13.0',
|
||||
String $tempo_tag = '2.6.0',
|
||||
String $alloy_tag = 'v1.3.0',
|
||||
Hash $manual_hosts = {},
|
||||
Hash $additional_entities = {},
|
||||
String $nrpe_group = 'nrpe',
|
||||
String $interface = 'ens3',
|
||||
Array $exclude_hosts = [],
|
||||
Optional[String] $default_host_group = undef,
|
||||
Array[Optional[String]] $optout_checks = [],
|
||||
Optional[Boolean] $receive_otel = false,
|
||||
String $otel_retention = '2232h',
|
||||
String $acme_provider = 'acme-d',
|
||||
) {
|
||||
include sunet::systemd_reload
|
||||
|
||||
$naemon_container = $::facts['dockerhost2'] ? {
|
||||
'yes' => 'naemon_monitor-naemon-1',
|
||||
default => 'naemon_monitor_naemon_1',
|
||||
}
|
||||
|
||||
if $::facts['sunet_nftables_enabled'] == 'yes' {
|
||||
sunet::nftables::docker_expose { 'allow_http' :
|
||||
iif => $interface,
|
||||
allow_clients => 'any',
|
||||
port => 80,
|
||||
}
|
||||
sunet::nftables::docker_expose { 'allow_https' :
|
||||
iif => $interface,
|
||||
allow_clients => 'any',
|
||||
port => 443,
|
||||
}
|
||||
if $receive_otel {
|
||||
sunet::nftables::docker_expose { 'allow_otel_grpc' :
|
||||
iif => $interface,
|
||||
allow_clients => 'any',
|
||||
port => 4317,
|
||||
}
|
||||
sunet::nftables::docker_expose { 'allow_otel_http' :
|
||||
iif => $interface,
|
||||
allow_clients => 'any',
|
||||
port => 4318,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sunet::misc::ufw_allow { 'allow-http':
|
||||
from => 'any',
|
||||
port => '80',
|
||||
}
|
||||
sunet::misc::ufw_allow { 'allow-https':
|
||||
from => 'any',
|
||||
port => '443',
|
||||
}
|
||||
if $receive_otel {
|
||||
sunet::misc::ufw_allow { 'allow-otel-grpc':
|
||||
from => 'any',
|
||||
port => '4317',
|
||||
}
|
||||
sunet::misc::ufw_allow { 'allow-otel-http':
|
||||
from => 'any',
|
||||
port => '4318',
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
# Certificate provisioning: select the ACME tooling based on $acme_provider.
if $acme_provider == 'dehydrated' {
  class { 'sunet::dehydrated::client': domain => $domain, ssl_links => true }
} elsif $acme_provider == 'acme-d' {
  # FIX: a resource-like class declaration requires a trailing colon after
  # the title; `class { 'name' }` without it is a Puppet syntax error.
  class { 'sunet::certbot::acmed': }
  # Apache vhost for Thruk using the /etc/letsencrypt paths maintained by
  # acme-d; mounted into the thruk container by the docker-compose template.
  file { '/opt/naemon_monitor/apache-thruk.conf':
    ensure  => file,
    content => template('soc/naemon_monitor/apache-thruk.conf.erb'),
    mode    => '0444',
  }
}
|
||||
|
||||
if lookup('shib_key', undef, undef, undef) != undef {
|
||||
sunet::snippets::secret_file { '/opt/naemon_monitor/shib-certs/sp-key.pem': hiera_key => 'shib_key' }
|
||||
# assume cert is in cosmos repo (overlay)
|
||||
}
|
||||
|
||||
$thruk_admins_string = inline_template('ADMIN_USERS=<%- @thruk_admins.each do |user| -%><%= user %>,<%- end -%>')
|
||||
$thruk_users_string = inline_template('READONLY_USERS=<%- @thruk_users.each do |user| -%><%= user %>,<%- end -%>')
|
||||
$thruk_env = [$thruk_admins_string, $thruk_users_string]
|
||||
|
||||
if $influx_password == '' {
|
||||
err('ERROR: influx password not set')
|
||||
}
|
||||
$influx_env = ['INFLUXDB_ADMIN_USER=admin',"INFLUXDB_ADMIN_PASSWORD=${influx_password}", 'INFLUXDB_DB=nagflux']
|
||||
$nagflux_env = ["INFLUXDB_ADMIN_PASSWORD=${influx_password}"]
|
||||
|
||||
file { '/etc/systemd/system/sunet-naemon_monitor.service.d/':
|
||||
ensure => directory,
|
||||
recurse => true,
|
||||
}
|
||||
file { '/opt/naemon_monitor/menu_local.conf':
|
||||
ensure => file,
|
||||
content => file('soc/naemon_monitor/menu_local.conf'),
|
||||
}
|
||||
|
||||
file { '/etc/systemd/system/sunet-naemon_monitor.service.d/override.conf':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/service-override.conf.erb'),
|
||||
require => File['/etc/systemd/system/sunet-naemon_monitor.service.d/'],
|
||||
notify => Class['sunet::systemd_reload'],
|
||||
}
|
||||
|
||||
sunet::docker_compose { 'naemon_monitor':
|
||||
content => template('soc/naemon_monitor/docker-compose.yml.erb'),
|
||||
service_name => 'naemon_monitor',
|
||||
compose_dir => '/opt/',
|
||||
compose_filename => 'docker-compose.yml',
|
||||
description => 'Naemon monitoring (with Thruk)',
|
||||
require => File['/etc/systemd/system/sunet-naemon_monitor.service.d/override.conf'],
|
||||
}
|
||||
|
||||
|
||||
# This section can be removed when the class is run on all machines
|
||||
file { '/opt/naemon_monitor/stop-monitor.sh':
|
||||
ensure => absent,
|
||||
}
|
||||
#
|
||||
|
||||
file { '/etc/logrotate.d/naemon_monitor':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/logrotate.erb'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
|
||||
file { '/opt/naemon_monitor/grafana.ini':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana.ini'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/histou.js':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/histou.js'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning':
|
||||
ensure => directory,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/datasources':
|
||||
ensure => directory,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/dashboards':
|
||||
ensure => directory,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/datasources/influxdb.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/datasources/influxdb.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/data':
|
||||
ensure => directory,
|
||||
owner => 'www-data',
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
}
|
||||
if $receive_otel {
|
||||
# Grafana can only use one group via the apache proxy auth module, so we cheat and make everyone editors
|
||||
# and admins can be manually assigned via gui.
|
||||
$allowed_users_string = join($thruk_admins + $thruk_users,' ')
|
||||
file { '/opt/naemon_monitor/groups.txt':
|
||||
ensure => file,
|
||||
content => inline_template('editors:<%= @allowed_users_string-%>'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/datasources/loki.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/datasources/loki.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/datasources/mimir.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/datasources/mimir.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/datasources/tempo.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/datasources/tempo.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/dashboards/default.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/dashboards/default.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/dashboards/overview.json':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/dashboards/overview.json'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana-provisioning/dashboards/node-export-full.json':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/grafana-provisioning/dashboards/node-export-full.json'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/loki-server.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/loki-server.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/loki':
|
||||
ensure => directory,
|
||||
owner => '10001',
|
||||
mode => '0644',
|
||||
group => '10001',
|
||||
}
|
||||
file { '/opt/naemon_monitor/mimir':
|
||||
ensure => directory,
|
||||
owner => 'root',
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/mimir-server.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/mimir-server.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/tempo':
|
||||
ensure => directory,
|
||||
owner => '10001',
|
||||
mode => '0644',
|
||||
group => '10001',
|
||||
}
|
||||
file { '/opt/naemon_monitor/tempo-server.yaml':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/tempo-server.yaml'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
file { '/opt/naemon_monitor/alloy-server.alloy':
|
||||
ensure => file,
|
||||
content => template('soc/naemon_monitor/alloy-server.alloy'),
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
}
|
||||
file { '/opt/naemon_monitor/grafana':
|
||||
ensure => directory,
|
||||
owner => 'www-data',
|
||||
mode => '0664',
|
||||
group => 'root',
|
||||
}
|
||||
|
||||
file { '/usr/lib/nagios/plugins/cosmos':
|
||||
ensure => directory,
|
||||
recurse => true,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
}
|
||||
|
||||
$nagioscfg_dirs = ['/etc/', '/etc/naemon/', '/etc/naemon/conf.d/', '/etc/naemon/conf.d/nagioscfg/', '/etc/naemon/conf.d/cosmos/']
|
||||
$nagioscfg_dirs.each |$dir| {
|
||||
ensure_resource('file',$dir, {
|
||||
ensure => directory,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
})
|
||||
}
|
||||
|
||||
nagioscfg::contactgroup { 'alerts': }
|
||||
|
||||
unless 'load' in $optout_checks {
|
||||
nagioscfg::service { 'check_load':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_load',
|
||||
description => 'System Load',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'users' in $optout_checks {
|
||||
nagioscfg::service { 'check_users':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_users',
|
||||
description => 'Active Users',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'zombie_procs' in $optout_checks {
|
||||
nagioscfg::service { 'check_zombie_procs':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_zombie_procs',
|
||||
description => 'Zombie Processes',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'total_procs' in $optout_checks {
|
||||
nagioscfg::service { 'check_total_procs':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_total_procs_lax',
|
||||
description => 'Total Processes',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'dynamic_disk' in $optout_checks {
|
||||
nagioscfg::service { 'check_dynamic_disk':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_dynamic_disk',
|
||||
description => 'Disk',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'uptime' in $optout_checks {
|
||||
nagioscfg::service { 'check_uptime':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_uptime',
|
||||
description => 'Uptime',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'reboot' in $optout_checks {
|
||||
nagioscfg::service { 'check_reboot':
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_reboot',
|
||||
description => 'Reboot Needed',
|
||||
contact_groups => ['alerts'],
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'memory' in $optout_checks {
|
||||
nagioscfg::service { 'check_memory':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_memory',
|
||||
description => 'System Memory',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'entropy' in $optout_checks {
|
||||
nagioscfg::service { 'check_entropy':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_entropy',
|
||||
description => 'System Entropy',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'ntp_time' in $optout_checks {
|
||||
nagioscfg::service { 'check_ntp_time':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_ntp_time',
|
||||
description => 'System NTP Time',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'scriptherder' in $optout_checks {
|
||||
nagioscfg::service { 'check_scriptherder':
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_scriptherder',
|
||||
description => 'Scriptherder Status',
|
||||
contact_groups => ['naemon-admins'],
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
unless 'apt' in $optout_checks {
|
||||
nagioscfg::service { 'check_apt':
|
||||
use => 'naemon-service',
|
||||
hostgroup_name => [$nrpe_group],
|
||||
check_command => 'check_nrpe!check_apt',
|
||||
description => 'Packages available for upgrade',
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
}
|
||||
}
|
||||
|
||||
# NRPE check that the Cosmos GPG trust keys on this host are present/valid.
require sunet::nagios::nrpe_check_cosmos_keys
nagioscfg::service {'check_cosmos_keys':
  # NOTE(review): hostgroup still references 'sunet::naemon_monitor' although
  # this class was copied to soc::naemon_monitor — if hostgroups are derived
  # from the applied class name, this check may match no hosts; confirm
  # against the generated naemon-hostgroups.cfg before relying on it.
  hostgroup_name => ['sunet::naemon_monitor'],
  check_command => 'check_nrpe!check_cosmos_keys',
  description => 'Cosmos GPG keys',
}
|
||||
|
||||
file { '/etc/naemon/conf.d/cosmos/naemon-hostgroups.cfg':
|
||||
ensure => file,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
|
||||
content => template('soc/naemon_monitor/naemon-hostgroups.cfg.erb'),
|
||||
require => File['/etc/naemon/conf.d/cosmos/'],
|
||||
}
|
||||
file { '/etc/naemon/conf.d/cosmos/naemon-host.cfg':
|
||||
ensure => file,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
content => template('soc/naemon_monitor/naemon-host.cfg.erb'),
|
||||
require => File['/etc/naemon/conf.d/cosmos/'],
|
||||
}
|
||||
|
||||
file { '/etc/naemon/conf.d/cosmos/naemon-service.cfg':
|
||||
ensure => file,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
content => template('soc/naemon_monitor/naemon-service.cfg.erb'),
|
||||
require => File['/etc/naemon/conf.d/cosmos/'],
|
||||
}
|
||||
|
||||
file { '/etc/naemon/conf.d/cosmos/naemon-contactgroups.cfg':
|
||||
ensure => file,
|
||||
mode => '0644',
|
||||
group => 'root',
|
||||
owner => 'root',
|
||||
content => template('soc/naemon_monitor/naemon-contactgroups.cfg.erb'),
|
||||
require => File['/etc/naemon/conf.d/cosmos/'],
|
||||
}
|
||||
|
||||
sunet::scriptherder::cronjob { 'thrukmaintenance':
|
||||
cmd => '/usr/bin/docker exec --user www-data naemon_monitor-thruk-1 /usr/bin/thruk maintenance',
|
||||
minute => '50',
|
||||
ok_criteria => ['exit_status=0'],
|
||||
warn_criteria => ['exit_status=1', 'max_age=24h'],
|
||||
}
|
||||
|
||||
class { 'nagioscfg':
|
||||
additional_entities => $additional_entities,
|
||||
config => 'naemon_monitor',
|
||||
default_host_group => $default_host_group,
|
||||
manage_package => false,
|
||||
manage_service => false,
|
||||
cfgdir => '/etc/naemon/conf.d/nagioscfg',
|
||||
host_template => 'naemon-host',
|
||||
service => 'sunet-naemon_monitor',
|
||||
single_ip => true,
|
||||
require => File['/etc/naemon/conf.d/nagioscfg/'],
|
||||
exclude_hosts => $exclude_hosts,
|
||||
}
|
||||
}
|
|
@ -0,0 +1,61 @@
|
|||
otelcol.receiver.otlp "example" {
|
||||
grpc {
|
||||
endpoint = "[::]:4317"
|
||||
tls {
|
||||
cert_file = "/etc/dehydrated/fullchain.pem"
|
||||
key_file = "/etc/dehydrated/privkey.pem"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
http {
|
||||
endpoint = "[::]:4318"
|
||||
tls {
|
||||
cert_file = "/etc/dehydrated/fullchain.pem"
|
||||
key_file = "/etc/dehydrated/privkey.pem"
|
||||
}
|
||||
}
|
||||
|
||||
output {
|
||||
metrics = [otelcol.processor.batch.example.input]
|
||||
logs = [otelcol.processor.batch.example.input]
|
||||
traces = [otelcol.processor.batch.example.input]
|
||||
}
|
||||
}
|
||||
|
||||
otelcol.processor.batch "example" {
|
||||
output {
|
||||
metrics = [otelcol.exporter.prometheus.monitor_mimir.input]
|
||||
logs = [otelcol.exporter.loki.monitor_loki.input]
|
||||
traces = [otelcol.exporter.otlphttp.monitor_tempo.input]
|
||||
}
|
||||
}
|
||||
|
||||
otelcol.exporter.otlphttp "monitor_tempo" {
|
||||
client {
|
||||
endpoint = "http://tempo:4318"
|
||||
}
|
||||
}
|
||||
|
||||
otelcol.exporter.prometheus "monitor_mimir" {
|
||||
forward_to = [prometheus.remote_write.monitor_mimir.receiver]
|
||||
}
|
||||
|
||||
prometheus.remote_write "monitor_mimir" {
|
||||
endpoint {
|
||||
url = "http://mimir:9009/api/v1/push"
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
otelcol.exporter.loki "monitor_loki" {
|
||||
forward_to = [loki.write.monitor_loki.receiver]
|
||||
}
|
||||
|
||||
loki.write "monitor_loki" {
|
||||
endpoint {
|
||||
url = "http://loki:3100/loki/api/v1/push"
|
||||
|
||||
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
<VirtualHost _default_:443>
|
||||
SSLEngine on
|
||||
SSLCertificateFile /etc/letsencrypt/live/<%= @domain %>/cert.pem
|
||||
SSLCertificateKeyFile /etc/letsencrypt/live/<%= @domain %>/privkey.pem
|
||||
SSLCertificateChainFile /etc/letsencrypt/live/<%= @domain %>/chain.pem
|
||||
|
||||
Header always set Strict-Transport-Security "max-age=63072000"
|
||||
|
||||
SSLProtocol all -SSLv3 -TLSv1 -TLSv1.1
|
||||
SSLCipherSuite ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384
|
||||
SSLHonorCipherOrder off
|
||||
SSLSessionTickets off
|
||||
|
||||
SSLUseStapling On
|
||||
SSLCompression off
|
||||
|
||||
SSLOptions +StrictRequire
|
||||
|
||||
# Add vhost name to log entries:
|
||||
LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-agent}i\"" vhost_combined
|
||||
LogFormat "%v %h %l %u %t \"%r\" %>s %b" vhost_common
|
||||
BrowserMatch "MSIE [2-6]" \
|
||||
nokeepalive ssl-unclean-shutdown \
|
||||
downgrade-1.0 force-response-1.0
|
||||
BrowserMatch "MSIE [17-9]" ssl-unclean-shutdown
|
||||
|
||||
RedirectMatch ^/$ /thruk/
|
||||
Header set Content-Security-Policy "frame-src 'self' *.sunet.se *.swamid.se;"
|
||||
<Location />
|
||||
AuthType shibboleth
|
||||
ShibRequestSetting requireSession 1
|
||||
ShibRequestSetting entityIDSelf https://$hostname
|
||||
Require valid-user
|
||||
</Location>
|
||||
|
||||
<Location /grafana>
|
||||
<IfFile '/etc/apache2/groups.txt'>
|
||||
AuthGroupFile /etc/apache2/groups.txt
|
||||
Require group editors
|
||||
</IfFile>
|
||||
ProxyPass http://grafana:3000 retry=0 disablereuse=On
|
||||
ProxyPassReverse http://127.0.0.1:3000/grafana
|
||||
|
||||
RewriteEngine On
|
||||
RewriteRule .* - [E=PROXY_USER:%{LA-U:REMOTE_USER},NS]
|
||||
RequestHeader set X-WEBAUTH-USER "%{PROXY_USER}e"
|
||||
RequestHeader set X-WEBAUTH-NAME "%{PROXY_USER}e"
|
||||
|
||||
RewriteCond %{HTTP:Upgrade} websocket [NC] # For live view
|
||||
RewriteCond %{HTTP:Connection} upgrade [NC] #
|
||||
RewriteRule ^/?(.*) "ws://127.0.0.1:3000/$1" [P,L] #
|
||||
</Location>
|
||||
|
||||
<Location /histou>
|
||||
ProxyPass http://histou:80/histou retry=0 disablereuse=On
|
||||
ProxyPassReverse http://histou:80/histou
|
||||
</Location>
|
||||
|
||||
</VirtualHost>
|
||||
SSLStaplingCache "shmcb:logs/ssl_stapling(32768)"
|
|
@ -0,0 +1,139 @@
|
|||
version: '3.2'
|
||||
|
||||
services:
|
||||
always-https:
|
||||
image: docker.sunet.se/always-https
|
||||
<% unless @resolvers.empty? -%>
|
||||
dns:
|
||||
<% @resolvers.each do |resolver| -%>
|
||||
- <%= resolver %>
|
||||
<% end -%>
|
||||
<% end -%>
|
||||
ports:
|
||||
- '80:80'
|
||||
environment:
|
||||
- 'ACME_URL=http://acme-c.sunet.se/'
|
||||
|
||||
naemon:
|
||||
init: true
|
||||
image: docker.sunet.se/naemon:<%= @naemon_tag %>
|
||||
<% unless @resolvers.empty? -%>
|
||||
dns:
|
||||
<% @resolvers.each do |resolver| -%>
|
||||
- <%= resolver %>
|
||||
<% end -%>
|
||||
<% end -%>
|
||||
ports:
|
||||
- '127.0.0.1:6666:6666'
|
||||
volumes:
|
||||
- '/etc/naemon/conf.d/nagioscfg:/etc/naemon/conf.d/nagioscfg:ro'
|
||||
- '/etc/naemon/conf.d/cosmos:/etc/naemon/conf.d/cosmos:ro'
|
||||
- '/usr/lib/nagios/plugins/check_inodes:/usr/lib/nagios/plugins/check_inodes:ro'
|
||||
- '/usr/lib/nagios/plugins/cosmos:/usr/lib/nagios/plugins/cosmos:ro'
|
||||
- '/var/lib/naemon/:/var/lib/naemon/'
|
||||
- '/var/log/naemon/:/var/log/naemon/'
|
||||
- '/var/nagflux/:/var/nagflux/'
|
||||
<%- @naemon_extra_volumes.each do |extra_volume| -%>
|
||||
- "<%= extra_volume %>"
|
||||
<%- end -%>
|
||||
|
||||
thruk:
|
||||
image: docker.sunet.se/thruk:<%= @thruk_tag %>
|
||||
<% unless @resolvers.empty? -%>
|
||||
dns:
|
||||
<% @resolvers.each do |resolver| -%>
|
||||
- <%= resolver %>
|
||||
<% end -%>
|
||||
<% end -%>
|
||||
ports:
|
||||
- '443:443'
|
||||
volumes:
|
||||
<%- if @acme_provider == 'dehydrated' -%>
|
||||
- "/etc/dehydrated/certs/<%= @domain %>:/etc/dehydrated:ro"
|
||||
<% end -%>
|
||||
<%- if @acme_provider == 'acme-d' -%>
      - "/etc/letsencrypt:/etc/letsencrypt:ro"
      # FIX: the quoting was unbalanced ("…') and the source filename did not
      # match the file resource the manifest manages
      # (/opt/naemon_monitor/apache-thruk.conf, not apache-thruk.cfg).
      - "/opt/naemon_monitor/apache-thruk.conf:/etc/apache2/sites-enabled/thruk.conf:ro"
<% end -%>
|
||||
- '/opt/naemon_monitor/shib-certs:/etc/shibboleth/certs'
|
||||
- '/opt/naemon_monitor/data:/var/lib/thruk'
|
||||
- '/opt/naemon_monitor/menu_local.conf:/etc/thruk/menu_local.conf'
|
||||
<%- @thruk_extra_volumes.each do |extra_volume| -%>
|
||||
- "<%= extra_volume %>"
|
||||
<%- end -%>
|
||||
environment:
|
||||
<%- @thruk_env.each do |environ| -%>
|
||||
- "<%= environ %>"
|
||||
<%- end -%>
|
||||
|
||||
influxdb:
|
||||
image: influxdb:<%= @influxdb_tag %>
|
||||
volumes:
|
||||
- '/var/lib/influxdb:/var/lib/influxdb'
|
||||
environment:
|
||||
<%- @influx_env.each do |environ| -%>
|
||||
- "<%= environ %>"
|
||||
<%- end -%>
|
||||
|
||||
histou:
|
||||
image: docker.sunet.se/histou:<%= @histou_tag %>
|
||||
|
||||
nagflux:
|
||||
image: docker.sunet.se/nagflux:<%= @nagflux_tag %>
|
||||
volumes:
|
||||
- '/var/nagflux/:/var/nagflux/'
|
||||
environment:
|
||||
<%- @nagflux_env.each do |environ| -%>
|
||||
- "<%= environ %>"
|
||||
<%- end -%>
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:<%= @grafana_tag %>
|
||||
volumes:
|
||||
- '/opt/naemon_monitor/grafana.ini/:/etc/grafana/grafana.ini'
|
||||
- '/opt/naemon_monitor/grafana-provisioning:/etc/grafana/provisioning:ro'
|
||||
- '/opt/naemon_monitor/histou.js:/usr/share/grafana/public/dashboards/histou.js:ro'
|
||||
- '/opt/naemon_monitor/grafana:/var/lib/grafana:rw'
|
||||
|
||||
<% if @receive_otel -%>
|
||||
loki:
|
||||
image: grafana/loki:<%= @loki_tag %>
|
||||
ports:
|
||||
- "3100:3100"
|
||||
volumes:
|
||||
- '/opt/naemon_monitor/loki:/loki:rw'
|
||||
- '/opt/naemon_monitor/loki-server.yaml:/etc/loki/local-config.yaml:ro'
|
||||
command: -config.file=/etc/loki/local-config.yaml
|
||||
|
||||
tempo:
|
||||
image: grafana/tempo:<%= @tempo_tag %>
|
||||
ports:
|
||||
- "14268:14268" # jaeger ingest
|
||||
- "3200:3200" # tempo
|
||||
- "9095:9095" # tempo grpc
|
||||
- "9411:9411" # zipkin
|
||||
expose: #Only used between dockers
|
||||
- "4317" #grpc otel
|
||||
- "4318" #http otel
|
||||
command: [ "-config.file=/etc/tempo.yaml" ]
|
||||
volumes:
|
||||
- "/opt/naemon_monitor/tempo-server.yaml:/etc/tempo.yaml"
|
||||
- "/opt/naemon_monitor/tempo:/var/tempo:rw"
|
||||
|
||||
mimir:
|
||||
image: grafana/mimir:<%= @mimir_tag %>
|
||||
command: ["-ingester.native-histograms-ingestion-enabled=true", "-config.file=/etc/mimir.yaml"]
|
||||
ports:
|
||||
- "9009:9009"
|
||||
volumes:
|
||||
- "/opt/naemon_monitor/mimir-server.yaml:/etc/mimir.yaml:ro"
|
||||
- "/opt/naemon_monitor/mimir:/data:rw"
|
||||
alloy: #Router for otel
|
||||
image: grafana/alloy:<%= @alloy_tag %>
|
||||
command: ["run", "/etc/alloy/config.alloy"]
|
||||
ports:
|
||||
- "4317-4318:4317-4318"
|
||||
volumes:
|
||||
- "/opt/naemon_monitor/alloy-server.alloy:/etc/alloy/config.alloy:ro"
|
||||
- "/etc/dehydrated/certs/<%= @domain %>:/etc/dehydrated:ro"
|
||||
<% end -%>
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: "dashboards"
|
||||
orgId: 1
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 60
|
||||
allowUiUpdates: false
|
||||
options:
|
||||
path: /etc/grafana/provisioning
|
||||
foldersFromFilesStructure: true
|
File diff suppressed because it is too large
Load diff
|
@ -0,0 +1,282 @@
|
|||
{
|
||||
"__inputs": [
|
||||
{
|
||||
"name": "DS_PROMETHEUS",
|
||||
"label": "Prometheus",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "prometheus",
|
||||
"pluginName": "Prometheus"
|
||||
},
|
||||
{
|
||||
"name": "DS_LOKI",
|
||||
"label": "Loki",
|
||||
"description": "",
|
||||
"type": "datasource",
|
||||
"pluginId": "loki",
|
||||
"pluginName": "Loki"
|
||||
}
|
||||
],
|
||||
"__elements": {},
|
||||
"__requires": [
|
||||
{
|
||||
"type": "grafana",
|
||||
"id": "grafana",
|
||||
"name": "Grafana",
|
||||
"version": "11.1.4"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "logs",
|
||||
"name": "Logs",
|
||||
"version": ""
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "loki",
|
||||
"name": "Loki",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "datasource",
|
||||
"id": "prometheus",
|
||||
"name": "Prometheus",
|
||||
"version": "1.0.0"
|
||||
},
|
||||
{
|
||||
"type": "panel",
|
||||
"id": "timeseries",
|
||||
"name": "Time series",
|
||||
"version": ""
|
||||
}
|
||||
],
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "datasource",
|
||||
"uid": "grafana"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"collapsed": false,
|
||||
"gridPos": {
|
||||
"h": 1,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"panels": [],
|
||||
"repeat": "instance",
|
||||
"repeatDirection": "h",
|
||||
"title": "$instance",
|
||||
"type": "row"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "mimir"
|
||||
},
|
||||
"description": "",
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"axisBorderShow": false,
|
||||
"axisCenteredZero": false,
|
||||
"axisColorMode": "text",
|
||||
"axisLabel": "",
|
||||
"axisPlacement": "auto",
|
||||
"barAlignment": 0,
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 0,
|
||||
"gradientMode": "none",
|
||||
"hideFrom": {
|
||||
"legend": false,
|
||||
"tooltip": false,
|
||||
"viz": false
|
||||
},
|
||||
"insertNulls": false,
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"pointSize": 5,
|
||||
"scaleDistribution": {
|
||||
"type": "linear"
|
||||
},
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": {
|
||||
"group": "A",
|
||||
"mode": "none"
|
||||
},
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": [
|
||||
{
|
||||
"__systemRef": "hideSeriesFrom",
|
||||
"matcher": {
|
||||
"id": "byNames",
|
||||
"options": {
|
||||
"mode": "exclude",
|
||||
"names": [
|
||||
"{__name__=\"up\",job=\"node\"}"
|
||||
],
|
||||
"prefix": "All except:",
|
||||
"readOnly": true
|
||||
}
|
||||
},
|
||||
"properties": []
|
||||
}
|
||||
]
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 1
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "mimir"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(node_cpu_seconds_total{job=\"node\", instance=~\"$instance\", mode=\"user\"}[$__rate_interval])",
|
||||
"instant": false,
|
||||
"interval": "1m",
|
||||
"legendFormat": "__auto",
|
||||
"range": true,
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "CPU",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "loki"
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 9
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableLogDetails": true,
|
||||
"prettifyLogMessage": false,
|
||||
"showCommonLabels": false,
|
||||
"showLabels": false,
|
||||
"showTime": true,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLogMessage": false
|
||||
},
|
||||
"pluginVersion": "11.1.4",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "loki",
|
||||
"uid": "loki"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "{job=\"loki.source.journal.read\",hostname=~\"$instance\"}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Panel Title",
|
||||
"type": "logs"
|
||||
}
|
||||
],
|
||||
"schemaVersion": 39,
|
||||
"tags": [],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "mimir"
|
||||
},
|
||||
"definition": "label_values(up,instance)",
|
||||
"hide": 2,
|
||||
"includeAll": true,
|
||||
"multi": true,
|
||||
"name": "instance",
|
||||
"options": [],
|
||||
"query": {
|
||||
"qryType": 1,
|
||||
"query": "label_values(up,instance)",
|
||||
"refId": "PrometheusVariableQueryEditor-VariableQuery"
|
||||
},
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 0,
|
||||
"type": "query"
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Overview",
|
||||
"uid": "sunet-overview",
|
||||
"version": 8,
|
||||
"weekStart": ""
|
||||
}
|
|
@ -0,0 +1,14 @@
|
|||
apiVersion: 1
|
||||
|
||||
deleteDatasources:
|
||||
- name: nagflux
|
||||
|
||||
datasources:
|
||||
- name: nagflux
|
||||
type: influxdb
|
||||
url: http://influxdb:8086
|
||||
access: proxy
|
||||
database: nagflux
|
||||
isDefault: true
|
||||
version: 1
|
||||
editable: true
|
|
@ -0,0 +1,12 @@
|
|||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Loki
|
||||
type: loki
|
||||
uid: loki
|
||||
access: proxy
|
||||
orgId: 1
|
||||
url: http://loki:3100
|
||||
basicAuth: false
|
||||
isDefault: false
|
||||
version: 1
|
||||
editable: false
|
|
@ -0,0 +1,16 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
# Access mode - proxy (server in the UI) or direct (browser in the UI).
|
||||
url: "http://mimir:9009/prometheus"
|
||||
uid: mimir
|
||||
jsonData:
|
||||
httpMethod: POST
|
||||
manageAlerts: true
|
||||
prometheusType: Mimir
|
||||
cacheLevel: 'High'
|
||||
disableRecordingRules: false
|
||||
incrementalQueryOverlapWindow: 10m
|
|
@ -0,0 +1,48 @@
|
|||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Tempo
|
||||
type: tempo
|
||||
uid: tempo
|
||||
url: http://tempo:3200
|
||||
access: proxy
|
||||
basicAuth: false
|
||||
jsonData:
|
||||
tracesToLogsV2:
|
||||
# Field with an internal link pointing to a logs data source in Grafana.
|
||||
# datasourceUid value must match the uid value of the logs data source.
|
||||
datasourceUid: 'loki'
|
||||
spanStartTimeShift: '-1h'
|
||||
spanEndTimeShift: '1h'
|
||||
tags: ['job', 'instance', 'pod', 'namespace']
|
||||
filterByTraceID: false
|
||||
filterBySpanID: false
|
||||
customQuery: true
|
||||
query: 'method="$${__span.tags.method}"'
|
||||
tracesToMetrics:
|
||||
datasourceUid: 'prom'
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
tags: [{ key: 'service.name', value: 'service' }, { key: 'job' }]
|
||||
queries:
|
||||
- name: 'Sample query'
|
||||
query: 'sum(rate(traces_spanmetrics_latency_bucket{$$__tags}[5m]))'
|
||||
tracesToProfiles:
|
||||
datasourceUid: 'grafana-pyroscope-datasource'
|
||||
tags: ['job', 'instance', 'pod', 'namespace']
|
||||
profileTypeId: 'process_cpu:cpu:nanoseconds:cpu:nanoseconds'
|
||||
customQuery: true
|
||||
query: 'method="$${__span.tags.method}"'
|
||||
serviceMap:
|
||||
datasourceUid: 'mimir'
|
||||
nodeGraph:
|
||||
enabled: true
|
||||
search:
|
||||
hide: false
|
||||
traceQuery:
|
||||
timeShiftEnabled: true
|
||||
spanStartTimeShift: '1h'
|
||||
spanEndTimeShift: '-1h'
|
||||
spanBar:
|
||||
type: 'Tag'
|
||||
tag: 'http.path'
|
|
@ -0,0 +1,30 @@
|
|||
[log]
|
||||
mode = console
|
||||
level = debug
|
||||
|
||||
[server]
|
||||
root_url = https://<%= @domain %>/grafana/
|
||||
|
||||
[users]
|
||||
default_theme = light
|
||||
allow_sign_up = false
|
||||
auto_assign_org_role = <%= @grafana_default_role %>
|
||||
|
||||
[auth]
|
||||
disable_signout_menu = true
|
||||
|
||||
[auth.proxy]
|
||||
enabled = true
|
||||
header_name = X-WEBAUTH-USER
|
||||
header_property = username
|
||||
auto_sign_up = true
|
||||
sync_ttl = 60
|
||||
|
||||
[alerting]
|
||||
enabled = false
|
||||
|
||||
[unified_alerting]
|
||||
enabled = true
|
||||
|
||||
[security]
|
||||
allow_embedding = true
|
|
@ -0,0 +1,223 @@
|
|||
/* global _ */
|
||||
|
||||
// accessible variables in this scope
|
||||
var window, document, ARGS, $, jQuery, moment, kbn;
|
||||
|
||||
//parse arguments
|
||||
parseArgs()
|
||||
|
||||
|
||||
return function (callback) {
|
||||
if (window.location.href.search('/dashboard-solo/') != -1) {
|
||||
document.documentElement.style.background = '#FFF';
|
||||
}
|
||||
|
||||
var url = location.protocol+'//'+window.location.hostname+'/histou/';
|
||||
var configUrl = url+'index.php?host='+host+'&service='+service+'&height='+height+'&legend='+legend+debug+disablePanelTitle+disablePerfdataLookup+specificTemplate+'&annotations='+annotations;
|
||||
|
||||
var flotAddons = url + 'flotAddons.js';
|
||||
$.getScript(flotAddons, function (){});
|
||||
if (!_.isUndefined(ARGS.customCSSFile)) {
|
||||
$('head').append('<link rel="stylesheet" href="' + ARGS.customCSSFile + '" type="text/css" />');
|
||||
}
|
||||
cssLoaded = false;
|
||||
jQuery('body').on('DOMNodeInserted', 'DIV.drop-popover', function (e) {
|
||||
var cssUrl = url+'lightbox/css/light.css'
|
||||
if (!cssLoaded) {
|
||||
$('head').append('<link rel="stylesheet" href="'+url+'lightbox/css/light.css" type="text/css" />');
|
||||
$.getScript(url+'lightbox/js/light.js', function(){});
|
||||
cssLoaded = true;
|
||||
}
|
||||
|
||||
var box = $( e.currentTarget ).find( "DIV.sakuli-popup" );
|
||||
if (box.length > 0 ){
|
||||
$(box[0]).attr('class', 'sakuli-image');
|
||||
var sakuliUrl = site[1] + box[0].innerHTML;
|
||||
var svcoutput;
|
||||
var imagename;
|
||||
jQuery.when(
|
||||
// fetch Sakuli serviceoutput file
|
||||
$.get( sakuliUrl + "output.txt").always(function(data ,state) {
|
||||
if (state != "success" ) {
|
||||
data = "Could not find Sakuli service outputfile at " + sakuliUrl + "output.txt !"
|
||||
}
|
||||
console.log(data);
|
||||
svcoutput = $("<div>").text(data).html().replace(/['"]+/g, '');
|
||||
console.log("Sakuli service output: " + svcoutput);
|
||||
}) &&
|
||||
// fetch Sakuli screenshot (jpg/png)
|
||||
$.get( sakuliUrl ).always(function(imgdata ,state) {
|
||||
if (state != "success" ) {
|
||||
imgdata = "Could not access screenshot list page at " + sakuliUrl + "!"
|
||||
}
|
||||
// the 3rd href on the apache index page contains the img name
|
||||
imagename = $(imgdata).find('a')[2].text.trim();
|
||||
console.log("Sakuli screenshot image name: " + imagename);
|
||||
})
|
||||
).then ( function() {
|
||||
box[0].innerHTML = '<a href="' + sakuliUrl + imagename + '" data-lightbox="sakuli" data-title="'+ svcoutput +'"><img src="'+ sakuliUrl + imagename +'" alt="Sakuli error image" width=250px /></a>';
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
|
||||
$.ajax(
|
||||
{
|
||||
method: 'GET',
|
||||
url: configUrl,
|
||||
dataType: "jsonp",
|
||||
}
|
||||
).done(
|
||||
function (result) {
|
||||
console.log(result);
|
||||
callback(result);
|
||||
}
|
||||
).fail(
|
||||
function (result) {
|
||||
console.log(result);
|
||||
console.log(configUrl);
|
||||
if (result.status == 200) {
|
||||
callback(createErrorDashboard('# HTTP code: '+result.status+'\n# Message: '+result.statusText+'\n# Url: '+configUrl+'\n# Probably the output is not valid json, because the returncode is 200!'));
|
||||
} else {
|
||||
callback(createErrorDashboard('# HTTP code: '+result.status+'\n# Message: '+result.statusText+'\n# Url: '+configUrl));
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
function createErrorDashboard(message)
|
||||
{
|
||||
return {
|
||||
rows : [{
|
||||
title: 'Chart',
|
||||
height: '300px',
|
||||
panels : [{
|
||||
title: 'Error Message below',
|
||||
type: 'text',
|
||||
span: 12,
|
||||
fill: 1,
|
||||
content: message,
|
||||
}]
|
||||
}],
|
||||
services : {},
|
||||
title : 'JS Error / HTTP Error'
|
||||
};
|
||||
}
|
||||
|
||||
function parseArgs()
|
||||
{
|
||||
if (!_.isUndefined(ARGS.reduce)) {
|
||||
$('head').append('<style>.panel-fullscreen {top:0}</style>');
|
||||
|
||||
//change ui to our needs
|
||||
clearUi();
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.dynUnit)) {
|
||||
dynUnit = true;
|
||||
} else {
|
||||
dynUnit = false;
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.host)) {
|
||||
host = ARGS.host;
|
||||
} else {
|
||||
host = "";
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.service)) {
|
||||
service = ARGS.service;
|
||||
} else {
|
||||
service = "";
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.command)) {
|
||||
command = ARGS.command;
|
||||
} else {
|
||||
command = "";
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.perf)) {
|
||||
perf = ARGS.perf;
|
||||
} else {
|
||||
perf = "";
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.height)) {
|
||||
height = ARGS.height;
|
||||
} else {
|
||||
height = "";
|
||||
}
|
||||
|
||||
if (_.isUndefined(ARGS.debug)) {
|
||||
debug = '';
|
||||
} else {
|
||||
debug = "&debug";
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.legend)) {
|
||||
legend = ARGS.legend;
|
||||
} else {
|
||||
legend = true;
|
||||
}
|
||||
|
||||
if (!_.isUndefined(ARGS.annotations)) {
|
||||
annotations = ARGS.annotations;
|
||||
} else {
|
||||
annotations = false;
|
||||
}
|
||||
|
||||
if(_.isUndefined(ARGS.disablePanelTitle)) {
|
||||
disablePanelTitle = '';
|
||||
}else{
|
||||
disablePanelTitle = "&disablePanelTitle";
|
||||
}
|
||||
|
||||
if(_.isUndefined(ARGS.disablePerfdataLookup)) {
|
||||
disablePerfdataLookup = '';
|
||||
}else{
|
||||
disablePerfdataLookup = "&disablePerfdataLookup";
|
||||
}
|
||||
|
||||
if(_.isUndefined(ARGS.specificTemplate)) {
|
||||
specificTemplate = '';
|
||||
}else{
|
||||
specificTemplate = "&specificTemplate="+ARGS.specificTemplate;
|
||||
}
|
||||
}
|
||||
|
||||
function clearUi()
|
||||
{
|
||||
//removes white space
|
||||
var checkExist = setInterval(
|
||||
function () {
|
||||
if ($('.panel-content').length) {
|
||||
clearInterval(checkExist);
|
||||
document.getElementsByClassName("panel-content")[0].style.paddingBottom = '0px';
|
||||
}
|
||||
},
|
||||
100
|
||||
);
|
||||
/*
|
||||
.panel-header removes the headline of the graphs
|
||||
.navbar-static-top removes the menubar on the top
|
||||
.row-control-inner removes the row controll button on the left
|
||||
.span12 removes the add new row button on the bottom
|
||||
*/
|
||||
divs = ['.panel-header','.navbar-static-top','.row-control-inner','.span12']
|
||||
for (index = 0; index < divs.length; index++) {
|
||||
waitForDivAndDeleteIt(divs[index]);
|
||||
}
|
||||
function waitForDivAndDeleteIt(div)
|
||||
{
|
||||
var checkExist = setInterval(
|
||||
function () {
|
||||
if ($(div).length) {
|
||||
clearInterval(checkExist);
|
||||
$(div).remove();
|
||||
}
|
||||
},
|
||||
100
|
||||
);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,13 @@
|
|||
/var/log/naemon/naemon.log {
|
||||
daily
|
||||
rotate 3650
|
||||
nocompress
|
||||
olddir archives
|
||||
dateext
|
||||
dateformat -%Y%m%d
|
||||
missingok
|
||||
notifempty
|
||||
postrotate
|
||||
/usr/bin/docker exec naemon_monitor-naemon-1 pkill --signal USR1 -f '/usr/bin/naemon --allow-root /etc/naemon/naemon.cfg'
|
||||
endscript
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
auth_enabled: false
|
||||
|
||||
server:
|
||||
http_listen_port: 3100
|
||||
|
||||
common:
|
||||
path_prefix: /loki
|
||||
storage:
|
||||
filesystem:
|
||||
chunks_directory: /loki/chunks
|
||||
rules_directory: /loki/rules
|
||||
replication_factor: 1
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory
|
||||
compactor:
|
||||
working_directory: /loki/retention
|
||||
compaction_interval: 10m
|
||||
retention_enabled: true
|
||||
retention_delete_delay: 2h
|
||||
retention_delete_worker_count: 150
|
||||
delete_request_store: filesystem
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-10-24
|
||||
store: boltdb-shipper
|
||||
object_store: filesystem
|
||||
schema: v11
|
||||
index:
|
||||
prefix: index_
|
||||
period: 24h
|
||||
- from: "2023-01-05" # <---- A date in the future
|
||||
index:
|
||||
period: 24h
|
||||
prefix: index_
|
||||
object_store: filesystem
|
||||
schema: v13
|
||||
store: tsdb
|
||||
storage_config:
|
||||
tsdb_shipper:
|
||||
active_index_directory: /loki/tsdb-index
|
||||
cache_location: /loki/tsdb-cache
|
||||
ruler:
|
||||
alertmanager_url: http://localhost:9093
|
||||
|
||||
limits_config:
|
||||
retention_period: <%= @otel_retention %>
|
||||
retention_stream:
|
||||
- selector: '{namespace="debug"}'
|
||||
priority: 1
|
||||
period: 48h
|
||||
|
||||
# By default, Loki will send anonymous, but uniquely-identifiable usage and configuration
|
||||
# analytics to Grafana Labs. These statistics are sent to https://stats.grafana.org/
|
||||
#
|
||||
# Statistics help us better understand how Loki is used, and they show us performance
|
||||
# levels for most users. This helps us prioritize features and documentation.
|
||||
# For more information on what's sent, look at
|
||||
# https://github.com/grafana/loki/blob/main/pkg/usagestats/stats.go
|
||||
# Refer to the buildReport method to see what goes into a report.
|
||||
#
|
||||
# If you would like to disable reporting, uncomment the following lines:
|
||||
#analytics:
|
||||
# reporting_enabled: false
|
|
@ -0,0 +1,45 @@
|
|||
multitenancy_enabled: false
|
||||
|
||||
blocks_storage:
|
||||
backend: filesystem
|
||||
bucket_store:
|
||||
sync_dir: /data/mimir/tsdb-sync
|
||||
filesystem:
|
||||
dir: /data/mimir/data/tsdb
|
||||
tsdb:
|
||||
dir: /data/mimir/tsdb
|
||||
|
||||
compactor:
|
||||
data_dir: /data/mimir/compactor
|
||||
sharding_ring:
|
||||
kvstore:
|
||||
store: memberlist
|
||||
|
||||
distributor:
|
||||
ring:
|
||||
instance_addr: 127.0.0.1
|
||||
kvstore:
|
||||
store: memberlist
|
||||
|
||||
ingester:
|
||||
ring:
|
||||
instance_addr: 127.0.0.1
|
||||
kvstore:
|
||||
store: memberlist
|
||||
replication_factor: 1
|
||||
|
||||
ruler_storage:
|
||||
backend: filesystem
|
||||
filesystem:
|
||||
dir: /data/mimir/rules
|
||||
|
||||
server:
|
||||
http_listen_port: 9009
|
||||
log_level: error
|
||||
|
||||
store_gateway:
|
||||
sharding_ring:
|
||||
replication_factor: 1
|
||||
limits:
|
||||
# Delete from storage metrics data older than x.
|
||||
compactor_blocks_retention_period: <%= @otel_retention %>
|
|
@ -0,0 +1,4 @@
|
|||
define contactgroup {
|
||||
contactgroup_name naemon-admins
|
||||
alias Naemon Administrators
|
||||
}
|
|
@ -0,0 +1,19 @@
|
|||
# Generic host definition template - This is NOT a real host, just a template!
|
||||
|
||||
define host{
|
||||
name naemon-host ; The name of this host template
|
||||
action_url /grafana/dashboard/script/histou.js?host=$HOSTNAME$&theme=light&annotations=true
|
||||
notifications_enabled 1 ; Host notifications are enabled
|
||||
event_handler_enabled 1 ; Host event handler is enabled
|
||||
flap_detection_enabled 1 ; Flap detection is enabled
|
||||
process_perf_data 1 ; Process performance data
|
||||
retain_status_information 1 ; Retain status information across program restarts
|
||||
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||
check_command check-host-alive
|
||||
max_check_attempts 10
|
||||
notification_interval 0
|
||||
notification_period 24x7
|
||||
notification_options d,u,r
|
||||
contact_groups admins
|
||||
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
|
||||
}
|
|
@ -0,0 +1,12 @@
|
|||
# A simple wildcard hostgroup
|
||||
define hostgroup {
|
||||
hostgroup_name all
|
||||
alias All Servers
|
||||
members *
|
||||
}
|
||||
# Predefine empty group that we can use in puppet-nagioscfg in order to migrate
|
||||
# away from the 'nrpe' group created from the cosmos-db.
|
||||
define hostgroup {
|
||||
hostgroup_name from_puppet-nagioscfg
|
||||
alias from_puppet-nagioscfg
|
||||
}
|
|
@ -0,0 +1,16 @@
|
|||
define service {
|
||||
name naemon-service ; The 'name' of this service template
|
||||
action_url /grafana/dashboard/script/histou.js?host=$HOSTNAME$&service=$SERVICEDISPLAYNAME$&theme=light&annotations=true'
|
||||
contact_groups naemon-admins ; Notifications get sent out to everyone in the 'admins' group
|
||||
event_handler_enabled 1 ; Service event handler is enabled
|
||||
flap_detection_enabled 1 ; Flap detection is enabled
|
||||
max_check_attempts 3 ; Re-check the service up to 3 times in order to determine its final (hard) state
|
||||
notification_interval 0 ; Re-notify about service problems every hour
|
||||
notification_options u,w,c,r ; Send notifications about warning, unknown, critical, and recovery events
|
||||
notification_period 24x7 ; Notifications can be sent out at any time
|
||||
obsess_over_service 1 ; We should obsess over this service (if necessary)
|
||||
process_perf_data 1 ; Process performance data
|
||||
register 0 ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
|
||||
retain_nonstatus_information 1 ; Retain non-status information across program restarts
|
||||
retain_status_information 1 ; Retain status information across program restarts
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
[Service]
|
||||
# livestatus.so can't handle HUP with a TCP listner
|
||||
# https://github.com/naemon/naemon-livestatus/issues/117
|
||||
ExecReload=/usr/bin/docker restart <%= @naemon_container %>
|
|
@ -0,0 +1,56 @@
|
|||
stream_over_http_enabled: true
|
||||
server:
|
||||
http_listen_port: 3200
|
||||
log_level: info
|
||||
|
||||
query_frontend:
|
||||
search:
|
||||
duration_slo: 5s
|
||||
throughput_bytes_slo: 1.073741824e+09
|
||||
trace_by_id:
|
||||
duration_slo: 5s
|
||||
|
||||
distributor:
|
||||
receivers: # this configuration will listen on all ports and protocols that tempo is capable of.
|
||||
jaeger: # the receives all come from the OpenTelemetry collector. more configuration information can
|
||||
protocols: # be found there: https://github.com/open-telemetry/opentelemetry-collector/tree/main/receiver
|
||||
thrift_http: #
|
||||
grpc: # for a production deployment you should only enable the receivers you need!
|
||||
thrift_binary:
|
||||
thrift_compact:
|
||||
zipkin:
|
||||
otlp:
|
||||
protocols:
|
||||
http:
|
||||
grpc:
|
||||
opencensus:
|
||||
|
||||
compactor:
|
||||
compaction:
|
||||
block_retention: <%= @otel_retention %>
|
||||
|
||||
metrics_generator:
|
||||
registry:
|
||||
external_labels:
|
||||
source: tempo
|
||||
cluster: docker-compose
|
||||
storage:
|
||||
path: /var/tempo/generator/wal
|
||||
remote_write:
|
||||
- url: http://prometheus:9009/api/v1/write
|
||||
send_exemplars: true
|
||||
traces_storage:
|
||||
path: /var/tempo/generator/traces
|
||||
|
||||
storage:
|
||||
trace:
|
||||
backend: local # backend configuration to use
|
||||
wal:
|
||||
path: /var/tempo/wal # where to store the wal locally
|
||||
local:
|
||||
path: /var/tempo/blocks
|
||||
|
||||
overrides:
|
||||
defaults:
|
||||
metrics_generator:
|
||||
processors: [service-graphs, span-metrics, local-blocks] # enables metrics generator
|
Loading…
Add table
Reference in a new issue