eid-ops/scripts/cosmos-facts
2024-05-13 18:50:11 +02:00

441 lines
15 KiB
Python
Executable file

#!/usr/bin/env python3
#
# Copyright 2017 SUNET. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are
# permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this list of
# conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
# of conditions and the following disclaimer in the documentation and/or other materials
# provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUNET OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those of the
# authors and should not be interpreted as representing official policies, either expressed
# or implied, of SUNET.
#
# Author : Fredrik Thulin <fredrik@thulin.net>
#
#
# TL; DR: This script produces a Puppet fact called 'cosmos', with information
# about hosts in the current Cosmos environment:
#
# $facts['cosmos']['mongodb_server_addrs'] = [130.242.130.220, 130.242.130.221,
# '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
#
# $facts['cosmos']['mongodb_server_hosts'] = [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
#
#
# This script creates the $facts['cosmos'] Puppet fact. This fact holds
# information about the Cosmos environment. To start with, this fact is
# used to define roles of hosts in the Cosmos environment.
#
# An example to provide the merits for this fact, and how it works:
#
# eduID has the two applications 'signup' and 'dashboard'. They are
# quite common and both have access to MongoDB, both register with
# frontends etc.
#
# eduID had lots of Hiera data like this: (different values in test
# and production, making it harder than necessary to deploy things
# from testing to production):
#
# eduid_frontend_servers:
# - fe-tug-1.eduid.se
# - fe-fre-1.eduid.se
#
# eduid_frontend_ips:
# - 130.242.131.3
# - 130.242.131.4
# - 2001:6b0:54:c4::3
# - 2001:6b0:54:c4::4
#
# eduid_mongodb_servers:
# - userdb-tug-1.eduid.se
# - userdb-fre-1.eduid.se
# - userdb-tug-2.eduid.se
#
# eduid_signup_haproxy_backends:
# - signup-tug-1.eduid.se
# #- signup-fre-1.eduid.se
#
# eduid_dashboard_haproxy_backends:
# - dash-tug-1.eduid.se
# - dash-fre-1.eduid.se
#
# eduid_signup_ips:
# - 130.242.130.212 # signup-tug-1.eduid.se
# - 2001:6b0:54:c3:5054:ff:fea0:1d4 # signup-tug-1.eduid.se
# eduid_dashboard_ips:
# - 130.242.130.213 # dash-fre-1.eduid.se
# - 130.242.130.197 # dash-tug-1.eduid.se
# - 2001:6b0:54:c3:5054:ff:fea0:1d5 # dash-fre-1.eduid.se
# - 2001:6b0:54:c3:5054:ff:fea0:1c5 # dash-tug-1.eduid.se
#
# This probably looks like a lot, but unfortunately it is probably not
# even a third of all such parameters that were necessary to accomplish
# two applications behind load balancers and with access to a few
# backend services such as MongoDB, redis, etcd and AMQP.
#
# Now, an improvement is to have a script (this) group hosts in
# a Cosmos environment based on hostnames, and provide name-to-address
# mappings in either Hiera or in a fact.
#
# For eduID, this can be implemented rather trivially since the naming
# standard is pretty much function-site-number.eduid.se. For nunoc-ops
# it is, with a couple of exceptions, functionnumber.domain.
#
# So, having automatic mappings of groups-of-hosts to addresses would
# allow lots of improvements, but one level of indirection is missing -
# services move between hosts. eduID currently runs Redis and etcd on
# the same hosts that run MongoDB, but it would not be ideal to bet on
# that being true forever. Even better is to define roles that map to
# the automatically deduced groups, which in turn map to hosts that
# map to addresses.
#
# As a starting point, this script is used with a roles-in.yaml file
# containing just some roles-to-groups maps:
#
# ---
# cosmos:
#
# roles:
# mongodb_server:
# groups: [userdb]
# etcd_server:
# groups: [userdb]
# redis_server:
# groups: [userdb]
# app_server:
# groups: [signup, dashboard]
# frontend_server:
# groups: [fe]
#
#
# When this script runs (like this:
#
# ./scripts/cosmos-facts --dirs .eduid.se \
# --roles metadata/roles-in.yaml \
# --outfile global/overlay/etc/puppet/static-cosmos-facts.yaml)
#
# it will scan for all directories ending with '.eduid.se' in the Cosmos repository,
# resolve the hosts using DNS and update the data with hosts and groups entries like this
# (data not in DNS _could_ be provided in the YAML file above, under cosmos['hosts']):
#
# ---
# cosmos:
# hosts:
# userdb-fre-1.eduid.se:
# addrs: [130.242.130.220, '2001:6b0:54:c3:5054:ff:fea0:1dc']
# userdb-tug-1.eduid.se:
# addrs: [130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dd']
# ...
# groups:
# userdb:
# addrs: [130.242.130.220, 130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dc',
# '2001:6b0:54:c3:5054:ff:fea0:1dd']
# hosts: [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
# ...
#
# The paramount use cases for this information in all our Puppet manifests
# is to do something (update firewall rules, write configuration files) with lists
# of IP addresses or hostnames. While making (sorted) lists of hostnames and addresses
# under the 'roles' section would be beautiful from a programmer's perspective:
#
# $facts['cosmos']['roles']['mongodb_server']['hosts'] = ['userdb-fre-1.eduid.se', ...]
#
# it feels like it would be rather tedious to write and maintain such Puppet manifests.
# As a compromise, the data is still structurally under the 'cosmos' key but made
# available in the more palatable form:
#
# $facts['cosmos']['mongodb_server_addrs'] = ['130.242.130.220', '130.242.130.221',
# '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
#
# $facts['cosmos']['mongodb_server_hosts'] = ['userdb-fre-1.eduid.se', 'userdb-tug-1.eduid.se']
#
# To really encourage the use of roles rather than hosts/groups directly, the
# top-level 'cosmos' keys 'hosts', 'groups' and 'roles' are emptied before this script
# writes its output unless the --output_hosts, --output_groups and --output_roles
# arguments are provided. Please don't use them ;).
#
import re
import sys
import copy
import yaml
import cosmosdata
from cosmosdata import DNSResolver, HostsResolver
# Default patterns for deriving a group name from a hostname. Each pattern is
# tried with match() (i.e. anchored at the start of the hostname) and capture
# group 1 becomes the group name.
#
# Raw strings are used so that '\d' reaches the regex engine untouched instead
# of being an invalid string escape (a SyntaxWarning on recent Pythons).
DEFAULT_REGEXPS = [re.compile(r'(.+?)-'),      # eduID style, function-site-digit
                   re.compile(r'([a-z]+)\d'),  # nunoc-ops style, functiondigit
                   ]
class Hosts(object):
    """
    Accumulate the 'hosts' and 'groups' sections of the 'cosmos' fact.

    All state is kept in a single dict (`self._data`). Hosts map to their
    addresses, groups map to member hosts plus the combined addresses of
    those hosts. `to_dict()` produces the final structure, with roles resolved.
    """

    def __init__(self, data_in, args):
        """
        :param data_in: Initial data (dict) or None to start empty. The dict is
                        used as-is (not copied) and its 'hosts' and 'groups'
                        entries are reset to empty dicts.
        :param args: Parsed command line arguments. `args.roles` is read here,
                     `args.addfile` is read later by `to_dict()`.
        """
        self._data = {} if data_in is None else data_in
        self._data.update({'hosts': {},
                           'groups': {},
                           })
        self._args = args
        if args.roles:
            # Everything under the top-level 'cosmos' key of the roles file is
            # merged in, replacing same-named sections (including the empty
            # 'hosts'/'groups' set up above).
            with open(args.roles) as fd:
                file_y = yaml.safe_load(fd)
                self._data.update(file_y.get('cosmos'))

    @property
    def hosts(self):
        """ The 'hosts' section: hostname -> {'addrs': [...]}. """
        return self._data['hosts']

    @property
    def groups(self):
        """ The 'groups' section: groupname -> {'hosts': [...], 'addrs': [...]}. """
        return self._data['groups']

    def add_host(self, hostname, addrs):
        """
        Register addresses for a host, merging with any already known ones.

        :param hostname: Name of the host
        :param addrs: List of addresses for the host
        """
        if hostname not in self.hosts:
            self.hosts[hostname] = {}
        prev_addrs = self.hosts[hostname].get('addrs', [])
        self.hosts[hostname]['addrs'] = _dedup(addrs + prev_addrs)

    def add_group(self, groupname, data):
        """
        Store a group and compute the combined address list of its members.

        NOTE: `data` is modified in place ('members' is renamed to 'hosts',
        'addrs' is added/updated) and then stored as the group entry, replacing
        any existing group with the same name.

        :param groupname: Name of the group
        :param data: Group data (dict) that must contain a 'members' list
        :raises KeyError: if `data` has no 'members' key
        """
        # rename 'members' to 'hosts'
        data['hosts'] = data.pop('members')
        self.groups.update({groupname: data})
        group = self.groups[groupname]
        # Make a list of all the addresses of the hosts. Members that are not
        # known hosts contribute nothing.
        addrs = group.setdefault('addrs', [])
        for member in group['hosts']:
            if member in self.hosts:
                # .get() mirrors add_host(): a host entry without 'addrs'
                # (e.g. one supplied through the roles file) is not an error.
                addrs += self.hosts[member].get('addrs', [])
        group['addrs'] = _dedup(addrs)

    def load_datasource(self, data_s):
        """
        Load all hosts and groups from a data source.

        Every host with at least one address is added, and is put into the
        group named by capture group 1 of each group regexp that matches its
        (lower-cased) name. The regexps are taken from a '_regexps' entry in
        the 'groups' section if present (the entry is removed), otherwise
        DEFAULT_REGEXPS is used.

        :type data_s: cosmosdata.DataSource
        :return:
        """
        # Load all the hosts
        if '_regexps' in self.groups:
            group_r = [re.compile(x) for x in self.groups.pop('_regexps')]
        else:
            group_r = [re.compile(x) for x in DEFAULT_REGEXPS]
        for hostname in data_s.all_hosts():
            hostname = hostname.lower()
            addrs = data_s.lookup(hostname)
            if not addrs:
                # Hosts without any address are left out completely
                continue
            self.add_host(hostname, addrs)
            for r in group_r:
                match = r.match(hostname)
                if match:
                    data_s.add_members_to_group(match.group(1), [hostname])
        # Add all the groups from the datasource
        for groupname, values in data_s.groups().items():
            self.add_group(groupname, values)

    def to_dict(self, ignore_nonex):
        """
        Return the complete fact as {'cosmos': {...}}.

        Works on a deep copy of the accumulated data, so calling this does not
        change the state of this object.

        :param ignore_nonex: Passed on to _resolve_roles(): ignore references
                             to groups/hosts that do not exist instead of failing.
        :return: dict with the single key 'cosmos'
        """
        res = copy.deepcopy(self._data)
        if self._args.addfile:
            # Add content from a specified file. The context manager makes
            # sure the file is closed again.
            with open(self._args.addfile) as fd:
                file_y = yaml.safe_load(fd)
            # An empty YAML file loads as None - nothing to merge in that case.
            if file_y:
                res.update(file_y)
        res = _resolve_roles(res, ignore_nonex)
        return dict(cosmos=res)
def _dedup(data):
    """
    Return the unique items of `data` as a sorted list.

    :param data: Iterable of hashable, mutually comparable items
    :return: New sorted list without duplicates
    """
    return sorted(set(data))
def _resolve_roles(data, ignore_nonex):
    """
    Flatten every role into '<role>_hosts' and '<role>_addrs' keys in `data`.

    A role may refer to groups, to individual hosts, or to both. The hosts and
    addresses of everything referred to are merged, de-duplicated and sorted.
    Roles that refer to neither groups nor hosts are skipped.

    :param data: The 'cosmos' data with 'hosts', 'groups' and (optionally)
                 'roles' sections. Modified in place.
    :param ignore_nonex: If true, silently ignore references to groups/hosts
                         that do not exist.
    :return: `data`
    :raises ValueError: if a role refers to a non-existent group or host and
                        `ignore_nonex` is false
    """
    # No (or an empty) 'roles' section simply means there is nothing to resolve
    roles = data.get('roles') or {}
    for rolename, values in roles.items():
        # Roles look like this:
        #   monitor_server:
        #     groups: [nagios]
        #     hosts: [monitor.sunet.se]
        #
        rolename = rolename.lower()
        # A role listing only 'hosts' is just as valid as one listing 'groups'
        if not values or ('groups' not in values and 'hosts' not in values):
            continue
        # merge 'hosts' and 'addrs' from all the referred groups
        addrs = set()
        hosts = set()
        for g in values.get('groups') or []:
            if g not in data['groups']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find group {!r} in groups: {}'.format(
                    rolename, g, list(data['groups'].keys())))
            addrs.update(data['groups'][g]['addrs'])
            hosts.update(data['groups'][g]['hosts'])
        for h in values.get('hosts') or []:
            if h not in data['hosts']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find host {!r}'.format(
                    rolename, h))
            addrs.update(data['hosts'][h]['addrs'])
            hosts.add(h)
        # While structured data is nice, Puppet code like
        #   $facts['cosmos']['roles']['frontend_servers']['hosts']
        # would be pretty tedious to maintain. Turn the output of
        # role resolving (which is what is expected would be used most) into
        #   $facts['cosmos']['frontend_server_hosts']
        #   $facts['cosmos']['frontend_server_addrs']
        # instead.
        data[rolename + '_hosts'] = sorted(hosts)
        data[rolename + '_addrs'] = sorted(addrs)
    return data
def _make_header(data, args):
    """
    Build the text that precedes the YAML document in the output.

    The header is a block of comments summarising which groups each role uses,
    which roles use each group and which groups are not used by any role,
    followed by the three hyphens that start a YAML document.

    :param data: The 'cosmos' data, with 'roles' and 'groups' sections
    :param args: Parsed command line arguments (`args.roles` is shown in the header)
    :return: Header as a single string, ending with a newline
    """
    groups_by_role = {}
    roles_by_group = {}
    # Index the role <-> group relation in both directions
    for role, values in data['roles'].items():
        for g in values.get('groups', []):
            groups_by_role.setdefault(role, []).append(g)
            roles_by_group.setdefault(g, []).append(role)

    lines = [
        '#',
        '# This file was created by {}.'.format(sys.argv[0]),
        '#',
        '# {} roles defined in {}'.format(len(data['roles']), args.roles),
        '# {} groups formed in the current Cosmos environment'.format(len(data['groups'])),
        '#',
        '# Roles <- groups:',
    ]
    for role, used_groups in sorted(groups_by_role.items()):
        lines.append('# {:20s} <- {}'.format(role, ', '.join(_dedup(used_groups))))

    lines.extend(['#', '# Groups -> roles:'])
    for group, using_roles in sorted(roles_by_group.items()):
        lines.append('# {:20s} -> {}'.format(group, ', '.join(_dedup(using_roles))))

    # Groups that exist in the environment but that no role refers to
    unused = [g for g in data['groups'].keys() if g not in roles_by_group]
    if unused:
        lines.extend(['#', '# Unused groups: {}'.format(', '.join(sorted(unused)))])

    # The empty last element makes the joined text end with a newline
    lines.extend(['#', '---', ''])
    return '\n'.join(lines)
def parse_args():
    """
    Parse the command line arguments.

    The parser comes from cosmosdata.get_parser() (presumably with the shared
    options named in the list below - see cosmosdata for details) and is
    extended with the options specific to this script.

    :return: The parsed arguments
    """
    shared_options = ['debug', 'dirs', 'outfile', 'hostsfile', 'addfile', 'classesfile']
    parser = cosmosdata.get_parser('Tool to create the "cosmos" fact', shared_options, {})

    parser.add_argument('--roles',
                        dest='roles',
                        default=None,
                        help='Input YAML file',
                        metavar='FILENAME',
                        )
    # One opt-in flag per top-level section that is otherwise removed from the output
    for section in ('hosts', 'groups', 'roles'):
        flag = 'output_{}'.format(section)
        parser.add_argument('--' + flag,
                            dest=flag,
                            action='store_true',
                            help='Output the "{}" section'.format(section),
                            )
    parser.add_argument('--ignore_nonexistent',
                        dest='ignore_nonex',
                        action='store_true',
                        help='Ignore non-existent data',
                        )
    return parser.parse_args()
def main(args=None, data_in=None):
    """
    Build the 'cosmos' fact and write it to the output file, or to stdout.

    :param args: Parsed command line arguments, or None to parse sys.argv
    :param data_in: Optional initial data (dict) passed on to Hosts
    :return: True
    :raises SyntaxError: if neither `dirs` nor `hostsfile` was supplied
    """
    if args is None:
        args = parse_args()
    hosts = Hosts(data_in, args)
    # 'dirs' takes precedence over 'hostsfile' when both are supplied
    if args.dirs:
        data_s = DNSResolver(args.dirs)
    elif args.hostsfile:
        data_s = HostsResolver(args.hostsfile)
    else:
        raise SyntaxError('Neither dirs nor hostsfile supplied')
    if args.classesfile:
        data_s.load_classes(args.classesfile)
    hosts.load_datasource(data_s)
    res = hosts.to_dict(ignore_nonex=args.ignore_nonex)
    # The header must be made before sections are removed below, since it
    # summarises the 'roles' and 'groups' sections.
    header = _make_header(res['cosmos'], args)
    # To encourage use of roles, delete all this other data from the
    # output unless specifically asked to keep them
    for attr in ['hosts', 'groups', 'roles']:
        if not getattr(args, 'output_' + attr) and attr in res['cosmos']:
            del res['cosmos'][attr]
        else:
            # Diagnostics go to stderr: without an outfile the YAML document
            # itself is written to stdout and must not be mixed with them.
            print('Keeping {}'.format(attr), file=sys.stderr)
    if args.outfile:
        with open(args.outfile, 'w') as fd:
            fd.write(header)
            yaml.safe_dump(res, fd)
    else:
        print(header)
        print(yaml.safe_dump(res))
    return True
if __name__ == '__main__':
    try:
        # Exit status 0 on success, 1 if main() reports failure
        sys.exit(0 if main() else 1)
    except KeyboardInterrupt:
        # Interrupted by the user: stay quiet (no traceback), but do not
        # report success since the output may not have been written.
        # 130 is the conventional status for termination by SIGINT (128 + 2).
        sys.exit(130)