441 lines
15 KiB
Python
Executable file
441 lines
15 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
#
|
|
# Copyright 2017 SUNET. All rights reserved.
|
|
#
|
|
# Redistribution and use in source and binary forms, with or without modification, are
|
|
# permitted provided that the following conditions are met:
|
|
#
|
|
# 1. Redistributions of source code must retain the above copyright notice, this list of
|
|
# conditions and the following disclaimer.
|
|
#
|
|
# 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
|
# of conditions and the following disclaimer in the documentation and/or other materials
|
|
# provided with the distribution.
|
|
#
|
|
# THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
|
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
|
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUNET OR
|
|
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
|
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
#
|
|
# The views and conclusions contained in the software and documentation are those of the
|
|
# authors and should not be interpreted as representing official policies, either expressed
|
|
# or implied, of SUNET.
|
|
#
|
|
# Author : Fredrik Thulin <fredrik@thulin.net>
|
|
#
|
|
|
|
#
|
|
# TL; DR: This script produces a Puppet fact called 'cosmos', with information
|
|
# about hosts in the current Cosmos environment:
|
|
#
|
|
# $facts['cosmos']['mongodb_server_addrs'] = [130.242.130.220, 130.242.130.221,
|
|
# '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
|
|
#
|
|
# $facts['cosmos']['mongodb_server_hosts'] = [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
|
|
#
|
|
|
|
#
|
|
# This script creates the $facts['cosmos'] Puppet fact. This fact holds
|
|
# information about the Cosmos environment. To start with, this fact is
|
|
# used to define roles of hosts in the Cosmos environment.
|
|
#
|
|
# An example to provide the merits for this fact, and how it works:
|
|
#
|
|
# eduID has the two applications 'signup' and 'dashboard'. They are
|
|
# quite common and both have access to MongoDB, both register with
|
|
# frontends etc.
|
|
#
|
|
# eduID had lots of Hiera data like this: (different values in test
|
|
# and production, making it harder than necessary to deploy things
|
|
# from testing to production):
|
|
#
|
|
# eduid_frontend_servers:
|
|
# - fe-tug-1.eduid.se
|
|
# - fe-fre-1.eduid.se
|
|
#
|
|
# eduid_frontend_ips:
|
|
# - 130.242.131.3
|
|
# - 130.242.131.4
|
|
# - 2001:6b0:54:c4::3
|
|
# - 2001:6b0:54:c4::4
|
|
#
|
|
# eduid_mongodb_servers:
|
|
# - userdb-tug-1.eduid.se
|
|
# - userdb-fre-1.eduid.se
|
|
# - userdb-tug-2.eduid.se
|
|
#
|
|
# eduid_signup_haproxy_backends:
|
|
# - signup-tug-1.eduid.se
|
|
# #- signup-fre-1.eduid.se
|
|
#
|
|
# eduid_dashboard_haproxy_backends:
|
|
# - dash-tug-1.eduid.se
|
|
# - dash-fre-1.eduid.se
|
|
#
|
|
# eduid_signup_ips:
|
|
# - 130.242.130.212 # signup-tug-1.eduid.se
|
|
# - 2001:6b0:54:c3:5054:ff:fea0:1d4 # signup-tug-1.eduid.se
|
|
# eduid_dashboard_ips:
|
|
# - 130.242.130.213 # dash-fre-1.eduid.se
|
|
# - 130.242.130.197 # dash-tug-1.eduid.se
|
|
# - 2001:6b0:54:c3:5054:ff:fea0:1d5 # dash-fre-1.eduid.se
|
|
# - 2001:6b0:54:c3:5054:ff:fea0:1c5 # dash-tug-1.eduid.se
|
|
#
|
|
# This probably looks like a lot, but unfortunately it is probably not
|
|
# even a third of all such parameters that were necessary to accomplish
|
|
# two applications behind load balancers and with access to a few
|
|
# backend services such as MongoDB, redis, etcd and AMQP.
|
|
#
|
|
# Now, an improvement is to have a script (this) group hosts in
|
|
# a Cosmos environment based on hostnames, and provide name-to-address
|
|
# mappings in either Hiera or in a fact.
|
|
#
|
|
# For eduID, this can be implemented rather trivially since the naming
|
|
# standard is pretty much function-site-number.eduid.se. For nunoc-ops
|
|
# it is, with a couple of exceptions, functionnumber.domain.
|
|
#
|
|
# So, having automatic mappings of groups-of-hosts to addresses would
|
|
# allow lots of improvements, but one level of indirection is missing -
|
|
# services move between hosts. eduID currently runs Redis and etcd on
|
|
# the same hosts that run MongoDB, but it would not be ideal to bet on
|
|
# that being true forever. Even better is to define roles that map to
|

# the automatically deduced groups, which in turn map to hosts that
|

# map to addresses.
|
|
#
|
|
# As a starting point, this script is used with a roles-in.yaml file
|
|
# containing just some roles-to-groups maps:
|
|
#
|
|
# ---
|
|
# cosmos:
|
|
#
|
|
# roles:
|
|
# mongodb_server:
|
|
# groups: [userdb]
|
|
# etcd_server:
|
|
# groups: [userdb]
|
|
# redis_server:
|
|
# groups: [userdb]
|
|
# app_server:
|
|
# groups: [signup, dashboard]
|
|
# frontend_server:
|
|
# groups: [fe]
|
|
#
|
|
#
|
|
# When this script runs (like this:
|
|
#
|
|
# ./scripts/cosmos-facts --dirs .eduid.se \
|
|
# --roles metadata/roles-in.yaml \
|
|
# --outfile global/overlay/etc/puppet/static-cosmos-facts.yaml)
|
|
#
|
|
# it will scan for all directories ending with '.eduid.se' in the Cosmos repository,
|
|
# resolve the hosts using DNS and update the data with hosts and groups entries like this
|
|
# (data not in DNS _could_ be provided in the YAML file above, under cosmos['hosts']):
|
|
#
|
|
# ---
|
|
# cosmos:
|
|
# hosts:
|
|
# userdb-fre-1.eduid.se:
|
|
# addrs: [130.242.130.220, '2001:6b0:54:c3:5054:ff:fea0:1dc']
|
|
# userdb-tug-1.eduid.se:
|
|
# addrs: [130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dd']
|
|
# ...
|
|
# groups:
|
|
# userdb:
|
|
# addrs: [130.242.130.220, 130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dc',
|
|
# '2001:6b0:54:c3:5054:ff:fea0:1dd']
|
|
# hosts: [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
|
|
# ...
|
|
#
|
|
# The paramount use cases for this information in all our Puppet manifests
|
|
# is to do something (update firewall rules, write configuration files) with lists
|
|
# of IP addresses or hostnames. While making (sorted) lists of hostnames and addresses
|
|
# under the 'roles' section would be beautiful from a programmer's perspective:
|
|
#
|
|
# $facts['cosmos']['roles']['mongodb_server']['hosts'] = ['userdb-fre-1.eduid.se', ...]
|
|
#
|
|
# it feels like it would be rather tedious to write and maintain such Puppet manifests.
|
|
# As a compromise, the data is still structurally under the 'cosmos' key but made
|
|
# available in the more palatable form:
|
|
#
|
|
# $facts['cosmos']['mongodb_server_addrs'] = ['130.242.130.220', '130.242.130.221',
|
|
# '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
|
|
#
|
|
# $facts['cosmos']['mongodb_server_hosts'] = ['userdb-fre-1.eduid.se', 'userdb-tug-1.eduid.se']
|
|
#
|
|
# To really encourage the use of roles rather than hosts/groups directly, the
|
|
# top-level 'cosmos' keys 'hosts', 'groups' and 'roles' are emptied before this script
|
|
# writes its output unless the --output_hosts, --output_groups and --output_roles
|
|
# arguments are provided. Please don't use them ;).
|
|
#
|
|
|
|
|
|
import re
|
|
import sys
|
|
import copy
|
|
import yaml
|
|
|
|
|
|
import cosmosdata
|
|
from cosmosdata import DNSResolver, HostsResolver
|
|
|
|
|
|
# Default patterns for deriving a group name from a hostname; capture group 1
# is the group name. Raw strings keep ``\d`` a regex escape instead of an
# invalid string escape sequence.
DEFAULT_REGEXPS = [
    re.compile(r'(.+?)-'),      # eduID style, function-site-digit
    re.compile(r'([a-z]+)\d'),  # nunoc-ops style, functiondigit
]
|
|
|
|
|
|
class Hosts(object):
    """
    Collect the hosts and groups that make up the 'cosmos' fact.

    All state is kept in a single dict (``self._data``) that always has the
    keys 'hosts' and 'groups'. Whatever is found under the top-level 'cosmos'
    key of the roles file (``args.roles``) is merged on top of that.
    """

    def __init__(self, data_in, args):
        """
        :param data_in: Initial data dict, or None to start from scratch.
                        NOTE: the dict is used in place (not copied), and its
                        'hosts' and 'groups' entries are reset to empty dicts.
        :param args: Parsed command line arguments. ``args.roles`` is read
                     here, ``args.addfile`` is read by to_dict().
        """
        self._data = {} if data_in is None else data_in
        self._data.update({'hosts': {}, 'groups': {}})
        self._args = args

        if args.roles:
            with open(args.roles) as fd:
                file_y = yaml.safe_load(fd)
            # An empty file loads as None, and the 'cosmos' key might be
            # missing or empty - in all those cases there is nothing to merge.
            cosmos = (file_y or {}).get('cosmos')
            if cosmos:
                self._data.update(cosmos)

    @property
    def hosts(self):
        """ The hostname -> host data mapping (the live dict, not a copy). """
        return self._data['hosts']

    @property
    def groups(self):
        """ The groupname -> group data mapping (the live dict, not a copy). """
        return self._data['groups']

    def add_host(self, hostname, addrs):
        """
        Register addresses for a host, merging with any already known.

        :param hostname: Name of the host
        :param addrs: List of addresses to add for the host
        """
        entry = self.hosts.setdefault(hostname, {})
        entry['addrs'] = _dedup(addrs + entry.get('addrs', []))

    def add_group(self, groupname, data):
        """
        Register a group, and compute the addresses of all its hosts.

        NOTE: `data` is modified in place and stored as the group entry.

        :param groupname: Name of the group
        :param data: Group data with the host names listed under 'members'
        """
        # rename 'members' to 'hosts' (tolerate data that was already renamed)
        data['hosts'] = data.pop('members', data.get('hosts', []))
        self.groups[groupname] = data
        # Make a list of all the addresses of the hosts, in addition to any
        # addresses the group data already carried
        addrs = list(data.get('addrs', []))
        for this in data['hosts']:
            if this in self.hosts:
                # a host entry is not guaranteed to have any addresses
                addrs.extend(self.hosts[this].get('addrs', []))
        data['addrs'] = _dedup(addrs)

    def load_datasource(self, data_s):
        """
        Load all hosts and groups from a data source.

        Every host with at least one address is added, and also put in the
        group(s) deduced from its (lowercased) name using the regexps found
        under groups['_regexps'], or DEFAULT_REGEXPS if there are none.

        :type data_s: cosmosdata.DataSource
        :return: None
        """
        # Load all the hosts
        if '_regexps' in self.groups:
            # '_regexps' is configuration rather than a group, so remove it
            patterns = self.groups.pop('_regexps')
        else:
            patterns = DEFAULT_REGEXPS
        group_r = [re.compile(x) for x in patterns]
        for this in data_s.all_hosts():
            this = this.lower()
            addrs = data_s.lookup(this)
            if not addrs:
                # hosts without addresses are left out altogether
                continue
            self.add_host(this, addrs)
            for r in group_r:
                match = r.match(this)
                if match:
                    data_s.add_members_to_group(match.group(1), [this])
        # Add all the groups from the datasource
        for this, values in data_s.groups().items():
            self.add_group(this, values)

    def to_dict(self, ignore_nonex):
        """
        Produce the final data structure, with all roles resolved.

        :param ignore_nonex: Passed on to _resolve_roles(); if true, references
                             to unknown groups/hosts are skipped instead of
                             raising ValueError
        :return: A dict with the single key 'cosmos'
        """
        res = copy.deepcopy(self._data)
        if self._args.addfile:
            # Add content from a specified file
            with open(self._args.addfile) as fd:
                file_y = yaml.safe_load(fd)
            if file_y:
                # an empty file loads as None - nothing to add then
                res.update(file_y)
        res = _resolve_roles(res, ignore_nonex)
        return dict(cosmos=res)
|
|
|
|
|
|
def _dedup(data):
    """ Return the distinct elements of `data` as a sorted list. """
    return sorted(set(data))
|
|
|
|
def _resolve_roles(data, ignore_nonex):
    """
    Resolve all roles into flat lists of hosts and addresses.

    Roles look like this:

        monitor_server:
          groups: [nagios]
          hosts: [monitor.sunet.se]

    While structured data is nice, Puppet code like

        $facts['cosmos']['roles']['frontend_servers']['hosts']

    would be pretty tedious to maintain. Turn the output of role resolving
    (which is what is expected would be used most) into

        $facts['cosmos']['frontend_server_hosts']
        $facts['cosmos']['frontend_server_addrs']

    instead.

    :param data: Dict with the keys 'roles', 'groups' and 'hosts'. Missing or
                 empty sections are treated as empty. Modified in place.
    :param ignore_nonex: If true, skip references to groups/hosts that do not
                         exist instead of raising ValueError
    :return: `data`, with '<role>_hosts' and '<role>_addrs' added for every
             role that has a 'groups' key
    :raises ValueError: if a role refers to an unknown group or host, and
                        `ignore_nonex` is false
    """
    known_groups = data.get('groups') or {}
    known_hosts = data.get('hosts') or {}
    # No roles at all (key missing, or an empty 'roles:' in the YAML) is not
    # an error - there is just nothing to resolve.
    for rolename, values in (data.get('roles') or {}).items():
        rolename = rolename.lower()
        # NOTE(review): roles without 'groups' are skipped even if they list
        # 'hosts' - confirm that host-only roles are not meant to be resolved.
        if not values or 'groups' not in values:
            continue
        # merge 'hosts' and 'addrs' from all the referred groups
        addrs = []
        hosts = []
        for g in values.get('groups') or []:
            if g not in known_groups:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find group {!r} in groups: {}'.format(
                    rolename, g, list(known_groups.keys())))
            addrs.extend(known_groups[g]['addrs'])
            hosts.extend(known_groups[g]['hosts'])
        # add the hosts listed explicitly for the role
        for h in values.get('hosts') or []:
            if h not in known_hosts:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find host {!r}'.format(
                    rolename, h))
            addrs.extend(known_hosts[h]['addrs'])
            hosts.append(h)
        data[rolename + '_hosts'] = _dedup(hosts)
        data[rolename + '_addrs'] = _dedup(addrs)

    return data
|
|
|
|
def _make_header(data, args):
    """
    Make a header with the three hyphens indicating YAML, and some comments.

    The comments summarise how roles and groups relate to each other, and
    list the groups that are not referred to by any role.

    :param data: The 'cosmos' dict. The 'roles' and 'groups' sections are
                 read; a missing or empty section is treated as empty.
    :param args: Parsed command line arguments (only ``args.roles``, the name
                 of the roles file, is used)
    :return: The header as a string ending with the '---' line and a newline
    """
    all_roles = data.get('roles') or {}
    all_groups = data.get('groups') or {}

    # Make hash of roles -> groups and groups -> roles
    roles = {}
    groups = {}
    for role, values in all_roles.items():
        # a role with an empty body is loaded as None from YAML
        for g in (values or {}).get('groups', []):
            roles.setdefault(role, []).append(g)
            groups.setdefault(g, []).append(role)

    res = ['#',
           '# This file was created by {}.'.format(sys.argv[0]),
           '#',
           '# {} roles defined in {}'.format(len(all_roles), args.roles),
           '# {} groups formed in the current Cosmos environment'.format(len(all_groups)),
           '#',
           '# Roles <- groups:',
           ]
    for role, _groups in sorted(roles.items()):
        res += ['# {:20s} <- {}'.format(role, ', '.join(_dedup(_groups)))]
    res += ['#',
            '# Groups -> roles:',
            ]
    for group, _roles in sorted(groups.items()):
        res += ['# {:20s} -> {}'.format(group, ', '.join(_dedup(_roles)))]

    # Groups that exist, but that no role refers to
    unused = sorted(g for g in all_groups if g not in groups)
    if unused:
        res += ['#',
                '# Unused groups: {}'.format(', '.join(unused)),
                ]

    res += ['#',
            '---',
            '']
    return '\n'.join(res)
|
|
|
|
def parse_args():
    """
    Build the command line parser and parse the command line arguments.

    The parser with the common arguments comes from cosmosdata.get_parser();
    the arguments specific to this tool are added to it here.
    """
    common_args = ['debug', 'dirs', 'outfile', 'hostsfile', 'addfile',
                   'classesfile']
    parser = cosmosdata.get_parser('Tool to create the "cosmos" fact',
                                   common_args,
                                   {})

    parser.add_argument('--roles',
                        dest='roles',
                        default=None,
                        help='Input YAML file',
                        metavar='FILENAME',
                        )

    # One on/off switch per section that is otherwise left out of the output
    for section in ('hosts', 'groups', 'roles'):
        option = 'output_{}'.format(section)
        parser.add_argument('--' + option,
                            dest=option,
                            action='store_true',
                            help='Output the "{}" section'.format(section),
                            )

    parser.add_argument('--ignore_nonexistent',
                        dest='ignore_nonex',
                        action='store_true',
                        help='Ignore non-existent data',
                        )

    args = parser.parse_args()
    return args
|
|
|
|
|
|
def main(args=None, data_in=None):
    """
    Create the 'cosmos' fact and write it to a file, or to stdout.

    :param args: Parsed command line arguments, or None to parse sys.argv
    :param data_in: Initial data passed on to Hosts(), or None
    :return: True
    :raises SyntaxError: if neither ``args.dirs`` nor ``args.hostsfile`` is set
    """
    if args is None:
        args = parse_args()

    hosts = Hosts(data_in, args)
    if args.dirs:
        data_s = DNSResolver(args.dirs)
    elif args.hostsfile:
        data_s = HostsResolver(args.hostsfile)
    else:
        # NOTE(review): SyntaxError is an unusual choice for a usage error. It
        # is kept as-is since callers might be catching it.
        raise SyntaxError('Neither dirs nor hostsfile supplied')

    if args.classesfile:
        data_s.load_classes(args.classesfile)

    hosts.load_datasource(data_s)

    res = hosts.to_dict(ignore_nonex=args.ignore_nonex)

    # The header has to be made before any sections are removed below
    header = _make_header(res['cosmos'], args)

    # To encourage use of roles, delete all this other data from the
    # output unless specifically asked to keep them
    for attr in ['hosts', 'groups', 'roles']:
        if attr not in res['cosmos']:
            continue
        if getattr(args, 'output_' + attr, False):
            # Diagnostics go to stderr, since stdout is where the YAML
            # ends up when no outfile is given
            print('Keeping {}'.format(attr), file=sys.stderr)
        else:
            del res['cosmos'][attr]

    if args.outfile:
        with open(args.outfile, 'w') as fd:
            fd.write(header)
            yaml.safe_dump(res, fd)
    else:
        print(header)
        print(yaml.safe_dump(res))

    return True
|
|
|
|
|
|
if __name__ == '__main__':
    # Exit status 0 if main() reports success, 1 otherwise. An interrupt
    # (Ctrl-C) ends the script quietly, without a traceback.
    try:
        sys.exit(0 if main() else 1)
    except KeyboardInterrupt:
        pass
|