#!/usr/bin/env python3 # # Copyright 2017 SUNET. All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, are # permitted provided that the following conditions are met: # # 1. Redistributions of source code must retain the above copyright notice, this list of # conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright notice, this list # of conditions and the following disclaimer in the documentation and/or other materials # provided with the distribution. # # THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND # FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUNET OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF # ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # The views and conclusions contained in the software and documentation are those of the # authors and should not be interpreted as representing official policies, either expressed # or implied, of SUNET. # # Author : Fredrik Thulin # # # TL; DR: This script produces a Puppet fact called 'cosmos', with information # about hosts in the current Cosmos environment: # # $facts['cosmos']['mongodb_server_addrs'] = [130.242.130.220, 130.242.130.221, # '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd'] # # $facts['cosmos']['mongodb_server_hosts'] = [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se] # # # This script creates the $facts['cosmos'] Puppet fact. 
This fact holds # information about the Cosmos environment. To start with, this fact is # used to define roles of hosts in the Cosmos environment. # # An example to provide the merits for this fact, and how it works: # # eduID has the two applications 'signup' and 'dashboard'. They are # quite common and both have access to MongoDB, both register with # frontends etc. # # eduID had lots of Hiera data like this: (different values in test # and production, making it harder than necessary to deploy things # from testing to production): # # eduid_frontend_servers: # - fe-tug-1.eduid.se # - fe-fre-1.eduid.se # # eduid_frontend_ips: # - 130.242.131.3 # - 130.242.131.4 # - 2001:6b0:54:c4::3 # - 2001:6b0:54:c4::4 # # eduid_mongodb_servers: # - userdb-tug-1.eduid.se # - userdb-fre-1.eduid.se # - userdb-tug-2.eduid.se # # eduid_signup_haproxy_backends: # - signup-tug-1.eduid.se # #- signup-fre-1.eduid.se # # eduid_dashboard_haproxy_backends: # - dash-tug-1.eduid.se # - dash-fre-1.eduid.se # # eduid_signup_ips: # - 130.242.130.212 # signup-tug-1.eduid.se # - 2001:6b0:54:c3:5054:ff:fea0:1d4 # signup-tug-1.eduid.se # eduid_dashboard_ips: # - 130.242.130.213 # dash-fre-1.eduid.se # - 130.242.130.197 # dash-tug-1.eduid.se # - 2001:6b0:54:c3:5054:ff:fea0:1d5 # dash-fre-1.eduid.se # - 2001:6b0:54:c3:5054:ff:fea0:1c5 # dash-tug-1.eduid.se # # This probably looks like a lot, but unfortunately it is probably not # even a third of all such parameters that were necessary to accomplish # two applications behind load balancers and with access to a few # backend services such as MongoDB, redis, etcd and AMQP. # # Now, an improvement is to have a script (this) group hosts in # a Cosmos environment based on hostnames, and provide name-to-address # mappings in either Hiera or in a fact. # # For eduID, this can be implemented rather trivially since the naming # standard is pretty much function-site-number.eduid.se. For nunoc-ops # it is, with a couple of exceptions, functionnumber.domain. 
# # So, having automatic mappings of groups-of-hosts to addresses would # allow lots of improvements, but one level of indirection is missing - # services move between hosts. eduID currently runs Redis and etcd on # the same hosts that run MongoDB, but it would not be ideal to bet on # that being true forever. Even better is to define roles that map to # the automatically deduced groups, which in turn map to hosts that # map to addresses. # # As a starting point, this script is used with a roles-in.yaml file # containing just some roles-to-groups maps: # # --- # cosmos: # # roles: # mongodb_server: # groups: [userdb] # etcd_server: # groups: [userdb] # redis_server: # groups: [userdb] # app_server: # groups: [signup, dashboard] # frontend_server: # groups: [fe] # # # When this script runs (like this: # # ./scripts/cosmos-facts --dirs .eduid.se \ # --roles metadata/roles-in.yaml \ # --outfile global/overlay/etc/puppet/static-cosmos-facts.yaml) # # it will scan for all directories ending with '.eduid.se' in the Cosmos repository, # resolve the hosts using DNS and update the data with hosts and groups entries like this # (data not in DNS _could_ be provided in the YAML file above, under cosmos['hosts']): # # --- # cosmos: # hosts: # userdb-fre-1.eduid.se: # addrs: [130.242.130.220, '2001:6b0:54:c3:5054:ff:fea0:1dc'] # userdb-tug-1.eduid.se: # addrs: [130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dd'] # ... # groups: # userdb: # addrs: [130.242.130.220, 130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dc', # '2001:6b0:54:c3:5054:ff:fea0:1dd'] # hosts: [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se] # ... # # The paramount use case for this information in all our Puppet manifests # is to do something (update firewall rules, write configuration files) with lists # of IP addresses or hostnames. 
# While making (sorted) lists of hostnames and addresses
# under the 'roles' section would be beautiful from a programmer's perspective:
#
#   $facts['cosmos']['roles']['mongodb_server']['hosts'] = ['userdb-fre-1.eduid.se', ...]
#
# it feels like it would be rather tedious to write and maintain such Puppet manifests.
# As a compromise, the data is still structurally under the 'cosmos' key but made
# available in the more palatable form:
#
#   $facts['cosmos']['mongodb_server_addrs'] = ['130.242.130.220', '130.242.130.221',
#        '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
#
#   $facts['cosmos']['mongodb_server_hosts'] = ['userdb-fre-1.eduid.se', 'userdb-tug-1.eduid.se']
#
# To really encourage the use of roles rather than hosts/groups directly, the
# top-level 'cosmos' keys 'hosts', 'groups' and 'roles' are emptied before this script
# writes its output unless the --output_hosts, --output_groups and --output_roles
# arguments are provided. Please don't use them ;).
#

import re
import sys
import copy
import yaml

import cosmosdata
from cosmosdata import DNSResolver, HostsResolver

# Patterns whose first capture group, matched against the start of a lowercased
# hostname, names the group that host is added to.  Used when the input data
# has no '_regexps' entry under 'groups'.  Raw strings keep '\d' a regex escape
# instead of an invalid string escape.
DEFAULT_REGEXPS = [re.compile(r'(.+?)-'),      # eduID style, function-site-digit
                   re.compile(r'([a-z]+)\d'),  # nunoc-ops style, functiondigit
                   ]


class Hosts(object):
    """
    Accumulates the 'hosts', 'groups' and (from the roles file) 'roles'
    sections that end up under the 'cosmos' key of the generated fact.
    """

    def __init__(self, data_in, args):
        """
        :param data_in: Initial data (dict), or None to start empty. A dict
                        passed here is used as-is and updated in place; its
                        'hosts' and 'groups' entries are reset to {}.
        :param args: Parsed arguments. `args.roles` is read here and
                     `args.addfile` is read by to_dict().
        """
        self._data = {} if data_in is None else data_in
        self._data.update({'hosts': {},
                           'groups': {},
                           })
        self._args = args
        if args.roles:
            with open(args.roles) as fd:
                file_y = yaml.safe_load(fd)
            # An empty file loads as None and the 'cosmos' key may be missing;
            # neither case should end in dict.update(None) raising TypeError.
            if file_y:
                self._data.update(file_y.get('cosmos') or {})

    @property
    def hosts(self):
        """ The 'hosts' section: hostname -> dict (with an 'addrs' list once added). """
        return self._data['hosts']

    @property
    def groups(self):
        """ The 'groups' section: group name -> dict with 'hosts' and 'addrs'. """
        return self._data['groups']

    def add_host(self, hostname, addrs):
        """
        Record addresses for a host, merging with any addresses already known.

        :param hostname: Name of the host
        :param addrs: List of addresses to add for the host
        """
        if hostname not in self.hosts:
            self.hosts[hostname] = {}
        prev_addrs = self.hosts[hostname].get('addrs', [])
        self.hosts[hostname]['addrs'] = _dedup(addrs + prev_addrs)

    def add_group(self, groupname, data):
        """
        Store a group (replacing any existing group with the same name) and
        compute its 'addrs' from the addresses of its known member hosts.

        :param groupname: Name of the group
        :param data: Group dict with a 'members' list. It is modified in place:
                     'members' is renamed to 'hosts' and 'addrs' is (re)written.
        :raise KeyError: if `data` has no 'members' key
        """
        # rename 'members' to 'hosts'
        data['hosts'] = data.pop('members')
        self.groups[groupname] = data
        # Make a list of all the addresses of the hosts.  Work on a copy so a
        # list shared with the data source is not extended in place, and
        # de-duplicate once at the end rather than once per member.
        addrs = list(data.get('addrs') or [])
        for this in data['hosts']:
            if this in self.hosts:
                addrs.extend(self.hosts[this].get('addrs', []))
        data['addrs'] = _dedup(addrs)

    def load_datasource(self, data_s):
        """
        Add every host of the data source that has addresses, assign hosts to
        groups by matching their names against the group regexps, and then add
        all groups the data source reports.

        :type data_s: cosmosdata.DataSource
        :return: None
        """
        # Regexps supplied in the input data (groups['_regexps']) take
        # precedence over the defaults; the entry is removed so that it is not
        # later mistaken for a group.
        if '_regexps' in self.groups:
            group_r = [re.compile(x) for x in self.groups.pop('_regexps')]
        else:
            group_r = list(DEFAULT_REGEXPS)
        # Load all the hosts
        for this in data_s.all_hosts():
            this = this.lower()
            addrs = data_s.lookup(this)
            if not addrs:
                # Hosts without any address are left out entirely
                continue
            self.add_host(this, addrs)
            for r in group_r:
                match = r.match(this)
                if match:
                    data_s.add_members_to_group(match.group(1), [this])
        # Add all the groups from the datasource
        for this, values in data_s.groups().items():
            self.add_group(this, values)

    def to_dict(self, ignore_nonex):
        """
        Build the final fact structure.

        A deep copy of the collected data is made, the top-level content of
        the --addfile YAML file (if any) is merged into it, and the roles are
        resolved into '<role>_hosts' / '<role>_addrs' keys.

        :param ignore_nonex: Passed on to _resolve_roles(); when true,
                             references to unknown groups/hosts are skipped.
        :return: {'cosmos': <data>}
        """
        res = copy.deepcopy(self._data)
        if self._args.addfile:
            # Add content from a specified file.  The context manager closes
            # the file; an empty file (None) contributes nothing.
            with open(self._args.addfile) as fd:
                file_y = yaml.safe_load(fd)
            if file_y:
                res.update(file_y)
        res = _resolve_roles(res, ignore_nonex)
        return dict(cosmos=res)


def _dedup(data):
    """ Return the unique elements of `data` as a sorted list. """
    return sorted(set(data))


def _resolve_roles(data, ignore_nonex):
    """
    Expand every role into flat '<role>_hosts' and '<role>_addrs' lists.

    :param data: Dict with 'hosts', 'groups' and optionally 'roles'. It is
                 updated in place with the flattened keys.
    :param ignore_nonex: When true, skip references to groups/hosts that are
                         not present instead of raising.
    :return: `data`
    :raise ValueError: on a reference to an unknown group or host when
                       `ignore_nonex` is false
    """
    # No 'roles' section at all (e.g. no --roles file) means nothing to resolve
    for groupname, values in (data.get('roles') or {}).items():
        # Roles look like this:
        #   monitor_server:
        #     groups: [nagios]
        #     hosts: [monitor.sunet.se]
        #
        groupname = groupname.lower()
        # NOTE(review): a role that lists only 'hosts' (no 'groups') is skipped
        # here, so the hosts loop below never runs for it - confirm whether
        # that is intended before relying on hosts-only roles.
        if not values or 'groups' not in values:
            continue
        # merge 'hosts' and 'addrs' from all the referred groups
        addrs = []
        hosts = []
        for g in values.get('groups', []):
            if g not in data['groups']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find group {!r} in groups: {}'.format(
                    groupname, g, list(data['groups'].keys())))
            addrs = addrs + data['groups'][g]['addrs']
            hosts = hosts + data['groups'][g]['hosts']
        for h in values.get('hosts', []):
            if h not in data['hosts']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find host {!r}'.format(
                    groupname, h))
            addrs = addrs + data['hosts'][h]['addrs']
            hosts = hosts + [h]
        # While structured data is nice, Puppet code like
        #   $facts['cosmos']['roles']['frontend_servers']['hosts']
        # would be pretty tedious to maintain. Turn the output of
        # role resolving (which is what is expected would be used most) into
        #   $facts['cosmos']['frontend_server_hosts']
        #   $facts['cosmos']['frontend_server_addrs']
        # instead.
        data[groupname + '_hosts'] = _dedup(hosts)
        data[groupname + '_addrs'] = _dedup(addrs)
    return data


def _make_header(data, args):
    """
    Make a header with the three hyphens indicating YAML, and some comments.

    The comments summarise which groups each role uses, which roles use each
    group, and which groups are not referenced by any role.

    :param data: The 'cosmos' dict (must contain 'groups'; 'roles' is optional)
    :param args: Parsed arguments; only `args.roles` is used, for display
    :return: Header text ending with '---' and a newline
    """
    defined_roles = data.get('roles') or {}
    roles = {}
    groups = {}
    # Make hash of roles -> groups and groups -> roles
    for role, values in defined_roles.items():
        for g in (values or {}).get('groups', []):
            roles.setdefault(role, []).append(g)
            groups.setdefault(g, []).append(role)
    res = ['#',
           '# This file was created by {}.'.format(sys.argv[0]),
           '#',
           '# {} roles defined in {}'.format(len(defined_roles), args.roles),
           '# {} groups formed in the current Cosmos environment'.format(len(data['groups'])),
           '#',
           '# Roles <- groups:',
           ]
    for role, _groups in sorted(roles.items()):
        res += ['# {:20s} <- {}'.format(role, ', '.join(_dedup(_groups)))]
    res += ['#',
            '# Groups -> roles:',
            ]
    for group, _roles in sorted(groups.items()):
        res += ['# {:20s} -> {}'.format(group, ', '.join(_dedup(_roles)))]
    unused = [g for g in data['groups'] if g not in groups]
    if unused:
        res += ['#',
                '# Unused groups: {}'.format(', '.join(sorted(unused))),
                ]
    # The trailing empty element makes the joined text end with a newline
    res += ['#', '---', '']
    return '\n'.join(res)


def parse_args():
    """
    Parse the command line arguments.

    The base parser comes from cosmosdata.get_parser() (presumably the listed
    names select shared options it knows about - see cosmosdata); the options
    specific to this script are added here.

    :return: The parsed arguments
    """
    parser = cosmosdata.get_parser('Tool to create the "cosmos" fact',
                                   ['debug', 'dirs', 'outfile', 'hostsfile', 'addfile', 'classesfile'],
                                   {})
    parser.add_argument('--roles',
                        dest='roles',
                        default=None,
                        help='Input YAML file',
                        metavar='FILENAME',
                        )
    for attr in ['hosts', 'groups', 'roles']:
        parser.add_argument('--output_{}'.format(attr),
                            dest='output_{}'.format(attr),
                            action='store_true',
                            help='Output the "{}" section'.format(attr),
                            )
    parser.add_argument('--ignore_nonexistent',
                        dest='ignore_nonex',
                        action='store_true',
                        help='Ignore non-existent data',
                        )
    return parser.parse_args()


def main(args=None, data_in=None):
    """
    Collect hosts/groups/roles and write the 'cosmos' fact as YAML.

    :param args: Parsed arguments, or None to parse the command line
    :param data_in: Optional initial data passed on to Hosts()
    :return: True on success
    :raise SyntaxError: if neither `dirs` nor `hostsfile` is supplied (the
                        exception type is kept for backwards compatibility)
    """
    if args is None:
        args = parse_args()
    hosts = Hosts(data_in, args)
    if args.dirs:
        data_s = DNSResolver(args.dirs)
    elif args.hostsfile:
        data_s = HostsResolver(args.hostsfile)
    else:
        raise SyntaxError('Neither dirs nor hostsfile supplied')
    if args.classesfile:
        data_s.load_classes(args.classesfile)
    hosts.load_datasource(data_s)
    res = hosts.to_dict(ignore_nonex=args.ignore_nonex)
    # The header is built before the sections are pruned below, since it
    # summarises the roles and groups.
    header = _make_header(res['cosmos'], args)
    # To encourage use of roles, delete all this other data from the
    # output unless specifically asked to keep them
    for attr in ['hosts', 'groups', 'roles']:
        if getattr(args, 'output_' + attr):
            # Diagnostics go to stderr: without --outfile the YAML itself is
            # written to stdout and must not be mixed with other text.
            print('Keeping {}'.format(attr), file=sys.stderr)
        else:
            res['cosmos'].pop(attr, None)
    if args.outfile:
        with open(args.outfile, 'w') as fd:
            fd.write(header)
            yaml.safe_dump(res, fd)
    else:
        print(header)
        print(yaml.safe_dump(res))
    return True


if __name__ == '__main__':
    try:
        sys.exit(0 if main() else 1)
    except KeyboardInterrupt:
        # Exit quietly (no traceback) but do not report success for an
        # interrupted run; 130 is the conventional status for SIGINT.
        sys.exit(130)