#!/usr/bin/env python3
#
# Copyright 2017 SUNET. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification, are
# permitted provided that the following conditions are met:
#
#    1. Redistributions of source code must retain the above copyright notice, this list of
#       conditions and the following disclaimer.
#
#    2. Redistributions in binary form must reproduce the above copyright notice, this list
#       of conditions and the following disclaimer in the documentation and/or other materials
#       provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY SUNET ``AS IS'' AND ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
# FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SUNET OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those of the
# authors and should not be interpreted as representing official policies, either expressed
# or implied, of SUNET.
#
# Author : Fredrik Thulin <fredrik@thulin.net>
#

#
# TL; DR: This script produces a Puppet fact called 'cosmos', with information
# about hosts in the current Cosmos environment:
#
#   $facts['cosmos']['mongodb_server_addrs'] = [130.242.130.220, 130.242.130.221,
#     '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
#
#   $facts['cosmos']['mongodb_server_hosts'] = [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
#

#
# This script creates the $facts['cosmos'] Puppet fact. This fact holds
# information about the Cosmos environment. To start with, this fact is
# used to define roles of hosts in the Cosmos environment.
#
# An example to provide the merits for this fact, and how it works:
#
#   eduID has the two applications 'signup' and 'dashboard'. They are
#   quite common and both have access to MongoDB, both register with
#   frontends etc.
#
#   eduID had lots of Hiera data like this: (different values in test
#   and production, making it harder than necessary to deploy things
#   from testing to production):
#
#     eduid_frontend_servers:
#       - fe-tug-1.eduid.se
#       - fe-fre-1.eduid.se
#
#     eduid_frontend_ips:
#       - 130.242.131.3
#       - 130.242.131.4
#       - 2001:6b0:54:c4::3
#       - 2001:6b0:54:c4::4
#
#     eduid_mongodb_servers:
#       - userdb-tug-1.eduid.se
#       - userdb-fre-1.eduid.se
#       - userdb-tug-2.eduid.se
#
#     eduid_signup_haproxy_backends:
#       - signup-tug-1.eduid.se
#       #- signup-fre-1.eduid.se
#
#     eduid_dashboard_haproxy_backends:
#       - dash-tug-1.eduid.se
#       - dash-fre-1.eduid.se
#
#     eduid_signup_ips:
#       - 130.242.130.212                     # signup-tug-1.eduid.se
#       - 2001:6b0:54:c3:5054:ff:fea0:1d4     # signup-tug-1.eduid.se
#     eduid_dashboard_ips:
#       - 130.242.130.213                     # dash-fre-1.eduid.se
#       - 130.242.130.197                     # dash-tug-1.eduid.se
#       - 2001:6b0:54:c3:5054:ff:fea0:1d5     # dash-fre-1.eduid.se
#       - 2001:6b0:54:c3:5054:ff:fea0:1c5     # dash-tug-1.eduid.se
#
# This probably looks like a lot, but unfortunately it is probably not
# even a third of all such parameters that were necessary to accomplish
# two applications behind load balancers and with access to a few
# backend services such as MongoDB, redis, etcd and AMQP.
#
# Now, an improvement is to have a script (this) group hosts in
# a Cosmos environment based on hostnames, and provide name-to-address
# mappings in either Hiera or in a fact.
#
# For eduID, this can be implemented rather trivially since the naming
# standard is pretty much function-site-number.eduid.se. For nunoc-ops
# it is, with a couple of exceptions, functionnumber.domain.
#
# So, having automatic mappings of groups-of-hosts to addresses would
# allow lots of improvements, but one level of indirection is missing -
# services move between hosts. eduID currently runs Redis and etcd on
# the same hosts that run MongoDB, but it would not be ideal to bet on
# that being true forever. Even better is to define roles that map to
# the automatically deduced groups, which in turn map to hosts that
# map to addresses.
#
# As a starting point, this script is used with a roles-in.yaml file
# containing just some roles-to-groups maps:
#
#   ---
#   cosmos:
#
#     roles:
#       mongodb_server:
#         groups: [userdb]
#       etcd_server:
#         groups: [userdb]
#       redis_server:
#         groups: [userdb]
#       app_server:
#         groups: [signup, dashboard]
#       frontend_server:
#         groups: [fe]
#
#
# When this script runs (like this:
#
#    ./scripts/cosmos-facts --dirs .eduid.se \
#        --roles metadata/roles-in.yaml \
#        --outfile global/overlay/etc/puppet/static-cosmos-facts.yaml)
#
# it will scan for all directories ending with '.eduid.se' in the Cosmos repository,
# resolve the hosts using DNS and update the data with hosts and groups entries like this
# (data not in DNS _could_ be provided in the YAML file above, under cosmos['hosts']):
#
# ---
# cosmos:
#   hosts:
#     userdb-fre-1.eduid.se:
#       addrs: [130.242.130.220, '2001:6b0:54:c3:5054:ff:fea0:1dc']
#     userdb-tug-1.eduid.se:
#       addrs: [130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dd']
#     ...
#   groups:
#     userdb:
#       addrs: [130.242.130.220, 130.242.130.221, '2001:6b0:54:c3:5054:ff:fea0:1dc',
#         '2001:6b0:54:c3:5054:ff:fea0:1dd']
#       hosts: [userdb-fre-1.eduid.se, userdb-tug-1.eduid.se]
#     ...
#
# The paramount use cases for this information in all our Puppet manifests
# is to do something (update firewall rules, write configuration files) with lists
# of IP addresses or hostnames. While making (sorted) lists of hostnames and addresses
# under the 'roles' section would be beautiful from a programmer's perspective:
#
#   $facts['cosmos']['roles']['mongodb_server']['hosts'] = ['userdb-fre-1.eduid.se', ...]
#
# it feels like it would be rather tedious to write and maintain such Puppet manifests.
# As a compromise, the data is still structurally under the 'cosmos' key but made
# available in the more palatable form:
#
#   $facts['cosmos']['mongodb_server_addrs'] = ['130.242.130.220', '130.242.130.221',
#     '2001:6b0:54:c3:5054:ff:fea0:1dc', '2001:6b0:54:c3:5054:ff:fea0:1dd']
#
#   $facts['cosmos']['mongodb_server_hosts'] = ['userdb-fre-1.eduid.se', 'userdb-tug-1.eduid.se']
#
# To really encourage the use of roles rather than hosts/groups directly, the
# top-level 'cosmos' keys 'hosts', 'groups' and 'roles' are emptied before this script
# writes its output unless the --output_hosts, --output_groups and --output_roles
# arguments are provided. Please don't use them ;).
#


import re
import sys
import copy
import yaml


import cosmosdata
from cosmosdata import DNSResolver, HostsResolver


# Patterns used to derive a group name from a hostname when the input data
# does not supply its own '_regexps' list. Capture group 1 is the group name.
# Raw strings are used so that '\d' reaches the regexp engine verbatim instead
# of being an (invalid) string escape sequence.
DEFAULT_REGEXPS = [re.compile(r'(.+?)-'),      # eduID style, function-site-digit
                   re.compile(r'([a-z]+)\d'),  # nunoc-ops style, functiondigit
                   ]


class Hosts(object):
    """
    Container for the data that ends up under the 'cosmos' key.

    The 'hosts' section maps hostnames to {'addrs': [...]}, and the 'groups'
    section maps group names to {'hosts': [...], 'addrs': [...]}. Anything
    loaded from the roles file (typically 'roles') lives next to them.
    """

    def __init__(self, data_in, args):
        """
        :param data_in: Initial data, or None to start empty. NOTE: this dict
                        is used as-is (not copied), so it is updated in place.
        :param args: Parsed command line arguments. `roles` is read here and
                     `addfile` is read by to_dict().
        :type data_in: dict | None
        """
        self._data = {} if data_in is None else data_in

        # Always start out with empty 'hosts' and 'groups' sections,
        # regardless of what data_in contained.
        self._data.update({'hosts': {},
                           'groups': {},
                           })

        self._args = args

        if args.roles:
            with open(args.roles) as fd:
                file_y = yaml.safe_load(fd)
            # Everything under the top-level 'cosmos' key of the roles file is
            # merged in, replacing any same-named top-level sections.
            self._data.update(file_y.get('cosmos'))

    @property
    def hosts(self):
        """ The 'hosts' section (hostname -> host data). """
        return self._data['hosts']

    @property
    def groups(self):
        """ The 'groups' section (group name -> group data). """
        return self._data['groups']

    def add_host(self, hostname, addrs):
        """
        Register addresses for a host, merging with any already known.

        :param hostname: Name of the host
        :param addrs: List of addresses for the host
        """
        if hostname not in self.hosts:
            self.hosts[hostname] = {}
        prev_addrs = self.hosts[hostname].get('addrs', [])
        self.hosts[hostname]['addrs'] = _dedup(addrs + prev_addrs)

    def add_group(self, groupname, data):
        """
        Store a group, and compute the addresses of all its known hosts.

        :param groupname: Name of the group
        :param data: Group data with a 'members' list. NOTE: modified in place,
                     'members' is renamed to 'hosts'.
        :type data: dict
        """
        # rename 'members' to 'hosts'
        data['hosts'] = data.pop('members')
        self.groups.update({groupname: data})
        group = self.groups[groupname]
        # Make a list of all the addresses of the hosts, starting with any
        # addresses the group data already came with. Members that are not
        # known hosts contribute nothing.
        addrs = list(group.get('addrs', []))
        for hostname in group['hosts']:
            if hostname in self.hosts:
                addrs.extend(self.hosts[hostname].get('addrs', []))
        # Sort and remove duplicates once, after everything is collected
        group['addrs'] = _dedup(addrs)

    def load_datasource(self, data_s):
        """
        Load all hosts and groups from a data source.

        Hosts for which the data source has no addresses are skipped. Every
        loaded host is added to the group(s) named by capture group 1 of each
        matching regexp. The regexps come from the '_regexps' entry of the
        'groups' section (which is removed), or else from DEFAULT_REGEXPS.

        :type data_s: cosmosdata.DataSource
        :return: None
        """
        # Load all the hosts
        if '_regexps' in self.groups:
            group_r = [re.compile(x) for x in self.groups.pop('_regexps')]
        else:
            # Already compiled, no need to compile again
            group_r = list(DEFAULT_REGEXPS)
        for this in data_s.all_hosts():
            this = this.lower()
            addrs = data_s.lookup(this)
            if not addrs:
                continue
            self.add_host(this, addrs)
            for r in group_r:
                match = r.match(this)
                if match:
                    data_s.add_members_to_group(match.group(1), [this])
        # Add all the groups from the datasource
        for this, values in data_s.groups().items():
            self.add_group(this, values)

    def to_dict(self, ignore_nonex):
        """
        Produce the final data structure, with roles resolved.

        Works on a deep copy, so the data held by this object is not changed.

        :param ignore_nonex: Passed on to _resolve_roles(); ignore references
                             to non-existent groups/hosts instead of failing.
        :return: {'cosmos': data}
        :rtype: dict
        """
        res = copy.deepcopy(self._data)
        if self._args.addfile:
            # Add content from a specified file. The context manager makes
            # sure the file is closed again.
            with open(self._args.addfile) as fd:
                file_y = yaml.safe_load(fd)
            res.update(file_y)
        res = _resolve_roles(res, ignore_nonex)
        return dict(cosmos=res)


def _dedup(data):
    t = {}
    for x in data:
        t[x] = 1
    return sorted(t.keys())

def _resolve_roles(data, ignore_nonex):
    for groupname, values in data.get('roles').items():
        # Roles look like this:
        #    monitor_server:
        #      groups: [nagios]
        #      hosts: [monitor.sunet.se]
        #
        groupname = groupname.lower()
        if 'groups' not in values:
            continue
        # merge 'hosts' and 'addrs' from all the referred groups
        addrs = []
        hosts = []
        for g in values.get('groups', []):
            if g not in data['groups']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find group {!r} in groups: {}'.format(
                    groupname, g, list(data['groups'].keys())))
            addrs = addrs + data['groups'][g]['addrs']
            hosts = hosts + data['groups'][g]['hosts']
        for h in values.get('hosts', []):
            if h not in data['hosts']:
                if ignore_nonex:
                    continue
                raise ValueError('While resolving role {!r}, could not find host {!r}'.format(
                    groupname, h))
            addrs = addrs + data['hosts'][h]['addrs']
            hosts = hosts + [h]
        # While structured data is nice, Puppet code like
        #   $facts['cosmos']['roles']['frontend_servers']['hosts']
        # would be pretty tedious to maintain. Turn the output of
        # role resolving (which is what is expected would be used most) into
        #   $facts['cosmos']['frontend_server_hosts']
        #   $facts['cosmos']['frontend_server_addrs']
        # instead.
        data[groupname + '_hosts'] = _dedup(hosts)
        data[groupname + '_addrs'] = _dedup(addrs)

    return data

def _make_header(data, args):
    """ Make a header with the three hyphens indicating YAML, and some comments. """
    roles = {}
    groups = {}

    # Make hash of roles -> groups and groups -> roles
    for role, values in data['roles'].items():
        for g in values.get('groups', []):
            if role not in roles:
                roles[role] = []
            roles[role] += [g]

            if g not in groups:
                groups[g] = []
            groups[g] += [role]

    res = ['#',
           '# This file was created by {}.'.format(sys.argv[0]),
           '#',
           '# {} roles defined in {}'.format(len(data['roles']), args.roles),
           '# {} groups formed in the current Cosmos environment'.format(len(data['groups'])),
           '#',
           '# Roles <- groups:',
           ]
    for role, _groups in sorted(roles.items()):
        res += ['#    {:20s} <- {}'.format(role, ', '.join(_dedup(_groups)))]
    res += ['#',
            '# Groups -> roles:',
            ]
    for group, _roles in sorted(groups.items()):
        res += ['#    {:20s} -> {}'.format(group, ', '.join(_dedup(_roles)))]

    unused = []
    for g in data['groups'].keys():
        if g not in groups:
            unused += [g]
    if unused:
        res += ['#',
                '# Unused groups: {}'.format(', '.join(sorted(unused))),
                ]


    res += ['#',
            '---',
            '']
    return '\n'.join(res)

def parse_args():
    """
    Set up the command line parser and parse the command line.

    The parser is obtained from cosmosdata.get_parser() (asked for the
    'debug', 'dirs', 'outfile', 'hostsfile', 'addfile' and 'classesfile'
    options) and extended with --roles, --output_hosts, --output_groups,
    --output_roles and --ignore_nonexistent.

    :return: The result of the parser's parse_args()
    """
    common_options = ['debug', 'dirs', 'outfile', 'hostsfile', 'addfile',
                      'classesfile']
    parser = cosmosdata.get_parser('Tool to create the "cosmos" fact',
                                  common_options,
                                  {})

    # File with the roles-to-groups mappings
    parser.add_argument('--roles', dest='roles', default=None,
                        help='Input YAML file', metavar='FILENAME')

    # One flag per top-level section that is otherwise left out of the output
    for section in ('hosts', 'groups', 'roles'):
        option = 'output_' + section
        parser.add_argument('--' + option, dest=option, action='store_true',
                            help='Output the "' + section + '" section')

    parser.add_argument('--ignore_nonexistent', dest='ignore_nonex',
                        action='store_true', help='Ignore non-existent data')

    return parser.parse_args()


def main(args=None, data_in=None):
    """
    Gather hosts and groups, resolve the roles and write the resulting YAML.

    :param args: Parsed arguments; the command line is parsed if None.
    :param data_in: Optional initial data, handed to Hosts().
    :return: True
    :raises SyntaxError: Neither args.dirs nor args.hostsfile is set.
    """
    if args is None:
        args = parse_args()

    hosts = Hosts(data_in, args)
    # Choose where host information comes from; dirs takes precedence
    if args.dirs:
        data_s = DNSResolver(args.dirs)
    elif args.hostsfile:
        data_s = HostsResolver(args.hostsfile)
    else:
        raise SyntaxError('Neither dirs nor hostsfile supplied')

    if args.classesfile:
        data_s.load_classes(args.classesfile)

    hosts.load_datasource(data_s)

    res = hosts.to_dict(ignore_nonex=args.ignore_nonex)

    # The header must be made before any sections are removed below,
    # since it summarises the roles and groups.
    header = _make_header(res['cosmos'], args)

    # To encourage use of roles, delete all this other data from the
    # output unless specifically asked to keep them
    for attr in ['hosts', 'groups', 'roles']:
        if getattr(args, 'output_' + attr):
            # Diagnostics go to stderr; stdout may be carrying the YAML output
            print('Keeping {}'.format(attr), file=sys.stderr)
        else:
            res['cosmos'].pop(attr, None)

    if args.outfile:
        with open(args.outfile, 'w') as fd:
            fd.write(header)
            yaml.safe_dump(res, fd)
    else:
        print(header)
        print(yaml.safe_dump(res))

    return True


if __name__ == '__main__':
    try:
        # Exit status 0 if main() returns something true, otherwise 1
        sys.exit(0 if main() else 1)
    except KeyboardInterrupt:
        # Interrupted by the user; end without a traceback
        pass