diff --git a/Makefile b/Makefile index 464dcf58..a284f955 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,16 @@ +DIST := "ubuntu:latest" + cosmos: - fab all cosmos + fab all cosmos upgrade: fab upgrade -db: - @python3 ./fabfile/db.py > global/overlay/etc/puppet/cosmos-db.yaml - @git add global/overlay/etc/puppet/cosmos-db.yaml && git commit -m "update db" global/overlay/etc/puppet/cosmos-db.yaml - -tag: db +tag: ./bump-tag + +test_in_docker: + docker run --rm -it \ + -v ${CURDIR}:/multiverse:ro \ + \ + $(DIST) /multiverse/scripts/test-in-docker.sh diff --git a/addhost b/addhost index 92f9b98e..883d1728 100755 --- a/addhost +++ b/addhost @@ -1,57 +1,70 @@ -#!/bin/sh +#!/bin/bash cmd_hostname="" cmd_do_bootstrap="no" cmd_fqdn="" -set -- $(getopt b?h?n: "$@") +function usage() { + echo "Usage: $0 [-h] [-b] [-n fqdn] [--] []" + echo " -h show help" + echo " -b bootstrap (using ssh)" + echo " -n specify FQDN (if not the same as )" + echo "" + echo " can be an IP number, or something that resolves to one" +} -while [ $# -gt 0 ]; do - case "$1" in - (-h) echo "Usage: $0 [-h] [-b] [--] []"; exit 0;; - (-b) cmd_do_bootstrap="yes" ;; - (-n) cmd_fqdn="$2" ; shift ;; - (--) shift; break;; - (-*) echo "Unknown option $1\nUsage: $0 [-b] [-h] [-n fqdn] [--] "; exit 1;; - (*) break;; +# -n and -p take a value (':' in the optstring); getopts tracks its position in OPTIND, so nothing is shifted until the loop is done. +while getopts "bhn:p:" this; do + case "${this}" in + h) usage; exit 0;; + b) cmd_do_bootstrap="yes" ;; + n) cmd_fqdn="${OPTARG}" ;; + p) cmd_proxy="${OPTARG}" ;; + *) echo "Unknown option ${this}"; echo ""; usage; exit 1;; esac - shift done +shift $((OPTIND-1)) -if [ ! -z "$1" -a -z "$cmd_hostname" ]; then +if [[ ! $cmd_hostname ]]; then cmd_hostname="$1" fi -if [ ! -z "$cmd_hostname" -a -z "$cmd_fqdn" ]; then +if [[ ! $cmd_fqdn ]]; then cmd_fqdn="$cmd_hostname" fi if test -z "$cmd_hostname"; then - echo "Usage: $0 [-h] [-b] [-n fqdn] [--] " + usage exit 1 fi +if [[ -n $cmd_proxy ]]; then + proxyjump="-o ProxyJump=${cmd_proxy}" +fi + test -f cosmos.conf && .
./cosmos.conf -defrepo=`git remote -v | grep ${remote:="ro"} | grep fetch | awk '{print $2}'` +_remote=${remote:='ro'} +defrepo=$(git remote get-url "${_remote}" 2>/dev/null) rrepo=${repo:="$defrepo"} rtag=${tag:="changeme"} -if [ "x$rrepo" = "x" ]; then - echo "$0: repo not set in cosmos.conf and no git remote named 'ro' found" +if [[ ! $rrepo ]]; then + echo "$0: repo not set in cosmos.conf and no git remote named '${_remote}' found" exit 1 fi -if [ ! -d $cmd_hostname ]; then - cp -pr default $cmd_fqdn - git add $cmd_fqdn - git commit -m "$cmd_fqdn added" $cmd_fqdn +if [ ! -d "$cmd_fqdn" ]; then + cp -pr default "$cmd_fqdn" + git add "$cmd_fqdn" + git commit -m "$cmd_fqdn added" "$cmd_fqdn" ./bump-tag fi if [ "$cmd_do_bootstrap" = "yes" ]; then - scp apt/cosmos_1.5-1_all.deb apt/bootstrap-cosmos.sh root@$cmd_hostname: - ssh root@$cmd_hostname ./bootstrap-cosmos.sh $cmd_fqdn $rrepo $rtag - ssh root@$cmd_hostname cosmos -v update - ssh root@$cmd_hostname cosmos -v apply + cosmos_deb=$(find apt/ -maxdepth 1 -name 'cosmos_*.deb' | sort -V | tail -1) + scp $proxyjump "$cosmos_deb" apt/bootstrap-cosmos.sh root@"$cmd_hostname": + ssh root@"$cmd_hostname" $proxyjump ./bootstrap-cosmos.sh "$cmd_fqdn" "$rrepo" "$rtag" + ssh root@"$cmd_hostname" $proxyjump cosmos update + ssh root@"$cmd_hostname" $proxyjump cosmos apply fi diff --git a/bump-tag b/bump-tag index aed35e24..93522450 100755 --- a/bump-tag +++ b/bump-tag @@ -1,46 +1,275 @@ -#!/bin/sh +#!/bin/bash -set -e - -test -f cosmos.conf && . ./cosmos.conf - -git pull - -deftag=`basename $PWD` -tagpfx=${tag:="$deftag"} - -last_tag=`git tag -l "${tagpfx}-*"|sort|tail -1` - -echo "Verifying last tag $last_tag:" -(git tag -v $last_tag | grep ^gpg:) || true -# again to not mask exit status of git with grep -git tag -v $last_tag > /dev/null 2>&1 +echo "Fetching updates from $(git remote get-url origin) ..." echo "" +if ! git pull --verify-signatures; then + echo "WARNING: git pull did not exit successfully." 
+ echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 +fi -echo "Differences between tag $last_tag and what you are about to sign:" -PAGER=cat git diff $last_tag..master +if [[ -f ./cosmos.conf ]]; then + # shellcheck disable=SC1091 + source ./cosmos.conf +fi +# A tab will be used in multiple commands for git +t=$'\t' + +# Set the default tag according to the repo +# or by entering a name as the first argument. +if [[ -z "${1}" ]]; then + deftag="$(basename "${PWD}")" +else + deftag="${1}" +fi + +# Set the tag prefix according to: +# 1. $tag, if specified in cosmos.conf, +# 2. or $deftag, as specified above. +# shellcheck disable=SC2154 +if [[ -n "${tag}" ]]; then + tagpfx="${tag}" +else + tagpfx="${deftag}" +fi + +# This is the current branch that Git will diff against. +this_branch=$(git rev-parse --abbrev-ref HEAD) + +# Check why the tag couldn't be verified +# First argument: the tag to investigate +check_tag_sig_failure() +{ + local __tag_to_check="${1}" + + # shellcheck disable=SC2155 + local __verify_tag_output="$(git verify-tag --raw "${__tag_to_check}" 2>&1)" + + if echo "${__verify_tag_output}" | grep -q "VALIDSIG"; then + + if echo "${__verify_tag_output}" | grep -q "EXPKEYSIG"; then + + echo "" + echo "WARNING: The tag was correctly signed, but the copy of" + echo "the key that you have stored on your computer has expired." + echo "Check for an updated key in:" + echo "global/overlay/etc/cosmos/keys/" + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + else + + echo "" + echo "WARNING: The tag was probably correctly signed," + echo "but it still didn't pass the verification check." + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + fi + + else + + echo "" + echo "WARNING: The signature of the tag could not be verified." 
+ echo "Please make sure that you have imported the key and that" + echo "the key is signed by a trusted party." + echo "Keys used for signing in a Cosmos repo can be found at:" + echo "global/overlay/etc/cosmos/keys/" + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + fi +} + +check_commit_sig_failure() +{ + local __commit_to_check="${1}" + local __file_related_to_commit="${2}" + + # shellcheck disable=SC2155 + local __verify_commit_output="$(git verify-commit --raw "${__commit_to_check}" 2>&1)" + + if echo "${__verify_commit_output}" | grep -q "VALIDSIG"; then + + if echo "${__verify_commit_output}" | grep -q "EXPKEYSIG"; then + + echo "WARNING: The commit to ${__file_related_to_commit}" + echo "was correctly signed, but the copy of the key that" + echo "you have stored on your computer has expired." + echo "Check for an updated key in:" + echo "global/overlay/etc/cosmos/keys/" + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + else + + echo "WARNING: The commit to ${__file_related_to_commit}" + echo "was probably correctly signed, but it still didn't" + echo "pass the verification check." + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + fi + + else + + echo "WARNING: The commit to ${__file_related_to_commit}" + echo "could not be verified. Please make sure that you have" + echo "imported the key and that the key is signed by a trusted party." + echo "" + echo "EXITING the script. In order to tag your changes," + echo "investigate and then run bump-tag again." + exit 1 + + fi +} + +# Verify the last commit of a file +# First argument: the file to verify +verify_last_commit() +{ + local __file_to_verify="${1}" + + if [[ ! 
-f "${__file_to_verify}" ]]; then + return 1 + fi + + if [[ -n "$(git status --porcelain "${__file_to_verify}")" ]]; then + echo "" + echo "INFO: local changes detected in ${__file_to_verify}," + echo "Not checking the signature of the last commit to ${__file_to_verify}." + echo "" + return 1 + fi + + # shellcheck disable=SC2155 + local __last_commit="$(git log -n 1 --pretty=format:%H -- "${__file_to_verify}")" + + if ! git verify-commit "${__last_commit}" 2> /dev/null; then + echo "" + echo "WARNING: Untrusted modification to ${__file_to_verify}:" + echo "----------------------------" + git verify-commit "$(git log -n 1 --pretty=format:%H -- "${__file_to_verify}")" + echo "----------------------------" + + check_commit_sig_failure "${__last_commit}" "${__file_to_verify}" + fi +} + +tag_list="$(git tag -l "${tagpfx}-*")" +# shellcheck disable=SC2181 +if [[ ${?} -ne 0 ]] || [[ -z "${tag_list}" ]]; then + + if [[ -z ${ALLOW_UNSIGNED_COMMITS_WITHOUT_TAGS} ]]; then + echo "No tags found, verifying all commits instead." + echo "Please set environment variable ALLOW_UNSIGNED_COMMITS_WITHOUT_TAGS if you want to disable this check." + # %H = commit hash + # %G? = show "G" for a good (valid) signature + git_log="$(git log --pretty="format:%H${t}%G?" \ + --first-parent \ + | grep -v "${t}G$")" + fi + +else + + last_tag="$(echo "${tag_list}" | sort | tail -1)" + echo "Verifying last tag: ${last_tag} and the commits after that" + + if ! git verify-tag "${last_tag}"; then + check_tag_sig_failure "${last_tag}" + fi + + tag_object="$(git verify-tag -v "${last_tag}" 2>&1 | grep ^object | cut -d' ' -f2)" + + # The commits after the last valid signed git tag that we need to check + revision_range="${tag_object}..HEAD" + + # Filter out the commits that are unsigned or untrusted + # %H = commit hash + # %G? = show "G" for a good (valid) signature + git_log="$(git log --pretty="format:%H${t}%G?" 
"${revision_range}" \ + --first-parent \ + | grep -v "${t}G$")" + +fi + +if [[ -n "${git_log}" ]]; then + echo "" + echo -e "------WARNING: unsigned or untrusted commits after the last tag------" + echo "${git_log}" + echo -e "---------------------------------------------------------------------" + echo "Quick referens on how to configure signing of commits in ~/.gitconfig:" + echo "[user]" + echo " signingkey = your-prefered-key-id" + echo "[commit]" + echo " gpgsign = true" + echo "" + echo "EXITING the script. In order to tag your changes," + echo "please make sure that you have configured signing of" + echo "your own commits and that the listed unsigned commits" + echo "have been made by a trusted party and are not malicous." + exit 1 +fi + +# Always check that the last commit of certain +# sensitive files is trusted, without taking into +# account whether the last tag was trusted or not. +verify_last_commit "./scripts/jsonyaml-no-output.py" +verify_last_commit "./bump-tag" + +# Test the syntax of each YAML-file to be tagged. +for file in $(git diff --name-only "${last_tag}..${this_branch}" | grep -E "^.*\.(yaml|yml)$"); do + if [[ -f "${file}" ]]; then + ./scripts/jsonyaml-no-output.py yaml "${file}" + fi +done + +echo "Differences between tag ${last_tag} and what you are about to sign:" +# With PAGER=cat, git diff will simply dump the output to the screen. 
+# shellcheck disable=SC2037 +PAGER="cat" git diff --color "${last_tag}..${this_branch}" + +# Iterate over the $last_tag until $this_tag is set to a later version iter=1 ok= -while test -z "$ok"; do - this_tag=$(date +${tagpfx}-%Y-%m-%d-v`printf "%02d" $iter`) - iter=`expr $iter + 1` - case `(echo $this_tag; echo $last_tag) | sort | tail -1` in - $last_tag) - ;; - $this_tag) - ok=yes - ;; +while [[ -z "${ok}" ]]; do + this_tag="$(date +"${tagpfx}-%Y-%m-%d-v$(printf "%02d" "${iter}")")" + iter="$(( iter + 1))" + + case "$( (echo "${this_tag}"; echo "${last_tag}") | sort | tail -1 )" in + "${last_tag}") + ;; + "${this_tag}") + ok=yes + ;; esac done -echo "" -echo "Using new tag $this_tag" -echo ONLY SIGN IF YOU APPROVE OF VERIFICATION AND DIFF ABOVE +if [[ "${deftag}" != "${tagpfx}" ]]; then + echo -e "Using new tag \e[94m${this_tag}\e[0m according to pattern in cosmos.conf" +else + echo -e "Using new tag \e[94m${this_tag}\e[0m" +fi + +echo -e "\e[1mONLY SIGN IF YOU APPROVE OF VERIFICATION AND DIFF ABOVE\e[0m" # GITTAGEXTRA is for putting things like "-u 2117364A" - -git tag $GITTAGEXTRA -m bump. -s $this_tag +# Note that this variable cannot be quoted if left empty. +# shellcheck disable=SC2086 +git tag ${GITTAGEXTRA} -m bump. -s "${this_tag}" git push git push --tags diff --git a/docs/cosmos-puppet-ops.mkd b/docs/cosmos-puppet-ops.mkd index 46ceb508..afbd9143 100644 --- a/docs/cosmos-puppet-ops.mkd +++ b/docs/cosmos-puppet-ops.mkd @@ -1,14 +1,14 @@ % System Operations using Cosmos & Puppet -% Leif Johansson / SUNET / 2013 / v0.0.3 +% Leif Johansson / SUNET / 2017 / v0.0.5 Introduction ============ -This document describes how to setup and run systems and service operations for a small to midsized +This document describes how to setup and run systems and service operations for a small to mid-sized systems collection while maintaining scalability, security and auditability for changes. 
-The process described below is based on opensource components and assumes a Linux-based hosting -infrastructure. These limitations could easily be removed though. This document describes the +The process described below is based on open source components and assumes a Linux-based hosting +infrastructure. These limitations could easily be removed though. This document describes the multiverse template for combining cosmos and puppet. @@ -16,18 +16,18 @@ Design Requirements =================== The cosmos system has been used to operate security-critical infrastructure for a few years before -it was combined with puppet into the multiverse template. +it was combined with puppet into the multiverse template. -Several of the design requirements below are fulfilled by comos alone, while some (eg consistency) +Several of the design requirements below are fulfilled by cosmos alone, while some (eg consistency) are easier to achieve using puppet than with cosmos alone. Consistency ----------- -Changes should be applied atomically (locally on each host) across multiple system components on multiple +Changes should be applied atomically (locally on each host) across multiple system components on multiple physical and logical hosts (aka system state). The change mechanism should permit verification of state -consistency and all modifications should be idempotents, i.e the same operation -performend twice on the same system state should not in itself cause a problem. +consistency and all modifications should be idempotent, i.e. the same operation +performed twice on the same system state should not in itself cause a problem. Auditability ------------ @@ -40,12 +40,12 @@ Authenticity ------------ All changes must be authenticated by private keys in the personal possession of privileged -system operators before applied to system state aswell as at any point in the future. +system operators before applied to system state as well as at any point in the future.
Simplicity ---------- -The system must be simple and must not rely on external services to be online to maintain +The system must be simple and must not rely on external services to be online to maintain state except when new state is being requested and applied. When new state is being requested external dependencies must be kept to a minimum. @@ -53,8 +53,8 @@ Architecture ============ The basic architecture of puppet is to use a VCS (git) to manage and distribute changes to a -staging area on each managed host. At the staging area the changes are authenticated (using -tag signatures) and if valid, distributed to the host using local rsync. Before and after +staging area on each managed host. At the staging area the changes are authenticated (using +tag signatures) and if valid, distributed to the host using local rsync. Before and after hooks (using run-parts) are used to provide programmatic hooks. Administrative Scope @@ -62,15 +62,15 @@ Administrative Scope The repository constitutes the administrative domain of a multiverse setup: each host is connected to (i.e runs cosmos off of) a single GIT repository and derives trust from signed -tags on that repository. A host cannot belong to more than 1 administratve domain but each -administrative domains can host multiple DNS domains - all hosts in a single repository +tags on that repository. A host cannot belong to more than 1 administrative domain but each +administrative domains can host multiple DNS domains - all hosts in a single repository doesn't need to be in the same zone. The role of Puppet ------------------ -In the multiverse template, the cosmos system is used to authenticate and distribute changes -and prepare the system state for running puppet. Puppet is used to apply idempotent changes +In the multiverse template, the cosmos system is used to authenticate and distribute changes +and prepare the system state for running puppet. 
Puppet is used to apply idempotent changes to the system state using "puppet apply". ~~~~~ {.ditaa .no-separation} @@ -79,7 +79,7 @@ to the system state using "puppet apply". +------------+ +------+ | ^ | | | - (change) (manifests) + (change) (manifests) | | +--------+ | | puppet |<---+ @@ -87,44 +87,44 @@ to the system state using "puppet apply". ~~~~~ Note that there is no puppet master in this setup so collective resources cannot be used -in multiverse. Instead 'fabric' is used to provide a simple way to loop over subsets of +in multiverse. Instead 'fabric' is used to provide a simple way to loop over subsets of the hosts in a managed domain. -Private data (eg system credentials, application passwords, or private keys) are encrypted +Private data (eg system credentials, application passwords, or private keys) are encrypted to a master host-specific PGP key before stored in the cosmos repo. System state can be tied to classes used to classify systems into roles (eg "database server" -or "webserver"). System classes can be assigned by regular expressions on the fqdn (eg all -hosts named db-\* is assigned to the "database server" class) using a custom puppet ENC. +or "webserver"). System classes can be assigned by regular expressions on the fqdn (eg all +hosts named db-\* is assigned to the "database server" class) using a custom puppet ENC. The system classes are also made available to 'fabric' in a custom fabfile. Fabric (or fab) -is a simple frontend to ssh that allows an operator to run commands on multiple remote +is a simple frontend to ssh that allows an operator to run commands on multiple remote hosts at once. Trust ----- -All data in the system is maintained in a cosmos GIT repository. A change is -requested by signing a tag in the repository with a system-wide well-known name-prefix. -The tag name typically includes the date and a counter to make it unique. +All data in the system is maintained in a cosmos GIT repository. 
A change is +requested by signing a tag in the repository with a system-wide well-known name-prefix. +The tag name typically includes the date and a counter to make it unique. -The signature on the tag is authenticated against a set of trusted keys maintained in the +The signature on the tag is authenticated against a set of trusted keys maintained in the repository itself - so that one trusted system operator must be present to authenticate addition or -removal of another trusted system operator. This authentication of tags is done in addition +removal of another trusted system operator. This authentication of tags is done in addition to authenticating access to the GIT repository when the changes are pushed. Trust is typically -bootstrapped when a repository is first established. This model also serves to provide auditability +bootstrapped when a repository is first established. This model also serves to provide auditability of all changes for as long as repository history is retained. Access to hosts is done through ssh with ssh-key access. The ssh keys are typically maintained -using either puppet or cosmos natively. +using either puppet or cosmos natively. Consistency ----------- As a master-less architecture, multiverse relies on _eventual consistency_: changes will eventually -be applied to all hosts. In such a model it becomes very imporant that changes are idempotent, so +be applied to all hosts. In such a model it becomes very important that changes are idempotent, so that applying a change multiple times (in an effort to get dependent changes through) won't cause -an issue. Using native cosmos, such changes are achived using timestamp-files that control entry +an issue. 
Using native cosmos, such changes are achieved using timestamp-files that control entry into code-blocks: ``` @@ -136,23 +136,23 @@ fi ``` This pattern is mostly replaced in multiverse by using puppet manifests and modules that -are inherently indempotent but it can nevertheless be a useful addition to the toolchain. +are inherently idempotent but it can nevertheless be a useful addition to the toolchain. Implementation ============== Implementation is based on two major components: cosmos and puppet. The cosmos system was created by Simon Josefsson and Fredrik Thulin as a simple and secure way to distribute files -and run pre- and post-processors (using run-parts). This allows for a simple, yet complete +and run pre- and post-processors (using run-parts). This allows for a simple, yet complete mechanism for updating system state. The second component is puppet which is run in masterless (aka puppet apply) mode on files distributed and authenticated using cosmos. Puppet is a widely deployed way to describe system state using a set of idempotent operations. In theory, anything that can de done -using puppet can be done using cosmos post-processors but puppet allows for greater +using puppet can be done using cosmos post-processors but puppet allows for greater abstraction which greatly increases readability. -The combination of puppet and cosmos is maintained on github in the 'leifj/multiverse' +The combination of puppet and cosmos is maintained on github in the 'SUNET/multiverse' project. The Cosmos Puppet Module @@ -160,7 +160,7 @@ The Cosmos Puppet Module Although not necessary, a few nice-to-have utilities in the form of puppet modules have been collected as the cosmos puppet module (for want of a better name).
The source for -this module is at http://github.com/leifj/puppet-cosmos and it is included (but commented +this module is at https://github.com/SUNET/puppet-cosmos and it is included (but commented out) in the cosmos-modules.conf file (cf below) for easy inclusion. @@ -177,20 +177,20 @@ this is in the 'git-core' package: # apt-get install git-core ``` -Also install 'fabric' - a very useful too for multiple-host-ssh that is integrated into +Also install 'fabric' - a very useful tool for multiple-host-ssh that is integrated into multiverse. Fabric provides the 'fab' command which will be introduced later on. ``` # apt-get install fabric ``` -These two tools (git & fabric) are only needed on mashines where system operators work. +These two tools (git & fabric) are only needed on machines where system operators work. -Next clone git://github.com/leifj/multiverse.git - this will form the basis of your cosmos+puppet +Next clone git@github.com:SUNET/multiverse.git - this will form the basis of your cosmos+puppet repository: ``` -# git clone git://github.com/leifj/multiverse.git myproj-cosmos +# git clone git@github.com:SUNET/multiverse.git myproj-cosmos # cd myproj-cosmos ``` @@ -201,27 +201,40 @@ features as the multiverse codebase evolves. # git remote rename origin multiverse ``` -Now add a new remote pointing to the git repo where you are going to be pushing -changes for your administrative domain. Also add a read-only version of this remote +Now add a new remote pointing to the git repo where you are going to be pushing +changes for your administrative domain. Also add a read-only version of this remote as 'ro'. The read-only remote is used by multiverse scripts during host bootstrap.
``` -# git remote add origin git@yourhost:myproj-cosmos.git -# git remote add ro git://yourhost/myproj-cosmos.git +# git remote add origin git+ssh://git@yourhost/myproj-cosmos.git +# git remote add ro https://yourhost/myproj-cosmos.git ``` -Now edit .git/config and rename the 'master' branch to use the new 'origin' remote or -you'll try to push to the multiverse remote! Finally create a branch for the 'multiverse' -upstream so you can merge changes to multiverse: +Now edit .git/config and rename the 'main' branch to use the new 'origin' remote or +you'll try to push to the multiverse remote! ``` -# git checkout -b multiverse --track multiverse/master +[branch "main"] + remote = origin + merge = refs/heads/main +``` + +Now create a branch for the 'multiverse' upstream so you can merge changes to multiverse: + +``` +# git checkout -b multiverse --track multiverse/main +``` + +Finally, you might need to push your main branch upstream to the new origin: +``` +# git checkout main +# git push -u origin main ``` Note that you can maintain your repo on just about any git hosting platform, including -github, gitorius or your own local setup as long as it supports read-only "git://" access -to your repository. It is important that the remotes called 'origin' and 'ro' refer to -your repository and not to anything else (like a private version of multiverse). +github, gitorious or your own local setup as long as it supports read-only access to your +repository. It is important that the remotes called 'origin' and 'ro' refer to your +repository and not to anything else (like a private version of multiverse). Now add at least one key to 'global/overlay/etc/cosmos/keys/' in a file with a .pub extension (eg 'operator.pub') - the name of the file doesn't matter other than the extension.
@@ -238,6 +251,10 @@ At this point you should create and sign your first tag: # ./bump-tag ``` +If Git complains during the first run of bump-tag that "Your configuration specifies to +merge with the ref 'main' from the remote, but no such ref was fetched." then you +have to run 'git push' to initialize the connection with the remote repository. + Make sure that you are using the key whose public key you just added to the repository! You can now start adding hosts. @@ -247,7 +264,7 @@ Adding a host Bootstrapping a host is done using the 'addhost' command: ``` -# ./addhost [-b] $fqdn +# ./addhost -b $fqdn ``` The -b flag causes addhost to attempt to bootstrap cosmos on the remote host using @@ -255,7 +272,7 @@ ssh as root. This requires that root key trust be established in advance. The ad command creates and commits the necessary changes to the repository to add a host named $fqdn. Only fully qualified hostnames should ever be used in cosmos+puppet. -The boostrap process will create a cron-job on $fqdn that runs +The bootstrap process will create a cron-job on $fqdn that runs ``` # cosmos update && cosmos apply @@ -264,11 +281,17 @@ The boostrap process will create a cron-job on $fqdn that runs every 15 minutes. This should be a good starting point for your domain. Now you may want to add some 'naming rules'. +To bootstrap a machine that is not yet configured in DNS, use the following options: + +``` +# ./addhost -b -n $fqdn-to-add-later-in-dns -- IP-address +``` + Defining naming rules --------------------- -A naming rule is a mapping from a name to a set of puppet classes. These are defined in -the file 'global/overlay/etc/puppet/cosmos-rules.yaml' (linked to the toplevel directory +A naming rule is a mapping from a name to a set of puppet classes. These are defined in +the file 'global/overlay/etc/puppet/cosmos-rules.yaml' (linked to the top level directory in multiverse).
This is a YAML format file whose keys are regular expressions and whose values are lists of puppet class definitions. Here is an example that assigns all hosts with names on the form ns\.example.com to the 'nameserver' class. @@ -278,7 +301,7 @@ with names on the form ns\.example.com to the 'nameserver' class. nameserver: ``` -Note that the value is a hash with an empty value ('namserver:') and not just a string +Note that the value is a hash with an empty value ('nameserver:') and not just a string value. Since regular expressions can also match on whole strings so the following is also @@ -290,7 +313,7 @@ smtp.example.com: relay: smtp.upstream.example.com ``` -In this example the mailserver puppet class is given the relay argument (cf puppet +In this example the mailserver puppet class is given the relay argument (cf puppet documentation). Fabric integration @@ -306,11 +329,11 @@ Given the above example the following command would reload all nameservers: Creating a change-request ------------------------- -After performing whatever changes you want to the reqpository, commit the changes as usual +After performing whatever changes you want to the repository, commit the changes as usual and then sign an appropriately formatted tag. This last operation is wrapped in the 'bump-tag' command: ``` -# git commit -m "some changes" global/overlay/somethig or/other/files +# git commit -m "some changes" global/overlay/something or/other/files # ./bump-tag ``` @@ -320,39 +343,52 @@ gpg commands to create, sign and push the correct tag. Puppet modules -------------- -Puppet modules can be maintained using a designated cosmos pre-task that reads a file -global/overlay/etc/puppet/cosmos-modules.conf. This file is a simple text-format file -with 3 columns: +Puppet modules can be maintained using a designated cosmos pre-task that reads the file +/etc/puppet/cosmos-modules.conf. 
This file is a simple text-format file +with either three (for puppetlabs modules) or four columns: ``` # -# name source (puppetlabs fq name or git url) upgrade (yes/no) +# name source (puppetlabs fq name or git url) upgrade (yes/no) tag_pattern # -concat puppetlabs/concat no -stdlib puppetlabs/stdlib no -cosmos git://github.com/leifj/puppet-cosmos.git yes -ufw git://github.com/fredrikt/puppet-module-ufw.git yes apt puppetlabs/apt no +concat puppetlabs/concat no +cosmos https://github.com/SUNET/puppet-cosmos.git yes sunet-2* +#golang elithrar/golang yes +python https://github.com/SUNET/puppet-python.git yes sunet-2* +stdlib puppetlabs/stdlib no +ufw https://github.com/SUNET/puppet-module-ufw.git yes sunet-2* vcsrepo puppetlabs/vcsrepo no xinetd puppetlabs/xinetd no -#golang elithrar/golang yes -python git://github.com/fredrikt/puppet-python.git yes -hiera-gpg git://github.com/fredrikt/hiera-gpg.git no ``` -This is an example file - the first field is the name of the module, the second is -the source: either a puppetlabs path or a git URL. The final field is 'yes' if the -module should be automatically updated or 'no' if it should only be installed. As usual -lines beginning with '#' are silently ignored. +This is an example file - the first field is the name of the module, the second is +the source: either a puppetlabs path or a git URL. The third field is 'yes' if the +module should be automatically updated or 'no' if it should only be installed. The +fourth field is a tag pattern to use (same style as the cosmos tag pattern). +As usual lines beginning with '#' are silently ignored. -This file is processed in a cosmos pre-hook so the modules should be available for +This file is processed in a cosmos pre-hook so the modules should be available for use in the puppet post-hook. By default the file contains several lines that are commented out so review this file as you start a new multiverse setup. 
In order to add a new module, the best way is to commit a change to this file and -tag this change, allowing time for the module to get installed everywhere before +tag this change, allowing time for the module to get installed everywhere before adding a change that relies on this module. +As there might be a need to use different sets of modules (or different tag patterns) +on different hosts in an ops-repo, the contents of this file can be controlled in +different ways: + + 1. If the file is present in the model, it is used as such. + 2. If there is a script called /etc/puppet/setup_cosmos_modules, that script is executed. + If the file /etc/puppet/cosmos-modules.conf does not exist after this script runs, + proceed to step 3, otherwise use this dynamically generated list of modules. + 3. Use a (very small) default set of modules from the pre-hook global/post-tasks.d/010cosmos-modules. + +There is an example implementation of the script to help you get started with writing your own, +available in docs/setup_cosmos_modules.example. 
+ HOWTO and Common Tasks ====================== @@ -362,7 +398,7 @@ Adding a new operator Add the ascii-armoured key in a file in `global/overlay/etc/cosmos/keys` with a `.pub` extension ``` -# git add global/overlay/etc/cosmos/keys/thenewoperator.pub +# git add global/overlay/etc/cosmos/keys/thenewoperator.pub # git commit -m "the new operator" \ global/overlay/etc/cosmos/keys/thenewoperator.pub # ./bump-tag @@ -371,7 +407,7 @@ Add the ascii-armoured key in a file in `global/overlay/etc/cosmos/keys` with a Removing an operator -------------------- -Identitfy the public key file in `global/overlay/etc/cosmos/keys` +Identify the public key file in `global/overlay/etc/cosmos/keys` ``` # git rm global/overlay/etc/cosmos/keys/X.pub @@ -388,7 +424,7 @@ The multiverse template will continue to evolve and sometimes it may be desirabl ``` # git checkout multiverse # git pull -# git checkout master +# git checkout main # git merge multiverse ``` diff --git a/docs/setup_cosmos_modules.eduid.example b/docs/setup_cosmos_modules.eduid.example new file mode 100755 index 00000000..2b9dfdc4 --- /dev/null +++ b/docs/setup_cosmos_modules.eduid.example @@ -0,0 +1,300 @@ +#!/usr/bin/env python3 +# +# This script is responsible for creating/updating /etc/puppet/cosmos-modules.conf. +# +# If this script exits without creating that file, a default list of modules will be +# selected (by post-tasks.d/010cosmos-modules, the script that invokes this script). +# +# NOTES ABOUT THE IMPLEMENTATION: +# +# - Avoid any third party modules. We want this script to be re-usable in all ops-repos. +# - To make merging easier, try to keep all local alterations in the local_* functions. +# - Format with black and isort. Line width 120. +# - You probably ONLY want to change things in the local_get_modules_hook() function. 
+# + +import argparse +import csv +import json +import logging +import logging.handlers +import os +import re +import socket +import sys +from pathlib import Path +from typing import Dict, NewType, Optional, cast + +from pkg_resources import parse_version + +logger = logging.getLogger(__name__) # will be overwritten by _setup_logging() + +# Set up types for data that is passed around in functions in this script. +# Need to use Dict (not dict) here since these aren't stripped by strip-hints, and doesn't work on Ubuntu <= 20.04. +Arguments = NewType("Arguments", argparse.Namespace) +OSInfo = Dict[str, str] +HostInfo = Dict[str, Optional[str]] +Modules = Dict[str, Dict[str, str]] + + +def parse_args() -> Arguments: + """ + Parse the command line arguments + """ + parser = argparse.ArgumentParser( + description="Setup cosmos-modules.conf", + add_help=True, + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + + parser.add_argument("--debug", dest="debug", action="store_true", default=False, help="Enable debug operation") + parser.add_argument( + "--filename", dest="filename", type=str, default="/etc/puppet/cosmos-modules.conf", help="Filename to write to" + ) + + return cast(Arguments, parser.parse_args()) + + +def get_os_info() -> OSInfo: + """Load info about the current OS (distro, release etc.)""" + os_info: OSInfo = {} + if Path("/etc/os-release").exists(): + os_info.update({k.lower(): v for k, v in _parse_bash_vars("/etc/os-release").items()}) + res = local_os_info_hook(os_info) + logger.debug(f"OS info:\n{json.dumps(res, sort_keys=True, indent=4)}") + return res + + +def get_host_info() -> HostInfo: + """Load info about the current host (hostname, fqdn, domain name etc.)""" + try: + fqdn = socket.getfqdn() + hostname = socket.gethostname() + except OSError: + host_info = {} + else: + _domainname = fqdn[len(hostname + ".") :] + + host_info: HostInfo = { + "domainname": _domainname, + "fqdn": fqdn, + "hostname": hostname, + } + res = 
local_host_info_hook(host_info) + logger.debug(f"Host info: {json.dumps(res, sort_keys=True, indent=4)}") + return res + + +def _parse_bash_vars(path: str) -> dict[str, str]: + """ + Parses a bash script and returns a dictionary representing the + variables declared in that script. + + Source: https://dev.to/htv2012/how-to-parse-bash-variables-b4f + + :param path: The path to the bash script + :return: Variables as a dictionary + """ + with open(path) as stream: + contents = stream.read().strip() + + var_declarations = re.findall(r"^[a-zA-Z0-9_]+=.*$", contents, flags=re.MULTILINE) + reader = csv.reader(var_declarations, delimiter="=") + bash_vars = dict(reader) + return bash_vars + + +def get_modules(os_info: OSInfo, host_info: HostInfo) -> Modules: + """Load the list of default modules. + + This is more or less an inventory of all the modules we have. If you don't want + to use all modules in your OPS repo, you can filter them in the local hook. + + If you want to use a different tag for a module on a specific host/os, you can + do that in the local hook as well. 
+ """ + default_modules = """ + # name repo upgrade tag + apparmor https://github.com/SUNET/puppet-apparmor.git yes sunet-2* + apt https://github.com/SUNET/puppetlabs-apt.git yes sunet-2* + augeas https://github.com/SUNET/puppet-augeas.git yes sunet-2* + bastion https://github.com/SUNET/puppet-bastion.git yes sunet-2* + concat https://github.com/SUNET/puppetlabs-concat.git yes sunet-2* + cosmos https://github.com/SUNET/puppet-cosmos.git yes sunet-2* + dhcp https://github.com/SUNET/puppetlabs-dhcp.git yes sunet_dev-2* + docker https://github.com/SUNET/garethr-docker.git yes sunet-2* + hiera-gpg https://github.com/SUNET/hiera-gpg.git yes sunet-2* + munin https://github.com/SUNET/ssm-munin.git yes sunet-2* + nagioscfg https://github.com/SUNET/puppet-nagioscfg.git yes sunet-2* + network https://github.com/SUNET/attachmentgenie-network.git yes sunet-2* + pound https://github.com/SUNET/puppet-pound.git yes sunet-2* + pyff https://github.com/samlbits/puppet-pyff.git yes puppet-pyff-* + python https://github.com/SUNET/puppet-python.git yes sunet-2* + stdlib https://github.com/SUNET/puppetlabs-stdlib.git yes sunet-2* + sunet https://github.com/SUNET/puppet-sunet.git yes sunet-2* + sysctl https://github.com/SUNET/puppet-sysctl.git yes sunet-2* + ufw https://github.com/SUNET/puppet-module-ufw.git yes sunet-2* + varnish https://github.com/samlbits/puppet-varnish.git yes puppet-varnish-* + vcsrepo https://github.com/SUNET/puppetlabs-vcsrepo.git yes sunet-2* + xinetd https://github.com/SUNET/puppetlabs-xinetd.git yes sunet-2* + """ + modules: Modules = {} + for line in default_modules.splitlines(): + try: + if not line.strip() or line.strip().startswith("#"): + continue + _name, _url, _upgrade, _tag = line.split() + modules[_name] = { + "repo": _url, + "upgrade": _upgrade, + "tag": _tag, + } + except ValueError: + logger.error(f"Failed to parse line: {repr(line)}") + raise + + # Remove the UFW module on Ubuntu >= 22.04 (nftables is used there instead) + if os_info.get("name") == 
"Ubuntu": + ver = os_info.get("version_id") + if ver: + if parse_version(ver) >= parse_version("22.04"): + logger.debug("Removing UFW module for Ubuntu >= 22.04") + del modules["ufw"] + else: + logger.debug("Keeping UFW module for Ubuntu < 22.04") + else: + logger.debug("Unknown Ubuntu module version, keeping UFW module") + + return local_get_modules_hook(os_info, host_info, modules) + + +def local_os_info_hook(os_info: OSInfo) -> OSInfo: + """Local hook to modify os_info in an OPS repo.""" + # Start local changes in this repository + # End local changes + return os_info + + +def local_host_info_hook(host_info: HostInfo) -> HostInfo: + """Local hook to modify host_info in an OPS repo.""" + # Start local changes in this repository + + # Regular expression to tease apart an eduID hostname + hostname_re = re.compile( + r"""^ + (\w+) # function ('idp', 'apps', ...) + - + (\w+) # site ('tug', 'sthb', ...) + - + (\d+) # 1 for staging, 3 for production + """, + re.VERBOSE, + ) + _hostname = host_info.get("hostname") + if _hostname: + m = hostname_re.match(_hostname) + if m: + _function, _site, _num = m.groups() + host_info["function"] = _function + host_info["site"] = _site + if _num == "1": + host_info["environment"] = "staging" + + # End local changes + return host_info + + +def local_get_modules_hook(os_info: OSInfo, host_info: HostInfo, modules: Modules) -> Modules: + """Local hook to modify default set of modules in an OPS repo.""" + # Start local changes in this repository + + _eduid_modules = { + "apparmor", + "apt", + "augeas", + "bastion", + "concat", + "docker", + "munin", + "stdlib", + "sunet", + "ufw", + } + # Only keep the modules eduID actually uses + modules = {k: v for k, v in modules.items() if k in _eduid_modules} + logger.debug(f"Adding modules: {json.dumps(modules, sort_keys=True, indent=4)}") + + # Use eduID tag for puppet-sunet + modules["sunet"]["tag"] = "eduid-stable-2*" + if host_info.get("environment") == "staging": + modules["sunet"]["tag"] = 
"eduid_dev-2*" + + # use sunet_dev-2* for some modules in staging + for dev_module in ["munin"]: + if host_info.get("environment") == "staging" and dev_module in modules: + modules[dev_module]["tag"] = "sunet_dev-2*" + + # End local changes + return modules + + +def update_cosmos_modules(filename: str, modules: Modules) -> None: + """Create/update the cosmos-modules.conf file. + + First, we check if the file already have the right content. If so, we do nothing. + """ + content = "# This file is automatically generated by the setup_cosmos_modules script.\n# Do not edit it manually.\n" + for k, v in sorted(modules.items()): + content += f"{k:15} {v['repo']:55} {v['upgrade']:5} {v['tag']}\n" + _file = Path(filename) + if _file.exists(): + # Check if the content is already correct, and avoid updating the file if so (so that the timestamp + # of the file at least indicates when the content was last updated) + with _file.open("r") as f: + current = f.read() + if current == content: + logger.debug(f"{filename} is up to date") + return + + # Create/update the file by writing the content to a temporary file and then renaming it + _tmp_file = _file.with_suffix(".tmp") + with _tmp_file.open("w") as f: + f.write(content) + _tmp_file.rename(_file) + logger.debug(f"Updated {filename}") + + +def _setup_logging(my_name: str, args: Arguments): + level = logging.INFO + if args.debug: + level = logging.DEBUG + logging.basicConfig(level=level, stream=sys.stderr, format="{asctime} | {levelname:7} | {message}", style="{") + global logger + logger = logging.getLogger(my_name) + # If stderr is not a TTY, change the log level of the StreamHandler (stream = sys.stderr above) to ERROR + if not sys.stderr.isatty() and not args.debug: + for this_h in logging.getLogger("").handlers: + this_h.setLevel(logging.ERROR) + if args.debug: + logger.setLevel(logging.DEBUG) + + +def main(my_name: str, args: Arguments) -> bool: + _setup_logging(my_name, args) + + os_info = get_os_info() + host_info = 
get_host_info() + modules = get_modules(os_info, host_info) + + update_cosmos_modules(args.filename, modules) + + return True + + +if __name__ == "__main__": + my_name = os.path.basename(sys.argv[0]) + args = parse_args() + res = main(my_name, args=args) + if res: + sys.exit(0) + sys.exit(1) diff --git a/docs/setup_cosmos_modules.example b/docs/setup_cosmos_modules.example new file mode 100755 index 00000000..6b1b9c61 --- /dev/null +++ b/docs/setup_cosmos_modules.example @@ -0,0 +1,216 @@ +#!/usr/bin/env python3 +""" Write out a puppet cosmos-modules.conf """ + +import hashlib +import os +import os.path +import sys + +try: + from configobj import ConfigObj + + OS_INFO = ConfigObj("/etc/os-release") +except (IOError, ModuleNotFoundError): + OS_INFO = None + + +def get_file_hash(modulesfile): + """ + Based on https://github.com/python/cpython/pull/31930: should use + hashlib.file_digest() but it is only available in python 3.11 + """ + try: + with open(modulesfile, "rb") as fileobj: + digestobj = hashlib.sha256() + _bufsize = 2**18 + buf = bytearray(_bufsize) # Reusable buffer to reduce allocations. 
+ view = memoryview(buf) + while True: + size = fileobj.readinto(buf) + if size == 0: + break # EOF + digestobj.update(view[:size]) + except FileNotFoundError: + return "" + + return digestobj.hexdigest() + + +def get_list_hash(file_lines): + """Get hash of list contents""" + + file_lines_hash = hashlib.sha256() + for line in file_lines: + file_lines_hash.update(line) + + return file_lines_hash.hexdigest() + + +def create_file_content(modules): + """ + Write out the expected file contents to a list so we can check the + expected checksum before writing anything + """ + file_lines = [] + file_lines.append( + "# Generated by {}\n".format( # pylint: disable=consider-using-f-string + os.path.basename(sys.argv[0]) + ).encode("utf-8") + ) + for key in modules: + file_lines.append( + "{0:11} {1} {2} {3}\n".format( # pylint: disable=consider-using-f-string + key, + modules[key]["repo"], + modules[key]["upgrade"], + modules[key]["tag"], + ).encode("utf-8") + ) + + return file_lines + + +def main(): + """Starting point of the program""" + + modulesfile: str = "/etc/puppet/cosmos-modules.conf" + modulesfile_tmp: str = modulesfile + ".tmp" + + modules: dict = { + "concat": { + "repo": "https://github.com/SUNET/puppetlabs-concat.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "stdlib": { + "repo": "https://github.com/SUNET/puppetlabs-stdlib.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "cosmos": { + "repo": "https://github.com/SUNET/puppet-cosmos.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "ufw": { + "repo": "https://github.com/SUNET/puppet-module-ufw.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "apt": { + "repo": "https://github.com/SUNET/puppetlabs-apt.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "vcsrepo": { + "repo": "https://github.com/SUNET/puppetlabs-vcsrepo.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "xinetd": { + "repo": "https://github.com/SUNET/puppetlabs-xinetd.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + 
"python": { + "repo": "https://github.com/SUNET/puppet-python.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "hiera-gpg": { + "repo": "https://github.com/SUNET/hiera-gpg.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "pound": { + "repo": "https://github.com/SUNET/puppet-pound.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "augeas": { + "repo": "https://github.com/SUNET/puppet-augeas.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "bastion": { + "repo": "https://github.com/SUNET/puppet-bastion.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "pyff": { + "repo": "https://github.com/samlbits/puppet-pyff.git", + "upgrade": "yes", + "tag": "puppet-pyff-*", + }, + "dhcp": { + "repo": "https://github.com/SUNET/puppetlabs-dhcp.git", + "upgrade": "yes", + "tag": "sunet_dev-2*", + }, + "varnish": { + "repo": "https://github.com/samlbits/puppet-varnish.git", + "upgrade": "yes", + "tag": "puppet-varnish-*", + }, + "apparmor": { + "repo": "https://github.com/SUNET/puppet-apparmor.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "docker": { + "repo": "https://github.com/SUNET/garethr-docker.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "network": { + "repo": "https://github.com/SUNET/attachmentgenie-network.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "sunet": { + "repo": "https://github.com/SUNET/puppet-sunet.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "sysctl": { + "repo": "https://github.com/SUNET/puppet-sysctl.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + "nagioscfg": { + "repo": "https://github.com/SUNET/puppet-nagioscfg.git", + "upgrade": "yes", + "tag": "sunet-2*", + }, + } + + # When/if we want we can do stuff to modules here + if OS_INFO: + if OS_INFO["VERSION_CODENAME"] == "bullseye": + pass + + # Build list of expected file content + file_lines = create_file_content(modules) + + # Get hash of the list + list_hash = get_list_hash(file_lines) + + # Get hash of the existing file on disk + file_hash = 
get_file_hash(modulesfile) + + # Update the file if necessary + if list_hash != file_hash: + # Since we are reading the file with 'rb' when computing our hash use 'wb' when + # writing so we dont end up creating a file that does not match the + # expected hash + with open(modulesfile_tmp, "wb") as fileobj: + for line in file_lines: + fileobj.write(line) + + # Rename it in place so the update is atomic for anything else trying to + # read the file + os.rename(modulesfile_tmp, modulesfile) + + +if __name__ == "__main__": + main() diff --git a/edit-secrets b/edit-secrets index 0921be6d..a2c67ac3 100755 --- a/edit-secrets +++ b/edit-secrets @@ -43,6 +43,35 @@ if [[ ! $1 ]]; then exit 1 fi +function patch_broken_eyaml { + # + # Ubuntu 22.04 (jammy) has a broken hiera-eyaml package, a bug report + # exists here: https://bugs.launchpad.net/ubuntu/+source/hiera-eyaml/+bug/1974059 + # + + if [ "$(lsb_release -cs)" == "jammy" ]; then + plugins_file="/usr/share/rubygems-integration/all/gems/hiera-eyaml-3.2.2/lib/hiera/backend/eyaml/plugins.rb" + if [ -f $plugins_file ]; then + # We only want to try patching the file if it is the known broken version + bad_sum="1d0f14765ebcfcdae300d8ac5d715845ef9b283345d19114a23d96161556618f" + sum=$(sha256sum $plugins_file | awk '{print $1}') + if [ "$sum" == "$bad_sum" ]; then + patch --fuzz=0 --directory=/ --strip=0 <<'EOF' +--- /usr/share/rubygems-integration/all/gems/hiera-eyaml-3.2.2/lib/hiera/backend/eyaml/plugins.rb.orig 2023-01-18 08:20:22.140338419 +0000 ++++ /usr/share/rubygems-integration/all/gems/hiera-eyaml-3.2.2/lib/hiera/backend/eyaml/plugins.rb 2023-01-18 08:21:05.654053501 +0000 +@@ -32,6 +32,7 @@ + specs = Gem::VERSION >= "1.6.0" ? source.latest_specs(true) : source.latest_specs + + specs.each do |spec| ++ spec = spec.to_spec if spec.respond_to?(:to_spec) + next if @@plugins.include? 
spec + + dependency = spec.dependencies.find { |d| d.name == "hiera-eyaml" } +EOF + fi + fi + fi +} function edit_copy_and_commit() { @@ -76,17 +105,13 @@ function edit_copy_and_commit() elif grep ^"STATUS=EYAML_UPDATED" $TMPFILE > /dev/null; then save_to="${host}/overlay/etc/hiera/data/local.eyaml" - # remove the STATUS= line - grep -v '^STATUS=EYAML_UPDATED' $TMPFILE > $TMPFILE2 + # extract the eyaml output + perl -e '$a = 0; while (<>) { $a = 1 if ($_ =~ /^---$/); + print $_ if $a }' < $TMPFILE > $TMPFILE2 - # check syntax - if [ -x $(dirname $0)/scripts/jsonyaml-no-output.py ]; then - if ! $(dirname $0)/scripts/jsonyaml-no-output.py yaml $TMPFILE2; then - echo "$0: Error: $TMPFILE2 doesn't look like a YAML file" - exit 1 - fi - else - echo "$0: Warning: Unable to check syntax of $TMPFILE2" + if ! grep "^---$" $TMPFILE2 > /dev/null; then + echo "$0: Failed extracting yaml output from file $TMPFILE into $TMPFILE2" + exit 1 fi else echo "" @@ -126,7 +151,7 @@ function edit_file_on_host() { edit_gpg_file ${SECRETFILE} elif [ -f /etc/hiera/eyaml/public_certkey.pkcs7.pem ]; then # default to eyaml if the key exists and none of the secrets-file above exist - touch ${EYAMLFILE} + echo "---" > ${EYAMLFILE} edit_eyaml_file ${EYAMLFILE} fi } @@ -176,7 +201,7 @@ function edit_gpg_file() echo "$0: No changes detected" else # figure out this hosts gpg key id - if lsb_release -r | grep -q 18.04; then + if lsb_release -r | grep -qE '(18|20).04'; then recipient=$($GPG --list-secret-keys | grep -A1 '^sec' | tail -1 | awk '{print $1}') else recipient=$($GPG --list-secret-key | grep ^sec | head -1 | awk '{print $2}' | cut -d / -f 2) @@ -208,6 +233,8 @@ function edit_eyaml_file() test -f "${f}" || { echo "$0: eyaml key file ${f} not found"; exit 1; } done + patch_broken_eyaml + # save source file for comparision afterwards cp "${EYAMLFILE}" "${TMPFILE}" eyaml edit --pkcs7-private-key "${privkey}" --pkcs7-public-key "${pubkey}" "${EYAMLFILE}" diff --git a/fabfile/db.py 
b/fabfile/db.py deleted file mode 100644 index 7f21ed51..00000000 --- a/fabfile/db.py +++ /dev/null @@ -1,63 +0,0 @@ -import os -import sys -import yaml -import re - -# disallow python2 as the output will not be correct -if sys.version_info.major != 3: - sys.stderr.write('python2 no longer supported\n') - sys.exit(1) - - -def _all_hosts(): - return list(filter(lambda fn: '.' in fn and not fn.startswith('.') and os.path.isdir(fn), os.listdir("."))) - - -def _load_db(): - rules_file = "cosmos-rules.yaml" - if not os.path.exists(rules_file): - sys.stderr.write('%s not found'.format(rules_file)) - sys.exit(1) - - with open(rules_file) as fd: - rules = yaml.load(fd, Loader=yaml.SafeLoader) - - all_hosts = _all_hosts() - - members = dict() - for node_name in all_hosts: - for reg, cls in rules.items(): - if re.match(reg, node_name): - for cls_name in cls.keys(): - h = members.get(cls_name, []) - h.append(node_name) - members[cls_name] = h - members['all'] = all_hosts - - classes = dict() - for node_name in all_hosts: - node_classes = dict() - for reg, cls in rules.items(): - if re.match(reg, node_name): - node_classes.update(cls) - classes[node_name] = node_classes - - # Sort member lists for a more easy to read diff - for cls in members.keys(): - members[cls].sort() - - return dict(classes=classes, members=members) - - -_db = None - - -def cosmos_db(): - global _db - if _db is None: - _db = _load_db() - return _db - - -if __name__ == '__main__': - print(yaml.dump(cosmos_db(), default_flow_style=None)) diff --git a/global/overlay/etc/cosmos/apt/bootstrap-cosmos.sh b/global/overlay/etc/cosmos/apt/bootstrap-cosmos.sh index 1534dc5d..5e27f3dd 100755 --- a/global/overlay/etc/cosmos/apt/bootstrap-cosmos.sh +++ b/global/overlay/etc/cosmos/apt/bootstrap-cosmos.sh @@ -1,7 +1,6 @@ #!/bin/sh -#set -e -# not all breakage is un-recoverable... 
+set -e cmd_hostname="$1" if test -z "$cmd_hostname"; then @@ -21,27 +20,99 @@ if test -z "$cmd_tags"; then exit 3 fi +set -x + + +# cloud-init runs with LANG='US-ASCII' which is likely to fail because of non-US-ASCII chars in the manifest +export LANG='en_US.UTF-8' + +export DEBIAN_FRONTEND='noninteractive' + apt-get -y update apt-get -y upgrade -for pkg in rsync git git-core wget; do - apt-get -y install $pkg +for pkg in rsync git git-core wget gpg jq; do + # script is running with "set -e", use "|| true" to allow packages to not + # exist without stopping the script + apt-get -y install $pkg || true done -dpkg -i cosmos_1.5-1_all.deb +cosmos_deb=$(find ./ -maxdepth 1 -name 'cosmos_*.deb' | sort -V | tail -1) +dpkg -i "$cosmos_deb" if ! test -d /var/cache/cosmos/repo; then cosmos clone "$cmd_repo" fi +# Re-run cosmos at reboot until it succeeds - use bash -l to get working proxy settings. +# It is possible the file does not exist or contains no matching lines, +# both cases are OK +grep -v "^exit 0" /etc/rc.local > /etc/rc.local.new || true +(echo "" + echo "test -f /etc/run-cosmos-at-boot && (bash -l cosmos -v update; bash -l cosmos -v apply && rm /etc/run-cosmos-at-boot)" + echo "" + echo "exit 0" +) >> /etc/rc.local.new +mv -f /etc/rc.local.new /etc/rc.local + +touch /etc/run-cosmos-at-boot + +# If this cloud-config is set, it will interfere with our changes to /etc/hosts +# The configuration seems to move around between cloud-config versions +for file in /etc/cloud/cloud.cfg /etc/cloud/cloud.cfg.d/01_debian_cloud.cfg; do + if [ -f ${file} ]; then + sed -i 's/manage_etc_hosts: true/manage_etc_hosts: false/g' ${file} + fi +done + +# Remove potential $hostname.novalocal, added by cloud-init or Debian default +# from /etc/hosts. We add our own further down. 
+# +# From https://www.debian.org/doc/manuals/debian-reference/ch05.en.html#_the_hostname_resolution: +# "For a system with a permanent IP address, that permanent IP address should +# be used here instead of 127.0.1.1." +sed -i.bak -e "/127\.0\.1\.1/d" /etc/hosts + +vendor=$(lsb_release -is) +version=$(lsb_release -rs) +min_version=1337 +host_ip=127.0.1.1 +if [ "${vendor}" = "Ubuntu" ]; then + min_version=20.04 +elif [ "${vendor}" = "Debian" ]; then + min_version=11 +fi + hostname $cmd_hostname short=`echo ${cmd_hostname} | awk -F. '{print $1}'` -echo "127.0.1.1 ${cmd_hostname} ${short}" >> /etc/hosts +# Only change behavior on modern OS where `ip -j` outputs JSON predictable +# enough to work with. +# +# Use `dpkg` to more easily compare Ubuntu versions. +if dpkg --compare-versions "${version}" "ge" "${min_version}"; then + # When hostname pointed to loopback in /etc/hosts containers running on the + # host tried to connect to the container itself instead of the host. + host_ip=$(ip -j address show "$(ip -j route show default | jq -r '.[0].dev')" | jq -r .[0].addr_info[0].local) +fi +echo "${host_ip} ${cmd_hostname} ${short}" >> /etc/hosts -perl -pi -e "s,#COSMOS_REPO_MODELS=.*,COSMOS_REPO_MODELS=\"\\\$COSMOS_REPO/global/:\\\$COSMOS_REPO/$cmd_hostname/\"," /etc/cosmos/cosmos.conf +# Set up cosmos models. 
They are in the order of most significant first, so we want +# +_host_type=`echo $cmd_hostname | cut -d - -f 1` +models=$( + echo -n '\\$COSMOS_REPO/'"$cmd_hostname/:" + test -d /var/cache/cosmos/repo/${_host_type}-common && echo -n '\\$COSMOS_REPO/'"${_host_type}-common/:" + echo -n '\\$COSMOS_REPO/global/' +) +echo "Configuring cosmos with the following models:" +echo "${models}" + +perl -pi -e "s,#COSMOS_REPO_MODELS=.*,COSMOS_REPO_MODELS=\"${models}\"," /etc/cosmos/cosmos.conf perl -pi -e "s,#COSMOS_UPDATE_VERIFY_GIT_TAG_PATTERN=.*,COSMOS_UPDATE_VERIFY_GIT_TAG_PATTERN=\"${cmd_tags}*\"," /etc/cosmos/cosmos.conf env COSMOS_BASE=/var/cache/cosmos COSMOS_KEYS=/var/cache/cosmos/repo/global/overlay/etc/cosmos/keys /var/cache/cosmos/repo/global/post-tasks.d/015cosmos-trust -(date; nohup cosmos -v update && nohup cosmos -v apply; date) 2>&1 | tee /var/log/cosmos.log +mkdir -p /var/cache/scriptherder + +(date; nohup cosmos -v update && nohup cosmos -v apply && rm /etc/run-cosmos-at-boot; date) 2>&1 | tee /var/log/cosmos.log exit 0 diff --git a/global/overlay/etc/cosmos/apt/cosmos_1.2-2_all.deb b/global/overlay/etc/cosmos/apt/cosmos_1.2-2_all.deb deleted file mode 100644 index 9fe44341..00000000 Binary files a/global/overlay/etc/cosmos/apt/cosmos_1.2-2_all.deb and /dev/null differ diff --git a/global/overlay/etc/cosmos/apt/cosmos_1.5-2~sunet20220414_all.deb b/global/overlay/etc/cosmos/apt/cosmos_1.5-2~sunet20220414_all.deb new file mode 100644 index 00000000..c1350a45 Binary files /dev/null and b/global/overlay/etc/cosmos/apt/cosmos_1.5-2~sunet20220414_all.deb differ diff --git a/global/overlay/etc/cron.d/cosmos b/global/overlay/etc/cron.d/cosmos index 2aadd2ef..3840f8c4 100644 --- a/global/overlay/etc/cron.d/cosmos +++ b/global/overlay/etc/cron.d/cosmos @@ -1,4 +1,6 @@ SHELL=/bin/sh PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin -*/15 * * * * root test -f /etc/no-automatic-cosmos || scriptherder --mode wrap --syslog --name cosmos -- 
/usr/local/bin/run-cosmos +*/15 * * * * root /usr/local/libexec/cosmos-cron-wrapper + +@reboot root sleep 30; /usr/local/libexec/cosmos-cron-wrapper diff --git a/global/overlay/etc/logrotate.d/docker-containers b/global/overlay/etc/logrotate.d/docker-containers deleted file mode 100644 index e9c90b8c..00000000 --- a/global/overlay/etc/logrotate.d/docker-containers +++ /dev/null @@ -1,7 +0,0 @@ -/var/lib/docker/containers/*/*.log { - rotate 7 - daily - compress - delaycompress - copytruncate -} diff --git a/global/overlay/etc/puppet/cosmos_enc.py b/global/overlay/etc/puppet/cosmos_enc.py index 852fb25c..dca12d33 100755 --- a/global/overlay/etc/puppet/cosmos_enc.py +++ b/global/overlay/etc/puppet/cosmos_enc.py @@ -1,18 +1,37 @@ #!/usr/bin/env python3 +# +# Puppet 'External Node Classifier' to tell puppet what classes to apply to this node. +# +# Docs: https://puppet.com/docs/puppet/5.3/nodes_external.html +# -import sys -import yaml import os import re +import sys + +import yaml + +rules_path = os.environ.get("COSMOS_RULES_PATH", "/etc/puppet") node_name = sys.argv[1] -db_file = os.environ.get("COSMOS_ENC_DB","/etc/puppet/cosmos-db.yaml") -db = dict(classes=dict()) +rules = dict() +for p in rules_path.split(":"): + rules_file = os.path.join(p, "cosmos-rules.yaml") + if os.path.exists(rules_file): + with open(rules_file) as fd: + rules.update(yaml.safe_load(fd)) -if os.path.exists(db_file): - with open(db_file) as fd: - db.update(yaml.load(fd)) +found = False +classes = dict() +for reg, cls in rules.items(): + if re.search(reg, node_name): + classes.update(cls) + found = True -print(yaml.dump(dict(classes=db['classes'].get(node_name,dict()),parameters=dict(roles=db.get('members',[]))))) +if not found: + sys.stderr.write(f"{sys.argv[0]}: {node_name} not found in cosmos-rules.yaml\n") +print("---\n" + yaml.dump(dict(classes=classes))) + +sys.exit(0) diff --git a/global/overlay/etc/puppet/hiera.yaml b/global/overlay/etc/puppet/hiera.yaml index 5948d3a8..3de986b9 100644 
--- a/global/overlay/etc/puppet/hiera.yaml +++ b/global/overlay/etc/puppet/hiera.yaml @@ -1,32 +1,27 @@ +# Hiera version 5 configuration +# --- -:backends: - - eyaml - - yaml - - gpg +version: 5 +defaults: + datadir: /etc/hiera/data + data_hash: yaml_data -:logger: console +hierarchy: + - name: "Per-node data" + path: "local.yaml" -:hierarchy: - - local - - group - - secrets.yaml - - "dist_%{::lsbdistcodename}_override" - - common + - name: "Per-group data" + path: "group.yaml" + - name: "Per-host secrets" + path: "local.eyaml" + lookup_key: eyaml_lookup_key + options: + pkcs7_private_key: /etc/hiera/eyaml/private_key.pkcs7.pem + pkcs7_public_key: /etc/hiera/eyaml/public_certkey.pkcs7.pem -:yaml: - :datadir: /etc/hiera/data + - name: "Overrides per distribution" + path: "dist_%{::lsbdistcodename}_override.yaml" -:gpg: - :datadir: /etc/hiera/data - :key_dir: /etc/hiera/gpg - -:eyaml: - :datadir: '/etc/hiera/data' - - # If using the pkcs7 encryptor (default) - :pkcs7_private_key: /etc/hiera/eyaml/private_key.pkcs7.pem - :pkcs7_public_key: /etc/hiera/eyaml/public_certkey.pkcs7.pem - - # Optionally cache decrypted data (default: false) - :cache_decrypted: false + - name: "Data common to whole environment" + path: "common.yaml" \ No newline at end of file diff --git a/global/overlay/usr/local/bin/run-cosmos b/global/overlay/usr/local/bin/run-cosmos index 5f2cbc1e..7da725e6 100755 --- a/global/overlay/usr/local/bin/run-cosmos +++ b/global/overlay/usr/local/bin/run-cosmos @@ -6,6 +6,11 @@ readonly PROGNAME=$(basename "$0") readonly LOCKFILE_DIR=/tmp readonly LOCK_FD=200 +readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf +readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable +readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock +readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy +readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable lock() { local prefix=$1 @@ -16,29 +21,70 @@ lock() { eval "exec $fd>$lock_file" # acquier 
the lock - flock -n $fd \ + flock -n "$fd" \ && return 0 \ || return 1 } eexit() { - local error_str="$@" + local error_str="$*" - echo $error_str + echo "$error_str" exit 1 } +fleetlock_lock() { + if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then + local fleetlock_group="" + # shellcheck source=/dev/null + . $FLEETLOCK_CONFIG || return 1 + if [ -z "$fleetlock_group" ]; then + echo "Unable to set fleetlock_group" + return 1 + fi + echo "Getting fleetlock lock" + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock || return 1 + fi + return 0 +} + +fleetlock_unlock() { + if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then + local fleetlock_group="" + # shellcheck source=/dev/null + . $FLEETLOCK_CONFIG || return 1 + if [ -z "$fleetlock_group" ]; then + echo "Unable to set fleetlock_group" + return 1 + fi + machine_is_healthy || return 1 + echo "Releasing fleetlock lock" + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock || return 1 + fi + return 0 +} + +machine_is_healthy() { + if [ ! -f $HEALTHCHECK_DISABLE_FILE ] && [ -x $HEALTHCHECK_TOOL ]; then + echo "Running any health checks" + $HEALTHCHECK_TOOL || return 1 + fi + return 0 +} + main () { - lock $PROGNAME || eexit "Only one instance of $PROGNAME can run at one time." - cosmos $* update - cosmos $* apply + lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time." + fleetlock_lock || eexit "Unable to acquire fleetlock lock." + cosmos "$@" update + cosmos "$@" apply + fleetlock_unlock || eexit "Unable to release fleetlock lock." 
touch /var/run/last-cosmos-ok.stamp - find /var/lib/puppet/reports/ -type f -mtime +10 | xargs rm -f + find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f } -main $* +main "$@" if [ -f /cosmos-reboot ]; then rm -f /cosmos-reboot diff --git a/global/overlay/usr/local/bin/sunet-fleetlock b/global/overlay/usr/local/bin/sunet-fleetlock new file mode 100755 index 00000000..e2ee6d98 --- /dev/null +++ b/global/overlay/usr/local/bin/sunet-fleetlock @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# pylint: disable=invalid-name +# pylint: enable=invalid-name +""" Tool for taking and releasing fleetlock locks, used by run-cosmos if fleetlock is configured """ + +# +# You need a config file in "configparser" format with a section for the +# lock group you are using, so if the file describes two lock groups where one +# is called "fl-test1" and the other "fl-test2" then example contents would +# look like this: +# === +# [fl-test1] +# server = https://fleetlock-server1.example.com +# password = mysecret1 +# +# [fl-test2] +# server = https://fleetlock-server2.example.com +# password = mysecret2 +# === +# +# The password needs to match an acl configured for the lock group in the +# knubbis-fleetlock service. 
+# +# When modifying this code please make sure it is passed through the following +# tools: +# === +# black +# pylint +# mypy --strict +# === + +import platform +import sys +import signal +import time +import argparse +import configparser +import os.path +from typing import Optional, Union +from types import FrameType + +import requests + + +class TimeoutException(Exception): + """Exception raised when we hit tool timeout""" + + +def timeout_handler(signum: int, frame: Optional[FrameType]) -> None: + """This is called if the tool takes too long to run""" + raise TimeoutException(f"{os.path.basename(sys.argv[0])} hit --timeout limit") + + +def do_fleetlock_request( + config: configparser.ConfigParser, args: argparse.Namespace, operation: str +) -> bool: + """Perform fleetlock request based on given operation and return true if it succeeded""" + fleetlock_data = { + "client_params": { + "group": args.lock_group, + "id": args.lock_id, + }, + } + + fleetlock_headers = { + "fleet-lock-protocol": "true", + } + + if operation == "lock": + fleetlock_path = "/v1/pre-reboot" + url = config[args.lock_group]["server"] + fleetlock_path + elif operation == "unlock": + fleetlock_path = "/v1/steady-state" + url = config[args.lock_group]["server"] + fleetlock_path + else: + raise ValueError(f"unsupported operation: {operation}") + + # Log the request-id header from responses so we can track requests in + # the knubbis-fleetlock logs more easily + request_id_key = "request-id" + request_id = None + + # Loop forever: we depend on the SIGALRM timeout to raise an error if it + # takes too long + while True: + if args.verbose: + print(f"{operation} POST at url {url}") + + resp = requests.post( + url, + headers=fleetlock_headers, + json=fleetlock_data, + timeout=args.request_timeout, + auth=("", config[args.lock_group]["password"]), + ) + + if request_id_key in resp.headers: + request_id = resp.headers[request_id_key] + + if resp.status_code == requests.codes.ok: # pylint: disable=no-member + if
args.verbose: + print( + f"successful {operation} request for lock ID '{args.lock_id}'", + f"in lock group '{args.lock_group}' ({request_id_key}: {request_id})", + ) + + return True + + # If the request is unauthorized this means we probably either try to + # use a lock group that does not exist, or we are using the wrong + # credentials and in either case we can give up immediately + if resp.status_code == requests.codes.unauthorized: # pylint: disable=no-member + print( + f"{operation} request unauthorized: incorrect lock group name '{args.lock_group}'", + f"or wrong credentials? ({request_id_key}: {request_id})", + ) + return False + + # If the request failed in some other way we expect a JSON formatted + # response message: + print( + f"{operation} request failed:" + + " " + + resp.content.decode("utf-8").rstrip() + + " " + + f"({request_id_key}: {request_id})" + ) + + time.sleep(1) + + +def read_config(args: argparse.Namespace) -> Union[configparser.ConfigParser, None]: + """Read lock group specific settings from config file""" + config = configparser.ConfigParser() + with open(args.config, encoding="utf-8") as config_fileobj: + config.read_file(config_fileobj) + + if args.lock_group not in config: + print(f"missing required config section for lock group '{args.lock_group}'") + return None + + required_settings = { + "server", + "password", + } + + have_required_settings = True + for setting in required_settings: + if setting not in config[args.lock_group]: + print( + f"missing required setting '{setting}' in lock group '{args.lock_group}'" + ) + have_required_settings = False + + if not have_required_settings: + return None + + return config + + +def main() -> None: + """Starting point of the program""" + + # How long to wait per HTTP request to fleetlock service + default_request_timeout = 5 + + # How long to wait before giving up and exiting the tool with a failure + default_timeout = 60 + + default_config_file = "/etc/sunet-fleetlock/sunet-fleetlock.conf" +
parser = argparse.ArgumentParser(description="Take and release fleetlock lock.") + parser.add_argument("--verbose", help="print more information", action="store_true") + parser.add_argument( + "--config", + help=f"the conf file to read (default: {default_config_file})", + default=default_config_file, + ) + parser.add_argument( + "--lock-group", required=True, help="the group to take a lock in" + ) + parser.add_argument( + "--lock-id", + help=f"the lock ID to use in the group (default: {platform.node()})", + default=platform.node(), + ) + parser.add_argument( + "--timeout", + type=int, + help=f"how many seconds before giving up and exiting tool (default: {default_timeout}s)", + default=default_timeout, + ) + parser.add_argument( + "--request_timeout", + type=int, + help=f"individal fleetlock HTTP request timeout (default: {default_request_timeout}s)", + default=default_request_timeout, + ) + action_group = parser.add_mutually_exclusive_group(required=True) + action_group.add_argument("--lock", action="store_true", help="lock a reboot slot") + action_group.add_argument( + "--unlock", action="store_true", help="unlock a reboot slot" + ) + args = parser.parse_args() + + config = read_config(args) + + if config is None: + sys.exit(1) + + # Give up if tool has been running for more than --timeout seconds: + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(args.timeout) + + if args.lock: + locked = False + + try: + locked = do_fleetlock_request(config, args, "lock") + except TimeoutException as exc: + print(exc) + + if locked: + sys.exit(0) + + if args.unlock: + unlocked = False + + try: + unlocked = do_fleetlock_request(config, args, "unlock") + except TimeoutException as exc: + print(exc) + + if unlocked: + sys.exit(0) + + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/global/overlay/usr/local/bin/sunet-machine-healthy b/global/overlay/usr/local/bin/sunet-machine-healthy new file mode 100755 index 00000000..b34664cb --- /dev/null +++ 
b/global/overlay/usr/local/bin/sunet-machine-healthy @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# pylint: disable=invalid-name +# pylint: enable=invalid-name + +""" Run any check tools in a directory to decide if the machine is considered +healthy, called by run-cosmos if fleetlock locking is configured """ + +import pathlib +import os +import os.path +import subprocess +import sys +import signal +import argparse + +from typing import List, Optional +from types import FrameType + + +class TimeoutException(Exception): + """Exception returned when checks takes too long""" + + +def timeout_handler(signum: int, frame: Optional[FrameType]) -> None: + """This is called if the tool takes too long to run""" + raise TimeoutException(f"{os.path.basename(sys.argv[0])} hit --timeout limit") + + +def find_checks(check_dir: str) -> List[pathlib.Path]: + """Find all executable .check files in the given directory""" + check_files = [] + + dirobj = pathlib.Path(check_dir) + + # iterdir() will raise error if the directory does not exist, and in this + # case we will just return an empty list + try: + for entry in dirobj.iterdir(): + if entry.is_file(): + if str(entry).endswith(".check") and os.access(entry, os.X_OK): + check_files.append(entry) + + # run checks in alphabetical order + check_files = sorted(check_files) + except FileNotFoundError: + pass + + return check_files + + +def run_checks(check_files: List[pathlib.Path]) -> bool: + """Run all checks""" + for check_file in check_files: + try: + subprocess.run([str(check_file)], check=True) + except subprocess.CalledProcessError as exc: + print(f"error: {exc}") + return False + + return True + + +def main() -> None: + """Starting point of the program""" + + default_timeout = 60 + default_health_check_dir = "/etc/sunet-machine-healthy/health-checks.d" + + parser = argparse.ArgumentParser( + description="Determine if machine is considered healthy." 
+ ) + parser.add_argument("--verbose", help="print more information", action="store_true") + parser.add_argument( + "--health-check-dir", + help=f"directory to run checks from (default: {default_health_check_dir})", + default=default_health_check_dir, + ) + parser.add_argument( + "--timeout", + type=int, + help=f"seconds before giving up and exiting tool (default: {default_timeout}s)", + default=default_timeout, + ) + args = parser.parse_args() + + checks_ok = False + + # Give up if checks have been running for more than --timeout seconds: + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(args.timeout) + + check_files = find_checks(args.health_check_dir) + + checks_ok = run_checks(check_files) + + if checks_ok: + sys.exit(0) + + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/global/overlay/usr/local/libexec/cosmos-cron-wrapper b/global/overlay/usr/local/libexec/cosmos-cron-wrapper new file mode 100755 index 00000000..ae668108 --- /dev/null +++ b/global/overlay/usr/local/libexec/cosmos-cron-wrapper @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +test -f /etc/no-automatic-cosmos && exit 0 + +RUN_COSMOS='/usr/local/bin/run-cosmos' +SCRIPTHERDER_CMD='' + +if [ -x /usr/local/bin/scriptherder ]; then + SCRIPTHERDER_CMD='/usr/local/bin/scriptherder --mode wrap --syslog --name cosmos --' +fi + +exec ${SCRIPTHERDER_CMD} ${RUN_COSMOS} "$@" diff --git a/global/overlay/usr/local/sbin/cosmos_vm b/global/overlay/usr/local/sbin/cosmos_vm index bf275766..5eec8f71 100755 --- a/global/overlay/usr/local/sbin/cosmos_vm +++ b/global/overlay/usr/local/sbin/cosmos_vm @@ -9,8 +9,8 @@ hostname="default" bridge="br0" cpus="1" mem="1024" -repo="git://code.mnt.se/mnt-cosmos.git" -tag="eduid-cosmos" +repo="https://yourhost/myproj-cosmos.git" +tag="cosmos-ops" ip="" gateway="" netmask="" @@ -63,7 +63,7 @@ ssh_authorized_keys: runcmd: - ["mkdir","/tmp/seed"] - ["mount","/dev/vdb","/tmp/seed"] - -
["cp","/tmp/seed/bootstrap-cosmos.sh","/tmp/seed/cosmos_1.2-2_all.deb","/root"] + - ["cp","/tmp/seed/bootstrap-cosmos.sh","/tmp/seed/cosmos_1.5-1_all.deb","/root"] - ["cd","/root"] - "cd /root && /root/bootstrap-cosmos.sh ${hostname} ${repo} ${tag}" @@ -112,7 +112,7 @@ fi mcopy -i ${seed} ${user_data} ::user-data 2>/dev/null mcopy -i ${seed} ${meta_data} ::meta-data 2>/dev/null -mcopy -i ${seed} /etc/cosmos/apt/bootstrap-cosmos.sh /etc/cosmos/apt/cosmos_1.2-2_all.deb :: +mcopy -i ${seed} /etc/cosmos/apt/bootstrap-cosmos.sh /etc/cosmos/apt/cosmos_1.5-1_all.deb :: mv ${seed} /var/lib/libvirt/images/ virsh pool-refresh default diff --git a/global/post-tasks.d/010cosmos-modules b/global/post-tasks.d/010cosmos-modules new file mode 100755 index 00000000..092815a2 --- /dev/null +++ b/global/post-tasks.d/010cosmos-modules @@ -0,0 +1,39 @@ +#!/bin/sh +# +# Dynamically configure /etc/puppet/cosmos-modules.conf +# +# The content of that file is chosen according to: +# +# 1. If the file is actually present in the model, use that. +# 2. If there is a script called /etc/puppet/setup_cosmos_modules, run that. +# 3. If the file still doesn't exist, create it with the defaults in this script. 
+# + +set -e + +if [ -f "${COSMOS_MODEL}/overlay/etc/puppet/cosmos-modules.conf" ]; then + test "$COSMOS_VERBOSE" = "y" && \ + echo "$0: /etc/puppet/cosmos-modules.conf is present in the model, exiting" + exit 0 +fi + +if [ -x /etc/puppet/setup_cosmos_modules ]; then + test "$COSMOS_VERBOSE" = "y" && \ + echo "$0: Updating /etc/puppet/cosmos-modules.conf with /etc/puppet/setup_cosmos_modules" + /etc/puppet/setup_cosmos_modules + + test -f /etc/puppet/cosmos-modules.conf && exit 0 +fi + +test "$COSMOS_VERBOSE" = "y" && \ + echo "$0: Creating/updating /etc/puppet/cosmos-modules.conf with defaults from this script" + +cat > /etc/puppet/cosmos-modules.conf << EOF +# File created/updated by $0 +# +concat puppetlabs/concat yes +stdlib puppetlabs/stdlib yes +#ufw attachmentgenie/ufw yes +#apt puppetlabs/apt yes +#cosmos https://github.com/SUNET/puppet-cosmos.git yes +EOF diff --git a/global/post-tasks.d/015cosmos-trust b/global/post-tasks.d/015cosmos-trust index 1f3e7484..85649e88 100755 --- a/global/post-tasks.d/015cosmos-trust +++ b/global/post-tasks.d/015cosmos-trust @@ -1,5 +1,12 @@ #!/bin/bash +gnupg_show_options='--import --import-options show-only,import-minimal' +if [[ $(lsb_release -sr | awk -F . '{ print $1 }') -le 16 ]]; then + # gpg on Ubuntu 16 and less is gnupg < 2, which doesn't have --import-options show-only + # but on the other hand defaults to this mode (https://dev.gnupg.org/T2943) + gnupg_show_options='--dry-run' +fi + if [ -z "$COSMOS_KEYS" ]; then COSMOS_KEYS=/etc/cosmos/keys fi @@ -32,7 +39,9 @@ for k in $COSMOS_KEYS/*.pub; do # Silently ignore empty files continue fi - pubkeys_in_file=$(cosmos gpg --with-colons --with-fingerprint < $k 2>&1 | grep "^pub:") + pubkeys_in_file=$(cosmos gpg ${gnupg_show_options} \ + --with-colons --with-fingerprint --quiet < $k \ + | grep "^pub:") non_expired_pubkeys_in_file=$(echo ${pubkeys_in_file} | awk -F: '$2 != "e" { print $0 }') if [[ ! 
$non_expired_pubkeys_in_file ]]; then echo -e "$0: ${red}Ignoring file with expired pubkey: ${k}${reset}" @@ -53,6 +62,12 @@ for k in $COSMOS_KEYS/*.pub; do fi done +if [[ ${#SEEN[@]} -eq 0 ]]; then + echo -e "$0: ${red}NO trusted keys found in directory ${COSMOS_KEYS} - aborting${reset}" + echo "(this is probably a syntax problem with the gpg commands in this script)" + exit 1 +fi + # Delete keys no longer present (or expired) in $COSMOS_KEYS directory for fp in ${!KEYRING[@]}; do if [[ ! ${SEEN[$fp]} ]]; then diff --git a/global/post-tasks.d/018packages b/global/post-tasks.d/018packages index bd8d9eea..39569b28 100755 --- a/global/post-tasks.d/018packages +++ b/global/post-tasks.d/018packages @@ -6,6 +6,7 @@ CACHE_DIR=/var/cache/puppet-modules MODULES_DIR=${MODULES_DIR:=/etc/puppet/cosmos-modules} export GNUPGHOME=/etc/cosmos/gnupg +# /etc/puppet/cosmos_enc.py needs the YAML module python3 -c "import yaml" 2>/dev/null || apt-get -y install python3-yaml bold='\e[1m' @@ -41,7 +42,10 @@ if [ -f $CONFIG -o $LOCALCONFIG ]; then if [ "$src" != "$(git config remote.origin.url)" ]; then git config remote.origin.url $src fi - git pull -q + # Support master branch being renamed to main + git branch --all | grep -q '^[[:space:]]*remotes/origin/main$' && git checkout main + # Update repo and clean out any local inconsistencies + git pull -q || (git fetch && git reset --hard) else continue fi diff --git a/global/post-tasks.d/020reports b/global/post-tasks.d/020reports index 380f31a7..c6033bb5 100755 --- a/global/post-tasks.d/020reports +++ b/global/post-tasks.d/020reports @@ -2,3 +2,4 @@ #rm -f /var/run/facts.json #facter -p -y > /var/run/facts.yaml +rm -f /var/run/facts.yaml diff --git a/global/post-tasks.d/030puppet b/global/post-tasks.d/030puppet index af450057..561ebc4b 100755 --- a/global/post-tasks.d/030puppet +++ b/global/post-tasks.d/030puppet @@ -1,13 +1,15 @@ #!/bin/sh +set -e + if [ "x$COSMOS_VERBOSE" = "xy" ]; then args="--verbose --show_diff" else
args="--logdest=syslog" fi -if [ -f /usr/bin/puppet -a -d /etc/puppet/manifests ]; then - for m in `find /etc/puppet/manifests -name \*.pp`; do +if [ -f /usr/bin/puppet ] && [ -d /etc/puppet/manifests ]; then + find /etc/puppet/manifests -name \*.pp | while read -r m; do test "x$COSMOS_VERBOSE" = "xy" && echo "$0: Applying Puppet manifest $m" puppet apply $args $m done diff --git a/global/post-tasks.d/099autoremove b/global/post-tasks.d/099autoremove index 9911ae2f..c3c809ca 100755 --- a/global/post-tasks.d/099autoremove +++ b/global/post-tasks.d/099autoremove @@ -1,5 +1,7 @@ #!/bin/bash +export DEBIAN_FRONTEND='noninteractive' + if (( $RANDOM % 20 == 0)); then apt-get -qq update apt-get -qq -y autoremove diff --git a/global/post-tasks.d/999reboot b/global/post-tasks.d/999reboot index bc27e6ef..e0a05e1d 100755 --- a/global/post-tasks.d/999reboot +++ b/global/post-tasks.d/999reboot @@ -4,7 +4,7 @@ if [[ -f /var/run/reboot-required && -f /etc/cosmos-automatic-reboot ]]; then if [[ $HOSTNAME =~ -tug- ]]; then # Reboot hosts in site TUG with 15 seconds delay (enough to manually - # cancel the reboot if logged in and seeind the 'emerg' message broadcasted to console) + # cancel the reboot if logged in and seeing the 'emerg' message broadcasted to console) sleep=15 elif [[ $HOSTNAME =~ -fre- ]]; then # reboot hosts in site FRE with 15+180 to 15+180+180 seconds delay diff --git a/global/pre-tasks.d/015set-overlay-permissions b/global/pre-tasks.d/015set-overlay-permissions new file mode 100755 index 00000000..37f98441 --- /dev/null +++ b/global/pre-tasks.d/015set-overlay-permissions @@ -0,0 +1,23 @@ +#!/bin/sh +# +# Set overlay file permissions in model directory before apply.d/60overlay +# rsyncs it to / +# + +set -e +self=$(basename "$0") + +MODEL_OVERLAY="$COSMOS_MODEL/overlay" + +if ! 
test -d "$MODEL_OVERLAY"; then + test -z "$COSMOS_VERBOSE" || echo "$self: overlay is a no-op" + exit 0 +fi + +if [ -d "$MODEL_OVERLAY/root" ]; then + args="" + if [ "x$COSMOS_VERBOSE" = "xy" ]; then + args="-v" + fi + chmod ${args} 0700 "$MODEL_OVERLAY"/root +fi diff --git a/global/pre-tasks.d/030puppet b/global/pre-tasks.d/030puppet index 709d83c9..c431f0a3 100755 --- a/global/pre-tasks.d/030puppet +++ b/global/pre-tasks.d/030puppet @@ -7,10 +7,25 @@ set -e stamp="$COSMOS_BASE/stamps/puppet-tools-v01.stamp" -if ! test -f $stamp -a -f /usr/bin/puppet; then +if ! test -f "${stamp}" -a -f /usr/bin/puppet; then apt-get update - apt-get -y install puppet-common || apt-get -y install puppet + apt-get -y install puppet + . /etc/os-release - mkdir -p `dirname $stamp` - touch $stamp + # Note: in posix shell, string comparison is done with a single = + if [ "${ID}" = "debian" ] && [ "${VERSION_ID}" -ge 12 ]; then + apt-get -y install \ + cron \ + puppet-module-camptocamp-augeas \ + puppet-module-puppetlabs-apt \ + puppet-module-puppetlabs-concat \ + puppet-module-puppetlabs-cron-core \ + puppet-module-puppetlabs-stdlib \ + puppet-module-puppetlabs-vcsrepo + + fi + + mkdir -p "$(dirname "${stamp}")" + touch "${stamp}" fi + diff --git a/global/pre-tasks.d/040hiera-eyaml b/global/pre-tasks.d/040hiera-eyaml new file mode 100755 index 00000000..96564138 --- /dev/null +++ b/global/pre-tasks.d/040hiera-eyaml @@ -0,0 +1,39 @@ +#!/bin/sh +# +# Set up eyaml for Hiera +# + +set -e + +EYAMLDIR=/etc/hiera/eyaml + +vendor=$(lsb_release -is) +version=$(lsb_release -rs) +# eyaml is only used on Ubuntu 18.04 and newer, and Debian 10 and newer (earlier OSes use hiera-gpg instead) +test "${vendor}" = "Ubuntu" && dpkg --compare-versions "${version}" "lt" "18.04" && exit 0 +test "${vendor}" = "Debian" && dpkg --compare-versions "${version}" "lt" "10" && exit 0 + +stamp="$COSMOS_BASE/stamps/hiera-eyaml-v01.stamp" + +test -f "$stamp" && exit 0 + +if [ ! -f /usr/bin/eyaml ] || [ !
-d /usr/share/doc/yaml-mode ]; then + apt-get update + # If we don't install emacs before yaml-mode the default emacs package + # will be emacs-gtk which brings x11 with friends which we don't need. + apt-get -y install emacs-nox + apt-get -y install hiera-eyaml yaml-mode +fi + +if [ ! -f ${EYAMLDIR}/public_certkey.pkcs7.pem ] || [ ! -f ${EYAMLDIR}/private_key.pkcs7.pem ]; then + # hiera-eyaml wants a certificate and public key, not just a public key oddly enough + echo "$0: Generating eyaml key in ${EYAMLDIR} - this might take a while..." + mkdir -p /etc/hiera/eyaml + openssl req -x509 -newkey rsa:4096 -keyout ${EYAMLDIR}/private_key.pkcs7.pem \ + -out ${EYAMLDIR}/public_certkey.pkcs7.pem -days 3653 -nodes -sha256 \ + -subj "/C=SE/O=SUNET/OU=EYAML/CN=$(hostname)" + rm -f ${EYAMLDIR}/public_key.pkcs7.pem # cleanup +fi + +mkdir -p "$(dirname "${stamp}")" +touch "$stamp" diff --git a/global/pre-tasks.d/040hiera-gpg b/global/pre-tasks.d/040hiera-gpg index 6f6be9d8..bc1da35f 100755 --- a/global/pre-tasks.d/040hiera-gpg +++ b/global/pre-tasks.d/040hiera-gpg @@ -6,34 +6,18 @@ set -e -EYAMLDIR=/etc/hiera/eyaml GNUPGHOME=/etc/hiera/gpg export GNUPGHOME +vendor=$(lsb_release -is) +version=$(lsb_release -rs) +# If the OS is Ubuntu 18.04 or newer, or Debian 10 or newer, we don't need to do anything (those use eyaml instead) +test "${vendor}" = "Ubuntu" && dpkg --compare-versions "${version}" "ge" "18.04" && exit 0 +test "${vendor}" = "Debian" && dpkg --compare-versions "${version}" "ge" "10" && exit 0 -# There is no hiera-eyaml on Ubuntu < 16.04 -if [ "x`lsb_release -r | awk '{print $NF}'`" != "x12.04" -a "x`lsb_release -r | awk '{print $NF}'`" != "x14.04" ]; then - if [ ! -f /usr/bin/eyaml ]; then - apt-get update - apt-get -y install hiera-eyaml - fi -fi +stamp="$COSMOS_BASE/stamps/hiera-gpg-v01.stamp" -if [ -f /usr/bin/eyaml ]; then - # Create eyaml keypair if eyaml is installed but there are no keys - if [ ! -f ${EYAMLDIR}/public_certkey.pkcs7.pem -o ! 
-f ${EYAMLDIR}/private_key.pkcs7.pem ]; then - # hiera-eyaml wants a certificate and public key, not just a public key oddly enough - echo "$0: Generating eyaml key in ${EYAMLDIR} - this might take a while..." - mkdir -p ${EYAMLDIR} - openssl req -x509 -newkey rsa:4096 -keyout ${EYAMLDIR}/private_key.pkcs7.pem \ - -out ${EYAMLDIR}/public_certkey.pkcs7.pem -days 3653 -nodes -sha256 \ - -subj "/C=SE/O=SUNET/OU=EYAML/CN=`hostname`" - rm -f ${EYAMLDIR}/public_key.pkcs7.pem # cleanup - fi -fi - - -# Old stuff below this point +test -f "$stamp" && exit 0 if [ ! -f /usr/lib/ruby/vendor_ruby/gpgme.rb ]; then apt-get update @@ -60,19 +44,21 @@ if [ ! -s $GNUPGHOME/secring.gpg ]; then chmod 700 $GNUPGHOME TMPFILE=$(mktemp /tmp/hiera-gpg.XXXXXX) - cat > $TMPFILE < "$TMPFILE" </etc/cosmos/update.d/50update-while-testing << EOF +#!/bin/sh + +rsync -a --delete --exclude .git /multiverse/ /var/cache/cosmos/repo +EOF +chmod 755 /etc/cosmos/update.d/50update-while-testing + +sed -i -e 's!^#COSMOS_REPO_MODELS=.*!COSMOS_REPO_MODELS="\$COSMOS_REPO/global/"!' /etc/cosmos/cosmos.conf + +export DEBIAN_FRONTEND=noninteractive + +echo "" +echo "***" +echo "" +echo "$0: Configured docker container for testing of files in /multiverse." +echo "" +echo "You should now be able to do" +echo "" +echo " cosmos -v update" +echo " cosmos -v apply" +echo "" + +exec bash -l