diff --git a/edit-secrets b/edit-secrets index a2c67ac3..68d3630a 100755 --- a/edit-secrets +++ b/edit-secrets @@ -67,6 +67,39 @@ function patch_broken_eyaml { next if @@plugins.include? spec dependency = spec.dependencies.find { |d| d.name == "hiera-eyaml" } +EOF + fi + fi + fi + + # + # Ubuntu 24.04 (noble) has a hiera-eyaml version that is incompatible with ruby 3.2+ (default in ubuntu24). + # This is fixed upstream in a hiera-eyaml release newer than the packaged 3.3.0 patched below: https://github.com/voxpupuli/hiera-eyaml/pull/340/files + # https://github.com/voxpupuli/hiera-eyaml/blob/master/CHANGELOG.md + # But there is no modern version of hiera-eyaml packaged in debian or ubuntu. + # https://github.com/puppetlabs/puppet/wiki/Puppet-8-Compatibility#filedirexists-removed + # + + . /etc/os-release + if [ "${VERSION_CODENAME}" == "noble" ]; then + plugins_file="/usr/share/rubygems-integration/all/gems/hiera-eyaml-3.3.0/lib/hiera/backend/eyaml/subcommands/edit.rb" + if [ -f $plugins_file ]; then + # We only want to try patching the file if it is the known broken version + bad_sum="59c6eb910ab2eb44f8c75aeaa79bff097038feb673b5c6bdccde23d9b2a393e2" + sum=$(sha256sum $plugins_file | awk '{print $1}') + if [ "$sum" == "$bad_sum" ]; then + patch --fuzz=0 --directory=/ --strip=0 <<'EOF' +--- /usr/share/rubygems-integration/all/gems/hiera-eyaml-3.3.0/lib/hiera/backend/eyaml/subcommands/edit.rb.orig 2022-06-11 16:30:10.000000000 +0000 ++++ /usr/share/rubygems-integration/all/gems/hiera-eyaml-3.3.0/lib/hiera/backend/eyaml/subcommands/edit.rb 2024-09-09 14:13:19.306342025 +0000 +@@ -59,7 +59,7 @@ + Optimist::die "You must specify an eyaml file" if ARGV.empty? + options[:source] = :eyaml + options[:eyaml] = ARGV.shift +- if File.exists? options[:eyaml] ++ if File.exist? 
options[:eyaml] + begin + options[:input_data] = File.read options[:eyaml] + rescue EOF fi fi diff --git a/global/overlay/etc/puppet/cosmos_enc.py b/global/overlay/etc/puppet/cosmos_enc.py index dca12d33..7769a9aa 100755 --- a/global/overlay/etc/puppet/cosmos_enc.py +++ b/global/overlay/etc/puppet/cosmos_enc.py @@ -26,7 +26,8 @@ found = False classes = dict() for reg, cls in rules.items(): if re.search(reg, node_name): - classes.update(cls) + if cls: + classes.update(cls) found = True if not found: diff --git a/global/overlay/etc/puppet/hiera.yaml b/global/overlay/etc/puppet/hiera.yaml index 3de986b9..a301376f 100644 --- a/global/overlay/etc/puppet/hiera.yaml +++ b/global/overlay/etc/puppet/hiera.yaml @@ -21,7 +21,7 @@ hierarchy: pkcs7_public_key: /etc/hiera/eyaml/public_certkey.pkcs7.pem - name: "Overrides per distribution" - path: "dist_%{::lsbdistcodename}_override.yaml" + path: "dist_%{facts.os.distro.codename}_override.yaml" - name: "Data common to whole environment" path: "common.yaml" \ No newline at end of file diff --git a/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service b/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service new file mode 100644 index 00000000..7507ff79 --- /dev/null +++ b/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service @@ -0,0 +1,11 @@ +[Unit] +Description=run-cosmos fleetlock unlocker +After=network-online.target +Wants=network-online.target + +[Service] +Type=oneshot +ExecStart=/usr/local/bin/run-cosmos fleetlock-unlock + +[Install] +WantedBy=multi-user.target diff --git a/global/overlay/usr/local/bin/run-cosmos b/global/overlay/usr/local/bin/run-cosmos index 7da725e6..268fd6a3 100755 --- a/global/overlay/usr/local/bin/run-cosmos +++ b/global/overlay/usr/local/bin/run-cosmos @@ -9,6 +9,7 @@ readonly LOCK_FD=200 readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable readonly 
FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock +readonly FLEETLOCK_UNLOCK_SERVICE=run-cosmos-fleetlock-unlocker.service readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable @@ -33,60 +34,119 @@ eexit() { exit 1 } +oexit() { + local info_str="$*" + + echo "$info_str" + exit 0 +} + +fleetlock_enable_unlock_service() { + # In case e.g. the unit file has been removed "FragmentPath" will still + # return the old filename until daemon-reload is called, so do that here + # before we try checking for the FragmentPath. + need_reload=$(systemctl show --property NeedDaemonReload $FLEETLOCK_UNLOCK_SERVICE | awk -F= '{print $2}') + if [ "$need_reload" = "yes" ]; then + systemctl daemon-reload + fi + + unit_file=$(systemctl show --property FragmentPath $FLEETLOCK_UNLOCK_SERVICE | awk -F= '{print $2}') + if [ -z "$unit_file" ]; then + # No unit file matching the service name, do nothing + return 0 + fi + + # Enable the service if needed + systemctl is-enabled --quiet $FLEETLOCK_UNLOCK_SERVICE || systemctl enable --quiet $FLEETLOCK_UNLOCK_SERVICE +} + fleetlock_lock() { - if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then - local fleetlock_group="" - # shellcheck source=/dev/null - . $FLEETLOCK_CONFIG || return 1 - if [ -z "$fleetlock_group" ]; then - echo "Unable to set fleetlock_group" - return 1 - fi - echo "Getting fleetlock lock" - $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock || return 1 - fi - return 0 + if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then + # Make sure the unlock service is enabled before we take a lock, in case + # cosmos ends up rebooting the machine before fleetlock_unlock() is + # called. + fleetlock_enable_unlock_service || return 1 + local fleetlock_group="" + local optional_args=() + # shellcheck source=/dev/null + . 
$FLEETLOCK_CONFIG || return 1 + if [ -z "$fleetlock_group" ]; then + echo "Unable to set fleetlock_group" + return 1 + fi + if [ -n "$fleetlock_lock_timeout" ]; then + optional_args+=("--timeout") + optional_args+=("$fleetlock_lock_timeout") + fi + echo "Getting fleetlock lock" + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock "${optional_args[@]}" || return 1 + fi + return 0 } fleetlock_unlock() { - if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then - local fleetlock_group="" - # shellcheck source=/dev/null - . $FLEETLOCK_CONFIG || return 1 - if [ -z "$fleetlock_group" ]; then - echo "Unable to set fleetlock_group" - return 1 - fi - machine_is_healthy || return 1 - echo "Releasing fleetlock lock" - $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock || return 1 - fi - return 0 + if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then + local fleetlock_group="" + local optional_args=() + # shellcheck source=/dev/null + . $FLEETLOCK_CONFIG || return 1 + if [ -z "$fleetlock_group" ]; then + echo "Unable to set fleetlock_group" + return 1 + fi + if [ -n "$fleetlock_unlock_timeout" ]; then + optional_args+=("--timeout") + optional_args+=("$fleetlock_unlock_timeout") + fi + machine_is_healthy || return 1 + echo "Releasing fleetlock lock" + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock "${optional_args[@]}" || return 1 + fi + return 0 } machine_is_healthy() { - if [ ! -f $HEALTHCHECK_DISABLE_FILE ] && [ -x $HEALTHCHECK_TOOL ]; then - echo "Running any health checks" - $HEALTHCHECK_TOOL || return 1 - fi - return 0 + if [ ! -f $HEALTHCHECK_DISABLE_FILE ] && [ -x $HEALTHCHECK_TOOL ]; then + local fleetlock_healthcheck_timeout="" + local optional_args=() + # shellcheck source=/dev/null + . 
$FLEETLOCK_CONFIG || return 1 + if [ -n "$fleetlock_healthcheck_timeout" ]; then + optional_args+=("--timeout") + optional_args+=("$fleetlock_healthcheck_timeout") + fi + echo "Running any health checks" + $HEALTHCHECK_TOOL "${optional_args[@]}" || return 1 + fi + return 0 } main () { - lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time." - fleetlock_lock || eexit "Unable to acquire fleetlock lock." - cosmos "$@" update - cosmos "$@" apply - fleetlock_unlock || eexit "Unable to release fleetlock lock." + lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time." + fleetlock_lock || eexit "Unable to acquire fleetlock lock." + cosmos "$@" update + cosmos "$@" apply + fleetlock_unlock || eexit "Unable to release fleetlock lock." - touch /var/run/last-cosmos-ok.stamp + touch /var/run/last-cosmos-ok.stamp - find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f + find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f + + if [ -f /cosmos-reboot ]; then + rm -f /cosmos-reboot + reboot + fi } -main "$@" - -if [ -f /cosmos-reboot ]; then - rm -f /cosmos-reboot - reboot -fi +# Most of the time we just pass on any arguments to the underlying cosmos +# tools, if adding special cases here make sure to not shadow any arguments +# (like "-v") which users expect to be passed on to cosmos. +case "$1" in + "fleetlock-unlock") + lock "$PROGNAME" || oexit "$PROGNAME appears locked by a running run-cosmos, let it handle unlocking instead." + fleetlock_unlock || eexit "Unable to release fleetlock lock." 
+ ;; + *) + main "$@" + ;; +esac diff --git a/global/overlay/usr/local/bin/sunet-fleetlock b/global/overlay/usr/local/bin/sunet-fleetlock index e2ee6d98..64effec0 100755 --- a/global/overlay/usr/local/bin/sunet-fleetlock +++ b/global/overlay/usr/local/bin/sunet-fleetlock @@ -24,20 +24,21 @@ # When modifying this code please make sure it is passed through the following # tools: # === +# isort # black # pylint # mypy --strict # === -import platform -import sys -import signal -import time import argparse import configparser import os.path -from typing import Optional, Union +import platform +import signal +import sys +import time from types import FrameType +from typing import Optional, Union import requests @@ -80,19 +81,29 @@ def do_fleetlock_request( request_id_key = "request-id" request_id = None + retry_sleep_delay = 1 + # Loop forever: we depend on the SIGALRM timout to raise an error if it # takes too long while True: if args.verbose: print(f"{operation} POST at url {url}") - resp = requests.post( - url, - headers=fleetlock_headers, - json=fleetlock_data, - timeout=args.timeout, - auth=("", config[args.lock_group]["password"]), - ) + try: + resp = requests.post( + url, + headers=fleetlock_headers, + json=fleetlock_data, + timeout=args.request_timeout, + auth=("", config[args.lock_group]["password"]), + ) + except ( + requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout, + ) as e: + print(f"POST request failed: {e}") + time.sleep(retry_sleep_delay) + continue if request_id_key in resp.headers: request_id = resp.headers[request_id_key] @@ -126,7 +137,7 @@ def do_fleetlock_request( + f"({request_id_key}: {request_id})" ) - time.sleep(1) + time.sleep(retry_sleep_delay) def read_config(args: argparse.Namespace) -> Union[configparser.ConfigParser, None]: diff --git a/global/post-tasks.d/010fix-ssh-perms b/global/post-tasks.d/010fix-ssh-perms index 87636d79..f32a6a44 100755 --- a/global/post-tasks.d/010fix-ssh-perms +++ 
b/global/post-tasks.d/010fix-ssh-perms @@ -17,7 +17,7 @@ if test -f /root/.ssh/authorized_keys; then if test `stat -t /root/.ssh/authorized_keys | cut -d\ -f5` != 0; then chown root.root /root/.ssh/authorized_keys fi - if test `stat --printf=%a /root/.ssh/authorized_keys` != 600; then - chmod 600 /root/.ssh/authorized_keys + if test `stat --printf=%a /root/.ssh/authorized_keys` != 440; then + chmod 440 /root/.ssh/authorized_keys fi fi diff --git a/global/post-tasks.d/014set-cosmos-permissions b/global/post-tasks.d/014set-cosmos-permissions new file mode 100755 index 00000000..08992b5f --- /dev/null +++ b/global/post-tasks.d/014set-cosmos-permissions @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Set Cosmos directory permissions so that +# the files cannot be read by anyone but root, +# since it's possible that the directory +# can contain files that after applying the +# overlay to / should only be readable or writable +# by root. + +set -e +self=$(basename "$0") + +if ! test -d "$COSMOS_BASE"; then + test -z "$COSMOS_VERBOSE" || echo "$self: COSMOS_BASE was not found. Aborting change of permissions." + exit 0 +fi + +args="" +if [ "x$COSMOS_VERBOSE" = "xy" ]; then + args="-v" +fi + +chown ${args} root:root "$COSMOS_BASE" +chmod ${args} 750 "$COSMOS_BASE" diff --git a/global/pre-tasks.d/014set-cosmos-permissions b/global/pre-tasks.d/014set-cosmos-permissions new file mode 100755 index 00000000..08992b5f --- /dev/null +++ b/global/pre-tasks.d/014set-cosmos-permissions @@ -0,0 +1,24 @@ +#!/bin/sh +# +# Set Cosmos directory permissions so that +# the files cannot be read by anyone but root, +# since it's possible that the directory +# can contain files that after applying the +# overlay to / should only be readable or writable +# by root. + +set -e +self=$(basename "$0") + +if ! test -d "$COSMOS_BASE"; then + test -z "$COSMOS_VERBOSE" || echo "$self: COSMOS_BASE was not found. Aborting change of permissions." 
+ exit 0 +fi + +args="" +if [ "x$COSMOS_VERBOSE" = "xy" ]; then + args="-v" +fi + +chown ${args} root:root "$COSMOS_BASE" +chmod ${args} 750 "$COSMOS_BASE" diff --git a/global/pre-tasks.d/015set-overlay-permissions b/global/pre-tasks.d/015set-overlay-permissions index 37f98441..205180b9 100755 --- a/global/pre-tasks.d/015set-overlay-permissions +++ b/global/pre-tasks.d/015set-overlay-permissions @@ -14,10 +14,17 @@ if ! test -d "$MODEL_OVERLAY"; then exit 0 fi +args="" +if [ "x$COSMOS_VERBOSE" = "xy" ]; then + args="-v" +fi + if [ -d "$MODEL_OVERLAY/root" ]; then - args="" - if [ "x$COSMOS_VERBOSE" = "xy" ]; then - args="-v" - fi + chown ${args} root:root "$MODEL_OVERLAY"/root chmod ${args} 0700 "$MODEL_OVERLAY"/root fi + +if [ -d "$MODEL_OVERLAY/root/.ssh" ]; then + chown ${args} -R root:root "$MODEL_OVERLAY"/root/.ssh + chmod ${args} 0700 "$MODEL_OVERLAY"/root/.ssh +fi