diff --git a/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service b/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service
new file mode 100644
index 0000000..7507ff7
--- /dev/null
+++ b/global/overlay/etc/systemd/system/run-cosmos-fleetlock-unlocker.service
@@ -0,0 +1,11 @@
+[Unit]
+Description=run-cosmos fleetlock unlocker
+After=network-online.target
+Wants=network-online.target
+
+[Service]
+Type=oneshot
+ExecStart=/usr/local/bin/run-cosmos fleetlock-unlock
+
+[Install]
+WantedBy=multi-user.target
diff --git a/global/overlay/usr/local/bin/run-cosmos b/global/overlay/usr/local/bin/run-cosmos
index 7da725e..7274b62 100755
--- a/global/overlay/usr/local/bin/run-cosmos
+++ b/global/overlay/usr/local/bin/run-cosmos
@@ -9,6 +9,7 @@ readonly LOCK_FD=200
 readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf
 readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable
 readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock
+readonly FLEETLOCK_UNLOCK_SERVICE=run-cosmos-fleetlock-unlocker.service
 readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy
 readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable
 
@@ -33,60 +34,120 @@ eexit() {
     exit 1
 }
 
+# Print the given message and exit with status 0.
+oexit() {
+    local info_str="$*"
+
+    echo "$info_str"
+    exit 0
+}
+
+# Enable the fleetlock unlock service if its unit file is installed.
+# Returns 0 when no unit file is found, otherwise the status of the
+# is-enabled/enable step.
+fleetlock_enable_unlock_service() {
+    local need_reload unit_file
+
+    # In case e.g. the unit file has been removed "FragmentPath" will still
+    # return the old filename until daemon-reload is called, so do that here
+    # before we try checking for the FragmentPath.
+    need_reload=$(systemctl show --property NeedDaemonReload "$FLEETLOCK_UNLOCK_SERVICE" | awk -F= '{print $2}')
+    if [ "$need_reload" = "yes" ]; then
+        systemctl daemon-reload
+    fi
+
+    unit_file=$(systemctl show --property FragmentPath "$FLEETLOCK_UNLOCK_SERVICE" | awk -F= '{print $2}')
+    if [ -z "$unit_file" ]; then
+        # No unit file matching the service name, do nothing
+        return 0
+    fi
+
+    # Enable the service if needed
+    systemctl is-enabled --quiet "$FLEETLOCK_UNLOCK_SERVICE" || systemctl enable --quiet "$FLEETLOCK_UNLOCK_SERVICE"
+}
+
+# Take the fleetlock lock unless fleetlock is disabled or not set up.
 fleetlock_lock() {
-	if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then
-		local fleetlock_group=""
-		# shellcheck source=/dev/null
-		. $FLEETLOCK_CONFIG || return 1
-		if [ -z "$fleetlock_group" ]; then
-			echo "Unable to set fleetlock_group"
-			return 1
-		fi
-		echo "Getting fleetlock lock"
-		$FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock || return 1
-	fi
-	return 0
+    if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then
+        # Make sure the unlock service is enabled before we take a lock if
+        # cosmos ends up rebooting the machine before fleetlock_unlock() is
+        # called.
+        fleetlock_enable_unlock_service || return 1
+        local fleetlock_group=""
+        # shellcheck source=/dev/null
+        . $FLEETLOCK_CONFIG || return 1
+        if [ -z "$fleetlock_group" ]; then
+            echo "Unable to set fleetlock_group"
+            return 1
+        fi
+        echo "Getting fleetlock lock"
+        $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock || return 1
+    fi
+    return 0
 }
 
+# Release the fleetlock lock, but only after the health checks pass.
 fleetlock_unlock() {
-	if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then
-		local fleetlock_group=""
-		# shellcheck source=/dev/null
-		. $FLEETLOCK_CONFIG || return 1
-		if [ -z "$fleetlock_group" ]; then
-			echo "Unable to set fleetlock_group"
-			return 1
-		fi
-		machine_is_healthy || return 1
-		echo "Releasing fleetlock lock"
-		$FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock || return 1
-	fi
-	return 0
+    if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then
+        local fleetlock_group=""
+        # shellcheck source=/dev/null
+        . $FLEETLOCK_CONFIG || return 1
+        if [ -z "$fleetlock_group" ]; then
+            echo "Unable to set fleetlock_group"
+            return 1
+        fi
+        machine_is_healthy || return 1
+        echo "Releasing fleetlock lock"
+        $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock || return 1
+    fi
+    return 0
 }
 
+# Run the health check tool if it is installed and not disabled, passing
+# --timeout when fleetlock_healthcheck_timeout is set in the fleetlock config.
 machine_is_healthy() {
-	if [ ! -f $HEALTHCHECK_DISABLE_FILE ] && [ -x $HEALTHCHECK_TOOL ]; then
-		echo "Running any health checks"
-		$HEALTHCHECK_TOOL || return 1
-	fi
-	return 0
+    if [ ! -f $HEALTHCHECK_DISABLE_FILE ] && [ -x $HEALTHCHECK_TOOL ]; then
+        local fleetlock_healthcheck_timeout=""
+        local optional_args=()
+        # shellcheck source=/dev/null
+        . $FLEETLOCK_CONFIG || return 1
+        if [ -n "$fleetlock_healthcheck_timeout" ]; then
+            optional_args+=("--timeout")
+            optional_args+=("$fleetlock_healthcheck_timeout")
+        fi
+        echo "Running any health checks"
+        $HEALTHCHECK_TOOL "${optional_args[@]}" || return 1
+    fi
+    return 0
 }
 
+# Run a cosmos update/apply cycle guarded by the local lock and fleetlock.
 main () {
-	lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time."
-	fleetlock_lock || eexit "Unable to acquire fleetlock lock."
-	cosmos "$@" update
-	cosmos "$@" apply
-	fleetlock_unlock || eexit "Unable to release fleetlock lock."
+    lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time."
+    fleetlock_lock || eexit "Unable to acquire fleetlock lock."
+    cosmos "$@" update
+    cosmos "$@" apply
+    fleetlock_unlock || eexit "Unable to release fleetlock lock."
 
-	touch /var/run/last-cosmos-ok.stamp
+    touch /var/run/last-cosmos-ok.stamp
 
-	find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f
+    find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f
+
+    if [ -f /cosmos-reboot ]; then
+        rm -f /cosmos-reboot
+        reboot
+    fi
 }
 
-main "$@"
-
-if [ -f /cosmos-reboot ]; then
-	rm -f /cosmos-reboot
-	reboot
-fi
+# Most of the time we just pass on any arguments to the underlying cosmos
+# tools, if adding special cases here make sure to not shadow any arguments
+# (like "-v") which users expect to be passed on to cosmos.
+case "${1:-}" in
+    "fleetlock-unlock")
+        lock "$PROGNAME" || oexit "$PROGNAME appears locked by a running run-cosmos, let it handle unlocking instead."
+        fleetlock_unlock || eexit "Unable to release fleetlock lock."
+        ;;
+    *)
+        main "$@"
+        ;;
+esac