run-cosmos: support fleetlock unlocking at boot

This extends run-cosmos with a new argument that calls the unlock
function already included in the script. It also uses the existing
lock() function to make sure there is no race between the bootup
process and cron starting a normal run-cosmos process at the same
time.

The oexit() function is added to support exiting with an OK exit value,
the same way eexit() is used to signal that something is wrong.

This change also adds the systemd unit file that runs run-cosmos with the
new fleetlock-unlock argument at boot if fleetlock is configured.

While here fix indentation that was mixed between 3 and 4 spaces: it is
now 4 spaces everywhere.
This commit is contained in:
Patrik Lundin 2024-01-24 14:58:21 +01:00
parent cacb97a22c
commit 4b93d9c426
Signed by untrusted user: patlu
GPG key ID: A0A812BA2249F294
2 changed files with 104 additions and 43 deletions

View file

@ -0,0 +1,11 @@
# Runs "run-cosmos fleetlock-unlock" once each time this unit is started.
[Unit]
Description=run-cosmos fleetlock unlocker
# Order this unit after network-online.target and pull that target in.
# NOTE(review): presumably the unlock needs to reach a remote fleetlock
# server - confirm.
After=network-online.target
Wants=network-online.target
[Service]
# oneshot: ExecStart is a single run that is expected to exit on its own.
Type=oneshot
ExecStart=/usr/local/bin/run-cosmos fleetlock-unlock
[Install]
# Enabling the unit attaches it to multi-user.target.
WantedBy=multi-user.target

View file

@ -9,6 +9,7 @@ readonly LOCK_FD=200
readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf
readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable
readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock
readonly FLEETLOCK_UNLOCK_SERVICE=run-cosmos-fleetlock-unlocker.service
readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy
readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable
@ -33,8 +34,38 @@ eexit() {
exit 1 exit 1
} }
# Print an informational message and terminate the script successfully.
#
# Arguments: all arguments are joined into one message line.
# Outputs:   the message on stdout.
# Exits:     always, with status 0.
oexit() {
    echo "$*"
    exit 0
}
# Make sure the systemd service named by $FLEETLOCK_UNLOCK_SERVICE is enabled.
#
# Globals:  FLEETLOCK_UNLOCK_SERVICE (read)
# Returns:  0 if systemd reports no unit file for the service, or if the
#           service is already enabled; otherwise the exit status of
#           "systemctl enable".
fleetlock_enable_unlock_service() {
    # Scratch variables: keep them local so they do not leak into the rest of
    # the script.
    local need_reload unit_file

    # In case e.g. the unit file has been removed "FragmentPath" will still
    # return the old filename until daemon-reload is called, so do that here
    # before we try checking for the FragmentPath.
    need_reload=$(systemctl show --property NeedDaemonReload "$FLEETLOCK_UNLOCK_SERVICE" | awk -F= '{print $2}')
    if [ "$need_reload" = "yes" ]; then
        systemctl daemon-reload
    fi

    unit_file=$(systemctl show --property FragmentPath "$FLEETLOCK_UNLOCK_SERVICE" | awk -F= '{print $2}')
    if [ -z "$unit_file" ]; then
        # No unit file matching the service name, do nothing
        return 0
    fi

    # Enable the service if needed
    systemctl is-enabled --quiet "$FLEETLOCK_UNLOCK_SERVICE" || systemctl enable --quiet "$FLEETLOCK_UNLOCK_SERVICE"
}
fleetlock_lock() { fleetlock_lock() {
if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then
# Make sure the unlock service is enabled before we take a lock if
# cosmos ends up rebooting the machine before fleetlock_unlock() is
# called.
fleetlock_enable_unlock_service || return 1
local fleetlock_group="" local fleetlock_group=""
# shellcheck source=/dev/null # shellcheck source=/dev/null
. $FLEETLOCK_CONFIG || return 1 . $FLEETLOCK_CONFIG || return 1
@ -66,8 +97,16 @@ fleetlock_unlock() {
# Run the optional machine health check tool.
#
# The check is skipped, and the machine treated as healthy, when
# $HEALTHCHECK_DISABLE_FILE exists or $HEALTHCHECK_TOOL is not executable.
# Otherwise $FLEETLOCK_CONFIG is sourced and, if it sets
# fleetlock_healthcheck_timeout, the value is handed to the tool as
# "--timeout <value>".
#
# Globals:  HEALTHCHECK_DISABLE_FILE, HEALTHCHECK_TOOL, FLEETLOCK_CONFIG (read)
# Returns:  0 if the check passed or was skipped, 1 if the config could not
#           be sourced or the tool exited non-zero.
machine_is_healthy() {
    if [ ! -f "$HEALTHCHECK_DISABLE_FILE" ] && [ -x "$HEALTHCHECK_TOOL" ]; then
        # Declared local before sourcing so a value set by the config file
        # stays inside this function.
        local fleetlock_healthcheck_timeout=""
        local optional_args=()

        # shellcheck source=/dev/null
        . "$FLEETLOCK_CONFIG" || return 1

        if [ -n "$fleetlock_healthcheck_timeout" ]; then
            optional_args+=("--timeout" "$fleetlock_healthcheck_timeout")
        fi

        echo "Running any health checks"
        "$HEALTHCHECK_TOOL" "${optional_args[@]}" || return 1
    fi

    return 0
}
@ -82,11 +121,22 @@ main () {
touch /var/run/last-cosmos-ok.stamp touch /var/run/last-cosmos-ok.stamp
find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f
}
main "$@"
if [ -f /cosmos-reboot ]; then if [ -f /cosmos-reboot ]; then
rm -f /cosmos-reboot rm -f /cosmos-reboot
reboot reboot
fi fi
}
# Entry point: dispatch on the first argument.
#
# Nearly everything is handed straight to main(), which passes the arguments
# on to the underlying cosmos tools. When adding a special case below, do not
# shadow an argument (like "-v") that users expect to reach cosmos.
case "$1" in
    fleetlock-unlock)
        # If the lock cannot be taken, leave the unlocking to whoever holds
        # it and exit successfully.
        if ! lock "$PROGNAME"; then
            oexit "$PROGNAME appears locked by a running run-cosmos, let it handle unlocking instead."
        fi
        if ! fleetlock_unlock; then
            eexit "Unable to release fleetlock lock."
        fi
        ;;
    *)
        main "$@"
        ;;
esac