#!/bin/bash
#
# Simplify running cosmos, serialized with flock(1).
#
# Modes:
#   fleetlock-unlock   only release the fleetlock lock (skipped with exit 0
#                      if another instance currently holds the local lock)
#   anything else      all arguments are passed on to "cosmos ... update" and
#                      "cosmos ... apply"
#
# NOTE: the local lock is mandatory. If it cannot be taken (another instance
# is running, the lock file cannot be opened, or flock itself cannot be
# executed) the run is aborted.
#

# Declared separately from the assignment so "readonly" does not mask the
# exit status of the command substitution.
PROGNAME=$(basename -- "$0")
readonly PROGNAME
readonly LOCKFILE_DIR=/tmp
readonly LOCK_FD=200
readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf
readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable
readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock
readonly FLEETLOCK_UNLOCK_SERVICE=run-cosmos-fleetlock-unlocker.service
readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy
readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable

#######################################
# Take a non-blocking exclusive lock on $LOCKFILE_DIR/<prefix>.lock.
# Globals:   LOCKFILE_DIR (read), LOCK_FD (read)
# Arguments: $1 - lock file name prefix
#            $2 - file descriptor number to use (default: $LOCK_FD)
# Returns:   0 if the lock was acquired, 1 otherwise
#######################################
lock() {
    local prefix=$1
    local fd=${2:-$LOCK_FD}
    local lock_file=$LOCKFILE_DIR/$prefix.lock

    # Create the lock file and open it on the requested descriptor. eval is
    # needed because the descriptor number is held in a variable. The path is
    # only expanded in eval's second pass, inside double quotes, so unusual
    # characters in the program name are never re-parsed as shell syntax.
    eval "exec ${fd}>\"\${lock_file}\"" || return 1

    # Acquire the lock without waiting. The descriptor is never closed by
    # this script.
    flock -n "$fd" || return 1
    return 0
}

#######################################
# Print a message and exit with status 1.
# Arguments: the message (all arguments joined with spaces)
# Outputs:   the message on stdout
#######################################
eexit() {
    local error_str="$*"

    printf '%s\n' "$error_str"
    exit 1
}

#######################################
# Print a message and exit with status 0.
# Arguments: the message (all arguments joined with spaces)
# Outputs:   the message on stdout
#######################################
oexit() {
    local info_str="$*"

    printf '%s\n' "$info_str"
    exit 0
}

#######################################
# Enable $FLEETLOCK_UNLOCK_SERVICE if systemd knows a unit file for it.
# Globals:   FLEETLOCK_UNLOCK_SERVICE (read)
# Returns:   0 if there is no unit file or the unit is already enabled,
#            otherwise the exit status of "systemctl enable"
#######################################
fleetlock_enable_unlock_service() {
    local need_reload
    local unit_file

    # In case e.g. the unit file has been removed "FragmentPath" will still
    # return the old filename until daemon-reload is called, so do that here
    # before we try checking for the FragmentPath.
    need_reload=$(systemctl show --property NeedDaemonReload "$FLEETLOCK_UNLOCK_SERVICE" \
        | awk -F= '{print $2}')
    if [ "$need_reload" = "yes" ]; then
        systemctl daemon-reload
    fi

    unit_file=$(systemctl show --property FragmentPath "$FLEETLOCK_UNLOCK_SERVICE" \
        | awk -F= '{print $2}')
    if [ -z "$unit_file" ]; then
        # No unit file matching the service name, do nothing
        return 0
    fi

    # Enable the service if needed
    systemctl is-enabled --quiet "$FLEETLOCK_UNLOCK_SERVICE" \
        || systemctl enable --quiet "$FLEETLOCK_UNLOCK_SERVICE"
}

#######################################
# Take the fleetlock lock when fleetlock is in use on this machine, i.e. the
# disable file is absent, the config file exists and the tool is executable.
# Globals:   FLEETLOCK_DISABLE_FILE, FLEETLOCK_CONFIG, FLEETLOCK_TOOL (read)
# Outputs:   progress/error messages on stdout
# Returns:   0 on success or when fleetlock is not in use, 1 on failure
#######################################
fleetlock_lock() {
    if [ ! -f "$FLEETLOCK_DISABLE_FILE" ] && [ -f "$FLEETLOCK_CONFIG" ] && [ -x "$FLEETLOCK_TOOL" ]; then
        # Make sure the unlock service is enabled before we take a lock, in
        # case cosmos ends up rebooting the machine before fleetlock_unlock()
        # is called.
        fleetlock_enable_unlock_service || return 1

        # The sourced config is expected to assign fleetlock_group; declaring
        # it local first keeps that assignment inside this function.
        local fleetlock_group=""
        # shellcheck source=/dev/null
        . "$FLEETLOCK_CONFIG" || return 1
        if [ -z "$fleetlock_group" ]; then
            echo "Unable to set fleetlock_group"
            return 1
        fi

        echo "Getting fleetlock lock"
        "$FLEETLOCK_TOOL" --lock-group "$fleetlock_group" --lock || return 1
    fi
    return 0
}

#######################################
# Release the fleetlock lock when fleetlock is in use on this machine. The
# lock is only released if machine_is_healthy succeeds.
# Globals:   FLEETLOCK_DISABLE_FILE, FLEETLOCK_CONFIG, FLEETLOCK_TOOL (read)
# Outputs:   progress/error messages on stdout
# Returns:   0 on success or when fleetlock is not in use, 1 on failure
#######################################
fleetlock_unlock() {
    if [ ! -f "$FLEETLOCK_DISABLE_FILE" ] && [ -f "$FLEETLOCK_CONFIG" ] && [ -x "$FLEETLOCK_TOOL" ]; then
        local fleetlock_group=""
        # shellcheck source=/dev/null
        . "$FLEETLOCK_CONFIG" || return 1
        if [ -z "$fleetlock_group" ]; then
            echo "Unable to set fleetlock_group"
            return 1
        fi

        # Keep holding the lock if the machine does not pass its checks.
        machine_is_healthy || return 1

        echo "Releasing fleetlock lock"
        "$FLEETLOCK_TOOL" --lock-group "$fleetlock_group" --unlock || return 1
    fi
    return 0
}

#######################################
# Run the health check tool unless health checks are disabled or the tool is
# not executable. An optional fleetlock_healthcheck_timeout value from the
# fleetlock config is passed to the tool as "--timeout <value>".
# Globals:   HEALTHCHECK_DISABLE_FILE, HEALTHCHECK_TOOL, FLEETLOCK_CONFIG (read)
# Outputs:   progress message on stdout
# Returns:   0 if healthy or the check is skipped, 1 otherwise
#######################################
machine_is_healthy() {
    if [ ! -f "$HEALTHCHECK_DISABLE_FILE" ] && [ -x "$HEALTHCHECK_TOOL" ]; then
        local fleetlock_healthcheck_timeout=""
        local optional_args=()
        # shellcheck source=/dev/null
        . "$FLEETLOCK_CONFIG" || return 1
        if [ -n "$fleetlock_healthcheck_timeout" ]; then
            optional_args+=("--timeout")
            optional_args+=("$fleetlock_healthcheck_timeout")
        fi

        echo "Running any health checks"
        "$HEALTHCHECK_TOOL" "${optional_args[@]}" || return 1
    fi
    return 0
}

#######################################
# Full run: take the local and fleetlock locks, run cosmos update + apply,
# release the fleetlock lock, touch the stamp file, remove puppet reports
# older than 10 days and reboot if /cosmos-reboot exists.
# Arguments: passed on unchanged to both cosmos invocations
#######################################
main() {
    lock "$PROGNAME" || eexit "Only one instance of $PROGNAME can run at one time."
    fleetlock_lock || eexit "Unable to acquire fleetlock lock."

    # The exit statuses of the cosmos commands are not checked here.
    cosmos "$@" update
    cosmos "$@" apply

    fleetlock_unlock || eexit "Unable to release fleetlock lock."

    touch /var/run/last-cosmos-ok.stamp

    find /var/lib/puppet/reports/ -type f -mtime +10 -print0 | xargs -0 rm -f

    if [ -f /cosmos-reboot ]; then
        rm -f /cosmos-reboot
        reboot
    fi
}

# Most of the time we just pass on any arguments to the underlying cosmos
# tools, if adding special cases here make sure to not shadow any arguments
# (like "-v") which users expect to be passed on to cosmos.
case "$1" in
    "fleetlock-unlock")
        lock "$PROGNAME" || oexit "$PROGNAME appears locked by a running run-cosmos, let it handle unlocking instead."
        fleetlock_unlock || eexit "Unable to release fleetlock lock."
        ;;
    *)
        main "$@"
        ;;
esac