Patrik Lundin
7baf9affb1
Makes run-cosmos request a fleetlock lock before running cosmos "update" and "apply" steps. This is helpful for making sure only one (or several) machine out of some set of machines runs cosmos changes at a time. This way if cosmos (or puppet) decides that a service needs to be restarted this will only happen on a subset of machines at a time. When the cosmos "apply" is done a fleetlock unlock request will be performed so the other machines can progress. The unlock code in run-cosmos will also run the new tool sunet-machine-healthy to decide things are good before unlocking. This way if a restarted service breaks this will stop the unlock attempt and in turn make it so the others should not break their service as well, giving an operator time to figure out what is wrong.
93 lines
2.4 KiB
Bash
Executable file
93 lines
2.4 KiB
Bash
Executable file
#!/bin/bash
|
|
#
# Simplify running cosmos. Runs on this host are serialized with flock; the
# script exits instead of running when that lock cannot be taken.
#
|
|
|
|
# Name this script was invoked as; main() uses it as the lock name.
# Assigned first and marked readonly afterwards so a failing basename is not
# masked by the exit status of `readonly` (ShellCheck SC2155).
PROGNAME=$(basename -- "$0")
readonly PROGNAME

# Directory in which lock() creates "<name>.lock".
# NOTE(review): /tmp is world-writable and the lock name is predictable;
# a root-only directory may be preferable - confirm nothing else relies on
# this path before changing it.
readonly LOCKFILE_DIR=/tmp

# Default file descriptor number lock() keeps the lock file open on.
readonly LOCK_FD=200

# File sourced by the fleetlock helpers; it must set fleetlock_group.
readonly FLEETLOCK_CONFIG=/etc/run-cosmos-fleetlock-conf

# When this file exists the fleetlock helpers do nothing.
readonly FLEETLOCK_DISABLE_FILE=/etc/run-cosmos-fleetlock-disable

# Tool invoked with --lock-group <group> and --lock / --unlock.
readonly FLEETLOCK_TOOL=/usr/local/bin/sunet-fleetlock

# Tool whose exit status decides whether the machine counts as healthy.
readonly HEALTHCHECK_TOOL=/usr/local/bin/sunet-machine-healthy

# When this file exists the health check is skipped.
readonly HEALTHCHECK_DISABLE_FILE=/etc/run-cosmos-healthcheck-disable
|
|
|
|
#######################################
# Take an exclusive, non-blocking flock lock for this process.
# Globals:   LOCKFILE_DIR (read), LOCK_FD (read)
# Arguments: $1 - lock name; the lock file is $LOCKFILE_DIR/$1.lock
#            $2 - file descriptor number to hold the lock file open on
#                 (default: $LOCK_FD)
# Returns:   0 if the lock was acquired, 1 otherwise
#######################################
lock() {
  local prefix=$1
  local fd=${2:-$LOCK_FD}
  local lock_file=$LOCKFILE_DIR/$prefix.lock

  # The descriptor number is spliced into the eval below, so refuse anything
  # that is not a plain number.
  [[ $fd =~ ^[0-9]+$ ]] || return 1

  # Create the lock file and keep it open on $fd. The descriptor of a
  # redirection cannot come from a variable without eval, so only the
  # validated number is expanded here; the escaped file name is expanded by
  # eval itself, which keeps whitespace and metacharacters in it literal.
  eval "exec $fd>\"\$lock_file\"" || return 1

  # Acquire the lock without waiting; fails if it is already held elsewhere.
  flock -n "$fd" || return 1
  return 0
}
|
|
|
|
#######################################
# Print an error message and terminate the script.
# Arguments: all arguments, joined with spaces, form the message
# Outputs:   the message on stderr
# Returns:   never returns; exits with status 1
#######################################
eexit() {
  # Diagnostics go to stderr. printf keeps a message such as "-n" from being
  # interpreted as an echo option.
  printf '%s\n' "$*" >&2
  exit 1
}
|
|
|
|
#######################################
# Request the fleetlock lock for this machine's lock group.
# Does nothing (and succeeds) when the disable file exists, the config file
# is missing, or the fleetlock tool is not executable.
# Globals:   FLEETLOCK_DISABLE_FILE, FLEETLOCK_CONFIG, FLEETLOCK_TOOL (read)
# Outputs:   progress message on stdout, error message on stderr
# Returns:   0 on success or when skipped; 1 if the config cannot be
#            sourced, leaves fleetlock_group empty, or the tool fails
#######################################
fleetlock_lock() {
  # Paths are quoted so a value containing whitespace cannot turn the test
  # into an error that silently skips the lock.
  if [[ -f "$FLEETLOCK_DISABLE_FILE" || ! -f "$FLEETLOCK_CONFIG" || ! -x "$FLEETLOCK_TOOL" ]]; then
    return 0
  fi

  # Declared local before sourcing so the assignment made by the config
  # file stays inside this function.
  local fleetlock_group=""
  # shellcheck source=/dev/null
  . "$FLEETLOCK_CONFIG" || return 1
  if [[ -z "$fleetlock_group" ]]; then
    echo "Unable to set fleetlock_group" >&2
    return 1
  fi

  echo "Getting fleetlock lock"
  "$FLEETLOCK_TOOL" --lock-group "$fleetlock_group" --lock || return 1
  return 0
}
|
|
|
|
#######################################
# Release the fleetlock lock for this machine's lock group, but only after
# machine_is_healthy succeeds.
# Does nothing (and succeeds) when the disable file exists, the config file
# is missing, or the fleetlock tool is not executable.
# Globals:   FLEETLOCK_DISABLE_FILE, FLEETLOCK_CONFIG, FLEETLOCK_TOOL (read)
# Outputs:   progress message on stdout, error message on stderr
# Returns:   0 on success or when skipped; 1 if the config cannot be
#            sourced, leaves fleetlock_group empty, the health check fails,
#            or the tool fails
#######################################
fleetlock_unlock() {
  # Paths are quoted so a value containing whitespace cannot turn the test
  # into an error that silently skips the unlock.
  if [[ -f "$FLEETLOCK_DISABLE_FILE" || ! -f "$FLEETLOCK_CONFIG" || ! -x "$FLEETLOCK_TOOL" ]]; then
    return 0
  fi

  # Declared local before sourcing so the assignment made by the config
  # file stays inside this function.
  local fleetlock_group=""
  # shellcheck source=/dev/null
  . "$FLEETLOCK_CONFIG" || return 1
  if [[ -z "$fleetlock_group" ]]; then
    echo "Unable to set fleetlock_group" >&2
    return 1
  fi

  # Keep holding the lock when the machine is not healthy.
  machine_is_healthy || return 1

  echo "Releasing fleetlock lock"
  "$FLEETLOCK_TOOL" --lock-group "$fleetlock_group" --unlock || return 1
  return 0
}
|
|
|
|
#######################################
# Run the health check tool, if one is installed and not disabled.
# Globals:   HEALTHCHECK_DISABLE_FILE, HEALTHCHECK_TOOL (read)
# Outputs:   progress message on stdout, plus whatever the tool prints
# Returns:   0 if the check passed or was skipped, 1 if the tool failed
#######################################
machine_is_healthy() {
  # Paths are quoted so a value containing whitespace cannot turn the test
  # into an error that silently skips the check and reports "healthy".
  if [[ -f "$HEALTHCHECK_DISABLE_FILE" || ! -x "$HEALTHCHECK_TOOL" ]]; then
    return 0
  fi

  echo "Running any health checks"
  "$HEALTHCHECK_TOOL" || return 1
  return 0
}
|
|
|
|
#######################################
# Run one cosmos cycle ("update", then "apply") while holding the local lock
# and the fleetlock lock, then record the run and prune old puppet reports.
# Arguments: all arguments are passed to cosmos ahead of the step name
# Returns:   exits with status 1 through eexit when a lock cannot be taken
#            or released
#######################################
main () {
  if ! lock "$PROGNAME"; then
    eexit "Only one instance of $PROGNAME can run at one time."
  fi
  if ! fleetlock_lock; then
    eexit "Unable to acquire fleetlock lock."
  fi

  # NOTE(review): the exit status of both cosmos steps is ignored, so the
  # stamp file below is touched even when a step failed - confirm this
  # best-effort behaviour is intended.
  local step
  for step in update apply; do
    cosmos "$@" "$step"
  done

  if ! fleetlock_unlock; then
    eexit "Unable to release fleetlock lock."
  fi

  touch /var/run/last-cosmos-ok.stamp

  # Remove puppet report files whose modification time is more than 10 days
  # back; NUL-delimited so unusual file names are handled safely.
  find /var/lib/puppet/reports/ -type f -mtime +10 -print0 \
    | xargs -0 rm -f
}
|
|
|
|
# Script entry point: hand every command line argument to main.
main "$@"

# After the run, a regular file at this path triggers a reboot. The file is
# removed before rebooting.
reboot_flag=/cosmos-reboot
if [[ -f "$reboot_flag" ]]; then
  rm -f "$reboot_flag"
  reboot
fi
|