From 01768129f0ce4a7551cb39e46e64c034fcd5606d Mon Sep 17 00:00:00 2001 From: Patrik Lundin Date: Wed, 3 Jul 2024 13:27:52 +0200 Subject: [PATCH 1/2] fleetlock: configurable lock/unlock timeout While we already support setting a healthcheck timeout it probably makes sense to be able to control how long we wait for a fleetlock_lock() or fleetlock_unlock() call. This becomes important if only running cosmos once a night or something like that. In that case you probably want to give a physical machine more than 1 minute to complete a reboot etc. This can now be controlled by setting fleetlock_lock_timeout and fleetlock_unlock_timeout in /etc/run-cosmos-fleetlock-conf. Keep in mind that while it can make sense to increase the time for taking a lock, releasing a lock should always be fast (either you have it and release it, or you don't have it and it is a no-op) so setting a long unlock timeout should probably never be done. Since we also potentially wait the unlock timeout at boot (if the fleetlock server is broken etc) that is another reason to keep it short. The default 1m is probably OK for most uses. --- global/overlay/usr/local/bin/run-cosmos | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/global/overlay/usr/local/bin/run-cosmos b/global/overlay/usr/local/bin/run-cosmos index 7274b62..268fd6a 100755 --- a/global/overlay/usr/local/bin/run-cosmos +++ b/global/overlay/usr/local/bin/run-cosmos @@ -67,14 +67,19 @@ fleetlock_lock() { # called. fleetlock_enable_unlock_service || return 1 local fleetlock_group="" + local optional_args=() # shellcheck source=/dev/null . 
$FLEETLOCK_CONFIG || return 1 if [ -z "$fleetlock_group" ]; then echo "Unable to set fleetlock_group" return 1 fi + if [ -n "$fleetlock_lock_timeout" ]; then + optional_args+=("--timeout") + optional_args+=("$fleetlock_lock_timeout") + fi echo "Getting fleetlock lock" - $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock || return 1 + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --lock "${optional_args[@]}" || return 1 fi return 0 } @@ -82,15 +87,20 @@ fleetlock_lock() { fleetlock_unlock() { if [ ! -f $FLEETLOCK_DISABLE_FILE ] && [ -f $FLEETLOCK_CONFIG ] && [ -x $FLEETLOCK_TOOL ]; then local fleetlock_group="" + local optional_args=() # shellcheck source=/dev/null . $FLEETLOCK_CONFIG || return 1 if [ -z "$fleetlock_group" ]; then echo "Unable to set fleetlock_group" return 1 fi + if [ -n "$fleetlock_unlock_timeout" ]; then + optional_args+=("--timeout") + optional_args+=("$fleetlock_unlock_timeout") + fi machine_is_healthy || return 1 echo "Releasing fleetlock lock" - $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock || return 1 + $FLEETLOCK_TOOL --lock-group "$fleetlock_group" --unlock "${optional_args[@]}" || return 1 fi return 0 } From aa88795ee09e1fb0ffd1b0ffee948e1ad4f33dd9 Mon Sep 17 00:00:00 2001 From: Patrik Lundin Date: Wed, 3 Jul 2024 14:13:22 +0200 Subject: [PATCH 2/2] sunet-fleetlock: also handle ReadTimeout Turns out this was not caught by ConnectionError. 
--- global/overlay/usr/local/bin/sunet-fleetlock | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/global/overlay/usr/local/bin/sunet-fleetlock b/global/overlay/usr/local/bin/sunet-fleetlock index 6f0b557..64effec 100755 --- a/global/overlay/usr/local/bin/sunet-fleetlock +++ b/global/overlay/usr/local/bin/sunet-fleetlock @@ -97,7 +97,10 @@ def do_fleetlock_request( timeout=args.request_timeout, auth=("", config[args.lock_group]["password"]), ) - except requests.exceptions.ConnectionError as e: + except ( + requests.exceptions.ConnectionError, + requests.exceptions.ReadTimeout, + ) as e: print(f"POST request failed: {e}") time.sleep(retry_sleep_delay) continue