From 4b8b8887f62761759486940b81ea1142af6ae8bb Mon Sep 17 00:00:00 2001 From: Patrik Lundin Date: Mon, 17 Jun 2024 11:54:28 +0200 Subject: [PATCH 1/2] sunet-fleetlock: handle connection errors In order to handle upgrades of the fleetlock server when running only one server we need to handle connection errors like connection refused or timed out errors gracefully. Because there are several different ways the connection can fail and it is hard to keep track of them all, just catch everything. We then also need special handling of our own timeout exception so we are not accidentally stuck retrying forever. Also fix so we actually use the request_timeout arg for individual HTTP requests instead of the global timeout. While here run isort to keep imports tidy. --- global/overlay/usr/local/bin/sunet-fleetlock | 37 +++++++++++++------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/global/overlay/usr/local/bin/sunet-fleetlock b/global/overlay/usr/local/bin/sunet-fleetlock index e2ee6d9..5c4e887 100755 --- a/global/overlay/usr/local/bin/sunet-fleetlock +++ b/global/overlay/usr/local/bin/sunet-fleetlock @@ -24,20 +24,21 @@ # When modifying this code please make sure it is passed through the following # tools: # === +# isort # black # pylint # mypy --strict # === -import platform -import sys -import signal -import time import argparse import configparser import os.path -from typing import Optional, Union +import platform +import signal +import sys +import time from types import FrameType +from typing import Optional, Union import requests @@ -80,19 +81,29 @@ def do_fleetlock_request( request_id_key = "request-id" request_id = None + retry_sleep_delay = 1 + # Loop forever: we depend on the SIGALRM timout to raise an error if it # takes too long while True: if args.verbose: print(f"{operation} POST at url {url}") - resp = requests.post( - url, - headers=fleetlock_headers, - json=fleetlock_data, - timeout=args.timeout, - auth=("", 
config[args.lock_group]["password"]), - ) + try: + resp = requests.post( + url, + headers=fleetlock_headers, + json=fleetlock_data, + timeout=args.request_timeout, + auth=("", config[args.lock_group]["password"]), + ) + except Exception as e: # pylint: disable=broad-exception-caught + if isinstance(e, TimeoutException): + # This means our global timer is up, no more time to retry + raise e + print(f"POST request failed: {e}") + time.sleep(retry_sleep_delay) + continue if request_id_key in resp.headers: request_id = resp.headers[request_id_key] @@ -126,7 +137,7 @@ def do_fleetlock_request( + f"({request_id_key}: {request_id})" ) - time.sleep(1) + time.sleep(retry_sleep_delay) def read_config(args: argparse.Namespace) -> Union[configparser.ConfigParser, None]: From e315282bc55025c199483fbb5c94d7a053d047f0 Mon Sep 17 00:00:00 2001 From: Patrik Lundin Date: Mon, 17 Jun 2024 12:40:12 +0200 Subject: [PATCH 2/2] Use more strict exception checking This is probably wide enough and we do not need weird extra handling of our own exception etc. Thanks to @mickenordin for keeping me honest :). --- global/overlay/usr/local/bin/sunet-fleetlock | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/global/overlay/usr/local/bin/sunet-fleetlock b/global/overlay/usr/local/bin/sunet-fleetlock index 5c4e887..6f0b557 100755 --- a/global/overlay/usr/local/bin/sunet-fleetlock +++ b/global/overlay/usr/local/bin/sunet-fleetlock @@ -97,10 +97,7 @@ def do_fleetlock_request( timeout=args.request_timeout, auth=("", config[args.lock_group]["password"]), ) - except Exception as e: # pylint: disable=broad-exception-caught - if isinstance(e, TimeoutException): - # This means our global timer is up, no more time to retry - raise e + except requests.exceptions.ConnectionError as e: print(f"POST request failed: {e}") time.sleep(retry_sleep_delay) continue