#!/usr/bin/env python3.9
# vim: set filetype=python:
"""Rolling reboot and upgrade of the Nextcloud nodes of one customer.

For node 3, 2 and 1 (in that order) the script schedules monitoring
downtime, runs cosmos, reboots the node and waits until the smoketest
passes.  Once all nodes are back it runs the Nextcloud upgrade and repair
commands on node 3.

NOTE(review): the customer, environment and API key values in ``main`` look
like template placeholders that are substituted when this file is rendered;
keep template delimiter sequences out of comments in this file.
"""

import subprocess
import sys
import time
import urllib.parse

import requests
from drive_utils import (build_fqdn, run_remote_command,
                         smoketest_nextcloud_node)

# Monitoring hosts; the test monitor is the default for the helpers below.
TEST_MONITOR_HOST = 'monitor.drive.test.sunet.se'
PROD_MONITOR_HOST = 'monitor.drive.sunet.se'

# Length of the downtime window scheduled around a reboot: 10 minutes.
DOWNTIME_SECONDS = 10 * 60

# Upper bound (seconds) for every HTTP call to the monitor.  Without a
# timeout, requests waits forever on an unresponsive server, which would
# stall the whole rolling upgrade.
REQUEST_TIMEOUT = 30


def _post_command(post_url: str, headers: dict, data=None) -> None:
    """POST a command to the monitor API; warn (do not abort) if rejected."""
    response = requests.post(post_url, data=data, headers=headers,
                             timeout=REQUEST_TIMEOUT)
    if not response.ok:
        # Downtime handling is auxiliary to the upgrade, so a rejected
        # command is reported but does not stop the run.
        print("\tWarning: {} returned HTTP {}".format(
            post_url, response.status_code), file=sys.stderr)


def add_downtime(fqdn: str, apikey: str,
                 monitor_host: str = TEST_MONITOR_HOST) -> None:
    """Schedule a 10 minute downtime for ``fqdn`` on the monitor.

    Sends the ``schedule_host_svc_downtime`` command to the Thruk REST API
    on ``monitor_host``.  Does nothing when ``apikey`` is empty.

    Raises:
        requests.RequestException: if the monitor cannot be reached or does
            not answer within ``REQUEST_TIMEOUT`` seconds.
    """
    if not apikey:
        return
    print("Adding downtime for: {}".format(fqdn))
    action = 'schedule_host_svc_downtime'
    start_time = int(time.time())
    data = {
        'comment_data': 'Reboot from script',
        'start_time': start_time,
        'end_time': start_time + DOWNTIME_SECONDS,
    }
    post_url = 'https://{}/thruk/r/hosts/{}/cmd/{}'.format(
        monitor_host, fqdn, action)
    headers = {'X-Thruk-Auth-Key': apikey}
    _post_command(post_url, headers, data=data)


def remove_downtime(fqdn: str, apikey: str,
                    monitor_host: str = TEST_MONITOR_HOST) -> None:
    """Remove the active downtimes of every service of ``fqdn``.

    Looks up the services of the host through the Thruk REST API on
    ``monitor_host`` and sends ``del_active_service_downtimes`` for each of
    them.  Does nothing when ``apikey`` is empty or when the monitor does
    not know the host.

    Raises:
        requests.RequestException: if the monitor cannot be reached, does
            not answer within ``REQUEST_TIMEOUT`` seconds, or answers the
            host lookup with an HTTP error status.
    """
    if not apikey:
        return
    print("Removing downtime for: {}".format(fqdn))
    get_url = 'https://{}/thruk/r/hosts?name={}&columns=services'.format(
        monitor_host, fqdn)
    headers = {'X-Thruk-Auth-Key': apikey}
    req = requests.get(get_url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail with a clear HTTP error instead of an obscure lookup error when
    # the reply is an error document rather than a host list.
    req.raise_for_status()
    hosts = req.json()
    if not hosts:
        # The same host is queried on both monitors; one that does not know
        # it answers with an empty list, which simply means nothing to do.
        print("\tNo host named {} on {}, no downtime to remove".format(
            fqdn, monitor_host), file=sys.stderr)
        return
    action = 'del_active_service_downtimes'
    for service in hosts[0]['services']:
        # Service names may contain characters that are not URL safe.
        post_url = 'https://{}/thruk/r/services/{}/{}/cmd/{}'.format(
            monitor_host, fqdn, urllib.parse.quote(service), action)
        _post_command(post_url, headers)


def run_command(command: list[str]) -> tuple:
    """Run ``command`` locally and capture its output.

    Returns:
        A ``(reply, errs)`` tuple: ``reply`` is stdout decoded to ``str``
        with leading and trailing newlines stripped, ``errs`` is the raw
        stderr as returned by ``communicate()``.
    """
    with subprocess.Popen(command,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE) as proc:
        outs, errs = proc.communicate()
    try:
        reply = outs.decode().strip('\n')
    except AttributeError:
        # No stdout object to decode; report an empty reply.
        reply = str()
    return (reply, errs)


def _wait_for_smoketest(fqdn: str) -> bool:
    """Poll the smoketest on ``fqdn`` until it passes or attempts run out.

    Sleeps 31, 29, ... 1 seconds before the successive attempts, so the
    node gets the longest pause right after the reboot was issued.
    """
    for delay in reversed(range(1, 32, 2)):
        print("\tSleeping for {} seconds before smoketest on {}".format(
            delay, fqdn))
        time.sleep(delay)
        if smoketest_nextcloud_node(fqdn):
            return True
    return False


def main() -> int:
    """Reboot all nodes one by one, then upgrade and repair Nextcloud.

    Returns:
        0 when every node came back and the upgrade commands were issued,
        6 as soon as a node fails its smoketest after the reboot.
    """
    # Values substituted when this file is rendered from its template.
    customer = "<%= @customer %>"
    environment = "<%= @environment %>"
    apikey_test = "<%= @apikey_test %>"
    apikey_prod = "<%= @apikey_prod %>"

    cosmos_command = ['sudo run-cosmos']
    nc_upgrade_command = (
        'sudo /usr/local/bin/occ config:editable --on '
        '&& sudo /usr/local/bin/occ upgrade '
        '&& sudo /usr/local/bin/occ config:editable --off'
    )
    repair_command = (
        'sudo /usr/local/bin/occ config:editable --on '
        '&& sudo /usr/local/bin/occ maintenance:repair '
        '&& sudo /usr/local/bin/occ config:editable --off '
        '&& sudo /usr/local/bin/occ db:add-missing-indices '
        '&& sudo /usr/local/bin/occ db:add-missing-columns '
        '&& sudo /usr/local/bin/occ db:add-missing-primary-keys'
    )
    reboot_command = ['sudo /usr/local/bin/safer_reboot']

    server_type = "node"
    if customer == "common":
        customer = "gss"
        server_type = "gss"

    # Nodes are handled one at a time, highest number first (3, 2, 1).
    for number in reversed(range(1, 4)):
        fqdn = build_fqdn(customer, environment, number, server_type)
        # Both monitors are addressed; an empty API key skips that monitor.
        add_downtime(fqdn, apikey_test)
        add_downtime(fqdn, apikey_prod, monitor_host=PROD_MONITOR_HOST)

        print("Upgrading: {}".format(fqdn))
        print("\tRunning cosmos command at {}".format(fqdn))
        run_remote_command(fqdn, cosmos_command, user="script",
                           output=subprocess.DEVNULL)
        print("\tRunning reboot command at {}".format(fqdn))
        run_remote_command(fqdn, reboot_command, user="script")

        success = _wait_for_smoketest(fqdn)

        # Downtime is lifted whether or not the node came back.
        remove_downtime(fqdn, apikey_test)
        remove_downtime(fqdn, apikey_prod, monitor_host=PROD_MONITOR_HOST)

        if success:
            print("Upgrade cycle succeeded on {} ".format(fqdn))
        else:
            print("Smoketest failed on {} after server reboot command".format(
                fqdn))
            return 6

    # The Nextcloud upgrade and repair are issued once, on node 3.
    fqdn = build_fqdn(customer, environment, 3, server_type)
    print("Running nextcloud upgrade command at {}".format(fqdn))
    run_remote_command(fqdn, [nc_upgrade_command], user="script", tty=True)
    print("Running repair command on {}".format(fqdn))
    run_remote_command(fqdn, [repair_command], user="script", tty=True)
    print("All {}-servers successfully upgraded for {}".format(
        environment, customer))
    return 0


if __name__ == "__main__":
    sys.exit(main())