sunetdrive/templates/script/restart-nextcloud-farm.erb

151 lines
5.3 KiB
Plaintext
Raw Permalink Normal View History

2023-11-01 14:20:23 +00:00
#!/usr/bin/env python3
2023-02-13 09:44:56 +00:00
# vim: set filetype=python:
import subprocess
import sys
import time
import urllib.parse
import requests
from drive_utils import (build_fqdn, run_remote_command,
smoketest_nextcloud_node)
def add_downtime(fqdn: str,
                 apikey: str,
                 monitor_host: str = 'monitor.drive.test.sunet.se',
                 timeout: float = 30.0) -> None:
    """Ask the monitoring host to schedule a ten minute downtime for *fqdn*.

    Posts the ``schedule_host_svc_downtime`` command to the Thruk REST
    endpoint on *monitor_host*. The call is best effort: a failure is
    reported on stdout and never raised, so a monitoring problem cannot
    abort the caller.

    Args:
        fqdn: Host name to put in downtime.
        apikey: Value sent in the ``X-Thruk-Auth-Key`` header. When empty,
            the function returns immediately without contacting anything.
        monitor_host: Host name of the monitoring server to talk to.
        timeout: Seconds to wait for the monitoring server before giving
            up. Without a timeout an unreachable monitor would block the
            script indefinitely.
    """
    if not apikey:
        return
    print("Adding downtime for: {}".format(fqdn))
    action = 'schedule_host_svc_downtime'
    start_time = int(time.time())
    end_time = start_time + (10 * 60)  # 10 minutes
    data = {
        'comment_data': 'Reboot from script',
        'start_time': start_time,
        'end_time': end_time
    }
    post_url = 'https://{}/thruk/r/hosts/{}/cmd/{}'.format(
        monitor_host, fqdn, action)
    headers = {'X-Thruk-Auth-Key': apikey}
    try:
        response = requests.post(post_url,
                                 data=data,
                                 headers=headers,
                                 timeout=timeout)
        # A 4xx/5xx answer means the downtime was not scheduled; treat it
        # as a failure instead of silently carrying on.
        response.raise_for_status()
    except requests.RequestException as err:
        print("Failed to add downtime for: {} ({})".format(fqdn, err))
2023-02-13 09:44:56 +00:00
def remove_downtime(fqdn: str,
                    apikey: str,
                    monitor_host: str = 'monitor.drive.test.sunet.se',
                    timeout: float = 30.0) -> None:
    """Delete the active service downtimes for every service on *fqdn*.

    Fetches the service list for the host from the Thruk REST endpoint on
    *monitor_host* and posts ``del_active_service_downtimes`` for each
    service. The call is best effort: the first failure is reported on
    stdout, the remaining services are skipped, and nothing is raised.

    Args:
        fqdn: Host name whose service downtimes should be removed.
        apikey: Value sent in the ``X-Thruk-Auth-Key`` header. When empty,
            the function returns immediately without contacting anything.
        monitor_host: Host name of the monitoring server to talk to.
        timeout: Seconds to wait for each request before giving up, so an
            unreachable monitor cannot block the script indefinitely.
    """
    if not apikey:
        return
    print("Removing downtime for: {}".format(fqdn))
    get_url = 'https://{}/thruk/r/hosts?name={}&columns=services'.format(
        monitor_host, fqdn)
    headers = {'X-Thruk-Auth-Key': apikey}
    action = 'del_active_service_downtimes'
    try:
        req = requests.get(get_url, headers=headers, timeout=timeout)
        req.raise_for_status()
        for service in req.json()[0]['services']:
            post_url = 'https://{}/thruk/r/services/{}/{}/cmd/{}'.format(
                monitor_host, fqdn, urllib.parse.quote(service), action)
            response = requests.post(post_url,
                                     headers=headers,
                                     timeout=timeout)
            response.raise_for_status()
    except (requests.RequestException, ValueError, LookupError,
            TypeError) as err:
        # ValueError: body was not JSON. LookupError/TypeError: the reply
        # was empty or did not have the expected host/services layout.
        print("Failed to remove downtime for: {} ({})".format(fqdn, err))
2023-02-13 09:44:56 +00:00
2023-11-07 13:01:12 +00:00
def run_command(command: list) -> tuple:
    """Run *command* as a subprocess and hand back what it wrote.

    Args:
        command: Argument list passed to the subprocess as-is.

    Returns:
        A ``(stdout, stderr)`` pair. stdout is decoded to ``str`` with
        leading and trailing newlines stripped (an empty string when
        nothing was captured); stderr is returned exactly as captured.
    """
    finished = subprocess.run(command,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE)
    captured = finished.stdout
    text = str() if captured is None else captured.decode().strip('\n')
    return (text, finished.stderr)
def main() -> int:
    """Back up the database, cycle every node, then upgrade Nextcloud.

    Steps, in order:
      1. Run the backup command on backup host number 1.
      2. For nodes 3, 2 and 1: add monitoring downtime, run cosmos, reboot,
         then retry the smoketest with a shrinking sleep (31, 29, ... 1
         seconds) until it passes or the attempts run out, and finally
         remove the downtime again.
      3. Run the Nextcloud upgrade and repair commands on node 3.

    Returns:
        0 once all steps have been issued, or 6 as soon as a node fails
        every smoketest attempt after its reboot.
    """
    # These placeholders are filled in when the ERB template is rendered.
    customer = "<%= @customer %>"
    environment = "<%= @environment %>"
    apikey_test = "<%= @apikey_test %>"
    apikey_prod = "<%= @apikey_prod %>"

    backup_command = ['sudo /home/script/bin/backup_db.sh']
    cosmos_command = ['sudo run-cosmos']
    reboot_command = ['sudo /usr/local/bin/safer_reboot']
    nc_upgrade_command = (
        'sudo /usr/local/bin/occ config:editable --on '
        '&& sudo /usr/local/bin/occ upgrade '
        '&& sudo /usr/local/bin/occ config:editable --off'
    )
    repair_command = (
        'sudo /usr/local/bin/occ config:editable --on '
        '&& sudo /usr/local/bin/occ maintenance:repair '
        '&& sudo /usr/local/bin/occ config:editable --off '
        '&& sudo /usr/local/bin/occ db:add-missing-indices '
        '&& sudo /usr/local/bin/occ db:add-missing-columns '
        '&& sudo /usr/local/bin/occ db:add-missing-primary-keys'
    )

    # The "common" customer is addressed through the gss host names.
    if customer == "common":
        customer, server_type, backup_type = "gss", "gss", "gssbackup"
    else:
        server_type, backup_type = "node", "backup"

    backup_host = build_fqdn(customer, environment, 1, backup_type)
    print("\tRunning backup command at {}".format(backup_host))
    run_remote_command(backup_host,
                       backup_command,
                       user="script",
                       output=subprocess.DEVNULL)

    # Each node is announced to both monitors; the test monitor is the
    # default host of add_downtime/remove_downtime.
    monitors = (
        (apikey_test, {}),
        (apikey_prod, {"monitor_host": "monitor.drive.sunet.se"}),
    )

    for number in (3, 2, 1):
        node = build_fqdn(customer, environment, number, server_type)
        for key, extra in monitors:
            add_downtime(node, key, **extra)

        print("Upgrading: {}".format(node))
        print("\tRunning cosmos command at {}".format(node))
        run_remote_command(node,
                           cosmos_command,
                           user="script",
                           output=subprocess.DEVNULL)
        print("\tRunning reboot command at {}".format(node))
        run_remote_command(node, reboot_command, user="script")

        # Wait 31, 29, ... 1 seconds between smoketest attempts.
        success = False
        for delay in range(31, 0, -2):
            print("\tSleeping for {} seconds before smoketest on {}".format(
                delay, node))
            time.sleep(delay)
            if smoketest_nextcloud_node(node):
                success = True
                break

        for key, extra in monitors:
            remove_downtime(node, key, **extra)

        if not success:
            print("Smoketest failed on {} after server reboot command".format(
                node))
            return 6
        print("Upgrade cycle succeeded on {} ".format(node))

    upgrade_host = build_fqdn(customer, environment, 3, server_type)
    print("Running nextcloud upgrade command at {}".format(upgrade_host))
    run_remote_command(upgrade_host, [nc_upgrade_command],
                       user="script",
                       tty=True)
    print("Running repair command on {}".format(upgrade_host))
    run_remote_command(upgrade_host, [repair_command],
                       user="script",
                       tty=True)
    print("All {}-servers successfully upgraded for {}".format(
        environment, customer))
    return 0
# Run only when executed as a script; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)