From 39d6f5dc21576112c94a170d1d564cd8aebd99ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Erik=20Bergstr=C3=B6m?=
Date: Wed, 5 Jul 2023 11:43:59 +0200
Subject: [PATCH] updated scriptherder and python3 fixes

---
 global/overlay/usr/local/bin/scriptherder | 1614 +++++++++++++--------
 global/post-tasks.d/018packages           |    2 +-
 2 files changed, 1030 insertions(+), 586 deletions(-)

diff --git a/global/overlay/usr/local/bin/scriptherder b/global/overlay/usr/local/bin/scriptherder
index 1bada496..d8e1f7b5 100755
--- a/global/overlay/usr/local/bin/scriptherder
+++ b/global/overlay/usr/local/bin/scriptherder
@@ -1,6 +1,6 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 #
-# Copyright 2014 SUNET. All rights reserved.
+# Copyright 2014, 2015, 2017, 2018 SUNET. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without modification, are
 # permitted provided that the following conditions are met:
@@ -30,43 +30,80 @@
 #
 """
-Scriptherder can be run in one othe following modes:
+The basic idea with Scriptherder is to run e.g. cronjobs and save metadata about each
+execution - better than sending e-mails to root that never get read.
 
- wrap      -- Stores output, exit status etc. about a script invocation
- ls        -- Lists the logged script invocations
- check     -- Check if script execution results match given criterias,
-              output Nagios compatible result
+While we're at it, we save more than just the output (time, exit status, ...), which
+can then be used to monitor that jobs are working.
 
+Scriptherder can be run in one of the following modes:
+
+  wrap         -- Stores output, exit status etc. about a script invocation
+  ls           -- Lists the logged script invocations
+  check        -- Check if script execution results match given criteria,
+                  output Nagios compatible result
+  lastlog      -- Show last execution output of a job (or all jobs)
+  lastfaillog  -- Show last failed execution output of a job (or all jobs)
+
+The 'check' mode compares job status against criteria in INI-files (in checkdir, default
+/etc/scriptherder/check) and produces Nagios compatible output.
+
+Example check file contents for a job that is OK if it exited 0 and was last run less
+than eight hours ago, WARNING if less than 24 hours ago, and CRITICAL after that:
+
+  [check]
+  ok = exit_status=0, max_age=8h
+  warning = exit_status=0, max_age=24h
+
+  All criteria:
+
+    exit_status=0          Must exit(0)
+    max_age=8h             Must have executed less than 8h ago
+    not_running            Job is not running (deprecated; use !OR_running)
+    output_contains=OK     Output contains the text OK
+    output_matches=.*OK.*  Output matches the regexp
+    OR_file_exists=FILE    Check if a file exists, such as a disable-file for a job
+    OR_running             True if a job is running - useful for jobs that run @reboot etc.
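+
+Example crontab entry wrapping a job, and a matching NRPE check command (job name
+and script path are hypothetical):
+
+  */5 * * * * root scriptherder wrap -N renew_certs /usr/local/bin/renew_certs.sh
+
+  scriptherder check renew_certs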
""" -import os -import re -import sys -import shutil -import time +import argparse import json import logging import logging.handlers -import argparse +import os +import re +import shutil import subprocess -import ConfigParser +import sys +import time +from datetime import datetime +from typing import Any, AnyStr, Dict, List, Mapping, NewType, Optional, Tuple, Union, cast +from configparser import ConfigParser -_defaults = {'debug': False, - 'syslog': False, - 'mode': 'ls', - 'datadir': '/var/cache/scriptherder', - 'checkdir': '/etc/scriptherder/check', - } +Arguments = NewType("Arguments", argparse.Namespace) -_check_defaults = {'ok': 'exit_status=0,max_age=8h', - 'warning': 'exit_status=0,max_age=24h', - } +# Default arguments +_defaults = { + "debug": False, + "syslog": False, + "mode": "ls", + "datadir": "/var/cache/scriptherder", + "checkdir": "/etc/scriptherder/check", + "umask": "033", +} -exit_status = {'OK': 0, - 'WARNING': 1, - 'CRITICAL': 2, - 'UNKNOWN': 3, - } +_check_defaults = { + "ok": "exit_status=0,max_age=8h", + "warning": "exit_status=0,max_age=24h", +} + +exit_status = { + "OK": 0, + "WARNING": 1, + "CRITICAL": 2, + "UNKNOWN": 3, +} class ScriptHerderError(Exception): @@ -74,7 +111,7 @@ class ScriptHerderError(Exception): Base exception class for scriptherder. """ - def __init__(self, reason, filename): + def __init__(self, reason: str, filename: str): self.reason = reason self.filename = filename @@ -91,386 +128,671 @@ class CheckLoadError(ScriptHerderError): """ -class Job(object): +class Job: """ Representation of an execution of a job. """ - def __init__(self, name, cmd=None): + def __init__(self, name: str, cmd: Optional[List[str]] = None, data: Optional[Dict[str, Any]] = None): if cmd is None: cmd = [] for x in cmd: - assert(isinstance(x, basestring)) - self._name = name - self._cmd = cmd - self._start_time = None - self._end_time = None - self._exit_status = None - self._pid = None - self._output = None - self._filename = None - self._output_filename = None - if self._name is None: - self._name = os.path.basename(self.cmd) + assert isinstance(x, str) + if data is None: + data = { + "version": 2, + "name": name, + "cmd": cmd, + } + if data.get("name") is None: + if cmd: + data["name"] = os.path.basename(cmd[0]) - def __repr__(self): - start = time.strftime('%Y-%m-%d %X', time.localtime(self.start_time)) - return '<{} instance at {:#x}: \'{name}\' start={start}, exit={exit}>'.format( + if data.get("version") not in [1, 2]: + raise JobLoadError("Unknown version: {!r}".format(data.get("version")), filename=data["filename"]) + + # Output of command is saved outside self._data between execution and save + self._output: Optional[bytes] = None + + self._data = data + + def __repr__(self) -> str: + return "<{} instance at {:#x}: {}>".format( self.__class__.__name__, id(self), + str(self), + ) + + def __str__(self) -> str: + if not self.is_running: + return "{!r} not_running".format(self.name) + start = time.strftime("%Y-%m-%d %X", time.localtime(self.start_time)) + status = "" + if self.check_status: + status = ", status={}".format(self.check_status) + return "{name} start={start} ({age} ago), duration={duration}, exit={exit}{status}".format( name=self.name, - start = start, - exit = self.exit_status, + start=start, + age=self.age, + duration=self.duration_str, + exit=self.exit_status, + status=status, ) - def __str__(self): - start = time.strftime('%Y-%m-%d %X', time.localtime(self.start_time)) - return '\'{name}\' start={start}, duration={duration:>6}, 
exit={exit}'.format( - name = self.name, - start = start, - duration = self.duration_str, - exit = self.exit_status, - ) + @property + def age(self) -> str: + """Return how long ago this job executed.""" + if self.start_time is None: + return "N/A" + return _time_to_str(time.time() - self.start_time) - def status_summary(self): + def status_summary(self) -> str: """ Return short string with status of job. E.g. 'name[exit=0,age=19h]' """ - if self._end_time is None or self._start_time is None: - return '{name}[not_running]'.format(name = self.name) - age = _time_to_str(time.time() - self._start_time) - return '{name}[exit={exit_status},age={age}]'.format( - name = self.name, - exit_status = self._exit_status, - age = age, - ) + if not self.is_running: + return "{name}[not_running]".format(name=self.name) + assert self.start_time is not None + age = _time_to_str(time.time() - self.start_time) + return "{name}[exit={exit_status},age={age}]".format( + name=self.name, + exit_status=self.exit_status, + age=age, + ) @property - def name(self): + def name(self) -> str: """ The name of the job. - - @rtype: string """ - if self._name is None: + if self._data.get("name") is None: return self.cmd - return self._name + assert isinstance(self._data["name"], str) + return self._data["name"] @property - def cmd(self): + def cmd(self) -> str: """ The wrapped scripts name. - - @rtype: string """ - return self._cmd[0] + assert isinstance(self._data["cmd"], list) + assert isinstance(self._data["cmd"][0], str) + return self._data["cmd"][0] @property - def args(self): + def args(self) -> List[str]: """ The wrapped scripts arguments. - - @rtype: [string] """ - return self._cmd[1:] + cmd: List[str] = self._data.get("cmd", []) + assert len(cmd) + for x in cmd: + assert isinstance(x, str) + return cmd[1:] @property - def start_time(self): + def start_time(self) -> Optional[float]: """ The start time of the script invocation. - - @rtype: int() or None """ - if self._start_time is None: + if "start_time" not in self._data: return None - return int(self._start_time) + return float(self._data["start_time"]) @property - def end_time(self): + def end_time(self) -> Optional[float]: """ The end time of the script invocation. - - @rtype: int() or None """ - if self._end_time is None: + if "end_time" not in self._data: return None - return int(self._end_time) + return float(self._data["end_time"]) @property - def duration_str(self): + def duration_str(self) -> str: """ Time spent executing job, as a human readable string. - - @rtype: string """ - if self._end_time is None or self._start_time is None: - return 'NaN' - duration = self._end_time - self._start_time + if self.end_time is None or self.start_time is None: + return "NaN" + duration = self.end_time - self.start_time return _time_to_str(duration) @property - def exit_status(self): + def exit_status(self) -> Optional[int]: """ The exit status of the script invocation. - - @rtype: int() or None """ - return self._exit_status + return self._data.get("exit_status") @property - def pid(self): + def pid(self) -> Optional[int]: """ The process ID of the script invocation. - - @rtype: int() or None """ - return self._pid + pid = self._data.get("pid") + assert isinstance(pid, int) or pid is None + return pid @property - def filename(self): + def filename(self) -> Optional[str]: """ The filename this job is stored in. 
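+        (set by save_to_file, or by from_file when loaded)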
- - @rtype: string or None """ - return self._filename + return self._data.get("filename") @property - def output(self): + def output(self) -> Optional[bytes]: """ The output (STDOUT and STDERR) of the script invocation. - - @rtype: [string] """ - if not self._output and self.output_filename: - f = open(self.output_filename, 'r') - self._output = f.read() + if self._output is not None: + return self._output + if not self._data.get("output") and self.output_filename: + f = open(self.output_filename, "r") + self._data["output"] = f.read() f.close() - return self._output + return self._data.get("output") @property - def output_filename(self): + def output_filename(self) -> Optional[str]: """ The name of the file holding the output (STDOUT and STDERR) of the script invocation. - - @rtype: [string] """ - return self._output_filename + return self._data.get("output_filename") - def run(self): + @property + def check_status(self) -> Optional[str]: + """ + The check verdict for this job, if checked ('OK', 'WARNING', ...) + """ + return self._data.get("check_status", None) + + @check_status.setter + def check_status(self, value: str) -> None: + if value not in exit_status: + raise ValueError("Unknown check_status {!r}".format(value)) + self._data["check_status"] = value + + @property + def check_reason(self) -> Optional[str]: + """ + Text reason for check verdict for this job, if checked. + """ + return self._data.get("check_reason") + + @check_reason.setter + def check_reason(self, value: str) -> None: + self._data["check_reason"] = value + + def run(self) -> None: """ Run script, storing various aspects of the results. """ - self._start_time = time.time() - proc = subprocess.Popen(self._cmd, - cwd='/', - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - close_fds=True, - ) + self._data["start_time"] = time.time() + proc = subprocess.Popen( + self._data["cmd"], + cwd="/", + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + close_fds=True, + ) (stdout, _stderr) = proc.communicate() - self._end_time = time.time() + self._data["end_time"] = time.time() + self._data["exit_status"] = proc.returncode + self._data["pid"] = proc.pid self._output = stdout - self._exit_status = proc.returncode - self._pid = proc.pid + return None - def save_to_file(self, datadir, logger, filename=None): + def save_to_file(self, datadir: str, logger: logging.Logger, filename: Optional[str] = None) -> None: """ Create a record with the details of a script invocation. 
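+
+        Note: the umask for the created files is read from the module-level 'args'
+        (set in __main__) rather than passed as a parameter.
+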
        @param datadir: Directory to keep records in
        @param logger: logging logger
        @param filename: Filename to use - default is reasonably constructed
-
-        @type datadir: string
-        @type logger: logging.logger
-        @type filename: string or None
        """
        if filename is None:
-            fn = ''
+            fn = ""
            for x in self.name:
                if x.isalnum():
                    fn += x
                else:
-                    fn += '_'
-            filename = '{!s}_{!s}_{!s}'.format(fn, self.start_time, self.pid)
-        fn = os.path.join(datadir, filename)
-        logger.debug("Saving job metadata to file {!r}.tmp".format(fn))
-        output_fn = fn + '_output'
-        f = open(fn + '.tmp', 'w')
-        data = {'name': self.name,
-                'cmd': self._cmd,
-                'start_time': self._start_time,
-                'end_time': self._end_time,
-                'pid': self.pid,
-                'exit_status': self.exit_status,
-                'version': 2,
-                }
-        if self._output:
-            data['output_filename'] = output_fn + '.data'
-            data['output_size'] = len(self._output)
-        f.write(json.dumps(data, indent = 4, sort_keys = True))
-        f.write('\n')
+                    fn += "_"
+            assert self.start_time is not None
+            _ts = datetime.fromtimestamp(self.start_time)
+            _time_str = "{!s}.{:03}".format(
+                datetime.fromtimestamp(self.start_time).strftime("%Y%m%dT%H%M%S"), _ts.microsecond
+            )
+            filename = "{}__ts-{}_pid-{}".format(fn, _time_str, self.pid)
+        fn = str(os.path.join(datadir, filename))
+        _umask = int(f"0o{args.umask}", 8)
+        logger.debug(f"Setting umask to 0o{_umask:03o}")
+        old_umask = os.umask(_umask)
+        logger.debug("Saving job metadata to file '{!s}.tmp'".format(fn))
+        output_fn = fn + "_output"
+        f = open(fn + ".tmp", "w")
+        if self._output is not None:
+            self._data["output_filename"] = output_fn + ".data"
+            self._data["output_size"] = len(self._output)
+        f.write(json.dumps(self._data, indent=4, sort_keys=True))
+        f.write("\n")
        f.close()
-        os.rename(fn + '.tmp', fn + '.json')
-        self._filename = fn
+        os.rename(fn + ".tmp", fn + ".json")
+        self._data["filename"] = fn
+        os.umask(old_umask)

-        if self._output:
+        if self._output is not None:
+            assert self.output_filename is not None
+            output_fn = self.output_filename
            logger.debug("Saving job output to file {!r}".format(output_fn))
-            f = open(output_fn + '.tmp', 'w')
-            f.write(self._output)
-            f.close()
-            os.rename(output_fn + '.tmp', output_fn + '.data')
-            self._output_filename = output_fn
+            with open(output_fn + ".tmp", "wb") as fd:
+                fd.write(self._output)
+            os.rename(output_fn + ".tmp", output_fn)
+            self._output = None

-    def from_file(self, filename):
+    def check(self, check: "Check", logger: logging.Logger) -> None:
+        """
+        Figure out the status of this job, based on its check criteria.
+        """
+        status, msg = check.job_is_ok(self)
+        logger.debug("OK check result: {} {}".format(status, msg))
+        if status is True:
+            self.check_status = "OK"
+            self.check_reason = ", ".join(msg)
+        else:
+            status, warn_msg = check.job_is_warning(self)
+            logger.debug("Warning check result: {} {}".format(status, warn_msg))
+            msg += [x for x in warn_msg if x not in msg]
+            self.check_status = "WARNING" if status is True else "CRITICAL"
+            self.check_reason = ", ".join(msg)
+        logger.debug("Stored check status {}, {}".format(self.check_status, self.check_reason))

+    def is_ok(self) -> bool:
+        return self.check_status == "OK"
+
+    def is_warning(self) -> bool:
+        return self.check_status == "WARNING"
+
+    @property
+    def is_running(self) -> bool:
+        """
+        Check if job has executed or not.
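+        (i.e. the job has both a recorded start time and end time)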
+ """ + return self.start_time is not None and self.end_time is not None + + @classmethod + def from_file(cls, filename: str) -> "Job": """ Initialize this Job instance with data loaded from a file (previously created with `save_to_file()'. @param filename: Filename to load data from - @type filename: string - - @rtype: Job """ - f = open(filename, 'r') - try: - data = json.loads(f.read(100 * 1024 * 1024)) - except ValueError: - raise JobLoadError('JSON parsing failed', filename=filename) - f.close() - if data.get('version') == 1: - self._name = data.get('name') - for x in data['cmd']: - assert(isinstance(x, basestring)) - self._cmd = data['cmd'] - self._start_time = data['start_time'] - self._end_time = data['end_time'] - self._pid = data['pid'] - self._exit_status = data['exit_status'] - self._output = data['output'] - self._output_filename = None - self._filename = filename - elif data.get('version') == 2: - self._name = data.get('name') - for x in data['cmd']: - assert(isinstance(x, basestring)) - self._cmd = data['cmd'] - self._start_time = data['start_time'] - self._end_time = data['end_time'] - self._pid = data['pid'] - self._exit_status = data['exit_status'] - self._output_filename = data.get('output_filename') - #self._output_size = data.get('output_size') # currently not used in scriptherder - self._filename = filename - else: - raise JobLoadError('Unknown version: {!r}'.format(data.get('version')), filename=filename) - return self + with open(filename, "rt") as f: + try: + data = json.loads(f.read(100 * 1024 * 1024)) + except ValueError: + raise JobLoadError("JSON parsing failed", filename=filename) + except Exception as exc: + raise JobLoadError("Error ({}) loading job output".format(repr(exc)), filename=filename) + data["filename"] = filename + return cls("", data=data) -class Check(object): +class JobsList: + """ + Load all jobs matching any specified name on the command line. + + @param args: Parsed command line arguments + @param logger: logging logger + @param jobs: List of jobs + """ + + def __init__( + self, args: Arguments, logger: logging.Logger, jobs: Optional[List[Job]] = None, load_not_running: bool = True + ): + self.jobs: List[Job] = [] + self._by_name: Dict[str, List[Job]] = {} + self._args = args + self._logger = logger + + if jobs is None: + jobs = [] + files = [f for f in os.listdir(args.datadir) if os.path.isfile(os.path.join(args.datadir, f))] + for this in files: + if not this.endswith(".json"): + continue + filename = os.path.join(args.datadir, this) + try: + job = Job.from_file(filename) + except JobLoadError as exc: + logger.warning("Failed loading job file {!r} ({!s})".format(exc.filename, exc.reason)) + continue + if args.names and args.names != ["ALL"]: + if job.name not in args.names: + logger.debug( + "Skipping {!r} not matching {!r} (file {!s})".format(job.name, args.names, filename) + ) + continue + jobs.append(job) + # Sort jobs, oldest first + self.jobs = sorted(jobs, key=lambda x: x.start_time if x.start_time is not None else 0) + + if load_not_running: + self._load_not_running() + + def _load_not_running(self) -> None: + """ + Look for jobs that have not executed at all. + + To figure out which jobs _should_ be executed, we make an inventory of all the check files in + args.checkdir. For some jobs, not_running is an OK/WARNING status, so call the check.not_running() + to figure that out. 
+ """ + files = [f for f in os.listdir(self._args.checkdir) if os.path.isfile(os.path.join(self._args.checkdir, f))] + for this in files: + if not this.endswith(".ini"): + continue + name = this[:-4] # remove the '.ini' suffix + if self._args.names and self._args.names != ["ALL"]: + if name not in self._args.names: + self._logger.debug( + "Skipping not-running {!r} not matching {!r} (file {!s})".format(name, self._args.names, this) + ) + continue + if name not in self.by_name: + filename = os.path.join(self._args.checkdir, this) + self._logger.debug("Check {!r} (filename {!r}) not found in jobs".format(name, filename)) + job = Job(name) + self.jobs.append(job) + if job not in self.by_name.get(name, []): + assert self._by_name is not None + self._by_name[name] = [job] + + @property + def by_name(self) -> Dict[str, List[Job]]: + """ + Group jobs by name into a dict - in chronological order. + """ + if not self._by_name: + jobs_by_name: Dict[str, List[Job]] = {} + for job in self.jobs: + # Jobs in self.jobs are sorted by start_time, oldest first + if job.name not in jobs_by_name: + jobs_by_name[job.name] = [] + jobs_by_name[job.name].append(job) + self._by_name = jobs_by_name + return self._by_name + + @property + def last_of_each(self) -> List[Job]: + """ + Get a list of just the last job of each + """ + res: List[Job] = [] + for jobs in self.by_name.values(): + res.append(jobs[-1]) + self._logger.debug("Last of each: {}".format(res)) + return res + + +TCriteria = NewType("TCriteria", Tuple[str, Optional[str], bool]) + + +class Check: """ Conditions for the 'check' command. Loaded from file (one file per job name), and used to check if a Job instance is OK or WARNING or ... """ - def __init__(self, filename, logger): + def __init__(self, ok_str: str, warning_str: str, filename: str, logger: logging.Logger, runtime_mode: bool): """ - Load check criteria from a file. + Check criteria typically loaded from a file (using Check.from_file). - Example file contents: - - [check] - ok = exit_status=0, max_age=8h - warning = exit_status=0, max_age=24h - - @param filename: INI file with check criterias for a specific job - @param logger: logging logger - - @type filename: string - @type logger: logging.logger + See top-level comment in this script for syntax. """ - self.logger = logger - self.config = ConfigParser.ConfigParser(_check_defaults) - if not self.config.read([filename]): - raise ScriptHerderError('Failed loading config file', filename) - _section = 'check' - self._ok_criteria = [x.strip() for x in self.config.get(_section, 'ok').split(',')] - self._warning_criteria = [x.strip() for x in self.config.get(_section, 'warning').split(',')] + self._logger = logger + self.filename = filename + try: + self._ok_criteria = self._parse_criteria(ok_str, runtime_mode) + self._warning_criteria = self._parse_criteria(warning_str, runtime_mode) + except CheckLoadError: + raise + except Exception: + logger.exception("Failed parsing criteria") + raise CheckLoadError("Failed loading file", filename) + if not runtime_mode: + self._ok_criteria += [cast(TCriteria, ("stored_status", "OK", False))] - def job_is_ok(self, job): + def _parse_criteria(self, data_str: str, runtime_mode: bool) -> List[TCriteria]: """ - Evaluate a Job against the OK criterias for this check. 
+ Parse a full set of criteria, such as 'exit_status=0, max_age=25h' - @type job: Job - - @rtype: bool + :param data_str: Criteria + :return: [(what, value, negate)] """ - res = True - for this in self._ok_criteria: - if not self._evaluate(this, job): - self.logger.debug("Job {!r} failed OK criteria {!r}".format(job, this)) - res = False - self.logger.debug("{!r} is OK result: {!r}".format(job, res)) + res: List[TCriteria] = [] + self._logger.debug("Parsing criteria: {!r}".format(data_str)) + for this in data_str.split(","): + this = this.strip() + if not this: + continue + # + # Backwards-compat for renamed criteria + # + replace = { + "not_running": "!OR_running", + "output_not_contains": "!output_contains", + } + for old, new in replace.items(): + if this == old or this.startswith(old + "="): + self._logger.warning( + "Criteria {!r} in file {} is obsoleted by {!r}".format(old, self.filename, new) + ) + this = new + this[len(old) :] + + negate = False + if this.startswith("!"): + negate = True + this = this[1:] + if "=" not in this: + # check for allowed single-value criteria + if this not in ["OR_running"]: + self._logger.debug("Unrecognized token: {!r}".format(this)) + raise CheckLoadError("Bad criteria: {!r}".format(this), self.filename) + res += [cast(TCriteria, (this, None, negate))] + continue + # parse regular what=value criteria + (what, value) = this.split("=") + what = what.strip() + value = value.strip() + is_runtime_check = what not in ["max_age", "OR_file_exists"] + if runtime_mode != is_runtime_check: + self._logger.debug("Skipping criteria {} for runtime_mode={}".format(this, runtime_mode)) + continue + res += [cast(TCriteria, (what, value, negate))] return res - def job_is_warning(self, job): + def job_is_ok(self, job: Job) -> Tuple[bool, List[str]]: """ - Evaluate a Job against the WARNING criterias for this check. + Evaluate a Job against the OK criteria for this check. - @type job: Job - - @rtype: bool """ - res = True - for this in self._warning_criteria: - if not self._evaluate(this, job): - self.logger.debug("Job {!r} failed WARNING criteria {!r}".format(job, this)) - res = False - self.logger.debug("{!r} is WARNING result: {!r}".format(job, res)) - return res + return self._evaluate("OK", self._ok_criteria, job) - def _evaluate(self, criteria, job): + def job_is_warning(self, job: Job) -> Tuple[bool, List[str]]: + """ + Evaluate a Job against the WARNING criteria for this check. + """ + return self._evaluate("warning", self._warning_criteria, job) + + def _evaluate(self, name: str, criteria: List[TCriteria], job: Job) -> Tuple[bool, List[str]]: """ The actual evaluation engine. - @param criteria: The criteria to test ('max_age=8h' for example) + For each criteria `foo', look for a corresponding check_foo function and call it. 
+
+        @param name: Name of criteria, used for logging only
+        @param criteria: List of criteria to test ([('max_age', '8h', False)] for example)
         @param job: The job
 
-        @type criteria: string
-        @type job: Job
+        @returns: True or False, and a list of strings describing success/failure
         """
-        (what, value) = criteria.split('=')
-        what.strip()
-        value.strip()
-        if what == 'exit_status':
-            value = int(value)
-            res = (job.exit_status == value)
-            self.logger.debug("Evaluate criteria {!r}: ({!r} == {!r}) {!r}".format(
-                criteria, job.exit_status, value, res))
-            return res
-        elif what == 'max_age':
-            value = _parse_time_value(value)
-            now = int(time.time())
-            res = (job.end_time > (now - value))
-            self.logger.debug("Evaluate criteria {!r}: ({!r} > ({!r} - {!r}) {!r}".format(
-                criteria, job.end_time, now, value, res))
-            return res
-        elif what == 'output_contains' or what == 'output_not_contains':
-            value = value.strip()
-            res = re.match(value, job.output) is not None
-            if what == 'output_not_contains':
-                res = not res  # invert result
-            self.logger.debug("Evaluate criteria {!r}: {!r} matching in {!s} bytes output: {!r}".format(
-                criteria, value, len(job.output), res))
-            return res
-        self.logger.debug("Evaluation of unknown criteria {!r}, defaulting to False".format(criteria))
-        return False
+        ok_msgs: List[str] = []
+        fail_msgs: List[str] = []
+
+        def separate_or(criteria: List[TCriteria]) -> Tuple[List[TCriteria], List[TCriteria]]:
+            """Separate OR_ criteria from the other"""
+            _or: List[TCriteria] = []
+            _and: List[TCriteria] = []
+            for this in criteria:
+                what, _value, _negate = this
+                if what.startswith("OR_"):
+                    _or += [this]
+                else:
+                    _and += [this]
+            return _or, _and
+
+        or_criteria, and_criteria = separate_or(criteria)
+
+        # First, evaluate the OR criteria. If any of them return True, we are done with this check.
+        for this in or_criteria:
+            self._logger.debug("Evaluating {!r} condition OR {!s}".format(name, _criteria_to_str(this)))
+            status, msg = self._call_check(this, job)
+            if status:
+                self._logger.debug("{!r} OR criteria {} fulfilled: {}".format(name, this, msg))
+                return True, [msg]
+            else:
+                fail_msgs += [msg]
+        if not and_criteria:
+            return False, fail_msgs
+
+        res = True
+        for this in and_criteria:
+            self._logger.debug("Evaluating {!r} condition AND {!s}".format(name, _criteria_to_str(this)))
+            status, msg = self._call_check(this, job)
+            if not status:
+                self._logger.debug(
+                    "Job {!r} failed {!r} AND criteria {!r} with status {!r}".format(job, name, this, status)
+                )
+                res = False
+                fail_msgs += [msg]
+            else:
+                ok_msgs += [msg]
+
+        self._logger.debug("Check {!r} result: {!r}, messages: {!r} / {!r}".format(name, res, ok_msgs, fail_msgs))
+        if res:
+            return True, ok_msgs
+        return False, fail_msgs
+
+    def _call_check(self, criteria: TCriteria, job: Job) -> Tuple[bool, str]:
+        what, value, negate = criteria
+        func = getattr(self, "check_" + what, None)
+        if not func:
+            return False, "{}=unknown_criteria".format(what)
+        status, msg = func(job, value, negate)
+        self._logger.debug("Function check_{}({!r}) returned: {} {}".format(what, value, status, msg))
+        if msg == "":
+            # default message is the criteria as a string
+            neg_str = "!" if negate else ""
+            msg = "{}{}={}".format(neg_str, what, value)
+        return status, msg
+
+    # Functions named check_ are the actual criteria that can be entered in the INI files.
+    # These functions should return True, False and a string describing why they succeeded or failed.
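+    # Example: check_exit_status(job, "0", negate=False) returns (True, "exit=0") for
+    # a job that exited 0, and (False, "exit=1!=0") for a job that exited 1.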
+ # + # Negating isn't done in _call_check because some checks formulate their message differently + # when they are negated. + + def check_exit_status(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + """Check if job exit status matches 'value'""" + res = job.exit_status == int(value) + if negate: + res = not res + if res: + # short message for happy-case + return True, "exit={}".format(value) + if negate: + return False, "exit={}=={}".format(job.exit_status, value) + return False, "exit={}!={}".format(job.exit_status, value) + + def check_max_age(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + _value = _parse_time_value(value) + assert _value is not None + now = int(time.time()) + if job.end_time is None: + res = False + else: + res = job.end_time > (now - _value) + if negate: + res = not res + if res: + # No message for happy-case + return True, "" + if negate: + return False, "age={}<={}".format(job.age, _time_to_str(_value)) + return False, "age={}>{}".format(job.age, _time_to_str(_value)) + + def check_output_contains(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + _output_bytes = b"" if job.output is None else _to_bytes(job.output) + res = _to_bytes(value) in _output_bytes + if negate: + res = not res # invert result + neg_str = "!" if negate else "" + return res, "{}output_contains={}=={}".format(neg_str, value, res) + + def check_output_matches(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + res = re.match(_to_bytes(value), _to_bytes(job.output)) is not None + if negate: + res = not res # invert result + neg_str = "!" if negate else "" + return res, "{}output_matches={}=={}".format(neg_str, value, res) + + def check_OR_running(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + res = job.is_running + msg = "is_running" if res else "not_running" + if negate: + res = not res + return res, msg + + def check_OR_file_exists(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + res = os.path.isfile(value) + msg = "file_exists=" if res else "file_does_not_exist=" + msg += value + if negate: + res = not res + return res, msg + + def check_stored_status(self, job: Job, value: str, negate: bool) -> Tuple[bool, str]: + res = job.check_status == value + if negate: + res = not res # invert result + neg_str = "!" if negate else "" + return res, "{}stored_status={}=={}".format(neg_str, value, res) + + @classmethod + def from_file(cls, filename: str, logger: logging.Logger, runtime_mode: bool = False) -> "Check": + config = ConfigParser(_check_defaults) + if not config.read([filename]): + raise CheckLoadError("Failed reading file", filename) + _section = "check" + try: + _ok_criteria = config.get(_section, "ok") + _warning_criteria = config.get(_section, "warning") + except Exception as exc: + logger.exception(exc) + raise CheckLoadError("Failed loading file", filename) + return cls(_ok_criteria, _warning_criteria, filename, logger, runtime_mode) -class CheckStatus(object): +class CheckStatus: """ Aggregated status of job invocations for --mode check. @@ -481,190 +803,299 @@ class CheckStatus(object): checks_critical: List of checks in CRITICAL state ([Job()]). 
""" - def __init__(self, args, logger): + def __init__( + self, + args: Arguments, + logger: logging.Logger, + runtime_mode: bool = False, + jobs: Optional[JobsList] = None, + checks: Optional[Dict[str, Check]] = None, + ): """ @param args: Parsed command line arguments @param logger: logging logger + @param runtime_mode: Execute runtime-checks (not age) or the other way around + """ + + self.checks_ok: List[Job] = [] + self.checks_warning: List[Job] = [] + self.checks_unknown: List[Job] = [] + self.checks_critical: List[Job] = [] + + self._checks: Dict[str, Check] = {} if checks is None else checks + self._args = args + self._logger = logger + self._runtime_mode = runtime_mode + self._last_num_checked = 0 + + if jobs is not None: + self.check_jobs(jobs) + + def check_jobs(self, jobs: JobsList) -> None: + """ + Run checks on a number of jobs. + + Look for job execution entries (parsed into Job() instances), group them + per check name and determine the status. For each group, append status + to one of the three aggregate status lists of this object (checks_ok, + checks_warning or checks_critical). """ self.checks_ok = [] self.checks_warning = [] + self.checks_unknown = [] self.checks_critical = [] - self._jobs = _get_job_results(args, logger) - # group the jobs by their name - _by_name = {} - for this in self._jobs: - if this.name not in _by_name: - _by_name[this.name] = [] - _by_name[this.name].append(this) - self._jobs_by_name = _by_name - - self._job_count = len(_by_name) - - self._check_running_jobs(args, logger) - if not args.cmd: - self._check_not_running(args, logger) - - def _check_running_jobs(self, args, logger): - """ - Look for job execution entrys (parsed into Job() instances), group them - per check name and determine the status. For each group, append status - to one of the three aggregate status lists of this object (checks_ok, - checks_warning or checks_critical). - - @param args: Parsed command line arguments - @param logger: logging logger - """ # determine total check status based on all logged invocations of this job - for (name, jobs) in self._jobs_by_name.items(): - # Load the evaluation criterias for this job - check_filename = os.path.join(args.checkdir, name + '.ini') - logger.debug("Loading check definition from {!r}".format(check_filename)) + for (name, these_jobs) in jobs.by_name.items(): + self._logger.debug("") try: - check = Check(check_filename, logger) - except ScriptHerderError as exc: - logger.warning("Failed loading check: {!r}".format(exc), exc_info=True) - raise CheckLoadError('Failed loading check', filename = check_filename) - - # Sort jobs, oldest first - jobs = sorted(jobs, key=lambda x: x.start_time) - logger.debug("Checking {!r}: {!r}".format(name, jobs)) - - jobs_ok = [] - jobs_warning = [] - jobs_critical = [] - for job in jobs: - if check.job_is_ok(job): - jobs_ok.append(job) - elif check.job_is_warning(job): - jobs_warning.append(job) - else: - jobs_critical.append(job) - - logger.debug("Raw status OK : {!r}".format(jobs_ok)) - logger.debug("Raw status WARN : {!r}".format(jobs_warning)) - logger.debug("Raw status CRITICAL: {!r}".format(jobs_critical)) - - # add most recent job status to the totals - if jobs_ok: - self.checks_ok.append(jobs_ok[-1]) - elif jobs_warning: - self.checks_warning.append(jobs_warning[-1]) - else: - self.checks_critical.append(jobs_critical[-1]) - - def _check_not_running(self, args, logger): - """ - Look for job execution entrys (parsed into Job() instances), group them - per check name and determine the status. 
For each group, append status - to one of the three aggregate status lists of this object (checks_ok, - checks_warning or checks_critical). - - @param args: Parsed command line arguments - @param logger: logging logger - """ - files = [f for f in os.listdir(args.checkdir) if os.path.isfile(os.path.join(args.checkdir, f))] - for this in files: - if not this.endswith('.ini'): + check = self.get_check(name) + except CheckLoadError as exc: + self._logger.error("Failed loading check for {}: {}".format(name, exc.reason)) + this_job = these_jobs[-1] + this_job.check_status = "UNKNOWN" + this_job.check_reason = "Failed to load check" + self.checks_unknown.append(this_job) continue - filename = os.path.join(args.checkdir, this) - logger.debug("Loading check definition from {!r}".format(filename)) - try: - # validate check loads - Check(filename, logger) - except ValueError as exc: - logger.warning("Failed loading check: {!r}".format(exc), exc_info=True) - raise CheckLoadError(filename = filename) - name = this[:-4] # remove the '.ini' suffix - if name not in self._jobs_by_name: - logger.debug('Check {!r} (filename {!r}) not found in jobs'.format(name, filename)) - job = Job(name=name) - self.checks_critical.append(job) - self._job_count += 1 - else: - logger.debug('Check {!r} has {!r} logged results'.format(name, len(self._jobs_by_name[name]))) - def num_jobs(self): + # Check most recent job first since it is pretty probable one + # will be OK or WARNING. More efficient than wading through tens or + # hundreds of jobs to find that the last one is OK. + these_jobs.reverse() + + matched = False + for job in these_jobs: + self._logger.debug("Checking {!r}: {!r}".format(name, job)) + job.check(check, self._logger) + self._logger.debug("Checking for OK status") + if job.is_ok(): + self._logger.debug("Job status is OK") + self.checks_ok.append(job) + matched = True + break + else: + self._logger.debug("Checking for WARNING status") + if job.is_warning(): + self._logger.debug("Job status is WARNING") + self.checks_warning.append(job) + matched = True + break + + if not matched: + self._logger.debug("Concluding CRITICAL status") + self.checks_critical.append(these_jobs[0]) + + self._last_num_checked = len(jobs.by_name) + + def get_check(self, name: str) -> Check: + """ + Load and cache the evaluation criteria for this job. + + :param name: Name of job + :return: The check + """ + if name not in self._checks: + check_filename = os.path.join(self._args.checkdir, name + ".ini") + self._logger.debug("Loading check definition from {!r}".format(check_filename)) + try: + self._checks[name] = Check.from_file(check_filename, self._logger, runtime_mode=self._runtime_mode) + except ScriptHerderError: + raise CheckLoadError("Failed loading check", filename=check_filename) + + return self._checks[name] + + @property + def num_jobs(self) -> int: """ Return number of jobs processed. This is number of different jobs running + not running. - - @rtype: int """ - return self._job_count + return self._last_num_checked + + def aggregate_status(self) -> Tuple[str, Optional[str]]: + """ + Return the aggregate status of all jobs checked. + + The level returned is 'OK', 'WARNING', 'CRITICAL' or 'UNKNOWN'. 
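+
+        For example, two CRITICAL jobs out of five checked yields something like
+        ('CRITICAL', '2/5 jobs in this state: name1[exit=1,age=2d], name2[exit=1,age=3h]').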
+
+        :return: Level and message
+        """
+        if self.num_jobs == 1:
+            # Single job check requested, output detailed information
+            if self.checks_ok:
+                return "OK", self.checks_ok[-1].check_reason
+            if self.checks_warning:
+                return "WARNING", self.checks_warning[-1].check_reason
+            if self.checks_critical:
+                return "CRITICAL", self.checks_critical[-1].check_reason
+            if self.checks_unknown:
+                return "UNKNOWN", self.checks_unknown[-1].check_reason
+            return "UNKNOWN", "No jobs found for {!r}?".format(self._args.names)
+
+        # When looking at multiple jobs at once, logic gets a bit reversed - if ANY
+        # job invocation is CRITICAL/WARNING, the aggregate message given to
+        # Nagios will have to be a failure.
+        if self.checks_critical:
+            return "CRITICAL", _status_summary(self.num_jobs, self.checks_critical)
+        if self.checks_warning:
+            return "WARNING", _status_summary(self.num_jobs, self.checks_warning)
+        if self.checks_unknown:
+            return "UNKNOWN", _status_summary(self.num_jobs, self.checks_unknown)
+        if self.checks_ok:
+            return "OK", _status_summary(self.num_jobs, self.checks_ok)
+        return "UNKNOWN", "No jobs found?"
 
 
-def job_from_file(filename):
-    """
-    Recreate Job() instance from saved file.
-
-    @param filename: Filename to load script invocation details from
-
-    @type filename: string
-    @rtype: Job
-    """
-    job = Job('')
-    return job.from_file(filename)
-
-
-def parse_args(defaults):
+def parse_args(defaults: Mapping[str, Any]) -> Arguments:
     """
     Parse the command line arguments
 
     @param defaults: Argument defaults
-
-    @type defaults: dict
     """
-    parser = argparse.ArgumentParser(description = 'Script herder script',
-                                     add_help = True,
-                                     formatter_class = argparse.ArgumentDefaultsHelpFormatter,
-                                     )
+    parser = argparse.ArgumentParser(
+        description="Script herder script",
+        add_help=True,
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
 
-    parser.add_argument('--debug',
-                        dest = 'debug',
-                        action = 'store_true', default = defaults['debug'],
-                        help = 'Enable debug operation',
-                        )
-    parser.add_argument('--syslog',
-                        dest = 'syslog',
-                        action = 'store_true', default = defaults['syslog'],
-                        help = 'Enable syslog output',
-                        )
-    parser.add_argument('--mode',
-                        dest = 'mode',
-                        choices = ['wrap', 'ls', 'check', 'lastlog', 'lastfaillog'], default = defaults['mode'],
-                        help = 'What mode to run in',
-                        )
-    parser.add_argument('-d', '--datadir',
-                        dest = 'datadir',
-                        default = defaults['datadir'],
-                        help = 'Data directory',
-                        metavar = 'PATH',
-                        )
-    parser.add_argument('--checkdir',
-                        dest = 'checkdir',
-                        default = defaults['checkdir'],
-                        help = 'Check definitions directory',
-                        metavar = 'PATH',
-                        )
-    parser.add_argument('-N', '--name',
-                        dest = 'name',
-                        help = 'Job name',
-                        metavar = 'NAME',
-                        )
+    parser.add_argument(
+        "--debug", dest="debug", action="store_true", default=defaults["debug"], help="Enable debug operation"
+    )
+    parser.add_argument(
+        "-d", "--datadir", dest="datadir", default=defaults["datadir"], help="Data directory", metavar="PATH"
+    )
+    parser.add_argument(
+        "--checkdir", dest="checkdir", default=defaults["checkdir"], help="Check definitions directory", metavar="PATH"
+    )
 
-    parser.add_argument('cmd',
-                        nargs = '*', default = [],
-                        help = 'Script command',
-                        metavar = 'CMD',
-                        )
+    subparsers = parser.add_subparsers(
+        help="Mode of operation",
+        dest="mode",
+    )
 
-    args = parser.parse_args()
+    parser_wrap = subparsers.add_parser("wrap", help="Wrap a command and store metadata about it")
+    parser_ls = subparsers.add_parser("ls", help="List jobs (jobs are created with 'wrap')")
+    parser_check =
subparsers.add_parser("check", help="Return status of jobs in a Nagios compatible way") + parser_lastlog = subparsers.add_parser("lastlog", help="Show last entry for a job") + parser_lastfaillog = subparsers.add_parser("lastfaillog", help="Show last failure entry for a job") - return args + parser_wrap.add_argument("-N", "--name", dest="name", help="Job name", metavar="NAME", required=True) + parser_wrap.add_argument( + "--umask", + dest="umask", + help=f"Job output file umask (default: {defaults['umask']})", + metavar="OCTAL", + default=defaults["umask"], + ) + parser_wrap.add_argument("cmd", nargs="+", default=[], help="Script command", metavar="CMD") + parser_wrap.add_argument( + "--syslog", dest="syslog", action="store_true", default=defaults["syslog"], help="Enable syslog output" + ) + + parser_ls.add_argument("names", nargs="*", default=[], help="Names of jobs to include", metavar="NAME") + parser_check.add_argument("names", nargs="*", default=[], help="Names of jobs to include", metavar="NAME") + parser_lastlog.add_argument("names", nargs="*", default=[], help="Names of jobs to include", metavar="NAME") + parser_lastfaillog.add_argument("names", nargs="*", default=[], help="Names of jobs to include", metavar="NAME") + + _args = sys.argv[1:] + if _args and _args[0] == "--mode": + # Old style invocation. Need to remove the "--mode" argument to have the subparser execute. + _args = _args[1:] + if not _args: + # If we set subparsers.default to "ls", the parser_ls won't execute and there won't be an args.name + # which causes issues later on. So we need to add a dummy argument to make the parser execute. + _args = ["ls"] + + args = parser.parse_args(_args) + + if args.mode == "wrap" and len(args.umask) != 3: + parser.error(f"Umask must be 3 digits (e.g. 
the default '{defaults['umask']}')") + + return cast(Arguments, args) -def mode_wrap(args, logger): +class ColumnMeta: + """ + Metadata for a column + """ + + def __init__(self, name: str, width: int = 0, align: str = ""): + self.name = name + self.width = width + self.align = align + self.update_width(len(name)) + + def update_width(self, value: int) -> None: + if value > self.width: + self.width = value + + def to_string(self, element: Tuple[str, int]) -> str: + (value, print_width) = element + _pad = " " * (self.width - print_width) + if self.align == "right": + return _pad + value + return value + _pad + + +class DataTable: + """Format data in fixed-width columns""" + + def __init__(self, meta: List[ColumnMeta]) -> None: + self.rows: List[List[Tuple[str, int]]] = [] + self._curr: List[Tuple[str, int]] = [] + self._meta = meta + + self.without_ANSI = re.compile( + r""" + \x1b # literal ESC + \[ # literal [ + [;\d]* # zero or more digits or semicolons + [A-Za-z] # a letter + """, + re.VERBOSE, + ).sub + + def push(self, value: str) -> None: + """Add a value to the current row""" + _print_width = len(self.without_ANSI("", value)) # get the actual print width for this value + self._curr.append((value, _print_width)) + + if len(self._meta) >= len(self._curr): + _meta = self._meta[len(self._curr) - 1] + _meta.update_width(len(self.without_ANSI("", value))) + + def new_line(self) -> None: + self.rows.append(self._curr) + self._curr = [] + + def __str__(self) -> str: + """Return the formatted table""" + res: List[str] = [] + + # Output field names + _this = "" + for header in self._meta: + _element = (header.name, len(header.name)) + _this += f"{header.to_string(_element)} " + _this = _this.rstrip() + res.append(_this) + + # Output data rows + for row in self.rows: + _this = "" + for idx in range(len(row)): + if len(self._meta) >= idx: + _meta = self._meta[idx] + _this += _meta.to_string(row[idx]) + else: + _this += str(row[idx]) + _this += " " + _this = _this.rstrip() + res.append(_this) + return "\n".join(res) + + +def mode_wrap(args: Arguments, logger: logging.Logger) -> bool: """ Execute a job and save result state in a file. @@ -672,39 +1103,98 @@ def mode_wrap(args, logger): @param logger: logging logger """ job = Job(args.name, cmd=args.cmd) - logger.debug("Invoking '{!s}'".format(''.join(args.cmd))) + logger.debug("Invoking '{!s}'".format("".join(args.cmd))) job.run() logger.debug("Finished, exit status {!r}".format(job.exit_status)) logger.debug("Job output:\n{!s}".format(job.output)) + # Record what the jobs status evaluates to at the time of execution + checkstatus = CheckStatus(args, logger, runtime_mode=True) + try: + check = checkstatus.get_check(job.name) + except CheckLoadError: + check = None + if check: + job.check(check, logger) + level = logging.INFO if job.is_ok() else logging.WARNING + logger.log(level, "Job {!r} check status is {} ({})".format(job.name, job.check_status, job.check_reason)) job.save_to_file(args.datadir, logger) return True -def mode_ls(args, logger): +def mode_ls(args: Arguments, logger: logging.Logger) -> bool: """ List all the saved states for jobs. 
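+
+        Without job names given, only the last execution of each job is listed;
+        use 'ls ALL' to list every stored execution.
+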
@param args: Parsed command line arguments @param logger: logging logger """ - jobs = _get_job_results(args, logger) - for this in sorted(jobs, key=lambda x: x.start_time): - start = time.strftime('%Y-%m-%d %X', time.localtime(this.start_time)) - print('{start} {duration:>6} exit={exit} name={name} {filename}'.format( - start = start, - duration = this.duration_str, - exit = this.exit_status, - name = this.name, - filename = this.filename, - )) + jobs = JobsList(args, logger) + last_of_each = jobs.last_of_each + if not args.names: + # Short-mode, just show the last execution for all jobs + print("\n=== Showing the last execution of each job, use 'ls ALL' to see all executions\n") + chosen_jobs = last_of_each + else: + chosen_jobs = jobs.jobs + + checkstatus = CheckStatus(args, logger) + + _fields = [ + ColumnMeta("Start time", align="right"), + ColumnMeta("Duration"), + ColumnMeta("Age"), + ColumnMeta("Status"), + ColumnMeta("Criteria"), + ColumnMeta("Name"), + ColumnMeta("Filename"), + ] + data = DataTable(meta=_fields) + + for this in chosen_jobs: + start = "***" + if this.start_time: + start = time.strftime("%Y-%m-%d %X", time.localtime(this.start_time)) + data.push(start) + data.push(this.duration_str) + data.push(this.age + " ago") + + if this in last_of_each: + # For the last instance of each job, evaluate full check-mode status + temp_jobs = JobsList(args, logger, jobs=[this], load_not_running=False) + checkstatus.check_jobs(temp_jobs) + (level, msg) = checkstatus.aggregate_status() + else: + level = "-" + if this.exit_status != 0: + level = "Non-zero" + msg = "exit={}, age={}".format(this.exit_status, this.age) + + color1 = "" + color2 = "" + reset = "" + if level not in ["OK", "-"] and sys.stdout.isatty(): + color1 = "\033[;1m" # bold + color2 = "\033[;1m" # bold + reset = "\033[0;0m" + if level == "CRITICAL": + color1 = "\033[1;31m" # red + + data.push(color1 + level + reset) + data.push(color2 + (msg or "") + reset) + + data.push(this.name) + data.push(this.filename or "") + data.new_line() + + print(data) return True -def mode_check(args, logger): +def mode_check(args: Arguments, logger: logging.Logger) -> int: """ Evaluate the stored states for either a specific job, or all jobs. - Return Nagios compatible output (scriptherder --mode check is intended to + Return Nagios compatible output ("scriptherder check" is intended to run using Nagios NRPE or similar). @param args: Parsed command line arguments @@ -712,169 +1202,128 @@ def mode_check(args, logger): """ try: - status = CheckStatus(args, logger) + status = CheckStatus(args, logger, jobs=JobsList(args, logger)) except CheckLoadError as exc: print("UNKNOWN: Failed loading check from file '{!s}' ({!s})".format(exc.filename, exc.reason)) - return exit_status['UNKNOWN'] + return exit_status["UNKNOWN"] - if args.cmd: - # Single job check requested, output detailed information - if status.checks_ok: - print('OK: {!s}'.format(status.checks_ok[-1])) - return exit_status['OK'] - if status.checks_warning: - print('WARNING: {!s}'.format(status.checks_warning[-1])) - return exit_status['WARNING'] - if status.checks_critical: - print('CRITICAL: {!s}'.format(status.checks_critical[-1])) - return exit_status['CRITICAL'] - print "UNKNOWN - no jobs found for {!r}?".format(args.cmd) - return exit_status['UNKNOWN'] - - # When looking at multiple jobs at once, logic gets a bit reversed - if ANY - # job invocation is CRITICAL/WARNING, the aggregate message given to - # Nagios will have to be a failure. 
- if status.checks_critical: - print('CRITICAL: {!s}'.format( - _status_summary(status.num_jobs(), status.checks_critical))) - return exit_status['CRITICAL'] - if status.checks_warning: - print('WARNING: {!s}'.format( - _status_summary(status.num_jobs(), status.checks_warning))) - return exit_status['WARNING'] - if status.checks_ok: - print('OK: {!s}'.format( - _status_summary(status.num_jobs(), status.checks_ok))) - return exit_status['OK'] - print "UNKNOWN - no jobs found?" - return exit_status['UNKNOWN'] + level, msg = status.aggregate_status() + print("{!s}: {!s}".format(level, msg)) + return exit_status[level] -def mode_lastlog(args, logger, fail_status=False): +def mode_lastlog(args: Arguments, logger: logging.Logger, fail_status: bool = False) -> Optional[int]: """ View script output for the last execution for either a specific job, or all jobs. @param args: Parsed command line arguments @param logger: logging logger + @param fail_status: Show last failed log if True """ - _jobs = sorted(_get_job_results(args, logger), key=lambda x: x.start_time) - jobs_by_name = {} - for job in _jobs: - if job.name not in jobs_by_name: - jobs_by_name[job.name] = [] - jobs_by_name[job.name].append(job) + _jobs = JobsList(args, logger) - if len(jobs_by_name) > 0: - view_jobs = [] - for (name, jobs) in jobs_by_name.items(): - job = jobs[-1] # last job (any status) - if job.output_filename: - if fail_status and job.exit_status != 0: - view_jobs.append(job) - elif not fail_status: - view_jobs.append(job) + if not _jobs.jobs: + print("No jobs found") + return None - if view_jobs: - for job in view_jobs: - if os.path.isfile(job.output_filename): - with open(job.output_filename, 'r') as f: - print '=== Script output of {!r}'.format(job) - shutil.copyfileobj(f, sys.stdout) - print '=== End of script output\n' - else: - print('No script output found for {!s} with fail_status={!s}'.format(', '.join(jobs_by_name.keys()), fail_status)) + view_jobs: List[Job] = [] + for job in _jobs.last_of_each: + if job.output_filename and os.path.isfile(job.output_filename): + if fail_status and job.exit_status != 0: + view_jobs.append(job) + elif not fail_status: + view_jobs.append(job) + + if view_jobs: + for job in view_jobs: + if not job.output_filename: + continue + with open(job.output_filename, "r") as f: + print("=== Script output of {!r}".format(job)) + shutil.copyfileobj(f, sys.stdout) + print("=== End of script output\n") else: - print "No jobs found" + print( + "No script output found for {!s} with fail_status={!s}".format(", ".join(_jobs.by_name.keys()), fail_status) + ) + + return bool(view_jobs) -def _status_summary(num_jobs, failed): +def _status_summary(num_jobs: int, failed: List[Job]) -> str: """ String format routine used in output of checks status. """ - fmt = '{jobs} job in this state: {summary}' - if len(failed) == 1: - fmt = '{jobs}/{num_jobs} job in this state: {summary}' + plural = "s" if num_jobs != 1 else "" - summary = ', '.join(sorted([str(x.status_summary()) for x in failed])) - return fmt.format(jobs = len(failed), - num_jobs = num_jobs, - summary = summary, - ) + summary = ", ".join(sorted([str(x.status_summary()) for x in failed])) + return "{jobs}/{num_jobs} job{plural} in this state: {summary}".format( + jobs=len(failed), + num_jobs=num_jobs, + summary=summary, + plural=plural, + ) -def _get_job_results(args, logger): - """ - Load all jobs matching any specified name on the command line. 
-
-    @param args: Parsed command line arguments
-    @param logger: logging logger
-
-    @rtype: [Job]
-    """
-    files = [f for f in os.listdir(args.datadir) if os.path.isfile(os.path.join(args.datadir, f))]
-    jobs = []
-    for this in files:
-        if not this.endswith('.json'):
-            continue
-        filename = os.path.join(args.datadir, this)
-        try:
-            job = job_from_file(filename)
-        except JobLoadError as exc:
-            logger.warning("Failed loading job file '{!s}' ({!s})".format(exc.filename, exc.reason))
-        if args.cmd:
-            if args.cmd[0] != job.name:
-                logger.debug("Skipping '{!s}' not matching '{!s}' (file {!s})".format(job.name, args.cmd[0], filename))
-                continue
-        jobs.append(job)
-    return jobs
-
-
-def _parse_time_value(value):
+def _parse_time_value(value: str) -> Optional[int]:
     """
     Parse time period strings such as 1d. A lone number is considered number of seconds.
 
+    A composite string like 1d1h will be split into two parts and evaluated recursively.
+
     Return parsed value as number of seconds.
 
     @param value: Value to parse
-    @type value: string
-    @rtype: int
     """
-    match = re.match(r'^(\d+)([hmsd]*)$', value)
+    match = re.match(r"^(\d+)([hmsd]*)$", value)
     if match:
         num = int(match.group(1))
         what = match.group(2)
-        if what == 'm':
+        if what == "m":
            return num * 60
-        if what == 'h':
+        if what == "h":
            return num * 3600
-        if what == 'd':
+        if what == "d":
            return num * 86400
        return num
+    else:
+        alpha = list(filter(None, re.split('[0-9]', value)))
+        numeric = list(filter(None, re.split('[mhd]', value)))
+        return _parse_time_value(str(numeric[0]) + alpha[0]) + _parse_time_value(str(numeric[1]) + alpha[1])
 
 
-def _time_to_str(value):
+def _time_to_str(value: Union[float, int]) -> str:
     """
     Format number of seconds to short readable string.
-
-    @type value: float or int
-
-    @rtype: string
     """
     if value < 1:  # milliseconds
-        return '{:0.3f}ms'.format(value * 1000)
+        return "{!s}ms".format(int(value * 1000))
     if value < 60:
-        return '{!s}s'.format(int(value))
+        return "{!s}s".format(int(value))
     if value < 3600:
-        return '{!s}m'.format(int(value / 60))
+        return "{!s}m".format(int(value / 60))
     if value < 86400:
-        return '{!s}h'.format(int(value / 3600))
+        return "{!s}h".format(int(value / 3600))
     days = int(value / 86400)
-    return '{!s}d{!s}h'.format(days, int((value % 86400) / 3600))
+    return "{!s}d{!s}h".format(days, int((value % 86400) / 3600))
 
 
-def main(myname = 'scriptherder', args = None, logger = None, defaults=_defaults):
+def _to_bytes(data: Optional[AnyStr]) -> bytes:
+    if not data:
+        return b""
+    if isinstance(data, bytes):
+        return data
+    return data.encode("utf-8")
+
+
+def _criteria_to_str(criteria: TCriteria) -> str:
+    name, value, negate = criteria
+    eq = "!=" if negate else "=="
+    return "{}{}{}".format(name, eq, value)
+
+
+def main(myname: str, args: Arguments, logger: Optional[logging.Logger] = None) -> Optional[Union[int, bool]]:
     """
     Main entry point for either wrapping a script, or checking the status of it.
@@ -882,53 +1331,48 @@ def main(myname = 'scriptherder', args = None, logger = None, defaults=_defaults @param args: Command line arguments @param logger: logging logger @param defaults: Default command line arguments - - @type myname: string - @type args: None or [string] - @type logger: logging.logger - @type defaults: dict """ - if not args: - args = parse_args(defaults) - # initialize various components if not logger: + level = logging.INFO + if args.debug: + level = logging.DEBUG + logging.basicConfig( + level=level, stream=sys.stderr, format="%(asctime)s: %(threadName)s %(levelname)s %(message)s" + ) logger = logging.getLogger(myname) + # If stderr is not a TTY, change the log level of the StreamHandler (stream = sys.stderr above) to ERROR + if not sys.stderr.isatty() and not args.debug: + for this_h in logging.getLogger("").handlers: + this_h.setLevel(logging.ERROR) if args.debug: logger.setLevel(logging.DEBUG) - # log to stderr when debugging - formatter = logging.Formatter('%(asctime)s %(name)s %(threadName)s: %(levelname)s %(message)s') - stream_h = logging.StreamHandler(sys.stderr) - stream_h.setFormatter(formatter) - logger.addHandler(stream_h) - if args.syslog: - syslog_h = logging.handlers.SysLogHandler() - formatter = logging.Formatter('%(name)s: %(levelname)s %(message)s') + if args.mode == "wrap" and args.syslog: + syslog_h = logging.handlers.SysLogHandler("/dev/log") + formatter = logging.Formatter("%(name)s: %(levelname)s %(message)s") syslog_h.setFormatter(formatter) + syslog_h.setLevel(logging.INFO) logger.addHandler(syslog_h) - if args.name and args.mode != 'wrap': - logger.error('Argument --name only applicable for --mode wrap') - return False - - if args.mode == 'wrap': + if args.mode == "wrap": return mode_wrap(args, logger) - elif args.mode == 'ls': + elif args.mode == "ls": return mode_ls(args, logger) - elif args.mode == 'check': + elif args.mode == "check": return mode_check(args, logger) - elif args.mode == 'lastlog': + elif args.mode == "lastlog": return mode_lastlog(args, logger) - elif args.mode == 'lastfaillog': + elif args.mode == "lastfaillog": return mode_lastlog(args, logger, fail_status=True) - else: - logger.error("Invalid mode {!r}".format(args.mode)) - return False + logger.error("Invalid mode {!r}".format(args.mode)) + return False -if __name__ == '__main__': + +if __name__ == "__main__": try: progname = os.path.basename(sys.argv[0]) - res = main(progname) + args = parse_args(_defaults) + res = main(progname, args=args) if isinstance(res, int): sys.exit(res) if res: diff --git a/global/post-tasks.d/018packages b/global/post-tasks.d/018packages index 79c33483..bd8d9eea 100755 --- a/global/post-tasks.d/018packages +++ b/global/post-tasks.d/018packages @@ -6,7 +6,7 @@ CACHE_DIR=/var/cache/puppet-modules MODULES_DIR=${MODULES_DIR:=/etc/puppet/cosmos-modules} export GNUPGHOME=/etc/cosmos/gnupg -python -c "import yaml" 2>/dev/null || apt-get -y install python-yaml +python3 -c "import yaml" 2>/dev/null || apt-get -y install python3-yaml bold='\e[1m' reset='\e[0m'