#!/usr/bin/env python3
"""Timer-driven backup status recap for ns8-backup-monitor.

This module is the entry point of the *scheduled* recap path.  It is started
directly by a systemd timer (ns8-backup-monitor-check.timer) instead of being
triggered through an Alertmanager webhook.

Why the timer path exists
-------------------------
The webhook path (receiver.py) runs only when Alertmanager emits a
'backup_failed' or 'backup_missing' alert, which means it runs only on
failure.  A scheduled backup that completes successfully never produces an
alert, so the webhook is never called and no recap email is sent.

To close that gap, this module runs the whole pipeline
(correlator → repo_check on failure → notifier) on a schedule that mirrors
the backup plan schedule.  It reads Redis directly, classifies the outcome,
and sends the recap email whether the backup succeeded or failed.

Typical invocation (from systemd OnCalendar):
    python3 -m ns8_backup_monitor.scheduled_check

Exit status is 0 on success and non-zero on unrecoverable errors.
"""

import logging
import sys

from .correlator import correlate_backup_status
from .notifier import send_notification
from .repo_check import check_repositories
from .utils import load_config, setup_logging

log = logging.getLogger(__name__)


def run_scheduled_check(config: dict):
    """Execute one pass of the analysis pipeline and email the recap.

    In contrast to the webhook path, no delay is inserted before Redis is
    read: the timer is expected to fire *after* the backup window has closed
    (set OnCalendar accordingly).

    Steps
    -----
    1. Call the correlator without specific backup_ids, so it scans all
       recent keys inside the ``correlator.recent_window`` time window.
    2. When the outcome is anything other than SUCCESS, run the repository
       health check.
    3. Send the notification email, for success and failure alike.

    Args:
        config: Parsed configuration dictionary (output of load_config).
    """
    log.info("Scheduled check starting — reading backup status from Redis...")

    # An empty backup_ids list asks the correlator to scan every plan status
    # key updated within the recent_window (default 3600 seconds).
    report = correlate_backup_status(config, backup_ids=[])
    outcome = report.get("outcome", "UNKNOWN")
    log.info("Correlator outcome: %s", outcome)

    # The repository health check only runs for non-SUCCESS outcomes; its
    # result adds diagnostics to the email body.
    if outcome == "SUCCESS":
        repo_status = None
    else:
        log.info("Non-success outcome — running repository health check...")
        repo_status = check_repositories(config, report)

    log.info("Sending recap notification (outcome=%s)...", outcome)

    # There is no originating Alertmanager alert on this path, so an empty
    # alerts list is passed and the notifier leaves out the 'Triggered by'
    # section.
    send_notification(
        config,
        alerts=[],
        correlation=report,
        repo_status=repo_status,
    )
    log.info("Scheduled check complete.")


def main():
    """Entry point for the scheduled check (called by systemd timer).

    Loads the configuration, initialises logging and runs one scheduled
    check.  The process exits with status 1 when the configuration cannot be
    loaded or when the check raises an exception.
    """
    try:
        config = load_config()
    except (FileNotFoundError, ImportError) as exc:
        # Logging is not set up at this point, so write to stderr, which
        # systemd captures in the journal.
        print(f"[ERROR] Could not load config: {exc}", file=sys.stderr)
        sys.exit(1)

    setup_logging(config)

    try:
        run_scheduled_check(config)
    except Exception as exc:  # pylint: disable=broad-except
        # Top-level boundary: record the traceback, then signal failure to
        # systemd through the exit status.
        log.exception("Scheduled check failed with unhandled exception: %s", exc)
        sys.exit(1)


if __name__ == "__main__":
    main()