# ns8_backup_monitor/scheduled_check.py

#!/usr/bin/env python3
"""Scheduled backup status check for ns8-backup-monitor.

This module is the entry point for the *scheduled* recap path, invoked
directly by a systemd timer (ns8-backup-monitor-check.timer) rather than
by an Alertmanager webhook.

Why a separate timer path?
---------------------------
The Alertmanager webhook path (receiver.py) only fires when Alertmanager
emits a 'backup_failed' or 'backup_missing' alert — i.e. only on failure.
Automatic scheduled backups that complete successfully never produce an
Alertmanager alert, so the webhook is never called and no recap email is sent.

This module solves that by running the full pipeline (correlator → repo_check
on failure → notifier) on a schedule that mirrors the backup plan schedule.
It reads Redis directly, classifies the outcome, and sends the recap email
regardless of success or failure.

Typical invocation (from systemd OnCalendar):
    python3 -m ns8_backup_monitor.scheduled_check

The module exits 0 on success, non-zero on unrecoverable errors.
"""

import logging
import sys

from .correlator import correlate_backup_status
from .notifier import send_notification
from .repo_check import check_repositories
from .utils import load_config, setup_logging

# Module-level logger, named after this module via __name__.
# NOTE(review): handlers/levels are presumably installed by setup_logging()
# in main() — confirm against utils.setup_logging.
log = logging.getLogger(__name__)


def run_scheduled_check(config: dict):
    """Execute one correlate → (optional) repo check → notify cycle.

    The function reads the backup status immediately, without any delay of
    its own; the caller (a timer, per the module docstring) is responsible
    for invoking it once the backup window is over.

    Pipeline
    --------
    1. Call ``correlate_backup_status`` with an empty ``backup_ids`` list.
    2. Read the ``"outcome"`` key of the result, falling back to
       ``"UNKNOWN"`` when the key is absent.
    3. For any outcome other than ``"SUCCESS"``, call ``check_repositories``
       with the config and the correlation result.
    4. Call ``send_notification`` with an empty ``alerts`` list, the
       correlation result and the repository status (``None`` on success).

    Args:
        config: Parsed configuration dictionary (output of load_config).

    Returns:
        None. Exceptions raised by the pipeline steps are not caught here.
    """
    log.info("Scheduled check starting — reading backup status from Redis...")

    # An empty backup_ids list asks the correlator for "everything recent"
    # rather than a specific set of backups (the correlator is expected to
    # apply its recent_window setting — see correlator module to confirm).
    result = correlate_backup_status(config, backup_ids=[])
    verdict = result.get("outcome", "UNKNOWN")
    log.info("Correlator outcome: %s", verdict)

    if verdict == "SUCCESS":
        # Nothing to diagnose: skip the repository health check entirely.
        repo_health = None
    else:
        # Anything but SUCCESS gets the extra repository diagnostics so they
        # can be handed to the notifier together with the correlation.
        log.info("Non-success outcome — running repository health check...")
        repo_health = check_repositories(config, result)

    log.info("Sending recap notification (outcome=%s)...", verdict)
    # There is no originating Alertmanager alert on this path, hence the
    # empty alerts list.
    send_notification(
        config,
        alerts=[],
        correlation=result,
        repo_status=repo_health,
    )
    log.info("Scheduled check complete.")


def main():
    """Load the configuration, set up logging and run one scheduled check.

    Exit behaviour:
        * ``load_config`` raising ``FileNotFoundError`` or ``ImportError``:
          an ``[ERROR]`` line is printed to stderr and the process exits
          with status 1.
        * ``run_scheduled_check`` raising any ``Exception``: the failure is
          logged with its traceback and the process exits with status 1.
        * Otherwise the function returns normally.

    Any other exception type raised by ``load_config`` or ``setup_logging``
    is not handled here and propagates to the caller.
    """
    try:
        settings = load_config()
    except (FileNotFoundError, ImportError) as load_err:
        # Logging is not configured at this point, so write straight to
        # stderr where the service manager can pick the message up.
        message = f"[ERROR] Could not load config: {load_err}"
        print(message, file=sys.stderr)
        sys.exit(1)

    setup_logging(settings)

    try:
        run_scheduled_check(settings)
    except Exception as run_err:  # pylint: disable=broad-except
        # Top-level boundary: record the traceback, then signal failure
        # through the exit status.
        log.exception("Scheduled check failed with unhandled exception: %s", run_err)
        sys.exit(1)


# Run main() only when this file is executed as a script or via
# ``python3 -m ns8_backup_monitor.scheduled_check``; importing the module
# does not trigger a check.
if __name__ == "__main__":
    main()
feat: add scheduled_check module — timer-based daily recap independent of Alertmanager 2026-05-18 22:18:45 +00:00			`#!/usr/bin/env python3`
			`"""Scheduled backup status check for ns8-backup-monitor.`

			`This module is the entry point for the scheduled recap path, invoked`
			`directly by a systemd timer (ns8-backup-monitor-check.timer) rather than`
			`by an Alertmanager webhook.`

			`Why a separate timer path?`
			`---------------------------`
			`The Alertmanager webhook path (receiver.py) only fires when Alertmanager`
			`emits a 'backup_failed' or 'backup_missing' alert — i.e. only on failure.`
			`Automatic scheduled backups that complete successfully never produce an`
			`Alertmanager alert, so the webhook is never called and no recap email is sent.`

			`This module solves that by running the full pipeline (correlator → repo_check`
			`on failure → notifier) on a schedule that mirrors the backup plan schedule.`
			`It reads Redis directly, classifies the outcome, and sends the recap email`
			`regardless of success or failure.`

			`Typical invocation (from systemd OnCalendar):`
			`python3 -m ns8_backup_monitor.scheduled_check`

			`The module exits 0 on success, non-zero on unrecoverable errors.`
			`"""`

			`import logging`
			`import sys`

			`from .correlator import correlate_backup_status`
			`from .notifier import send_notification`
			`from .repo_check import check_repositories`
			`from .utils import load_config, setup_logging`

			`log = logging.getLogger(__name__)`


			`def run_scheduled_check(config: dict):`
			`"""Run the full analysis pipeline once and send the recap email.`

			`Unlike the webhook path, this function does NOT wait before reading`
			`Redis: it is meant to be invoked by a timer that fires after the`
			`backup window has closed (configure OnCalendar accordingly).`

			`Steps`
			`-----`
			`1. Run correlator with no specific backup_ids — scans all recent keys`
			within the ``correlator.recent_window`` time window.
			`2. If outcome is not SUCCESS, run the repository health check.`
			`3. Send the notification email (success or failure).`

			`Args:`
			`config: Parsed configuration dictionary (output of load_config).`
			`"""`
			`log.info("Scheduled check starting — reading backup status from Redis...")`

			`# Pass empty backup_ids list: the correlator will scan all plan status`
			`# keys updated within the recent_window (default 3600 seconds).`
			`correlation = correlate_backup_status(config, backup_ids=[])`

			`outcome = correlation.get("outcome", "UNKNOWN")`
			`log.info("Correlator outcome: %s", outcome)`

			`# Run the repository health check on non-SUCCESS outcomes to provide`
			`# additional diagnostics in the email body.`
			`repo_status = None`
			`if outcome != "SUCCESS":`
			`log.info("Non-success outcome — running repository health check...")`
			`repo_status = check_repositories(config, correlation)`

			`log.info("Sending recap notification (outcome=%s)...", outcome)`
			`# Pass empty alerts list: the notifier will omit the 'Triggered by'`
			`# section cleanly when there is no originating Alertmanager alert.`
			`send_notification(config, alerts=[], correlation=correlation, repo_status=repo_status)`
			`log.info("Scheduled check complete.")`


			`def main():`
			`"""Entry point for the scheduled check (called by systemd timer)."""`
			`try:`
			`config = load_config()`
			`except (FileNotFoundError, ImportError) as exc:`
			`# Print to stderr so systemd captures it in the journal even if`
			`# logging has not been initialised yet.`
			`print(f"[ERROR] Could not load config: {exc}", file=sys.stderr)`
			`sys.exit(1)`

			`setup_logging(config)`

			`try:`
			`run_scheduled_check(config)`
			`except Exception as exc: # pylint: disable=broad-except`
			`log.exception("Scheduled check failed with unhandled exception: %s", exc)`
			`sys.exit(1)`


			`if __name__ == "__main__":`
			`main()`