Files
ns8-backup-monitor/ns8_backup_monitor/notifier.py
T

310 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Build and send backup outcome email notifications via ns8-sendmail.
This module is the final stage of the pipeline. It takes the correlation
result and the optional repository health check result, renders a
plain-text email body, and dispatches it through the NS8 mail relay using
``runagent ns8-sendmail``. An HTML renderer is also provided, but its
output is not currently sent by ``send_notification``.
Why ns8-sendmail / runagent?
----------------------------
NS8 modules are containerised; the cluster mail relay is exposed through
the ``runagent`` helper which bridges the host and the container network.
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state,
so no SMTP settings need to be stored in this project's config file.
Correct invocation (verified against NS8 source):
runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]
Body is read from stdin as plain text.
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.
Outcome labels and colours used in the HTML email
--------------------------------------------------
SUCCESS label "OK", header background #2e7d32 (green)
PARTIAL label "WARNING", header background #e65100 (orange)
REPO_FAILURE label "CRITICAL", header background #b71c1c (red)
"""
import logging
import subprocess
from datetime import datetime, timezone
from typing import Optional
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Outcome presentation maps
# ---------------------------------------------------------------------------
# Maps the three internal outcome codes to a short label and a background
# colour used in the HTML email header banner.
# One row per internal outcome code: (code, short label, banner colour).
# Both lookup maps below are derived from this single table so that a code
# can never be present in one map and missing from the other.
_OUTCOME_PRESENTATION = (
    ("SUCCESS", "OK", "#2e7d32"),  # Material green 800
    ("PARTIAL", "WARNING", "#e65100"),  # Material deep-orange 900
    ("REPO_FAILURE", "CRITICAL", "#b71c1c"),  # Material red 900
)
# Outcome code -> short human-readable label.
OUTCOME_LABEL = {code: label for code, label, _ in _OUTCOME_PRESENTATION}
# Outcome code -> CSS colour of the HTML header banner.
OUTCOME_COLOR = {code: colour for code, _, colour in _OUTCOME_PRESENTATION}
# ---------------------------------------------------------------------------
# Command builder
# ---------------------------------------------------------------------------
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
"""Build the argv list for invoking ns8-sendmail via runagent.
``runagent`` must be in PATH, which is guaranteed for root on NS8 nodes.
Args:
subject: Email subject line (already includes prefix and date).
mail_from: Envelope From address (may be empty; ns8-sendmail has a default).
mail_to: List of recipient addresses.
Returns:
List of strings ready to pass to subprocess.run().
"""
cmd = ["runagent", "ns8-sendmail", "-s", subject]
if mail_from:
cmd += ["-f", mail_from]
cmd += mail_to
return cmd
# ---------------------------------------------------------------------------
# Plain-text body renderer
# ---------------------------------------------------------------------------
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render a plain-text email body from the correlation and repo results.

    The report consists of a header (outcome, UTC timestamp, plan ids and
    module counts), an optional list of failed modules, an optional
    repository check section, and an optional trailing note.

    Args:
        correlation: Correlation result. The keys ``outcome``, ``succeeded``,
            ``failed`` and ``total`` are required; ``backup_ids``,
            ``failed_modules`` and ``note`` are optional.
        repo_status: Repository check result, or None to omit that section.
            Its ``destinations`` entries must carry ``repo_id`` and
            ``status``; ``error`` and the top-level ``note`` are optional.

    Returns:
        Multi-line string joined with ``"\\n"``.
    """
    outcome = correlation["outcome"]
    # An unexpected outcome code must not prevent the report from being
    # rendered; fall back to a generic label instead of raising KeyError.
    label = OUTCOME_LABEL.get(outcome, "UNKNOWN")
    # Mirror the HTML renderer: show "N/A" rather than an empty field.
    plan_ids = ", ".join(map(str, correlation.get("backup_ids") or [])) or "N/A"

    lines = [
        f"NS8 Backup Monitor - {label}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plan_ids}",
        f"Modules: {correlation['succeeded']} OK / "
        f"{correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # List each failed module with its error message. ``or '?'`` also covers
    # an explicit ``error: None`` entry, which would otherwise print "None".
    failed_modules = correlation.get("failed_modules") or []
    if failed_modules:
        lines.append("Failed modules:")
        lines.extend(
            f" - [{m['module_id']}] {m['backup_id']}: {m.get('error') or '?'}"
            for m in failed_modules
        )
        lines.append("")

    # Append repository health check details when available.
    if repo_status:
        lines.append("Repository check:")
        lines.extend(
            f" - [{dest['repo_id']}] {dest['status']}: {dest.get('error') or ''}"
            for dest in repo_status.get("destinations") or []
        )
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# HTML body renderer
# ---------------------------------------------------------------------------
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render an HTML email body from the correlation and repo results.

    Produces a self-contained HTML document with:

    - A coloured header banner showing the outcome, a UTC timestamp and the
      plan ids.
    - A summary line with module counts.
    - A per-module status table; rows are light green for ``success``
      results and light red for anything else.
    - An optional repository check table (and note) when ``repo_status``
      is present.

    Only inline ``style`` attributes are used; no ``<style>`` block is
    emitted. Every value taken from the input dicts is HTML-escaped before
    being interpolated, so characters such as ``<`` or ``&`` inside an error
    message cannot break or inject markup.

    Args:
        correlation: Correlation result. The keys ``outcome``, ``succeeded``,
            ``failed`` and ``total`` are required; ``backup_ids`` and
            ``modules`` are optional.
        repo_status: Repository check result, or None to omit that section.

    Returns:
        HTML string.
    """
    # Local import keeps this function self-contained within the module.
    from html import escape

    def esc(value) -> str:
        """HTML-escape *value*; None becomes an empty string."""
        return "" if value is None else escape(str(value))

    outcome = correlation["outcome"]
    # Unknown outcome codes fall back to a neutral grey banner and a generic
    # label rather than raising KeyError.
    color = OUTCOME_COLOR.get(outcome, "#616161")
    label = OUTCOME_LABEL.get(outcome, "UNKNOWN")
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = ", ".join(map(str, correlation.get("backup_ids") or [])) or "N/A"

    # Opening tag shared by every body cell of both tables.
    td = '<td style="padding:4px 8px">'

    # -----------------------------------------------------------------------
    # Per-module status table rows
    # -----------------------------------------------------------------------
    module_rows = []
    for m in correlation.get("modules") or []:
        ok = m.get("result") == "success"
        bg = "#e8f5e9" if ok else "#ffebee"
        icon = "&#10003;" if ok else "&#10007;"  # check mark / ballot X
        module_rows.append(
            f'<tr style="background:{bg}">'
            f"{td}{icon}</td>"
            f'{td}{esc(m["module_id"])}</td>'
            f'{td}{esc(m["backup_id"])}</td>'
            f'{td}{esc(m.get("timestamp"))}</td>'
            f'{td}{esc(m.get("error"))}</td>'
            "</tr>"
        )
    rows = "".join(module_rows)

    # -----------------------------------------------------------------------
    # Repository health check section (optional)
    # -----------------------------------------------------------------------
    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations") or []:
            bg = "#e8f5e9" if dest["status"] == "OK" else "#ffebee"
            repo_rows.append(
                f'<tr style="background:{bg}">'
                f'{td}{esc(dest["repo_id"])}</td>'
                f'{td}<b>{esc(dest["status"])}</b></td>'
                f'{td}{esc(dest.get("error"))}</td>'
                "</tr>"
            )
        # The table is only emitted when there is at least one destination.
        if repo_rows:
            repo_section = (
                "<h3 style='margin-top:24px'>Repository check</h3>"
                "<table border='1' cellspacing='0' "
                "style='border-collapse:collapse;font-size:13px;width:100%'>"
                "<thead><tr style='background:#f5f5f5'>"
                "<th style='padding:4px 8px'>Repo</th>"
                "<th style='padding:4px 8px'>Status</th>"
                "<th style='padding:4px 8px'>Detail</th></tr></thead>"
                f"<tbody>{''.join(repo_rows)}</tbody></table>"
            )
        if repo_status.get("note"):
            repo_section += (
                "<p style='color:#777;font-size:12px'>"
                f"{esc(repo_status['note'])}</p>"
            )

    # -----------------------------------------------------------------------
    # Assemble the full HTML document
    # -----------------------------------------------------------------------
    return (
        "<html><body style='font-family:monospace;font-size:14px;"
        "max-width:800px;margin:auto'>"
        # Header banner
        f"<div style='background:{color};color:#fff;padding:16px 20px;"
        "border-radius:6px 6px 0 0'>"
        f"<b>NS8 Backup Monitor &mdash; {label}: {esc(outcome)}</b><br>"
        f"<small>{ts} &bull; Plans: {esc(plan_ids)}</small></div>"
        # Body
        "<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;"
        "border-radius:0 0 6px 6px'>"
        f"<p><b>{esc(correlation['succeeded'])}</b> OK &nbsp;|&nbsp;"
        f"<b>{esc(correlation['failed'])}</b> FAILED &nbsp;|&nbsp;"
        f"<b>{esc(correlation['total'])}</b> total</p>"
        # Per-module table
        "<table border='1' cellspacing='0' style='border-collapse:collapse;"
        "font-size:13px;width:100%'>"
        "<thead><tr style='background:#f5f5f5'>"
        "<th style='padding:4px 8px'></th>"
        "<th style='padding:4px 8px'>Module</th>"
        "<th style='padding:4px 8px'>Backup ID</th>"
        "<th style='padding:4px 8px'>Timestamp</th>"
        "<th style='padding:4px 8px'>Error</th>"
        f"</tr></thead><tbody>{rows}</tbody></table>"
        f"{repo_section}</div></body></html>"
    )
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def send_notification(
    config: dict,
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict] = None,
):
    """Build and send the backup outcome email via ns8-sendmail.

    Renders the plain-text body and pipes it to ``runagent ns8-sendmail``
    on stdin. No HTML body is built or sent here. Delivery problems are
    logged and never raised to the caller.

    Args:
        config: Parsed configuration dictionary. The optional ``mail``
            section is read for ``to`` (list of addresses, or a single
            address string), ``from`` and ``subject_prefix``.
        alerts: Not used by this function; accepted so the call signature
            stays stable for existing callers.
        correlation: Correlation result; must contain ``outcome`` plus
            whatever the plain-text renderer requires.
        repo_status: Repository check result, or None.

    Returns:
        None in every case; success and failure are reported via logging.
    """
    outcome = correlation["outcome"]
    # ``mail: null`` in the config yields None, not a missing key.
    mail_cfg = config.get("mail") or {}

    # -----------------------------------------------------------------------
    # Validate recipients
    # -----------------------------------------------------------------------
    mail_to = mail_cfg.get("to") or []
    # A single address written as a plain string would otherwise be split
    # into one argv entry per character when the command is assembled.
    if isinstance(mail_to, str):
        mail_to = [mail_to]
    mail_to = [addr for addr in mail_to if addr]
    if not mail_to:
        log.error("No mail.to recipients configured — notification not sent")
        return
    mail_from = mail_cfg.get("from") or ""

    # -----------------------------------------------------------------------
    # Build subject line
    # -----------------------------------------------------------------------
    subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
    # Unknown outcome codes get a generic label instead of raising KeyError,
    # so an unexpected code still produces a notification.
    label = OUTCOME_LABEL.get(outcome, "UNKNOWN")
    subject = (
        f"{subject_prefix} {label}: {outcome} - "
        f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )

    # -----------------------------------------------------------------------
    # Render body and dispatch
    # -----------------------------------------------------------------------
    body = _build_text(correlation, repo_status)
    cmd = _send_cmd(subject, mail_from, mail_to)
    log.debug("send cmd: %s", cmd)

    timeout_s = 30
    try:
        proc = subprocess.run(
            cmd,
            input=body,
            text=True,
            capture_output=True,
            timeout=timeout_s,
        )
    except FileNotFoundError:
        # The first argv element (runagent) could not be found in PATH.
        log.error("'runagent' not found in PATH — is this an NS8 node?")
        return
    except subprocess.TimeoutExpired:
        log.error("ns8-sendmail timed out after %ss", timeout_s)
        return
    except Exception as e:
        # Best-effort boundary: a failed notification must not crash the
        # caller. The traceback is logged for diagnosis.
        log.exception("Failed to send notification: %s", e)
        return

    if proc.returncode != 0:
        # Log the first non-empty output stream as the error detail.
        err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
        log.error("ns8-sendmail failed: %s", err)
    else:
        log.info("Notification sent: %s -> %s", subject, mail_to)