docs: add section-by-section comments — notifier.py

This commit is contained in:
2026-05-18 21:04:12 +00:00
parent 20d7ecc8c4
commit 7f40fe5d48
+230 -57
View File
@@ -1,13 +1,30 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """Build and send backup outcome email notifications via ns8-sendmail.
notifier.py - Sends email via ns8-sendmail.
Correct invocation (from working reference code): This module is the final stage of the pipeline. It takes the correlation
runagent ns8-sendmail -s <subject> -f <from> <to> [<to> ...] result and the optional repository health check result, renders both a
plain-text and an HTML email body, and dispatches the message through the
NS8 mail relay using ``runagent ns8-sendmail``.
Body is passed on stdin as plain text. Why ns8-sendmail / runagent?
ns8-sendmail does NOT read To:/From:/Subject: from headers. ----------------------------
NS8 modules are containerised; the cluster mail relay is exposed through
the ``runagent`` helper which bridges the host and the container network.
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state,
so no SMTP settings need to be stored in this project's config file.
Correct invocation (verified against NS8 source):
runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]
Body is read from stdin as plain text.
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.
Outcome labels and colours used in the HTML email
--------------------------------------------------
SUCCESS label "OK", header background #2e7d32 (green)
PARTIAL label "WARNING", header background #e65100 (orange)
REPO_FAILURE label "CRITICAL", header background #b71c1c (red)
""" """
import logging import logging
import subprocess import subprocess
from datetime import datetime, timezone from datetime import datetime, timezone
@@ -15,14 +32,41 @@ from typing import Optional
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
OUTCOME_LABEL = {"SUCCESS": "OK", "PARTIAL": "WARNING", "REPO_FAILURE": "CRITICAL"} # ---------------------------------------------------------------------------
OUTCOME_COLOR = {"SUCCESS": "#2e7d32", "PARTIAL": "#e65100", "REPO_FAILURE": "#b71c1c"} # Outcome presentation maps
# ---------------------------------------------------------------------------
# Maps the three internal outcome codes to a short label and a background
# colour used in the HTML email header banner.
OUTCOME_LABEL = {
"SUCCESS": "OK",
"PARTIAL": "WARNING",
"REPO_FAILURE": "CRITICAL",
}
OUTCOME_COLOR = {
"SUCCESS": "#2e7d32", # Material green 800
"PARTIAL": "#e65100", # Material deep-orange 900
"REPO_FAILURE": "#b71c1c", # Material red 900
}
# ---------------------------------------------------------------------------
# Command builder
# ---------------------------------------------------------------------------
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list: def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
""" """Build the argv list for invoking ns8-sendmail via runagent.
Build argv for ns8-sendmail.
runagent must be in PATH (it is, for root on NS8 nodes). ``runagent`` must be in PATH, which is guaranteed for root on NS8 nodes.
Args:
subject: Email subject line (already includes prefix and date).
mail_from: Envelope From address (may be empty; ns8-sendmail has a default).
mail_to: List of recipient addresses.
Returns:
List of strings ready to pass to subprocess.run().
""" """
cmd = ["runagent", "ns8-sendmail", "-s", subject] cmd = ["runagent", "ns8-sendmail", "-s", subject]
if mail_from: if mail_from:
@@ -31,106 +75,235 @@ def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
return cmd return cmd
# ---------------------------------------------------------------------------
# Plain-text body renderer
# ---------------------------------------------------------------------------
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render a plain-text email body from the correlation and repo results.

    The report contains a header (outcome, UTC timestamp, plan ids, module
    counts), an optional list of failed modules, an optional repository
    check section and an optional trailing note.

    Args:
        correlation: Correlation result. The keys ``outcome``, ``succeeded``,
            ``failed`` and ``total`` are required; ``backup_ids``,
            ``failed_modules`` and ``note`` are optional.
        repo_status: Repository check result, or None to omit the section.
            Reads the optional keys ``destinations`` and ``note``.

    Returns:
        Multi-line string joined with ``\\n``.

    Raises:
        KeyError: If a required key is missing or ``outcome`` is not one of
            the codes in ``OUTCOME_LABEL``.
    """
    outcome = correlation["outcome"]

    # Same "N/A" fallback as the HTML body, so an empty plan list does not
    # leave a dangling "Plans:" label.
    plan_ids = ", ".join(correlation.get("backup_ids") or []) or "N/A"

    lines = [
        f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plan_ids}",
        f"Modules: {correlation['succeeded']} OK / "
        f"{correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # List each failed module with its error message.  ``or '?'`` (rather
    # than a .get() default) also covers an explicit ``error: None``.
    failed_modules = correlation.get("failed_modules") or []
    if failed_modules:
        lines.append("Failed modules:")
        lines.extend(
            f" - [{m['module_id']}] {m['backup_id']}: {m.get('error') or '?'}"
            for m in failed_modules
        )
        lines.append("")

    # Append repository health check details when available.
    if repo_status:
        lines.append("Repository check:")
        for dest in repo_status.get("destinations") or []:
            entry = f" - [{dest['repo_id']}] {dest['status']}"
            detail = dest.get("error")
            if detail:
                # Only add the separator when there is something to show;
                # healthy repositories get a clean "[id] STATUS" line.
                entry += f": {detail}"
            lines.append(entry)
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")

    return "\n".join(lines)
# ---------------------------------------------------------------------------
# HTML body renderer
# ---------------------------------------------------------------------------
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render an HTML email body from the correlation and repo results.

    Produces a self-contained HTML document with:
      - A coloured header banner showing the outcome and timestamp.
      - A summary line with module counts.
      - A per-module status table with colour-coded rows (green/red).
      - An optional repository check table and note when repo_status is
        present.

    Only inline ``style`` attributes are used; there is no <style> block.

    Every value taken from ``correlation`` / ``repo_status`` is HTML-escaped
    before interpolation, so error messages containing ``<``, ``>`` or ``&``
    cannot break the markup or inject tags. ``None`` values render as an
    empty cell instead of the literal text "None".

    Args:
        correlation: Correlation result. The keys ``outcome``, ``succeeded``,
            ``failed`` and ``total`` are required; ``backup_ids`` and
            ``modules`` are optional.
        repo_status: Repository check result, or None to omit the section.
            Reads the optional keys ``destinations`` and ``note``.

    Returns:
        HTML string.

    Raises:
        KeyError: If a required key is missing or ``outcome`` is not one of
            the codes in ``OUTCOME_COLOR`` / ``OUTCOME_LABEL``.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value) -> str:
        """Return *value* as HTML-safe text ('' for None)."""
        return "" if value is None else escape(str(value))

    def cell(content: str) -> str:
        """Wrap already-safe *content* in a padded table cell."""
        return f'<td style="padding:4px 8px">{content}</td>'

    ok_bg, bad_bg = "#e8f5e9", "#ffebee"

    outcome = correlation["outcome"]
    color = OUTCOME_COLOR[outcome]
    label = OUTCOME_LABEL[outcome]
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = ", ".join(str(p) for p in correlation.get("backup_ids") or []) or "N/A"

    # -----------------------------------------------------------------------
    # Per-module status table rows
    # -----------------------------------------------------------------------
    module_rows = []
    for m in correlation.get("modules") or []:
        succeeded = m["result"] == "success"
        bg = ok_bg if succeeded else bad_bg
        icon = "&#10003;" if succeeded else "&#10007;"  # check mark / cross
        module_rows.append(
            f'<tr style="background:{bg}">'
            + cell(icon)
            + cell(esc(m["module_id"]))
            + cell(esc(m["backup_id"]))
            + cell(esc(m.get("timestamp")))
            + cell(esc(m.get("error")))
            + "</tr>"
        )
    rows = "".join(module_rows)

    # -----------------------------------------------------------------------
    # Repository health check section (optional)
    # -----------------------------------------------------------------------
    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations") or []:
            bg = ok_bg if dest["status"] == "OK" else bad_bg
            repo_rows.append(
                f'<tr style="background:{bg}">'
                + cell(esc(dest["repo_id"]))
                + cell(f"<b>{esc(dest['status'])}</b>")
                + cell(esc(dest.get("error")))
                + "</tr>"
            )
        rr = "".join(repo_rows)
        if rr:
            repo_section = (
                "<h3 style='margin-top:24px'>Repository check</h3>"
                "<table border='1' cellspacing='0' "
                "style='border-collapse:collapse;font-size:13px;width:100%'>"
                "<thead><tr style='background:#f5f5f5'>"
                "<th style='padding:4px 8px'>Repo</th>"
                "<th style='padding:4px 8px'>Status</th>"
                "<th style='padding:4px 8px'>Detail</th></tr></thead>"
                f"<tbody>{rr}</tbody></table>"
            )
        # The note is appended even when there are no destination rows.
        if repo_status.get("note"):
            repo_section += (
                f"<p style='color:#777;font-size:12px'>{esc(repo_status['note'])}</p>"
            )

    # -----------------------------------------------------------------------
    # Assemble the full HTML document
    # -----------------------------------------------------------------------
    return (
        "<html><body style='font-family:monospace;font-size:14px;"
        "max-width:800px;margin:auto'>"
        # Header banner
        f"<div style='background:{color};color:#fff;padding:16px 20px;"
        "border-radius:6px 6px 0 0'>"
        f"<b>NS8 Backup Monitor &mdash; {esc(label)}: {esc(outcome)}</b><br>"
        f"<small>{ts} &bull; Plans: {esc(plan_ids)}</small></div>"
        # Body
        "<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;"
        "border-radius:0 0 6px 6px'>"
        f"<p><b>{esc(correlation['succeeded'])}</b> OK &nbsp;|&nbsp;"
        f"<b>{esc(correlation['failed'])}</b> FAILED &nbsp;|&nbsp;"
        f"<b>{esc(correlation['total'])}</b> total</p>"
        # Per-module table
        "<table border='1' cellspacing='0' style='border-collapse:collapse;"
        "font-size:13px;width:100%'>"
        "<thead><tr style='background:#f5f5f5'>"
        "<th style='padding:4px 8px'></th>"
        "<th style='padding:4px 8px'>Module</th>"
        "<th style='padding:4px 8px'>Backup ID</th>"
        "<th style='padding:4px 8px'>Timestamp</th>"
        "<th style='padding:4px 8px'>Error</th>"
        f"</tr></thead><tbody>{rows}</tbody></table>"
        f"{repo_section}</div></body></html>"
    )
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------
def send_notification(
    config: dict,
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict] = None,
):
    """Build and send the backup outcome email via ns8-sendmail.

    Only the plain-text body (``_build_text``) is sent, piped to the
    ns8-sendmail command on stdin. ``_build_html`` is not called from here.

    Failures are logged and swallowed: a missing recipient list, a missing
    ``runagent`` binary, a timeout or a non-zero exit status never raise to
    the caller.

    Args:
        config: Parsed configuration dictionary. Reads the optional ``mail``
            section with keys ``to`` (list of addresses, or a single address
            string), ``from`` and ``subject_prefix``.
        alerts: Accepted for interface compatibility; not read by this
            function.
        correlation: Correlation result; ``outcome`` must be one of the codes
            in ``OUTCOME_LABEL``.
        repo_status: Repository check result, or None.

    Returns:
        None.
    """
    outcome = correlation["outcome"]
    # ``or {}`` also covers a config file with an empty ``mail:`` section,
    # which parses to None rather than a missing key.
    mail_cfg = config.get("mail") or {}

    # -----------------------------------------------------------------------
    # Build subject line
    # -----------------------------------------------------------------------
    subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
    subject = (
        f"{subject_prefix} {OUTCOME_LABEL[outcome]}: {outcome} - "
        f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )

    # -----------------------------------------------------------------------
    # Validate recipients
    # -----------------------------------------------------------------------
    mail_to = mail_cfg.get("to") or []
    if isinstance(mail_to, str):
        # A single address written as a scalar must be passed on as one
        # recipient, not as a string standing in for a list.
        mail_to = [mail_to]
    mail_to = [addr for addr in mail_to if addr]
    if not mail_to:
        log.error("No mail.to recipients configured — notification not sent")
        return
    mail_from = mail_cfg.get("from") or ""

    # -----------------------------------------------------------------------
    # Render body and dispatch
    # -----------------------------------------------------------------------
    body = _build_text(correlation, repo_status)
    cmd = _send_cmd(subject, mail_from, mail_to)
    log.debug("send cmd: %s", cmd)

    timeout_s = 30
    try:
        # Keep the try body to the one call that can raise.
        proc = subprocess.run(
            cmd,
            input=body,
            text=True,
            capture_output=True,
            timeout=timeout_s,
        )
    except FileNotFoundError:
        # The first argv element (runagent) could not be executed.
        log.error("'runagent' not found in PATH — is this an NS8 node?")
        return
    except subprocess.TimeoutExpired:
        log.error("ns8-sendmail timed out after %ss", timeout_s)
        return
    except Exception as e:
        # Best-effort boundary: never let a notification failure propagate,
        # but keep the traceback so the cause can be diagnosed.
        log.error("Failed to send notification: %s", e, exc_info=True)
        return

    if proc.returncode != 0:
        # Log the first non-empty output stream as the error detail.
        err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
        log.error("ns8-sendmail failed: %s", err)
    else:
        log.info("Notification sent: %s -> %s", subject, mail_to)