docs: add section-by-section comments — notifier.py
This commit is contained in:
+215
-42
@@ -1,13 +1,30 @@
|
||||
#!/usr/bin/env python3
"""Build and send backup outcome email notifications via ns8-sendmail.

This module is the final stage of the pipeline. It takes the correlation
result and the optional repository health check result, renders both a
plain-text and an HTML email body, and dispatches the message through the
NS8 mail relay using ``runagent ns8-sendmail``.

Why ns8-sendmail / runagent?
----------------------------
NS8 modules are containerised; the cluster mail relay is exposed through
the ``runagent`` helper which bridges the host and the container network.
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state,
so no SMTP settings need to be stored in this project's config file.

Correct invocation (verified against NS8 source):
    runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]

Body is read from stdin as plain text.
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.

Outcome labels and colours used in the HTML email
--------------------------------------------------
SUCCESS      – label "OK",       header background #2e7d32 (green)
PARTIAL      – label "WARNING",  header background #e65100 (orange)
REPO_FAILURE – label "CRITICAL", header background #b71c1c (red)
"""
|
||||
|
||||
import logging
|
||||
import subprocess
|
||||
from datetime import datetime, timezone
|
||||
@@ -15,14 +32,41 @@ from typing import Optional
|
||||
|
||||
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Outcome presentation maps
# ---------------------------------------------------------------------------
# Both maps are keyed by the three internal outcome codes. OUTCOME_LABEL gives
# the short severity label; OUTCOME_COLOR gives the background colour used in
# the HTML email header banner. Each map is defined exactly once so the two
# cannot drift apart through a stale duplicate definition.

OUTCOME_LABEL = {
    "SUCCESS": "OK",
    "PARTIAL": "WARNING",
    "REPO_FAILURE": "CRITICAL",
}

OUTCOME_COLOR = {
    "SUCCESS": "#2e7d32",       # Material green 800
    "PARTIAL": "#e65100",       # Material deep-orange 900
    "REPO_FAILURE": "#b71c1c",  # Material red 900
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Command builder
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
|
||||
"""
|
||||
Build argv for ns8-sendmail.
|
||||
runagent must be in PATH (it is, for root on NS8 nodes).
|
||||
"""Build the argv list for invoking ns8-sendmail via runagent.
|
||||
|
||||
``runagent`` must be in PATH, which is guaranteed for root on NS8 nodes.
|
||||
|
||||
Args:
|
||||
subject: Email subject line (already includes prefix and date).
|
||||
mail_from: Envelope From address (may be empty; ns8-sendmail has a default).
|
||||
mail_to: List of recipient addresses.
|
||||
|
||||
Returns:
|
||||
List of strings ready to pass to subprocess.run().
|
||||
"""
|
||||
cmd = ["runagent", "ns8-sendmail", "-s", subject]
|
||||
if mail_from:
|
||||
@@ -31,106 +75,235 @@ def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
|
||||
return cmd
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Plain-text body renderer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render the plain-text email body.

    Args:
        correlation: Correlation result. Keys read here: ``outcome``
            (must be a key of ``OUTCOME_LABEL``), ``succeeded``, ``failed``,
            ``total``, and optionally ``backup_ids``, ``failed_modules``
            and ``note``.
        repo_status: Repository check result, or None/empty to omit the
            section. Keys read here: ``destinations`` (each entry with
            ``repo_id``, ``status`` and optional ``error``) and ``note``.

    Returns:
        The report as a single string with ``\\n`` line separators.

    Raises:
        KeyError: If a required key is missing or ``outcome`` is unknown.
    """
    outcome = correlation["outcome"]
    # Same fallback as the HTML renderer so an empty plan list is visible.
    plan_ids = ", ".join(correlation.get("backup_ids", [])) or "N/A"

    lines = [
        f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plan_ids}",
        # Emitted exactly once (the summary must not be repeated).
        f"Modules: {correlation['succeeded']} OK / "
        f"{correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # List each failed module with its error message. A missing or empty
    # list simply omits the section.
    failed_modules = correlation.get("failed_modules") or []
    if failed_modules:
        lines.append("Failed modules:")
        for m in failed_modules:
            # "or" also covers an explicit None error, which would otherwise
            # be rendered as the literal text "None".
            error = m.get("error") or "?"
            lines.append(f" - [{m['module_id']}] {m['backup_id']}: {error}")
        lines.append("")

    # Append repository health check details when available.
    if repo_status:
        lines.append("Repository check:")
        for dest in repo_status.get("destinations", []):
            error = dest.get("error") or ""
            lines.append(f" - [{dest['repo_id']}] {dest['status']}: {error}")
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")

    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTML body renderer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render the HTML email body.

    The document consists of a coloured header banner (outcome, timestamp,
    plan ids), a summary line with module counts, a per-module status table
    and, when ``repo_status`` yields at least one destination row, a
    repository check table. Only inline styles are used.

    Every value taken from ``correlation`` / ``repo_status`` is HTML-escaped
    before interpolation so that error messages or ids containing ``<``,
    ``>`` or ``&`` cannot break or inject markup.

    Args:
        correlation: Correlation result. Keys read here: ``outcome``
            (must be a key of ``OUTCOME_COLOR`` / ``OUTCOME_LABEL``),
            ``succeeded``, ``failed``, ``total``, and optionally
            ``backup_ids`` and ``modules`` (each entry with ``result``,
            ``module_id``, ``backup_id`` and optional ``timestamp`` /
            ``error``).
        repo_status: Repository check result, or None/empty to omit the
            section. Keys read here: ``destinations`` (each entry with
            ``repo_id``, ``status`` and optional ``error``) and ``note``.

    Returns:
        A complete ``<html>`` document as a string.

    Raises:
        KeyError: If a required key is missing or ``outcome`` is unknown.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    def esc(value) -> str:
        """HTML-escape *value*; None is rendered as an empty string."""
        return "" if value is None else escape(str(value))

    outcome = correlation["outcome"]
    color = OUTCOME_COLOR[outcome]
    label = OUTCOME_LABEL[outcome]
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = ", ".join(correlation.get("backup_ids", [])) or "N/A"

    # --- Per-module status table rows -----------------------------------
    module_rows = []
    for m in correlation.get("modules", []):
        succeeded = m["result"] == "success"
        bg = "#e8f5e9" if succeeded else "#ffebee"
        icon = "✓" if succeeded else "✗"
        module_rows.append(
            f'<tr style="background:{bg}">'
            f'<td style="padding:4px 8px">{icon}</td>'
            f'<td style="padding:4px 8px">{esc(m["module_id"])}</td>'
            f'<td style="padding:4px 8px">{esc(m["backup_id"])}</td>'
            f'<td style="padding:4px 8px">{esc(m.get("timestamp", ""))}</td>'
            f'<td style="padding:4px 8px">{esc(m.get("error", ""))}</td>'
            "</tr>"
        )
    rows = "".join(module_rows)

    # --- Repository health check section (optional) ---------------------
    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations", []):
            bg = "#e8f5e9" if dest["status"] == "OK" else "#ffebee"
            repo_rows.append(
                f'<tr style="background:{bg}">'
                f'<td style="padding:4px 8px">{esc(dest["repo_id"])}</td>'
                f'<td style="padding:4px 8px"><b>{esc(dest["status"])}</b></td>'
                f'<td style="padding:4px 8px">{esc(dest.get("error", ""))}</td>'
                "</tr>"
            )
        rr = "".join(repo_rows)
        # The table is only emitted when there is at least one destination.
        if rr:
            repo_section = (
                "<h3 style='margin-top:24px'>Repository check</h3>"
                "<table border='1' cellspacing='0' "
                "style='border-collapse:collapse;font-size:13px;width:100%'>"
                "<thead><tr style='background:#f5f5f5'>"
                "<th style='padding:4px 8px'>Repo</th>"
                "<th style='padding:4px 8px'>Status</th>"
                "<th style='padding:4px 8px'>Detail</th></tr></thead>"
                f"<tbody>{rr}</tbody></table>"
            )
        if repo_status.get("note"):
            repo_section += (
                f"<p style='color:#777;font-size:12px'>{esc(repo_status['note'])}</p>"
            )

    # --- Assemble the full HTML document --------------------------------
    return (
        "<html><body style='font-family:monospace;font-size:14px;"
        "max-width:800px;margin:auto'>"
        # Header banner
        f"<div style='background:{color};color:#fff;padding:16px 20px;"
        "border-radius:6px 6px 0 0'>"
        f"<b>NS8 Backup Monitor — {label}: {outcome}</b><br>"
        f"<small>{ts} • Plans: {esc(plan_ids)}</small></div>"
        # Body
        "<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;"
        "border-radius:0 0 6px 6px'>"
        f"<p><b>{esc(correlation['succeeded'])}</b> OK | "
        f"<b>{esc(correlation['failed'])}</b> FAILED | "
        f"<b>{esc(correlation['total'])}</b> total</p>"
        # Per-module table
        "<table border='1' cellspacing='0' style='border-collapse:collapse;"
        "font-size:13px;width:100%'>"
        "<thead><tr style='background:#f5f5f5'>"
        "<th style='padding:4px 8px'></th>"
        "<th style='padding:4px 8px'>Module</th>"
        "<th style='padding:4px 8px'>Backup ID</th>"
        "<th style='padding:4px 8px'>Timestamp</th>"
        "<th style='padding:4px 8px'>Error</th>"
        f"</tr></thead><tbody>{rows}</tbody></table>"
        f"{repo_section}</div></body></html>"
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Main entry point
# ---------------------------------------------------------------------------

def send_notification(
    config: dict,
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict] = None,
) -> None:
    """Build and send the backup outcome email via ns8-sendmail.

    Only the plain-text rendering is sent: it is written to the stdin of
    the ns8-sendmail command. The HTML renderer is not invoked by this
    function.

    All delivery problems (missing recipients, missing ``runagent`` binary,
    timeout, non-zero exit status, any other exception from the subprocess
    call) are logged and swallowed; nothing is raised for them and nothing
    is returned.

    Args:
        config: Parsed configuration dictionary. The ``mail`` section is
            read for ``subject_prefix`` (default ``"[NS8 Backup]"``),
            ``to`` and ``from``.
        alerts: Not used by this function; kept in the signature for
            interface compatibility with existing callers.
        correlation: Correlation result; ``outcome`` must be a key of
            ``OUTCOME_LABEL``. Passed on to the text renderer.
        repo_status: Repository check result or None. Passed on to the
            text renderer.

    Raises:
        KeyError: If ``correlation`` has no ``outcome`` key or the outcome
            is not a key of ``OUTCOME_LABEL``.
    """
    outcome = correlation["outcome"]
    mail_cfg = config.get("mail", {})

    # --- Build subject line ---------------------------------------------
    subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
    subject = (
        f"{subject_prefix} {OUTCOME_LABEL[outcome]}: {outcome} - "
        f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )

    # --- Validate recipients --------------------------------------------
    mail_to = mail_cfg.get("to", [])
    if isinstance(mail_to, str):
        # _send_cmd is annotated to take a list of addresses; wrap a lone
        # address so it is not treated as a sequence of characters.
        mail_to = [mail_to] if mail_to.strip() else []
    if not mail_to:
        log.error("No mail.to recipients configured — notification not sent")
        return
    mail_from = mail_cfg.get("from", "")

    # --- Render body and dispatch ---------------------------------------
    # ns8-sendmail accepts plain text on stdin.
    body = _build_text(correlation, repo_status)
    cmd = _send_cmd(subject, mail_from, mail_to)
    log.debug("send cmd: %s", cmd)

    try:
        proc = subprocess.run(
            cmd,
            input=body,
            text=True,
            capture_output=True,
            timeout=30,
        )
        if proc.returncode != 0:
            # Log the first non-empty output stream as the error detail.
            err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
            log.error("ns8-sendmail failed: %s", err)
        else:
            log.info("Notification sent: %s -> %s", subject, mail_to)
    except FileNotFoundError:
        # The first argv element (runagent) could not be executed.
        log.error("'runagent' not found in PATH — is this an NS8 node?")
    except subprocess.TimeoutExpired:
        log.error("ns8-sendmail timed out after 30s")
    except Exception as e:
        # Best-effort boundary: a notification failure must not propagate.
        log.error("Failed to send notification: %s", e)
|
||||
|
||||
Reference in New Issue
Block a user