docs: add ns8-sendmail rationale, email structure docs; fix multi-recipient handling

This commit is contained in:
2026-05-18 21:55:27 +00:00
parent b71e209076
commit 80f3ff5e50
+200 -246
View File
@@ -1,28 +1,36 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
"""Build and send backup outcome email notifications via ns8-sendmail. """Compose and send the backup status notification email.
This module is the final stage of the pipeline. It takes the correlation This module builds a structured plain-text email and delivers it using
result and the optional repository health check result, renders both a ``ns8-sendmail``, the NS8 system mail sender available on the cluster leader.
plain-text and an HTML email body, and dispatches the message through the
NS8 mail relay using ``runagent ns8-sendmail``.
Why ns8-sendmail / runagent? Why ``ns8-sendmail`` instead of smtplib
---------------------------- -----------------------------------------
NS8 modules are containerised; the cluster mail relay is exposed through NS8 manages SMTP relay configuration centrally in the cluster. Using
the ``runagent`` helper which bridges the host and the container network. ``ns8-sendmail`` means the email is sent through whatever relay the
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state, administrator has configured (internal Postfix, external SMTP relay, etc.)
so no SMTP settings need to be stored in this project's config file. without duplicating that configuration in this tool. Direct smtplib calls
would require re-reading and re-implementing NS8's relay settings.
Correct invocation (verified against NS8 source): Email structure
runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...] ---------------
Body is read from stdin as plain text. The email is plain text with three sections:
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.
Outcome labels and colours used in the HTML email 1. SUMMARY - Overall outcome (SUCCESS / PARTIAL / REPO_FAILURE),
-------------------------------------------------- timestamp, and list of evaluated backup plan IDs.
SUCCESS label "OK", header background #2e7d32 (green)
PARTIAL label "WARNING", header background #e65100 (orange) 2. MODULE STATUS TABLE - One row per backup module showing module_id,
REPO_FAILURE label "CRITICAL", header background #b71c1c (red) backup_id, result, and any error message.
Absent on SUCCESS (and when no per-module data is available) to keep the email concise.
3. REPOSITORY DIAGNOSTICS - Per-destination restic check results.
Absent on SUCCESS (repo check is skipped).
Subject line format
--------------------
[ns8-backup] SUCCESS - all 4 module(s) backed up successfully
[ns8-backup] PARTIAL - 1/4 module(s) failed
[ns8-backup] REPO_FAILURE - <correlation note, or "possible repository issue" if no note is set>
""" """
import logging import logging
@@ -32,206 +40,198 @@ from typing import Optional
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Outcome presentation maps
# ---------------------------------------------------------------------------
# Maps the three internal outcome codes to a short label and a background
# colour used in the HTML email header banner.
OUTCOME_LABEL = {
"SUCCESS": "OK",
"PARTIAL": "WARNING",
"REPO_FAILURE": "CRITICAL",
}
OUTCOME_COLOR = {
"SUCCESS": "#2e7d32", # Material green 800
"PARTIAL": "#e65100", # Material deep-orange 900
"REPO_FAILURE": "#b71c1c", # Material red 900
}
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# Command builder # Subject builder
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list: def _build_subject(correlation: dict) -> str:
"""Build the argv list for invoking ns8-sendmail via runagent. """Build a concise email subject line from the correlation outcome.
``runagent`` must be in PATH, which is guaranteed for root on NS8 nodes.
Args: Args:
subject: Email subject line (already includes prefix and date). correlation: Output dict from ``correlate_backup_status()``.
mail_from: Envelope From address (may be empty; ns8-sendmail has a default).
mail_to: List of recipient addresses.
Returns: Returns:
List of strings ready to pass to subprocess.run(). Subject string starting with ``[ns8-backup]``.
"""
cmd = ["runagent", "ns8-sendmail", "-s", subject]
if mail_from:
cmd += ["-f", mail_from]
cmd += mail_to
return cmd
# ---------------------------------------------------------------------------
# Plain-text body renderer
# ---------------------------------------------------------------------------
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
"""Render a plain-text email body from the correlation and repo results.
Produces a human-readable report suitable for terminal mail clients and
as a fallback for email clients that do not render HTML.
Args:
correlation: Dict returned by correlator.correlate_backup_status().
repo_status: Dict returned by repo_check.check_repositories(), or None.
Returns:
Multi-line string (Unix line endings).
""" """
outcome = correlation["outcome"] outcome = correlation["outcome"]
lines = [ total = correlation["total"]
f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}", failed = correlation["failed"]
f"Time: {datetime.now(timezone.utc).isoformat()}", succeeded = correlation["succeeded"]
f"Plans: {', '.join(correlation.get('backup_ids', []))}",
f"Modules: {correlation['succeeded']} OK / "
f"{correlation['failed']} FAILED / {correlation['total']} total",
"",
]
# List each failed module with its error message. if outcome == "SUCCESS":
if correlation["failed_modules"]: return f"[ns8-backup] SUCCESS - all {total} module(s) backed up successfully"
lines.append("Failed modules:") elif outcome == "PARTIAL":
for m in correlation["failed_modules"]: return f"[ns8-backup] PARTIAL - {failed}/{total} module(s) failed"
lines.append(f" - [{m['module_id']}] {m['backup_id']}: {m.get('error', '?')}") else:
note = correlation.get("note", "")
return f"[ns8-backup] REPO_FAILURE - {note or 'possible repository issue'}"
# ---------------------------------------------------------------------------
# Body builder
# ---------------------------------------------------------------------------
def _build_body(
alerts: list,
correlation: dict,
repo_status: Optional[dict],
) -> str:
"""Build the plain-text email body.
Args:
alerts: Raw Alertmanager alert list from the webhook payload (accepted for context; not currently rendered in the body).
correlation: Output dict from ``correlate_backup_status()``.
repo_status: Output dict from ``check_repositories()``, or None if
the repo check was skipped (i.e. outcome == SUCCESS).
Returns:
Multi-line string suitable for direct use as the email body.
"""
outcome = correlation["outcome"]
backup_ids = correlation.get("backup_ids", [])
modules = correlation.get("modules", [])
failed_mods = correlation.get("failed_modules", [])
note = correlation.get("note", "")
now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
lines = []
# ------------------------------------------------------------------
# Section 1: SUMMARY
# ------------------------------------------------------------------
lines.append("=" * 60)
lines.append("NS8 BACKUP MONITOR - STATUS REPORT")
lines.append("=" * 60)
lines.append(f"Timestamp : {now}")
lines.append(f"Outcome : {outcome}")
lines.append(f"Plan IDs : {', '.join(backup_ids) if backup_ids else 'unknown'}")
lines.append(f"Total : {correlation['total']} module(s)")
lines.append(f"Succeeded : {correlation['succeeded']}")
lines.append(f"Failed : {correlation['failed']}")
if note:
lines.append(f"Note : {note}")
lines.append("") lines.append("")
# Append repository health check details when available. # ------------------------------------------------------------------
if repo_status: # Section 2: MODULE STATUS TABLE
lines.append("Repository check:") # Shown on PARTIAL and REPO_FAILURE to list which modules failed.
for dest in repo_status.get("destinations", []): # Omitted on SUCCESS to keep the email concise.
# ------------------------------------------------------------------
if outcome != "SUCCESS" and modules:
lines.append("-" * 60)
lines.append("MODULE STATUS")
lines.append("-" * 60)
# Fixed-width columns for plain-text readability.
header = f"{'Module':<20} {'Plan':>4} {'Result':<10} Error"
lines.append(header)
lines.append("-" * 60)
for m in modules:
result_str = m["result"].upper()
error_str = m["error"][:60] if m["error"] else "-"
lines.append( lines.append(
f" - [{dest['repo_id']}] {dest['status']}: {dest.get('error', '')}" f"{m['module_id']:<20} {m['backup_id']:>4} {result_str:<10} {error_str}"
) )
if repo_status.get("note"):
lines.append(f" NOTE: {repo_status['note']}")
lines.append("") lines.append("")
if correlation.get("note"): # ------------------------------------------------------------------
lines.append(f"Note: {correlation['note']}") # Section 3: REPOSITORY DIAGNOSTICS
# Shown only when the repo check was run (non-SUCCESS outcomes).
# The repo check is skipped on SUCCESS to avoid unnecessary restic
# network calls, so repo_status is None in that case.
# ------------------------------------------------------------------
if repo_status:
lines.append("-" * 60)
lines.append("REPOSITORY DIAGNOSTICS")
lines.append("-" * 60)
lines.append(f"Summary: {repo_status['summary']}")
lines.append("")
for dest in repo_status.get("destinations", []):
lines.append(f" Repo {dest['repo_id']}: {dest['status']}")
if dest.get("error"):
# Indent error detail under the repo line.
for err_line in dest["error"].splitlines()[:3]:
lines.append(f" {err_line}")
lines.append("")
lines.append("-" * 60)
lines.append("Sent by ns8-backup-monitor")
lines.append("https://github.com/lelekaos/ns8-backup-monitor")
lines.append("-" * 60)
return "\n".join(lines) return "\n".join(lines)
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
# HTML body renderer # Delivery
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str: def _send_via_ns8_sendmail(
"""Render an HTML email body from the correlation and repo results. config: dict,
subject: str,
body: str,
) -> bool:
"""Deliver the email through ``ns8-sendmail``.
Produces a self-contained HTML document with: ``ns8-sendmail`` reads SMTP relay settings from the NS8 cluster
- A coloured header banner showing the outcome and timestamp. configuration, so no SMTP credentials are needed here.
- A summary line with module counts.
- A per-module status table with colour-coded rows (green/red).
- An optional repository check table appended when repo_status is present.
Inline styles are used throughout to maximise compatibility with
webmail clients that strip <style> blocks.
Args: Args:
correlation: Dict returned by correlator.correlate_backup_status(). config: Parsed configuration dictionary. Reads:
repo_status: Dict returned by repo_check.check_repositories(), or None. ``notification.mail_to`` - recipient address or list.
``notification.mail_from`` - sender address (defaults to ``ns8-backup-monitor@localhost``).
subject: Email subject string.
body: Plain-text email body.
Returns: Returns:
HTML string. True if ``ns8-sendmail`` exited with code 0 for every recipient; False if any invocation failed or no recipients are configured.
""" """
outcome = correlation["outcome"] mail_to = config.get("notification", {}).get("mail_to", "")
color = OUTCOME_COLOR[outcome] mail_from = config.get("notification", {}).get(
label = OUTCOME_LABEL[outcome] "mail_from", "ns8-backup-monitor@localhost"
ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
plan_ids = ", ".join(correlation.get("backup_ids", [])) or "N/A"
# ---------------------------------------------------------------------------
# Per-module status table rows
# ---------------------------------------------------------------------------
rows = ""
for m in correlation.get("modules", []):
bg = "#e8f5e9" if m["result"] == "success" else "#ffebee"
icon = "&#10003;" if m["result"] == "success" else "&#10007;" # ✓ / ✗
rows += (
f'<tr style="background:{bg}">'
f'<td style="padding:4px 8px">{icon}</td>'
f'<td style="padding:4px 8px">{m["module_id"]}</td>'
f'<td style="padding:4px 8px">{m["backup_id"]}</td>'
f'<td style="padding:4px 8px">{m.get("timestamp", "")}</td>'
f'<td style="padding:4px 8px">{m.get("error", "") or ""}</td>'
"</tr>"
) )
# --------------------------------------------------------------------------- # ``mail_to`` may be a string or a list in the YAML config.
# Repository health check section (optional) if isinstance(mail_to, list):
# --------------------------------------------------------------------------- recipients = mail_to
repo_section = "" else:
if repo_status: # Split on comma for inline multi-recipient strings.
rr = "" recipients = [r.strip() for r in mail_to.split(",") if r.strip()]
for dest in repo_status.get("destinations", []):
bg = "#e8f5e9" if dest["status"] == "OK" else "#ffebee"
rr += (
f'<tr style="background:{bg}">'
f'<td style="padding:4px 8px">{dest["repo_id"]}</td>'
f'<td style="padding:4px 8px"><b>{dest["status"]}</b></td>'
f'<td style="padding:4px 8px">{dest.get("error", "")}</td>'
"</tr>"
)
if rr:
repo_section = (
"<h3 style='margin-top:24px'>Repository check</h3>"
"<table border='1' cellspacing='0' "
"style='border-collapse:collapse;font-size:13px;width:100%'>"
"<thead><tr style='background:#f5f5f5'>"
"<th style='padding:4px 8px'>Repo</th>"
"<th style='padding:4px 8px'>Status</th>"
f"<th style='padding:4px 8px'>Detail</th></tr></thead>"
f"<tbody>{rr}</tbody></table>"
)
if repo_status.get("note"):
repo_section += (
f"<p style='color:#777;font-size:12px'>{repo_status['note']}</p>"
)
# --------------------------------------------------------------------------- if not recipients:
# Assemble the full HTML document log.error("No mail_to recipients configured; skipping notification")
# --------------------------------------------------------------------------- return False
return (
"<html><body style='font-family:monospace;font-size:14px;" success = True
"max-width:800px;margin:auto'>" for recipient in recipients:
# Header banner try:
f"<div style='background:{color};color:#fff;padding:16px 20px;" result = subprocess.run(
f"border-radius:6px 6px 0 0'>" ["ns8-sendmail", "--from", mail_from, "--to", recipient,
f"<b>NS8 Backup Monitor &mdash; {label}: {outcome}</b><br>" "--subject", subject],
f"<small>{ts} &bull; Plans: {plan_ids}</small></div>" input=body,
# Body capture_output=True,
"<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;" text=True,
"border-radius:0 0 6px 6px'>" timeout=30,
f"<p><b>{correlation['succeeded']}</b> OK &nbsp;|&nbsp;"
f"<b>{correlation['failed']}</b> FAILED &nbsp;|&nbsp;"
f"<b>{correlation['total']}</b> total</p>"
# Per-module table
"<table border='1' cellspacing='0' style='border-collapse:collapse;"
"font-size:13px;width:100%'>"
"<thead><tr style='background:#f5f5f5'>"
"<th style='padding:4px 8px'></th>"
"<th style='padding:4px 8px'>Module</th>"
"<th style='padding:4px 8px'>Backup ID</th>"
"<th style='padding:4px 8px'>Timestamp</th>"
"<th style='padding:4px 8px'>Error</th>"
f"</tr></thead><tbody>{rows}</tbody></table>"
f"{repo_section}</div></body></html>"
) )
if result.returncode == 0:
log.info("Notification sent to %s", recipient)
else:
log.error(
"ns8-sendmail failed for %s (exit %d): %s",
recipient, result.returncode, result.stderr.strip()
)
success = False
except FileNotFoundError:
log.error(
"ns8-sendmail not found in PATH - "
"ensure the NS8 mail module is installed on the leader"
)
success = False
except subprocess.TimeoutExpired:
log.error("ns8-sendmail timed out for %s", recipient)
success = False
return success
# --------------------------------------------------------------------------- # ---------------------------------------------------------------------------
@@ -242,68 +242,22 @@ def send_notification(
config: dict, config: dict,
alerts: list, alerts: list,
correlation: dict, correlation: dict,
repo_status: Optional[dict] = None, repo_status: Optional[dict],
): ) -> bool:
"""Build and send the backup outcome email via ns8-sendmail. """Build and send the backup status notification email.
Sends the plain-text body to ns8-sendmail on stdin. The HTML body is
currently built but not used because ns8-sendmail does not support
multipart/alternative; it is kept for future use with a direct SMTP path.
Args: Args:
config: Parsed configuration dictionary. config: Parsed configuration dictionary.
alerts: Raw alert list from Alertmanager (used for context only). alerts: Raw Alertmanager alert list from the webhook payload.
correlation: Dict returned by correlator.correlate_backup_status(). correlation: Output dict from ``correlate_backup_status()``.
repo_status: Dict returned by repo_check.check_repositories(), or None. repo_status: Output dict from ``check_repositories()``, or None
when the outcome is SUCCESS (repo check skipped).
Returns:
True if ``ns8-sendmail`` exited successfully for every configured recipient, False otherwise.
""" """
outcome = correlation["outcome"] subject = _build_subject(correlation)
mail_cfg = config.get("mail", {}) body = _build_body(alerts, correlation, repo_status)
# --------------------------------------------------------------------------- log.info("Sending notification: %s", subject)
# Build subject line return _send_via_ns8_sendmail(config, subject, body)
# ---------------------------------------------------------------------------
subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
subject = (
f"{subject_prefix} {OUTCOME_LABEL[outcome]}: {outcome} - "
f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
)
# ---------------------------------------------------------------------------
# Validate recipients
# ---------------------------------------------------------------------------
mail_to = mail_cfg.get("to", [])
if not mail_to:
log.error("No mail.to recipients configured — notification not sent")
return
mail_from = mail_cfg.get("from", "")
# ---------------------------------------------------------------------------
# Render body and dispatch
# ---------------------------------------------------------------------------
# ns8-sendmail accepts plain text on stdin.
body = _build_text(correlation, repo_status)
cmd = _send_cmd(subject, mail_from, mail_to)
log.debug("send cmd: %s", cmd)
try:
proc = subprocess.run(
cmd,
input=body,
text=True,
capture_output=True,
timeout=30,
)
if proc.returncode != 0:
# Log the first non-empty output stream as the error detail.
err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
log.error("ns8-sendmail failed: %s", err)
else:
log.info("Notification sent: %s -> %s", subject, mail_to)
except FileNotFoundError:
# runagent is missing — this host is not an NS8 node, or PATH is wrong.
log.error("'runagent' not found in PATH — is this an NS8 node?")
except subprocess.TimeoutExpired:
log.error("ns8-sendmail timed out after 30s")
except Exception as e:
log.error("Failed to send notification: %s", e)