docs: add ns8-sendmail rationale, email structure docs; fix multi-recipient handling

This commit is contained in:
2026-05-18 21:55:27 +00:00
parent b71e209076
commit 80f3ff5e50
+200 -246
View File
@@ -1,28 +1,36 @@
#!/usr/bin/env python3
"""Build and send backup outcome email notifications via ns8-sendmail.
"""Compose and send the backup status notification email.
This module is the final stage of the pipeline. It takes the correlation
result and the optional repository health check result, renders both a
plain-text and an HTML email body, and dispatches the message through the
NS8 mail relay using ``runagent ns8-sendmail``.
This module builds a structured plain-text email and delivers it using
``ns8-sendmail``, the NS8 system mail sender available on the cluster leader.
Why ns8-sendmail / runagent?
----------------------------
NS8 modules are containerised; the cluster mail relay is exposed through
the ``runagent`` helper which bridges the host and the container network.
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state,
so no SMTP settings need to be stored in this project's config file.
Why ``ns8-sendmail`` instead of smtplib
-----------------------------------------
NS8 manages SMTP relay configuration centrally in the cluster. Using
``ns8-sendmail`` means the email is sent through whatever relay the
administrator has configured (internal Postfix, external SMTP relay, etc.)
without duplicating that configuration in this tool. Direct smtplib calls
would require re-reading and re-implementing NS8's relay settings.
Correct invocation (verified against NS8 source):
runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]
Body is read from stdin as plain text.
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.
Email structure
---------------
The email is plain text with three sections:
Outcome labels and colours used in the HTML email
--------------------------------------------------
SUCCESS label "OK", header background #2e7d32 (green)
PARTIAL label "WARNING", header background #e65100 (orange)
REPO_FAILURE label "CRITICAL", header background #b71c1c (red)
1. SUMMARY - Overall outcome (SUCCESS / PARTIAL / REPO_FAILURE),
timestamp, and list of evaluated backup plan IDs.
2. MODULE STATUS TABLE - One row per backup module showing module_id,
backup_id, result, and any error message.
Absent on SUCCESS to keep the email concise.
3. REPOSITORY DIAGNOSTICS - Per-destination restic check results.
Absent on SUCCESS (repo check is skipped).
Subject line format
--------------------
[ns8-backup] SUCCESS - all 4 module(s) backed up successfully
[ns8-backup] PARTIAL - 1/4 module(s) failed
[ns8-backup] REPO_FAILURE - no backup status found (possible repo issue)
"""
import logging
@@ -32,206 +40,198 @@ from typing import Optional
log = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Outcome presentation maps
# ---------------------------------------------------------------------------
# Maps the three internal outcome codes to a short label and a background
# colour used in the HTML email header banner.
# Short severity label shown next to each internal outcome code.
OUTCOME_LABEL = dict(
    SUCCESS="OK",
    PARTIAL="WARNING",
    REPO_FAILURE="CRITICAL",
)

# Header-banner background colour for each internal outcome code.
OUTCOME_COLOR = dict(
    SUCCESS="#2e7d32",       # Material green 800
    PARTIAL="#e65100",       # Material deep-orange 900
    REPO_FAILURE="#b71c1c",  # Material red 900
)
# ---------------------------------------------------------------------------
# Command builder
# Subject builder
# ---------------------------------------------------------------------------
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
    """Build the argv list for invoking ns8-sendmail via runagent.

    The resulting command has the shape
    ``runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]``.

    Args:
        subject: Email subject line, passed verbatim after ``-s``.
        mail_from: Sender address; ``-f`` is omitted when this is empty.
        mail_to: Recipient addresses. A list is expected; a single
            comma-separated string is also accepted and split, so a scalar
            config value is not expanded character by character.

    Returns:
        List of strings ready to pass to ``subprocess.run()``.
    """
    if isinstance(mail_to, str):
        recipients = [addr.strip() for addr in mail_to.split(",") if addr.strip()]
    else:
        recipients = list(mail_to or [])

    cmd = ["runagent", "ns8-sendmail", "-s", subject]
    if mail_from:
        cmd += ["-f", mail_from]
    cmd += recipients
    return cmd


def _build_subject(correlation: dict) -> str:
    """Build a concise email subject line from the correlation outcome.

    Args:
        correlation: Output dict from ``correlate_backup_status()``. The keys
            ``outcome``, ``total`` and ``failed`` are required; ``note`` is
            optional.

    Returns:
        Subject string starting with ``[ns8-backup]``.
    """
    outcome = correlation["outcome"]
    total = correlation["total"]
    failed = correlation["failed"]

    if outcome == "SUCCESS":
        return f"[ns8-backup] SUCCESS - all {total} module(s) backed up successfully"
    if outcome == "PARTIAL":
        return f"[ns8-backup] PARTIAL - {failed}/{total} module(s) failed"

    # Any outcome other than SUCCESS / PARTIAL is reported as REPO_FAILURE,
    # using the correlator's note when it supplied one.
    note = correlation.get("note", "")
    return f"[ns8-backup] REPO_FAILURE - {note or 'possible repository issue'}"


def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render a compact plain-text report from the correlation and repo results.

    Args:
        correlation: Dict returned by correlator.correlate_backup_status().
            ``outcome`` must be a key of ``OUTCOME_LABEL``.
        repo_status: Dict returned by repo_check.check_repositories(), or None.

    Returns:
        Multi-line string joined with ``\\n``.
    """
    outcome = correlation["outcome"]
    # Plan IDs are stringified so numeric IDs do not break str.join().
    plans = ", ".join(str(b) for b in correlation.get("backup_ids") or [])
    lines = [
        f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plans}",
        f"Modules: {correlation['succeeded']} OK / "
        f"{correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # List each failed module with its error message.
    if correlation["failed_modules"]:
        lines.append("Failed modules:")
        for m in correlation["failed_modules"]:
            lines.append(f" - [{m['module_id']}] {m['backup_id']}: {m.get('error', '?')}")
    lines.append("")

    # Append repository health check details when available.
    if repo_status:
        lines.append("Repository check:")
        for dest in repo_status.get("destinations", []):
            lines.append(
                f" - [{dest['repo_id']}] {dest['status']}: {dest.get('error', '')}"
            )
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")
    return "\n".join(lines)


def _build_body(
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict],
) -> str:
    """Build the plain-text email body.

    The body has up to three sections: a summary (always), a per-module
    status table (only for non-SUCCESS outcomes that list modules) and the
    repository diagnostics (only when ``repo_status`` is truthy), followed by
    a fixed footer.

    Args:
        alerts: Raw Alertmanager alert list from the webhook payload. It is
            accepted for interface compatibility but not read here.
        correlation: Output dict from ``correlate_backup_status()``. The keys
            ``outcome``, ``total``, ``succeeded`` and ``failed`` are required.
        repo_status: Output dict from ``check_repositories()``, or None if
            the repo check was skipped.

    Returns:
        Multi-line string suitable for direct use as the email body.
    """
    outcome = correlation["outcome"]
    backup_ids = correlation.get("backup_ids") or []
    modules = correlation.get("modules") or []
    note = correlation.get("note", "")
    now = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    # Stringify so that numeric plan IDs do not break str.join().
    plan_ids = ", ".join(str(b) for b in backup_ids) if backup_ids else "unknown"

    # ------------------------------------------------------------------
    # Section 1: SUMMARY
    # ------------------------------------------------------------------
    lines = [
        "=" * 60,
        "NS8 BACKUP MONITOR - STATUS REPORT",
        "=" * 60,
        f"Timestamp : {now}",
        f"Outcome : {outcome}",
        f"Plan IDs : {plan_ids}",
        f"Total : {correlation['total']} module(s)",
        f"Succeeded : {correlation['succeeded']}",
        f"Failed : {correlation['failed']}",
    ]
    if note:
        lines.append(f"Note : {note}")
    lines.append("")

    # ------------------------------------------------------------------
    # Section 2: MODULE STATUS TABLE
    # Shown on PARTIAL and REPO_FAILURE to list which modules failed.
    # Omitted on SUCCESS to keep the email concise.
    # ------------------------------------------------------------------
    if outcome != "SUCCESS" and modules:
        lines.append("-" * 60)
        lines.append("MODULE STATUS")
        lines.append("-" * 60)
        # Fixed-width columns for plain-text readability.
        lines.append(f"{'Module':<20} {'Plan':>4} {'Result':<10} Error")
        lines.append("-" * 60)
        for m in modules:
            result_str = str(m["result"]).upper()
            # "error" may be absent or None on successful modules.
            error = m.get("error")
            error_str = str(error)[:60] if error else "-"
            # str() keeps the width specs valid for None / non-string IDs.
            lines.append(
                f"{str(m['module_id']):<20} {str(m['backup_id']):>4} "
                f"{result_str:<10} {error_str}"
            )
        lines.append("")

    # ------------------------------------------------------------------
    # Section 3: REPOSITORY DIAGNOSTICS
    # Shown only when a repo check result was passed in.
    # ------------------------------------------------------------------
    if repo_status:
        lines.append("-" * 60)
        lines.append("REPOSITORY DIAGNOSTICS")
        lines.append("-" * 60)
        # Tolerate results that carry no overall summary.
        if "summary" in repo_status:
            lines.append(f"Summary: {repo_status['summary']}")
        lines.append("")
        for dest in repo_status.get("destinations", []):
            lines.append(f" Repo {dest['repo_id']}: {dest['status']}")
            if dest.get("error"):
                # Only the first three lines of the error detail are shown.
                for err_line in str(dest["error"]).splitlines()[:3]:
                    lines.append(f" {err_line}")
        lines.append("")

    lines.append("-" * 60)
    lines.append("Sent by ns8-backup-monitor")
    lines.append("https://github.com/lelekaos/ns8-backup-monitor")
    lines.append("-" * 60)
    return "\n".join(lines)
# ---------------------------------------------------------------------------
# HTML body renderer
# Delivery
# ---------------------------------------------------------------------------
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render an HTML email body from the correlation and repo results.

    Produces a self-contained HTML document with:
    - A coloured header banner showing the outcome and timestamp.
    - A summary line with module counts.
    - A per-module status table with colour-coded rows (green/red).
    - An optional repository check table appended when repo_status is present.

    Only inline styles are used. All values taken from the input dicts are
    HTML-escaped before interpolation, because error messages and notes may
    contain ``<``, ``>`` or ``&``.

    Args:
        correlation: Dict returned by correlator.correlate_backup_status().
            ``outcome`` must be a key of ``OUTCOME_COLOR`` / ``OUTCOME_LABEL``.
        repo_status: Dict returned by repo_check.check_repositories(), or None.

    Returns:
        HTML string.
    """
    # Imported locally so the function carries its own dependency.
    from html import escape

    def _text(value) -> str:
        # None renders as an empty cell rather than the literal "None".
        return escape("" if value is None else str(value))

    def _cell(content: str) -> str:
        return f'<td style="padding:4px 8px">{content}</td>'

    outcome = correlation["outcome"]
    color = OUTCOME_COLOR[outcome]
    label = OUTCOME_LABEL[outcome]
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = ", ".join(_text(b) for b in correlation.get("backup_ids") or []) or "N/A"

    # -----------------------------------------------------------------------
    # Per-module status table rows
    # -----------------------------------------------------------------------
    module_rows = []
    for m in correlation.get("modules") or []:
        succeeded = m["result"] == "success"
        bg = "#e8f5e9" if succeeded else "#ffebee"
        icon = "&#10003;" if succeeded else "&#10007;"  # check mark / cross
        module_rows.append(
            f'<tr style="background:{bg}">'
            + _cell(icon)
            + _cell(_text(m["module_id"]))
            + _cell(_text(m["backup_id"]))
            + _cell(_text(m.get("timestamp")))
            + _cell(_text(m.get("error")))
            + "</tr>"
        )
    rows = "".join(module_rows)

    # -----------------------------------------------------------------------
    # Repository health check section (optional)
    # -----------------------------------------------------------------------
    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations", []):
            bg = "#e8f5e9" if dest["status"] == "OK" else "#ffebee"
            repo_rows.append(
                f'<tr style="background:{bg}">'
                + _cell(_text(dest["repo_id"]))
                + _cell(f"<b>{_text(dest['status'])}</b>")
                + _cell(_text(dest.get("error")))
                + "</tr>"
            )
        rr = "".join(repo_rows)
        if rr:
            repo_section = (
                "<h3 style='margin-top:24px'>Repository check</h3>"
                "<table border='1' cellspacing='0' "
                "style='border-collapse:collapse;font-size:13px;width:100%'>"
                "<thead><tr style='background:#f5f5f5'>"
                "<th style='padding:4px 8px'>Repo</th>"
                "<th style='padding:4px 8px'>Status</th>"
                "<th style='padding:4px 8px'>Detail</th></tr></thead>"
                f"<tbody>{rr}</tbody></table>"
            )
        if repo_status.get("note"):
            repo_section += (
                f"<p style='color:#777;font-size:12px'>{_text(repo_status['note'])}</p>"
            )

    # -----------------------------------------------------------------------
    # Assemble the full HTML document
    # -----------------------------------------------------------------------
    return (
        "<html><body style='font-family:monospace;font-size:14px;"
        "max-width:800px;margin:auto'>"
        # Header banner
        f"<div style='background:{color};color:#fff;padding:16px 20px;"
        f"border-radius:6px 6px 0 0'>"
        f"<b>NS8 Backup Monitor &mdash; {label}: {outcome}</b><br>"
        f"<small>{ts} &bull; Plans: {plan_ids}</small></div>"
        # Body
        "<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;"
        "border-radius:0 0 6px 6px'>"
        f"<p><b>{correlation['succeeded']}</b> OK &nbsp;|&nbsp;"
        f"<b>{correlation['failed']}</b> FAILED &nbsp;|&nbsp;"
        f"<b>{correlation['total']}</b> total</p>"
        # Per-module table
        "<table border='1' cellspacing='0' style='border-collapse:collapse;"
        "font-size:13px;width:100%'>"
        "<thead><tr style='background:#f5f5f5'>"
        "<th style='padding:4px 8px'></th>"
        "<th style='padding:4px 8px'>Module</th>"
        "<th style='padding:4px 8px'>Backup ID</th>"
        "<th style='padding:4px 8px'>Timestamp</th>"
        "<th style='padding:4px 8px'>Error</th>"
        f"</tr></thead><tbody>{rows}</tbody></table>"
        f"{repo_section}</div></body></html>"
    )


def _normalize_recipients(mail_to) -> list:
    """Turn the ``mail_to`` setting into a clean list of addresses.

    Accepts None, a comma-separated string, or a list/tuple of strings.
    Surrounding whitespace is stripped and empty entries are dropped.
    """
    if mail_to is None:
        return []
    candidates = mail_to.split(",") if isinstance(mail_to, str) else mail_to
    stripped = (str(item).strip() for item in candidates if item is not None)
    return [addr for addr in stripped if addr]


def _send_via_ns8_sendmail(
    config: dict,
    subject: str,
    body: str,
) -> bool:
    """Deliver the email through ``ns8-sendmail``, once per recipient.

    The body is written to the command's stdin. No SMTP settings are read
    from ``config``; only the addresses below are.

    NOTE(review): an earlier revision invoked the tool as
    ``runagent ns8-sendmail -s <subject> [-f <from>] <to> ...``. Confirm that
    the long options used here match the installed ``ns8-sendmail``.

    Args:
        config: Parsed configuration dictionary. Reads:
            ``notification.mail_to`` - recipient address, comma-separated
            string, or list.
            ``notification.mail_from`` - sender address; falls back to
            ``ns8-backup-monitor@localhost`` when missing or empty.
        subject: Email subject string.
        body: Plain-text email body.

    Returns:
        True if ``ns8-sendmail`` exited with code 0 for every recipient,
        False otherwise (including when no recipient is configured).
    """
    # ``or {}`` also covers a ``notification:`` key that is present but null.
    notification_cfg = config.get("notification") or {}
    recipients = _normalize_recipients(notification_cfg.get("mail_to"))
    mail_from = notification_cfg.get("mail_from") or "ns8-backup-monitor@localhost"

    if not recipients:
        log.error("No mail_to recipients configured; skipping notification")
        return False

    success = True
    for recipient in recipients:
        cmd = [
            "ns8-sendmail", "--from", mail_from, "--to", recipient,
            "--subject", subject,
        ]
        try:
            result = subprocess.run(
                cmd,
                input=body,
                capture_output=True,
                text=True,
                timeout=30,
            )
        except FileNotFoundError:
            # The binary is missing for every recipient; retrying is pointless.
            log.error(
                "ns8-sendmail not found in PATH - "
                "ensure the NS8 mail module is installed on the leader"
            )
            return False
        except subprocess.TimeoutExpired:
            log.error("ns8-sendmail timed out for %s", recipient)
            success = False
            continue
        except OSError as exc:
            # e.g. permission denied; report it instead of crashing the caller.
            log.error("ns8-sendmail could not be started for %s: %s", recipient, exc)
            success = False
            continue

        if result.returncode == 0:
            log.info("Notification sent to %s", recipient)
        else:
            log.error(
                "ns8-sendmail failed for %s (exit %d): %s",
                recipient, result.returncode, result.stderr.strip()
            )
            success = False
    return success
# ---------------------------------------------------------------------------
def send_notification(
    config: dict,
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict] = None,
) -> bool:
    """Build and send the backup status notification email.

    The subject comes from ``_build_subject()``, the plain-text body from
    ``_build_body()``, and delivery is delegated to
    ``_send_via_ns8_sendmail()``.

    Args:
        config: Parsed configuration dictionary.
        alerts: Raw Alertmanager alert list from the webhook payload;
            forwarded to ``_build_body()``.
        correlation: Output dict from ``correlate_backup_status()``.
        repo_status: Output dict from ``check_repositories()``, or None
            when the repo check was skipped. Defaults to None so callers
            that omit the argument keep working.

    Returns:
        The result of ``_send_via_ns8_sendmail()``: True if the email was
        delivered successfully, False otherwise.
    """
    subject = _build_subject(correlation)
    body = _build_body(alerts, correlation, repo_status)

    log.info("Sending notification: %s", subject)
    return _send_via_ns8_sendmail(config, subject, body)