310 lines
12 KiB
Python
310 lines
12 KiB
Python
#!/usr/bin/env python3
|
||
"""Build and send backup outcome email notifications via ns8-sendmail.
|
||
|
||
This module is the final stage of the pipeline. It takes the correlation
result and the optional repository health check result, renders the
plain-text email body, and dispatches the message through the NS8 mail relay
using ``runagent ns8-sendmail``. An HTML renderer is also provided, but its
output is not sent: ns8-sendmail reads only a plain-text body from stdin.
|
||
|
||
Why ns8-sendmail / runagent?
|
||
----------------------------
|
||
NS8 modules are containerised; the cluster mail relay is exposed through
|
||
the ``runagent`` helper which bridges the host and the container network.
|
||
``ns8-sendmail`` reads the relay configuration from the NS8 cluster state,
|
||
so no SMTP settings need to be stored in this project's config file.
|
||
|
||
Correct invocation (verified against NS8 source):
|
||
runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]
|
||
Body is read from stdin as plain text.
|
||
ns8-sendmail does NOT parse To:/From:/Subject: headers from the body.
|
||
|
||
Outcome labels and colours used in the HTML email
|
||
--------------------------------------------------
|
||
SUCCESS – label "OK", header background #2e7d32 (green)
|
||
PARTIAL – label "WARNING", header background #e65100 (orange)
|
||
REPO_FAILURE – label "CRITICAL", header background #b71c1c (red)
|
||
"""
|
||
|
||
import logging
|
||
import subprocess
|
||
from datetime import datetime, timezone
|
||
from typing import Optional
|
||
|
||
log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Outcome presentation maps
# ---------------------------------------------------------------------------
# One row per internal outcome code: (code, short label, banner colour).
# The two public maps below are derived from this table so that a code can
# never end up with a label but no colour (or the other way round).
_OUTCOME_PRESENTATION = (
    ("SUCCESS", "OK", "#2e7d32"),             # Material green 800
    ("PARTIAL", "WARNING", "#e65100"),        # Material orange 900
    ("REPO_FAILURE", "CRITICAL", "#b71c1c"),  # Material red 900
)

# Outcome code -> short human-readable label.
OUTCOME_LABEL = {code: label for code, label, _colour in _OUTCOME_PRESENTATION}

# Outcome code -> background colour of the HTML email header banner.
OUTCOME_COLOR = {code: colour for code, _label, colour in _OUTCOME_PRESENTATION}
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Command builder
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
    """Build the argv list for invoking ns8-sendmail via runagent.

    The resulting command has the shape::

        runagent ns8-sendmail -s <subject> [-f <from>] <to> [<to> ...]

    ``runagent`` must be in PATH, which is guaranteed for root on NS8 nodes.

    Args:
        subject: Email subject line, passed verbatim after ``-s``.
        mail_from: Sender address passed after ``-f``. The option is left
            out entirely when this is empty, so ns8-sendmail falls back to
            its own default.
        mail_to: Recipient addresses. A single address given as a plain
            string is accepted too and treated as a one-element list;
            ``None`` is treated as "no recipients".

    Returns:
        List of strings ready to pass to subprocess.run().
    """
    # Extending a list with a str appends one element per character, which
    # would turn "a@b.c" into five bogus recipients - wrap it first.
    if isinstance(mail_to, str):
        recipients = [mail_to]
    else:
        recipients = list(mail_to or ())

    cmd = ["runagent", "ns8-sendmail", "-s", subject]
    if mail_from:
        cmd += ["-f", mail_from]
    cmd += recipients
    return cmd
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Plain-text body renderer
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render a plain-text email body from the correlation and repo results.

    The report consists of a header (outcome, UTC timestamp, plan ids and
    module counters), an optional "Failed modules" list, an optional
    "Repository check" list, and an optional trailing note.

    Missing or ``None`` detail fields never show up as the literal text
    "None": a failed module without an error message is reported as ``?``,
    a repository entry without an error is printed without a detail suffix,
    and an empty plan list is shown as ``N/A`` (the same placeholder the
    HTML renderer uses).

    Args:
        correlation: Dict returned by correlator.correlate_backup_status().
            The keys "outcome", "succeeded", "failed" and "total" are
            required; "backup_ids", "failed_modules" and "note" are optional.
        repo_status: Dict returned by repo_check.check_repositories(), or None.

    Returns:
        Multi-line string (Unix line endings).

    Raises:
        KeyError: If a required key is missing or the outcome code is unknown.
    """
    outcome = correlation["outcome"]

    # str() keeps the join from failing on non-string plan identifiers.
    plan_ids = ", ".join(str(p) for p in correlation.get("backup_ids") or [])

    lines = [
        f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plan_ids or 'N/A'}",
        f"Modules: {correlation['succeeded']} OK / "
        f"{correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # List each failed module with its error message.
    failed_modules = correlation.get("failed_modules") or []
    if failed_modules:
        lines.append("Failed modules:")
        lines.extend(
            f" - [{m['module_id']}] {m['backup_id']}: {m.get('error') or '?'}"
            for m in failed_modules
        )
        lines.append("")

    # Append repository health check details when available.
    if repo_status:
        lines.append("Repository check:")
        for dest in repo_status.get("destinations") or []:
            entry = f" - [{dest['repo_id']}] {dest['status']}"
            detail = dest.get("error") or ""
            # Healthy repositories usually carry no error; skip the dangling
            # ": " instead of printing an empty (or "None") detail.
            lines.append(f"{entry}: {detail}" if detail else entry)
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")

    return "\n".join(lines)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# HTML body renderer
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """Render an HTML email body from the correlation and repo results.

    Produces a self-contained HTML document with:
      - A coloured header banner showing the outcome and timestamp.
      - A summary line with module counts.
      - A per-module status table with colour-coded rows (green/red).
      - An optional repository check table appended when repo_status is present.

    Inline styles are used throughout to maximise compatibility with
    webmail clients that strip <style> blocks.

    Every value taken from ``correlation`` / ``repo_status`` is HTML-escaped
    before it is placed in the markup, so error messages containing ``<``,
    ``>`` or ``&`` cannot break the layout or inject markup. ``None`` values
    are rendered as empty cells. Symbols (check mark, cross, dash, bullet)
    are emitted as character references so the document is pure ASCII apart
    from the report data itself.

    Args:
        correlation: Dict returned by correlator.correlate_backup_status().
        repo_status: Dict returned by repo_check.check_repositories(), or None.

    Returns:
        HTML string.

    Raises:
        KeyError: If a required key is missing or the outcome code is unknown.
    """
    # Imported locally: this renderer is the only consumer in the module.
    from html import escape

    ok_bg, bad_bg = "#e8f5e9", "#ffebee"

    def esc(value) -> str:
        """Return *value* as HTML-safe text; None becomes an empty string."""
        return escape("" if value is None else str(value))

    def td(content: str) -> str:
        """Wrap already-safe markup in a padded table cell."""
        return f'<td style="padding:4px 8px">{content}</td>'

    def th(title: str) -> str:
        """Wrap a static column title in a padded header cell."""
        return f"<th style='padding:4px 8px'>{title}</th>"

    outcome = correlation["outcome"]
    color = OUTCOME_COLOR[outcome]
    label = OUTCOME_LABEL[outcome]
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = (
        esc(", ".join(str(p) for p in correlation.get("backup_ids") or []))
        or "N/A"
    )

    # -----------------------------------------------------------------------
    # Per-module status table rows
    # -----------------------------------------------------------------------
    module_rows = []
    for m in correlation.get("modules") or []:
        succeeded = m["result"] == "success"
        module_rows.append(
            f'<tr style="background:{ok_bg if succeeded else bad_bg}">'
            + td("&#10003;" if succeeded else "&#10007;")
            + td(esc(m["module_id"]))
            + td(esc(m["backup_id"]))
            + td(esc(m.get("timestamp")))
            + td(esc(m.get("error") or ""))
            + "</tr>"
        )
    rows = "".join(module_rows)

    # -----------------------------------------------------------------------
    # Repository health check section (optional)
    # -----------------------------------------------------------------------
    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations") or []:
            row_bg = ok_bg if dest["status"] == "OK" else bad_bg
            repo_rows.append(
                f'<tr style="background:{row_bg}">'
                + td(esc(dest["repo_id"]))
                + td(f"<b>{esc(dest['status'])}</b>")
                + td(esc(dest.get("error") or ""))
                + "</tr>"
            )
        # The table is only emitted when at least one destination was checked.
        if repo_rows:
            repo_section = (
                "<h3 style='margin-top:24px'>Repository check</h3>"
                "<table border='1' cellspacing='0' "
                "style='border-collapse:collapse;font-size:13px;width:100%'>"
                "<thead><tr style='background:#f5f5f5'>"
                + th("Repo")
                + th("Status")
                + th("Detail")
                + "</tr></thead>"
                + f"<tbody>{''.join(repo_rows)}</tbody></table>"
            )
        # The note is shown even when there is no table (same as the
        # plain-text report, which prints it regardless of destinations).
        if repo_status.get("note"):
            repo_section += (
                f"<p style='color:#777;font-size:12px'>{esc(repo_status['note'])}</p>"
            )

    # -----------------------------------------------------------------------
    # Assemble the full HTML document
    # -----------------------------------------------------------------------
    return (
        "<html><body style='font-family:monospace;font-size:14px;"
        "max-width:800px;margin:auto'>"
        # Header banner
        f"<div style='background:{color};color:#fff;padding:16px 20px;"
        "border-radius:6px 6px 0 0'>"
        f"<b>NS8 Backup Monitor &mdash; {esc(label)}: {esc(outcome)}</b><br>"
        f"<small>{ts} &bull; Plans: {plan_ids}</small></div>"
        # Body
        "<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;"
        "border-radius:0 0 6px 6px'>"
        f"<p><b>{esc(correlation['succeeded'])}</b> OK | "
        f"<b>{esc(correlation['failed'])}</b> FAILED | "
        f"<b>{esc(correlation['total'])}</b> total</p>"
        # Per-module table
        "<table border='1' cellspacing='0' style='border-collapse:collapse;"
        "font-size:13px;width:100%'>"
        "<thead><tr style='background:#f5f5f5'>"
        + th("")
        + th("Module")
        + th("Backup ID")
        + th("Timestamp")
        + th("Error")
        + f"</tr></thead><tbody>{rows}</tbody></table>"
        + f"{repo_section}</div></body></html>"
    )
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Main entry point
|
||
# ---------------------------------------------------------------------------
|
||
|
||
def send_notification(
    config: dict,
    alerts: list,
    correlation: dict,
    repo_status: Optional[dict] = None,
):
    """Build and send the backup outcome email via ns8-sendmail.

    Only the plain-text body is rendered here; it is piped to ns8-sendmail
    on stdin as UTF-8. The HTML renderer in this module is not invoked by
    this function.

    Delivery problems (no recipients configured, ``runagent`` missing,
    timeout, non-zero exit status) are logged and the function returns
    normally, so a mail failure never aborts the caller.

    Args:
        config: Parsed configuration dictionary. The ``mail`` section may
            provide ``to`` (list of addresses, or a single address string),
            ``from`` and ``subject_prefix``.
        alerts: Raw alert list from Alertmanager (used for context only;
            not read by this function).
        correlation: Dict returned by correlator.correlate_backup_status().
        repo_status: Dict returned by repo_check.check_repositories(), or None.

    Returns:
        None.

    Raises:
        KeyError: If ``correlation`` lacks a required key or carries an
            unknown outcome code.
    """
    mail_cfg = config.get("mail") or {}

    # -----------------------------------------------------------------------
    # Validate recipients
    # -----------------------------------------------------------------------
    configured_to = mail_cfg.get("to") or []
    if isinstance(configured_to, str):
        # A bare string in the config means "one recipient", not a list of
        # single-character addresses.
        configured_to = [configured_to]
    mail_to = [addr for addr in configured_to if addr]
    if not mail_to:
        log.error("No mail.to recipients configured — notification not sent")
        return
    mail_from = mail_cfg.get("from") or ""

    # -----------------------------------------------------------------------
    # Build subject line
    # -----------------------------------------------------------------------
    outcome = correlation["outcome"]
    subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
    subject = (
        f"{subject_prefix} {OUTCOME_LABEL[outcome]}: {outcome} - "
        f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}"
    )

    # -----------------------------------------------------------------------
    # Render body and dispatch
    # -----------------------------------------------------------------------
    # ns8-sendmail accepts plain text on stdin.
    body = _build_text(correlation, repo_status)
    cmd = _send_cmd(subject, mail_from, mail_to)

    log.debug("send cmd: %s", cmd)
    try:
        proc = subprocess.run(
            cmd,
            input=body,
            # Explicit UTF-8 instead of the locale default: error messages in
            # the report may contain non-ASCII text, which must not make the
            # pipe write fail under a restrictive locale.
            encoding="utf-8",
            errors="replace",
            capture_output=True,
            timeout=30,
        )
    except FileNotFoundError:
        # runagent is missing — this host is not an NS8 node, or PATH is wrong.
        log.error("'runagent' not found in PATH — is this an NS8 node?")
    except subprocess.TimeoutExpired:
        log.error("ns8-sendmail timed out after 30s")
    except Exception as e:
        # Best-effort boundary: a notification failure is reported, not raised.
        log.error("Failed to send notification: %s", e)
    else:
        if proc.returncode != 0:
            # Log the first non-empty output stream as the error detail.
            err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
            log.error("ns8-sendmail failed: %s", err)
        else:
            log.info("Notification sent: %s -> %s", subject, mail_to)
|