Files
ns8-backup-monitor/ns8_backup_monitor/notifier.py
T

137 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""
notifier.py - Sends email via ns8-sendmail.
Correct invocation (from working reference code):
runagent ns8-sendmail -s <subject> -f <from> <to> [<to> ...]
Body is passed on stdin as plain text.
ns8-sendmail does NOT read To:/From:/Subject: from headers.
"""
import html
import logging
import subprocess
from datetime import datetime, timezone
from typing import Optional
# Module-level logger shared by every function in this file.
log = logging.getLogger(__name__)

# Severity label shown next to each outcome in subjects and message headers.
OUTCOME_LABEL = {
    "SUCCESS": "OK",
    "PARTIAL": "WARNING",
    "REPO_FAILURE": "CRITICAL",
}

# Banner background colour used by the HTML body for each outcome.
OUTCOME_COLOR = {
    "SUCCESS": "#2e7d32",
    "PARTIAL": "#e65100",
    "REPO_FAILURE": "#b71c1c",
}
def _send_cmd(subject: str, mail_from: str, mail_to: list) -> list:
"""
Build argv for ns8-sendmail.
runagent must be in PATH (it is, for root on NS8 nodes).
"""
cmd = ["runagent", "ns8-sendmail", "-s", subject]
if mail_from:
cmd += ["-f", mail_from]
cmd += mail_to
return cmd
def _build_text(correlation: dict, repo_status: Optional[dict]) -> str:
    """
    Render the plain-text notification body.

    Args:
        correlation: Correlation result. The keys "outcome", "succeeded",
            "failed" and "total" are required; "backup_ids",
            "failed_modules" and "note" are optional.
        repo_status: Optional repository check result. Its "destinations"
            entries (each with "repo_id" and "status", optionally "error")
            and its "note" are rendered when present.

    Returns:
        The body as newline-joined text.

    Raises:
        KeyError: If a required key is missing or the outcome has no entry
            in OUTCOME_LABEL.
    """
    outcome = correlation["outcome"]
    # Use the same "N/A" placeholder as the HTML body when no plan is known.
    plans = ", ".join(str(p) for p in correlation.get("backup_ids") or []) or "N/A"
    lines = [
        f"NS8 Backup Monitor - {OUTCOME_LABEL[outcome]}: {outcome}",
        f"Time: {datetime.now(timezone.utc).isoformat()}",
        f"Plans: {plans}",
        f"Modules: {correlation['succeeded']} OK / {correlation['failed']} FAILED / {correlation['total']} total",
        "",
    ]

    # A missing or null list is treated as "nothing failed".
    failed_modules = correlation.get("failed_modules") or []
    if failed_modules:
        lines.append("Failed modules:")
        for m in failed_modules:
            # "or" also covers an explicit None, so "None" is never printed.
            lines.append(f" - [{m['module_id']}] {m['backup_id']}: {m.get('error') or '?'}")
        lines.append("")

    if repo_status:
        lines.append("Repository check:")
        for dest in repo_status.get("destinations") or []:
            # Destinations without an error get an empty detail, not "None".
            lines.append(f" - [{dest['repo_id']}] {dest['status']}: {dest.get('error') or ''}")
        if repo_status.get("note"):
            lines.append(f" NOTE: {repo_status['note']}")
        lines.append("")

    if correlation.get("note"):
        lines.append(f"Note: {correlation['note']}")
    return "\n".join(lines)
def _build_html(correlation: dict, repo_status: Optional[dict]) -> str:
    """
    Render the notification as a standalone HTML document.

    Every value interpolated from correlation and repo_status is HTML-escaped,
    so text such as "<nil>" or "a & b" is displayed literally instead of
    corrupting the markup. The outcome itself is inserted as-is: it has
    already matched a key of OUTCOME_COLOR / OUTCOME_LABEL by then.

    Args:
        correlation: Correlation result. The keys "outcome", "succeeded",
            "failed" and "total" are required; "backup_ids" and "modules"
            are optional. Each module needs "result", "module_id" and
            "backup_id"; "timestamp" and "error" are optional.
        repo_status: Optional repository check result. Its "destinations"
            entries (each with "repo_id" and "status", optionally "error")
            and its "note" are rendered when present.

    Returns:
        The complete HTML document as a single string.

    Raises:
        KeyError: If a required key is missing or the outcome has no entry
            in OUTCOME_COLOR / OUTCOME_LABEL.
    """

    def esc(value) -> str:
        """Return value as HTML-safe text; None becomes an empty string."""
        return "" if value is None else html.escape(str(value))

    outcome = correlation["outcome"]
    color = OUTCOME_COLOR[outcome]
    label = OUTCOME_LABEL[outcome]
    ts = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")
    plan_ids = esc(", ".join(str(p) for p in correlation.get("backup_ids") or []) or "N/A")
    succeeded = esc(correlation["succeeded"])
    failed = esc(correlation["failed"])
    total = esc(correlation["total"])

    # Collect rows in a list and join once instead of growing a string.
    module_rows = []
    for m in correlation.get("modules", []):
        ok = m["result"] == "success"
        bg = "#e8f5e9" if ok else "#ffebee"
        # The icons are literal HTML entities and must not be escaped.
        icon = "&#10003;" if ok else "&#10007;"
        module_rows.append(
            f'<tr style="background:{bg}"><td style="padding:4px 8px">{icon}</td>'
            f'<td style="padding:4px 8px">{esc(m["module_id"])}</td>'
            f'<td style="padding:4px 8px">{esc(m["backup_id"])}</td>'
            f'<td style="padding:4px 8px">{esc(m.get("timestamp", ""))}</td>'
            f'<td style="padding:4px 8px">{esc(m.get("error") or "")}</td></tr>'
        )
    rows = "".join(module_rows)

    repo_section = ""
    if repo_status:
        repo_rows = []
        for dest in repo_status.get("destinations", []):
            bg = "#e8f5e9" if dest["status"] == "OK" else "#ffebee"
            repo_rows.append(
                f'<tr style="background:{bg}"><td style="padding:4px 8px">{esc(dest["repo_id"])}</td>'
                f'<td style="padding:4px 8px"><b>{esc(dest["status"])}</b></td>'
                # esc() maps a None error to "" rather than the text "None".
                f'<td style="padding:4px 8px">{esc(dest.get("error"))}</td></tr>'
            )
        rr = "".join(repo_rows)
        if rr:
            repo_section = ("<h3 style='margin-top:24px'>Repository check</h3>"
                            "<table border='1' cellspacing='0' style='border-collapse:collapse;font-size:13px;width:100%'>"
                            "<thead><tr style='background:#f5f5f5'>"
                            "<th style='padding:4px 8px'>Repo</th><th style='padding:4px 8px'>Status</th>"
                            f"<th style='padding:4px 8px'>Detail</th></tr></thead><tbody>{rr}</tbody></table>")
        if repo_status.get("note"):
            repo_section += f"<p style='color:#777;font-size:12px'>{esc(repo_status['note'])}</p>"

    return (f"<html><body style='font-family:monospace;font-size:14px;max-width:800px;margin:auto'>"
            f"<div style='background:{color};color:#fff;padding:16px 20px;border-radius:6px 6px 0 0'>"
            f"<b>NS8 Backup Monitor &mdash; {label}: {outcome}</b><br>"
            f"<small>{ts} &bull; Plans: {plan_ids}</small></div>"
            f"<div style='border:1px solid #ddd;border-top:none;padding:16px 20px;border-radius:0 0 6px 6px'>"
            f"<p><b>{succeeded}</b> OK &nbsp;|&nbsp;"
            f"<b>{failed}</b> FAILED &nbsp;|&nbsp;"
            f"<b>{total}</b> total</p>"
            "<table border='1' cellspacing='0' style='border-collapse:collapse;font-size:13px;width:100%'>"
            "<thead><tr style='background:#f5f5f5'><th style='padding:4px 8px'></th>"
            "<th style='padding:4px 8px'>Module</th><th style='padding:4px 8px'>Backup ID</th>"
            "<th style='padding:4px 8px'>Timestamp</th><th style='padding:4px 8px'>Error</th>"
            f"</tr></thead><tbody>{rows}</tbody></table>{repo_section}</div></body></html>")
def send_notification(config: dict, alerts: list, correlation: dict, repo_status: Optional[dict] = None):
    """
    Send the backup notification as plain text through ns8-sendmail.

    The subject is "<prefix> <label>: <outcome> - <UTC date>" and the body is
    the output of _build_text, passed to the command on stdin.

    Args:
        config: Monitor configuration. Only the "mail" section is read
            ("to", "from", "subject_prefix"); a missing or null section is
            treated as empty.
        alerts: Not used by this function; kept so the signature stays
            unchanged for callers.
        correlation: Correlation result; "outcome" is required here, the
            remaining keys are consumed by _build_text.
        repo_status: Optional repository check result forwarded to
            _build_text.

    Returns:
        None. Problems while running the command are logged, not raised.

    Raises:
        KeyError: If correlation has no "outcome" or the outcome has no entry
            in OUTCOME_LABEL; this happens before anything is sent.
    """
    # Single source for the timeout so the call and the log line cannot drift.
    timeout_s = 30

    outcome = correlation["outcome"]
    # "mail:" with a null value would make .get("mail", {}) return None.
    mail_cfg = config.get("mail") or {}
    subject_prefix = mail_cfg.get("subject_prefix", "[NS8 Backup]")
    subject = (f"{subject_prefix} {OUTCOME_LABEL[outcome]}: {outcome} - "
               f"{datetime.now(timezone.utc).strftime('%Y-%m-%d')}")

    mail_to = mail_cfg.get("to", [])
    if not mail_to:
        log.error("No mail.to recipients configured")
        return
    mail_from = mail_cfg.get("from", "")

    # ns8-sendmail accepts plain text on stdin; send the text body
    body = _build_text(correlation, repo_status)
    cmd = _send_cmd(subject, mail_from, mail_to)
    log.debug("send cmd: %s", cmd)

    try:
        proc = subprocess.run(cmd, input=body, text=True, capture_output=True, timeout=timeout_s)
    except FileNotFoundError:
        log.error("'runagent' not found in PATH - is this an NS8 node?")
    except subprocess.TimeoutExpired:
        log.error("ns8-sendmail timed out after %ss", timeout_s)
    except Exception as e:
        # Last-resort boundary: keep the caller alive, but record the
        # traceback so unexpected failures can be diagnosed.
        log.exception("failed to send notification: %s", e)
    else:
        if proc.returncode != 0:
            # Prefer stderr, then stdout, then the bare exit code.
            err = proc.stderr.strip() or proc.stdout.strip() or f"exit {proc.returncode}"
            log.error("ns8-sendmail failed: %s", err)
        else:
            log.info("notification sent: %s -> %s", subject, mail_to)