feat: install timer+check service, ask recap time, fix tmpdir+ns8-sendmail checks

This commit is contained in:
2026-05-18 22:20:04 +00:00
parent 82fc83cb7c
commit bdfd827f78
+74 -39
View File
@@ -9,18 +9,28 @@
# Requires: root, python3, curl (no git needed)
# Tested on: AlmaLinux 8/9, Rocky Linux 8/9 (NS8 supported distros)
#
# Two systemd units are installed:
# ns8-backup-monitor.service - long-running webhook receiver for
# Alertmanager failure alerts
# ns8-backup-monitor-check.timer - daily one-shot timer that sends a
# scheduled recap email regardless of
# backup outcome (success or failure)
#
# ns8-sendmail is NOT in the standard root PATH on NS8 nodes.
# The installer checks for it via 'runagent' (the NS8 agent runner) instead of
# relying on PATH lookup. This is the correct way to invoke NS8 built-in tools.
# The installer verifies it via 'runagent' — the NS8 agent runner — which
# is always present when ns8-sendmail is available.
# =============================================================================
set -euo pipefail
# --- constants ----------------------------------------------------------------
SERVICE_NAME="ns8-backup-monitor"
CHECK_SERVICE_NAME="ns8-backup-monitor-check"
INSTALL_DIR="/opt/ns8-backup-monitor"
CONFIG_DIR="/etc/ns8-backup-monitor"
CONFIG_FILE="${CONFIG_DIR}/config.yml"
SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service"
CHECK_SERVICE_FILE="/etc/systemd/system/${CHECK_SERVICE_NAME}.service"
CHECK_TIMER_FILE="/etc/systemd/system/${CHECK_SERVICE_NAME}.timer"
# Gitea raw base URL for downloading individual files
RAW_BASE="https://repo.lelekaos.com/admin/ns8-backup-monitor/raw/branch/main"
@@ -41,21 +51,19 @@ error() { echo -e "${RED}[ERROR]${RESET} $*" >&2; exit 1; }
# =============================================================================
# CHECK NS8-SENDMAIL
# On NS8 nodes, ns8-sendmail is not in the standard root PATH.
# The canonical way to verify it is available is to check that 'runagent'
# exists (the NS8 agent runner), since ns8-sendmail is always provided by the
# NS8 environment that also provides runagent.
# The canonical check is to verify that 'runagent' exists: ns8-sendmail is
# always available when the NS8 environment (and runagent) is present.
# =============================================================================
check_ns8_sendmail() {
  # Report whether ns8-sendmail can be expected to work on this host.
  #
  # Two commands are probed, in order:
  #   ns8-sendmail - direct lookup (covers non-standard or manual installs)
  #   runagent     - its presence is treated as confirmation of an NS8 node,
  #                  where ns8-sendmail is usable at runtime even if it is
  #                  not in root's PATH
  #
  # Returns: 0 as soon as either command resolves, 1 if neither does
  #          (the caller decides how to warn).
  local probe
  for probe in ns8-sendmail runagent; do
    if command -v "$probe" >/dev/null 2>&1; then
      return 0
    fi
  done
  return 1
}
@@ -64,25 +72,29 @@ check_ns8_sendmail() {
# =============================================================================
do_uninstall() {
echo -e "${BOLD}=== ns8-backup-monitor UNINSTALL ===${RESET}"
warn "This will stop and remove the service, code and config."
warn "This will stop and remove the service, timer, code and config."
read -rp "Continue? [y/N] " confirm
[[ "$confirm" =~ ^[Yy]$ ]] || { info "Aborted."; exit 0; }
if systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then
info "Stopping service..."
systemctl stop "$SERVICE_NAME"
fi
if systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then
info "Disabling service..."
systemctl disable "$SERVICE_NAME"
fi
for unit in "$CHECK_SERVICE_NAME" "$SERVICE_NAME"; do
if systemctl is-active --quiet "$unit" 2>/dev/null; then
info "Stopping ${unit}..."
systemctl stop "$unit"
fi
if systemctl is-enabled --quiet "$unit" 2>/dev/null; then
info "Disabling ${unit}..."
systemctl disable "$unit"
fi
done
if [[ -f "$SERVICE_FILE" ]]; then
info "Removing systemd unit..."
rm -f "$SERVICE_FILE"
systemctl daemon-reload
ok "Systemd unit removed."
fi
for f in "$CHECK_TIMER_FILE" "$CHECK_SERVICE_FILE" "$SERVICE_FILE"; do
if [[ -f "$f" ]]; then
info "Removing ${f}..."
rm -f "$f"
fi
done
systemctl daemon-reload
ok "Systemd units removed."
if [[ -d "$INSTALL_DIR" ]]; then
info "Removing ${INSTALL_DIR}..."
@@ -105,12 +117,11 @@ do_uninstall() {
# =============================================================================
# DOWNLOAD SOURCE
# Bug fix: declare and assign tmpdir on the same line so that 'set -u' never
# sees an unset variable, even if the trap fires before mktemp completes.
# =============================================================================
download_source() {
# BUG FIX: declare and assign on the same line so that 'set -u' never sees
# an unset variable, even if the trap fires before mktemp succeeds.
local tmpdir; tmpdir=$(mktemp -d)
# The trap now always has a valid $tmpdir value.
trap 'rm -rf "$tmpdir"' RETURN
info "Downloading source archive..."
@@ -125,7 +136,6 @@ download_source() {
extracted_dir=$(find "${tmpdir}" -mindepth 1 -maxdepth 1 -type d | head -n1)
[[ -n "$extracted_dir" ]] || error "Could not find extracted directory in archive."
# Sync into INSTALL_DIR (rsync-style, pure bash)
cp -a "${extracted_dir}/." "$INSTALL_DIR/"
ok "Source ready at ${INSTALL_DIR}."
}
@@ -142,7 +152,6 @@ do_install() {
command -v curl &>/dev/null || error "curl not found."
command -v tar &>/dev/null || error "tar not found."
# Check ns8-sendmail availability the NS8-correct way (via runagent).
if check_ns8_sendmail; then
ok "ns8-sendmail available (NS8 node confirmed)."
else
@@ -171,10 +180,24 @@ do_install() {
read -rp "Subject prefix [[NS8 Backup]]: " SUBJECT_PREFIX
SUBJECT_PREFIX="${SUBJECT_PREFIX:-[NS8 Backup]}"
# --- scheduled check time -------------------------------------------------
echo
info "From: $MAIL_FROM"
info "To: ${MAIL_TO_LIST[*]}"
info "Prefix: $SUBJECT_PREFIX"
echo -e "${BOLD}Scheduled daily recap${RESET}"
echo -e "A daily timer will send a backup recap email regardless of outcome."
echo -e "Set this to ~30 minutes after your last backup is expected to finish."
read -rp "Daily recap time (HH:MM, 24h) [07:00]: " RECAP_TIME
RECAP_TIME="${RECAP_TIME:-07:00}"
# Validate format
[[ "$RECAP_TIME" =~ ^([01][0-9]|2[0-3]):[0-5][0-9]$ ]] \
|| error "Invalid time format '${RECAP_TIME}'. Use HH:MM (e.g. 07:00)."
RECAP_HOUR="${RECAP_TIME%%:*}"
RECAP_MIN="${RECAP_TIME##*:}"
echo
info "From: $MAIL_FROM"
info "To: ${MAIL_TO_LIST[*]}"
info "Prefix: $SUBJECT_PREFIX"
info "Recap at: ${RECAP_TIME} daily"
echo
read -rp "Confirm and proceed with install? [Y/n] " go
[[ "$go" =~ ^[Nn]$ ]] && { info "Aborted."; exit 0; }
@@ -197,7 +220,6 @@ do_install() {
else
info "Writing ${CONFIG_FILE}..."
# Build YAML 'to' list
local to_yaml=""
for addr in "${MAIL_TO_LIST[@]}"; do
to_yaml+=" - \"${addr}\"\n"
@@ -237,19 +259,32 @@ EOF
ok "Config written."
fi
# --- systemd unit ---------------------------------------------------------
info "Installing systemd unit..."
# --- systemd units --------------------------------------------------------
info "Installing systemd units..."
# Webhook receiver (long-running)
cp "${INSTALL_DIR}/deploy/ns8-backup-monitor.service" "$SERVICE_FILE"
# Scheduled check service (one-shot, invoked by timer)
cp "${INSTALL_DIR}/deploy/ns8-backup-monitor-check.service" "$CHECK_SERVICE_FILE"
# Timer: inject the configured recap time into the unit file
sed "s/OnCalendar=\*-\*-\* 07:00:00/OnCalendar=*-*-* ${RECAP_HOUR}:${RECAP_MIN}:00/" \
"${INSTALL_DIR}/deploy/ns8-backup-monitor-check.timer" > "$CHECK_TIMER_FILE"
systemctl daemon-reload
systemctl enable --now "$SERVICE_NAME"
ok "Service enabled and started."
systemctl enable --now "${CHECK_SERVICE_NAME}.timer"
ok "Webhook service and daily recap timer enabled and started."
# --- done -----------------------------------------------------------------
echo
echo -e "${GREEN}${BOLD}Installation complete.${RESET}"
echo -e " Config: ${CONFIG_FILE}"
echo -e " Status: systemctl status ${SERVICE_NAME}"
echo -e " Logs: journalctl -u ${SERVICE_NAME} -f"
echo -e " Config: ${CONFIG_FILE}"
echo -e " Webhook: systemctl status ${SERVICE_NAME}"
echo -e " Daily recap: systemctl status ${CHECK_SERVICE_NAME}.timer"
echo -e " Logs: journalctl -u ${SERVICE_NAME} -f"
echo -e " Manual test: systemctl start ${CHECK_SERVICE_NAME}"
echo
echo -e "To uninstall: ${BOLD}bash ${INSTALL_DIR}/deploy/install.sh --uninstall${RESET}"
echo -e "To update: ${BOLD}bash <(curl -fsSL ${RAW_BASE}/deploy/install.sh)${RESET}"