diff --git a/deploy/install.sh b/deploy/install.sh index d197574..fc03865 100644 --- a/deploy/install.sh +++ b/deploy/install.sh @@ -9,18 +9,28 @@ # Requires: root, python3, curl (no git needed) # Tested on: AlmaLinux 8/9, Rocky Linux 8/9 (NS8 supported distros) # +# Two systemd units are installed: +# ns8-backup-monitor.service - long-running webhook receiver for +# Alertmanager failure alerts +# ns8-backup-monitor-check.timer - daily one-shot timer that sends a +# scheduled recap email regardless of +# backup outcome (success or failure) +# # ns8-sendmail is NOT in the standard root PATH on NS8 nodes. -# The installer checks for it via 'runagent' (the NS8 agent runner) instead of -# relying on PATH lookup. This is the correct way to invoke NS8 built-in tools. +# The installer verifies it via 'runagent' — the NS8 agent runner — which +# is always present when ns8-sendmail is available. # ============================================================================= set -euo pipefail # --- constants ---------------------------------------------------------------- SERVICE_NAME="ns8-backup-monitor" +CHECK_SERVICE_NAME="ns8-backup-monitor-check" INSTALL_DIR="/opt/ns8-backup-monitor" CONFIG_DIR="/etc/ns8-backup-monitor" CONFIG_FILE="${CONFIG_DIR}/config.yml" SERVICE_FILE="/etc/systemd/system/${SERVICE_NAME}.service" +CHECK_SERVICE_FILE="/etc/systemd/system/${CHECK_SERVICE_NAME}.service" +CHECK_TIMER_FILE="/etc/systemd/system/${CHECK_SERVICE_NAME}.timer" # Gitea raw base URL for downloading individual files RAW_BASE="https://repo.lelekaos.com/admin/ns8-backup-monitor/raw/branch/main" @@ -41,21 +51,19 @@ error() { echo -e "${RED}[ERROR]${RESET} $*" >&2; exit 1; } # ============================================================================= # CHECK NS8-SENDMAIL # On NS8 nodes, ns8-sendmail is not in the standard root PATH. 
-# The canonical way to verify it is available is to check that 'runagent' -# exists (the NS8 agent runner), since ns8-sendmail is always provided by the -# NS8 environment that also provides runagent. +# The canonical check is to verify that 'runagent' exists: ns8-sendmail is +# always available when the NS8 environment (and runagent) is present. # ============================================================================= check_ns8_sendmail() { - # First try direct PATH lookup (covers non-standard installs) + # 1. Direct PATH lookup (covers non-standard or manual installs) if command -v ns8-sendmail &>/dev/null; then return 0 fi - # On NS8 nodes, ns8-sendmail is invoked through the NS8 environment. - # If runagent is present, ns8-sendmail is available at runtime. + # 2. runagent presence confirms this is an NS8 node where ns8-sendmail + # is available at runtime even if not in root's PATH. if command -v runagent &>/dev/null; then return 0 fi - # Neither found: genuine warning return 1 } @@ -64,25 +72,29 @@ check_ns8_sendmail() { # ============================================================================= do_uninstall() { echo -e "${BOLD}=== ns8-backup-monitor UNINSTALL ===${RESET}" - warn "This will stop and remove the service, code and config." + warn "This will stop and remove the service, timer, code and config." read -rp "Continue? [y/N] " confirm [[ "$confirm" =~ ^[Yy]$ ]] || { info "Aborted."; exit 0; } - if systemctl is-active --quiet "$SERVICE_NAME" 2>/dev/null; then - info "Stopping service..." - systemctl stop "$SERVICE_NAME" - fi - if systemctl is-enabled --quiet "$SERVICE_NAME" 2>/dev/null; then - info "Disabling service..." - systemctl disable "$SERVICE_NAME" - fi + for unit in "${CHECK_SERVICE_NAME}.timer" "$CHECK_SERVICE_NAME" "$SERVICE_NAME"; do + if systemctl is-active --quiet "$unit" 2>/dev/null; then + info "Stopping ${unit}..." + systemctl stop "$unit" + fi + if systemctl is-enabled --quiet "$unit" 2>/dev/null; then + info "Disabling ${unit}..." 
+ systemctl disable "$unit" + fi + done - if [[ -f "$SERVICE_FILE" ]]; then - info "Removing systemd unit..." - rm -f "$SERVICE_FILE" - systemctl daemon-reload - ok "Systemd unit removed." - fi + for f in "$CHECK_TIMER_FILE" "$CHECK_SERVICE_FILE" "$SERVICE_FILE"; do + if [[ -f "$f" ]]; then + info "Removing ${f}..." + rm -f "$f" + fi + done + systemctl daemon-reload + ok "Systemd units removed." if [[ -d "$INSTALL_DIR" ]]; then info "Removing ${INSTALL_DIR}..." @@ -105,12 +117,11 @@ do_uninstall() { # ============================================================================= # DOWNLOAD SOURCE +# Bug fix: declare and assign tmpdir on the same line so that 'set -u' never +# sees an unset variable, even if the trap fires before mktemp completes. # ============================================================================= download_source() { - # BUG FIX: declare and assign on the same line so that 'set -u' never sees - # an unset variable, even if the trap fires before mktemp succeeds. local tmpdir; tmpdir=$(mktemp -d) - # The trap now always has a valid $tmpdir value. trap 'rm -rf "$tmpdir"' RETURN info "Downloading source archive..." @@ -125,7 +136,6 @@ download_source() { extracted_dir=$(find "${tmpdir}" -mindepth 1 -maxdepth 1 -type d | head -n1) [[ -n "$extracted_dir" ]] || error "Could not find extracted directory in archive." - # Sync into INSTALL_DIR (rsync-style, pure bash) cp -a "${extracted_dir}/." "$INSTALL_DIR/" ok "Source ready at ${INSTALL_DIR}." } @@ -142,7 +152,6 @@ do_install() { command -v curl &>/dev/null || error "curl not found." command -v tar &>/dev/null || error "tar not found." - # Check ns8-sendmail availability the NS8-correct way (via runagent). if check_ns8_sendmail; then ok "ns8-sendmail available (NS8 node confirmed)." 
else @@ -171,10 +180,24 @@ do_install() { read -rp "Subject prefix [[NS8 Backup]]: " SUBJECT_PREFIX SUBJECT_PREFIX="${SUBJECT_PREFIX:-[NS8 Backup]}" + # --- scheduled check time ------------------------------------------------- echo - info "From: $MAIL_FROM" - info "To: ${MAIL_TO_LIST[*]}" - info "Prefix: $SUBJECT_PREFIX" + echo -e "${BOLD}Scheduled daily recap${RESET}" + echo -e "A daily timer will send a backup recap email regardless of outcome." + echo -e "Set this to ~30 minutes after your last backup is expected to finish." + read -rp "Daily recap time (HH:MM, 24h) [07:00]: " RECAP_TIME + RECAP_TIME="${RECAP_TIME:-07:00}" + # Validate format + [[ "$RECAP_TIME" =~ ^([01][0-9]|2[0-3]):[0-5][0-9]$ ]] \ + || error "Invalid time format '${RECAP_TIME}'. Use HH:MM (e.g. 07:00)." + RECAP_HOUR="${RECAP_TIME%%:*}" + RECAP_MIN="${RECAP_TIME##*:}" + + echo + info "From: $MAIL_FROM" + info "To: ${MAIL_TO_LIST[*]}" + info "Prefix: $SUBJECT_PREFIX" + info "Recap at: ${RECAP_TIME} daily" echo read -rp "Confirm and proceed with install? [Y/n] " go [[ "$go" =~ ^[Nn]$ ]] && { info "Aborted."; exit 0; } @@ -197,7 +220,6 @@ do_install() { else info "Writing ${CONFIG_FILE}..." - # Build YAML 'to' list local to_yaml="" for addr in "${MAIL_TO_LIST[@]}"; do to_yaml+=" - \"${addr}\"\n" @@ -237,19 +259,32 @@ EOF ok "Config written." fi - # --- systemd unit --------------------------------------------------------- - info "Installing systemd unit..." + # --- systemd units -------------------------------------------------------- + info "Installing systemd units..." 
+ + # Webhook receiver (long-running) cp "${INSTALL_DIR}/deploy/ns8-backup-monitor.service" "$SERVICE_FILE" + + # Scheduled check service (one-shot, invoked by timer) + cp "${INSTALL_DIR}/deploy/ns8-backup-monitor-check.service" "$CHECK_SERVICE_FILE" + + # Timer: rewrite whatever OnCalendar= the shipped unit carries to the chosen recap time + sed "s/^OnCalendar=.*/OnCalendar=*-*-* ${RECAP_HOUR}:${RECAP_MIN}:00/" \ + "${INSTALL_DIR}/deploy/ns8-backup-monitor-check.timer" > "$CHECK_TIMER_FILE" + systemctl daemon-reload systemctl enable --now "$SERVICE_NAME" - ok "Service enabled and started." + systemctl enable --now "${CHECK_SERVICE_NAME}.timer" + ok "Webhook service and daily recap timer enabled and started." # --- done ----------------------------------------------------------------- echo echo -e "${GREEN}${BOLD}Installation complete.${RESET}" - echo -e " Config: ${CONFIG_FILE}" - echo -e " Status: systemctl status ${SERVICE_NAME}" - echo -e " Logs: journalctl -u ${SERVICE_NAME} -f" + echo -e " Config: ${CONFIG_FILE}" + echo -e " Webhook: systemctl status ${SERVICE_NAME}" + echo -e " Daily recap: systemctl status ${CHECK_SERVICE_NAME}.timer" + echo -e " Logs: journalctl -u ${SERVICE_NAME} -f" + echo -e " Manual test: systemctl start ${CHECK_SERVICE_NAME}" echo echo -e "To uninstall: ${BOLD}bash ${INSTALL_DIR}/deploy/install.sh --uninstall${RESET}" echo -e "To update: ${BOLD}bash <(curl -fsSL ${RAW_BASE}/deploy/install.sh)${RESET}"