#!/usr/bin/env python3
"""
repo_check.py - Verifies reachability and health of NS8 backup repositories.

For each backup destination configured in the cluster, attempts a
`restic snapshots --last` command to verify the repo is accessible.

Distinguishes between:
  - UNREACHABLE: network/mount error, cannot connect at all
  - LOCKED:      restic repo is locked (previous backup crashed)
  - CORRUPTED:   restic reported a pack-related error on stderr
  - OK:          repo is accessible

Other statuses that may be reported: UNCONFIGURED (no URL/path stored),
ERROR (any other restic failure or local problem) and UNKNOWN (non-zero
exit with an unrecognised stderr).

Handles NS8 multi-backend credentials:
  - local / fs: path only
  - S3 / B2: url + AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY or
             B2_ACCOUNT_ID / B2_ACCOUNT_KEY from Redis hash
  - SFTP: url with sftp: prefix
  - rclone: rclone: prefix
"""

import logging
import os
import subprocess
from typing import Optional

log = logging.getLogger(__name__)

# Socket used when the config has no "redis.socket" entry.
_DEFAULT_REDIS_SOCKET = "/var/lib/nethserver/cluster/state/redis.sock"

# Seconds to wait for a single redis-cli invocation.
_REDIS_TIMEOUT = 10

# Lower-case stderr fragments that are treated as "cannot reach the repo".
_UNREACHABLE_MARKERS = (
    "unable to open config",
    "no such file",
    "does not exist",
    "connection refused",
    "network",
    "timeout",
    "no route",
)


def _redis_raw(config: dict, *args) -> str:
    """Run ``redis-cli -s <socket> <args...>`` and return its raw stdout.

    The socket path is read from ``config["redis"]["socket"]`` and falls
    back to the default NS8 socket. A non-zero exit status is logged but
    not raised; the (possibly empty) stdout is returned either way.

    Raises:
        OSError: redis-cli could not be executed (e.g. not installed).
        subprocess.TimeoutExpired: redis-cli did not finish in time.
    """
    socket = config.get("redis", {}).get("socket", _DEFAULT_REDIS_SOCKET)
    cmd = ["redis-cli", "-s", socket, *args]
    result = subprocess.run(
        cmd, capture_output=True, text=True, timeout=_REDIS_TIMEOUT
    )
    if result.returncode != 0:
        log.warning(
            "redis-cli exited with status %s: %s",
            result.returncode,
            result.stderr.strip(),
        )
    return result.stdout


def _redis_cmd(config: dict, *args) -> str:
    """Run a redis-cli command and return its stdout with outer whitespace stripped."""
    return _redis_raw(config, *args).strip()


def _parse_hgetall(output: str) -> dict:
    """Turn redis-cli ``HGETALL`` output into a dict.

    The output is read as alternating field/value lines. Empty lines are
    kept on purpose: an empty line is a legitimate empty value, and dropping
    it would shift every following field/value pair by one position.
    A dangling field without a value line is ignored.
    """
    lines = output.splitlines()
    return dict(zip(lines[::2], lines[1::2]))


def _redis_hgetall(config: dict, key: str) -> dict:
    """Return all fields of a Redis hash as a dict."""
    return _parse_hgetall(_redis_raw(config, "HGETALL", key))


def _get_backup_destinations(config: dict) -> list:
    """
    Read all configured backup destinations from NS8 Redis.

    Key pattern: cluster/backup_repository/<repo_id>/parameters

    Returns list of dicts with full repo config. If Redis cannot be queried
    at all (redis-cli missing, timeout, ...) the problem is logged and an
    empty list is returned. If only one repository hash cannot be read, that
    repository is still returned, with empty fields.
    """
    redis_errors = (OSError, subprocess.SubprocessError)

    try:
        raw = _redis_cmd(config, "KEYS", "cluster/backup_repository/*/parameters")
    except redis_errors as exc:
        log.error("Cannot list backup repositories from Redis: %s", exc)
        return []

    destinations = []
    for key in raw.splitlines():
        if not key:
            continue
        parts = key.split("/")
        repo_id = parts[2] if len(parts) > 2 else "unknown"

        try:
            fields = _redis_hgetall(config, key)
        except redis_errors as exc:
            log.error("Cannot read Redis hash %s: %s", key, exc)
            fields = {}

        destinations.append({
            "repo_id": repo_id,
            "url": fields.get("url", ""),
            "path": fields.get("path", ""),
            "password": fields.get("password", ""),
            "backend": fields.get("backend", ""),
            # S3 / B2 credentials; `or` lets the B2 field win when the AWS
            # field exists but is empty.
            "aws_access_key": fields.get("aws_access_key_id")
            or fields.get("b2_account_id", ""),
            "aws_secret_key": fields.get("aws_secret_access_key")
            or fields.get("b2_account_key", ""),
            # rclone / extra
            "rclone_config": fields.get("rclone_config", ""),
            "extra_env": fields.get("extra_env", ""),
        })
    return destinations


def _build_env(dest: dict) -> dict:
    """
    Build the environment dict for restic based on the backend type.
    Always inherits from os.environ so system-level creds are preserved.

    Sets RESTIC_PASSWORD when a password is stored, plus the credential
    variables matching the backend (AWS_* for s3/aws, B2_* for b2/backblaze,
    RCLONE_CONFIG for rclone) when the corresponding value is non-empty.
    """
    env = dict(os.environ)
    backend = (dest.get("backend") or "").lower()

    if dest.get("password"):
        env["RESTIC_PASSWORD"] = dest["password"]

    if backend in ("s3", "aws") and dest.get("aws_access_key"):
        env["AWS_ACCESS_KEY_ID"] = dest["aws_access_key"]
        env["AWS_SECRET_ACCESS_KEY"] = dest["aws_secret_key"]
    elif backend in ("b2", "backblaze") and dest.get("aws_access_key"):
        env["B2_ACCOUNT_ID"] = dest["aws_access_key"]
        env["B2_ACCOUNT_KEY"] = dest["aws_secret_key"]
    elif backend == "rclone" and dest.get("rclone_config"):
        env["RCLONE_CONFIG"] = dest["rclone_config"]

    return env


def _classify_failure(stderr: str) -> str:
    """Map the lower-cased stderr of a failed restic run to a status string.

    Checks are ordered from most to least specific; the first match wins.
    """
    if any(marker in stderr for marker in _UNREACHABLE_MARKERS):
        return "UNREACHABLE"
    if "lock" in stderr:  # also matches "locked"
        return "LOCKED"
    if "pack" in stderr and "error" in stderr:
        return "CORRUPTED"
    if "error" in stderr or "fatal" in stderr:
        return "ERROR"
    return "UNKNOWN"


def _check_restic_repo(dest: dict, config: dict) -> dict:
    """Run restic snapshots --last to verify repo is accessible.

    Reads ``config["repo_check"]["timeout"]`` (seconds, default 60) and
    ``config["repo_check"]["restic_flags"]`` (extra command-line flags, as a
    whitespace-separated string or a list of strings).

    Returns a dict with keys ``repo_id``, ``status`` and ``error``. Never
    raises: every failure is folded into the returned status.
    """
    repo_id = dest["repo_id"]

    def outcome(status: str, error: str = "") -> dict:
        return {"repo_id": repo_id, "status": status, "error": error}

    repo_check_cfg = config.get("repo_check", {})
    timeout = repo_check_cfg.get("timeout", 60)
    extra_flags = repo_check_cfg.get("restic_flags", "")

    repo_url = dest.get("url") or dest.get("path") or ""
    if not repo_url:
        return outcome("UNCONFIGURED", "No URL or path found")

    cmd = ["restic", "-r", repo_url, "snapshots", "--last", "--no-cache"]
    if extra_flags:
        if isinstance(extra_flags, str):
            cmd += extra_flags.split()
        else:
            cmd += [str(flag) for flag in extra_flags]

    env = _build_env(dest)

    try:
        result = subprocess.run(
            cmd, capture_output=True, text=True, timeout=timeout, env=env
        )
    except subprocess.TimeoutExpired:
        return outcome("UNREACHABLE", f"Timeout after {timeout}s")
    except FileNotFoundError:
        return outcome("ERROR", "restic binary not found in PATH")
    except Exception as e:
        # Per-repository boundary: one broken destination must not abort
        # the checks of the remaining ones.
        return outcome("ERROR", str(e))

    if result.returncode == 0:
        return outcome("OK")
    return outcome(_classify_failure(result.stderr.lower()), result.stderr.strip())


def check_repositories(config: dict, correlation: dict) -> dict:
    """
    Main entry point for repository check.

    ``correlation`` is accepted for interface compatibility and is not used
    by this function.

    Returns:
        {
          "destinations": [
            {"repo_id": ...,
             "status": OK|UNREACHABLE|LOCKED|CORRUPTED|ERROR|UNKNOWN|UNCONFIGURED,
             "error": ...},
            ...
          ],
          "any_unreachable": bool,
          "any_locked": bool,
          "all_ok": bool,
        }

    When no destination can be read from Redis, the result has an empty
    "destinations" list, "any_unreachable" set to True, "all_ok" set to
    False, and an extra "note" key explaining the situation.
    """
    destinations = _get_backup_destinations(config)

    if not destinations:
        log.warning("No backup destinations found in Redis")
        return {
            "destinations": [],
            "any_unreachable": True,
            "any_locked": False,
            "all_ok": False,
            "note": "No backup destinations configured or readable from Redis"
        }

    results = []
    for dest in destinations:
        log.info(
            "Checking repository %s (backend=%s)...",
            dest["repo_id"],
            dest.get("backend", "unknown"),
        )
        res = _check_restic_repo(dest, config)
        log.info(" -> %s: %s", res["status"], res.get("error", ""))
        results.append(res)

    statuses = [r["status"] for r in results]

    return {
        "destinations": results,
        "any_unreachable": "UNREACHABLE" in statuses,
        "any_locked": "LOCKED" in statuses,
        "all_ok": all(status == "OK" for status in statuses),
    }