fix: repo_check - handle multi-backend NS8 credentials (B2, S3, SFTP, local)
This commit is contained in:
@@ -3,15 +3,23 @@
|
|||||||
repo_check.py - Verifies reachability and health of NS8 backup repositories.
|
repo_check.py - Verifies reachability and health of NS8 backup repositories.
|
||||||
|
|
||||||
For each backup destination configured in the cluster, attempts a
|
For each backup destination configured in the cluster, attempts a
|
||||||
`restic stats` or `restic snapshots` command to verify the repo is
|
`restic snapshots --last` command to verify the repo is accessible.
|
||||||
accessible and readable. Distinguishes between:
|
Distinguishes between:
|
||||||
- UNREACHABLE: network/mount error, cannot connect at all
|
- UNREACHABLE: network/mount error, cannot connect at all
|
||||||
- LOCKED: restic repo is locked (previous backup crashed)
|
- LOCKED: restic repo is locked (previous backup crashed)
|
||||||
- CORRUPTED: repo exists but integrity check fails
|
- CORRUPTED: repo exists but integrity check fails
|
||||||
- OK: repo is accessible
|
- OK: repo is accessible
|
||||||
|
|
||||||
|
Handles NS8 multi-backend credentials:
|
||||||
|
- local / fs: path only
|
||||||
|
- S3 / B2: url + AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY or
|
||||||
|
B2_ACCOUNT_ID / B2_ACCOUNT_KEY from Redis hash
|
||||||
|
- SFTP: url with sftp: prefix
|
||||||
|
- rclone: rclone: prefix
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import logging
|
import logging
|
||||||
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
@@ -25,11 +33,21 @@ def _redis_cmd(config: dict, *args) -> str:
|
|||||||
return result.stdout.strip()
|
return result.stdout.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _redis_hgetall(config: dict, key: str) -> dict:
    """Return all fields of a Redis hash as a dict.

    Runs ``redis-cli -s <socket> HGETALL <key>`` and pairs up the
    alternating field/value lines of its output.

    Args:
        config: Settings dict; ``config["redis"]["socket"]`` overrides the
            default Redis socket path.
        key: Name of the Redis hash to read.

    Returns:
        Mapping of field name to value. Empty when redis-cli prints no
        field/value pairs or exits with a non-zero status.

    Raises:
        subprocess.TimeoutExpired: redis-cli did not finish within 10 seconds.
        FileNotFoundError: redis-cli could not be found in PATH.
    """
    socket_path = config.get("redis", {}).get(
        "socket", "/var/lib/nethserver/cluster/state/redis.sock"
    )
    cmd = ["redis-cli", "-s", socket_path, "HGETALL", key]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)

    # A failed call has no hash data on stdout; never parse diagnostics as fields.
    if result.returncode != 0:
        return {}

    # redis-cli HGETALL returns alternating key/value lines. Blank lines must
    # be kept: an empty value (e.g. an unset password) is printed as an empty
    # line, and dropping it would shift every following field/value pair.
    # NOTE: values that themselves contain newlines still cannot be parsed
    # reliably from this line-oriented output.
    lines = result.stdout.splitlines()

    # zip() ignores a dangling last line, so an odd line count (such as the
    # lone newline printed for an empty reply) yields no bogus entry.
    return dict(zip(lines[::2], lines[1::2]))
|
||||||
|
|
||||||
|
|
||||||
def _get_backup_destinations(config: dict) -> list:
|
def _get_backup_destinations(config: dict) -> list:
|
||||||
"""
|
"""
|
||||||
Read all configured backup destinations from NS8 Redis.
|
Read all configured backup destinations from NS8 Redis.
|
||||||
Key pattern: cluster/backup_repository/<repo_id>/parameters
|
Key pattern: cluster/backup_repository/<repo_id>/parameters
|
||||||
Returns list of dicts with repo config.
|
Returns list of dicts with full repo config.
|
||||||
"""
|
"""
|
||||||
raw = _redis_cmd(config, "KEYS", "cluster/backup_repository/*/parameters")
|
raw = _redis_cmd(config, "KEYS", "cluster/backup_repository/*/parameters")
|
||||||
keys = [k for k in raw.splitlines() if k]
|
keys = [k for k in raw.splitlines() if k]
|
||||||
@@ -38,28 +56,55 @@ def _get_backup_destinations(config: dict) -> list:
|
|||||||
for key in keys:
|
for key in keys:
|
||||||
parts = key.split("/")
|
parts = key.split("/")
|
||||||
repo_id = parts[2] if len(parts) > 2 else "unknown"
|
repo_id = parts[2] if len(parts) > 2 else "unknown"
|
||||||
url = _redis_cmd(config, "HGET", key, "url")
|
fields = _redis_hgetall(config, key)
|
||||||
repopath = _redis_cmd(config, "HGET", key, "path")
|
|
||||||
password = _redis_cmd(config, "HGET", key, "password")
|
|
||||||
backend = _redis_cmd(config, "HGET", key, "backend")
|
|
||||||
destinations.append({
|
destinations.append({
|
||||||
"repo_id": repo_id,
|
"repo_id": repo_id,
|
||||||
"url": url,
|
"url": fields.get("url", ""),
|
||||||
"path": repopath,
|
"path": fields.get("path", ""),
|
||||||
"password": password,
|
"password": fields.get("password", ""),
|
||||||
"backend": backend,
|
"backend": fields.get("backend", ""),
|
||||||
|
# S3 / B2 credentials
|
||||||
|
"aws_access_key": fields.get("aws_access_key_id", fields.get("b2_account_id", "")),
|
||||||
|
"aws_secret_key": fields.get("aws_secret_access_key", fields.get("b2_account_key", "")),
|
||||||
|
# rclone / extra
|
||||||
|
"rclone_config": fields.get("rclone_config", ""),
|
||||||
|
"extra_env": fields.get("extra_env", ""),
|
||||||
})
|
})
|
||||||
|
|
||||||
return destinations
|
return destinations
|
||||||
|
|
||||||
|
|
||||||
|
def _build_env(dest: dict) -> dict:
    """Build the environment dict for restic based on the backend type.

    Starts from a copy of ``os.environ`` so system-level credentials are
    preserved, then overlays the credentials stored with the destination.

    Args:
        dest: Destination dict. Reads the keys ``backend``, ``password``,
            ``aws_access_key``, ``aws_secret_key`` and ``rclone_config``;
            missing, empty or ``None`` values are treated as "not set".

    Returns:
        A new dict suitable for ``subprocess.run(..., env=...)``. The
        process environment itself is not modified.
    """
    env = dict(os.environ)
    # `or ""` tolerates an explicit None as well as a missing key.
    backend = (dest.get("backend") or "").lower()

    password = dest.get("password")
    if password:
        env["RESTIC_PASSWORD"] = password

    # Both S3 and B2 credentials are stored under the same two dest keys;
    # only the environment variable names differ per backend.
    if backend in ("s3", "aws"):
        cred_vars = ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    elif backend in ("b2", "backblaze"):
        cred_vars = ("B2_ACCOUNT_ID", "B2_ACCOUNT_KEY")
    else:
        cred_vars = None

    if cred_vars is not None:
        key_id = dest.get("aws_access_key") or ""
        secret = dest.get("aws_secret_key") or ""
        # Apply the pair only when it is complete. A half-configured pair
        # would otherwise overwrite inherited credentials with an empty
        # string (or raise KeyError when the secret is absent).
        if key_id and secret:
            env[cred_vars[0]] = key_id
            env[cred_vars[1]] = secret
    elif backend == "rclone" and dest.get("rclone_config"):
        env["RCLONE_CONFIG"] = dest["rclone_config"]

    return env
|
||||||
|
|
||||||
|
|
||||||
def _check_restic_repo(dest: dict, config: dict) -> dict:
|
def _check_restic_repo(dest: dict, config: dict) -> dict:
|
||||||
"""Run restic snapshots --last to verify repo is accessible."""
|
"""Run restic snapshots --last to verify repo is accessible."""
|
||||||
timeout = config.get("repo_check", {}).get("timeout", 60)
|
timeout = config.get("repo_check", {}).get("timeout", 60)
|
||||||
repo_url = dest.get("url") or dest.get("path") or ""
|
|
||||||
password = dest.get("password", "")
|
|
||||||
extra_flags = config.get("repo_check", {}).get("restic_flags", "")
|
extra_flags = config.get("repo_check", {}).get("restic_flags", "")
|
||||||
|
|
||||||
|
repo_url = dest.get("url") or dest.get("path") or ""
|
||||||
if not repo_url:
|
if not repo_url:
|
||||||
return {"repo_id": dest["repo_id"], "status": "UNCONFIGURED", "error": "No URL or path found"}
|
return {"repo_id": dest["repo_id"], "status": "UNCONFIGURED", "error": "No URL or path found"}
|
||||||
|
|
||||||
@@ -67,7 +112,7 @@ def _check_restic_repo(dest: dict, config: dict) -> dict:
|
|||||||
if extra_flags:
|
if extra_flags:
|
||||||
cmd += extra_flags.split()
|
cmd += extra_flags.split()
|
||||||
|
|
||||||
env = {"RESTIC_PASSWORD": password} if password else {}
|
env = _build_env(dest)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
result = subprocess.run(
|
result = subprocess.run(
|
||||||
@@ -75,23 +120,28 @@ def _check_restic_repo(dest: dict, config: dict) -> dict:
|
|||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
env={**__import__("os").environ, **env}
|
env=env
|
||||||
)
|
)
|
||||||
stderr = result.stderr.lower()
|
stderr = result.stderr.lower()
|
||||||
|
|
||||||
if result.returncode == 0:
|
if result.returncode == 0:
|
||||||
return {"repo_id": dest["repo_id"], "status": "OK", "error": ""}
|
return {"repo_id": dest["repo_id"], "status": "OK", "error": ""}
|
||||||
elif "unable to open config" in stderr or "no such file" in stderr:
|
elif any(x in stderr for x in ("unable to open config", "no such file", "does not exist",
|
||||||
|
"connection refused", "network", "timeout", "no route")):
|
||||||
return {"repo_id": dest["repo_id"], "status": "UNREACHABLE", "error": result.stderr.strip()}
|
return {"repo_id": dest["repo_id"], "status": "UNREACHABLE", "error": result.stderr.strip()}
|
||||||
elif "locked" in stderr or "lock" in stderr:
|
elif "locked" in stderr or "lock" in stderr:
|
||||||
return {"repo_id": dest["repo_id"], "status": "LOCKED", "error": result.stderr.strip()}
|
return {"repo_id": dest["repo_id"], "status": "LOCKED", "error": result.stderr.strip()}
|
||||||
elif "error" in stderr:
|
elif "pack" in stderr and "error" in stderr:
|
||||||
|
return {"repo_id": dest["repo_id"], "status": "CORRUPTED", "error": result.stderr.strip()}
|
||||||
|
elif "error" in stderr or "fatal" in stderr:
|
||||||
return {"repo_id": dest["repo_id"], "status": "ERROR", "error": result.stderr.strip()}
|
return {"repo_id": dest["repo_id"], "status": "ERROR", "error": result.stderr.strip()}
|
||||||
else:
|
else:
|
||||||
return {"repo_id": dest["repo_id"], "status": "UNKNOWN", "error": result.stderr.strip()}
|
return {"repo_id": dest["repo_id"], "status": "UNKNOWN", "error": result.stderr.strip()}
|
||||||
|
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
return {"repo_id": dest["repo_id"], "status": "UNREACHABLE", "error": f"Timeout after {timeout}s"}
|
return {"repo_id": dest["repo_id"], "status": "UNREACHABLE", "error": f"Timeout after {timeout}s"}
|
||||||
|
except FileNotFoundError:
|
||||||
|
return {"repo_id": dest["repo_id"], "status": "ERROR", "error": "restic binary not found in PATH"}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return {"repo_id": dest["repo_id"], "status": "ERROR", "error": str(e)}
|
return {"repo_id": dest["repo_id"], "status": "ERROR", "error": str(e)}
|
||||||
|
|
||||||
@@ -103,7 +153,7 @@ def check_repositories(config: dict, correlation: dict) -> dict:
|
|||||||
Returns:
|
Returns:
|
||||||
{
|
{
|
||||||
"destinations": [
|
"destinations": [
|
||||||
{"repo_id": ..., "status": OK|UNREACHABLE|LOCKED|ERROR, "error": ...},
|
{"repo_id": ..., "status": OK|UNREACHABLE|LOCKED|CORRUPTED|ERROR, "error": ...},
|
||||||
...
|
...
|
||||||
],
|
],
|
||||||
"any_unreachable": bool,
|
"any_unreachable": bool,
|
||||||
@@ -125,7 +175,7 @@ def check_repositories(config: dict, correlation: dict) -> dict:
|
|||||||
|
|
||||||
results = []
|
results = []
|
||||||
for dest in destinations:
|
for dest in destinations:
|
||||||
log.info(f"Checking repository {dest['repo_id']} ({dest.get('backend', 'unknown')})...")
|
log.info(f"Checking repository {dest['repo_id']} (backend={dest.get('backend','unknown')})...")
|
||||||
res = _check_restic_repo(dest, config)
|
res = _check_restic_repo(dest, config)
|
||||||
log.info(f" -> {res['status']}: {res.get('error', '')}")
|
log.info(f" -> {res['status']}: {res.get('error', '')}")
|
||||||
results.append(res)
|
results.append(res)
|
||||||
|
|||||||
Reference in New Issue
Block a user