add healthChecks option

This commit is contained in:
2026-03-15 13:25:29 -04:00
parent 4cc1ae4e00
commit 7c0a617640
3 changed files with 373 additions and 0 deletions

View File

@@ -172,6 +172,17 @@ let
default = [ ]; default = [ ];
description = "Applications to register for indexer sync (Prowlarr only)."; description = "Applications to register for indexer sync (Prowlarr only).";
}; };
# Opt-in flag (default: off) that mkHealthCheckSection reads to append
# connectivity checks to the generated init script.
# NOTE: each individual check is only generated when the matching list
# (downloadClients / syncedApps) of the instance is non-empty.
healthChecks = lib.mkOption {
type = lib.types.bool;
default = false;
description = ''
When enabled, the init service will verify connectivity after provisioning:
- Tests all download clients are reachable via the application's testall API
- For Prowlarr instances: tests all synced applications are reachable
The init service will fail if any health check fails.
'';
};
}; };
}; };
@@ -325,6 +336,53 @@ let
fi fi
''; '';
# Build the optional health-check portion of an instance's init script.
#
# Arguments:
#   name - instance name; only interpolated into log messages.
#   inst - instance configuration; reads `healthChecks`, `downloadClients`
#          and `syncedApps`.
#
# Returns a shell fragment, or "" when `inst.healthChecks` is false.
# The fragment uses `$BASE_URL` and `$API_KEY` from the surrounding script and
# ends the script with `exit 1` on the first failing check.
mkHealthCheckSection =
  name: inst:
  let
    # Shell for one POST to "$BASE_URL/<endpoint>/testall" plus evaluation of
    # the per-item results. Shared by the download-client and synced-app
    # checks so both report failures the same way.
    mkTestAllCheck =
      {
        endpoint, # API resource whose testall action is called
        shortLabel, # wording used in the "could not reach" message
        singular, # wording used in progress and failure-count messages
        plural, # wording used in the success message
      }:
      ''
        # Test ${singular} connectivity
        echo "Testing ${singular} connectivity..."
        HC_RESPONSE=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/${endpoint}/testall" \
          -H "X-Api-Key: $API_KEY" \
          -H "Content-Type: application/json") || {
          echo "Health check FAILED: could not reach ${name} API for ${shortLabel} test" >&2
          exit 1
        }
        # curl runs without --fail, so an HTTP error still produces a body.
        # Reject anything that is not a JSON array here, instead of letting the
        # filters below error out on it or count it as "zero failures".
        if ! echo "$HC_RESPONSE" | ${jq} -e 'type == "array"' >/dev/null 2>&1; then
          echo "Health check FAILED: unexpected ${endpoint}/testall response from ${name}: $HC_RESPONSE" >&2
          exit 1
        fi
        HC_FAILURES=$(echo "$HC_RESPONSE" | ${jq} '[.[] | select(.isValid == false)]')
        HC_FAIL_COUNT=$(echo "$HC_FAILURES" | ${jq} 'length')
        if [ "$HC_FAIL_COUNT" -gt 0 ]; then
          echo "Health check FAILED: $HC_FAIL_COUNT ${singular}(s) unreachable:" >&2
          # `// []` keeps the report working when an item carries no failure list.
          echo "$HC_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \((.validationFailures // []) | map(.errorMessage) | join(", "))"' >&2
          exit 1
        fi
        echo "All ${plural} healthy"
      '';
  in
  lib.optionalString inst.healthChecks ''
    # Health checks
    echo "Running ${name} health checks..."
    ${lib.optionalString (inst.downloadClients != [ ]) (mkTestAllCheck {
      endpoint = "downloadclient";
      shortLabel = "download client";
      singular = "download client";
      plural = "download clients";
    })}
    ${lib.optionalString (inst.syncedApps != [ ]) (mkTestAllCheck {
      endpoint = "applications";
      shortLabel = "synced app";
      singular = "synced application";
      plural = "synced applications";
    })}
    echo "${name} health checks passed"
  '';
mkInitScript = mkInitScript =
name: inst: name: inst:
pkgs.writeShellScript "${name}-init" '' pkgs.writeShellScript "${name}-init" ''
@@ -358,6 +416,8 @@ let
${lib.concatMapStringsSep "\n" mkRootFolderSection inst.rootFolders} ${lib.concatMapStringsSep "\n" mkRootFolderSection inst.rootFolders}
${lib.concatMapStringsSep "\n" mkSyncedAppSection inst.syncedApps} ${lib.concatMapStringsSep "\n" mkSyncedAppSection inst.syncedApps}
${mkHealthCheckSection name inst}
echo "${name} init complete" echo "${name} init complete"
''; '';

View File

@@ -10,4 +10,5 @@
bazarr = import ./bazarr.nix { inherit pkgs lib self; }; bazarr = import ./bazarr.nix { inherit pkgs lib self; };
error-handling = import ./error-handling.nix { inherit pkgs lib self; }; error-handling = import ./error-handling.nix { inherit pkgs lib self; };
edge-cases = import ./edge-cases.nix { inherit pkgs lib self; }; edge-cases = import ./edge-cases.nix { inherit pkgs lib self; };
health-checks = import ./health-checks.nix { inherit pkgs lib self; };
} }

312
tests/health-checks.nix Normal file
View File

@@ -0,0 +1,312 @@
{
pkgs,
lib,
self,
}:
pkgs.testers.runNixOSTest {
name = "arr-init-health-checks";
nodes.machine =
{ pkgs, lib, ... }:
{
imports = [ self.nixosModules.default ];
system.stateVersion = "24.11";
virtualisation.memorySize = 4096;
environment.systemPackages = with pkgs; [
curl
jq
gnugrep
];
# Stand-in for the qBittorrent Web API used by the download-client checks.
# It listens on port 6011 and is ordered before the Sonarr/Radarr init units.
systemd.services.mock-qbittorrent =
  let
    mockQbitScript = pkgs.writeScript "mock-qbittorrent.py" ''
      import json
      from http.server import HTTPServer, BaseHTTPRequestHandler
      from urllib.parse import parse_qs, urlparse

      # Category store; pre-seeded and extended by createCategory requests.
      CATEGORIES = {
          "tv": {"name": "tv", "savePath": "/downloads"},
          "movies": {"name": "movies", "savePath": "/downloads"},
      }

      # GET routes with a fixed reply: path -> (body, content type).
      STATIC_GET = {
          "/api/v2/app/webapiVersion": (b"2.9.3", "text/plain"),
          "/api/v2/app/version": (b"v5.0.0", "text/plain"),
          "/api/v2/torrents/info": (b"[]", "application/json"),
          "/api/v2/app/preferences": (json.dumps({"save_path": "/tmp"}).encode(), "application/json"),
      }


      class QBitMock(BaseHTTPRequestHandler):
          def _respond(self, code=200, body=b"Ok.", content_type="text/plain"):
              # Every reply carries a session cookie, whatever the route.
              payload = body if isinstance(body, bytes) else body.encode()
              self.send_response(code)
              self.send_header("Content-Type", content_type)
              self.send_header("Set-Cookie", "SID=mock_session_id; Path=/")
              self.end_headers()
              self.wfile.write(payload)

          def do_GET(self):
              route = self.path.split("?")[0]
              if route == "/api/v2/torrents/categories":
                  # Serialised per request so newly created categories show up.
                  self._respond(body=json.dumps(CATEGORIES).encode(), content_type="application/json")
                  return
              canned = STATIC_GET.get(route)
              if canned is None:
                  self._respond()
                  return
              payload, mime = canned
              self._respond(body=payload, content_type=mime)

          def do_POST(self):
              length = int(self.headers.get("Content-Length", 0))
              raw = self.rfile.read(length).decode()
              target = urlparse(self.path)
              # Form fields take precedence over query-string parameters.
              fields = {**parse_qs(target.query), **parse_qs(raw)}
              if target.path == "/api/v2/torrents/createCategory":
                  category = fields.get("category", [""])[0]
                  location = fields.get("savePath", fields.get("save_path", [""]))[0] or "/downloads"
                  if category:
                      CATEGORIES[category] = {"name": category, "savePath": location}
              # All POST routes (editCategory and removeCategory included) get the default reply.
              self._respond()

          def log_message(self, format, *args):
              # Keep the unit's journal free of per-request noise.
              pass


      HTTPServer(("0.0.0.0", 6011), QBitMock).serve_forever()
    '';
  in
  {
    description = "Mock qBittorrent API";
    wantedBy = [ "multi-user.target" ];
    before = [
      "sonarr-init.service"
      "radarr-init.service"
    ];
    serviceConfig.ExecStart = "${pkgs.python3}/bin/python3 ${mockQbitScript}";
    serviceConfig.Type = "simple";
  };
# Create the directories that the arrInit instances register as root folders.
systemd.tmpfiles.rules = [
"d /media/tv 0755 sonarr sonarr -"
"d /media/movies 0755 radarr radarr -"
];
# The applications under test. dataDir and port are spelled out for Sonarr and
# Radarr so they line up with the matching services.arrInit.* entries; the
# ports use mkDefault so another definition can still override them.
services.sonarr = {
enable = true;
dataDir = "/var/lib/sonarr/.config/NzbDrone";
settings.server.port = lib.mkDefault 8989;
};
services.radarr = {
enable = true;
dataDir = "/var/lib/radarr/.config/Radarr";
settings.server.port = lib.mkDefault 7878;
};
# Prowlarr runs with the module's own settings.
# NOTE(review): services.arrInit.prowlarr assumes /var/lib/prowlarr and port
# 9696 - presumably the module defaults; confirm.
services.prowlarr = {
enable = true;
};
# Provision Sonarr with one qBittorrent download client and one root folder,
# with post-provisioning health checks switched on.
services.arrInit.sonarr = {
enable = true;
serviceName = "sonarr";
dataDir = "/var/lib/sonarr/.config/NzbDrone";
port = 8989;
healthChecks = true;
downloadClients = [
{
name = "qBittorrent";
implementation = "QBittorrent";
configContract = "QBittorrentSettings";
protocol = "torrent";
fields = {
# Points at the mock-qbittorrent service, which listens on port 6011.
host = "127.0.0.1";
port = 6011;
useSsl = false;
# "tv" is one of the categories the mock is pre-seeded with.
tvCategory = "tv";
};
}
];
rootFolders = [ "/media/tv" ];
};
# Provision Radarr with one qBittorrent download client and one root folder,
# with post-provisioning health checks switched on.
services.arrInit.radarr = {
enable = true;
serviceName = "radarr";
dataDir = "/var/lib/radarr/.config/Radarr";
port = 7878;
healthChecks = true;
downloadClients = [
{
name = "qBittorrent";
implementation = "QBittorrent";
configContract = "QBittorrentSettings";
protocol = "torrent";
fields = {
# Points at the mock-qbittorrent service, which listens on port 6011.
host = "127.0.0.1";
port = 6011;
useSsl = false;
# "movies" is one of the categories the mock is pre-seeded with.
movieCategory = "movies";
};
}
];
rootFolders = [ "/media/movies" ];
};
# Provision Prowlarr with Sonarr and Radarr as synced applications, with
# post-provisioning health checks switched on. No download clients are
# configured here, so only the synced-application check is generated.
services.arrInit.prowlarr = {
enable = true;
serviceName = "prowlarr";
dataDir = "/var/lib/prowlarr";
port = 9696;
# Prowlarr is addressed under /api/v1 (the test script polls the same path),
# whereas Sonarr and Radarr are polled under /api/v3.
apiVersion = "v1";
healthChecks = true;
syncedApps = [
{
name = "Sonarr";
implementation = "Sonarr";
configContract = "SonarrSettings";
prowlarrUrl = "http://localhost:9696";
baseUrl = "http://localhost:8989";
# Same config.xml that lives in services.sonarr.dataDir.
apiKeyFrom = "/var/lib/sonarr/.config/NzbDrone/config.xml";
syncCategories = [
5000
5010
5020
];
serviceName = "sonarr";
}
{
name = "Radarr";
implementation = "Radarr";
configContract = "RadarrSettings";
prowlarrUrl = "http://localhost:9696";
baseUrl = "http://localhost:7878";
# Same config.xml that lives in services.radarr.dataDir.
apiKeyFrom = "/var/lib/radarr/.config/Radarr/config.xml";
syncCategories = [
2000
2010
2020
];
serviceName = "radarr";
}
];
};
};
# Test driver script (Python). It first proves the health checks pass while
# every dependency is up, then takes dependencies down and expects the init
# units to fail with a "Health check FAILED" message in their journal.
testScript = ''
  start_all()


  def init_exit_status(service: str) -> str:
      """Exit status of the last main process of <service>-init.service."""
      return machine.succeed(
          f"systemctl show {service}-init.service --property=ExecMainStatus | cut -d= -f2"
      ).strip()


  def init_journal(service: str) -> str:
      """Complete journal of <service>-init.service."""
      return machine.succeed(f"journalctl -u {service}-init.service --no-pager")


  # (service, config.xml holding the API key, status endpoint, readiness timeout)
  apps = [
      ("sonarr", "/var/lib/sonarr/.config/NzbDrone/config.xml", "http://localhost:8989/api/v3/system/status", 120),
      ("radarr", "/var/lib/radarr/.config/Radarr/config.xml", "http://localhost:7878/api/v3/system/status", 120),
      ("prowlarr", "/var/lib/prowlarr/config.xml", "http://localhost:9696/api/v1/system/status", 180),
  ]

  # Wait for services to start
  machine.wait_for_unit("mock-qbittorrent.service")
  machine.wait_until_succeeds("curl -sf http://localhost:6011/api/v2/app/version", timeout=30)
  for service, _, _, _ in apps:
      machine.wait_for_unit(f"{service}.service")

  # Wait for APIs to be ready
  for _, config_xml, status_url, ready_timeout in apps:
      machine.wait_until_succeeds(
          f"API_KEY=$(grep -oP '(?<=<ApiKey>)[^<]+' {config_xml}) && "
          f"curl -sf {status_url} -H \"X-Api-Key: $API_KEY\"",
          timeout=ready_timeout,
      )

  # Restart init services to ensure they run with config.xml present
  machine.succeed("systemctl restart sonarr-init.service")
  machine.succeed("systemctl restart radarr-init.service")
  machine.wait_for_unit("sonarr-init.service")
  machine.wait_for_unit("radarr-init.service")
  machine.succeed("systemctl restart prowlarr-init.service")
  machine.wait_for_unit("prowlarr-init.service")

  with subtest("Health checks pass when download clients are reachable"):
      # Both inits should succeed with healthChecks enabled since mock qBittorrent is running
      for service in ("sonarr", "radarr"):
          exit_code = init_exit_status(service)
          assert exit_code == "0", f"{service}-init should succeed when download client is reachable, got exit code {exit_code}"

  with subtest("Health checks pass for Prowlarr synced apps"):
      exit_code = init_exit_status("prowlarr")
      assert exit_code == "0", f"prowlarr-init should succeed when synced apps are reachable, got exit code {exit_code}"

  with subtest("Health check logs confirm validation ran"):
      # Look for the success marker specifically: a looser match such as
      # "health check" is also satisfied by the "Health check FAILED" output.
      for service, _, _, _ in apps:
          assert "health checks passed" in init_journal(service).lower(), \
              f"Expected health check success message in {service}-init journal"

  with subtest("Health check fails when download client is unreachable"):
      # Stop mock qBittorrent to simulate failure
      machine.succeed("systemctl stop mock-qbittorrent.service")
      # Restart sonarr-init - it should FAIL because download client test will fail
      machine.execute("systemctl restart sonarr-init.service")
      # Wait for the service to settle into failed state (it has Restart=on-failure)
      machine.wait_until_succeeds(
          "systemctl show sonarr-init.service --property=Result | grep -q 'exit-code'",
          timeout=60,
      )
      # Check journal for health check failure message
      journal = init_journal("sonarr")
      assert "health check failed" in journal.lower(), \
          "Expected health check failure message in sonarr-init journal, got: " + journal[-500:]

  with subtest("Health check fails when Prowlarr synced app is unreachable"):
      # Only mock qBittorrent was stopped above; Sonarr itself is still running.
      # Taking Radarr down leaves one synced app unreachable, which is enough
      # for the synced application check to fail.
      machine.succeed("systemctl stop radarr.service")
      # Restart prowlarr-init - it should FAIL because synced app connectivity test fails
      machine.execute("systemctl restart prowlarr-init.service")
      # Wait for the service to settle into failed state
      # Prowlarr's testall may take up to 30s (--max-time) per attempt, plus restart delay
      machine.wait_until_succeeds(
          "systemctl show prowlarr-init.service --property=Result | grep -q 'exit-code'",
          timeout=120,
      )
      journal = init_journal("prowlarr")
      assert "health check failed" in journal.lower(), \
          "Expected health check failure message in prowlarr-init journal, got: " + journal[-500:]
'';
}