From 7f395bd9b3e6af2de6b17d9383bd621ad3849f1c Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Wed, 25 Mar 2026 10:14:07 -0700 Subject: [PATCH] add naming option + other stuff ---
# Reviewer notes. This is patch commentary placed after the "---" separator;
# it is not part of the commit message or of any hunk.
#
# What the patch does:
#   * module.nix (options): adds healthCheckRetries (ints.unsigned, default 5),
#     healthCheckInterval (ints.positive, default 10) and naming (attrsOf
#     anything, default { }).
#   * module.nix (mkNamingSection): emits nothing when inst.naming is { }.
#     Otherwise the generated shell GETs $BASE_URL/config/naming, uses jq to
#     check whether any desired top-level key differs from the current value,
#     and only then PUTs the current object merged with the desired one
#     (jq `. * $desired`).
#   * module.nix (mkHealthCheckSection): the downloadclient/testall and
#     applications/testall probes each move into a `while true` loop. On a
#     failed probe the attempt counter is incremented; the script exits 1 once
#     the counter exceeds HC_MAX_RETRIES, otherwise it sleeps HC_INTERVAL
#     seconds and probes again. A probe is therefore tried at most
#     HC_MAX_RETRIES + 1 times.
#   * module.nix (init script): inserts ${mkNamingSection inst} ahead of
#     ${mkHealthCheckSection name inst}.
#   * tests/health-checks.nix: sets healthCheckRetries = 2 and
#     healthCheckInterval = 2 in three instance definitions, stops both
#     sonarr.service and radarr.service before restarting prowlarr-init,
#     raises the wait timeout from 120 to 300, and additionally asserts that
#     "retrying" appears in the prowlarr-init journal.
#
# NOTE(review): the subject's "other stuff" is the health-check retry work
#   described above; consider naming it in the subject line.
# NOTE(review): the retry log prints "attempt N/HC_MAX_RETRIES", but the total
#   number of attempts is HC_MAX_RETRIES + 1, so the last retry logs e.g.
#   "attempt 2/2 ... retrying" before a third probe runs. Cosmetic - confirm
#   the wording is intended.
# NOTE(review): only the curl call is covered by the `|| { ... }` guard. The
#   two jq calls that parse DC_TEST / APP_TEST are unguarded, so a response
#   that is not a JSON array of objects makes jq fail outside the retry logic.
#   Whether that aborts the script or falls through to the "healthy" branch
#   depends on shell options set outside this diff (e.g. `set -e`) - verify.
# NOTE(review): the unchanged test comment "# Stop radarr to ensure synced apps
#   are unreachable" is now stale: the hunk stops sonarr and radarr.
# NOTE(review): `machine.succeed("systemctl stop ... || true")` - the `|| true`
#   forces exit status 0, so presumably these calls can never fail the test;
#   confirm that best-effort stopping is what is wanted.
# NOTE(review): the download-client and synced-app retry loops differ only in
#   variable prefix, endpoint and message text; candidate for one shared
#   helper in module.nix.
# NOTE(review): mkNamingSection detects drift with a top-level `!=` but writes
#   with jq's recursive `*` merge. If a naming field is ever an object, a
#   partial desired value would compare unequal on every run - TODO confirm
#   all naming fields are scalars.
# NOTE(review): this copy of the patch has its line breaks and indentation
#   collapsed; regenerate it from the commit before trying to apply it.
module.nix | 156 ++++++++++++++++++++++++++++++++-------- tests/health-checks.nix | 19 +++-- 2 files changed, 139 insertions(+), 36 deletions(-) diff --git a/module.nix b/module.nix index 2bb19b7..20b5141 100644 --- a/module.nix +++ b/module.nix @@ -191,9 +191,45 @@ let When enabled, the init service will verify connectivity after provisioning: - Tests all download clients are reachable via the application's testall API - For Prowlarr instances: tests all synced applications are reachable - The init service will fail if any health check fails. + The init service will fail if any health check fails after all retries. ''; }; + + healthCheckRetries = lib.mkOption { + type = lib.types.ints.unsigned; + default = 5; + description = '' + Number of times to retry health checks before failing. + Each retry waits healthCheckInterval seconds. This prevents transient + failures (e.g. download clients still starting) from triggering alerts. + ''; + }; + + healthCheckInterval = lib.mkOption { + type = lib.types.ints.positive; + default = 10; + description = '' + Seconds to wait between health check retries. + ''; + }; + + naming = lib.mkOption { + type = lib.types.attrsOf lib.types.anything; + default = { }; + description = '' + Naming configuration to set via the API's config/naming endpoint. + Keys/values map directly to the API fields (e.g. renameEpisodes, + standardEpisodeFormat for Sonarr; renameMovies, standardMovieFormat + for Radarr). Only specified fields are updated; unspecified fields + retain their current values. 
+ ''; + example = { + renameEpisodes = true; + standardEpisodeFormat = "{Series Title} - S{season:00}E{episode:00} - {Episode Title} {Quality Full}"; + seasonFolderFormat = "Season {season}"; + seriesFolderFormat = "{Series Title}"; + }; + }; }; }; @@ -347,48 +383,104 @@ let fi ''; + mkNamingSection = + inst: + lib.optionalString (inst.naming != { }) '' + # Naming configuration + echo "Checking naming configuration..." + CURRENT_NAMING=$(${curl} -sf "$BASE_URL/config/naming" -H "X-Api-Key: $API_KEY") + DESIRED_NAMING=${lib.escapeShellArg (builtins.toJSON inst.naming)} + NEEDS_UPDATE=$(${jq} -n --argjson current "$CURRENT_NAMING" --argjson desired "$DESIRED_NAMING" \ + '[$desired | to_entries[] | select(.value != $current[.key])] | length > 0') + if [ "$NEEDS_UPDATE" = "true" ]; then + echo "Updating naming configuration..." + MERGED_NAMING=$(echo "$CURRENT_NAMING" | ${jq} --argjson desired "$DESIRED_NAMING" '. * $desired') + ${curl} -sf -X PUT "$BASE_URL/config/naming" \ + -H "X-Api-Key: $API_KEY" \ + -H "Content-Type: application/json" \ + -d "$MERGED_NAMING" + echo "Naming configuration updated" + else + echo "Naming configuration already correct, skipping" + fi + ''; + mkHealthCheckSection = name: inst: lib.optionalString inst.healthChecks '' # Health checks echo "Running ${name} health checks..." + HC_MAX_RETRIES=${builtins.toString inst.healthCheckRetries} + HC_INTERVAL=${builtins.toString inst.healthCheckInterval} ${lib.optionalString (inst.downloadClients != [ ]) '' - # Test download client connectivity + # Test download client connectivity (with retries) echo "Testing download client connectivity..." 
- DC_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/downloadclient/testall" \ - -H "X-Api-Key: $API_KEY" \ - -H "Content-Type: application/json") || { - echo "Health check FAILED: could not reach ${name} API for download client test" >&2 - exit 1 - } - DC_FAILURES=$(echo "$DC_TEST" | ${jq} '[.[] | select(.isValid == false)]') - DC_FAIL_COUNT=$(echo "$DC_FAILURES" | ${jq} 'length') - if [ "$DC_FAIL_COUNT" -gt 0 ]; then - echo "Health check FAILED: $DC_FAIL_COUNT download client(s) unreachable:" >&2 - echo "$DC_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"' >&2 - exit 1 - fi - echo "All download clients healthy" + DC_ATTEMPT=0 + while true; do + DC_HEALTHY=true + DC_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/downloadclient/testall" \ + -H "X-Api-Key: $API_KEY" \ + -H "Content-Type: application/json") || { + DC_HEALTHY=false + DC_LAST_ERROR="could not reach ${name} API for download client test" + } + if [ "$DC_HEALTHY" = true ]; then + DC_FAILURES=$(echo "$DC_TEST" | ${jq} '[.[] | select(.isValid == false)]') + DC_FAIL_COUNT=$(echo "$DC_FAILURES" | ${jq} 'length') + if [ "$DC_FAIL_COUNT" -gt 0 ]; then + DC_HEALTHY=false + DC_LAST_ERROR=$(echo "$DC_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"') + fi + fi + if [ "$DC_HEALTHY" = true ]; then + echo "All download clients healthy" + break + fi + DC_ATTEMPT=$((DC_ATTEMPT + 1)) + if [ "$DC_ATTEMPT" -gt "$HC_MAX_RETRIES" ]; then + echo "Health check FAILED after $DC_ATTEMPT attempts: download client(s) unreachable:" >&2 + echo "$DC_LAST_ERROR" >&2 + exit 1 + fi + echo "Download client health check failed (attempt $DC_ATTEMPT/$HC_MAX_RETRIES), retrying in ''${HC_INTERVAL}s..." 
+ sleep "$HC_INTERVAL" + done ''} ${lib.optionalString (inst.syncedApps != [ ]) '' - # Test synced application connectivity + # Test synced application connectivity (with retries) echo "Testing synced application connectivity..." - APP_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/applications/testall" \ - -H "X-Api-Key: $API_KEY" \ - -H "Content-Type: application/json") || { - echo "Health check FAILED: could not reach ${name} API for synced app test" >&2 - exit 1 - } - APP_FAILURES=$(echo "$APP_TEST" | ${jq} '[.[] | select(.isValid == false)]') - APP_FAIL_COUNT=$(echo "$APP_FAILURES" | ${jq} 'length') - if [ "$APP_FAIL_COUNT" -gt 0 ]; then - echo "Health check FAILED: $APP_FAIL_COUNT synced application(s) unreachable:" >&2 - echo "$APP_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"' >&2 - exit 1 - fi - echo "All synced applications healthy" + APP_ATTEMPT=0 + while true; do + APP_HEALTHY=true + APP_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/applications/testall" \ + -H "X-Api-Key: $API_KEY" \ + -H "Content-Type: application/json") || { + APP_HEALTHY=false + APP_LAST_ERROR="could not reach ${name} API for synced app test" + } + if [ "$APP_HEALTHY" = true ]; then + APP_FAILURES=$(echo "$APP_TEST" | ${jq} '[.[] | select(.isValid == false)]') + APP_FAIL_COUNT=$(echo "$APP_FAILURES" | ${jq} 'length') + if [ "$APP_FAIL_COUNT" -gt 0 ]; then + APP_HEALTHY=false + APP_LAST_ERROR=$(echo "$APP_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"') + fi + fi + if [ "$APP_HEALTHY" = true ]; then + echo "All synced applications healthy" + break + fi + APP_ATTEMPT=$((APP_ATTEMPT + 1)) + if [ "$APP_ATTEMPT" -gt "$HC_MAX_RETRIES" ]; then + echo "Health check FAILED after $APP_ATTEMPT attempts: synced application(s) unreachable:" >&2 + echo "$APP_LAST_ERROR" >&2 + exit 1 + fi + echo "Synced app health check failed (attempt 
$APP_ATTEMPT/$HC_MAX_RETRIES), retrying in ''${HC_INTERVAL}s..." + sleep "$HC_INTERVAL" + done ''} echo "${name} health checks passed" @@ -427,6 +519,8 @@ let ${lib.concatMapStringsSep "\n" mkRootFolderSection inst.rootFolders} ${lib.concatMapStringsSep "\n" mkSyncedAppSection inst.syncedApps} + ${mkNamingSection inst} + ${mkHealthCheckSection name inst} echo "${name} init complete" diff --git a/tests/health-checks.nix b/tests/health-checks.nix index f691527..c48932c 100644 --- a/tests/health-checks.nix +++ b/tests/health-checks.nix @@ -121,6 +121,8 @@ pkgs.testers.runNixOSTest { dataDir = "/var/lib/sonarr/.config/NzbDrone"; port = 8989; healthChecks = true; + healthCheckRetries = 2; + healthCheckInterval = 2; downloadClients = [ { name = "qBittorrent"; @@ -145,6 +147,8 @@ pkgs.testers.runNixOSTest { dataDir = "/var/lib/radarr/.config/Radarr"; port = 7878; healthChecks = true; + healthCheckRetries = 2; + healthCheckInterval = 2; downloadClients = [ { name = "qBittorrent"; @@ -170,6 +174,8 @@ pkgs.testers.runNixOSTest { port = 9696; apiVersion = "v1"; healthChecks = true; + healthCheckRetries = 2; + healthCheckInterval = 2; syncedApps = [ { name = "Sonarr"; @@ -288,23 +294,26 @@ pkgs.testers.runNixOSTest { timeout=30, ) - with subtest("Health check fails when Prowlarr synced app is unreachable"): + with subtest("Health check fails with retries when Prowlarr synced app is unreachable"): # Stop radarr to ensure synced apps are unreachable - # (sonarr is already stopped since sonarr-init was pulled down above) - machine.succeed("systemctl stop radarr.service") + machine.succeed("systemctl stop sonarr.service || true") + machine.succeed("systemctl stop radarr.service || true") # Restart prowlarr-init - it should FAIL because synced app connectivity test fails + # even after retries (2 retries * 2s interval = ~4s + attempt time) machine.execute("systemctl restart prowlarr-init.service") # Wait for the service to settle into failed state - # Prowlarr's testall may take up 
to 30s (--max-time) per attempt, plus restart delay + # With retries: up to 3 attempts * (30s max-time + 2s interval) + restart delay machine.wait_until_succeeds( "systemctl show prowlarr-init.service --property=Result | grep -q 'exit-code'", - timeout=120, + timeout=300, ) journal = machine.succeed("journalctl -u prowlarr-init.service --no-pager") assert "health check failed" in journal.lower(), \ "Expected health check failure message in prowlarr-init journal, got: " + journal[-500:] + assert "retrying" in journal.lower(), \ + "Expected retry log messages before final failure in prowlarr-init journal, got: " + journal[-500:] ''; }