Add naming option, health check retries (healthCheckRetries, healthCheckInterval), and update tests

This commit is contained in:
2026-03-25 10:14:07 -07:00
parent ef0da7582c
commit 7f395bd9b3
2 changed files with 139 additions and 36 deletions

View File

@@ -191,9 +191,45 @@ let
When enabled, the init service will verify connectivity after provisioning: When enabled, the init service will verify connectivity after provisioning:
- Tests all download clients are reachable via the application's testall API - Tests all download clients are reachable via the application's testall API
- For Prowlarr instances: tests all synced applications are reachable - For Prowlarr instances: tests all synced applications are reachable
The init service will fail if any health check fails. The init service will fail if any health check fails after all retries.
''; '';
}; };
# Retry budget for the connectivity health checks run by the init service.
healthCheckRetries = lib.mkOption {
  description = ''
    Number of times to retry health checks before failing.
    Each retry waits healthCheckInterval seconds. This prevents transient
    failures (e.g. download clients still starting) from triggering alerts.
  '';
  # Unsigned integer: zero is accepted by the type.
  type = lib.types.ints.unsigned;
  default = 5;
};
# Pause between consecutive health check attempts, in seconds.
healthCheckInterval = lib.mkOption {
  description = ''
    Seconds to wait between health check retries.
  '';
  # Strictly positive integer: zero is rejected by the type.
  type = lib.types.ints.positive;
  default = 10;
};
# Free-form naming settings; an empty set (the default) means no naming
# settings are declared for the instance.
naming = lib.mkOption {
  description = ''
    Naming configuration to set via the API's config/naming endpoint.
    Keys/values map directly to the API fields (e.g. renameEpisodes,
    standardEpisodeFormat for Sonarr; renameMovies, standardMovieFormat
    for Radarr). Only specified fields are updated; unspecified fields
    retain their current values.
  '';
  # Values are left untyped (anything) under arbitrary attribute names.
  type = lib.types.attrsOf lib.types.anything;
  default = { };
  example = {
    seriesFolderFormat = "{Series Title}";
    seasonFolderFormat = "Season {season}";
    standardEpisodeFormat = "{Series Title} - S{season:00}E{episode:00} - {Episode Title} {Quality Full}";
    renameEpisodes = true;
  };
};
}; };
}; };
@@ -347,48 +383,104 @@ let
fi fi
''; '';
# Shell fragment that reconciles an instance's naming settings through the
# API's config/naming endpoint. Evaluates to the empty string when
# inst.naming is empty, so such instances emit no naming step at all.
#
# The generated script reads the live configuration, compares only the keys
# present in inst.naming, and issues a PUT with the merged document when any
# of them differ. Every external command is guarded so that a failed request
# or a jq error aborts the script with a message instead of being reported
# as "already correct" or "updated".
mkNamingSection =
  inst:
  lib.optionalString (inst.naming != { }) ''
    # Naming configuration
    echo "Checking naming configuration..."
    # Abort explicitly when the live configuration cannot be read; an empty
    # CURRENT_NAMING must never be mistaken for an up-to-date configuration.
    CURRENT_NAMING=$(${curl} -sf "$BASE_URL/config/naming" -H "X-Api-Key: $API_KEY") || {
      echo "Naming configuration FAILED: could not fetch current naming configuration" >&2
      exit 1
    }
    DESIRED_NAMING=${lib.escapeShellArg (builtins.toJSON inst.naming)}
    # Prints true when at least one desired key differs from the live value.
    NEEDS_UPDATE=$(${jq} -n --argjson current "$CURRENT_NAMING" --argjson desired "$DESIRED_NAMING" \
      '[$desired | to_entries[] | select(.value != $current[.key])] | length > 0') || {
      echo "Naming configuration FAILED: could not compare current and desired naming configuration" >&2
      exit 1
    }
    if [ "$NEEDS_UPDATE" = "true" ]; then
      echo "Updating naming configuration..."
      # Overlay the desired keys on the live document so that fields not
      # listed in the module are sent back unchanged.
      MERGED_NAMING=$(echo "$CURRENT_NAMING" | ${jq} --argjson desired "$DESIRED_NAMING" '. * $desired') || {
        echo "Naming configuration FAILED: could not merge desired naming configuration" >&2
        exit 1
      }
      ${curl} -sf -X PUT "$BASE_URL/config/naming" \
        -H "X-Api-Key: $API_KEY" \
        -H "Content-Type: application/json" \
        -d "$MERGED_NAMING" || {
        echo "Naming configuration FAILED: could not update naming configuration" >&2
        exit 1
      }
      echo "Naming configuration updated"
    else
      echo "Naming configuration already correct, skipping"
    fi
  '';
mkHealthCheckSection = mkHealthCheckSection =
name: inst: name: inst:
lib.optionalString inst.healthChecks '' lib.optionalString inst.healthChecks ''
# Health checks # Health checks
echo "Running ${name} health checks..." echo "Running ${name} health checks..."
HC_MAX_RETRIES=${builtins.toString inst.healthCheckRetries}
HC_INTERVAL=${builtins.toString inst.healthCheckInterval}
${lib.optionalString (inst.downloadClients != [ ]) '' ${lib.optionalString (inst.downloadClients != [ ]) ''
# Test download client connectivity # Test download client connectivity (with retries)
echo "Testing download client connectivity..." echo "Testing download client connectivity..."
DC_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/downloadclient/testall" \ DC_ATTEMPT=0
-H "X-Api-Key: $API_KEY" \ while true; do
-H "Content-Type: application/json") || { DC_HEALTHY=true
echo "Health check FAILED: could not reach ${name} API for download client test" >&2 DC_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/downloadclient/testall" \
exit 1 -H "X-Api-Key: $API_KEY" \
} -H "Content-Type: application/json") || {
DC_FAILURES=$(echo "$DC_TEST" | ${jq} '[.[] | select(.isValid == false)]') DC_HEALTHY=false
DC_FAIL_COUNT=$(echo "$DC_FAILURES" | ${jq} 'length') DC_LAST_ERROR="could not reach ${name} API for download client test"
if [ "$DC_FAIL_COUNT" -gt 0 ]; then }
echo "Health check FAILED: $DC_FAIL_COUNT download client(s) unreachable:" >&2 if [ "$DC_HEALTHY" = true ]; then
echo "$DC_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"' >&2 DC_FAILURES=$(echo "$DC_TEST" | ${jq} '[.[] | select(.isValid == false)]')
exit 1 DC_FAIL_COUNT=$(echo "$DC_FAILURES" | ${jq} 'length')
fi if [ "$DC_FAIL_COUNT" -gt 0 ]; then
echo "All download clients healthy" DC_HEALTHY=false
DC_LAST_ERROR=$(echo "$DC_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"')
fi
fi
if [ "$DC_HEALTHY" = true ]; then
echo "All download clients healthy"
break
fi
DC_ATTEMPT=$((DC_ATTEMPT + 1))
if [ "$DC_ATTEMPT" -gt "$HC_MAX_RETRIES" ]; then
echo "Health check FAILED after $DC_ATTEMPT attempts: download client(s) unreachable:" >&2
echo "$DC_LAST_ERROR" >&2
exit 1
fi
echo "Download client health check failed (attempt $DC_ATTEMPT/$HC_MAX_RETRIES), retrying in ''${HC_INTERVAL}s..."
sleep "$HC_INTERVAL"
done
''} ''}
${lib.optionalString (inst.syncedApps != [ ]) '' ${lib.optionalString (inst.syncedApps != [ ]) ''
# Test synced application connectivity # Test synced application connectivity (with retries)
echo "Testing synced application connectivity..." echo "Testing synced application connectivity..."
APP_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/applications/testall" \ APP_ATTEMPT=0
-H "X-Api-Key: $API_KEY" \ while true; do
-H "Content-Type: application/json") || { APP_HEALTHY=true
echo "Health check FAILED: could not reach ${name} API for synced app test" >&2 APP_TEST=$(${curl} -s --connect-timeout 10 --max-time 30 -X POST "$BASE_URL/applications/testall" \
exit 1 -H "X-Api-Key: $API_KEY" \
} -H "Content-Type: application/json") || {
APP_FAILURES=$(echo "$APP_TEST" | ${jq} '[.[] | select(.isValid == false)]') APP_HEALTHY=false
APP_FAIL_COUNT=$(echo "$APP_FAILURES" | ${jq} 'length') APP_LAST_ERROR="could not reach ${name} API for synced app test"
if [ "$APP_FAIL_COUNT" -gt 0 ]; then }
echo "Health check FAILED: $APP_FAIL_COUNT synced application(s) unreachable:" >&2 if [ "$APP_HEALTHY" = true ]; then
echo "$APP_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"' >&2 APP_FAILURES=$(echo "$APP_TEST" | ${jq} '[.[] | select(.isValid == false)]')
exit 1 APP_FAIL_COUNT=$(echo "$APP_FAILURES" | ${jq} 'length')
fi if [ "$APP_FAIL_COUNT" -gt 0 ]; then
echo "All synced applications healthy" APP_HEALTHY=false
APP_LAST_ERROR=$(echo "$APP_FAILURES" | ${jq} -r '.[] | " - ID \(.id): \(.validationFailures | map(.errorMessage) | join(", "))"')
fi
fi
if [ "$APP_HEALTHY" = true ]; then
echo "All synced applications healthy"
break
fi
APP_ATTEMPT=$((APP_ATTEMPT + 1))
if [ "$APP_ATTEMPT" -gt "$HC_MAX_RETRIES" ]; then
echo "Health check FAILED after $APP_ATTEMPT attempts: synced application(s) unreachable:" >&2
echo "$APP_LAST_ERROR" >&2
exit 1
fi
echo "Synced app health check failed (attempt $APP_ATTEMPT/$HC_MAX_RETRIES), retrying in ''${HC_INTERVAL}s..."
sleep "$HC_INTERVAL"
done
''} ''}
echo "${name} health checks passed" echo "${name} health checks passed"
@@ -427,6 +519,8 @@ let
${lib.concatMapStringsSep "\n" mkRootFolderSection inst.rootFolders} ${lib.concatMapStringsSep "\n" mkRootFolderSection inst.rootFolders}
${lib.concatMapStringsSep "\n" mkSyncedAppSection inst.syncedApps} ${lib.concatMapStringsSep "\n" mkSyncedAppSection inst.syncedApps}
${mkNamingSection inst}
${mkHealthCheckSection name inst} ${mkHealthCheckSection name inst}
echo "${name} init complete" echo "${name} init complete"

View File

@@ -121,6 +121,8 @@ pkgs.testers.runNixOSTest {
dataDir = "/var/lib/sonarr/.config/NzbDrone"; dataDir = "/var/lib/sonarr/.config/NzbDrone";
port = 8989; port = 8989;
healthChecks = true; healthChecks = true;
healthCheckRetries = 2;
healthCheckInterval = 2;
downloadClients = [ downloadClients = [
{ {
name = "qBittorrent"; name = "qBittorrent";
@@ -145,6 +147,8 @@ pkgs.testers.runNixOSTest {
dataDir = "/var/lib/radarr/.config/Radarr"; dataDir = "/var/lib/radarr/.config/Radarr";
port = 7878; port = 7878;
healthChecks = true; healthChecks = true;
healthCheckRetries = 2;
healthCheckInterval = 2;
downloadClients = [ downloadClients = [
{ {
name = "qBittorrent"; name = "qBittorrent";
@@ -170,6 +174,8 @@ pkgs.testers.runNixOSTest {
port = 9696; port = 9696;
apiVersion = "v1"; apiVersion = "v1";
healthChecks = true; healthChecks = true;
healthCheckRetries = 2;
healthCheckInterval = 2;
syncedApps = [ syncedApps = [
{ {
name = "Sonarr"; name = "Sonarr";
@@ -288,23 +294,26 @@ pkgs.testers.runNixOSTest {
timeout=30, timeout=30,
) )
with subtest("Health check fails when Prowlarr synced app is unreachable"): with subtest("Health check fails with retries when Prowlarr synced app is unreachable"):
# Stop radarr to ensure synced apps are unreachable # Stop radarr to ensure synced apps are unreachable
# (sonarr is already stopped since sonarr-init was pulled down above) machine.succeed("systemctl stop sonarr.service || true")
machine.succeed("systemctl stop radarr.service") machine.succeed("systemctl stop radarr.service || true")
# Restart prowlarr-init - it should FAIL because synced app connectivity test fails # Restart prowlarr-init - it should FAIL because synced app connectivity test fails
# even after retries (2 retries * 2s interval = ~4s + attempt time)
machine.execute("systemctl restart prowlarr-init.service") machine.execute("systemctl restart prowlarr-init.service")
# Wait for the service to settle into failed state # Wait for the service to settle into failed state
# Prowlarr's testall may take up to 30s (--max-time) per attempt, plus restart delay # With retries: up to 3 attempts * (30s max-time + 2s interval) + restart delay
machine.wait_until_succeeds( machine.wait_until_succeeds(
"systemctl show prowlarr-init.service --property=Result | grep -q 'exit-code'", "systemctl show prowlarr-init.service --property=Result | grep -q 'exit-code'",
timeout=120, timeout=300,
) )
journal = machine.succeed("journalctl -u prowlarr-init.service --no-pager") journal = machine.succeed("journalctl -u prowlarr-init.service --no-pager")
assert "health check failed" in journal.lower(), \ assert "health check failed" in journal.lower(), \
"Expected health check failure message in prowlarr-init journal, got: " + journal[-500:] "Expected health check failure message in prowlarr-init journal, got: " + journal[-500:]
assert "retrying" in journal.lower(), \
"Expected retry log messages before final failure in prowlarr-init journal, got: " + journal[-500:]
''; '';
} }