From c5ff0808d2a47ae3d076e2daf06ac30413fefd91 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Fri, 27 Mar 2026 23:05:39 -0700 Subject: [PATCH] be more lenient on startup time of *arr services --- module.nix | 56 ++++++++++-- tests/default.nix | 1 + tests/delayed-start.nix | 194 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 243 insertions(+), 8 deletions(-) create mode 100644 tests/delayed-start.nix diff --git a/module.nix b/module.nix index eecfe1d..10a00ef 100644 --- a/module.nix +++ b/module.nix @@ -219,6 +219,21 @@ let ''; }; + apiTimeout = lib.mkOption { + type = lib.types.ints.positive; + default = 90; + description = '' + Seconds to wait for the application API to become available before + considering the init attempt failed. When the API is not reachable + within this window, the service exits non-zero and systemd's + Restart=on-failure will schedule another attempt after RestartSec. + + The systemd start limit is computed from this value to allow 5 full + retry cycles before the unit enters permanent failure (which would + trigger any configured OnFailure= target). + ''; + }; + naming = lib.mkOption { type = lib.types.attrsOf lib.types.anything; default = { }; @@ -279,6 +294,21 @@ let description = "API port of Bazarr."; }; + apiTimeout = lib.mkOption { + type = lib.types.ints.positive; + default = 90; + description = '' + Seconds to wait for the Bazarr API to become available before + considering the init attempt failed. When the API is not reachable + within this window, the service exits non-zero and systemd's + Restart=on-failure will schedule another attempt after RestartSec. + + The systemd start limit is computed from this value to allow 5 full + retry cycles before the unit enters permanent failure (which would + trigger any configured OnFailure= target). 
+ ''; + }; + sonarr = lib.mkOption { type = bazarrProviderModule; default = { @@ -547,14 +577,14 @@ let BASE_URL="http://127.0.0.1:${builtins.toString inst.port}/api/${inst.apiVersion}" # Wait for API to become available - echo "Waiting for ${name} API..." - for i in $(seq 1 90); do + echo "Waiting for ${name} API (timeout: ${builtins.toString inst.apiTimeout}s)..." + for i in $(seq 1 ${builtins.toString inst.apiTimeout}); do if ${curl} -sf --connect-timeout 5 "$BASE_URL/system/status" -H "X-Api-Key: $API_KEY" > /dev/null 2>&1; then echo "${name} API is ready" break fi - if [ "$i" -eq 90 ]; then - echo "${name} API not available after 90 seconds" >&2 + if [ "$i" -eq ${builtins.toString inst.apiTimeout} ]; then + echo "${name} API not available after ${builtins.toString inst.apiTimeout} seconds" >&2 exit 1 fi sleep 1 @@ -625,14 +655,14 @@ let BASE_URL="http://127.0.0.1:${builtins.toString bazarrCfg.port}" # Wait for API to become available - echo "Waiting for Bazarr API..." - for i in $(seq 1 90); do + echo "Waiting for Bazarr API (timeout: ${builtins.toString bazarrCfg.apiTimeout}s)..." + for i in $(seq 1 ${builtins.toString bazarrCfg.apiTimeout}); do if ${curl} -sf --connect-timeout 5 "$BASE_URL/api/system/status" -H "X-API-KEY: $API_KEY" > /dev/null 2>&1; then echo "Bazarr API is ready" break fi - if [ "$i" -eq 90 ]; then - echo "Bazarr API not available after 90 seconds" >&2 + if [ "$i" -eq ${builtins.toString bazarrCfg.apiTimeout} ]; then + echo "Bazarr API not available after ${builtins.toString bazarrCfg.apiTimeout} seconds" >&2 exit 1 fi sleep 1 @@ -686,6 +716,12 @@ in ++ (lib.optional (inst.networkNamespacePath != null) "wg.service"); requires = [ "${inst.serviceName}.service" ] ++ (getDownloadClientDeps inst); wantedBy = [ "multi-user.target" ]; + unitConfig = { + # Allow 5 full retry cycles (apiTimeout + RestartSec each) before + # entering permanent failure, which is what triggers OnFailure=. 
+ StartLimitIntervalSec = 5 * (inst.apiTimeout + 30); + StartLimitBurst = 5; + }; serviceConfig = { Type = "oneshot"; RemainAfterExit = true; @@ -706,6 +742,10 @@ in after = bazarrDeps; requires = bazarrDeps; wantedBy = [ "multi-user.target" ]; + unitConfig = { + StartLimitIntervalSec = 5 * (bazarrCfg.apiTimeout + 30); + StartLimitBurst = 5; + }; serviceConfig = { Type = "oneshot"; RemainAfterExit = true; diff --git a/tests/default.nix b/tests/default.nix index ec85a1c..7f7c6b9 100644 --- a/tests/default.nix +++ b/tests/default.nix @@ -11,4 +11,5 @@ error-handling = import ./error-handling.nix { inherit pkgs lib self; }; edge-cases = import ./edge-cases.nix { inherit pkgs lib self; }; health-checks = import ./health-checks.nix { inherit pkgs lib self; }; + delayed-start = import ./delayed-start.nix { inherit pkgs lib self; }; } diff --git a/tests/delayed-start.nix b/tests/delayed-start.nix new file mode 100644 index 0000000..1514943 --- /dev/null +++ b/tests/delayed-start.nix @@ -0,0 +1,194 @@ +{ + pkgs, + lib, + self, +}: + +pkgs.testers.runNixOSTest { + name = "arr-init-delayed-start"; + + nodes.machine = + { pkgs, lib, ... }: + { + imports = [ self.nixosModules.default ]; + + system.stateVersion = "24.11"; + + virtualisation.memorySize = 2048; + + environment.systemPackages = with pkgs; [ + curl + jq + gnugrep + ]; + + # Mock *arr service: returns 503 until /tmp/api-ready exists, then + # behaves like a minimal Servarr API. This simulates slow initialization. 
+      systemd.services.mock-sonarr =
+        let
+          mockScript = pkgs.writeScript "mock-sonarr.py" ''
+            import os, json
+            from http.server import HTTPServer, BaseHTTPRequestHandler
+            from urllib.parse import urlparse
+
+            DOWNLOAD_CLIENTS = []  # download clients accepted via POST (in-memory only)
+            ROOT_FOLDERS = []  # root folders accepted via POST (in-memory only)
+
+
+            class MockArr(BaseHTTPRequestHandler):  # handler emulating the few endpoints this test touches
+                def _respond(self, code=200, body=b"", content_type="application/json"):  # status line, Content-Type header, then body
+                    self.send_response(code)
+                    self.send_header("Content-Type", content_type)
+                    self.end_headers()
+                    self.wfile.write(body if isinstance(body, bytes) else body.encode())  # str bodies are encoded before writing
+
+                def do_GET(self):
+                    path = urlparse(self.path).path  # drop any query string
+                    if not os.path.exists("/tmp/api-ready"):  # readiness gate: every GET answers 503 until this file exists
+                        self._respond(503, b"Service Unavailable")
+                        return
+                    if path == "/api/v3/system/status":
+                        self._respond(200, json.dumps({"version": "4.0.0"}).encode())
+                    elif path == "/api/v3/downloadclient":
+                        self._respond(200, json.dumps(DOWNLOAD_CLIENTS).encode())
+                    elif path == "/api/v3/rootfolder":
+                        self._respond(200, json.dumps(ROOT_FOLDERS).encode())
+                    else:
+                        self._respond(200, b"{}")  # any other GET path: empty JSON object
+
+                def do_POST(self):  # note: POST handling is not gated on /tmp/api-ready
+                    path = urlparse(self.path).path
+                    content_length = int(self.headers.get("Content-Length", 0))
+                    body = self.rfile.read(content_length)
+                    if "/downloadclient" in path:
+                        data = json.loads(body)
+                        data["id"] = len(DOWNLOAD_CLIENTS) + 1  # sequential 1-based id
+                        DOWNLOAD_CLIENTS.append(data)
+                        self._respond(201, json.dumps(data).encode())
+                    elif "/rootfolder" in path:
+                        data = json.loads(body)
+                        data["id"] = len(ROOT_FOLDERS) + 1  # sequential 1-based id
+                        ROOT_FOLDERS.append(data)
+                        self._respond(201, json.dumps(data).encode())
+                    else:
+                        self._respond(200, b"{}")  # any other POST path: accepted, nothing stored
+
+                def log_message(self, format, *args):  # replace the default per-request logging with a no-op
+                    pass
+
+
+            HTTPServer(("0.0.0.0", 8989), MockArr).serve_forever()  # 8989 is the port the arrInit instance is configured with
+          '';
+        in
+        {
+          description = "Mock Sonarr API with delayed initialization";
+          wantedBy = [ "multi-user.target" ];
+          serviceConfig = {
+            ExecStart = "${pkgs.python3}/bin/python3 ${mockScript}"; # run through the interpreter explicitly; the script has no shebang
+            Type = "simple";
+          };
+        };
+
+      # Provide a pre-seeded config.xml so the init script can read the API key
+      systemd.tmpfiles.rules = [
+        "d /var/lib/mock-sonarr 0755 root root -"
+        "f /var/lib/mock-sonarr/config.xml 0644 root root - test-api-key-12345" # NOTE(review): file content is the bare key, not XML; confirm the init script's key extraction accepts this
+        "d /media/tv 0755 root root -" # directory registered below via rootFolders
+      ];
+
+      services.arrInit.sonarr = {
+        enable = true;
+        serviceName = "mock-sonarr"; # the testScript addresses the resulting unit as mock-sonarr-init.service
+        dataDir = "/var/lib/mock-sonarr";
+        port = 8989;
+        # Short timeout so the first attempt fails quickly (the testScript greps for "not available after 5 seconds")
+        apiTimeout = 5;
+        downloadClients = [
+          {
+            name = "qBittorrent"; # the testScript later selects the provisioned client by this name
+            implementation = "QBittorrent";
+            configContract = "QBittorrentSettings";
+            protocol = "torrent";
+            fields = {
+              host = "127.0.0.1";
+              port = 6011;
+              useSsl = false;
+            };
+          }
+        ];
+        rootFolders = [ "/media/tv" ];
+      };
+
+      # Override RestartSec for faster test execution (default 30 is too slow for CI); the start-limit interval asserted below still uses the module's fixed 30
+      systemd.services.mock-sonarr-init.serviceConfig.RestartSec = lib.mkForce 2;
+    };
+
+  testScript = ''
+    start_all()
+
+    machine.wait_for_unit("mock-sonarr.service")
+
+    with subtest("Unit has correct start limit configuration"):
+        unit_content = machine.succeed("systemctl cat mock-sonarr-init.service")
+        assert "StartLimitIntervalSec=175" in unit_content, \
+            f"Expected StartLimitIntervalSec=175 (5*(5+30)), got:\n{unit_content}"
+        assert "StartLimitBurst=5" in unit_content, \
+            f"Expected StartLimitBurst=5, got:\n{unit_content}"
+
+    with subtest("Init service fails on first attempt due to API not ready"):
+        # The init service starts automatically after mock-sonarr and will
+        # timeout after 5 seconds because /tmp/api-ready does not exist yet.
+        machine.wait_until_succeeds(
+            "journalctl -u mock-sonarr-init.service --no-pager | grep -q 'not available after 5 seconds'",
+            timeout=30,
+        )
+
+    with subtest("Init service recovers after API becomes available"):
+        # Simulate the *arr service finishing its slow initialization
+        machine.succeed("touch /tmp/api-ready")
+
+        # systemd will restart the service (RestartSec=2). On the next attempt
+        # the mock API responds normally and init should succeed.
+        machine.wait_for_unit("mock-sonarr-init.service", timeout=60)
+
+        exit_code = machine.succeed(
+            "systemctl show mock-sonarr-init.service --property=ExecMainStatus | cut -d= -f2"
+        ).strip()
+        assert exit_code == "0", f"Expected exit code 0 after recovery, got {exit_code}"
+
+    with subtest("Service is active, not in permanent failure"):
+        state = machine.succeed(
+            "systemctl show mock-sonarr-init.service --property=ActiveState | cut -d= -f2"
+        ).strip()
+        assert state == "active", f"Expected 'active' state, got '{state}'"
+
+        # The sub-state should be 'exited' (oneshot + RemainAfterExit)
+        sub_state = machine.succeed(
+            "systemctl show mock-sonarr-init.service --property=SubState | cut -d= -f2"
+        ).strip()
+        assert sub_state == "exited", f"Expected 'exited' sub-state, got '{sub_state}'"
+
+    with subtest("Download client was provisioned after recovery"):
+        machine.succeed(
+            "curl -sf http://localhost:8989/api/v3/downloadclient "
+            "-H 'X-Api-Key: test-api-key-12345' | "
+            "jq -e '.[] | select(.name == \"qBittorrent\")'"
+        )
+
+    with subtest("Root folder was provisioned after recovery"):
+        machine.succeed(
+            "curl -sf http://localhost:8989/api/v3/rootfolder "
+            "-H 'X-Api-Key: test-api-key-12345' | "
+            "jq -e '.[] | select(.path == \"/media/tv\")'"
+        )
+
+    with subtest("Journal shows retry sequence"):
+        journal = machine.succeed("journalctl -u mock-sonarr-init.service --no-pager")
+        # First attempt: timeout message
+        assert "not available after 5 seconds" in journal, \
+            "Expected timeout message from first attempt"
+        # Second attempt: success message (needles are lowercase because the journal is lowercased before matching)
+        assert "sonarr init complete" in journal.lower() or "api is ready" in journal.lower(), \
+            f"Expected success message from second attempt, journal:\n{journal[-1000:]}"
+  '';
+}