be more lenient on startup time of *arr services
This commit is contained in:
56
module.nix
56
module.nix
@@ -219,6 +219,21 @@ let
|
||||
'';
|
||||
};
|
||||
|
||||
apiTimeout = lib.mkOption {
|
||||
type = lib.types.ints.positive;
|
||||
default = 90;
|
||||
description = ''
|
||||
Seconds to wait for the application API to become available before
|
||||
considering the init attempt failed. When the API is not reachable
|
||||
within this window, the service exits non-zero and systemd's
|
||||
Restart=on-failure will schedule another attempt after RestartSec.
|
||||
|
||||
The systemd start limit is computed from this value to allow 5 full
|
||||
retry cycles before the unit enters permanent failure (which would
|
||||
trigger any configured OnFailure= target).
|
||||
'';
|
||||
};
|
||||
|
||||
naming = lib.mkOption {
|
||||
type = lib.types.attrsOf lib.types.anything;
|
||||
default = { };
|
||||
@@ -279,6 +294,21 @@ let
|
||||
description = "API port of Bazarr.";
|
||||
};
|
||||
|
||||
apiTimeout = lib.mkOption {
|
||||
type = lib.types.ints.positive;
|
||||
default = 90;
|
||||
description = ''
|
||||
Seconds to wait for the Bazarr API to become available before
|
||||
considering the init attempt failed. When the API is not reachable
|
||||
within this window, the service exits non-zero and systemd's
|
||||
Restart=on-failure will schedule another attempt after RestartSec.
|
||||
|
||||
The systemd start limit is computed from this value to allow 5 full
|
||||
retry cycles before the unit enters permanent failure (which would
|
||||
trigger any configured OnFailure= target).
|
||||
'';
|
||||
};
|
||||
|
||||
sonarr = lib.mkOption {
|
||||
type = bazarrProviderModule;
|
||||
default = {
|
||||
@@ -547,14 +577,14 @@ let
|
||||
BASE_URL="http://127.0.0.1:${builtins.toString inst.port}/api/${inst.apiVersion}"
|
||||
|
||||
# Wait for API to become available
|
||||
echo "Waiting for ${name} API..."
|
||||
for i in $(seq 1 90); do
|
||||
echo "Waiting for ${name} API (timeout: ${builtins.toString inst.apiTimeout}s)..."
|
||||
for i in $(seq 1 ${builtins.toString inst.apiTimeout}); do
|
||||
if ${curl} -sf --connect-timeout 5 "$BASE_URL/system/status" -H "X-Api-Key: $API_KEY" > /dev/null 2>&1; then
|
||||
echo "${name} API is ready"
|
||||
break
|
||||
fi
|
||||
if [ "$i" -eq 90 ]; then
|
||||
echo "${name} API not available after 90 seconds" >&2
|
||||
if [ "$i" -eq ${builtins.toString inst.apiTimeout} ]; then
|
||||
echo "${name} API not available after ${builtins.toString inst.apiTimeout} seconds" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
@@ -625,14 +655,14 @@ let
|
||||
BASE_URL="http://127.0.0.1:${builtins.toString bazarrCfg.port}"
|
||||
|
||||
# Wait for API to become available
|
||||
echo "Waiting for Bazarr API..."
|
||||
for i in $(seq 1 90); do
|
||||
echo "Waiting for Bazarr API (timeout: ${builtins.toString bazarrCfg.apiTimeout}s)..."
|
||||
for i in $(seq 1 ${builtins.toString bazarrCfg.apiTimeout}); do
|
||||
if ${curl} -sf --connect-timeout 5 "$BASE_URL/api/system/status" -H "X-API-KEY: $API_KEY" > /dev/null 2>&1; then
|
||||
echo "Bazarr API is ready"
|
||||
break
|
||||
fi
|
||||
if [ "$i" -eq 90 ]; then
|
||||
echo "Bazarr API not available after 90 seconds" >&2
|
||||
if [ "$i" -eq ${builtins.toString bazarrCfg.apiTimeout} ]; then
|
||||
echo "Bazarr API not available after ${builtins.toString bazarrCfg.apiTimeout} seconds" >&2
|
||||
exit 1
|
||||
fi
|
||||
sleep 1
|
||||
@@ -686,6 +716,12 @@ in
|
||||
++ (lib.optional (inst.networkNamespacePath != null) "wg.service");
|
||||
requires = [ "${inst.serviceName}.service" ] ++ (getDownloadClientDeps inst);
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
unitConfig = {
|
||||
# Allow 5 full retry cycles (apiTimeout + RestartSec each) before
|
||||
# entering permanent failure, which is what triggers OnFailure=.
|
||||
StartLimitIntervalSec = 5 * (inst.apiTimeout + 30);
|
||||
StartLimitBurst = 5;
|
||||
};
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
@@ -706,6 +742,10 @@ in
|
||||
after = bazarrDeps;
|
||||
requires = bazarrDeps;
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
unitConfig = {
|
||||
StartLimitIntervalSec = 5 * (bazarrCfg.apiTimeout + 30);
|
||||
StartLimitBurst = 5;
|
||||
};
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
RemainAfterExit = true;
|
||||
|
||||
@@ -11,4 +11,5 @@
|
||||
error-handling = import ./error-handling.nix { inherit pkgs lib self; };
|
||||
edge-cases = import ./edge-cases.nix { inherit pkgs lib self; };
|
||||
health-checks = import ./health-checks.nix { inherit pkgs lib self; };
|
||||
delayed-start = import ./delayed-start.nix { inherit pkgs lib self; };
|
||||
}
|
||||
|
||||
194
tests/delayed-start.nix
Normal file
194
tests/delayed-start.nix
Normal file
@@ -0,0 +1,194 @@
|
||||
{
|
||||
pkgs,
|
||||
lib,
|
||||
self,
|
||||
}:
|
||||
|
||||
pkgs.testers.runNixOSTest {
|
||||
name = "arr-init-delayed-start";
|
||||
|
||||
nodes.machine =
|
||||
{ pkgs, lib, ... }:
|
||||
{
|
||||
imports = [ self.nixosModules.default ];
|
||||
|
||||
system.stateVersion = "24.11";
|
||||
|
||||
virtualisation.memorySize = 2048;
|
||||
|
||||
environment.systemPackages = with pkgs; [
|
||||
curl
|
||||
jq
|
||||
gnugrep
|
||||
];
|
||||
|
||||
# Mock *arr service: returns 503 until /tmp/api-ready exists, then
|
||||
# behaves like a minimal Servarr API. This simulates slow initialization.
|
||||
systemd.services.mock-sonarr =
|
||||
let
|
||||
mockScript = pkgs.writeScript "mock-sonarr.py" ''
|
||||
import os, json
|
||||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||||
from urllib.parse import urlparse
|
||||
|
||||
DOWNLOAD_CLIENTS = []
|
||||
ROOT_FOLDERS = []
|
||||
|
||||
|
||||
class MockArr(BaseHTTPRequestHandler):
    """Minimal stand-in for a Servarr-style HTTP API with a readiness gate.

    Until the file named by READY_MARKER exists, every request (GET and
    POST alike) is answered with 503 so that clients see a service that is
    still initializing. Once the marker exists the handler serves a small
    subset of the v3 API backed by the module-level DOWNLOAD_CLIENTS and
    ROOT_FOLDERS lists.
    """

    # Presence of this file flips the mock from "starting" to "ready".
    READY_MARKER = "/tmp/api-ready"

    def _respond(self, code=200, body=b"", content_type="application/json"):
        """Send a complete response with status *code* and payload *body*.

        *body* may be bytes or str; str is encoded with the default codec.
        """
        payload = body if isinstance(body, bytes) else body.encode()
        self.send_response(code)
        self.send_header("Content-Type", content_type)
        # Explicit length lets clients detect truncated replies.
        self.send_header("Content-Length", str(len(payload)))
        self.end_headers()
        self.wfile.write(payload)

    def _reject_if_not_ready(self):
        """Answer 503 and return True while the readiness marker is absent."""
        if os.path.exists(self.READY_MARKER):
            return False
        self._respond(503, b"Service Unavailable")
        return True

    def do_GET(self):
        """Serve status, download-client and root-folder listings."""
        path = urlparse(self.path).path
        if self._reject_if_not_ready():
            return
        if path == "/api/v3/system/status":
            self._respond(200, json.dumps({"version": "4.0.0"}).encode())
        elif path == "/api/v3/downloadclient":
            self._respond(200, json.dumps(DOWNLOAD_CLIENTS).encode())
        elif path == "/api/v3/rootfolder":
            self._respond(200, json.dumps(ROOT_FOLDERS).encode())
        else:
            self._respond(200, b"{}")

    def do_POST(self):
        """Record a posted download client or root folder and echo it back."""
        path = urlparse(self.path).path
        content_length = int(self.headers.get("Content-Length", 0))
        # Drain the request body before replying, even when rejecting.
        body = self.rfile.read(content_length)
        # Writes must honour the same gate as reads: a service that is still
        # starting up cannot accept configuration either.
        if self._reject_if_not_ready():
            return
        if "/downloadclient" in path:
            store = DOWNLOAD_CLIENTS
        elif "/rootfolder" in path:
            store = ROOT_FOLDERS
        else:
            self._respond(200, b"{}")
            return
        data = json.loads(body)
        # Ids are 1-based and follow insertion order.
        data["id"] = len(store) + 1
        store.append(data)
        self._respond(201, json.dumps(data).encode())

    def log_message(self, format, *args):
        """Suppress the default per-request logging to stderr."""
        pass
|
||||
|
||||
|
||||
HTTPServer(("0.0.0.0", 8989), MockArr).serve_forever()
|
||||
'';
|
||||
in
|
||||
{
|
||||
description = "Mock Sonarr API with delayed initialization";
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
ExecStart = "${pkgs.python3}/bin/python3 ${mockScript}";
|
||||
Type = "simple";
|
||||
};
|
||||
};
|
||||
|
||||
# Provide a pre-seeded config.xml so the init script can read the API key
|
||||
systemd.tmpfiles.rules = [
|
||||
"d /var/lib/mock-sonarr 0755 root root -"
|
||||
"f /var/lib/mock-sonarr/config.xml 0644 root root - <Config><ApiKey>test-api-key-12345</ApiKey></Config>"
|
||||
"d /media/tv 0755 root root -"
|
||||
];
|
||||
|
||||
services.arrInit.sonarr = {
|
||||
enable = true;
|
||||
serviceName = "mock-sonarr";
|
||||
dataDir = "/var/lib/mock-sonarr";
|
||||
port = 8989;
|
||||
# Short timeout so the first attempt fails quickly
|
||||
apiTimeout = 5;
|
||||
downloadClients = [
|
||||
{
|
||||
name = "qBittorrent";
|
||||
implementation = "QBittorrent";
|
||||
configContract = "QBittorrentSettings";
|
||||
protocol = "torrent";
|
||||
fields = {
|
||||
host = "127.0.0.1";
|
||||
port = 6011;
|
||||
useSsl = false;
|
||||
};
|
||||
}
|
||||
];
|
||||
rootFolders = [ "/media/tv" ];
|
||||
};
|
||||
|
||||
# Override RestartSec for faster test execution (default 30 is too slow for CI)
|
||||
systemd.services.mock-sonarr-init.serviceConfig.RestartSec = lib.mkForce 2;
|
||||
};
|
||||
|
||||
testScript = ''
|
||||
start_all()
|
||||
|
||||
machine.wait_for_unit("mock-sonarr.service")
|
||||
|
||||
with subtest("Unit has correct start limit configuration"):
|
||||
unit_content = machine.succeed("systemctl cat mock-sonarr-init.service")
|
||||
assert "StartLimitIntervalSec=175" in unit_content, \
|
||||
f"Expected StartLimitIntervalSec=175 (5*(5+30)), got:\n{unit_content}"
|
||||
assert "StartLimitBurst=5" in unit_content, \
|
||||
f"Expected StartLimitBurst=5, got:\n{unit_content}"
|
||||
|
||||
with subtest("Init service fails on first attempt due to API not ready"):
|
||||
# The init service starts automatically after mock-sonarr and will
|
||||
# timeout after 5 seconds because /tmp/api-ready does not exist yet.
|
||||
machine.wait_until_succeeds(
|
||||
"journalctl -u mock-sonarr-init.service --no-pager | grep -q 'not available after 5 seconds'",
|
||||
timeout=30,
|
||||
)
|
||||
|
||||
with subtest("Init service recovers after API becomes available"):
|
||||
# Simulate the *arr service finishing its slow initialization
|
||||
machine.succeed("touch /tmp/api-ready")
|
||||
|
||||
# systemd will restart the service (RestartSec=2). On the next attempt
|
||||
# the mock API responds normally and init should succeed.
|
||||
machine.wait_for_unit("mock-sonarr-init.service", timeout=60)
|
||||
|
||||
exit_code = machine.succeed(
|
||||
"systemctl show mock-sonarr-init.service --property=ExecMainStatus | cut -d= -f2"
|
||||
).strip()
|
||||
assert exit_code == "0", f"Expected exit code 0 after recovery, got {exit_code}"
|
||||
|
||||
with subtest("Service is active, not in permanent failure"):
|
||||
state = machine.succeed(
|
||||
"systemctl show mock-sonarr-init.service --property=ActiveState | cut -d= -f2"
|
||||
).strip()
|
||||
assert state == "active", f"Expected 'active' state, got '{state}'"
|
||||
|
||||
# The sub-state should be 'exited' (oneshot + RemainAfterExit)
|
||||
sub_state = machine.succeed(
|
||||
"systemctl show mock-sonarr-init.service --property=SubState | cut -d= -f2"
|
||||
).strip()
|
||||
assert sub_state == "exited", f"Expected 'exited' sub-state, got '{sub_state}'"
|
||||
|
||||
with subtest("Download client was provisioned after recovery"):
|
||||
machine.succeed(
|
||||
"curl -sf http://localhost:8989/api/v3/downloadclient "
|
||||
"-H 'X-Api-Key: test-api-key-12345' | "
|
||||
"jq -e '.[] | select(.name == \"qBittorrent\")'"
|
||||
)
|
||||
|
||||
with subtest("Root folder was provisioned after recovery"):
|
||||
machine.succeed(
|
||||
"curl -sf http://localhost:8989/api/v3/rootfolder "
|
||||
"-H 'X-Api-Key: test-api-key-12345' | "
|
||||
"jq -e '.[] | select(.path == \"/media/tv\")'"
|
||||
)
|
||||
|
||||
with subtest("Journal shows retry sequence"):
    # Verify that the unit's journal contains both the failed first attempt
    # and evidence of a later successful attempt.
    journal = machine.succeed("journalctl -u mock-sonarr-init.service --no-pager")
    # Lower-case once; every needle matched against it must be lower-case
    # too, otherwise the comparison can never succeed.
    journal_lower = journal.lower()
    # First attempt: timeout message
    assert "not available after 5 seconds" in journal, \
        "Expected timeout message from first attempt"
    # Second attempt: success message
    assert "sonarr init complete" in journal_lower or "api is ready" in journal_lower, \
        f"Expected success message from second attempt, journal:\n{journal[-1000:]}"
|
||||
'';
|
||||
}
|
||||
Reference in New Issue
Block a user