phase 2: promote services/, tests/, patches/, lib/, scripts/

2026-04-18 00:47:39 -04:00
parent 99e98e39b7
commit 999ed05d9f
86 changed files with 0 additions and 0 deletions
--- a/tests/fail2ban-caddy.nix
+++ b/tests/fail2ban-caddy.nix
@@ -0,0 +1,124 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-caddy";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+    };
+
+    server =
+      {
+        config,
+        pkgs,
+        lib,
+        ...
+      }:
+      {
+        imports = [
+          ../modules/security.nix
+        ];
+
+        networking.firewall.allowedTCPPorts = [ 80 ];
+
+        # Pre-create the log directory and an empty access log so that fail2ban is able to start
+        systemd.tmpfiles.rules = [
+          "d /var/log/caddy 755 caddy caddy"
+          "f /var/log/caddy/access-server.log 644 caddy caddy"
+        ];
+
+        # Jail under test: caddy-auth, mirroring the definition in services/caddy.nix
+        services.fail2ban.jails.caddy-auth = {
+          enabled = true;
+          filter.Definition = {
+            datepattern = ''"ts":{Epoch}\.'';
+            ignoreregex = "";
+            # A 401 only counts as a failure when the request carried an Authorization header
+            failregex = ''^.*"remote_ip":"<HOST>".*"Authorization":\["REDACTED"\].*"status":401.*$'';
+          };
+          settings = {
+            maxretry = 3; # Reduced for the test
+            logpath = "/var/log/caddy/access-*.log";
+            port = "http,https";
+            backend = "auto";
+          };
+        };
+
+        # Bare-bones Caddy site guarded by basic auth; nothing production-specific.
+        # The bcrypt hash came from: caddy hash-password --plaintext testpass
+        services.caddy = {
+          virtualHosts.":80".extraConfig = ''
+            log {
+              output file /var/log/caddy/access-server.log
+              format json
+            }
+            basic_auth {
+              testuser $2a$14$XqaQlGTdmofswciqrLlMz.rv0/jiGQq8aU.fP6mh6gCGiLf6Cl3.a
+            }
+            respond "Authenticated!" 200
+          '';
+          enable = true;
+        };
+      };
+  };
+
+  testScript = ''
+    import time
+    import re
+
+    start_all()
+    server.wait_for_unit("caddy.service")
+    server.wait_for_unit("fail2ban.service")
+    server.wait_for_open_port(80)
+    time.sleep(2)
+
+    with subtest("Verify caddy-auth jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "caddy-auth" in status, f"caddy-auth jail not found in: {status}"
+
+    with subtest("Verify correct password works"):
+        # Use -4 to force IPv4 for consistency
+        result = client.succeed("curl -4 -s -u testuser:testpass http://server/")
+        print(f"Curl result: {result}")
+        assert "Authenticated" in result, f"Auth should succeed: {result}"
+
+    with subtest("Unauthenticated requests (browser probes) should not trigger ban"):
+        # Simulate browser probe requests - no Authorization header sent
+        # This is the normal HTTP Basic Auth challenge-response flow:
+        # browser sends request without credentials, gets 401, then resends with credentials
+        for i in range(5):
+            client.execute("curl -4 -s http://server/ || true")
+            time.sleep(0.5)
+        time.sleep(3)
+        status = server.succeed("fail2ban-client status caddy-auth")
+        print(f"caddy-auth jail status after unauthenticated requests: {status}")
+        match = re.search(r"Currently banned:\s*(\d+)", status)
+        assert match, f"Could not parse banned count from: {status}"  # unparseable output must fail, not read as 0
+        assert int(match.group(1)) == 0, f"Unauthenticated 401s should NOT trigger ban, but {match.group(1)} IPs were banned: {status}"
+
+    with subtest("Generate failed basic auth attempts (wrong password)"):
+        # Use -4 to force IPv4 for consistent IP tracking
+        # These send an Authorization header with wrong credentials
+        for i in range(4):
+            client.execute("curl -4 -s -u testuser:wrongpass http://server/ || true")
+            time.sleep(1)
+
+    with subtest("Verify IP is banned after wrong password attempts"):
+        time.sleep(5)
+        status = server.succeed("fail2ban-client status caddy-auth")
+        print(f"caddy-auth jail status: {status}")
+        # Check that at least 1 IP is banned
+        match = re.search(r"Currently banned:\s*(\d+)", status)
+        assert match and int(match.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Use -4 to test with same IP that was banned
+        exit_code = client.execute("curl -4 -s --max-time 3 http://server/ 2>&1")[0]
+        assert exit_code != 0, "Connection should be blocked"
+  '';
+}
--- a/tests/fail2ban-gitea.nix
+++ b/tests/fail2ban-gitea.nix
@@ -0,0 +1,122 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # Replace serviceMountWithZpool and serviceFilePerms with stubs that return
+  # an empty module, so importing the service file pulls in neither helper.
+  stubbedLib = lib.extend (
+    _final: _prev: {
+      serviceMountWithZpool = _name: _zpool: _dirs: { ... }: { };
+      serviceFilePerms = _name: _rules: { ... }: { };
+    }
+  );
+
+  # Shared service-configs.nix values with test-local overrides layered on top.
+  stockServiceConfigs = import ../service-configs.nix;
+  vmServiceConfigs = lib.recursiveUpdate stockServiceConfigs {
+    ports.private.gitea = {
+      proto = "tcp";
+      port = 3000;
+    };
+    gitea = {
+      domain = "git.test.local";
+      dir = "/var/lib/gitea";
+    };
+    zpool_ssds = "";
+  };
+
+  giteaModule =
+    { config, pkgs, ... }:
+    {
+      imports = [
+        (import ../services/gitea.nix {
+          service_configs = vmServiceConfigs;
+          lib = stubbedLib;
+          inherit config pkgs;
+        })
+      ];
+    };
+in
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-gitea";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+    };
+
+    server =
+      {
+        config,
+        lib,
+        pkgs,
+        ...
+      }:
+      {
+        imports = [
+          ../modules/security.nix
+          giteaModule
+        ];
+
+        networking.firewall.allowedTCPPorts = [ 3000 ];
+
+        # Gitea is given a local PostgreSQL instance
+        services.postgresql.enable = true;
+
+        # Tighten the jail for a quick test run and point it at the right port
+        services.fail2ban.jails.gitea.settings = {
+          # The client talks to Gitea directly here rather than through Caddy
+          port = lib.mkForce "3000";
+          maxretry = lib.mkForce 3;
+        };
+
+        # Disable the ZFS mount unit and reset the service's unit dependencies
+        systemd.services."gitea-mounts".enable = lib.mkForce false;
+        systemd.services.gitea = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ "postgresql.service" ];
+          wants = lib.mkForce [ ];
+        };
+      };
+  };
+
+  testScript = ''
+    import re
+    import time
+
+    start_all()
+    # Block until the database, Gitea and fail2ban units are all active
+    for unit in ("postgresql.service", "gitea.service", "fail2ban.service"):
+        server.wait_for_unit(unit)
+    server.wait_for_open_port(3000)
+    time.sleep(3)
+
+    with subtest("Verify gitea jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "gitea" in status, f"gitea jail not found in: {status}"
+
+    with subtest("Generate failed login attempts"):
+        # -4 pins curl to IPv4 so every failure is attributed to one address
+        bad_login = "curl -4 -s -X POST http://server:3000/user/login -d 'user_name=baduser&password=badpass' || true"
+        for _attempt in range(4):
+            client.execute(bad_login)
+            time.sleep(0.5)
+
+    with subtest("Verify IP is banned"):
+        # Pause before asking the jail for its status
+        time.sleep(3)
+        status = server.succeed("fail2ban-client status gitea")
+        print(f"gitea jail status: {status}")
+        # The jail has to report one or more currently banned addresses
+        banned = re.search(r"Currently banned:\s*(\d+)", status)
+        assert banned is not None and int(banned.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Still IPv4, i.e. the same source address that was just banned
+        rc, _output = client.execute("curl -4 -s --max-time 3 http://server:3000/ 2>&1")
+        assert rc != 0, "Connection should be blocked"
+  '';
+}
--- a/tests/fail2ban-immich.nix
+++ b/tests/fail2ban-immich.nix
@@ -0,0 +1,133 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # Replace serviceMountWithZpool and serviceFilePerms with stubs that return
+  # an empty module, so importing the service file pulls in neither helper.
+  stubbedLib = lib.extend (
+    _final: _prev: {
+      serviceMountWithZpool = _name: _zpool: _dirs: { ... }: { };
+      serviceFilePerms = _name: _rules: { ... }: { };
+    }
+  );
+
+  # Shared service-configs.nix values with test-local overrides layered on top.
+  stockServiceConfigs = import ../service-configs.nix;
+  vmServiceConfigs = lib.recursiveUpdate stockServiceConfigs {
+    immich.dir = "/var/lib/immich";
+    ports.private.immich = {
+      proto = "tcp";
+      port = 2283;
+    };
+    https.domain = "test.local";
+    zpool_ssds = "";
+  };
+
+  immichModule =
+    { config, pkgs, ... }:
+    {
+      imports = [
+        (import ../services/immich.nix {
+          service_configs = vmServiceConfigs;
+          lib = stubbedLib;
+          inherit config pkgs;
+        })
+      ];
+    };
+in
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-immich";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+    };
+
+    server =
+      {
+        config,
+        lib,
+        pkgs,
+        ...
+      }:
+      {
+        imports = [
+          ../modules/security.nix
+          immichModule
+        ];
+
+        # The Immich VM is given extra disk and memory
+        virtualisation.memorySize = 4 * 1024; # 4GB RAM for Immich
+        virtualisation.diskSize = 4 * 1024;
+
+        networking.firewall.allowedTCPPorts = [ 2283 ];
+
+        # PostgreSQL is a prerequisite for Immich
+        services.postgresql.enable = true;
+
+        # For the test, have Immich provision its own database
+        services.immich.database.createDB = lib.mkForce true;
+
+        # Tighten the jail for a quick test run and point it at the right port
+        services.fail2ban.jails.immich.settings = {
+          # The client talks to Immich directly here rather than through Caddy
+          port = lib.mkForce "2283";
+          maxretry = lib.mkForce 3;
+        };
+
+        # Disable the ZFS mount units and reset both services' unit dependencies
+        systemd.services."immich-machine-learning-mounts".enable = lib.mkForce false;
+        systemd.services."immich-server-mounts".enable = lib.mkForce false;
+        systemd.services.immich-machine-learning = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ ];
+          wants = lib.mkForce [ ];
+        };
+        systemd.services.immich-server = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ "postgresql.service" ];
+          wants = lib.mkForce [ ];
+        };
+      };
+  };
+
+  testScript = ''
+    import re
+    import time
+
+    start_all()
+    server.wait_for_unit("postgresql.service")
+    server.wait_for_unit("immich-server.service", timeout=120)
+    server.wait_for_unit("fail2ban.service")
+    server.wait_for_open_port(2283, timeout=60)
+    time.sleep(3)
+
+    with subtest("Verify immich jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "immich" in status, f"immich jail not found in: {status}"
+
+    with subtest("Generate failed login attempts"):
+        # -4 pins curl to IPv4 so every failure is attributed to one address
+        bad_login = "curl -4 -s -X POST http://server:2283/api/auth/login -H 'Content-Type: application/json' -d '{\"email\":\"bad@user.com\",\"password\":\"badpass\"}' || true"
+        for _attempt in range(4):
+            client.execute(bad_login)
+            time.sleep(0.5)
+
+    with subtest("Verify IP is banned"):
+        # Pause before asking the jail for its status
+        time.sleep(3)
+        status = server.succeed("fail2ban-client status immich")
+        print(f"immich jail status: {status}")
+        # The jail has to report one or more currently banned addresses
+        banned = re.search(r"Currently banned:\s*(\d+)", status)
+        assert banned is not None and int(banned.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Still IPv4, i.e. the same source address that was just banned
+        rc, _output = client.execute("curl -4 -s --max-time 3 http://server:2283/ 2>&1")
+        assert rc != 0, "Connection should be blocked"
+  '';
+}
--- a/tests/fail2ban-jellyfin.nix
+++ b/tests/fail2ban-jellyfin.nix
@@ -0,0 +1,145 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # Replace the repository's lib helpers with inert versions: the two service
+  # helpers return an empty module and optimizePackage returns its argument.
+  stubbedLib = lib.extend (
+    _final: _prev: {
+      optimizePackage = pkg: pkg; # identity: the package is used unmodified
+      serviceMountWithZpool = _name: _zpool: _dirs: { ... }: { };
+      serviceFilePerms = _name: _rules: { ... }: { };
+    }
+  );
+
+  # Shared service-configs.nix values with test-local overrides layered on top.
+  stockServiceConfigs = import ../service-configs.nix;
+  vmServiceConfigs = lib.recursiveUpdate stockServiceConfigs {
+    jellyfin = {
+      cacheDir = "/var/cache/jellyfin";
+      dataDir = "/var/lib/jellyfin";
+    };
+    https.domain = "test.local";
+    zpool_ssds = "";
+  };
+
+  jellyfinModule =
+    { config, pkgs, ... }:
+    {
+      imports = [
+        (import ../services/jellyfin/jellyfin.nix {
+          service_configs = vmServiceConfigs;
+          lib = stubbedLib;
+          inherit config pkgs;
+        })
+      ];
+    };
+in
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-jellyfin";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+    };
+
+    server =
+      {
+        config,
+        lib,
+        pkgs,
+        ...
+      }:
+      {
+        imports = [
+          ../modules/security.nix
+          jellyfinModule
+        ];
+
+        # Extra memory and disk space for the Jellyfin VM
+        virtualisation.memorySize = 2 * 1024;
+        virtualisation.diskSize = 3 * 1024;
+
+        # Required by the test
+        services.jellyfin.openFirewall = true;
+
+        # Declare the media group
+        users.groups.media = { };
+
+        # Jellyfin writes its logs to files rather than to the systemd journal, so
+        # provide the log directory and a placeholder log file for fail2ban
+        systemd.tmpfiles.rules = [
+          "d /var/lib/jellyfin/log 0755 jellyfin jellyfin"
+          "f /var/lib/jellyfin/log/log_placeholder.log 0644 jellyfin jellyfin"
+        ];
+
+        # Tighten the jail for a quick test run and point it at the right port
+        services.fail2ban.jails.jellyfin.settings = {
+          # The client talks to Jellyfin directly here rather than through Caddy
+          port = lib.mkForce "8096";
+          maxretry = lib.mkForce 3;
+        };
+
+        # Order fail2ban after Jellyfin
+        systemd.services.fail2ban = {
+          after = [ "jellyfin.service" ];
+          wants = [ "jellyfin.service" ];
+        };
+
+        # Disable the ZFS mount unit and reset the service's unit dependencies
+        systemd.services."jellyfin-mounts".enable = lib.mkForce false;
+        systemd.services.jellyfin = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ ];
+          wants = lib.mkForce [ ];
+        };
+      };
+  };
+
+  testScript = ''
+    import re
+    import time
+
+    start_all()
+    server.wait_for_unit("jellyfin.service")
+    server.wait_for_unit("fail2ban.service")
+    server.wait_for_open_port(8096)
+    server.wait_until_succeeds("curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120)
+    time.sleep(2)
+
+    # Jellyfin must have produced a real log file (log_2*.log) before the jail is reloaded
+    server.wait_until_succeeds("ls /var/lib/jellyfin/log/log_2*.log", timeout=30)
+    server.succeed("fail2ban-client reload jellyfin")
+
+    with subtest("Verify jellyfin jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "jellyfin" in status, f"jellyfin jail not found in: {status}"
+
+    with subtest("Generate failed login attempts"):
+        # -4 pins curl to IPv4 so every failure is attributed to one address
+        for _attempt in range(4):
+            client.execute("""
+                curl -4 -s -X POST http://server:8096/Users/authenticatebyname \
+                  -H 'Content-Type: application/json' \
+                  -H 'X-Emby-Authorization: MediaBrowser Client="test", Device="test", DeviceId="test", Version="1.0"' \
+                  -d '{"Username":"baduser","Pw":"badpass"}' || true
+            """)
+            time.sleep(0.5)
+
+    with subtest("Verify IP is banned"):
+        time.sleep(3)
+        status = server.succeed("fail2ban-client status jellyfin")
+        print(f"jellyfin jail status: {status}")
+        # The jail has to report one or more currently banned addresses
+        banned = re.search(r"Currently banned:\s*(\d+)", status)
+        assert banned is not None and int(banned.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Still IPv4, i.e. the same source address that was just banned
+        rc, _output = client.execute("curl -4 -s --max-time 3 http://server:8096/ 2>&1")
+        assert rc != 0, "Connection should be blocked"
+  '';
+}
--- a/tests/fail2ban-ssh.nix
+++ b/tests/fail2ban-ssh.nix
@@ -0,0 +1,99 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # services/ssh.nix is a plain function rather than a NixOS module, so wrap
+  # it: the module arguments are forwarded and the account name is fixed to
+  # "testuser", the user that the server node declares.
+  sshModule =
+    { config, lib, pkgs, ... }:
+    {
+      imports = [
+        (import ../services/ssh.nix {
+          username = "testuser";
+          inherit config lib pkgs;
+        })
+      ];
+    };
+
+  # Imported as-is; the test script expects the resulting system to run
+  # fail2ban with an sshd jail.
+  securityModule = import ../modules/security.nix;
+in
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-ssh";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [
+        pkgs.sshpass
+        pkgs.openssh
+      ];
+    };
+
+    server =
+      {
+        config,
+        lib,
+        pkgs,
+        ...
+      }:
+      {
+        imports = [
+          securityModule
+          sshModule
+        ];
+
+        networking.firewall.allowedTCPPorts = [ 22 ];
+
+        users.users.testuser = {
+          password = "correctpassword";
+          isNormalUser = true;
+        };
+
+        # Test-only override: turn password authentication on
+        services.openssh.settings.PasswordAuthentication = lib.mkForce true;
+      };
+  };
+
+  testScript = ''
+    import time
+    import re
+
+    start_all()
+    server.wait_for_unit("sshd.service")
+    server.wait_for_unit("fail2ban.service")
+    server.wait_for_open_port(22)
+    time.sleep(2)
+
+    with subtest("Verify sshd jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "sshd" in status, f"sshd jail not found in: {status}"
+
+    with subtest("Generate failed SSH login attempts"):
+        # Use -4 to force IPv4, timeout and NumberOfPasswordPrompts=1 to ensure quick failure
+        # maxRetry is 3 in our config, so 4 attempts should trigger a ban
+        for i in range(4):
+            client.execute(
+                "timeout 5 sshpass -p 'wrongpassword' ssh -4 -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -o ConnectTimeout=3 -o NumberOfPasswordPrompts=1 testuser@server echo test 2>/dev/null || true"
+            )
+            time.sleep(1)
+
+    with subtest("Verify IP is banned"):
+        # Wait for fail2ban to process the logs and apply the ban
+        time.sleep(5)
+        status = server.succeed("fail2ban-client status sshd")
+        print(f"sshd jail status: {status}")
+        # Check that at least 1 IP is banned
+        match = re.search(r"Currently banned:\s*(\d+)", status)
+        assert match and int(match.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Use -4 to test with same IP that was banned
+        exit_code = client.execute("timeout 3 nc -4 -z -w 2 server 22")[0]
+        assert exit_code != 0, "Connection should be blocked for banned IP"
+  '';
+}
--- a/tests/fail2ban-vaultwarden.nix
+++ b/tests/fail2ban-vaultwarden.nix
@@ -0,0 +1,130 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # Replace serviceMountWithZpool and serviceFilePerms with stubs that return
+  # an empty module, so importing the service file pulls in neither helper.
+  stubbedLib = lib.extend (
+    _final: _prev: {
+      serviceMountWithZpool = _name: _zpool: _dirs: { ... }: { };
+      serviceFilePerms = _name: _rules: { ... }: { };
+    }
+  );
+
+  # Shared service-configs.nix values with test-local overrides layered on top.
+  stockServiceConfigs = import ../service-configs.nix;
+  vmServiceConfigs = lib.recursiveUpdate stockServiceConfigs {
+    https.domain = "test.local";
+    zpool_ssds = "";
+  };
+
+  vaultwardenModule =
+    { config, pkgs, ... }:
+    {
+      imports = [
+        (import ../services/bitwarden.nix {
+          service_configs = vmServiceConfigs;
+          lib = stubbedLib;
+          inherit config pkgs;
+        })
+      ];
+    };
+in
+pkgs.testers.runNixOSTest {
+  name = "fail2ban-vaultwarden";
+
+  nodes = {
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+    };
+
+    server =
+      {
+        config,
+        lib,
+        pkgs,
+        ...
+      }:
+      {
+        imports = [
+          ../modules/security.nix
+          vaultwardenModule
+        ];
+
+        networking.firewall.allowedTCPPorts = [ 8222 ];
+
+        # Test-specific Vaultwarden settings:
+        # - bind to every interface instead of only localhost
+        # - log at info level so that failed login attempts are recorded
+        services.vaultwarden.config = {
+          ROCKET_LOG = lib.mkForce "info";
+          ROCKET_ADDRESS = lib.mkForce "0.0.0.0";
+        };
+
+        # Tighten the jail for a quick test run and point it at the right port
+        services.fail2ban.jails.vaultwarden.settings = {
+          # The client talks to Vaultwarden directly here rather than through Caddy
+          port = lib.mkForce "8222";
+          maxretry = lib.mkForce 3;
+        };
+
+        # Disable the ZFS mount units and reset both services' unit dependencies
+        systemd.services."backup-vaultwarden-mounts".enable = lib.mkForce false;
+        systemd.services."vaultwarden-mounts".enable = lib.mkForce false;
+        systemd.services.backup-vaultwarden = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ ];
+          wants = lib.mkForce [ ];
+        };
+        systemd.services.vaultwarden = {
+          requires = lib.mkForce [ ];
+          after = lib.mkForce [ ];
+          wants = lib.mkForce [ ];
+        };
+      };
+  };
+
+  testScript = ''
+    import re
+    import time
+
+    start_all()
+    server.wait_for_unit("vaultwarden.service")
+    server.wait_for_unit("fail2ban.service")
+    server.wait_for_open_port(8222)
+    time.sleep(2)
+
+    with subtest("Verify vaultwarden jail is active"):
+        status = server.succeed("fail2ban-client status")
+        assert "vaultwarden" in status, f"vaultwarden jail not found in: {status}"
+
+    with subtest("Generate failed login attempts"):
+        # -4 pins curl to IPv4 so every failure is attributed to one address
+        for _attempt in range(4):
+            client.execute("""
+                curl -4 -s -X POST 'http://server:8222/identity/connect/token' \
+                  -H 'Content-Type: application/x-www-form-urlencoded' \
+                  -H 'Bitwarden-Client-Name: web' \
+                  -H 'Bitwarden-Client-Version: 2024.1.0' \
+                  -d 'grant_type=password&username=bad@user.com&password=badpass&scope=api+offline_access&client_id=web&deviceType=10&deviceIdentifier=test&deviceName=test' \
+                  || true
+            """)
+            time.sleep(0.5)
+
+    with subtest("Verify IP is banned"):
+        time.sleep(3)
+        status = server.succeed("fail2ban-client status vaultwarden")
+        print(f"vaultwarden jail status: {status}")
+        # The jail has to report one or more currently banned addresses
+        banned = re.search(r"Currently banned:\s*(\d+)", status)
+        assert banned is not None and int(banned.group(1)) >= 1, f"Expected at least 1 banned IP, got: {status}"
+
+    with subtest("Verify banned client cannot connect"):
+        # Still IPv4, i.e. the same source address that was just banned
+        rc, _output = client.execute("curl -4 -s --max-time 3 http://server:8222/ 2>&1")
+        assert rc != 0, "Connection should be blocked"
+  '';
+}
--- a/tests/file-perms.nix
+++ b/tests/file-perms.nix
@@ -0,0 +1,53 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  testPkgs = pkgs.appendOverlays [ (import ../modules/overlays.nix) ];
+in
+testPkgs.testers.runNixOSTest {
+  name = "file-perms";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      imports = [
+        (lib.serviceFilePerms "test-service" [
+          "Z /tmp/test-perms-dir 0750 nobody nogroup"
+        ])
+      ];
+
+      systemd.services."test-service" = {
+        serviceConfig = {
+          Type = "oneshot";
+          RemainAfterExit = true;
+          ExecStart = lib.getExe pkgs.bash;
+        };
+      };
+    };
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    # Create the directory with owner and mode that differ from the tmpfiles rule
+    machine.succeed("mkdir -p /tmp/test-perms-dir")
+    machine.succeed("chown root:root /tmp/test-perms-dir")
+    machine.succeed("chmod 700 /tmp/test-perms-dir")
+
+    # Start service -- this should pull in test-service-file-perms
+    machine.succeed("systemctl start test-service")
+
+    # Verify file-perms service ran and is active
+    machine.succeed("systemctl is-active test-service-file-perms.service")
+
+    # Owner and group must now match the rule (nobody:nogroup)
+    result = machine.succeed("stat -c '%U:%G' /tmp/test-perms-dir").strip()
+    assert result == "nobody:nogroup", f"Expected nobody:nogroup, got {result}"
+
+    result = machine.succeed("stat -c '%a' /tmp/test-perms-dir").strip()
+    assert result == "750", f"Expected 750, got {result}"
+  '';
+}
--- a/tests/gitea-runner.nix
+++ b/tests/gitea-runner.nix
@@ -0,0 +1,60 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+pkgs.testers.runNixOSTest {
+  name = "gitea-runner";
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      # The runner is only defined in this specialisation; its tokenFile is
+      # written by the test script once the base system's Gitea is running.
+      specialisation.runner = {
+        configuration.services.gitea-actions-runner.instances.test = {
+          tokenFile = "/var/lib/gitea/runner_token";
+          url = "http://localhost:3000";
+          labels = [ "native:host" ];
+          name = "ci";
+          enable = true;
+        };
+        inheritParentConfig = true;
+      };
+
+      # Gitea on SQLite, served on localhost:3000, with Actions enabled
+      # and self-registration disabled.
+      services.gitea = {
+        enable = true;
+        database.type = "sqlite3";
+        settings.server.DOMAIN = "localhost";
+        settings.server.HTTP_PORT = 3000;
+        settings.server.ROOT_URL = "http://localhost:3000";
+        settings.actions.ENABLED = true;
+        settings.service.DISABLE_REGISTRATION = true;
+      };
+    };
+
+  testScript = ''
+    start_all()
+
+    machine.wait_for_unit("gitea.service")
+    machine.wait_for_open_port(3000)
+
+    # Generate a runner token and store it as TOKEN=<value> in the runner's tokenFile
+    machine.succeed(
+        "su -l gitea -s /bin/sh -c '${pkgs.gitea}/bin/gitea actions generate-runner-token --work-path /var/lib/gitea' | tail -1 | sed 's/^/TOKEN=/' > /var/lib/gitea/runner_token"
+    )
+
+    # Switch to runner specialisation
+    machine.succeed(
+        "/run/current-system/specialisation/runner/bin/switch-to-configuration test"
+    )
+
+    # Start the runner (specialisation switch doesn't auto-start new services)
+    machine.succeed("systemctl start gitea-runner-test.service")
+    machine.wait_for_unit("gitea-runner-test.service")
+    # Poll for the .runner file instead of sleeping a fixed 5 seconds and checking once
+    machine.wait_until_succeeds("test -f /var/lib/gitea-runner/test/.runner", timeout=60)
+  '';
+}
--- a/tests/jellyfin-annotations.nix
+++ b/tests/jellyfin-annotations.nix
@@ -0,0 +1,190 @@
+{
+  lib,
+  pkgs,
+  ...
+}:
+let
+  python = pkgs.python3;
+  script = ../services/grafana/jellyfin-annotations.py;
+  mockGrafana = ./mock-grafana-server.py;
+  jfLib = import ./jellyfin-test-lib.nix { inherit lib pkgs; };
+in
+pkgs.testers.runNixOSTest {
+  name = "jellyfin-annotations";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = [ pkgs.python3 ];
+      imports = [ jfLib.jellyfinTestConfig ];
+    };
+
+  testScript = ''
+    import importlib.util
+    import json
+    import time
+
+    _helper_spec = importlib.util.spec_from_file_location("jf_helpers", "${jfLib.helpers}")
+    assert _helper_spec and _helper_spec.loader
+    _helpers = importlib.util.module_from_spec(_helper_spec)
+    _helper_spec.loader.exec_module(_helpers)
+    setup_jellyfin = _helpers.setup_jellyfin
+    jellyfin_api = _helpers.jellyfin_api
+
+    GRAFANA_PORT = 13000
+    ANNOTS_FILE = "/tmp/annotations.json"
+    STATE_FILE = "/tmp/annotations-state.json"
+    CREDS_DIR = "/tmp/test-creds"
+    PYTHON = "${python}/bin/python3"
+    MOCK_GRAFANA = "${mockGrafana}"
+    SCRIPT = "${script}"
+
+    auth_header = 'MediaBrowser Client="Infuse", DeviceId="test-dev-1", Device="iPhone", Version="1.0"'
+    auth_header2 = 'MediaBrowser Client="Jellyfin Web", DeviceId="test-dev-2", Device="Chrome", Version="1.0"'
+
+    def read_annotations():
+        raw = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'")
+        return json.loads(raw.strip())
+
+    start_all()
+    token, user_id, movie_id, media_source_id = setup_jellyfin(
+        machine, retry, auth_header,
+        "${jfLib.payloads.auth}", "${jfLib.payloads.empty}",
+    )
+
+    with subtest("Setup mock Grafana and credentials"):
+        machine.succeed(f"mkdir -p {CREDS_DIR}")
+        machine.succeed(f"echo '{token}' > {CREDS_DIR}/jellyfin-api-key")
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+        machine.succeed(
+            f"systemd-run --unit=mock-grafana {PYTHON} {MOCK_GRAFANA} {GRAFANA_PORT} {ANNOTS_FILE}"
+        )
+        machine.wait_until_succeeds(
+            f"curl -sf -X POST http://127.0.0.1:{GRAFANA_PORT}/api/annotations "
+            f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
+            timeout=10,
+        )
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")  # reset the file after the readiness ping above
+
+    with subtest("Start annotation service"):
+        machine.succeed(
+            f"systemd-run --unit=annotations-svc "
+            f"--setenv=JELLYFIN_URL=http://127.0.0.1:8096 "
+            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
+            f"--setenv=CREDENTIALS_DIRECTORY={CREDS_DIR} "
+            f"--setenv=STATE_FILE={STATE_FILE} "
+            f"--setenv=POLL_INTERVAL=3 "
+            f"{PYTHON} {SCRIPT}"
+        )
+        time.sleep(2)
+
+    with subtest("No annotations when no streams active"):
+        time.sleep(4)  # longer than the POLL_INTERVAL of 3 configured above
+        annots = read_annotations()
+        assert annots == [], f"Expected no annotations, got: {annots}"
+
+    with subtest("Annotation created when playback starts"):
+        playback_start = json.dumps({
+            "ItemId": movie_id,
+            "MediaSourceId": media_source_id,
+            "PlaySessionId": "test-play-1",
+            "CanSeek": True,
+            "IsPaused": False,
+        })
+        machine.succeed(
+            f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing' "
+            f"-d '{playback_start}' -H 'Content-Type:application/json' "
+            f"-H 'X-Emby-Authorization:{auth_header}, Token={token}'"
+        )
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | python3 -c \"import sys,json; a=json.load(sys.stdin); exit(0 if a else 1)\"",
+            timeout=15,
+        )
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
+        text = annots[0]["text"]
+        assert "jellyfin" in annots[0].get("tags", []), f"Missing jellyfin tag: {annots[0]}"
+        assert "Test Movie" in text, f"Missing title in: {text}"
+        assert "Infuse" in text, f"Missing client in: {text}"
+        assert "iPhone" in text, f"Missing device in: {text}"
+        assert "timeEnd" not in annots[0], f"timeEnd should not be set yet: {annots[0]}"
+
+    with subtest("Annotation closed when playback stops"):
+        playback_stop = json.dumps({
+            "ItemId": movie_id,
+            "MediaSourceId": media_source_id,
+            "PlaySessionId": "test-play-1",
+            "PositionTicks": 50000000,
+        })
+        machine.succeed(
+            f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing/Stopped' "
+            f"-d '{playback_stop}' -H 'Content-Type:application/json' "
+            f"-H 'X-Emby-Authorization:{auth_header}, Token={token}'"
+        )
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | python3 -c \"import sys,json; a=json.load(sys.stdin); exit(0 if a and 'timeEnd' in a[0] else 1)\"",
+            timeout=15,
+        )
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
+        assert "timeEnd" in annots[0], f"timeEnd should be set: {annots[0]}"
+        assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
+
+    with subtest("Multiple concurrent streams each get their own annotation"):
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+
+        # Authenticate again with the second client/device header to get a separate token
+        token2 = json.loads(machine.succeed(
+            f"curl -sf -X POST 'http://localhost:8096/Users/AuthenticateByName' "
+            f"-d '@${jfLib.payloads.auth}' -H 'Content-Type:application/json' "
+            f"-H 'X-Emby-Authorization:{auth_header2}'"
+        ))["AccessToken"]
+
+        playback1 = json.dumps({
+            "ItemId": movie_id,
+            "MediaSourceId": media_source_id,
+            "PlaySessionId": "test-play-multi-1",
+            "CanSeek": True,
+            "IsPaused": False,
+        })
+        machine.succeed(
+            f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing' "
+            f"-d '{playback1}' -H 'Content-Type:application/json' "
+            f"-H 'X-Emby-Authorization:{auth_header}, Token={token}'"
+        )
+        playback2 = json.dumps({
+            "ItemId": movie_id,
+            "MediaSourceId": media_source_id,
+            "PlaySessionId": "test-play-multi-2",
+            "CanSeek": True,
+            "IsPaused": False,
+        })
+        machine.succeed(
+            f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing' "
+            f"-d '{playback2}' -H 'Content-Type:application/json' "
+            f"-H 'X-Emby-Authorization:{auth_header2}, Token={token2}'"
+        )
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | python3 -c \"import sys,json; a=json.load(sys.stdin); exit(0 if len(a)==2 else 1)\"",
+            timeout=15,
+        )
+        annots = read_annotations()
+        assert len(annots) == 2, f"Expected 2 annotations, got: {annots}"
+
+    with subtest("State survives service restart (no duplicate annotations)"):
+        machine.succeed("systemctl stop annotations-svc || true")
+        time.sleep(1)
+        machine.succeed(
+            f"systemd-run --unit=annotations-svc-2 "
+            f"--setenv=JELLYFIN_URL=http://127.0.0.1:8096 "
+            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
+            f"--setenv=CREDENTIALS_DIRECTORY={CREDS_DIR} "
+            f"--setenv=STATE_FILE={STATE_FILE} "
+            f"--setenv=POLL_INTERVAL=3 "
+            f"{PYTHON} {SCRIPT}"
+        )
+        time.sleep(6)  # twice the POLL_INTERVAL of 3 passed to the restarted service
+        annots = read_annotations()
+        assert len(annots) == 2, f"Restart should not create duplicates, got: {annots}"
+  '';
+}
--- a/tests/jellyfin-qbittorrent-monitor.nix
+++ b/tests/jellyfin-qbittorrent-monitor.nix
@@ -0,0 +1,654 @@
+{
+  lib,
+  pkgs,
+  inputs,
+  ...
+}:
+let
+  jfLib = import ./jellyfin-test-lib.nix { inherit pkgs lib; };
+  webhookPlugin = import ../services/jellyfin/jellyfin-webhook-plugin.nix { inherit pkgs lib; };
+
+  # Webhook entry pointing at the monitor's listener (port 9898 on loopback),
+  # subscribed to the three playback notification types.
+  monitorWebhook = {
+    name = "qBittorrent Monitor";
+    uri = "http://127.0.0.1:9898/";
+    notificationTypes = [
+      "PlaybackStart"
+      "PlaybackProgress"
+      "PlaybackStop"
+    ];
+  };
+
+  # Script the test runs to configure the Jellyfin Webhook plugin with the
+  # entry above.
+  configureWebhook = webhookPlugin.mkConfigureScript {
+    jellyfinUrl = "http://localhost:8096";
+    webhooks = [ monitorWebhook ];
+  };
+in
+pkgs.testers.runNixOSTest {
+  name = "jellyfin-qbittorrent-monitor";
+
+  nodes = {
+    server =
+      { ... }:
+      {
+        imports = [
+          jfLib.jellyfinTestConfig
+          inputs.vpn-confinement.nixosModules.default
+        ];
+
+        # A real qBittorrent instance; the test script talks to its Web API on 8080.
+        services.qbittorrent = {
+          enable = true;
+          webuiPort = 8080;
+          openFirewall = true;
+
+          serverConfig = {
+            LegalNotice.Accepted = true;
+
+            Preferences = {
+              # Authentication is switched off for the test: every subnet is
+              # whitelisted and localhost needs no login.
+              WebUI = {
+                AuthSubnetWhitelistEnabled = true;
+                AuthSubnetWhitelist = "0.0.0.0/0,::/0";
+                LocalHostAuth = false;
+              };
+
+              Downloads = {
+                SavePath = "/var/lib/qbittorrent/downloads";
+                TempPath = "/var/lib/qbittorrent/incomplete";
+              };
+            };
+
+            BitTorrent.Session = {
+              # Normal mode: unlimited in both directions.
+              GlobalUPSpeedLimit = 0;
+              GlobalDLSpeedLimit = 0;
+
+              # Alternate limits, used while Jellyfin is streaming.
+              AlternativeGlobalUPSpeedLimit = 100;
+              AlternativeGlobalDLSpeedLimit = 100;
+            };
+          };
+        };
+
+        networking = {
+          firewall.allowedTCPPorts = [
+            8096
+            8080
+          ];
+
+          interfaces.eth1.ipv4 = {
+            addresses = lib.mkForce [
+              {
+                address = "192.168.1.1";
+                prefixLength = 24;
+              }
+            ];
+            # Route towards the client's 203.0.113.0/24 network.
+            routes = [
+              {
+                address = "203.0.113.0";
+                prefixLength = 24;
+              }
+            ];
+          };
+        };
+
+        systemd = {
+          # Download directories referenced by SavePath / TempPath above.
+          tmpfiles.rules = [
+            "d /var/lib/qbittorrent/downloads 0755 qbittorrent qbittorrent"
+            "d /var/lib/qbittorrent/incomplete 0755 qbittorrent qbittorrent"
+          ];
+
+          # The Webhook plugin is installed ahead of Jellyfin's start, as the
+          # production module does. Because Jellyfin rewrites meta.json at
+          # runtime, a read-only nix-store symlink would fail, so a writable
+          # copy is materialised instead.
+          services."jellyfin-webhook-install" = {
+            description = "Install Jellyfin Webhook plugin files";
+            wantedBy = [ "jellyfin.service" ];
+            before = [ "jellyfin.service" ];
+            serviceConfig = {
+              Type = "oneshot";
+              RemainAfterExit = true;
+              User = "jellyfin";
+              Group = "jellyfin";
+              UMask = "0077";
+              ExecStart = webhookPlugin.mkInstallScript {
+                pluginsDir = "/var/lib/jellyfin/plugins";
+              };
+            };
+          };
+        };
+      };
+
+    # The client sits in 203.0.113.0/24 (RFC 5737 TEST-NET-3), a public range,
+    # so Jellyfin sees its requests as external.
+    client = {
+      environment.systemPackages = [ pkgs.curl ];
+
+      networking.interfaces.eth1.ipv4 = {
+        addresses = lib.mkForce [
+          {
+            address = "203.0.113.10";
+            prefixLength = 24;
+          }
+        ];
+        # Route back to the server's 192.168.1.0/24 network.
+        routes = [
+          {
+            address = "192.168.1.0";
+            prefixLength = 24;
+          }
+        ];
+      };
+    };
+  };
+
+  testScript = ''
+    import json
+    import time
+
+    import importlib.util
+
+    # Load the shared Jellyfin helper module by file path.
+    _spec = importlib.util.spec_from_file_location("jf_helpers", "${jfLib.helpers}")
+    assert _spec and _spec.loader
+    _jf = importlib.util.module_from_spec(_spec)
+    _spec.loader.exec_module(_jf)
+    setup_jellyfin = _jf.setup_jellyfin
+
+    auth_header = 'MediaBrowser Client="NixOS Test", DeviceId="test-1337", Device="TestDevice", Version="1.0"'
+    client_auth = 'MediaBrowser Client="External Client", DeviceId="external-9999", Device="ExternalDevice", Version="1.0"'
+    client_auth2 = 'MediaBrowser Client="External Client 2", DeviceId="external-8888", Device="ExternalDevice2", Version="1.0"'
+    server_ip = "192.168.1.1"
+
+    QBT_URL = "http://localhost:8080"
+    JF_LOCAL_URL = "http://localhost:8096"
+    JF_EXTERNAL_URL = f"http://{server_ip}:8096"
+
+    python = "${pkgs.python3.withPackages (ps: [ ps.requests ])}/bin/python"
+    monitor = "${../services/jellyfin/jellyfin-qbittorrent-monitor.py}"
+
+    # Environment of the default (fast-polling) monitor. start_monitor() adds the
+    # API key and lets a run override individual entries.
+    MONITOR_ENV = {
+        "JELLYFIN_URL": JF_LOCAL_URL,
+        "QBITTORRENT_URL": QBT_URL,
+        "CHECK_INTERVAL": 1,
+        "STREAMING_START_DELAY": 1,
+        "STREAMING_STOP_DELAY": 1,
+        "TOTAL_BANDWIDTH_BUDGET": 50000000,
+        "SERVICE_BUFFER": 2000000,
+        "DEFAULT_STREAM_BITRATE": 10000000,
+        "MIN_TORRENT_SPEED": 100,
+    }
+
+    # --- qBittorrent state helpers -------------------------------------------
+
+    def qbt_get(path):
+        """GET a qBittorrent Web API path on the server and return the body.
+
+        Uses curl -f so an HTTP error fails the test instead of being read as state.
+        """
+        return server.succeed(f"curl -sf '{QBT_URL}{path}'")
+
+    def is_throttled():
+        """True when qBittorrent reports speedLimitsMode 1 (alternative limits active)."""
+        return qbt_get("/api/v2/transfer/speedLimitsMode").strip() == "1"
+
+    def get_alt_dl_limit():
+        """Current alternative download limit from the qBittorrent preferences."""
+        return json.loads(qbt_get("/api/v2/app/preferences"))["alt_dl_limit"]
+
+    def get_alt_up_limit():
+        """Current alternative upload limit from the qBittorrent preferences."""
+        return json.loads(qbt_get("/api/v2/app/preferences"))["alt_up_limit"]
+
+    def are_torrents_paused():
+        """True when at least one torrent exists and every torrent is in a stopped state."""
+        torrents = json.loads(qbt_get("/api/v2/torrents/info"))
+        if not torrents:
+            return False
+        return all(t["state"].startswith("stopped") for t in torrents)
+
+    # --- service helpers -----------------------------------------------------
+
+    def wait_for_qbittorrent():
+        """Block until the qBittorrent unit is up and its Web API answers."""
+        server.wait_for_unit("qbittorrent.service")
+        server.wait_for_open_port(8080)
+        server.wait_until_succeeds(f"curl -sf {QBT_URL}/api/v2/app/version", timeout=30)
+
+    def wait_for_jellyfin():
+        """Block until the Jellyfin unit is up and its health endpoint reports Healthy."""
+        server.wait_for_unit("jellyfin.service")
+        server.wait_for_open_port(8096)
+        server.wait_until_succeeds(f"curl -sf {JF_LOCAL_URL}/health | grep -q Healthy", timeout=60)
+
+    def start_monitor(unit, **overrides):
+        """Start the monitor as transient unit `unit`; keyword arguments override MONITOR_ENV."""
+        env = {**MONITOR_ENV, "JELLYFIN_API_KEY": token, **overrides}
+        setenv = " ".join(f"--setenv={key}={value}" for key, value in env.items())
+        server.succeed(f"systemd-run --unit={unit} {setenv} {python} {monitor}")
+
+    def stop_monitor(unit):
+        """Stop a monitor unit (ignoring failures) and give it a second to go away."""
+        server.succeed(f"systemctl stop {unit} || true")
+        time.sleep(1)
+
+    # --- Jellyfin request helpers --------------------------------------------
+
+    def emby_post(machine, base_url, path, body, auth, tok=None):
+        """POST `body` (a JSON string or an @file reference) to Jellyfin using curl on `machine`."""
+        hdr = f"{auth}, Token={tok}" if tok else auth
+        return machine.succeed(
+            f"curl -sf -X POST '{base_url}{path}' -d '{body}' "
+            f"-H 'Content-Type:application/json' -H 'X-Emby-Authorization:{hdr}'"
+        )
+
+    def authenticate(machine, base_url, auth):
+        """Log in with the shared auth payload and return the access token."""
+        reply = emby_post(machine, base_url, "/Users/AuthenticateByName", "@${jfLib.payloads.auth}", auth)
+        return json.loads(reply)["AccessToken"]
+
+    def playback_payload(session_id, **fields):
+        """Playback report body for the test movie, extended with `fields`."""
+        return {
+            "ItemId": movie_id,
+            "MediaSourceId": media_source_id,
+            "PlaySessionId": session_id,
+            **fields,
+        }
+
+    def client_report(event, payload, auth, tok):
+        """Send a playback report from the external client.
+
+        `event` is the suffix after /Sessions/Playing: empty for start, /Progress or /Stopped.
+        """
+        emby_post(client, JF_EXTERNAL_URL, f"/Sessions/Playing{event}", json.dumps(payload), auth, tok)
+
+    def start_stream(session_id, auth, tok):
+        """Report playback start for `session_id` from the external client."""
+        client_report("", playback_payload(session_id, CanSeek=True, IsPaused=False), auth, tok)
+
+    def stop_stream(session_id, auth, tok):
+        """Report playback stop for `session_id` from the external client."""
+        client_report("/Stopped", playback_payload(session_id, PositionTicks=50000000), auth, tok)
+
+    # --- test flow -----------------------------------------------------------
+
+    start_all()
+    wait_for_qbittorrent()
+
+    token, user_id, movie_id, media_source_id = setup_jellyfin(
+        server, retry, auth_header,
+        "${jfLib.payloads.auth}", "${jfLib.payloads.empty}",
+    )
+
+    with subtest("Start monitor service"):
+        start_monitor("monitor-test")
+        time.sleep(2)
+        assert not is_throttled(), "Should start unthrottled"
+
+    with subtest("Client authenticates from external network"):
+        client_token = authenticate(client, JF_EXTERNAL_URL, client_auth)
+
+    with subtest("Second client authenticates from external network"):
+        client_token2 = authenticate(client, JF_EXTERNAL_URL, client_auth2)
+
+    with subtest("External video playback triggers throttling"):
+        start_stream("test-play-session-1", client_auth, client_token)
+        time.sleep(2)
+        assert is_throttled(), "Should throttle for external video playback"
+
+    with subtest("Pausing disables throttling"):
+        playback_progress = playback_payload("test-play-session-1", IsPaused=True, PositionTicks=10000000)
+        client_report("/Progress", playback_progress, client_auth, client_token)
+        time.sleep(2)
+
+        assert not is_throttled(), "Should unthrottle when paused"
+
+    with subtest("Resuming re-enables throttling"):
+        playback_progress["IsPaused"] = False
+        playback_progress["PositionTicks"] = 20000000
+        client_report("/Progress", playback_progress, client_auth, client_token)
+        time.sleep(2)
+
+        assert is_throttled(), "Should re-throttle when resumed"
+
+    with subtest("Stopping playback disables throttling"):
+        stop_stream("test-play-session-1", client_auth, client_token)
+        time.sleep(2)
+
+        assert not is_throttled(), "Should unthrottle when playback stops"
+
+    with subtest("Single stream sets proportional alt speed limits"):
+        start_stream("test-play-session-proportional", client_auth, client_token)
+        time.sleep(3)
+
+        assert is_throttled(), "Should be in alt speed mode during streaming"
+        dl_limit = get_alt_dl_limit()
+        ul_limit = get_alt_up_limit()
+        # Both upload and download should get remaining bandwidth (proportional)
+        assert dl_limit > 0, f"Download limit should be > 0, got {dl_limit}"
+        assert ul_limit == dl_limit, f"Upload limit ({ul_limit}) should equal download limit ({dl_limit})"
+
+        # Stop playback
+        stop_stream("test-play-session-proportional", client_auth, client_token)
+        time.sleep(3)
+
+    with subtest("Multiple streams reduce available bandwidth"):
+        # Start first stream
+        start_stream("test-play-session-multi-1", client_auth, client_token)
+        time.sleep(3)
+
+        single_dl_limit = get_alt_dl_limit()
+
+        # Start second stream with different client identity
+        start_stream("test-play-session-multi-2", client_auth2, client_token2)
+        time.sleep(3)
+
+        dual_dl_limit = get_alt_dl_limit()
+        # Two streams should leave less bandwidth than one stream
+        assert dual_dl_limit < single_dl_limit, f"Two streams ({dual_dl_limit}) should have lower limit than one ({single_dl_limit})"
+
+        # Stop both streams
+        stop_stream("test-play-session-multi-1", client_auth, client_token)
+        stop_stream("test-play-session-multi-2", client_auth2, client_token2)
+        time.sleep(3)
+
+    with subtest("Budget exhaustion pauses all torrents"):
+        # Stop current monitor
+        stop_monitor("monitor-test")
+
+        # Add a dummy torrent so we can check pause state
+        server.succeed("curl -sf -X POST 'http://localhost:8080/api/v2/torrents/add' -d 'urls=magnet:?xt=urn:btih:0000000000000000000000000000000000000001%26dn=test-torrent'")
+        time.sleep(2)
+
+        # Start monitor with impossibly low budget
+        start_monitor("monitor-exhaust", TOTAL_BANDWIDTH_BUDGET=1000, SERVICE_BUFFER=500)
+        time.sleep(2)
+
+        # Start a stream - this will exceed the tiny budget
+        start_stream("test-play-session-exhaust", client_auth, client_token)
+        time.sleep(3)
+
+        assert are_torrents_paused(), "Torrents should be paused when budget is exhausted"
+
+    with subtest("Recovery from pause restores unlimited"):
+        # Stop the stream
+        stop_stream("test-play-session-exhaust", client_auth, client_token)
+        time.sleep(3)
+
+        assert not is_throttled(), "Should return to unlimited after streams stop"
+        assert not are_torrents_paused(), "Torrents should be resumed after streams stop"
+
+        # Clean up: stop exhaust monitor, restart normal monitor
+        stop_monitor("monitor-exhaust")
+        start_monitor("monitor-test")
+        time.sleep(2)
+
+    with subtest("Local playback does NOT trigger throttling"):
+        local_auth = 'MediaBrowser Client="Local Client", DeviceId="local-1111", Device="LocalDevice", Version="1.0"'
+        local_token = authenticate(server, JF_LOCAL_URL, local_auth)
+
+        local_playback = playback_payload("test-play-session-local", CanSeek=True, IsPaused=False)
+        emby_post(server, JF_LOCAL_URL, "/Sessions/Playing", json.dumps(local_playback), local_auth, local_token)
+        time.sleep(2)
+        assert not is_throttled(), "Should NOT throttle for local playback"
+
+        # The stop report reuses the start payload with a position added.
+        local_playback["PositionTicks"] = 50000000
+        emby_post(server, JF_LOCAL_URL, "/Sessions/Playing/Stopped", json.dumps(local_playback), local_auth, local_token)
+
+    # === WEBHOOK TESTS ===
+    #
+    # Configure the Jellyfin Webhook plugin to target the monitor, then verify
+    # the real Jellyfin → plugin → monitor path reacts faster than any possible
+    # poll. CHECK_INTERVAL=30 rules out polling as the cause.
+
+    WEBHOOK_PORT = 9898
+    WEBHOOK_CREDS = "/tmp/webhook-creds"
+
+    # Start a webhook-enabled monitor with long poll interval.
+    stop_monitor("monitor-test")
+    start_monitor(
+        "monitor-webhook",
+        CHECK_INTERVAL=30,
+        WEBHOOK_PORT=WEBHOOK_PORT,
+        WEBHOOK_BIND="127.0.0.1",
+    )
+    server.wait_until_succeeds(f"ss -ltn | grep -q ':{WEBHOOK_PORT}'", timeout=15)
+    time.sleep(2)
+    assert not is_throttled(), "Should start unthrottled"
+
+    # Drop the admin token where the configure script expects it (production uses agenix).
+    server.succeed(f"mkdir -p {WEBHOOK_CREDS} && echo '{token}' > {WEBHOOK_CREDS}/jellyfin-api-key")
+    server.succeed(
+        f"systemd-run --wait --unit=webhook-configure-test "
+        f"--setenv=CREDENTIALS_DIRECTORY={WEBHOOK_CREDS} "
+        f"${configureWebhook}"
+    )
+
+    with subtest("Real PlaybackStart event throttles via the plugin"):
+        start_stream("test-plugin-start", client_auth, client_token)
+        server.wait_until_succeeds(
+            "curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^1$'",
+            timeout=5,
+        )
+        # Let STREAMING_STOP_DELAY (1s) elapse so the upcoming stop is not swallowed by hysteresis.
+        time.sleep(2)
+
+    with subtest("Real PlaybackStop event unthrottles via the plugin"):
+        stop_stream("test-plugin-start", client_auth, client_token)
+        server.wait_until_succeeds(
+            "curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^0$'",
+            timeout=10,
+        )
+
+    # Restore fast-polling monitor for the service-restart tests below.
+    stop_monitor("monitor-webhook")
+    start_monitor("monitor-test")
+    time.sleep(2)
+
+
+    # === SERVICE RESTART TESTS ===
+
+    with subtest("qBittorrent restart during throttled state re-applies throttling"):
+        # Start external playback to trigger throttling
+        start_stream("test-play-session-restart-1", client_auth, client_token)
+        time.sleep(2)
+        assert is_throttled(), "Should be throttled before qBittorrent restart"
+
+        # Restart qBittorrent (this resets alt_speed to its config default - disabled)
+        server.succeed("systemctl restart qbittorrent.service")
+        wait_for_qbittorrent()
+
+        # qBittorrent restarted - alt_speed is now False (default on startup)
+        # The monitor should detect this and re-apply throttling
+        time.sleep(3)  # Give monitor time to detect and re-apply
+        assert is_throttled(), "Monitor should re-apply throttling after qBittorrent restart"
+
+        # Stop playback to clean up
+        stop_stream("test-play-session-restart-1", client_auth, client_token)
+        time.sleep(2)
+
+    with subtest("qBittorrent restart during unthrottled state stays unthrottled"):
+        # Verify we're unthrottled (no active streams)
+        assert not is_throttled(), "Should be unthrottled before test"
+
+        # Restart qBittorrent
+        server.succeed("systemctl restart qbittorrent.service")
+        wait_for_qbittorrent()
+
+        # Give monitor time to check state
+        time.sleep(3)
+        assert not is_throttled(), "Should remain unthrottled after qBittorrent restart with no streams"
+
+    with subtest("Jellyfin restart during throttled state maintains throttling"):
+        # Start external playback to trigger throttling
+        start_stream("test-play-session-restart-2", client_auth, client_token)
+        time.sleep(2)
+        assert is_throttled(), "Should be throttled before Jellyfin restart"
+
+        # Restart Jellyfin
+        server.succeed("systemctl restart jellyfin.service")
+        wait_for_jellyfin()
+
+        # During Jellyfin restart, monitor can't reach Jellyfin
+        # After restart, sessions are cleared - monitor should eventually unthrottle
+        # But during the unavailability window, throttling should be maintained (fail-safe)
+        time.sleep(3)
+
+        # Re-authenticate (old token invalid after restart)
+        client_token = authenticate(client, JF_EXTERNAL_URL, client_auth)
+        client_token2 = authenticate(client, JF_EXTERNAL_URL, client_auth2)
+
+        # No active streams after Jellyfin restart, should eventually unthrottle
+        time.sleep(3)
+        assert not is_throttled(), "Should unthrottle after Jellyfin restart clears sessions"
+
+    with subtest("Monitor recovers after Jellyfin temporary unavailability"):
+        # Re-authenticate with fresh token
+        client_token = authenticate(client, JF_EXTERNAL_URL, client_auth)
+        client_token2 = authenticate(client, JF_EXTERNAL_URL, client_auth2)
+
+        # Start playback
+        start_stream("test-play-session-restart-3", client_auth, client_token)
+        time.sleep(2)
+        assert is_throttled(), "Should be throttled"
+
+        # Stop Jellyfin briefly (simulating temporary unavailability)
+        server.succeed("systemctl stop jellyfin.service")
+        time.sleep(2)
+
+        # During unavailability, throttle state should be maintained (fail-safe)
+        assert is_throttled(), "Should maintain throttle during Jellyfin unavailability"
+
+        # Bring Jellyfin back
+        server.succeed("systemctl start jellyfin.service")
+        wait_for_jellyfin()
+
+        # After Jellyfin comes back, sessions are gone - should unthrottle
+        time.sleep(3)
+        assert not is_throttled(), "Should unthrottle after Jellyfin returns with no sessions"
+  '';
+}
--- a/tests/jellyfin-test-lib.nix
+++ b/tests/jellyfin-test-lib.nix
@@ -0,0 +1,20 @@
+{ pkgs, lib }:
+let
+  # Write `value` to the store as a JSON file called `name`.
+  jsonFile = name: value: pkgs.writeText name (builtins.toJSON value);
+in
+{
+  # Request bodies that tests hand to curl as `-d @file`.
+  payloads = {
+    auth = jsonFile "auth.json" { Username = "jellyfin"; };
+    empty = jsonFile "empty.json" { };
+  };
+
+  # Python helper module that test scripts load by path.
+  helpers = ./jellyfin-test-lib.py;
+
+  # NixOS module shared by the Jellyfin tests: the service itself, the CLI
+  # tools the helpers invoke, and a default disk size of 3 * 1024.
+  jellyfinTestConfig =
+    { pkgs, ... }:
+    {
+      virtualisation.diskSize = lib.mkDefault (3 * 1024);
+      services.jellyfin.enable = true;
+      environment.systemPackages = [
+        pkgs.curl
+        pkgs.ffmpeg
+      ];
+    };
+}
--- a/tests/jellyfin-test-lib.py
+++ b/tests/jellyfin-test-lib.py
@@ -0,0 +1,90 @@
+import json
+from urllib.parse import urlencode
+
+
+def jellyfin_api(machine, method, path, auth_header, token=None, data_file=None, data=None):
+    hdr = auth_header + (f", Token={token}" if token else "")
+    cmd = f"curl -sf -X {method} 'http://localhost:8096{path}'"
+    if data_file:
+        cmd += f" -d '@{data_file}' -H 'Content-Type:application/json'"
+    elif data:
+        payload = json.dumps(data) if isinstance(data, dict) else data
+        cmd += f" -d '{payload}' -H 'Content-Type:application/json'"
+    cmd += f" -H 'X-Emby-Authorization:{hdr}'"
+    return machine.succeed(cmd)
+
+
+def setup_jellyfin(machine, retry, auth_header, auth_payload, empty_payload):
+    machine.wait_for_unit("jellyfin.service")
+    machine.wait_for_open_port(8096)
+    machine.wait_until_succeeds(
+        "curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120
+    )
+
+    machine.wait_until_succeeds(
+        f"curl -sf 'http://localhost:8096/Startup/Configuration' "
+        f"-H 'X-Emby-Authorization:{auth_header}'"
+    )
+    jellyfin_api(machine, "GET", "/Startup/FirstUser", auth_header)
+    jellyfin_api(machine, "POST", "/Startup/Complete", auth_header)
+
+    result = json.loads(
+        jellyfin_api(
+            machine, "POST", "/Users/AuthenticateByName",
+            auth_header, data_file=auth_payload,
+        )
+    )
+    token = result["AccessToken"]
+    user_id = result["User"]["Id"]
+
+    tempdir = machine.succeed("mktemp -d -p /var/lib/jellyfin").strip()
+    machine.succeed(f"chmod 755 '{tempdir}'")
+    machine.succeed(
+        f"ffmpeg -f lavfi -i testsrc2=duration=5 -f lavfi -i sine=frequency=440:duration=5 "
+        f"-c:v libx264 -c:a aac '{tempdir}/Test Movie (2024).mkv'"
+    )
+
+    query = urlencode({
+        "name": "Test Library",
+        "collectionType": "Movies",
+        "paths": tempdir,
+        "refreshLibrary": "true",
+    })
+    jellyfin_api(
+        machine, "POST", f"/Library/VirtualFolders?{query}",
+        auth_header, token=token, data_file=empty_payload,
+    )
+
+    def is_ready(_):
+        folders = json.loads(
+            jellyfin_api(machine, "GET", "/Library/VirtualFolders", auth_header, token=token)
+        )
+        return all(f.get("RefreshStatus") == "Idle" for f in folders)
+    retry(is_ready, timeout=60)
+
+    movie_id = None
+    media_source_id = None
+
+    def get_movie(_):
+        nonlocal movie_id, media_source_id
+        items = json.loads(
+            jellyfin_api(
+                machine, "GET",
+                f"/Users/{user_id}/Items?IncludeItemTypes=Movie&Recursive=true",
+                auth_header, token=token,
+            )
+        )
+        if items["TotalRecordCount"] > 0:
+            movie_id = items["Items"][0]["Id"]
+            info = json.loads(
+                jellyfin_api(
+                    machine, "GET", f"/Users/{user_id}/Items/{movie_id}",
+                    auth_header, token=token,
+                )
+            )
+            media_source_id = info["MediaSources"][0]["Id"]
+            return True
+        return False
+    retry(get_movie, timeout=60)
+
+    return token, user_id, movie_id, media_source_id
--- a/tests/minecraft.nix
+++ b/tests/minecraft.nix
@@ -0,0 +1,97 @@
+# NixOS VM test: boots ../services/minecraft.nix with its ZFS-related
+# dependencies removed and waits for the server log to report that startup
+# has finished.
+{
+  config,
+  lib,
+  pkgs,
+  inputs,
+  ...
+}:
+let
+  baseServiceConfigs = import ../service-configs.nix;
+  # Test-local values merged over the shared service configuration.
+  testServiceConfigs = lib.recursiveUpdate baseServiceConfigs {
+    zpool_ssds = "";
+    https.domain = "test.local";
+    minecraft.parent_dir = "/var/lib/minecraft";
+    # Nothing in this set refers to itself, so a plain attrset is enough.
+    minecraft.memory = {
+      heap_size_m = 1000;
+    };
+  };
+
+  # Create pkgs with nix-minecraft overlay and unfree packages allowed
+  testPkgs = import inputs.nixpkgs {
+    # hostPlatform is the platform the resulting packages run on.
+    system = pkgs.stdenv.hostPlatform.system;
+    config.allowUnfreePredicate = pkg: builtins.elem (lib.getName pkg) [ "minecraft-server" ];
+    overlays = [
+      inputs.nix-minecraft.overlay
+      (import ../modules/overlays.nix)
+    ];
+  };
+in
+testPkgs.testers.runNixOSTest {
+  name = "minecraft server startup test";
+
+  # Extra module arguments made available to the node configuration.
+  node.specialArgs = {
+    inherit inputs lib;
+    service_configs = testServiceConfigs;
+    username = "testuser";
+  };
+
+  nodes.machine =
+    { lib, ... }:
+    {
+      imports = [
+        ../services/minecraft.nix
+      ];
+
+      # Force to 0 because no huge pages in vms ?
+      boot.kernel.sysctl."vm.nr_hugepages" = lib.mkForce 0;
+
+      # Enable caddy service (required by minecraft service)
+      services.caddy.enable = true;
+
+      # Enable networking for the test (needed for minecraft mods to download mappings)
+      networking.dhcpcd.enable = true;
+
+      # Disable the ZFS mount dependency service in test environment
+      systemd.services."minecraft-server-main_mounts".enable = lib.mkForce false;
+
+      # Remove service dependencies that require ZFS
+      systemd.services.minecraft-server-main = {
+        wants = lib.mkForce [ ];
+        after = lib.mkForce [ ];
+        requires = lib.mkForce [ ];
+        serviceConfig = {
+          Nice = lib.mkForce 0;
+          LimitMEMLOCK = lib.mkForce "infinity";
+        };
+      };
+
+      # Test-specific overrides only - reduce memory for testing
+      services.minecraft-servers.servers.main.jvmOpts = lib.mkForce "-Xmx1G -Xms1G";
+
+      # Create test user
+      users.users.testuser = {
+        isNormalUser = true;
+        uid = 1000;
+        extraGroups = [ "minecraft" ];
+      };
+    };
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    # Wait for minecraft service to be available
+    machine.wait_for_unit("minecraft-server-main.service")
+
+    # Wait up to 120 seconds for the server to complete startup; on failure,
+    # dump the server log before re-raising so the cause is visible.
+    with machine.nested("Waiting for minecraft server startup completion"):
+        try:
+            machine.wait_until_succeeds(
+                "grep -Eq '\\[[0-9]+:[0-9]+:[0-9]+\\] \\[Server thread/INFO\\]: Done \\([0-9]+\\.[0-9]+s\\)! For help, type \"help\"' /var/lib/minecraft/main/logs/latest.log",
+                timeout=120
+            )
+        except Exception:
+            print(machine.succeed("cat /var/lib/minecraft/main/logs/latest.log"))
+            raise
+  '';
+}
--- a/tests/mock-grafana-server.py
+++ b/tests/mock-grafana-server.py
@@ -0,0 +1,58 @@
+import http.server, json, sys
+
+PORT = int(sys.argv[1])
+DATA_FILE = sys.argv[2]
+
+class Handler(http.server.BaseHTTPRequestHandler):
+    def log_message(self, fmt, *args):
+        pass
+
+    def _read_body(self):
+        length = int(self.headers.get("Content-Length", 0))
+        return json.loads(self.rfile.read(length)) if length else {}
+
+    def _json(self, code, body):
+        data = json.dumps(body).encode()
+        self.send_response(code)
+        self.send_header("Content-Type", "application/json")
+        self.end_headers()
+        self.wfile.write(data)
+
+    def do_POST(self):
+        if self.path == "/api/annotations":
+            body = self._read_body()
+            try:
+                with open(DATA_FILE) as f:
+                    annotations = json.load(f)
+            except Exception:
+                annotations = []
+            aid = len(annotations) + 1
+            body["id"] = aid
+            annotations.append(body)
+            with open(DATA_FILE, "w") as f:
+                json.dump(annotations, f)
+            self._json(200, {"id": aid, "message": "Annotation added"})
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+    def do_PATCH(self):
+        if self.path.startswith("/api/annotations/"):
+            aid = int(self.path.rsplit("/", 1)[-1])
+            body = self._read_body()
+            try:
+                with open(DATA_FILE) as f:
+                    annotations = json.load(f)
+            except Exception:
+                annotations = []
+            for a in annotations:
+                if a["id"] == aid:
+                    a.update(body)
+            with open(DATA_FILE, "w") as f:
+                json.dump(annotations, f)
+            self._json(200, {"message": "Annotation patched"})
+        else:
+            self.send_response(404)
+            self.end_headers()
+
+http.server.HTTPServer(("127.0.0.1", PORT), Handler).serve_forever()
--- a/tests/ntfy-alerts.nix
+++ b/tests/ntfy-alerts.nix
@@ -0,0 +1,174 @@
+# NixOS VM test for ../modules/ntfy-alerts.nix: a unit that always fails is
+# started and the test asserts that a mock ntfy server received a POST with
+# the expected path and headers.
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  testPkgs = pkgs.appendOverlays [ (import ../modules/overlays.nix) ];
+in
+testPkgs.testers.runNixOSTest {
+  name = "ntfy-alerts";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      imports = [
+        ../modules/ntfy-alerts.nix
+      ];
+
+      # `config` here is the argument of the enclosing file, not the node's.
+      system.stateVersion = config.system.stateVersion;
+
+      virtualisation.memorySize = 2048;
+
+      environment.systemPackages = with pkgs; [
+        curl
+        jq
+      ];
+
+      # Create test topic file
+      systemd.tmpfiles.rules = [
+        "f /run/ntfy-test-topic 0644 root root - test-alerts"
+      ];
+
+      # Mock ntfy server that records POST requests
+      systemd.services.mock-ntfy =
+        let
+          mockNtfyScript = pkgs.writeScript "mock-ntfy.py" ''
+            import json
+            import os
+            from http.server import HTTPServer, BaseHTTPRequestHandler
+            from datetime import datetime
+
+            REQUESTS_FILE = "/tmp/ntfy-requests.json"
+
+            class MockNtfy(BaseHTTPRequestHandler):
+                def _respond(self, code=200, body=b"Ok"):
+                    self.send_response(code)
+                    self.send_header("Content-Type", "application/json")
+                    self.end_headers()
+                    self.wfile.write(body if isinstance(body, bytes) else body.encode())
+
+                def do_GET(self):
+                    self._respond()
+
+                def do_POST(self):
+                    content_length = int(self.headers.get("Content-Length", 0))
+                    body = self.rfile.read(content_length).decode() if content_length > 0 else ""
+
+                    request_data = {
+                        "timestamp": datetime.now().isoformat(),
+                        "path": self.path,
+                        "headers": dict(self.headers),
+                        "body": body,
+                    }
+
+                    # Load existing requests or start new list
+                    requests = []
+                    if os.path.exists(REQUESTS_FILE):
+                        try:
+                            with open(REQUESTS_FILE, "r") as f:
+                                requests = json.load(f)
+                        except (OSError, ValueError):
+                            # Unreadable or invalid file: start over.
+                            requests = []
+
+                    requests.append(request_data)
+
+                    # Write to a sibling file and rename it into place so a
+                    # reader never sees a truncated or half-written file.
+                    tmp_path = REQUESTS_FILE + ".tmp"
+                    with open(tmp_path, "w") as f:
+                        json.dump(requests, f, indent=2)
+                    os.replace(tmp_path, REQUESTS_FILE)
+
+                    self._respond()
+
+                def log_message(self, format, *args):
+                    pass
+
+            HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever()
+          '';
+        in
+        {
+          description = "Mock ntfy server";
+          wantedBy = [ "multi-user.target" ];
+          before = [ "ntfy-alert@test-fail.service" ];
+          serviceConfig = {
+            ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}";
+            Type = "simple";
+          };
+        };
+
+      # Test service that will fail
+      systemd.services.test-fail = {
+        description = "Test service that fails";
+        serviceConfig = {
+          Type = "oneshot";
+          ExecStart = "${pkgs.coreutils}/bin/false";
+        };
+      };
+
+      # Configure ntfy-alerts to use mock server
+      services.ntfyAlerts = {
+        enable = true;
+        serverUrl = "http://localhost:8080";
+        topicFile = "/run/ntfy-test-topic";
+
+      };
+    };
+
+  testScript = ''
+    import json
+    import time
+
+    start_all()
+
+    # Wait for mock ntfy server to be ready
+    machine.wait_for_unit("mock-ntfy.service")
+    machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30)
+
+    # Verify the ntfy-alert@ template service exists
+    machine.succeed("systemctl list-unit-files | grep ntfy-alert@")
+
+    # Verify the global OnFailure drop-in is configured
+    machine.succeed("cat /etc/systemd/system/service.d/onfailure.conf | grep -q 'OnFailure=ntfy-alert@%p.service'")
+
+    # Trigger the test-fail service
+    machine.succeed("systemctl start test-fail.service || true")
+
+    # Wait a moment for the failure notification to be sent
+    time.sleep(2)
+
+    # Query the alert unit's state. The trailing "|| true" means this line can
+    # never fail; delivery is asserted through the recorded request below.
+    machine.succeed("systemctl is-active ntfy-alert@test-fail.service || systemctl is-failed ntfy-alert@test-fail.service || true")
+
+    # Check that the mock server received a POST request
+    machine.wait_until_succeeds("test -f /tmp/ntfy-requests.json", timeout=30)
+
+    # Verify the request content
+    result = machine.succeed("cat /tmp/ntfy-requests.json")
+    requests = json.loads(result)
+
+    assert len(requests) >= 1, f"Expected at least 1 request, got {len(requests)}"
+
+    # Check the first request
+    req = requests[0]
+    assert "/test-alerts" in req["path"], f"Expected path to contain /test-alerts, got {req['path']}"
+    assert "Title" in req["headers"], "Expected Title header"
+    assert "test-fail" in req["headers"]["Title"], f"Expected Title to contain 'test-fail', got {req['headers']['Title']}"
+    assert req["headers"]["Priority"] == "high", f"Expected Priority 'high', got {req['headers'].get('Priority')}"
+    assert req["headers"]["Tags"] == "warning", f"Expected Tags 'warning', got {req['headers'].get('Tags')}"
+
+    print(f"Received notification: Title={req['headers']['Title']}, Body={req['body'][:100]}...")
+
+    # Idempotency test: trigger failure again
+    machine.succeed("rm /tmp/ntfy-requests.json")
+    machine.succeed("systemctl reset-failed test-fail.service || true")
+    machine.succeed("systemctl start test-fail.service || true")
+    time.sleep(2)
+
+    # Verify another notification was sent
+    machine.wait_until_succeeds("test -f /tmp/ntfy-requests.json", timeout=30)
+    result = machine.succeed("cat /tmp/ntfy-requests.json")
+    requests = json.loads(result)
+    assert len(requests) >= 1, f"Expected at least 1 request after second failure, got {len(requests)}"
+
+    print("All tests passed!")
+  '';
+}
--- a/tests/testTest.nix
+++ b/tests/testTest.nix
@@ -0,0 +1,20 @@
+# Smoke test for the test setup itself: boot a VM with no extra configuration
+# and run a trivial command in it.
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  # Node module that adds no configuration of its own.
+  emptyNode = { pkgs, ... }: { };
+in
+pkgs.testers.runNixOSTest {
+  name = "test of tests";
+
+  nodes.machine = emptyNode;
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+    machine.succeed("echo hello!")
+  '';
+}
--- a/tests/tests.nix
+++ b/tests/tests.nix
@@ -0,0 +1,41 @@
+# Index of the NixOS VM tests in this directory. Every attribute is the result
+# of importing the listed file with the same arguments this file received.
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}@args:
+builtins.mapAttrs (_name: file: import file args) {
+  zfsTest = ./zfs.nix;
+  testTest = ./testTest.nix;
+  minecraftTest = ./minecraft.nix;
+  jellyfinQbittorrentMonitorTest = ./jellyfin-qbittorrent-monitor.nix;
+  filePermsTest = ./file-perms.nix;
+
+  # fail2ban
+  fail2banSshTest = ./fail2ban-ssh.nix;
+  fail2banCaddyTest = ./fail2ban-caddy.nix;
+  fail2banGiteaTest = ./fail2ban-gitea.nix;
+  fail2banVaultwardenTest = ./fail2ban-vaultwarden.nix;
+  fail2banImmichTest = ./fail2ban-immich.nix;
+  fail2banJellyfinTest = ./fail2ban-jellyfin.nix;
+
+  # jellyfin annotation service
+  jellyfinAnnotationsTest = ./jellyfin-annotations.nix;
+
+  # zfs scrub annotations
+  zfsScrubAnnotationsTest = ./zfs-scrub-annotations.nix;
+
+  # xmrig auto-pause
+  xmrigAutoPauseTest = ./xmrig-auto-pause.nix;
+
+  # ntfy alerts
+  ntfyAlertsTest = ./ntfy-alerts.nix;
+
+  # torrent audit
+  torrentAuditTest = ./torrent-audit.nix;
+
+  # gitea runner
+  giteaRunnerTest = ./gitea-runner.nix;
+}
--- a/tests/torrent-audit.nix
+++ b/tests/torrent-audit.nix
@@ -0,0 +1,422 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  qbitPort = 18080;
+  radarrPort = 17878;
+  sonarrPort = 18989;
+
+  radarrConfig = pkgs.writeText "radarr-config.xml" ''
+    <Config><ApiKey>test-radarr-key</ApiKey></Config>
+  '';
+
+  sonarrConfig = pkgs.writeText "sonarr-config.xml" ''
+    <Config><ApiKey>test-sonarr-key</ApiKey></Config>
+  '';
+
+  python = "${
+    pkgs.python3.withPackages (ps: [
+      ps.pyarr
+      ps.qbittorrent-api
+    ])
+  }/bin/python3";
+  auditScript = ../services/arr/torrent-audit.py;
+
+  # Single mock API server script -- accepts SERVICE and PORT as CLI args.
+  # Routes responses based on SERVICE type (qbit / radarr / sonarr).
+  mockScript = pkgs.writeText "mock-api-server.py" ''
+    import json
+    import sys
+    from http.server import HTTPServer, BaseHTTPRequestHandler
+    from urllib.parse import urlparse, parse_qs
+
+    SERVICE = sys.argv[1]
+    PORT = int(sys.argv[2])
+
+    # ── Hash constants (uppercase, 40 hex chars) ──────────────────────────
+    # Movies
+    UNMANAGED_MOV   = "A" * 38 + "01"
+    MANAGED_MOV     = "A" * 38 + "02"
+    OLD_MOV         = "A" * 38 + "03"  # movieId=2, older import → abandoned SAFE
+    NEW_MOV         = "A" * 38 + "04"  # movieId=2, newer import → keeper
+    KEEPER_CROSS    = "A" * 38 + "05"  # keeper for movieId=3, old for movieId=4
+    KEEPER3_OLD     = "A" * 38 + "0B"  # movieId=3, older import (not in qBit)
+    KEEPER4_NEW     = "A" * 38 + "06"  # movieId=4, newer import → keeper
+    REMOVED_OLD     = "A" * 38 + "07"  # movieId=5, older import (movie removed)
+    REMOVED_NEW     = "A" * 38 + "08"  # movieId=5, newer import → keeper (not in qBit)
+    LARGER_OLD      = "A" * 38 + "09"  # movieId=6, older import (larger than current)
+    LARGER_NEW      = "A" * 38 + "0A"  # movieId=6, newer import → keeper
+    SINGLE_CROSS    = "A" * 38 + "0C"  # movieId=7 single import AND older import for movieId=8
+    SINGLE8_NEW     = "A" * 38 + "0D"  # movieId=8, newer import → keeper (not in qBit)
+    QUEUED_MOV      = "A" * 38 + "0E"  # in Radarr queue, not in history
+
+    # TV
+    UNMANAGED_TV    = "B" * 38 + "01"
+    MANAGED_TV      = "B" * 38 + "02"  # episodeId=100, single import
+    OLD_TV          = "B" * 38 + "03"  # episodeId=200, older import → abandoned SAFE
+    NEW_TV          = "B" * 38 + "04"  # episodeId=200, newer import → active
+    SEASON_PACK     = "B" * 38 + "05"  # episodeIds 300,301,302 (still active for 301,302)
+    REPACK          = "B" * 38 + "06"  # episodeId=300, newer import → active
+    REMOVED_TV      = "B" * 38 + "07"  # episodeId=400, older import (series removed)
+    REMOVED_TV_NEW  = "B" * 38 + "08"  # episodeId=400, newer import (not in qBit)
+
+    def make_torrent(h, name, size, added_on, state="uploading"):
+        return {
+            "hash": h.lower(),
+            "name": name,
+            "size": size,
+            "state": state,
+            "added_on": added_on,
+            "content_path": f"/downloads/{name}",
+        }
+
+    QBIT_DATA = {
+        "movies": [
+            make_torrent(UNMANAGED_MOV,  "Unmanaged.Movie.2024",      5_000_000_000, 1704067200),
+            make_torrent(MANAGED_MOV,    "Managed.Movie.2024",         4_000_000_000, 1704067201),
+            make_torrent(OLD_MOV,        "Old.Movie.Quality.2024",     3_000_000_000, 1704067202),
+            make_torrent(NEW_MOV,        "New.Movie.Quality.2024",     6_000_000_000, 1704067203),
+            make_torrent(KEEPER_CROSS,   "CrossRef.Movie.2024",        4_500_000_000, 1704067204),
+            make_torrent(REMOVED_OLD,    "Removed.Movie.2024",         3_500_000_000, 1704067205),
+            make_torrent(LARGER_OLD,     "Larger.Movie.2024",         10_737_418_240, 1704067206),
+            make_torrent(SINGLE_CROSS,   "SingleCross.Movie.2024",    4_000_000_000, 1704067207),
+            make_torrent(QUEUED_MOV,     "Queued.Movie.2024",         2_000_000_000, 1704067208),
+        ],
+        "tvshows": [
+            make_torrent(UNMANAGED_TV,   "Unmanaged.Show.S01E01",     1_000_000_000, 1704067200),
+            make_torrent(MANAGED_TV,     "Managed.Show.S01E01",         800_000_000, 1704067201),
+            make_torrent(OLD_TV,         "Old.Show.S01E01",              700_000_000, 1704067202),
+            make_torrent(NEW_TV,         "New.Show.S01E01",            1_200_000_000, 1704067203),
+            make_torrent(SEASON_PACK,    "Season.Pack.S02",            5_000_000_000, 1704067204),
+            make_torrent(REMOVED_TV,    "Removed.Show.S01E01",         900_000_000, 1704067205),
+        ],
+    }
+
+    # ── Radarr mock data ──────────────────────────────────────────────────
+    RADARR_HISTORY = [
+        {"movieId": 1, "downloadId": MANAGED_MOV,  "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 2, "downloadId": OLD_MOV,       "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 2, "downloadId": NEW_MOV,       "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        {"movieId": 3, "downloadId": KEEPER3_OLD,   "eventType": "downloadFolderImported", "date": "2023-01-01T00:00:00Z"},
+        {"movieId": 3, "downloadId": KEEPER_CROSS,  "eventType": "downloadFolderImported", "date": "2024-03-01T00:00:00Z"},
+        {"movieId": 4, "downloadId": KEEPER_CROSS,  "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 4, "downloadId": KEEPER4_NEW,   "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        {"movieId": 5, "downloadId": REMOVED_OLD,   "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 5, "downloadId": REMOVED_NEW,   "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        {"movieId": 6, "downloadId": LARGER_OLD,    "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 6, "downloadId": LARGER_NEW,    "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        # Non-import event (should be ignored by abandoned detection)
+        {"movieId": 2, "downloadId": NEW_MOV,       "eventType": "grabbed",                "date": "2024-05-31T00:00:00Z"},
+        # Single-import keeper test (Fix 13): SINGLE_CROSS is only import for movieId=7
+        # AND an older import for movieId=8 (SINGLE8_NEW is newer for movieId=8)
+        {"movieId": 7, "downloadId": SINGLE_CROSS,  "eventType": "downloadFolderImported", "date": "2024-03-01T00:00:00Z"},
+        {"movieId": 8, "downloadId": SINGLE_CROSS,  "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"movieId": 8, "downloadId": SINGLE8_NEW,   "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+    ]
+
+    RADARR_MOVIES = [
+        {"id": 1, "hasFile": True, "movieFile": {"size": 4_000_000_000, "quality": {"quality": {"name": "Bluray-1080p"}}}},
+        {"id": 2, "hasFile": True, "movieFile": {"size": 6_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}},
+        {"id": 3, "hasFile": True, "movieFile": {"size": 4_500_000_000, "quality": {"quality": {"name": "Bluray-1080p"}}}},
+        {"id": 4, "hasFile": True, "movieFile": {"size": 5_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}},
+        # id=5 intentionally MISSING -- movie removed from Radarr
+        {"id": 6, "hasFile": True, "movieFile": {"size": 5_368_709_120, "quality": {"quality": {"name": "Bluray-720p"}}}},
+        {"id": 7, "hasFile": True, "movieFile": {"size": 4_000_000_000, "quality": {"quality": {"name": "Bluray-1080p"}}}},
+        {"id": 8, "hasFile": True, "movieFile": {"size": 5_000_000_000, "quality": {"quality": {"name": "Remux-1080p"}}}},
+    ]
+
+    # ── Sonarr mock data ──────────────────────────────────────────────────
+    # Page 1 records (returned on page=1, with totalRecords=1001 to force pagination)
+    SONARR_HISTORY_PAGE1 = [
+        {"episodeId": 100, "seriesId": 1, "downloadId": MANAGED_TV,   "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"episodeId": 200, "seriesId": 1, "downloadId": OLD_TV,        "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"episodeId": 200, "seriesId": 1, "downloadId": NEW_TV,        "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        # Season pack covers 3 episodes
+        {"episodeId": 300, "seriesId": 2, "downloadId": SEASON_PACK,   "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"episodeId": 301, "seriesId": 2, "downloadId": SEASON_PACK,   "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"episodeId": 302, "seriesId": 2, "downloadId": SEASON_PACK,   "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        # Non-import event (should be ignored)
+        {"episodeId": 200, "seriesId": 1, "downloadId": NEW_TV,        "eventType": "grabbed",                "date": "2024-05-31T00:00:00Z"},
+    ]
+    # Page 2 records (critical data only available via pagination)
+    SONARR_HISTORY_PAGE2 = [
+        # Episode 300 re-imported from a repack -- but 301,302 still reference SEASON_PACK
+        {"episodeId": 300, "seriesId": 2, "downloadId": REPACK,        "eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+        # Removed series scenario
+        {"episodeId": 400, "seriesId": 99, "downloadId": REMOVED_TV,    "eventType": "downloadFolderImported", "date": "2024-01-01T00:00:00Z"},
+        {"episodeId": 400, "seriesId": 99, "downloadId": REMOVED_TV_NEW,"eventType": "downloadFolderImported", "date": "2024-06-01T00:00:00Z"},
+    ]
+    SONARR_HISTORY_ALL = SONARR_HISTORY_PAGE1 + SONARR_HISTORY_PAGE2
+
+    # seriesId=99 intentionally MISSING -- series removed from Sonarr
+    SONARR_SERIES = [
+        {"id": 1, "title": "Managed Show"},
+        {"id": 2, "title": "Season Pack Show"},
+    ]
+
+    class Handler(BaseHTTPRequestHandler):
+        def do_POST(self):
+            if self.path.startswith("/api/v2/auth/login"):
+                self.send_response(200)
+                self.send_header("Content-Type", "text/plain")
+                self.send_header("Set-Cookie", "SID=test; path=/")
+                self.end_headers()
+                self.wfile.write(b"Ok.")
+            else:
+                self._handle_json()
+
+        def do_GET(self):
+            self._handle_json()
+
+        def _handle_json(self):
+            parsed = urlparse(self.path)
+            path = parsed.path
+            params = parse_qs(parsed.query)
+
+            content_length = int(self.headers.get("Content-Length", 0))
+            if content_length:
+                body = self.rfile.read(content_length).decode()
+                params.update(parse_qs(body))
+
+            response = self._route(path, params)
+
+            self.send_response(200)
+            self.send_header("Content-Type", "application/json")
+            self.end_headers()
+            self.wfile.write(json.dumps(response).encode())
+
+        def _route(self, path, params):
+            if SERVICE == "qbit":
+                category = params.get("category", [""])[0]
+                return QBIT_DATA.get(category, [])
+
+            elif SERVICE == "radarr":
+                if path == "/api/v3/history":
+                    return {"records": RADARR_HISTORY, "totalRecords": len(RADARR_HISTORY)}
+                elif path == "/api/v3/queue":
+                    return {"records": [{"downloadId": QUEUED_MOV}], "totalRecords": 1}
+                elif path == "/api/v3/movie":
+                    return RADARR_MOVIES
+                return {}
+
+            elif SERVICE == "sonarr":
+                if path == "/api/v3/history":
+                    page = int(params.get("page", ["1"])[0])
+                    if page == 1:
+                        return {"records": SONARR_HISTORY_PAGE1, "totalRecords": 1001}
+                    else:
+                        return {"records": SONARR_HISTORY_PAGE2, "totalRecords": 1001}
+                elif path == "/api/v3/queue":
+                    return {"records": [], "totalRecords": 0}
+                elif path == "/api/v3/series":
+                    return SONARR_SERIES
+                return {}
+
+            return {}
+
+        def log_message(self, fmt, *args):
+            pass
+
+    HTTPServer(("0.0.0.0", PORT), Handler).serve_forever()
+  '';
+in
+pkgs.testers.runNixOSTest {
+  name = "torrent-audit";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = [ pkgs.curl ];
+
+      systemd.services.mock-qbittorrent = {
+        description = "Mock qBittorrent API";
+        wantedBy = [ "multi-user.target" ];
+        serviceConfig = {
+          ExecStart = "${pkgs.python3}/bin/python3 ${mockScript} qbit ${toString qbitPort}";
+          Type = "simple";
+        };
+      };
+
+      systemd.services.mock-radarr = {
+        description = "Mock Radarr API";
+        wantedBy = [ "multi-user.target" ];
+        serviceConfig = {
+          ExecStart = "${pkgs.python3}/bin/python3 ${mockScript} radarr ${toString radarrPort}";
+          Type = "simple";
+        };
+      };
+
+      systemd.services.mock-sonarr = {
+        description = "Mock Sonarr API";
+        wantedBy = [ "multi-user.target" ];
+        serviceConfig = {
+          ExecStart = "${pkgs.python3}/bin/python3 ${mockScript} sonarr ${toString sonarrPort}";
+          Type = "simple";
+        };
+      };
+    };
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    # Wait for all mock services to be responsive
+    machine.wait_for_unit("mock-qbittorrent.service")
+    machine.wait_for_unit("mock-radarr.service")
+    machine.wait_for_unit("mock-sonarr.service")
+    machine.wait_until_succeeds(
+        "curl -sf http://localhost:${toString qbitPort}/api/v2/torrents/info?category=movies",
+        timeout=30,
+    )
+    machine.wait_until_succeeds(
+        "curl -sf http://localhost:${toString radarrPort}/api/v3/movie",
+        timeout=30,
+    )
+    machine.wait_until_succeeds(
+        "curl -sf http://localhost:${toString sonarrPort}/api/v3/queue",
+        timeout=30,
+    )
+
+    # Run the audit script and capture stdout
+    output = machine.succeed(
+        "QBITTORRENT_URL=http://localhost:${toString qbitPort} "
+        "RADARR_URL=http://localhost:${toString radarrPort} "
+        "RADARR_CONFIG=${radarrConfig} "
+        "SONARR_URL=http://localhost:${toString sonarrPort} "
+        "SONARR_CONFIG=${sonarrConfig} "
+        "CATEGORIES=movies,tvshows,anime "
+        "${python} ${auditScript}"
+    )
+
+    print("=== SCRIPT OUTPUT ===")
+    print(output)
+    print("=== END OUTPUT ===")
+
+    # Fix 10: Assert section heading exists before splitting
+    assert "ABANDONED UPGRADE LEFTOVERS" in output, \
+        "Output must contain ABANDONED UPGRADE LEFTOVERS heading"
+
+    # Split output into sections for targeted assertions
+    unmanaged_section = output.split("ABANDONED UPGRADE LEFTOVERS")[0]
+    abandoned_section = output.split("ABANDONED UPGRADE LEFTOVERS")[1]
+
+    # Helper: locate the first line mentioning a torrent and require the
+    # note on that line or on one of the 3 lines that follow it
+    def assert_note_near(section, torrent_name, note_text):
+        rows = section.splitlines()
+        hit = next(
+            (n for n, row in enumerate(rows) if torrent_name in row),
+            None,
+        )
+        assert hit is not None, f"{torrent_name} not found in section"
+        window = "\n".join(rows[hit:hit + 4])
+        assert note_text in window, \
+            f"Expected '{note_text}' near '{torrent_name}', got:\n{window}"
+
+    with subtest("Detects unmanaged movie torrent"):
+        assert "Unmanaged.Movie.2024" in unmanaged_section, \
+            "Should detect unmanaged movie"
+        assert "1 unmanaged / 9 total" in unmanaged_section, \
+            "Should show 1 unmanaged movie out of 9"
+
+    with subtest("Detects unmanaged TV torrent"):
+        assert "Unmanaged.Show.S01E01" in unmanaged_section, \
+            "Should detect unmanaged TV show"
+        assert "1 unmanaged / 6 total" in unmanaged_section, \
+            "Should show 1 unmanaged TV show out of 6"
+
+    with subtest("Empty category shows zero counts"):
+        assert "0 unmanaged / 0 total" in unmanaged_section, \
+            "anime category should show 0 unmanaged / 0 total"
+
+    with subtest("Managed torrents are NOT listed as unmanaged"):
+        assert "Managed.Movie.2024" not in unmanaged_section, \
+            "Managed movie should not appear in unmanaged section"
+        assert "Managed.Show.S01E01" not in unmanaged_section, \
+            "Managed TV show should not appear in unmanaged section"
+
+    with subtest("Queue-known hash is NOT listed as unmanaged"):
+        assert "Queued.Movie.2024" not in unmanaged_section, \
+            "Torrent in Radarr queue should not appear as unmanaged"
+
+    with subtest("Detects abandoned movie upgrade as SAFE"):
+        assert "Old.Movie.Quality.2024" in abandoned_section, \
+            "Should detect abandoned movie"
+        for line in abandoned_section.splitlines():
+            if "Old.Movie.Quality.2024" in line:
+                assert "SAFE" in line, f"Old movie should be SAFE, got: {line}"
+                break
+
+    with subtest("Detects abandoned TV episode as SAFE"):
+        assert "Old.Show.S01E01" in abandoned_section, \
+            "Should detect abandoned TV episode"
+        for line in abandoned_section.splitlines():
+            if "Old.Show.S01E01" in line:
+                assert "SAFE" in line, f"Old TV should be SAFE, got: {line}"
+                break
+
+    with subtest("Keeper-also-abandoned hash is NOT listed as abandoned"):
+        assert "CrossRef.Movie.2024" not in abandoned_section, \
+            "Hash that is keeper for another movie must not appear as abandoned"
+
+    with subtest("Season pack NOT abandoned when still active for other episodes"):
+        assert "Season.Pack.S02" not in abandoned_section, \
+            "Season pack still active for episodes 301/302 must not be abandoned"
+
+    with subtest("Negative assertions for keepers"):
+        assert "New.Movie.Quality.2024" not in abandoned_section, \
+            "Keeper for movieId=2 must not appear as abandoned"
+        assert "New.Show.S01E01" not in abandoned_section, \
+            "Keeper for episodeId=200 must not appear as abandoned"
+        assert "Managed.Movie.2024" not in abandoned_section, \
+            "Single-import movie must not appear as abandoned"
+        assert "Managed.Show.S01E01" not in abandoned_section, \
+            "Single-import TV show must not appear as abandoned"
+
+    with subtest("Single-import keeper not abandoned (Bug 1 regression)"):
+        assert "SingleCross.Movie.2024" not in abandoned_section, \
+            "Hash that is sole import for movieId=7 must be in keeper set, not abandoned"
+
+    with subtest("Removed movie triggers REVIEW status"):
+        assert "Removed.Movie.2024" in abandoned_section, \
+            "Should detect abandoned torrent for removed movie"
+        assert_note_near(abandoned_section, "Removed.Movie.2024", "movie removed")
+        for line in abandoned_section.splitlines():
+            if "Removed.Movie.2024" in line:
+                assert "REVIEW" in line, f"Removed movie should be REVIEW, got: {line}"
+                break
+
+    with subtest("Abandoned larger than current triggers REVIEW"):
+        assert "Larger.Movie.2024" in abandoned_section, \
+            "Should detect larger abandoned torrent"
+        assert_note_near(abandoned_section, "Larger.Movie.2024", "abandoned is larger")
+        for line in abandoned_section.splitlines():
+            if "Larger.Movie.2024" in line:
+                assert "REVIEW" in line, f"Larger abandoned should be REVIEW, got: {line}"
+                break
+
+    with subtest("Removed series triggers REVIEW status for TV"):
+        assert "Removed.Show.S01E01" in abandoned_section, \
+            "Should detect abandoned torrent for removed series"
+        assert_note_near(abandoned_section, "Removed.Show.S01E01", "series removed")
+        for line in abandoned_section.splitlines():
+            if "Removed.Show.S01E01" in line:
+                assert "REVIEW" in line, f"Removed series should be REVIEW, got: {line}"
+                break
+
+    with subtest("Correct abandoned counts per category"):
+        assert "movies (3 abandoned)" in abandoned_section, \
+            "Should show 3 abandoned movies"
+        assert "tvshows (2 abandoned)" in abandoned_section, \
+            "Should show 2 abandoned TV shows"
+
+    with subtest("Correct summary totals"):
+        assert "ABANDONED: 5 total (2 safe to delete)" in output, \
+            "Summary should show 5 total abandoned, 2 safe to delete"
+        assert "SAFE TO RECLAIM: 3.4 GiB" in output, \
+            "Should report 3.4 GiB reclaimable (2.8 GiB movie + 0.7 GiB TV)"
+  '';
+}
--- a/tests/xmrig-auto-pause.nix
+++ b/tests/xmrig-auto-pause.nix
@@ -0,0 +1,206 @@
+{
+  pkgs,
+  ...
+}:
+let
+  script = ../services/monero/xmrig-auto-pause.py;
+  python = pkgs.python3;
+in
+pkgs.testers.runNixOSTest {
+  name = "xmrig-auto-pause";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = with pkgs; [
+        python3
+        procps
+      ];
+
+      # Stand-in for the real miner: a `sleep infinity` process at the
+      # lowest scheduling priority (nice 19) that systemd can stop and
+      # start on demand.
+      systemd.services.xmrig = {
+        description = "Mock xmrig miner";
+        wantedBy = [ "multi-user.target" ];
+        serviceConfig.Type = "simple";
+        serviceConfig.Nice = 19;
+        serviceConfig.ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
+      };
+    };
+
+  testScript = ''
+    import time
+
+    PYTHON = "${python}/bin/python3"
+    SCRIPT = "${script}"
+
+    # Tuned for test VMs (1-2 cores).
+    # POLL_INTERVAL=1 keeps detection latency low.
+    # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short
+    # enough that the full test completes in reasonable time.
+    # CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%)
+    # without triggering from normal VM noise.
+    # CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM.
+    POLL_INTERVAL = "1"
+    GRACE_PERIOD = "5"
+    CPU_STOP_THRESHOLD = "20"
+    CPU_RESUME_THRESHOLD = "10"
+    STARTUP_COOLDOWN = "4"
+    STATE_DIR = "/tmp/xap-state"
+    def start_cpu_load(name):
+        """Spin up a transient systemd unit `name` running a shell busy-loop
+        at default (non-nice) priority."""
+        burn = "bash -c 'while true; do :; done'"
+        machine.succeed(f"systemd-run --unit={name} --property=Type=exec {burn}")
+
+    def stop_cpu_load(name):
+        """Stop the transient load unit `name`."""
+        machine.succeed(f"systemctl stop {name}")
+
+    def start_monitor(unit_name):
+        """Start the auto-pause monitor as a transient unit, passing the
+        tunables defined above to the script via --setenv."""
+        machine.succeed(
+            f"systemd-run --unit={unit_name} "
+            f"--setenv=POLL_INTERVAL={POLL_INTERVAL} "
+            f"--setenv=GRACE_PERIOD={GRACE_PERIOD} "
+            f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} "
+            f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} "
+            f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} "
+            f"--setenv=STATE_DIR={STATE_DIR} "
+            f"{PYTHON} {SCRIPT}"
+        )
+        # Monitor needs two consecutive polls to compute a CPU delta; at
+        # POLL_INTERVAL=1 a single 3 s wait covers that.
+        time.sleep(3)
+
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+    machine.wait_for_unit("xmrig.service")
+    machine.succeed(f"mkdir -p {STATE_DIR}")
+
+    with subtest("Start auto-pause monitor"):
+        start_monitor("xmrig-auto-pause")
+
+    with subtest("xmrig stays running while system is idle"):
+        machine.succeed("systemctl is-active xmrig")
+
+    with subtest("xmrig stopped when CPU load appears"):
+        start_cpu_load("cpu-load")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+    with subtest("xmrig remains stopped during grace period after load ends"):
+        stop_cpu_load("cpu-load")
+        # Load just stopped. Grace period is 5s. Check at 2s — well within.
+        time.sleep(2)
+        machine.fail("systemctl is-active xmrig")
+
+    with subtest("xmrig resumes after grace period expires"):
+        # Already idle since previous subtest. Grace period (5s) plus
+        # detection delay (~2 polls) plus startup cooldown (4s) means
+        # xmrig should restart within ~12s.
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
+
+    with subtest("Intermittent load does not cause flapping"):
+        # First load — stop xmrig
+        start_cpu_load("cpu-load-1")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+        stop_cpu_load("cpu-load-1")
+
+        # Brief idle gap — shorter than grace period
+        time.sleep(2)
+
+        # Second load arrives before grace period expires
+        start_cpu_load("cpu-load-2")
+        time.sleep(3)
+
+        # xmrig must still be stopped
+        machine.fail("systemctl is-active xmrig")
+
+        stop_cpu_load("cpu-load-2")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
+
+    with subtest("Sustained load keeps xmrig stopped"):
+        start_cpu_load("cpu-load-3")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        # Stay busy longer than the grace period to prove continuous
+        # activity keeps xmrig stopped indefinitely.
+        time.sleep(8)
+        machine.fail("systemctl is-active xmrig")
+
+        stop_cpu_load("cpu-load-3")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
+
+    with subtest("External restart detected and re-stopped under load"):
+        # Put system under load so auto-pause stops xmrig.
+        start_cpu_load("cpu-load-4")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        # Something external starts xmrig while load is active.
+        # The script should detect this and re-stop it.
+        machine.succeed("systemctl start xmrig")
+        machine.succeed("systemctl is-active xmrig")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        stop_cpu_load("cpu-load-4")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
+
+    # --- State persistence and crash recovery ---
+    machine.succeed("systemctl stop xmrig-auto-pause")
+
+    with subtest("xmrig recovers after crash during startup cooldown"):
+        machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
+        start_monitor("xmrig-auto-pause-crash")
+
+        # Load -> xmrig stops
+        start_cpu_load("cpu-crash")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        # End load -> xmrig restarts after grace period
+        stop_cpu_load("cpu-crash")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
+
+        # Kill xmrig immediately — simulates crash during startup cooldown.
+        # The script should detect the failure when cooldown expires and
+        # re-enter the retry cycle.
+        machine.succeed("systemctl kill --signal=KILL xmrig")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=5)
+
+        # After cooldown + grace period + restart, xmrig should be back.
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
+
+        machine.succeed("systemctl stop xmrig-auto-pause-crash")
+        machine.succeed("systemctl reset-failed xmrig.service || true")
+        machine.succeed("systemctl start xmrig")
+        machine.wait_for_unit("xmrig.service")
+
+    with subtest("Script restart preserves pause state"):
+        machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
+        start_monitor("xmrig-auto-pause-persist")
+
+        # Load -> xmrig stops
+        start_cpu_load("cpu-persist")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        # Stop the monitor while xmrig is paused (stands in for the script dying mid-pause)
+        machine.succeed("systemctl stop xmrig-auto-pause-persist")
+
+        # State file must exist — the monitor persisted the pause flag
+        machine.succeed(f"test -f {STATE_DIR}/paused")
+
+        # Start a fresh monitor instance (reads state file on startup)
+        start_monitor("xmrig-auto-pause-persist2")
+
+        # End load — the new monitor should pick up the paused state
+        # and restart xmrig after the grace period
+        stop_cpu_load("cpu-persist")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
+
+        # State file should be cleaned up after successful restart
+        machine.fail(f"test -f {STATE_DIR}/paused")
+
+        machine.succeed("systemctl stop xmrig-auto-pause-persist2")
+  '';
+}
--- a/tests/zfs-scrub-annotations.nix
+++ b/tests/zfs-scrub-annotations.nix
@@ -0,0 +1,123 @@
+{
+  lib,
+  pkgs,
+  ...
+}:
+let
+  mockServer = ./mock-grafana-server.py;
+
+  # Stub `zpool`: `list` names two pools; `status <pool>` prints that pool's
+  # canned scrub summary line and nothing for any other pool.
+  mockZpool = pkgs.writeShellScript "zpool" ''
+    case "$1" in
+      list)
+        echo "tank"
+        echo "hdds"
+        ;;
+      status)
+        case "$2" in
+          tank) echo "  scan: scrub repaired 0B in 00:24:39 with 0 errors on Mon Jan  1 02:24:39 2024" ;;
+          hdds) echo "  scan: scrub repaired 0B in 04:12:33 with 0 errors on Mon Jan  1 06:12:33 2024" ;;
+        esac
+        ;;
+    esac
+  '';
+
+  script = ../services/grafana/zfs-scrub-annotations.sh;
+  python = pkgs.python3;
+in
+pkgs.testers.runNixOSTest {
+  name = "zfs-scrub-annotations";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = with pkgs; [
+        python3
+        curl
+        jq
+      ];
+    };
+
+  testScript = ''
+    import json
+
+    GRAFANA_PORT = 13000
+    ANNOTS_FILE  = "/tmp/annotations.json"
+    STATE_DIR    = "/tmp/scrub-state"
+    PYTHON       = "${python}/bin/python3"
+    MOCK         = "${mockServer}"
+    SCRIPT       = "${script}"
+    MOCK_ZPOOL   = "${mockZpool}"
+
+    MOCK_BIN = "/tmp/mock-bin"
+    ENV_PREFIX = (
+        f"GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
+        f"STATE_DIR={STATE_DIR} "
+        f"PATH={MOCK_BIN}:$PATH "
+    )
+
+    def read_annotations():
+        """Return the parsed annotations file, or [] when it cannot be read."""
+        return json.loads(machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'").strip())
+
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    with subtest("Setup state directory and mock zpool"):
+        machine.succeed(f"mkdir -p {STATE_DIR}")
+        machine.succeed(f"mkdir -p {MOCK_BIN} && cp {MOCK_ZPOOL} {MOCK_BIN}/zpool && chmod +x {MOCK_BIN}/zpool")
+
+    with subtest("Start mock Grafana server"):
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+        machine.succeed(
+            f"systemd-run --unit=mock-grafana {PYTHON} {MOCK} {GRAFANA_PORT} {ANNOTS_FILE}"
+        )
+        machine.wait_until_succeeds(
+            f"curl -sf -X POST http://127.0.0.1:{GRAFANA_PORT}/api/annotations "
+            f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
+            timeout=10,
+        )
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+
+    with subtest("Start action creates annotation with pool names and zfs-scrub tag"):
+        machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} start")
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
+        assert "zfs-scrub" in annots[0].get("tags", []), f"Missing zfs-scrub tag: {annots[0]}"
+        assert "tank" in annots[0]["text"], f"Missing tank in text: {annots[0]['text']}"
+        assert "hdds" in annots[0]["text"], f"Missing hdds in text: {annots[0]['text']}"
+        assert "time" in annots[0], f"Missing time field: {annots[0]}"
+        assert "timeEnd" not in annots[0], f"timeEnd should not be set yet: {annots[0]}"
+
+    with subtest("State file contains annotation ID"):
+        ann_id = machine.succeed(f"cat {STATE_DIR}/annotation-id").strip()
+        assert ann_id == "1", f"Expected annotation ID 1, got: {ann_id}"
+
+    with subtest("Stop action closes annotation with per-pool scrub results"):
+        machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} stop")
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
+        assert "timeEnd" in annots[0], f"timeEnd should be set: {annots[0]}"
+        assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
+        text = annots[0]["text"]
+        assert "ZFS scrub completed" in text, f"Missing completed text: {text}"
+        assert "tank:" in text, f"Missing tank results: {text}"
+        assert "hdds:" in text, f"Missing hdds results: {text}"
+        assert "00:24:39" in text, f"Missing tank scrub duration: {text}"
+        assert "04:12:33" in text, f"Missing hdds scrub duration: {text}"
+
+    with subtest("State file cleaned up after stop"):
+        machine.fail(f"test -f {STATE_DIR}/annotation-id")
+
+    with subtest("Stop action handles missing state file gracefully"):
+        machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} stop")
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected no new annotations, got: {annots}"
+
+    with subtest("Start action handles Grafana being down gracefully"):
+        machine.succeed("systemctl stop mock-grafana")
+        machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} start")
+        machine.fail(f"test -f {STATE_DIR}/annotation-id")
+  '';
+}
--- a/tests/zfs.nix
+++ b/tests/zfs.nix
@@ -0,0 +1,153 @@
+{
+  config,
+  lib,
+  pkgs,
+  inputs,
+  ...
+}:
+let
+  # Create pkgs with ensureZfsMounts overlay
+  testPkgs = pkgs.appendOverlays [ (import ../modules/overlays.nix) ];
+in
+testPkgs.testers.runNixOSTest {
+  name = "zfs test";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      imports = [
+        # Test valid paths within zpool
+        (lib.serviceMountWithZpool "test-service" "rpool" [ "/mnt/rpool_data" ])
+
+        # Test service whose path is on a different zpool than declared (starting it should fail validation)
+        (lib.serviceMountWithZpool "invalid-service" "rpool2" [ "/mnt/rpool_data" ])
+
+        # Test multi-command logic: service with multiple serviceMountWithZpool calls
+        (lib.serviceMountWithZpool "multi-service" "rpool" [ "/mnt/rpool_data" ])
+        (lib.serviceMountWithZpool "multi-service" "rpool2" [ "/mnt/rpool2_data" ])
+
+        # Test multi-command logic: service with multiple serviceMountWithZpool calls
+        # BUT this one should fail as `/mnt/rpool_moar_data` is not on rpool2
+        (lib.serviceMountWithZpool "multi-service-fail" "rpool" [ "/mnt/rpool_data" ])
+        (lib.serviceMountWithZpool "multi-service-fail" "rpool2" [ "/mnt/rpool_moar_data" ])
+      ];
+
+      virtualisation = {
+        emptyDiskImages = [
+          4096
+          4096
+        ];
+        # Add this to avoid ZFS hanging issues
+        additionalPaths = [ pkgs.zfs ];
+      };
+      networking.hostId = "deadbeef";
+      boot.kernelPackages = config.boot.kernelPackages;
+      boot.zfs.package = config.boot.zfs.package;
+      boot.supportedFilesystems = [ "zfs" ];
+
+      environment.systemPackages = with pkgs; [
+        parted
+        ensureZfsMounts
+      ];
+
+      systemd.services."test-service" = {
+        serviceConfig = {
+          Type = "oneshot";
+          RemainAfterExit = true;
+          ExecStart = lib.getExe pkgs.bash;
+        };
+      };
+
+      systemd.services."invalid-service" = {
+        serviceConfig = {
+          Type = "oneshot";
+          RemainAfterExit = true;
+          ExecStart = lib.getExe pkgs.bash;
+        };
+      };
+
+      systemd.services."multi-service" = {
+        serviceConfig = {
+          Type = "oneshot";
+          RemainAfterExit = true;
+          ExecStart = lib.getExe pkgs.bash;
+        };
+      };
+
+      systemd.services."multi-service-fail" = {
+        serviceConfig = {
+          Type = "oneshot";
+          RemainAfterExit = true;
+          ExecStart = lib.getExe pkgs.bash;
+        };
+      };
+    };
+
+  testScript = ''
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    # Setup ZFS pool
+    machine.succeed(
+        "parted --script /dev/vdb mklabel msdos",
+        "parted --script /dev/vdb -- mkpart primary 1024M -1s",
+        "zpool create rpool /dev/vdb1"
+    )
+
+    # Setup ZFS pool 2
+    machine.succeed(
+        "parted --script /dev/vdc mklabel msdos",
+        "parted --script /dev/vdc -- mkpart primary 1024M -1s",
+        "zpool create rpool2 /dev/vdc1"
+    )
+
+    machine.succeed("zfs create -o mountpoint=/mnt/rpool_data rpool/data")
+
+    machine.succeed("zfs create -o mountpoint=/mnt/rpool2_data rpool2/data")
+
+    machine.succeed("zfs create -o mountpoint=/mnt/rpool_moar_data rpool/moar_data")
+
+    # Test that valid service starts successfully
+    machine.succeed("systemctl start test-service")
+
+    # Manually test our validation logic by checking the debug output
+    zfs_output = machine.succeed("zfs list -H -o name,mountpoint")
+    print("ZFS LIST OUTPUT:")
+    print(zfs_output)
+
+    dataset = machine.succeed("zfs list -H -o name,mountpoint | awk '/\\/mnt\\/rpool_data/ { print $1 }'")
+    print("DATASET FOR /mnt/rpool_data:")
+    print(dataset)
+
+    # Test that invalid-service mount service fails validation
+    machine.fail("systemctl start invalid-service.service")
+
+    # Check the journal for our detailed validation error message
+    journal_output = machine.succeed("journalctl -u invalid-service-mounts.service --no-pager")
+    print("JOURNAL OUTPUT:")
+    print(journal_output)
+
+    # Verify our validation error is in the journal; messages identify which check failed
+    assert "ERROR: ZFS pool mismatch for /mnt/rpool_data" in journal_output, "invalid-service: no zfs pool mismatch found (1)"
+    assert "Expected pool: rpool2" in journal_output, "invalid-service: no zfs pool mismatch found (2)"
+    assert "Actual pool:   rpool" in journal_output, "invalid-service: no zfs pool mismatch found (3)"
+
+
+    # Test that multi-service-fail's mount service fails validation
+    machine.fail("systemctl start multi-service-fail.service")
+
+    # Check the journal for our detailed validation error message
+    journal_output = machine.succeed("journalctl -u multi-service-fail-mounts.service --no-pager")
+    print("JOURNAL OUTPUT:")
+    print(journal_output)
+
+    # Verify our validation error is in the journal using Python string matching
+    assert "ERROR: ZFS pool mismatch for /mnt/rpool_moar_data" in journal_output, "no zfs pool mismatch found (1)"
+    assert "Expected pool: rpool2" in journal_output, "no zfs pool mismatch found (2)"
+    assert "Actual pool:   rpool" in journal_output, "no zfs pool mismatch found (3)"
+
+
+    machine.succeed("systemctl start multi-service")
+    machine.succeed("systemctl is-active multi-service-mounts.service")
+  '';
+}