{ config, lib, pkgs, ... }: let # Mock ntfy server script that records POST requests to a JSON log. mockNtfyScript = pkgs.writeScript "mock-ntfy.py" '' import json import os from http.server import HTTPServer, BaseHTTPRequestHandler from datetime import datetime REQUESTS_FILE = "/tmp/ntfy-requests.json" class MockNtfy(BaseHTTPRequestHandler): def _respond(self, code=200, body=b"Ok"): self.send_response(code) self.send_header("Content-Type", "application/json") self.end_headers() self.wfile.write(body if isinstance(body, bytes) else body.encode()) def do_GET(self): self._respond() def do_POST(self): content_length = int(self.headers.get("Content-Length", 0)) body = self.rfile.read(content_length).decode() if content_length > 0 else "" request_data = { "timestamp": datetime.now().isoformat(), "path": self.path, "headers": dict(self.headers), "body": body, } requests = [] if os.path.exists(REQUESTS_FILE): try: with open(REQUESTS_FILE, "r") as f: requests = json.load(f) except: requests = [] requests.append(request_data) with open(REQUESTS_FILE, "w") as f: json.dump(requests, f, indent=2) self._respond() def log_message(self, format, *args): pass HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever() ''; # Jellyfin log lines used in the test. # Jellyfin log format: [YYYY-MM-DD HH:MM:SS.mmm +TZ] [LEVEL] [thread] Source: message mkLogLine = ts: level: thread: source: msg: "[${ts}] [${level}] [${thread}] ${source}: ${msg}"; # Realistic error lines that should trigger alerts. transcodeCrash = mkLogLine "2026-05-12 14:23:01.123 +00:00" "ERR" "42" "MediaBrowser.MediaEncoding.Transcoding.TranscodeManager" "FFmpeg exited with code 137 after 45.2 seconds"; playbackError = mkLogLine "2026-05-12 14:24:05.456 +00:00" "ERR" "17" "Emby.Server.Implementations.Session.SessionManager" "Playback error for user \"alice\" on device \"Living Room TV\""; exceptionMiddleware = mkLogLine "2026-05-12 14:25:10.789 +00:00" "ERR" "99" "Jellyfin.Server.Middleware.ExceptionMiddleware" ''Error processing request. URL "GET" "/Videos/a1b2c3d4-e5f6-7890-abcd-ef1234567890/stream".''; streamAbort = mkLogLine "2026-05-12 14:26:00.111 +00:00" "ERR" "33" "MediaBrowser.Api.Playback.Hls.DynamicHlsService" "Cannot open HLS stream segment"; # Lines that should NOT trigger alerts. authDenied = mkLogLine "2026-05-12 14:27:00.222 +00:00" "ERR" "12" "Jellyfin.Server.Implementations.Users.UserManager" ''Authentication request for "bob" has been denied (IP: "10.0.0.5").''; libraryScanError = mkLogLine "2026-05-12 14:28:00.333 +00:00" "ERR" "55" "MediaBrowser.Controller.Entities.BaseItem" "Error refreshing item metadata for /library/some-broken-file.mkv"; # Below ERR level – never triggers. warnLine = mkLogLine "2026-05-12 14:29:00.444 +00:00" "WRN" "77" "MediaBrowser.MediaEncoding.Transcoding.TranscodeManager" "Slow transcoding detected (0.95x realtime)"; infoLine = mkLogLine "2026-05-12 14:30:00.555 +00:00" "INF" "88" "Jellyfin.Server.Middleware.ExceptionMiddleware" "This is informational and should not alert"; # Log file contents for each scenario. logWithFailures = pkgs.writeText "jellyfin-failure-log.log" '' ${authDenied} ${libraryScanError} ${warnLine} ${infoLine} ${transcodeCrash} ${playbackError} ${exceptionMiddleware} ${streamAbort} ''; logWithDedup = pkgs.writeText "jellyfin-dedup-log.log" '' ${transcodeCrash} ${transcodeCrash} ${transcodeCrash} ''; logNoFailures = pkgs.writeText "jellyfin-clean-log.log" '' ${authDenied} ${libraryScanError} ${warnLine} ${infoLine} ''; in pkgs.testers.runNixOSTest { name = "jellyfin-failure-alert"; nodes.machine = { pkgs, ... }: { imports = [ ../modules/ntfy-alerts.nix ../services/jellyfin/jellyfin-failure-alert.nix ]; system.stateVersion = config.system.stateVersion; virtualisation.memorySize = 2048; environment.systemPackages = with pkgs; [ curl jq ]; # Minimal jellyfin config so the guard passes. Jellyfin 10.11+ # requires 2 GiB free space, so give the VM a 4 GiB disk. virtualisation.diskSize = 4096; services.jellyfin = { enable = true; dataDir = "/var/lib/jellyfin-test"; cacheDir = "/var/cache/jellyfin-test"; user = "jellyfin"; group = "jellyfin"; }; # Jellyfin base dirs systemd.tmpfiles.rules = [ "d /var/lib/jellyfin-test 0755 jellyfin jellyfin" "d /var/lib/jellyfin-test/log 0755 jellyfin jellyfin" "f /run/ntfy-test-topic 0644 root root - test-alerts" "f /run/ntfy-test-token 0644 root root - test-token-value" ]; # Mock ntfy server systemd.services.mock-ntfy = { description = "Mock ntfy server for jellyfin-failure-alert test"; wantedBy = [ "multi-user.target" ]; before = [ "jellyfin-failure-alert.service" ]; serviceConfig = { ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}"; Type = "simple"; }; }; # Configure ntfy-alerts to use mock server services.ntfyAlerts = { enable = true; serverUrl = "http://localhost:8080"; topicFile = "/run/ntfy-test-topic"; tokenFile = "/run/ntfy-test-token"; }; # Speed up polling for the test systemd.services.jellyfin-failure-alert.environment.POLL_INTERVAL = lib.mkForce "2"; systemd.services.jellyfin-failure-alert.environment.DEDUP_WINDOW = lib.mkForce "10"; }; testScript = '' import json import time LOG_DIR = "/var/lib/jellyfin-test/log" REQUESTS_FILE = "/tmp/ntfy-requests.json" start_all() # Wait for mock ntfy server machine.wait_for_unit("mock-ntfy.service") machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30) # ------------------------------------------------------------------ # Phase 1: Client failures trigger alerts # ------------------------------------------------------------------ with subtest("Client failure log lines trigger ntfy notifications"): # Place a log file with known failure lines machine.succeed( "cp ${logWithFailures} {}/log_test.log && chown jellyfin:jellyfin {}/log_test.log".format( LOG_DIR, LOG_DIR ) ) # Start the monitor machine.succeed("systemctl start jellyfin-failure-alert.service") # Wait for the monitor to poll and send notifications. # Should pick up: transcodeCrash, playbackError, exceptionMiddleware, streamAbort machine.wait_until_succeeds( "test -f {} && test $(jq 'length' {}) -ge 4".format(REQUESTS_FILE, REQUESTS_FILE), timeout=30, ) result = machine.succeed("cat {}".format(REQUESTS_FILE)) requests = json.loads(result) print(f"Phase 1: received {len(requests)} ntfy notifications") assert len(requests) >= 4, f"Expected >= 4 notifications, got {len(requests)}" # Verify each notification has the expected shape for req in requests: assert "/test-alerts" in req["path"], f"Wrong topic path: {req['path']}" assert "Title" in req["headers"], "Missing Title header" assert "Jellyfin" in req["headers"]["Title"], ( f"Title should mention Jellyfin: {req['headers']['Title']}" ) assert req["headers"]["Priority"] == "high", ( f"Expected Priority 'high', got {req['headers'].get('Priority')}" ) assert req["headers"]["Tags"] == "warning", ( f"Expected Tags 'warning', got {req['headers'].get('Tags')}" ) assert req["headers"]["Authorization"] == "Bearer test-token-value", ( f"Missing or wrong Authorization header: {req['headers'].get('Authorization')}" ) assert len(req["body"]) > 0, "Notification body is empty" # Verify specific error content appears in bodies bodies = " ".join(r["body"] for r in requests) assert "FFmpeg" in bodies, "Missing FFmpeg error in notifications" assert "Playback error" in bodies, "Missing playback error in notifications" assert "ExceptionMiddleware" in bodies or "/Videos/" in bodies, ( "Missing exception middleware error in notifications" ) print("Phase 1 passed: all client failures triggered alerts") # ------------------------------------------------------------------ # Phase 2: Non-client errors are filtered out # ------------------------------------------------------------------ with subtest("Non-client errors do not trigger alerts"): # Clear previous requests and add a new log file with only non-client errors machine.succeed("rm -f {}".format(REQUESTS_FILE)) machine.succeed( "cp ${logNoFailures} {}/log_clean.log && chown jellyfin:jellyfin {}/log_clean.log".format( LOG_DIR, LOG_DIR ) ) # Wait enough poll cycles (at least 8s = 4 cycles at POLL_INTERVAL=2) time.sleep(8) # Assert no requests file was created. If it exists, it must be empty # (any non-empty result means a non-client error leaked through). rc, _ = machine.execute("test -f {}".format(REQUESTS_FILE)) if rc == 0: machine.succeed("test $(jq 'length' {}) -eq 0".format(REQUESTS_FILE)) print("Phase 2 passed: non-client errors correctly filtered") # ------------------------------------------------------------------ # Phase 3: Deduplication # ------------------------------------------------------------------ with subtest("Duplicate errors are deduplicated within the window"): # Clear and add a log file with the same error repeated machine.succeed("rm -f {}".format(REQUESTS_FILE)) machine.succeed( "cp ${logWithDedup} {}/log_dedup.log && chown jellyfin:jellyfin {}/log_dedup.log".format( LOG_DIR, LOG_DIR ) ) # Wait for the monitor to process. Should get exactly 1 notification. machine.wait_until_succeeds( "test -f {} && test $(jq 'length' {}) -eq 1".format(REQUESTS_FILE, REQUESTS_FILE), timeout=30, ) result = machine.succeed("cat {}".format(REQUESTS_FILE)) requests = json.loads(result) print(f"Phase 3: received {len(requests)} notifications (expected 1 for dedup)") assert len(requests) == 1, ( f"Expected exactly 1 notification for 3 identical errors, got {len(requests)}" ) assert "FFmpeg" in requests[0]["body"], "Missing FFmpeg error in dedup notification" print("Phase 3 passed: deduplication works correctly") # ------------------------------------------------------------------ # Phase 4: Re-alert after dedup window expires # ------------------------------------------------------------------ with subtest("Same error re-alerts after dedup window expires"): # Wait for dedup window to expire (10s) time.sleep(12) # Write the same error again in a new log file machine.succeed( "cp ${logWithDedup} {}/log_dedup2.log && chown jellyfin:jellyfin {}/log_dedup2.log".format( LOG_DIR, LOG_DIR ) ) # Wait for the monitor to pick it up. Should now have 2 entries. machine.wait_until_succeeds( "test -f {} && test $(jq 'length' {}) -eq 2".format(REQUESTS_FILE, REQUESTS_FILE), timeout=30, ) result = machine.succeed("cat {}".format(REQUESTS_FILE)) requests = json.loads(result) print(f"Phase 4: received {len(requests)} notifications (expected 2)") assert len(requests) == 2, ( f"Expected 2 notifications after dedup window expired, got {len(requests)}" ) print("Phase 4 passed: re-alert after dedup window works") print("All jellyfin-failure-alert tests passed!") ''; }