diff --git a/services/jellyfin/default.nix b/services/jellyfin/default.nix
index 45a46f6..e931163 100644
--- a/services/jellyfin/default.nix
+++ b/services/jellyfin/default.nix
@@ -3,5 +3,6 @@
     ./jellyfin.nix
     ./jellyfin-qbittorrent-monitor.nix
     ./jellyfin-deploy-guard.nix
+    ./jellyfin-failure-alert.nix
   ];
 }
diff --git a/services/jellyfin/jellyfin-failure-alert.nix b/services/jellyfin/jellyfin-failure-alert.nix
new file mode 100644
index 0000000..50bcd38
--- /dev/null
+++ b/services/jellyfin/jellyfin-failure-alert.nix
@@ -0,0 +1,67 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+  jfCfg = config.services.jellyfin;
+  ntfyCfg = config.services.ntfyAlerts;
+in
+lib.mkIf (jfCfg.enable && ntfyCfg.enable) {
+  systemd.services.jellyfin-failure-alert = {
+    description = "Monitor Jellyfin logs for client playback failures and alert via ntfy";
+    after = [
+      "network.target"
+      "jellyfin.service"
+    ];
+    wants = [ "jellyfin.service" ];
+    wantedBy = [ "multi-user.target" ];
+
+    serviceConfig = {
+      Type = "simple";
+      ExecStart = pkgs.writeShellScript "jellyfin-failure-alert-start" ''
+        set -euo pipefail
+        # Assign, then export: `export VAR=$(cmd)` hides a failing cmd from `set -e`.
+        NTFY_TOPIC=$(tr -d '[:space:]' < "$CREDENTIALS_DIRECTORY/ntfy-topic")
+        export NTFY_TOPIC
+        ${lib.optionalString (ntfyCfg.tokenFile != null) ''
+          export NTFY_TOKEN_FILE="$CREDENTIALS_DIRECTORY/ntfy-token"
+        ''}
+        exec ${pkgs.python3}/bin/python ${./jellyfin-failure-alert.py}
+      '';
+      Restart = "always";
+      RestartSec = "10s";
+
+      # Security hardening
+      DynamicUser = true;
+      NoNewPrivileges = true;
+      ProtectSystem = "strict";
+      ProtectHome = true;
+      ProtectKernelTunables = true;
+      ProtectKernelModules = true;
+      ProtectControlGroups = true;
+      MemoryDenyWriteExecute = true;
+      RestrictRealtime = true;
+      RestrictSUIDSGID = true;
+      RemoveIPC = true;
+
+      # DynamicUser needs jellyfin group to read 0700 log dir
+      SupplementaryGroups = [ jfCfg.group ];
+
+      # Load credentials from agenix secrets
+      LoadCredential = [
+        "ntfy-topic:${ntfyCfg.topicFile}"
+      ]
+      ++ lib.optional (ntfyCfg.tokenFile != null) "ntfy-token:${ntfyCfg.tokenFile}";
+    };
+
+    environment = {
+      JELLYFIN_LOG_DIR = "${jfCfg.dataDir}/log";
+      NTFY_SERVER_URL = ntfyCfg.serverUrl;
+      HOSTNAME = config.networking.hostName;
+      POLL_INTERVAL = "15";
+      DEDUP_WINDOW = "300";
+    };
+  };
+}
diff --git a/services/jellyfin/jellyfin-failure-alert.py b/services/jellyfin/jellyfin-failure-alert.py
new file mode 100644
index 0000000..8356296
--- /dev/null
+++ b/services/jellyfin/jellyfin-failure-alert.py
@@ -0,0 +1,324 @@
+#!/usr/bin/env python3
+"""Monitor Jellyfin log files for client playback/transcoding failures.
+
+Tails Jellyfin's rotating log files, matches [ERR] lines that indicate
+a client-facing failure (playback error, transcode crash, stream abort),
+deduplicates within a window, and pushes a ntfy notification.
+
+Environment
+    JELLYFIN_LOG_DIR   path to Jellyfin log directory (required)
+    NTFY_SERVER_URL    ntfy server base URL (required)
+    NTFY_TOPIC         ntfy topic name (required)
+    NTFY_TOKEN_FILE    optional path to file containing ntfy auth bearer token
+    HOSTNAME           server hostname for notification title (default: "muffin")
+    POLL_INTERVAL      seconds between log scans (default: 15)
+    DEDUP_WINDOW       seconds before re-alerting same signature (default: 300)
+"""
+
+import glob
+import hashlib
+import logging
+import os
+import re
+import signal
+import sys
+import time
+import urllib.request
+import urllib.error
+from pathlib import Path
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Patterns
+# ---------------------------------------------------------------------------
+
+# Jellyfin log line prefix: [2024-01-01 12:00:00.000 +00:00] [ERR] [123]
+_LOG_PREFIX_RE = re.compile(
+    r"^\[[\d\-]{10} [\d:.]{12} [+-]\d{2}:\d{2}\] \[ERR\] \[\d+\] "
+)
+
+# Sources that indicate a client-facing failure.
+_CLIENT_FAILURE_SOURCES = {
+    # Transcoding engine: crashes and encoder errors
+    "MediaBrowser.MediaEncoding.Transcoding.TranscodeManager",
+    "MediaBrowser.MediaEncoding.Encoder.EncodingManager",
+    # Playback sessions
+    "Emby.Server.Implementations.Session.SessionManager",
+    # Exceptions raised while serving HTTP requests
+    "Jellyfin.Server.Middleware.ExceptionMiddleware",
+    # Streaming and live TV
+    "MediaBrowser.Api.Playback.MediaInfoService",
+    "MediaBrowser.Api.Playback.Progressive.ProgressiveStreamWriter",
+    "MediaBrowser.Api.Playback.Hls.DynamicHlsService",
+    # Direct play / direct stream
+    "MediaBrowser.Controller.MediaEncoding.EncodingHelper",
+    # DLNA / remote control (rare, but still client-facing)
+    "Emby.Server.Implementations.HttpServer.HttpListenerHost",
+}
+
+# Message-level fallbacks, searched case-insensitively in the text after the
+# log prefix when the line's source is not in _CLIENT_FAILURE_SOURCES.
+_CLIENT_FAILURE_PATTERNS = [
+    re.compile(expr, re.IGNORECASE)
+    for expr in (
+        r"error processing request.*?(?:/Videos/|/Items/|/Audio/)",
+        r"ffmpeg.*?(?:error|exited with code [1-9]|crashed|killed)",
+        r"playback\s*error",
+        r"transcode.*?(?:fail|error|abort)",
+        r"stream.*?(?:fail|error|abort|closed)",
+        r"client.*?(?:disconnect|error|timeout)",
+    )
+]
+
+# Volatile tokens removed from a log line before hashing its dedup signature.
+_SIGNATURE_SCRUB_RE = re.compile(
+    r"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"  # UUID
+    r"|\b[0-9a-fA-F]{32,}\b"  # long hex hashes
+    r"|\b\d{4,}\b"  # ids / durations / sizes ≥ 4 digits
+    r"|0x[0-9a-fA-F]+"  # hex addresses
+)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _read_token(token_file: str | None) -> str | None:
+    """Return the stripped token stored in *token_file*, or None if unusable."""
+    if not token_file:
+        return None
+    try:
+        return Path(token_file).read_text(encoding="utf-8").strip() or None
+    except OSError as exc:
+        logger.warning("Cannot read ntfy token file %s: %s", token_file, exc)
+        return None
+
+
+def _send_ntfy(
+    server_url: str,
+    topic: str,
+    title: str,
+    message: str,
+    token: str | None,
+    priority: str = "high",
+    tags: str = "warning",
+) -> bool:
+    """POST a ntfy notification. Returns True on success.
+
+    HTTP and OS-level transport errors are logged and returned as False.
+    """
+    url = f"{server_url.rstrip('/')}/{topic}"
+    headers = {
+        # http.client encodes header values as latin-1; replace anything else
+        # rather than letting UnicodeEncodeError abort the caller's scan.
+        "Title": title.encode("latin-1", "replace").decode("latin-1"),
+        "Priority": priority,
+        "Tags": tags,
+        "Content-Type": "text/plain",
+    }
+    if token:
+        headers["Authorization"] = f"Bearer {token}"
+
+    req = urllib.request.Request(
+        url, data=message.encode("utf-8"), headers=headers, method="POST"
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=15) as resp:
+            return 200 <= resp.status < 300
+    except urllib.error.HTTPError as exc:
+        logger.warning("ntfy POST returned HTTP %s: %s", exc.code, exc.reason)
+    except OSError as exc:
+        # URLError, TimeoutError and ConnectionError are all OSError subclasses.
+        logger.warning("ntfy POST failed: %s", getattr(exc, "reason", exc))
+    return False
+
+
+def _error_signature(line: str) -> str:
+    """Return a stable hash for a Jellyfin error log line.
+
+    Strips the timestamp prefix and normalises UUIDs, hex hashes, and large
+    integers so that the same logical error from different sessions or items
+    collapses to the same signature.
+    """
+    # Drop the timestamp / level / thread prefix, keeping "Source: message".
+    body = _LOG_PREFIX_RE.sub("", line, count=1)
+    if not body:
+        body = line
+    # Remove volatile tokens (UUIDs, hashes, large numbers, addresses) ...
+    normalised = _SIGNATURE_SCRUB_RE.sub("", body)
+    # ... then collapse the whitespace runs they leave behind.
+    normalised = re.sub(r"\s+", " ", normalised).strip()
+    return hashlib.sha256(normalised.encode()).hexdigest()
+
+
+def _is_client_failure(line: str) -> bool:
+    """Check whether a Jellyfin [ERR] log line indicates a client failure."""
+    prefix = _LOG_PREFIX_RE.match(line)
+    if prefix is None:
+        return False
+    body = line[prefix.end():]
+    if not body:
+        return False
+
+    # A known client-facing source (the text before the first ': ') is enough.
+    source, sep, _ = body.partition(": ")
+    if sep and source in _CLIENT_FAILURE_SOURCES:
+        return True
+
+    # Otherwise fall back to message-level patterns.
+    return any(pat.search(body) for pat in _CLIENT_FAILURE_PATTERNS)
+
+
+def _scan_log_file(path: str, seen_positions: dict[str, int]) -> list[str]:
+    """Return client-failure lines appended to *path* since the last scan.
+
+    *seen_positions* maps "{inode}:{dev}" to the consumed byte offset and is
+    updated in place; a truncated (shrunken) file is re-read from offset 0.
+    Only complete lines are consumed; a partial tail is retried next scan.
+    """
+    hits: list[str] = []
+    try:
+        st = os.stat(path)
+    except OSError as exc:
+        # Vanished between glob() and stat(); there is no cursor key to update.
+        logger.debug("Cannot stat %s: %s", path, exc)
+        return hits
+
+    inode_key = f"{st.st_ino}:{st.st_dev}"
+    offset = seen_positions.get(inode_key, 0)
+    if st.st_size < offset:
+        offset = 0  # file was truncated: start from the beginning
+
+    try:
+        # Binary mode keeps the cursor in real bytes, comparable with st_size.
+        with open(path, "rb") as fh:
+            fh.seek(offset)
+            for raw in fh:
+                if not raw.endswith(b"\n"):
+                    break  # partial trailing line: wait until it is complete
+                offset += len(raw)
+                line = raw.decode("utf-8", errors="replace").rstrip("\n\r")
+                if _is_client_failure(line):
+                    hits.append(line)
+    except OSError as exc:
+        logger.debug("Cannot read %s: %s", path, exc)
+
+    seen_positions[inode_key] = offset
+    return hits
+
+
+# ---------------------------------------------------------------------------
+# Main loop
+# ---------------------------------------------------------------------------
+
+
+def main() -> None:
+    """Poll the Jellyfin log directory and push new client failures to ntfy."""
+    log_dir = os.environ.get("JELLYFIN_LOG_DIR")
+    ntfy_url = os.environ.get("NTFY_SERVER_URL")
+    ntfy_topic = os.environ.get("NTFY_TOPIC")
+    ntfy_token_file = os.environ.get("NTFY_TOKEN_FILE")
+    hostname = os.environ.get("HOSTNAME", "muffin")
+    poll_interval = int(os.environ.get("POLL_INTERVAL", "15"))
+    dedup_window = int(os.environ.get("DEDUP_WINDOW", "300"))
+
+    if not log_dir:
+        logger.critical("JELLYFIN_LOG_DIR is required")
+        sys.exit(1)
+    if not ntfy_url:
+        logger.critical("NTFY_SERVER_URL is required")
+        sys.exit(1)
+    if not ntfy_topic:
+        logger.critical("NTFY_TOPIC is required")
+        sys.exit(1)
+
+    running = True
+
+    def _handle_signal(signum: int, _frame: object) -> None:
+        nonlocal running
+        logger.info("Received signal %s, shutting down", signum)
+        running = False
+
+    signal.signal(signal.SIGTERM, _handle_signal)
+    signal.signal(signal.SIGINT, _handle_signal)
+
+    ntfy_token = _read_token(ntfy_token_file)
+
+    # Dedup state: signature → last-alerted timestamp
+    seen_signatures: dict[str, float] = {}
+    # File read cursors: "{inode}:{dev}" → byte offset
+    file_positions: dict[str, int] = {}
+
+    logger.info(
+        "Starting Jellyfin failure alert monitor (log_dir=%s, poll=%ss, dedup=%ss)",
+        log_dir,
+        poll_interval,
+        dedup_window,
+    )
+
+    while running:
+        try:
+            now = time.time()
+
+            # Expire old dedup entries
+            expired = [s for s, ts in seen_signatures.items() if now - ts > dedup_window]
+            for s in expired:
+                del seen_signatures[s]
+
+            # Scan all log files
+            log_pattern = os.path.join(log_dir, "log_*.log")
+            for path in sorted(glob.glob(log_pattern)):
+                hits = _scan_log_file(path, file_positions)
+                for line in hits:
+                    sig = _error_signature(line)
+                    if sig in seen_signatures:
+                        logger.debug("Suppressed duplicate: %s", line[:120])
+                        continue
+                    seen_signatures[sig] = now
+
+                    # Build a clean title: source + short summary
+                    body = _LOG_PREFIX_RE.sub("", line, count=1)
+                    title = f"[{hostname}] Jellyfin client failure"
+                    if ": " in body:
+                        source, msg = body.split(": ", 1)
+                        title = f"[{hostname}] Jellyfin: {source.split('.')[-1]}"
+                        body = msg
+
+                    # Truncate body for readability
+                    if len(body) > 500:
+                        body = body[:497] + "..."
+
+                    logger.warning("Alerting: %s", body[:120])
+                    _send_ntfy(ntfy_url, ntfy_topic, title, body, ntfy_token)
+
+            # Clean up stale file-position entries for rotated-out files
+            current_inodes = set()
+            for path in glob.glob(log_pattern):
+                try:
+                    st = os.stat(path)
+                    current_inodes.add(f"{st.st_ino}:{st.st_dev}")
+                except OSError:
+                    pass
+            for k in [k for k in file_positions if k not in current_inodes]:
+                del file_positions[k]
+
+        except Exception:
+            logger.exception("Unhandled error in main loop")
+
+        # Sleep in small increments so we can react to SIGTERM promptly.
+        deadline = time.time() + poll_interval
+        while running and time.time() < deadline:
+            # Clamp at 0: time.sleep() raises ValueError for a negative delay.
+            time.sleep(max(0.0, min(1.0, deadline - time.time())))
+
+    logger.info("Jellyfin failure alert monitor stopped")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tests/jellyfin-failure-alert.nix b/tests/jellyfin-failure-alert.nix
new file mode 100644
index 0000000..9d5c2de
--- /dev/null
+++ b/tests/jellyfin-failure-alert.nix
@@ -0,0 +1,351 @@
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
+let
+
+  # Mock ntfy server script that records POST requests to a JSON log.
+  mockNtfyScript = pkgs.writeScript "mock-ntfy.py" ''
+    import json
+    import os
+    from http.server import HTTPServer, BaseHTTPRequestHandler
+    from datetime import datetime
+
+    REQUESTS_FILE = "/tmp/ntfy-requests.json"
+
+    class MockNtfy(BaseHTTPRequestHandler):
+        def _respond(self, code=200, body=b"Ok"):
+            self.send_response(code)
+            self.send_header("Content-Type", "application/json")
+            self.end_headers()
+            self.wfile.write(body if isinstance(body, bytes) else body.encode())
+
+        def do_GET(self):
+            self._respond()
+
+        def do_POST(self):
+            content_length = int(self.headers.get("Content-Length", 0))
+            body = self.rfile.read(content_length).decode() if content_length > 0 else ""
+
+            request_data = {
+                "timestamp": datetime.now().isoformat(),
+                "path": self.path,
+                "headers": dict(self.headers),
+                "body": body,
+            }
+
+            requests = []
+            if os.path.exists(REQUESTS_FILE):
+                try:
+                    with open(REQUESTS_FILE, "r") as f:
+                        requests = json.load(f)
+                except (OSError, ValueError):
+                    requests = []
+
+            requests.append(request_data)
+
+            with open(REQUESTS_FILE + ".tmp", "w") as f:
+                json.dump(requests, f, indent=2)
+            os.replace(REQUESTS_FILE + ".tmp", REQUESTS_FILE)
+            self._respond()
+
+        def log_message(self, format, *args):
+            pass
+
+    HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever()
+  '';
+
+  # Jellyfin log lines used in the test.
+  # Jellyfin log format: [YYYY-MM-DD HH:MM:SS.mmm +TZ] [LEVEL] [thread] Source: message
+  mkLogLine =
+    ts: level: thread: source: msg:
+    "[${ts}] [${level}] [${thread}] ${source}: ${msg}";
+
+  # Realistic error lines that should trigger alerts.
+  transcodeCrash =
+    mkLogLine "2026-05-12 14:23:01.123 +00:00" "ERR" "42"
+      "MediaBrowser.MediaEncoding.Transcoding.TranscodeManager"
+      "FFmpeg exited with code 137 after 45.2 seconds";
+
+  playbackError =
+    mkLogLine "2026-05-12 14:24:05.456 +00:00" "ERR" "17"
+      "Emby.Server.Implementations.Session.SessionManager"
+      "Playback error for user \"alice\" on device \"Living Room TV\"";
+
+  exceptionMiddleware =
+    mkLogLine "2026-05-12 14:25:10.789 +00:00" "ERR" "99"
+      "Jellyfin.Server.Middleware.ExceptionMiddleware"
+      ''Error processing request. URL "GET" "/Videos/a1b2c3d4-e5f6-7890-abcd-ef1234567890/stream".'';
+
+  streamAbort =
+    mkLogLine "2026-05-12 14:26:00.111 +00:00" "ERR" "33"
+      "MediaBrowser.Api.Playback.Hls.DynamicHlsService"
+      "Cannot open HLS stream segment";
+
+  # ERR lines that must NOT alert: unlisted source and no client-failure pattern in the message.
+  authDenied =
+    mkLogLine "2026-05-12 14:27:00.222 +00:00" "ERR" "12"
+      "Jellyfin.Server.Implementations.Users.UserManager"
+      ''Authentication request for "bob" has been denied (IP: "10.0.0.5").'';
+
+  libraryScanError =
+    mkLogLine "2026-05-12 14:28:00.333 +00:00" "ERR" "55" "MediaBrowser.Controller.Entities.BaseItem"
+      "Error refreshing item metadata for /library/some-broken-file.mkv";
+
+  # WRN/INF lines from otherwise-alerting sources: below ERR level, so they never trigger.
+  warnLine =
+    mkLogLine "2026-05-12 14:29:00.444 +00:00" "WRN" "77"
+      "MediaBrowser.MediaEncoding.Transcoding.TranscodeManager"
+      "Slow transcoding detected (0.95x realtime)";
+
+  infoLine =
+    mkLogLine "2026-05-12 14:30:00.555 +00:00" "INF" "88"
+      "Jellyfin.Server.Middleware.ExceptionMiddleware"
+      "This is informational and should not alert";
+
+  # Log file contents for each scenario.
+  logWithFailures = pkgs.writeText "jellyfin-failure-log.log" ''
+    ${authDenied}
+    ${libraryScanError}
+    ${warnLine}
+    ${infoLine}
+    ${transcodeCrash}
+    ${playbackError}
+    ${exceptionMiddleware}
+    ${streamAbort}
+  '';
+
+  logWithDedup = pkgs.writeText "jellyfin-dedup-log.log" ''
+    ${transcodeCrash}
+    ${transcodeCrash}
+    ${transcodeCrash}
+  '';
+
+  logNoFailures = pkgs.writeText "jellyfin-clean-log.log" ''
+    ${authDenied}
+    ${libraryScanError}
+    ${warnLine}
+    ${infoLine}
+  '';
+in
+pkgs.testers.runNixOSTest {
+  name = "jellyfin-failure-alert";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      imports = [
+        ../modules/ntfy-alerts.nix
+        ../services/jellyfin/jellyfin-failure-alert.nix
+      ];
+
+      system.stateVersion = config.system.stateVersion;
+
+      virtualisation.memorySize = 2048;
+
+      environment.systemPackages = with pkgs; [
+        curl
+        jq
+      ];
+      # Minimal jellyfin config so the guard passes. Jellyfin 10.11+
+      # requires 2 GiB free space, so give the VM a 4 GiB disk.
+      virtualisation.diskSize = 4096;
+
+      services.jellyfin = {
+        enable = true;
+        dataDir = "/var/lib/jellyfin-test";
+        cacheDir = "/var/cache/jellyfin-test";
+        user = "jellyfin";
+        group = "jellyfin";
+      };
+
+      # Jellyfin base dirs
+      systemd.tmpfiles.rules = [
+        "d /var/lib/jellyfin-test 0755 jellyfin jellyfin"
+        "d /var/lib/jellyfin-test/log 0755 jellyfin jellyfin"
+        "f /run/ntfy-test-topic 0644 root root - test-alerts"
+        "f /run/ntfy-test-token 0644 root root - test-token-value"
+      ];
+
+      # Mock ntfy server
+      systemd.services.mock-ntfy = {
+        description = "Mock ntfy server for jellyfin-failure-alert test";
+        wantedBy = [ "multi-user.target" ];
+        before = [ "jellyfin-failure-alert.service" ];
+        serviceConfig = {
+          ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}";
+          Type = "simple";
+        };
+      };
+
+      # Configure ntfy-alerts to use mock server
+      services.ntfyAlerts = {
+        enable = true;
+        serverUrl = "http://localhost:8080";
+        topicFile = "/run/ntfy-test-topic";
+        tokenFile = "/run/ntfy-test-token";
+      };
+
+      # Speed up polling for the test
+      systemd.services.jellyfin-failure-alert.environment.POLL_INTERVAL = lib.mkForce "2";
+      systemd.services.jellyfin-failure-alert.environment.DEDUP_WINDOW = lib.mkForce "10";
+    };
+  testScript = ''
+    import json
+    import time
+
+    LOG_DIR = "/var/lib/jellyfin-test/log"
+    REQUESTS_FILE = "/tmp/ntfy-requests.json"
+
+    start_all()
+
+    # Wait for mock ntfy server
+    machine.wait_for_unit("mock-ntfy.service")
+    machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30)
+
+    # ------------------------------------------------------------------
+    # Phase 1: Client failures trigger alerts
+    # ------------------------------------------------------------------
+
+    with subtest("Client failure log lines trigger ntfy notifications"):
+        # Place a log file with known failure lines
+        machine.succeed(
+            "cp ${logWithFailures} {}/log_test.log && chown jellyfin:jellyfin {}/log_test.log".format(
+                LOG_DIR, LOG_DIR
+            )
+        )
+
+        # Start the monitor
+        machine.succeed("systemctl start jellyfin-failure-alert.service")
+
+        # Wait for the monitor to poll and send notifications.
+        # Should pick up: transcodeCrash, playbackError, exceptionMiddleware, streamAbort
+        machine.wait_until_succeeds(
+            "test -f {} && test $(jq 'length' {}) -ge 4".format(REQUESTS_FILE, REQUESTS_FILE),
+            timeout=30,
+        )
+
+        result = machine.succeed("cat {}".format(REQUESTS_FILE))
+        requests = json.loads(result)
+        print(f"Phase 1: received {len(requests)} ntfy notifications")
+
+        assert len(requests) >= 4, f"Expected >= 4 notifications, got {len(requests)}"
+
+        # Verify each notification has the expected shape
+        for req in requests:
+            assert "/test-alerts" in req["path"], f"Wrong topic path: {req['path']}"
+            assert "Title" in req["headers"], "Missing Title header"
+            assert "Jellyfin" in req["headers"]["Title"], (
+                f"Title should mention Jellyfin: {req['headers']['Title']}"
+            )
+            assert req["headers"]["Priority"] == "high", (
+                f"Expected Priority 'high', got {req['headers'].get('Priority')}"
+            )
+            assert req["headers"]["Tags"] == "warning", (
+                f"Expected Tags 'warning', got {req['headers'].get('Tags')}"
+            )
+            assert req["headers"]["Authorization"] == "Bearer test-token-value", (
+                f"Missing or wrong Authorization header: {req['headers'].get('Authorization')}"
+            )
+            assert len(req["body"]) > 0, "Notification body is empty"
+
+        # Verify specific error content appears in bodies
+        bodies = " ".join(r["body"] for r in requests)
+        assert "FFmpeg" in bodies, "Missing FFmpeg error in notifications"
+        assert "Playback error" in bodies, "Missing playback error in notifications"
+        assert "ExceptionMiddleware" in bodies or "/Videos/" in bodies, (
+            "Missing exception middleware error in notifications"
+        )
+
+        print("Phase 1 passed: all client failures triggered alerts")
+
+    # ------------------------------------------------------------------
+    # Phase 2: Non-client errors are filtered out
+    # ------------------------------------------------------------------
+
+    with subtest("Non-client errors do not trigger alerts"):
+        # Clear previous requests and add a new log file with only non-client errors
+        machine.succeed("rm -f {}".format(REQUESTS_FILE))
+        machine.succeed(
+            "cp ${logNoFailures} {}/log_clean.log && chown jellyfin:jellyfin {}/log_clean.log".format(
+                LOG_DIR, LOG_DIR
+            )
+        )
+
+        # Wait several poll cycles (POLL_INTERVAL=2) and outlast DEDUP_WINDOW=10, so
+        # Phase 1's FFmpeg signature has expired before Phase 3 replays that error.
+        time.sleep(12)
+
+        # No requests file may exist; if one does, it must hold an empty list.
+        rc, _ = machine.execute("test -f {}".format(REQUESTS_FILE))
+        if rc == 0:
+            machine.succeed("test $(jq 'length' {}) -eq 0".format(REQUESTS_FILE))
+
+        print("Phase 2 passed: non-client errors correctly filtered")
+
+    # ------------------------------------------------------------------
+    # Phase 3: Deduplication
+    # ------------------------------------------------------------------
+
+    with subtest("Duplicate errors are deduplicated within the window"):
+        # Clear and add a log file with the same error repeated
+        machine.succeed("rm -f {}".format(REQUESTS_FILE))
+        machine.succeed(
+            "cp ${logWithDedup} {}/log_dedup.log && chown jellyfin:jellyfin {}/log_dedup.log".format(
+                LOG_DIR, LOG_DIR
+            )
+        )
+
+        # Wait for the monitor to process. Should get exactly 1 notification.
+        machine.wait_until_succeeds(
+            "test -f {} && test $(jq 'length' {}) -eq 1".format(REQUESTS_FILE, REQUESTS_FILE),
+            timeout=30,
+        )
+
+        result = machine.succeed("cat {}".format(REQUESTS_FILE))
+        requests = json.loads(result)
+        print(f"Phase 3: received {len(requests)} notifications (expected 1 for dedup)")
+
+        assert len(requests) == 1, (
+            f"Expected exactly 1 notification for 3 identical errors, got {len(requests)}"
+        )
+        assert "FFmpeg" in requests[0]["body"], "Missing FFmpeg error in dedup notification"
+
+        print("Phase 3 passed: deduplication works correctly")
+
+    # ------------------------------------------------------------------
+    # Phase 4: Re-alert after dedup window expires
+    # ------------------------------------------------------------------
+
+    with subtest("Same error re-alerts after dedup window expires"):
+        # Wait for dedup window to expire (10s)
+        time.sleep(12)
+
+        # Write the same error again in a new log file
+        machine.succeed(
+            "cp ${logWithDedup} {}/log_dedup2.log && chown jellyfin:jellyfin {}/log_dedup2.log".format(
+                LOG_DIR, LOG_DIR
+            )
+        )
+
+        # Wait for the monitor to pick it up. Should now have 2 entries.
+        machine.wait_until_succeeds(
+            "test -f {} && test $(jq 'length' {}) -eq 2".format(REQUESTS_FILE, REQUESTS_FILE),
+            timeout=30,
+        )
+
+        result = machine.succeed("cat {}".format(REQUESTS_FILE))
+        requests = json.loads(result)
+        print(f"Phase 4: received {len(requests)} notifications (expected 2)")
+
+        assert len(requests) == 2, (
+            f"Expected 2 notifications after dedup window expired, got {len(requests)}"
+        )
+
+        print("Phase 4 passed: re-alert after dedup window works")
+
+    print("All jellyfin-failure-alert tests passed!")
+  '';
+}
diff --git a/tests/tests.nix b/tests/tests.nix
index e2d9e40..e00f120 100644
--- a/tests/tests.nix
+++ b/tests/tests.nix
@@ -27,6 +27,8 @@ in
 
   # jellyfin annotation service test
   jellyfinAnnotationsTest = handleTest ./jellyfin-annotations.nix;
+  # jellyfin failure alert test
+  jellyfinFailureAlertTest = handleTest ./jellyfin-failure-alert.nix;
 
   # zfs scrub annotations test
   zfsScrubAnnotationsTest = handleTest ./zfs-scrub-annotations.nix;