352 lines
12 KiB
Nix
352 lines
12 KiB
Nix
{
|
||
config,
|
||
lib,
|
||
pkgs,
|
||
...
|
||
}:
|
||
let
|
||
|
||
# Mock ntfy server script that records POST requests to a JSON log.
|
||
mockNtfyScript = pkgs.writeScript "mock-ntfy.py" ''
|
||
import json
|
||
import os
|
||
from http.server import HTTPServer, BaseHTTPRequestHandler
|
||
from datetime import datetime
|
||
|
||
REQUESTS_FILE = "/tmp/ntfy-requests.json"
|
||
|
||
class MockNtfy(BaseHTTPRequestHandler):
|
||
def _respond(self, code=200, body=b"Ok"):
|
||
self.send_response(code)
|
||
self.send_header("Content-Type", "application/json")
|
||
self.end_headers()
|
||
self.wfile.write(body if isinstance(body, bytes) else body.encode())
|
||
|
||
def do_GET(self):
|
||
self._respond()
|
||
|
||
def do_POST(self):
|
||
content_length = int(self.headers.get("Content-Length", 0))
|
||
body = self.rfile.read(content_length).decode() if content_length > 0 else ""
|
||
|
||
request_data = {
|
||
"timestamp": datetime.now().isoformat(),
|
||
"path": self.path,
|
||
"headers": dict(self.headers),
|
||
"body": body,
|
||
}
|
||
|
||
requests = []
|
||
if os.path.exists(REQUESTS_FILE):
|
||
try:
|
||
with open(REQUESTS_FILE, "r") as f:
|
||
requests = json.load(f)
|
||
except:
|
||
requests = []
|
||
|
||
requests.append(request_data)
|
||
|
||
with open(REQUESTS_FILE, "w") as f:
|
||
json.dump(requests, f, indent=2)
|
||
|
||
self._respond()
|
||
|
||
def log_message(self, format, *args):
|
||
pass
|
||
|
||
HTTPServer(("0.0.0.0", 8080), MockNtfy).serve_forever()
|
||
'';
|
||
|
||
# Jellyfin log lines used in the test.
|
||
# Jellyfin log format: [YYYY-MM-DD HH:MM:SS.mmm +TZ] [LEVEL] [thread] Source: message
|
||
mkLogLine =
|
||
ts: level: thread: source: msg:
|
||
"[${ts}] [${level}] [${thread}] ${source}: ${msg}";
|
||
|
||
# Realistic error lines that should trigger alerts.
|
||
transcodeCrash =
|
||
mkLogLine "2026-05-12 14:23:01.123 +00:00" "ERR" "42"
|
||
"MediaBrowser.MediaEncoding.Transcoding.TranscodeManager"
|
||
"FFmpeg exited with code 137 after 45.2 seconds";
|
||
|
||
playbackError =
|
||
mkLogLine "2026-05-12 14:24:05.456 +00:00" "ERR" "17"
|
||
"Emby.Server.Implementations.Session.SessionManager"
|
||
"Playback error for user \"alice\" on device \"Living Room TV\"";
|
||
|
||
exceptionMiddleware =
|
||
mkLogLine "2026-05-12 14:25:10.789 +00:00" "ERR" "99"
|
||
"Jellyfin.Server.Middleware.ExceptionMiddleware"
|
||
''Error processing request. URL "GET" "/Videos/a1b2c3d4-e5f6-7890-abcd-ef1234567890/stream".'';
|
||
|
||
streamAbort =
|
||
mkLogLine "2026-05-12 14:26:00.111 +00:00" "ERR" "33"
|
||
"MediaBrowser.Api.Playback.Hls.DynamicHlsService"
|
||
"Cannot open HLS stream segment";
|
||
|
||
# Lines that should NOT trigger alerts.
|
||
authDenied =
|
||
mkLogLine "2026-05-12 14:27:00.222 +00:00" "ERR" "12"
|
||
"Jellyfin.Server.Implementations.Users.UserManager"
|
||
''Authentication request for "bob" has been denied (IP: "10.0.0.5").'';
|
||
|
||
libraryScanError =
|
||
mkLogLine "2026-05-12 14:28:00.333 +00:00" "ERR" "55" "MediaBrowser.Controller.Entities.BaseItem"
|
||
"Error refreshing item metadata for /library/some-broken-file.mkv";
|
||
|
||
# Below ERR level – never triggers.
|
||
warnLine =
|
||
mkLogLine "2026-05-12 14:29:00.444 +00:00" "WRN" "77"
|
||
"MediaBrowser.MediaEncoding.Transcoding.TranscodeManager"
|
||
"Slow transcoding detected (0.95x realtime)";
|
||
|
||
infoLine =
|
||
mkLogLine "2026-05-12 14:30:00.555 +00:00" "INF" "88"
|
||
"Jellyfin.Server.Middleware.ExceptionMiddleware"
|
||
"This is informational and should not alert";
|
||
|
||
# Log file contents for each scenario.
|
||
logWithFailures = pkgs.writeText "jellyfin-failure-log.log" ''
|
||
${authDenied}
|
||
${libraryScanError}
|
||
${warnLine}
|
||
${infoLine}
|
||
${transcodeCrash}
|
||
${playbackError}
|
||
${exceptionMiddleware}
|
||
${streamAbort}
|
||
'';
|
||
|
||
logWithDedup = pkgs.writeText "jellyfin-dedup-log.log" ''
|
||
${transcodeCrash}
|
||
${transcodeCrash}
|
||
${transcodeCrash}
|
||
'';
|
||
|
||
logNoFailures = pkgs.writeText "jellyfin-clean-log.log" ''
|
||
${authDenied}
|
||
${libraryScanError}
|
||
${warnLine}
|
||
${infoLine}
|
||
'';
|
||
in
|
||
pkgs.testers.runNixOSTest {
|
||
name = "jellyfin-failure-alert";
|
||
|
||
nodes.machine =
|
||
{ pkgs, ... }:
|
||
{
|
||
imports = [
|
||
../modules/ntfy-alerts.nix
|
||
../services/jellyfin/jellyfin-failure-alert.nix
|
||
];
|
||
|
||
system.stateVersion = config.system.stateVersion;
|
||
|
||
virtualisation.memorySize = 2048;
|
||
|
||
environment.systemPackages = with pkgs; [
|
||
curl
|
||
jq
|
||
];
|
||
# Minimal jellyfin config so the guard passes. Jellyfin 10.11+
|
||
# requires 2 GiB free space, so give the VM a 4 GiB disk.
|
||
virtualisation.diskSize = 4096;
|
||
|
||
services.jellyfin = {
|
||
enable = true;
|
||
dataDir = "/var/lib/jellyfin-test";
|
||
cacheDir = "/var/cache/jellyfin-test";
|
||
user = "jellyfin";
|
||
group = "jellyfin";
|
||
};
|
||
|
||
# Jellyfin base dirs
|
||
systemd.tmpfiles.rules = [
|
||
"d /var/lib/jellyfin-test 0755 jellyfin jellyfin"
|
||
"d /var/lib/jellyfin-test/log 0755 jellyfin jellyfin"
|
||
"f /run/ntfy-test-topic 0644 root root - test-alerts"
|
||
"f /run/ntfy-test-token 0644 root root - test-token-value"
|
||
];
|
||
|
||
# Mock ntfy server
|
||
systemd.services.mock-ntfy = {
|
||
description = "Mock ntfy server for jellyfin-failure-alert test";
|
||
wantedBy = [ "multi-user.target" ];
|
||
before = [ "jellyfin-failure-alert.service" ];
|
||
serviceConfig = {
|
||
ExecStart = "${pkgs.python3}/bin/python3 ${mockNtfyScript}";
|
||
Type = "simple";
|
||
};
|
||
};
|
||
|
||
# Configure ntfy-alerts to use mock server
|
||
services.ntfyAlerts = {
|
||
enable = true;
|
||
serverUrl = "http://localhost:8080";
|
||
topicFile = "/run/ntfy-test-topic";
|
||
tokenFile = "/run/ntfy-test-token";
|
||
};
|
||
|
||
# Speed up polling for the test
|
||
systemd.services.jellyfin-failure-alert.environment.POLL_INTERVAL = lib.mkForce "2";
|
||
systemd.services.jellyfin-failure-alert.environment.DEDUP_WINDOW = lib.mkForce "10";
|
||
};
|
||
testScript = ''
|
||
import json
|
||
import time
|
||
|
||
LOG_DIR = "/var/lib/jellyfin-test/log"
|
||
REQUESTS_FILE = "/tmp/ntfy-requests.json"
|
||
|
||
start_all()
|
||
|
||
# Wait for mock ntfy server
|
||
machine.wait_for_unit("mock-ntfy.service")
|
||
machine.wait_until_succeeds("curl -sf http://localhost:8080/", timeout=30)
|
||
|
||
# ------------------------------------------------------------------
|
||
# Phase 1: Client failures trigger alerts
|
||
# ------------------------------------------------------------------
|
||
|
||
with subtest("Client failure log lines trigger ntfy notifications"):
|
||
# Place a log file with known failure lines
|
||
machine.succeed(
|
||
"cp ${logWithFailures} {}/log_test.log && chown jellyfin:jellyfin {}/log_test.log".format(
|
||
LOG_DIR, LOG_DIR
|
||
)
|
||
)
|
||
|
||
# Start the monitor
|
||
machine.succeed("systemctl start jellyfin-failure-alert.service")
|
||
|
||
# Wait for the monitor to poll and send notifications.
|
||
# Should pick up: transcodeCrash, playbackError, exceptionMiddleware, streamAbort
|
||
machine.wait_until_succeeds(
|
||
"test -f {} && test $(jq 'length' {}) -ge 4".format(REQUESTS_FILE, REQUESTS_FILE),
|
||
timeout=30,
|
||
)
|
||
|
||
result = machine.succeed("cat {}".format(REQUESTS_FILE))
|
||
requests = json.loads(result)
|
||
print(f"Phase 1: received {len(requests)} ntfy notifications")
|
||
|
||
assert len(requests) >= 4, f"Expected >= 4 notifications, got {len(requests)}"
|
||
|
||
# Verify each notification has the expected shape
|
||
for req in requests:
|
||
assert "/test-alerts" in req["path"], f"Wrong topic path: {req['path']}"
|
||
assert "Title" in req["headers"], "Missing Title header"
|
||
assert "Jellyfin" in req["headers"]["Title"], (
|
||
f"Title should mention Jellyfin: {req['headers']['Title']}"
|
||
)
|
||
assert req["headers"]["Priority"] == "high", (
|
||
f"Expected Priority 'high', got {req['headers'].get('Priority')}"
|
||
)
|
||
assert req["headers"]["Tags"] == "warning", (
|
||
f"Expected Tags 'warning', got {req['headers'].get('Tags')}"
|
||
)
|
||
assert req["headers"]["Authorization"] == "Bearer test-token-value", (
|
||
f"Missing or wrong Authorization header: {req['headers'].get('Authorization')}"
|
||
)
|
||
assert len(req["body"]) > 0, "Notification body is empty"
|
||
|
||
# Verify specific error content appears in bodies
|
||
bodies = " ".join(r["body"] for r in requests)
|
||
assert "FFmpeg" in bodies, "Missing FFmpeg error in notifications"
|
||
assert "Playback error" in bodies, "Missing playback error in notifications"
|
||
assert "ExceptionMiddleware" in bodies or "/Videos/" in bodies, (
|
||
"Missing exception middleware error in notifications"
|
||
)
|
||
|
||
print("Phase 1 passed: all client failures triggered alerts")
|
||
|
||
# ------------------------------------------------------------------
|
||
# Phase 2: Non-client errors are filtered out
|
||
# ------------------------------------------------------------------
|
||
|
||
with subtest("Non-client errors do not trigger alerts"):
|
||
# Clear previous requests and add a new log file with only non-client errors
|
||
machine.succeed("rm -f {}".format(REQUESTS_FILE))
|
||
machine.succeed(
|
||
"cp ${logNoFailures} {}/log_clean.log && chown jellyfin:jellyfin {}/log_clean.log".format(
|
||
LOG_DIR, LOG_DIR
|
||
)
|
||
)
|
||
|
||
# Wait enough poll cycles (at least 8s = 4 cycles at POLL_INTERVAL=2)
|
||
time.sleep(8)
|
||
|
||
# Assert no requests file was created. If it exists, it must be empty
|
||
# (any non-empty result means a non-client error leaked through).
|
||
rc, _ = machine.execute("test -f {}".format(REQUESTS_FILE))
|
||
if rc == 0:
|
||
machine.succeed("test $(jq 'length' {}) -eq 0".format(REQUESTS_FILE))
|
||
|
||
print("Phase 2 passed: non-client errors correctly filtered")
|
||
|
||
# ------------------------------------------------------------------
|
||
# Phase 3: Deduplication
|
||
# ------------------------------------------------------------------
|
||
|
||
with subtest("Duplicate errors are deduplicated within the window"):
|
||
# Clear and add a log file with the same error repeated
|
||
machine.succeed("rm -f {}".format(REQUESTS_FILE))
|
||
machine.succeed(
|
||
"cp ${logWithDedup} {}/log_dedup.log && chown jellyfin:jellyfin {}/log_dedup.log".format(
|
||
LOG_DIR, LOG_DIR
|
||
)
|
||
)
|
||
|
||
# Wait for the monitor to process. Should get exactly 1 notification.
|
||
machine.wait_until_succeeds(
|
||
"test -f {} && test $(jq 'length' {}) -eq 1".format(REQUESTS_FILE, REQUESTS_FILE),
|
||
timeout=30,
|
||
)
|
||
|
||
result = machine.succeed("cat {}".format(REQUESTS_FILE))
|
||
requests = json.loads(result)
|
||
print(f"Phase 3: received {len(requests)} notifications (expected 1 for dedup)")
|
||
|
||
assert len(requests) == 1, (
|
||
f"Expected exactly 1 notification for 3 identical errors, got {len(requests)}"
|
||
)
|
||
assert "FFmpeg" in requests[0]["body"], "Missing FFmpeg error in dedup notification"
|
||
|
||
print("Phase 3 passed: deduplication works correctly")
|
||
|
||
# ------------------------------------------------------------------
|
||
# Phase 4: Re-alert after dedup window expires
|
||
# ------------------------------------------------------------------
|
||
|
||
with subtest("Same error re-alerts after dedup window expires"):
|
||
# Wait for dedup window to expire (10s)
|
||
time.sleep(12)
|
||
|
||
# Write the same error again in a new log file
|
||
machine.succeed(
|
||
"cp ${logWithDedup} {}/log_dedup2.log && chown jellyfin:jellyfin {}/log_dedup2.log".format(
|
||
LOG_DIR, LOG_DIR
|
||
)
|
||
)
|
||
|
||
# Wait for the monitor to pick it up. Should now have 2 entries.
|
||
machine.wait_until_succeeds(
|
||
"test -f {} && test $(jq 'length' {}) -eq 2".format(REQUESTS_FILE, REQUESTS_FILE),
|
||
timeout=30,
|
||
)
|
||
|
||
result = machine.succeed("cat {}".format(REQUESTS_FILE))
|
||
requests = json.loads(result)
|
||
print(f"Phase 4: received {len(requests)} notifications (expected 2)")
|
||
|
||
assert len(requests) == 2, (
|
||
f"Expected 2 notifications after dedup window expired, got {len(requests)}"
|
||
)
|
||
|
||
print("Phase 4 passed: re-alert after dedup window works")
|
||
|
||
print("All jellyfin-failure-alert tests passed!")
|
||
'';
|
||
}
|