From aef99e7365152e2715f5d69c5fe38a57a9c01972 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Wed, 22 Apr 2026 00:36:21 -0400 Subject: [PATCH] deploy-guard: block activation while users are online - modules/server-deploy-guard.nix: extendable aggregator registered via services.deployGuard.checks.<name>.{description,command}. Installs deploy-guard-check with per-check timeout, pass/block reporting, JSON output, DEPLOY_GUARD_BYPASS / /run/deploy-guard-bypass (single-shot). - services/jellyfin/jellyfin-deploy-guard.nix: curl+jq on /Sessions, blocks when any session carries NowPlayingItem; soft-fails when unreachable. - services/minecraft-deploy-guard.nix: mcstatus SLP query on 25565, blocks when players.online > 0; soft-fails when unreachable. - flake.nix: wrap deploy.nodes.muffin activation with activate.custom so deploy-guard-check runs before switch-to-configuration. Auto-rollback catches the failure. dryActivate/boot branches preserved. - deploy.sh: SSH preflight for ./deploy.sh muffin with --force / DEPLOY_GUARD_FORCE=1 (touches remote bypass marker). Connectivity failure is soft; activation still enforces. - tests/deploy-guard.nix: aggregator contract, bypass mechanics, timeout, JSON output. 
--- AGENTS.md | 33 ++++ deploy.sh | 46 ++++- flake.nix | 32 +++- hosts/muffin/default.nix | 3 + modules/server-deploy-guard.nix | 173 +++++++++++++++++++ services/jellyfin/default.nix | 1 + services/jellyfin/jellyfin-deploy-guard.nix | 78 +++++++++ services/minecraft-deploy-guard.nix | 67 ++++++++ services/minecraft.nix | 1 + tests/deploy-guard.nix | 175 ++++++++++++++++++++ tests/tests.nix | 1 + 11 files changed, 603 insertions(+), 7 deletions(-) create mode 100644 modules/server-deploy-guard.nix create mode 100644 services/jellyfin/jellyfin-deploy-guard.nix create mode 100644 services/minecraft-deploy-guard.nix create mode 100644 tests/deploy-guard.nix diff --git a/AGENTS.md b/AGENTS.md index f5e29b5..aaf6b55 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -156,6 +156,39 @@ Hard requirements that are asserted at eval time: - **Hugepages**: services that need 2 MiB hugepages declare their budget in `service-configs.nix` under `hugepages_2m.services`. The `vm.nr_hugepages` sysctl is derived from the total. - **PostgreSQL-first**: any service that supports PostgreSQL uses it (via peer-auth Unix socket when possible). Per-service Sqlite (or similar) is not liked. +## Deploy guard (muffin) + +`modules/server-deploy-guard.nix` blocks `./deploy.sh muffin` / deploy-rs activation when a service it covers is in active use. Two paths enforce it: + +- **Preflight**: `./deploy.sh muffin` SSHes to `server-public` and runs `deploy-guard-check` before the build. Connectivity failure is soft (activation still enforces). `./deploy.sh muffin --force` or `DEPLOY_GUARD_FORCE=1 ./deploy.sh muffin` touches `/run/deploy-guard-bypass` remotely (single-shot) and skips the preflight. +- **Activation**: the custom `activate.custom` wrapper in `flake.nix` runs `$PROFILE/sw/bin/deploy-guard-check` before `switch-to-configuration switch`. A non-zero exit triggers deploy-rs auto-rollback. Same bypass: `DEPLOY_GUARD_BYPASS=1` env or pre-touched `/run/deploy-guard-bypass`. 
+ +### Adding a new check + +In the service's own file (or a sibling `<service>-deploy-guard.nix`): + +```nix +{ config, lib, pkgs, ... }: +let + check = pkgs.writeShellApplication { + name = "deploy-guard-check-<service>"; + runtimeInputs = [ /* curl, jq, etc. */ ]; + text = '' + # exit 0 when the service is idle / unreachable (soft-fail) + # exit 1 with a reason on stdout/stderr when live users would be disrupted + ''; + }; +in +lib.mkIf config.services.<service>.enable { + services.deployGuard.checks.<service> = { + description = "Active users"; + command = check; + }; +} +``` + +Existing registrations live in `services/jellyfin/jellyfin-deploy-guard.nix` (REST `/Sessions` via curl+jq) and `services/minecraft-deploy-guard.nix` (Server List Ping via `mcstatus`). Prefer soft-fail on unreachable — a service that's already down has no users to disrupt. + ## Technical details - **Privilege escalation**: `doas` everywhere; `sudo` is disabled on every host. diff --git a/deploy.sh b/deploy.sh index 40dc5d7..3e6d27a 100755 --- a/deploy.sh +++ b/deploy.sh @@ -2,14 +2,17 @@ # Wrapper around nixos-rebuild and deploy-rs for the three hosts. # # Usage: -# ./deploy.sh # nixos-rebuild boot on current host (mreow/yarn) -# ./deploy.sh switch # apply immediately on current host -# ./deploy.sh test # apply without adding boot entry -# ./deploy.sh build # build only, no activation -# ./deploy.sh muffin # build + deploy to muffin via deploy-rs +# ./deploy.sh # nixos-rebuild boot on current host (mreow/yarn) +# ./deploy.sh switch # apply immediately on current host +# ./deploy.sh test # apply without adding boot entry +# ./deploy.sh build # build only, no activation +# ./deploy.sh muffin # build + deploy to muffin via deploy-rs +# ./deploy.sh muffin --force # bypass the deploy guard (active-user check) # # muffin cannot be rebuilt locally from another host — this script only issues # the remote deploy via deploy-rs when explicitly named. +# +# DEPLOY_GUARD_FORCE=1 is equivalent to passing --force. 
set -eu @@ -18,13 +21,44 @@ arg="${1:-boot}" case "$arg" in muffin) + shift # consume "muffin" + + force=0 + if [ "${DEPLOY_GUARD_FORCE:-0}" = "1" ]; then force=1; fi + if [ "${1:-}" = "--force" ]; then force=1; shift; fi + + if [ "$force" = "1" ]; then + echo "deploy-guard: bypass requested; setting remote marker" + ssh -o BatchMode=yes -o ConnectTimeout=3 root@server-public \ + 'touch /run/deploy-guard-bypass' \ + || echo "deploy-guard: warning: could not write remote bypass marker" >&2 + else + # Single SSH probe — if exit 255 it's a connectivity failure (skip + # preflight; the activation-time guard still enforces). Any other + # non-zero is the guard blocking the deploy. + output=$(ssh -o BatchMode=yes -o ConnectTimeout=5 \ + root@server-public deploy-guard-check 2>&1) && rc=0 || rc=$? + + if [ "$rc" -eq 255 ]; then + echo "deploy-guard: muffin unreachable for preflight;" \ + "activation will still enforce" >&2 + elif [ "$rc" -ne 0 ]; then + printf '%s\n' "$output" + echo >&2 + echo "Blocked by deploy guard. Bypass: ./deploy.sh muffin --force" >&2 + exit 1 + elif [ -n "$output" ]; then + printf '%s\n' "$output" + fi + fi + exec nix run .#deploy -- .#muffin "$@" ;; boot | switch | test | build) exec nixos-rebuild "$arg" --flake ".#$host" --use-remote-sudo ;; *) - echo "usage: $0 [muffin | boot | switch | test | build]" >&2 + echo "usage: $0 [muffin [--force] | boot | switch | test | build]" >&2 exit 2 ;; esac diff --git a/flake.nix b/flake.nix index 67fb24a..1e46131 100644 --- a/flake.nix +++ b/flake.nix @@ -372,7 +372,37 @@ profiles.system = { sshUser = "root"; user = "root"; - path = deploy-rs.lib.${system}.activate.nixos self.nixosConfigurations.muffin; + # Wrap deploy-rs.activate.nixos so the guard runs before + # switch-to-configuration. If the guard exits non-zero, deploy-rs's + # auto-rollback restores the previous profile. Bypass via + # DEPLOY_GUARD_BYPASS=1 or by pre-touching /run/deploy-guard-bypass. 
+ path = + let + base = self.nixosConfigurations.muffin; + activate = deploy-rs.lib.${system}.activate; + bootloaderDefaultCleanup = nixpkgs-stable.lib.optionalString base.config.boot.loader.systemd-boot.enable "sed -i '/^default /d' ${base.config.boot.loader.efi.efiSysMountPoint}/loader/loader.conf"; + in + ( + activate.custom + // { + dryActivate = "$PROFILE/bin/switch-to-configuration dry-activate"; + boot = "$PROFILE/bin/switch-to-configuration boot"; + } + ) + base.config.system.build.toplevel + '' + # work around https://github.com/NixOS/nixpkgs/issues/73404 + cd /tmp + + # Halt deploys while users are actively using services. + # See modules/server-deploy-guard.nix. + "$PROFILE/sw/bin/deploy-guard-check" + + $PROFILE/bin/switch-to-configuration switch + + # https://github.com/serokell/deploy-rs/issues/31 + ${bootloaderDefaultCleanup} + ''; }; }; diff --git a/hosts/muffin/default.nix b/hosts/muffin/default.nix index ed831d4..652ef45 100644 --- a/hosts/muffin/default.nix +++ b/hosts/muffin/default.nix @@ -27,6 +27,7 @@ ../../modules/server-security.nix ../../modules/ntfy-alerts.nix ../../modules/server-power.nix + ../../modules/server-deploy-guard.nix ../../services/postgresql.nix ../../services/jellyfin @@ -92,6 +93,8 @@ git.gardling.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIFMjgaMnE+zS7tL+m5E7gh9Q9U1zurLdmU0qcmEmaucu ''; + services.deployGuard.enable = true; + services.kmscon.enable = true; # Disable serial getty on ttyS0 to prevent dmesg warnings diff --git a/modules/server-deploy-guard.nix b/modules/server-deploy-guard.nix new file mode 100644 index 0000000..9a0c290 --- /dev/null +++ b/modules/server-deploy-guard.nix @@ -0,0 +1,173 @@ +# Extendable deploy guard. Aggregates per-service "is it safe to deploy right now?" +# checks registered under `services.deployGuard.checks.<name>` and exposes a single +# `deploy-guard-check` binary that deploy-rs (and the local deploy.sh preflight) +# invokes before switch-to-configuration. 
+# +# Extension contract (per-service): register a submodule entry whose `command` +# package installs a single executable (via meta.mainProgram) that exits 0 when +# the service is idle and non-zero when a live user would be disrupted by a +# deploy. Human-readable reasons go to stdout/stderr. +# +# Bypass: export DEPLOY_GUARD_BYPASS=1 or touch /run/deploy-guard-bypass before +# invoking the aggregator. The marker file is single-shot; the aggregator +# removes it after honoring it. +{ + config, + lib, + pkgs, + ... +}: +let + cfg = config.services.deployGuard; + + # attrNames returns alphabetical order in Nix; rely on that for stable output. + checkNames = builtins.attrNames cfg.checks; + + runCheckLines = lib.concatMapStringsSep "\n" (name: '' + run_check ${lib.escapeShellArg name} \ + ${lib.escapeShellArg cfg.checks.${name}.description} \ + ${lib.getExe cfg.checks.${name}.command} + '') checkNames; + + aggregator = pkgs.writeShellApplication { + name = "deploy-guard-check"; + runtimeInputs = [ + pkgs.coreutils + pkgs.jq + ]; + text = '' + json_mode=0 + if [[ "''${1:-}" == "--json" ]]; then + json_mode=1 + fi + + # Bypass precedence: env var first (useful in ad-hoc SSH one-liners), + # then file marker (written by `./deploy.sh muffin --force` over SSH). 
+ bypass=0 + bypass_reason="" + if [[ "''${DEPLOY_GUARD_BYPASS:-0}" == "1" ]]; then + bypass=1 + bypass_reason="DEPLOY_GUARD_BYPASS=1" + elif [[ -e /run/deploy-guard-bypass ]]; then + bypass=1 + bypass_reason="/run/deploy-guard-bypass" + rm -f /run/deploy-guard-bypass + fi + + if [[ "$bypass" == "1" ]]; then + if [[ "$json_mode" == "1" ]]; then + jq -cn --arg reason "$bypass_reason" \ + '{bypassed:true, reason:$reason, ok:true, checks:[]}' + else + printf 'deploy-guard: BYPASS via %s — no checks executed\n' "$bypass_reason" >&2 + fi + exit 0 + fi + + declare -a results=() + overall=0 + + run_check() { + local name="$1" description="$2" exe="$3" + local status=0 output="" + # shellcheck disable=SC2034 + output=$(timeout --signal=TERM ${toString cfg.timeout} "$exe" 2>&1) || status=$? + + if [[ $status -eq 0 ]]; then + results+=("$(jq -cn \ + --arg name "$name" \ + --arg description "$description" \ + --arg output "$output" \ + '{name:$name, description:$description, ok:true, output:$output}')") + [[ "$json_mode" == "1" ]] || printf 'PASS: %s — %s\n' "$name" "$description" + else + overall=1 + results+=("$(jq -cn \ + --arg name "$name" \ + --arg description "$description" \ + --arg output "$output" \ + --argjson exit "$status" \ + '{name:$name, description:$description, ok:false, exit:$exit, output:$output}')") + if [[ "$json_mode" != "1" ]]; then + if [[ $status -eq 124 ]]; then + printf 'BLOCK: %s — %s — check timed out after ${toString cfg.timeout}s\n' \ + "$name" "$description" + else + printf 'BLOCK: %s — %s\n' "$name" "$description" + if [[ -n "$output" ]]; then + printf '%s\n' "$output" | sed 's/^/ /' + fi + fi + fi + fi + } + + ${runCheckLines} + + if [[ "$json_mode" == "1" ]]; then + ok=$([[ $overall -eq 0 ]] && echo true || echo false) + joined="" + for r in "''${results[@]:-}"; do + if [[ -z "$r" ]]; then continue; fi + if [[ -z "$joined" ]]; then joined="$r"; else joined="$joined,$r"; fi + done + printf '{"bypassed":false,"ok":%s,"checks":[%s]}\n' "$ok" 
+ "$joined" + fi + + exit "$overall" + ''; + }; +in +{ + options.services.deployGuard = { + enable = lib.mkEnableOption "deploy guard aggregator for blocking deploys on live use"; + + timeout = lib.mkOption { + type = lib.types.ints.positive; + default = 10; + description = "Per-check timeout in seconds."; + }; + + checks = lib.mkOption { + type = lib.types.attrsOf ( + lib.types.submodule { + options = { + description = lib.mkOption { + type = lib.types.str; + description = "Short human description shown in pass/fail output."; + }; + command = lib.mkOption { + type = lib.types.package; + description = '' + A derivation whose meta.mainProgram is the check executable. + Contract: exit 0 when deploys are safe, non-zero with a + human-readable reason on stdout/stderr when blocked. + ''; + }; + }; + } + ); + default = { }; + description = '' + Per-service deploy guard checks. Merged from anywhere in the config. + Any module can register a check — see modules/server-deploy-guard.nix + for the contract. + ''; + }; + }; + + config = lib.mkIf cfg.enable { + environment.systemPackages = [ aggregator ]; + + assertions = [ + { + assertion = cfg.checks != { }; + message = '' + services.deployGuard.enable = true but no checks are registered. + Either disable it or register at least one check via + services.deployGuard.checks.<name>. + ''; + } + ]; + }; +} diff --git a/services/jellyfin/default.nix b/services/jellyfin/default.nix index a396984..45a46f6 100644 --- a/services/jellyfin/default.nix +++ b/services/jellyfin/default.nix @@ -2,5 +2,6 @@ imports = [ ./jellyfin.nix ./jellyfin-qbittorrent-monitor.nix + ./jellyfin-deploy-guard.nix ]; } diff --git a/services/jellyfin/jellyfin-deploy-guard.nix b/services/jellyfin/jellyfin-deploy-guard.nix new file mode 100644 index 0000000..3888bbe --- /dev/null +++ b/services/jellyfin/jellyfin-deploy-guard.nix @@ -0,0 +1,78 @@ +# Deploy guard check for Jellyfin. 
+# +# Contract (deploy-guard-check plug-in): +# - exit 0: Jellyfin has no active playback sessions (or is unreachable, which +# also means no users can be watching). +# - exit 1: at least one session is actively playing back media; stdout lists +# user / title / client so the operator sees who they'd disrupt. +# +# A paused session counts as "active" — the user is at the keyboard and will +# notice a restart. +{ + config, + lib, + pkgs, + service_configs, + ... +}: +let + apiKeyPath = config.age.secrets.jellyfin-api-key.path; + jellyfinPort = service_configs.ports.private.jellyfin.port; + + check = pkgs.writeShellApplication { + name = "deploy-guard-check-jellyfin"; + runtimeInputs = with pkgs; [ + curl + jq + coreutils + ]; + text = '' + api_key_path=${lib.escapeShellArg apiKeyPath} + if [[ ! -r "$api_key_path" ]]; then + echo "jellyfin: api key not readable at $api_key_path; skipping" >&2 + exit 0 + fi + + key=$(cat "$api_key_path") + + if ! resp=$(curl -sf --max-time 5 \ + -H "Authorization: MediaBrowser Token=$key" \ + "http://127.0.0.1:${toString jellyfinPort}/Sessions" 2>/dev/null); then + echo "jellyfin: unreachable; assuming safe to deploy" >&2 + exit 0 + fi + + # Parse defensively — if Jellyfin returns something we can't understand + # we prefer allowing the deploy over blocking it (the worst case is we + # restart jellyfin while nobody is watching). + if ! 
active=$(printf '%s' "$resp" | jq '[.[] | select(.NowPlayingItem)] | length' 2>/dev/null); then + echo "jellyfin: /Sessions response not parsable; assuming safe" >&2 + exit 0 + fi + + if [[ "$active" -eq 0 ]]; then + exit 0 + fi + + echo "Jellyfin: $active active playback session(s):" + printf '%s' "$resp" | jq -r ' + .[] + | select(.NowPlayingItem) + | " - \(.UserName // "?") \(if (.PlayState.IsPaused // false) then "paused" else "playing" end) \(.NowPlayingItem.Type // "item") \"\(.NowPlayingItem.Name // "?")\" on \(.Client // "?") / \(.DeviceName // "?")" + ' + exit 1 + ''; + }; +in +{ + imports = [ + ../../modules/server-deploy-guard.nix + ]; + + config = lib.mkIf config.services.jellyfin.enable { + services.deployGuard.checks.jellyfin = { + description = "Active Jellyfin playback sessions"; + command = check; + }; + }; +} diff --git a/services/minecraft-deploy-guard.nix b/services/minecraft-deploy-guard.nix new file mode 100644 index 0000000..3449a07 --- /dev/null +++ b/services/minecraft-deploy-guard.nix @@ -0,0 +1,67 @@ +# Deploy guard check for the Minecraft server. +# +# Queries the standard Server List Ping (SLP) handshake on the game port — +# no RCON, no query, no extra config. SLP is always enabled and returns the +# live player count plus (usually) a short name sample. +# +# Contract (deploy-guard-check plug-in): +# - exit 0: no players online, or the server isn't reachable at all (down ⇒ +# no users to disrupt). +# - exit 1: at least one player is connected; stdout lists the names that +# made it into the SLP sample. +{ + config, + lib, + pkgs, + service_configs, + ... 
+}: +let + minecraftPort = service_configs.ports.public.minecraft.port; + + check = + pkgs.writers.writePython3Bin "deploy-guard-check-minecraft" + { + libraries = [ pkgs.python3Packages.mcstatus ]; + flakeIgnore = [ + "E501" + "E402" + ]; + } + '' + import sys + + try: + from mcstatus import JavaServer + except ImportError as e: + print(f"minecraft: mcstatus unavailable ({e}); assuming safe", file=sys.stderr) + sys.exit(0) + + try: + status = JavaServer.lookup("127.0.0.1:${toString minecraftPort}", timeout=5).status() + except Exception as e: + print(f"minecraft: unreachable ({e}); assuming safe to deploy", file=sys.stderr) + sys.exit(0) + + online = status.players.online + if online <= 0: + sys.exit(0) + + sample = getattr(status.players, "sample", None) or [] + names = ", ".join(p.name for p in sample) or "" + print(f"Minecraft: {online} player(s) online: {names}") + sys.exit(1) + ''; +in +{ + imports = [ + ../modules/server-deploy-guard.nix + ]; + + config = lib.mkIf config.services.minecraft-servers.enable { + services.deployGuard.checks.minecraft = { + description = "Players connected to the Minecraft server"; + command = check; + }; + }; +} diff --git a/services/minecraft.nix b/services/minecraft.nix index 895d9c5..2882079 100644 --- a/services/minecraft.nix +++ b/services/minecraft.nix @@ -22,6 +22,7 @@ "z ${service_configs.minecraft.parent_dir}/${service_configs.minecraft.server_name} 710 ${config.services.minecraft-servers.user} ${config.services.minecraft-servers.group}" "z ${service_configs.minecraft.parent_dir}/${service_configs.minecraft.server_name}/squaremap 710 ${config.services.minecraft-servers.user} ${config.services.minecraft-servers.group}" ]) + ./minecraft-deploy-guard.nix ]; boot.kernel.sysctl = { diff --git a/tests/deploy-guard.nix b/tests/deploy-guard.nix new file mode 100644 index 0000000..aa35aad --- /dev/null +++ b/tests/deploy-guard.nix @@ -0,0 +1,175 @@ +# Aggregator test for modules/server-deploy-guard.nix. 
+# +# The jellyfin and minecraft check scripts are validated at build time by +# writeShellApplication's shellcheck / writePython3Bin's pyflakes, plus manual +# post-deploy verification on muffin. This test focuses on the aggregator and +# bypass contract with synthetic checks so failures in this file point at the +# aggregator itself rather than at Jellyfin/Minecraft availability. +{ + lib, + pkgs, + inputs, + ... +}: +let + baseServiceConfigs = import ../hosts/muffin/service-configs.nix; + testServiceConfigs = lib.recursiveUpdate baseServiceConfigs { + zpool_ssds = ""; + https.domain = "test.local"; + }; + + alwaysOk = pkgs.writeShellApplication { + name = "deploy-guard-check-synthetic-ok"; + text = ''echo "all clear"''; + }; + + alwaysFail = pkgs.writeShellApplication { + name = "deploy-guard-check-synthetic-fail"; + text = '' + echo "synthetic failure reason" + exit 1 + ''; + }; + + # Blocks only while /tmp/synth-fail exists. Lets the test script drive the + # check's state without restarting the system. + conditional = pkgs.writeShellApplication { + name = "deploy-guard-check-conditional"; + text = '' + if [[ -e /tmp/synth-fail ]]; then + echo "conditional marker present" + exit 1 + fi + echo "condition clear" + ''; + }; + + # Hangs past the aggregator's timeout only while /tmp/synth-slow exists — + # otherwise fast-path so the default state of other subtests is unaffected. + slowIfMarker = pkgs.writeShellApplication { + name = "deploy-guard-check-slow-if-marker"; + runtimeInputs = [ pkgs.coreutils ]; + text = '' + if [[ -e /tmp/synth-slow ]]; then + sleep 30 + fi + echo "fast path" + ''; + }; +in +pkgs.testers.runNixOSTest { + name = "deploy-guard"; + + node.specialArgs = { + inherit inputs lib; + service_configs = testServiceConfigs; + username = "testuser"; + }; + + nodes.machine = + { ... 
}: + { + imports = [ + ../modules/server-deploy-guard.nix + ]; + + environment.systemPackages = [ pkgs.jq ]; + + services.deployGuard = { + enable = true; + timeout = 2; + checks = { + always-ok = { + description = "synthetic always-pass"; + command = alwaysOk; + }; + synthetic-fail = { + description = "synthetic always-block"; + command = alwaysFail; + }; + conditional = { + description = "blocks while /tmp/synth-fail exists"; + command = conditional; + }; + slow-if-marker = { + description = "sleeps past timeout while /tmp/synth-slow exists"; + command = slowIfMarker; + }; + }; + }; + }; + + testScript = '' + import json + import time + + start_all() + machine.wait_for_unit("multi-user.target") + + with subtest("baseline: mixed pass/block aggregates to blocked"): + rc, out = machine.execute("deploy-guard-check 2>&1") + assert rc == 1, f"expected blocked (rc=1), got rc={rc}\n{out}" + assert "PASS: always-ok" in out, out + assert "PASS: conditional" in out, out + assert "PASS: slow-if-marker" in out, out + assert "BLOCK: synthetic-fail" in out, out + assert "synthetic failure reason" in out, out + + with subtest("bypass via DEPLOY_GUARD_BYPASS env"): + rc, out = machine.execute("DEPLOY_GUARD_BYPASS=1 deploy-guard-check 2>&1") + assert rc == 0, f"bypass should pass, got rc={rc}\n{out}" + assert "BYPASS" in out, out + + with subtest("bypass via /run/deploy-guard-bypass is single-shot"): + machine.succeed("touch /run/deploy-guard-bypass") + rc, out = machine.execute("deploy-guard-check 2>&1") + assert rc == 0, f"marker bypass should pass, got rc={rc}\n{out}" + machine.fail("test -e /run/deploy-guard-bypass") + rc, out = machine.execute("deploy-guard-check 2>&1") + assert rc == 1, f"marker must be single-shot, got rc={rc}\n{out}" + + with subtest("conditional check toggles on marker file"): + machine.succeed("touch /tmp/synth-fail") + rc, out = machine.execute("deploy-guard-check 2>&1") + assert rc == 1 + assert "BLOCK: conditional" in out, out + assert "conditional 
marker present" in out, out + machine.succeed("rm /tmp/synth-fail") + rc, out = machine.execute("deploy-guard-check 2>&1") + assert rc == 1 + assert "PASS: conditional" in out, out + + with subtest("per-check timeout kills runaway checks"): + machine.succeed("touch /tmp/synth-slow") + t0 = time.monotonic() + rc, out = machine.execute("deploy-guard-check 2>&1") + elapsed = time.monotonic() - t0 + assert rc == 1, f"expected block, got rc={rc}\n{out}" + assert "BLOCK: slow-if-marker" in out, out + assert "timed out" in out, out + # timeout=2s per check; the whole run must finish well under 10s even + # running every check serially. + assert elapsed < 10, f"aggregator took {elapsed:.1f}s — did timeout misfire?" + machine.succeed("rm /tmp/synth-slow") + + with subtest("--json output is well-formed"): + rc, out = machine.execute("DEPLOY_GUARD_BYPASS=1 deploy-guard-check --json") + data = json.loads(out.strip()) + assert data["bypassed"] is True, data + assert data["ok"] is True, data + assert data["checks"] == [], data + + rc, out = machine.execute("deploy-guard-check --json") + data = json.loads(out.strip()) + assert data["bypassed"] is False, data + assert data["ok"] is False, data + names = {c["name"]: c for c in data["checks"]} + assert set(names) == { + "always-ok", "synthetic-fail", "conditional", "slow-if-marker" + }, names + assert names["always-ok"]["ok"] is True, names + assert names["synthetic-fail"]["ok"] is False, names + assert names["synthetic-fail"]["exit"] == 1, names + assert "synthetic failure reason" in names["synthetic-fail"]["output"], names + ''; +} diff --git a/tests/tests.nix b/tests/tests.nix index 2266825..5d77b31 100644 --- a/tests/tests.nix +++ b/tests/tests.nix @@ -12,6 +12,7 @@ in testTest = handleTest ./testTest.nix; minecraftTest = handleTest ./minecraft.nix; jellyfinQbittorrentMonitorTest = handleTest ./jellyfin-qbittorrent-monitor.nix; + deployGuardTest = handleTest ./deploy-guard.nix; filePermsTest = handleTest ./file-perms.nix; # 
fail2ban tests