deploy-guard: block activation while users are online
- modules/server-deploy-guard.nix: extendable aggregator registered via
services.deployGuard.checks.<name>.{description,command}. Installs
deploy-guard-check with per-check timeout, pass/block reporting, JSON
output, DEPLOY_GUARD_BYPASS / /run/deploy-guard-bypass (single-shot).
- services/jellyfin/jellyfin-deploy-guard.nix: curl+jq on /Sessions,
blocks when any session carries NowPlayingItem; soft-fails when unreachable.
- services/minecraft-deploy-guard.nix: mcstatus SLP query on 25565, blocks
when players.online > 0; soft-fails when unreachable.
- flake.nix: wrap deploy.nodes.muffin activation with activate.custom so
deploy-guard-check runs before switch-to-configuration. Auto-rollback
catches the failure. dryActivate/boot branches preserved.
- deploy.sh: SSH preflight for ./deploy.sh muffin with --force /
DEPLOY_GUARD_FORCE=1 (touches remote bypass marker). Connectivity
failure is soft; activation still enforces.
- tests/deploy-guard.nix: aggregator contract, bypass mechanics, timeout,
JSON output.
This commit is contained in:
173
modules/server-deploy-guard.nix
Normal file
173
modules/server-deploy-guard.nix
Normal file
@@ -0,0 +1,173 @@
|
||||
# Extendable deploy guard. Aggregates per-service "is it safe to deploy right now?"
|
||||
# checks registered under `services.deployGuard.checks.<name>` and exposes a single
|
||||
# `deploy-guard-check` binary that deploy-rs (and the local deploy.sh preflight)
|
||||
# invokes before switch-to-configuration.
|
||||
#
|
||||
# Extension contract (per-service): register a submodule entry whose `command`
|
||||
# package installs a single executable (via meta.mainProgram) that exits 0 when
|
||||
# the service is idle and non-zero when a live user would be disrupted by a
|
||||
# deploy. Human-readable reasons go to stdout/stderr.
|
||||
#
|
||||
# Bypass: export DEPLOY_GUARD_BYPASS=1 or touch /run/deploy-guard-bypass before
|
||||
# invoking the aggregator. The marker file is single-shot; the aggregator
|
||||
# removes it after honoring it.
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
|
||||
# Shorthand for this module's own option subtree.
cfg = config.services.deployGuard;

# Names of all registered checks. Nix hands attribute names back in
# alphabetical order, which keeps the generated script's output stable.
checkNames = lib.attrNames cfg.checks;
|
||||
|
||||
# Shell fragment holding one `run_check <name> <description> <executable>`
# invocation per registered check, in checkNames order. Name and description
# are shell-escaped; the executable is resolved through lib.getExe.
runCheckLines = lib.concatStringsSep "\n" (
  map (
    name:
    let
      check = cfg.checks.${name};
    in
    ''
      run_check ${lib.escapeShellArg name} \
        ${lib.escapeShellArg check.description} \
        ${lib.getExe check.command}
    ''
  ) checkNames
);
|
||||
|
||||
# The `deploy-guard-check` executable.
#
# Usage: deploy-guard-check [--json]
#
# Runs every registered check (see runCheckLines) under a per-check timeout
# of cfg.timeout seconds and reports PASS/BLOCK per check, or a single JSON
# document when `--json` is the first argument.
#
# Exit status: 0 when every check passed or a bypass was honored, 1 when at
# least one check exited non-zero or timed out.
#
# Bypass: DEPLOY_GUARD_BYPASS=1 in the environment, or the marker file
# /run/deploy-guard-bypass (removed once it has been honored).
aggregator = pkgs.writeShellApplication {
  name = "deploy-guard-check";
  runtimeInputs = [
    pkgs.coreutils
    # The BLOCK report pipes check output through `sed`; declare it here so
    # the script does not depend on whatever PATH the caller happens to have.
    pkgs.gnused
    pkgs.jq
  ];
  text = ''
    json_mode=0
    if [[ "''${1:-}" == "--json" ]]; then
      json_mode=1
    fi

    # Bypass precedence: env var first (useful in ad-hoc SSH one-liners),
    # then file marker (written by `./deploy.sh muffin --force` over SSH).
    bypass=0
    bypass_reason=""
    if [[ "''${DEPLOY_GUARD_BYPASS:-0}" == "1" ]]; then
      bypass=1
      bypass_reason="DEPLOY_GUARD_BYPASS=1"
    elif [[ -e /run/deploy-guard-bypass ]]; then
      bypass=1
      bypass_reason="/run/deploy-guard-bypass"
      # Single-shot marker: consume it so the next run is guarded again.
      rm -f /run/deploy-guard-bypass
    fi

    if [[ "$bypass" == "1" ]]; then
      if [[ "$json_mode" == "1" ]]; then
        jq -cn --arg reason "$bypass_reason" \
          '{bypassed:true, reason:$reason, ok:true, checks:[]}'
      else
        printf 'deploy-guard: BYPASS via %s — no checks executed\n' "$bypass_reason" >&2
      fi
      exit 0
    fi

    # One compact JSON object per executed check, in execution order.
    declare -a results=()
    # Becomes 1 as soon as any check blocks; used as the exit status.
    overall=0

    # run_check NAME DESCRIPTION EXE
    # Runs EXE with stdout and stderr captured together, records the result
    # in `results`, and prints a PASS/BLOCK line unless in JSON mode.
    run_check() {
      local name="$1" description="$2" exe="$3"
      local status=0 output=""
      # TERM is sent when the timeout expires; KILL follows 5 seconds later
      # so a check that ignores TERM cannot stall the deploy indefinitely.
      # A check that had to be killed is reported with exit 137, not 124.
      # shellcheck disable=SC2034
      output=$(timeout --signal=TERM --kill-after=5 ${toString cfg.timeout} "$exe" 2>&1) || status=$?

      if [[ $status -eq 0 ]]; then
        results+=("$(jq -cn \
          --arg name "$name" \
          --arg description "$description" \
          --arg output "$output" \
          '{name:$name, description:$description, ok:true, output:$output}')")
        [[ "$json_mode" == "1" ]] || printf 'PASS: %s — %s\n' "$name" "$description"
      else
        overall=1
        results+=("$(jq -cn \
          --arg name "$name" \
          --arg description "$description" \
          --arg output "$output" \
          --argjson exit "$status" \
          '{name:$name, description:$description, ok:false, exit:$exit, output:$output}')")
        if [[ "$json_mode" != "1" ]]; then
          # 124 is what `timeout` returns when the time limit was hit.
          if [[ $status -eq 124 ]]; then
            printf 'BLOCK: %s — %s — check timed out after ${toString cfg.timeout}s\n' \
              "$name" "$description"
          else
            printf 'BLOCK: %s — %s\n' "$name" "$description"
            if [[ -n "$output" ]]; then
              printf '%s\n' "$output" | sed 's/^/ /'
            fi
          fi
        fi
      fi
    }

    ${runCheckLines}

    if [[ "$json_mode" == "1" ]]; then
      ok=$([[ $overall -eq 0 ]] && echo true || echo false)
      # Join the per-check objects with commas. The `:-` default keeps the
      # expansion safe for an empty array; the resulting empty item is skipped.
      joined=""
      for r in "''${results[@]:-}"; do
        if [[ -z "$r" ]]; then continue; fi
        if [[ -z "$joined" ]]; then joined="$r"; else joined="$joined,$r"; fi
      done
      printf '{"bypassed":false,"ok":%s,"checks":[%s]}\n' "$ok" "$joined"
    fi

    exit "$overall"
  '';
};
|
||||
in
|
||||
{
|
||||
# Option declarations: the enable switch, the timeout shared by all checks,
# and the attribute set in which modules register their checks.
options.services.deployGuard =
  let
    inherit (lib) mkEnableOption mkOption types;

    # Shape of one `checks.<name>` entry.
    checkModule = types.submodule {
      options = {
        description = mkOption {
          type = types.str;
          description = "Short human description shown in pass/fail output.";
        };
        command = mkOption {
          type = types.package;
          description = ''
            A derivation whose meta.mainProgram is the check executable.
            Contract: exit 0 when deploys are safe, non-zero with a
            human-readable reason on stdout/stderr when blocked.
          '';
        };
      };
    };
  in
  {
    enable = mkEnableOption "deploy guard aggregator for blocking deploys on live use";

    timeout = mkOption {
      type = types.ints.positive;
      default = 10;
      description = "Per-check timeout in seconds.";
    };

    checks = mkOption {
      type = types.attrsOf checkModule;
      default = { };
      description = ''
        Per-service deploy guard checks. Merged from anywhere in the config.
        Any module can register a check — see modules/server-deploy-guard.nix
        for the contract.
      '';
    };
  };
|
||||
|
||||
# Active only when the guard is enabled: evaluation fails if no check has
# been registered, and the aggregator is added to environment.systemPackages.
config = lib.mkIf cfg.enable {
  assertions = lib.singleton {
    # checkNames is empty exactly when cfg.checks has no attributes.
    assertion = checkNames != [ ];
    message = ''
      services.deployGuard.enable = true but no checks are registered.
      Either disable it or register at least one check via
      services.deployGuard.checks.<name>.
    '';
  };

  environment.systemPackages = lib.singleton aggregator;
};
|
||||
}
|
||||
Reference in New Issue
Block a user