# Extendable deploy guard. Aggregates per-service "is it safe to deploy right now?"
# checks registered under `services.deployGuard.checks.<name>` and exposes a single
# `deploy-guard-check` binary that deploy-rs (and the local deploy.sh preflight)
# invokes before switch-to-configuration.
#
# Extension contract (per-service): register a submodule entry whose `command`
# package installs a single executable (via meta.mainProgram) that exits 0 when
# the service is idle and non-zero when a live user would be disrupted by a
# deploy. Human-readable reasons go to stdout/stderr.
#
# Bypass: export DEPLOY_GUARD_BYPASS=1 or touch /run/deploy-guard-bypass before
# invoking the aggregator. The environment variable takes precedence. The marker
# file is single-shot: the aggregator removes it after honoring it (it is left
# untouched when the bypass came from the environment variable instead).
{
  config,
  lib,
  pkgs,
  ...
}:
let
  cfg = config.services.deployGuard;

  # attrNames returns alphabetical order in Nix; rely on that for stable output.
  checkNames = builtins.attrNames cfg.checks;

  # Grace period (seconds) between the SIGTERM sent when a check exceeds
  # cfg.timeout and the follow-up SIGKILL. Without it, a check that ignores
  # SIGTERM would hang the aggregator (and therefore the deploy) forever.
  killAfterSeconds = 5;

  # One `run_check <name> <description> <executable>` shell invocation per
  # registered check, in checkNames order. Every argument is shell-escaped.
  runCheckLines = lib.concatMapStringsSep "\n" (name: ''
    run_check ${lib.escapeShellArg name} \
      ${lib.escapeShellArg cfg.checks.${name}.description} \
      ${lib.escapeShellArg (lib.getExe cfg.checks.${name}.command)}
  '') checkNames;

  # The `deploy-guard-check` executable.
  #
  # Usage: deploy-guard-check [--json]
  #   - exit 0: bypass honored, or every check exited 0
  #   - exit 1: at least one check exited non-zero (including timeouts)
  #   - default mode prints PASS/BLOCK lines; --json prints one JSON object
  #     {bypassed, ok, checks:[{name, description, ok, output[, exit]}]}
  aggregator = pkgs.writeShellApplication {
    name = "deploy-guard-check";
    # Everything the script calls must be listed here: the script may be run
    # straight from its /nix/store path, where the inherited PATH is not
    # guaranteed to provide these tools. gnused supplies the `sed` used to
    # indent the output of blocked checks.
    runtimeInputs = [
      pkgs.coreutils
      pkgs.gnused
      pkgs.jq
    ];
    text = ''
      json_mode=0
      if [[ "''${1:-}" == "--json" ]]; then
        json_mode=1
      fi

      # Bypass precedence: env var first (useful in ad-hoc SSH one-liners),
      # then file marker (written by `./deploy.sh muffin --force` over SSH).
      # The marker is only consumed when it is the bypass source.
      bypass=0
      bypass_reason=""
      if [[ "''${DEPLOY_GUARD_BYPASS:-0}" == "1" ]]; then
        bypass=1
        bypass_reason="DEPLOY_GUARD_BYPASS=1"
      elif [[ -e /run/deploy-guard-bypass ]]; then
        bypass=1
        bypass_reason="/run/deploy-guard-bypass"
        rm -f /run/deploy-guard-bypass
      fi

      if [[ "$bypass" == "1" ]]; then
        if [[ "$json_mode" == "1" ]]; then
          jq -cn --arg reason "$bypass_reason" \
            '{bypassed:true, reason:$reason, ok:true, checks:[]}'
        else
          printf 'deploy-guard: BYPASS via %s — no checks executed\n' "$bypass_reason" >&2
        fi
        exit 0
      fi

      # One compact JSON object per executed check, in execution order.
      declare -a results=()
      # Becomes 1 as soon as any check blocks; used as the exit status.
      overall=0

      # run_check NAME DESCRIPTION EXE
      # Runs EXE under a timeout with stdout+stderr captured, appends a JSON
      # record to `results`, and (outside --json mode) prints a PASS/BLOCK line.
      run_check() {
        local name="$1" description="$2" exe="$3"
        local status=0 output=""
        # `|| status=$?` keeps errexit from aborting on a blocked check.
        # timeout exits 124 when the check overran and died on SIGTERM, and
        # 137 when it had to be SIGKILLed after the grace period.
        output=$(timeout --signal=TERM --kill-after=${toString killAfterSeconds} ${toString cfg.timeout} "$exe" 2>&1) || status=$?

        if [[ $status -eq 0 ]]; then
          results+=("$(jq -cn \
            --arg name "$name" \
            --arg description "$description" \
            --arg output "$output" \
            '{name:$name, description:$description, ok:true, output:$output}')")
          [[ "$json_mode" == "1" ]] || printf 'PASS: %s — %s\n' "$name" "$description"
        else
          overall=1
          results+=("$(jq -cn \
            --arg name "$name" \
            --arg description "$description" \
            --arg output "$output" \
            --argjson exit "$status" \
            '{name:$name, description:$description, ok:false, exit:$exit, output:$output}')")
          if [[ "$json_mode" != "1" ]]; then
            if [[ $status -eq 124 ]]; then
              printf 'BLOCK: %s — %s — check timed out after ${toString cfg.timeout}s\n' \
                "$name" "$description"
            else
              # Any other failure (including 137 after a forced kill): show
              # whatever the check printed, indented under the BLOCK line.
              printf 'BLOCK: %s — %s\n' "$name" "$description"
              if [[ -n "$output" ]]; then
                printf '%s\n' "$output" | sed 's/^/ /'
              fi
            fi
          fi
        fi
      }

      ${runCheckLines}

      if [[ "$json_mode" == "1" ]]; then
        ok=$([[ $overall -eq 0 ]] && echo true || echo false)
        joined=""
        # `:-` keeps nounset happy when no check ran; the resulting single
        # empty element is skipped below.
        for r in "''${results[@]:-}"; do
          if [[ -z "$r" ]]; then continue; fi
          if [[ -z "$joined" ]]; then joined="$r"; else joined="$joined,$r"; fi
        done
        printf '{"bypassed":false,"ok":%s,"checks":[%s]}\n' "$ok" "$joined"
      fi

      exit "$overall"
    '';
  };
in
{
  options.services.deployGuard = {
    enable = lib.mkEnableOption "deploy guard aggregator for blocking deploys on live use";

    # Applied to each check individually; an overrunning check counts as blocked.
    timeout = lib.mkOption {
      type = lib.types.ints.positive;
      default = 10;
      description = "Per-check timeout in seconds.";
    };

    # Attribute name = check name shown in output; checks run in attrNames order.
    checks = lib.mkOption {
      type = lib.types.attrsOf (
        lib.types.submodule {
          options = {
            description = lib.mkOption {
              type = lib.types.str;
              description = "Short human description shown in pass/fail output.";
            };
            command = lib.mkOption {
              type = lib.types.package;
              description = ''
                A derivation whose meta.mainProgram is the check executable.
                Contract: exit 0 when deploys are safe, non-zero with a
                human-readable reason on stdout/stderr when blocked.
              '';
            };
          };
        }
      );
      default = { };
      description = ''
        Per-service deploy guard checks. Merged from anywhere in the config.
        Any module can register a check — see modules/server-deploy-guard.nix
        for the contract.
      '';
    };
  };

  config = lib.mkIf cfg.enable {
    environment.systemPackages = [ aggregator ];

    # Expose the aggregator as a named system build so preflight drivers
    # (deploy.sh, CI) can build just this derivation and invoke it by its
    # /nix/store path — avoiding the bootstrap gap where
    # /run/current-system/sw/bin/deploy-guard-check may not yet exist on the
    # target (first deploy of the feature, post-rollback, etc).
    system.build.deployGuardCheck = aggregator;

    assertions = [
      {
        assertion = cfg.checks != { };
        message = ''
          services.deployGuard.enable = true but no checks are registered.
          Either disable it or register at least one check via
          services.deployGuard.checks.<name>.
        '';
      }
    ];
  };
}