From 0901f5edf04e33c1ba1f5b0ef8f915cad24f2f70 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Wed, 22 Apr 2026 23:02:38 -0400 Subject: [PATCH] deploy: potentially fix self-deploy issue? --- AGENTS.md | 15 +++ flake.nix | 22 +++- hosts/muffin/default.nix | 8 ++ modules/server-deploy-finalize.nix | 187 +++++++++++++++++++++++++++ tests/deploy-finalize.nix | 196 +++++++++++++++++++++++++++++ tests/tests.nix | 1 + 6 files changed, 428 insertions(+), 1 deletion(-) create mode 100644 modules/server-deploy-finalize.nix create mode 100644 tests/deploy-finalize.nix diff --git a/AGENTS.md b/AGENTS.md index a4b4942..57ee10d 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -191,6 +191,21 @@ lib.mkIf config.services..enable { Existing registrations live in `services/jellyfin/jellyfin-deploy-guard.nix` (REST `/Sessions` via curl+jq) and `services/minecraft-deploy-guard.nix` (Server List Ping via `mcstatus`). Prefer soft-fail on unreachable — a service that's already down has no users to disrupt. +## Deploy finalize (muffin) + +`modules/server-deploy-finalize.nix` solves the self-deploy problem: the gitea-actions runner driving CI deploys lives on muffin itself, so a direct `switch-to-configuration switch` restarts the runner mid-activation, killing the SSH session, the CI job, and deploy-rs's magic-rollback handshake. The failure mode is visible as "deploy appears to fail even though the new config landed" (or worse, a rollback storm). + +The fix is a two-phase activation wired into `deploy.nodes.muffin.profiles.system.path` in `flake.nix`: + +1. `switch-to-configuration boot` — bootloader-only, no service restarts. The runner, SSH session, and magic-rollback survive. +2. `deploy-finalize` — schedules a detached `systemd-run --on-active=N` transient unit (default 60s). The unit is owned by pid1, so it survives the eventual runner restart. If `/run/booted-system/{kernel,initrd,kernel-modules}` differs from the new profile's, the unit runs `systemctl reboot`; otherwise it runs `switch-to-configuration switch`. + +That is, reboot is dynamically gated on kernel/initrd/kernel-modules change. The 60s delay is tuned so the CI job (or manual `./deploy.sh muffin`) has time to emit status/notification steps before the runner is recycled. + +Back-to-back deploys supersede each other: each invocation cancels any still-pending `deploy-finalize-*.timer` before scheduling its own. `deploy-finalize --dry-run` prints the decision without scheduling anything — useful when debugging. + +Prior art: the 3-path `{kernel,initrd,kernel-modules}` diff is lifted from nixpkgs's `system.autoUpgrade` module (the `allowReboot = true` branch) and was packaged the same way in [obsidiansystems/obelisk#957](https://github.com/obsidiansystems/obelisk/pull/957). nixpkgs#185030 tracks lifting it into `switch-to-configuration` proper but has been stale since 2025-07. The self-deploy `systemd-run` detachment is the proposed fix from [deploy-rs#153](https://github.com/serokell/deploy-rs/issues/153), also unmerged upstream. + ## Technical details - **Privilege escalation**: `doas` everywhere; `sudo` is disabled on every host. diff --git a/flake.nix b/flake.nix index 2fa6f42..ae61b47 100644 --- a/flake.nix +++ b/flake.nix @@ -415,7 +415,27 @@ # want to avoid when the deploy is supposed to be a no-op blocked by # the guard. Blocking before the deploy-rs invocation is the only # clean way to leave the running system untouched. - path = deploy-rs.lib.${system}.activate.nixos self.nixosConfigurations.muffin; + # + # Activation uses `switch-to-configuration boot` + a detached finalize + # (modules/server-deploy-finalize.nix) rather than the default + # `switch`. The gitea-actions runner driving CI deploys lives on + # muffin itself; a direct `switch` restarts gitea-runner-muffin mid- + # activation, killing the SSH session, the CI job, and deploy-rs's + # magic-rollback handshake. `boot` only touches the bootloader — no + # service restarts — and deploy-finalize schedules a pid1-owned + # transient unit that runs the real `switch` (or `systemctl reboot` + # when kernel/initrd/kernel-modules changed) ~60s later, surviving + # runner restart because it's decoupled from the SSH session. + path = + deploy-rs.lib.${system}.activate.custom self.nixosConfigurations.muffin.config.system.build.toplevel + '' + # matches activate.nixos's workaround for NixOS/nixpkgs#73404 + cd /tmp + + $PROFILE/bin/switch-to-configuration boot + + ${nixpkgs-stable.lib.getExe self.nixosConfigurations.muffin.config.system.build.deployFinalize} + ''; }; }; diff --git a/hosts/muffin/default.nix b/hosts/muffin/default.nix index 4c93093..a853561 100644 --- a/hosts/muffin/default.nix +++ b/hosts/muffin/default.nix @@ -26,6 +26,7 @@ ../../modules/ntfy-alerts.nix ../../modules/server-power.nix ../../modules/server-deploy-guard.nix + ../../modules/server-deploy-finalize.nix ../../services/postgresql.nix ../../services/jellyfin @@ -99,6 +100,13 @@ services.deployGuard.enable = true; + # Detached deploy finalize: see modules/server-deploy-finalize.nix. deploy-rs + # activates in `boot` mode and invokes deploy-finalize to schedule the real + # `switch` (or reboot, when kernel/initrd/kernel-modules changed) 60s later + # as a pid1-owned transient unit. Prevents the self-hosted gitea runner from + # being restarted mid-CI-deploy. + services.deployFinalize.enable = true; + # Disable serial getty on ttyS0 to prevent dmesg warnings systemd.services."serial-getty@ttyS0".enable = false; diff --git a/modules/server-deploy-finalize.nix b/modules/server-deploy-finalize.nix new file mode 100644 index 0000000..a95d18a --- /dev/null +++ b/modules/server-deploy-finalize.nix @@ -0,0 +1,187 @@ +# Deferred deploy finalize for deploy-rs-driven hosts. +# +# When deploy-rs activates via `switch-to-configuration switch` and the gitea- +# actions runner driving the deploy lives on the same host, the runner unit +# gets restarted mid-activation — its definition changes between builds. That +# restart kills the SSH session, the CI job, and deploy-rs's magic-rollback +# handshake, so CI reports failure even when the deploy itself completed. +# This is deploy-rs#153, open since 2022. +# +# This module breaks the dependency: activation does `switch-to-configuration +# boot` (bootloader only, no service restarts), then invokes deploy-finalize +# which schedules a detached systemd transient unit that fires `delay` seconds +# later with the real `switch` (or `systemctl reboot` when the kernel, initrd, +# or kernel-modules changed since boot). The transient unit is owned by pid1, +# so it survives the runner's eventual restart — by which time the CI job has +# finished reporting. +# +# Prior art (reboot-or-switch logic, not the self-deploy detachment): +# - nixpkgs `system.autoUpgrade` (allowReboot = true branch) is the canonical +# source of the 3-path {initrd,kernel,kernel-modules} comparison. +# - obsidiansystems/obelisk#957 merged the same snippet into `ob deploy` for +# push-based remote deploys — but doesn't need detachment since its deployer +# lives on a different machine from the target. +# - nixpkgs#185030 tracks lifting this into switch-to-configuration proper. +# Stale since 2025-07; until it lands, every downstream reimplements it. +# +# Bootstrap note: the activation snippet resolves deploy-finalize via +# lib.getExe (store path), not via `/run/current-system/sw/bin` — `boot` mode +# does not update `/run/current-system`, so the old binary would be resolved. +{ + config, + lib, + pkgs, + ... +}: +let + cfg = config.services.deployFinalize; + + finalize = pkgs.writeShellApplication { + name = "deploy-finalize"; + runtimeInputs = [ + pkgs.coreutils + pkgs.systemd + ]; + text = '' + delay=${toString cfg.delay} + profile=/nix/var/nix/profiles/system + dry_run=0 + + usage() { + cat <&2 + usage >&2 + exit 2 + ;; + esac + done + + # Comparing {kernel,initrd,kernel-modules} matches nixpkgs's canonical + # `system.autoUpgrade` allowReboot logic. -e (not -f) so a dangling + # symlink counts as missing: on a real NixOS profile all three exist, + # but defensive: if a profile has bad symlinks we refuse to schedule + # rather than scheduling against ghost paths. + booted_kernel="$(readlink -e /run/booted-system/kernel 2>/dev/null || true)" + booted_initrd="$(readlink -e /run/booted-system/initrd 2>/dev/null || true)" + booted_modules="$(readlink -e /run/booted-system/kernel-modules 2>/dev/null || true)" + new_kernel="$(readlink -e "$profile/kernel" 2>/dev/null || true)" + new_initrd="$(readlink -e "$profile/initrd" 2>/dev/null || true)" + new_modules="$(readlink -e "$profile/kernel-modules" 2>/dev/null || true)" + + if [[ -z "$new_kernel" || -z "$new_initrd" || -z "$new_modules" ]]; then + echo "deploy-finalize: refusing to schedule — $profile is missing kernel, initrd, or kernel-modules" >&2 + exit 1 + fi + + changed=() + if [[ -z "$booted_kernel" || -z "$booted_initrd" || -z "$booted_modules" ]]; then + # Unreachable on a booted NixOS, but fail closed on reboot. + changed+=("/run/booted-system incomplete") + fi + [[ "$booted_kernel" != "$new_kernel" ]] && changed+=("kernel") + [[ "$booted_initrd" != "$new_initrd" ]] && changed+=("initrd") + [[ "$booted_modules" != "$new_modules" ]] && changed+=("kernel-modules") + + reboot_needed=0 + reason="" + if [[ ''${#changed[@]} -gt 0 ]]; then + reboot_needed=1 + # Join with commas so the reason reads as e.g. `kernel,initrd changed`. + reason="$(IFS=, ; echo "''${changed[*]}") changed" + fi + + if [[ "$reboot_needed" == 1 ]]; then + action=reboot + cmd="systemctl reboot" + else + action=switch + reason="services only" + cmd="$profile/bin/switch-to-configuration switch" + fi + + # Nanosecond suffix so back-to-back deploys don't collide on unit names. + unit="deploy-finalize-$(date +%s%N)" + + printf 'deploy-finalize: booted_kernel=%s\n' "$booted_kernel" + printf 'deploy-finalize: new_kernel=%s\n' "$new_kernel" + printf 'deploy-finalize: booted_initrd=%s\n' "$booted_initrd" + printf 'deploy-finalize: new_initrd=%s\n' "$new_initrd" + printf 'deploy-finalize: booted_kernel-modules=%s\n' "$booted_modules" + printf 'deploy-finalize: new_kernel-modules=%s\n' "$new_modules" + printf 'deploy-finalize: action=%s reason=%s delay=%ss unit=%s\n' \ + "$action" "$reason" "$delay" "$unit" + + if [[ "$dry_run" == 1 ]]; then + printf 'deploy-finalize: dry-run — not scheduling\n' + printf 'deploy-finalize: would run: %s\n' "$cmd" + printf 'deploy-finalize: would schedule: systemd-run --collect --unit=%s --on-active=%s\n' \ + "$unit" "$delay" + exit 0 + fi + + # Cancel any still-pending finalize timers from an earlier deploy so this + # invocation is authoritative. Without this a stale timer could fire with + # the old profile's action (reboot/switch) against the new profile and + # briefly run new userspace under the old kernel. + systemctl stop 'deploy-finalize-*.timer' 2>/dev/null || true + + # --on-active arms a transient timer owned by pid1. systemd-run returns + # once the timer is armed; the SSH session that called us can exit and + # the gitea-runner can be restarted (by the switch the timer fires) + # without affecting whether the finalize runs. + systemd-run \ + --collect \ + --unit="$unit" \ + --description="Finalize NixOS deploy ($action after boot-mode activation)" \ + --on-active="$delay" \ + /bin/sh -c "$cmd" + ''; + }; +in +{ + options.services.deployFinalize = { + enable = lib.mkEnableOption "deferred deploy finalize (switch or reboot) after boot-mode activation"; + + delay = lib.mkOption { + type = lib.types.ints.positive; + default = 60; + description = '' + Seconds between the deploy-rs activation completing and the scheduled + finalize firing. Tuned so the CI job (or manual SSH session) has time + to complete status reporting before the runner is restarted by the + eventual switch-to-configuration. + ''; + }; + }; + + config = lib.mkIf cfg.enable { + environment.systemPackages = [ finalize ]; + + # Exposed for the deploy-rs activation snippet to reference by /nix/store + # path via lib.getExe — `boot` mode does not update /run/current-system, + # so reading through /run/current-system/sw/bin would resolve to the OLD + # binary on a new-feature rollout or immediately after a rollback. + system.build.deployFinalize = finalize; + }; +} diff --git a/tests/deploy-finalize.nix b/tests/deploy-finalize.nix new file mode 100644 index 0000000..3061dd4 --- /dev/null +++ b/tests/deploy-finalize.nix @@ -0,0 +1,196 @@ +# Test for modules/server-deploy-finalize.nix. +# +# Covers the decision and scheduling logic with fabricated profile directories, +# since spawning a second booted NixOS toplevel to diff kernels is too heavy for +# a runNixOSTest. We rely on the shellcheck pass baked into writeShellApplication +# to catch syntax regressions in the script itself. +{ + lib, + pkgs, + inputs, + ... +}: +pkgs.testers.runNixOSTest { + name = "deploy-finalize"; + + node.specialArgs = { + inherit inputs lib; + username = "testuser"; + }; + + nodes.machine = + { ... }: + { + imports = [ + ../modules/server-deploy-finalize.nix + ]; + + services.deployFinalize = { + enable = true; + # Shorter default in the test to make expected-substring assertions + # stable and reinforce that the option is wired through. + delay = 15; + }; + }; + + testScript = '' + start_all() + machine.wait_for_unit("multi-user.target") + + # Test fixtures: fabricated profile trees whose kernel/initrd/kernel-modules + # symlinks are under test control. `readlink -e` requires the targets to + # exist, so we point at real files in /tmp rather than non-existent paths. + machine.succeed( + "mkdir -p /tmp/profile-same /tmp/profile-changed-kernel " + "/tmp/profile-changed-initrd /tmp/profile-changed-modules " + "/tmp/profile-missing /tmp/fake-targets" + ) + machine.succeed( + "touch /tmp/fake-targets/alt-kernel /tmp/fake-targets/alt-initrd " + "/tmp/fake-targets/alt-modules" + ) + + booted_kernel = machine.succeed("readlink -e /run/booted-system/kernel").strip() + booted_initrd = machine.succeed("readlink -e /run/booted-system/initrd").strip() + booted_modules = machine.succeed("readlink -e /run/booted-system/kernel-modules").strip() + + def link_profile(path, kernel, initrd, modules): + machine.succeed(f"ln -sf {kernel} {path}/kernel") + machine.succeed(f"ln -sf {initrd} {path}/initrd") + machine.succeed(f"ln -sf {modules} {path}/kernel-modules") + + # profile-same: matches booted exactly → should choose `switch`. + link_profile("/tmp/profile-same", booted_kernel, booted_initrd, booted_modules) + machine.succeed("mkdir -p /tmp/profile-same/bin") + machine.succeed( + "ln -sf /run/current-system/bin/switch-to-configuration " + "/tmp/profile-same/bin/switch-to-configuration" + ) + + # profile-changed-kernel: kernel differs only → should choose `reboot`. + link_profile( + "/tmp/profile-changed-kernel", + "/tmp/fake-targets/alt-kernel", + booted_initrd, + booted_modules, + ) + + # profile-changed-initrd: initrd differs only → should choose `reboot`. + link_profile( + "/tmp/profile-changed-initrd", + booted_kernel, + "/tmp/fake-targets/alt-initrd", + booted_modules, + ) + + # profile-changed-modules: kernel-modules differs only → should choose `reboot`. + # Catches the obelisk PR / nixpkgs auto-upgrade case where modules rebuild + # against the same kernel but ABI-incompatible. + link_profile( + "/tmp/profile-changed-modules", + booted_kernel, + booted_initrd, + "/tmp/fake-targets/alt-modules", + ) + + # profile-missing: no kernel/initrd/kernel-modules → should fail closed. + + with subtest("dry-run against identical profile selects switch"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-same 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + assert "action=switch" in out, out + assert "services only" in out, out + assert "dry-run — not scheduling" in out, out + assert "would run: /tmp/profile-same/bin/switch-to-configuration switch" in out, out + assert "would schedule: systemd-run" in out, out + + with subtest("dry-run against changed-kernel profile selects reboot"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-changed-kernel 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + assert "action=reboot" in out, out + assert "reason=kernel changed" in out, out + assert "systemctl reboot" in out, out + + with subtest("dry-run against changed-initrd profile selects reboot"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-changed-initrd 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + assert "action=reboot" in out, out + assert "reason=initrd changed" in out, out + + with subtest("dry-run against changed-modules profile selects reboot"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-changed-modules 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + assert "action=reboot" in out, out + assert "reason=kernel-modules changed" in out, out + + with subtest("dry-run against empty profile fails closed with rc=1"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-missing 2>&1" + ) + assert rc == 1, f"rc={rc}\n{out}" + assert "missing kernel, initrd, or kernel-modules" in out, out + + with subtest("--delay override is reflected in output"): + rc, out = machine.execute( + "deploy-finalize --dry-run --delay 7 --profile /tmp/profile-same 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + assert "delay=7s" in out, out + + with subtest("configured default delay from module option is used"): + rc, out = machine.execute( + "deploy-finalize --dry-run --profile /tmp/profile-same 2>&1" + ) + assert rc == 0, f"rc={rc}\n{out}" + # module option delay=15 in nodes.machine above. + assert "delay=15s" in out, out + + with subtest("unknown option rejected with rc=2"): + rc, out = machine.execute("deploy-finalize --bogus 2>&1") + assert rc == 2, f"rc={rc}\n{out}" + assert "unknown option --bogus" in out, out + + with subtest("non-dry run arms a transient systemd timer"): + # Long delay so the timer doesn't fire during the test. We stop it + # explicitly afterwards. + rc, out = machine.execute( + "deploy-finalize --delay 3600 --profile /tmp/profile-same 2>&1" + ) + assert rc == 0, f"scheduling rc={rc}\n{out}" + # Confirm exactly one transient timer is active. + timers = machine.succeed( + "systemctl list-units --type=timer --no-legend 'deploy-finalize-*.timer' " + "--state=waiting | awk 'NF{print $1}'" + ).strip().splitlines() + assert len(timers) == 1, f"expected exactly one pending timer, got {timers}" + assert timers[0].startswith("deploy-finalize-"), timers + + with subtest("back-to-back scheduling cancels the previous timer"): + # The previous subtest left one timer armed. Schedule again; the old + # one should be stopped before the new unit name is created. + machine.succeed("sleep 1") # ensure a distinct unit-name timestamp + rc, out = machine.execute( + "deploy-finalize --delay 3600 --profile /tmp/profile-same 2>&1" + ) + assert rc == 0, f"second-schedule rc={rc}\n{out}" + timers = machine.succeed( + "systemctl list-units --type=timer --no-legend 'deploy-finalize-*.timer' " + "--state=waiting | awk 'NF{print $1}'" + ).strip().splitlines() + assert len(timers) == 1, f"expected only the new timer, got {timers}" + + # Clean up so the test's shutdown path is quiet. + machine.succeed( + "systemctl stop 'deploy-finalize-*.timer' 'deploy-finalize-*.service' " + "2>/dev/null || true" + ) + ''; +} diff --git a/tests/tests.nix b/tests/tests.nix index 3c2dc20..e2d9e40 100644 --- a/tests/tests.nix +++ b/tests/tests.nix @@ -13,6 +13,7 @@ in minecraftTest = handleTest ./minecraft.nix; jellyfinQbittorrentMonitorTest = handleTest ./jellyfin-qbittorrent-monitor.nix; deployGuardTest = handleTest ./deploy-guard.nix; + deployFinalizeTest = handleTest ./deploy-finalize.nix; filePermsTest = handleTest ./file-perms.nix; # fail2ban tests