{ pkgs, ... }: let script = ../services/monero/xmrig-auto-pause.py; python = pkgs.python3; in pkgs.testers.runNixOSTest { name = "xmrig-auto-pause"; nodes.machine = { pkgs, ... }: { environment.systemPackages = [ pkgs.python3 pkgs.procps ]; # Mock xmrig as a nice'd sleep process that can be stopped/started. systemd.services.xmrig = { description = "Mock xmrig miner"; serviceConfig = { ExecStart = "${pkgs.coreutils}/bin/sleep infinity"; Type = "simple"; Nice = 19; }; wantedBy = [ "multi-user.target" ]; }; }; testScript = '' import time PYTHON = "${python}/bin/python3" SCRIPT = "${script}" # Tuned for test VMs (1-2 cores). # POLL_INTERVAL=1 keeps detection latency low. # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short # enough that the full test completes in reasonable time. # CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%) # without triggering from normal VM noise. # CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM. POLL_INTERVAL = "1" GRACE_PERIOD = "5" CPU_STOP_THRESHOLD = "20" CPU_RESUME_THRESHOLD = "10" STARTUP_COOLDOWN = "4" STATE_DIR = "/tmp/xap-state" def start_cpu_load(name): """Start a non-nice CPU burn as a transient systemd unit.""" machine.succeed( f"systemd-run --unit={name} --property=Type=exec " f"bash -c 'while true; do :; done'" ) def stop_cpu_load(name): machine.succeed(f"systemctl stop {name}") def start_monitor(unit_name): """Start the auto-pause monitor as a transient unit.""" machine.succeed( f"systemd-run --unit={unit_name} " f"--setenv=POLL_INTERVAL={POLL_INTERVAL} " f"--setenv=GRACE_PERIOD={GRACE_PERIOD} " f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} " f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} " f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} " f"--setenv=STATE_DIR={STATE_DIR} " f"{PYTHON} {SCRIPT}" ) # Monitor needs two consecutive polls to compute a CPU delta. time.sleep(3) # Monitor needs two consecutive polls to compute a CPU delta. time.sleep(3) start_all() machine.wait_for_unit("multi-user.target") machine.wait_for_unit("xmrig.service") machine.succeed(f"mkdir -p {STATE_DIR}") with subtest("Start auto-pause monitor"): start_monitor("xmrig-auto-pause") with subtest("xmrig stays running while system is idle"): machine.succeed("systemctl is-active xmrig") with subtest("xmrig stopped when CPU load appears"): start_cpu_load("cpu-load") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) with subtest("xmrig remains stopped during grace period after load ends"): stop_cpu_load("cpu-load") # Load just stopped. Grace period is 5s. Check at 2s — well within. time.sleep(2) machine.fail("systemctl is-active xmrig") with subtest("xmrig resumes after grace period expires"): # Already idle since previous subtest. Grace period (5s) plus # detection delay (~2 polls) plus startup cooldown (4s) means # xmrig should restart within ~12s. machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) with subtest("Intermittent load does not cause flapping"): # First load — stop xmrig start_cpu_load("cpu-load-1") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) stop_cpu_load("cpu-load-1") # Brief idle gap — shorter than grace period time.sleep(2) # Second load arrives before grace period expires start_cpu_load("cpu-load-2") time.sleep(3) # xmrig must still be stopped machine.fail("systemctl is-active xmrig") stop_cpu_load("cpu-load-2") machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) with subtest("Sustained load keeps xmrig stopped"): start_cpu_load("cpu-load-3") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) # Stay busy longer than the grace period to prove continuous # activity keeps xmrig stopped indefinitely. time.sleep(8) machine.fail("systemctl is-active xmrig") stop_cpu_load("cpu-load-3") machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) with subtest("External restart detected and re-stopped under load"): # Put system under load so auto-pause stops xmrig. start_cpu_load("cpu-load-4") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) # Something external starts xmrig while load is active. # The script should detect this and re-stop it. machine.succeed("systemctl start xmrig") machine.succeed("systemctl is-active xmrig") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) stop_cpu_load("cpu-load-4") machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) # --- State persistence and crash recovery --- machine.succeed("systemctl stop xmrig-auto-pause") with subtest("xmrig recovers after crash during startup cooldown"): machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}") start_monitor("xmrig-auto-pause-crash") # Load -> xmrig stops start_cpu_load("cpu-crash") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) # End load -> xmrig restarts after grace period stop_cpu_load("cpu-crash") machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) # Kill xmrig immediately — simulates crash during startup cooldown. # The script should detect the failure when cooldown expires and # re-enter the retry cycle. machine.succeed("systemctl kill --signal=KILL xmrig") machine.wait_until_fails("systemctl is-active xmrig", timeout=5) # After cooldown + grace period + restart, xmrig should be back. machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) machine.succeed("systemctl stop xmrig-auto-pause-crash") machine.succeed("systemctl reset-failed xmrig.service || true") machine.succeed("systemctl start xmrig") machine.wait_for_unit("xmrig.service") with subtest("Script restart preserves pause state"): machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}") start_monitor("xmrig-auto-pause-persist") # Load -> xmrig stops start_cpu_load("cpu-persist") machine.wait_until_fails("systemctl is-active xmrig", timeout=20) # Kill the monitor while xmrig is paused (simulates script crash) machine.succeed("systemctl stop xmrig-auto-pause-persist") # State file must exist — the monitor persisted the pause flag machine.succeed(f"test -f {STATE_DIR}/paused") # Start a fresh monitor instance (reads state file on startup) start_monitor("xmrig-auto-pause-persist2") # End load — the new monitor should pick up the paused state # and restart xmrig after the grace period stop_cpu_load("cpu-persist") machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) # State file should be cleaned up after successful restart machine.fail(f"test -f {STATE_DIR}/paused") machine.succeed("systemctl stop xmrig-auto-pause-persist2") ''; }