diff --git a/services/monero/xmrig-auto-pause.nix b/services/monero/xmrig-auto-pause.nix index 80107a5..403d6ac 100644 --- a/services/monero/xmrig-auto-pause.nix +++ b/services/monero/xmrig-auto-pause.nix @@ -2,15 +2,33 @@ config, lib, pkgs, + service_configs, ... }: +let + cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service"; + cgroupFreeze = "${cgroupDir}/cgroup.freeze"; +in lib.mkIf config.services.xmrig.enable { systemd.services.xmrig-auto-pause = { - description = "Auto-pause xmrig when other services need CPU"; + description = "Auto-pause xmrig via cgroup freezer when other services need CPU"; after = [ "xmrig.service" ]; + # PartOf cascades stop/restart: when xmrig stops (deploy, apcupsd battery, + # manual), systemd stops auto-pause first and ExecStop thaws xmrig so + # xmrig's own stop does not hang on a frozen cgroup. + partOf = [ "xmrig.service" ]; wantedBy = [ "multi-user.target" ]; + serviceConfig = { ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}"; + # Safety net: any exit path (SIGTERM from PartOf cascade, systemctl stop, + # crash with Restart=) must leave xmrig thawed. The Python SIGTERM + # handler does the same thing; this covers SIGKILL / hard crash paths + # too. Idempotent. + ExecStop = pkgs.writeShellScript "xmrig-auto-pause-thaw" '' + f=${cgroupFreeze} + [ -w "$f" ] && echo 0 > "$f" || true + ''; Restart = "always"; RestartSec = "10s"; NoNewPrivileges = true; @@ -22,6 +40,9 @@ lib.mkIf config.services.xmrig.enable { ]; MemoryDenyWriteExecute = true; StateDirectory = "xmrig-auto-pause"; + # Required so the script can write to cgroup.freeze under + # ProtectSystem=strict (which makes /sys read-only by default). + ReadWritePaths = [ cgroupDir ]; }; environment = { POLL_INTERVAL = "3"; @@ -32,8 +53,19 @@ lib.mkIf config.services.xmrig.enable { # steady-state floor to avoid restarting xmrig while services are active. CPU_STOP_THRESHOLD = "40"; CPU_RESUME_THRESHOLD = "10"; - STARTUP_COOLDOWN = "10"; STATE_DIR = "/var/lib/xmrig-auto-pause"; + XMRIG_CGROUP_FREEZE = cgroupFreeze; + # Per-service CPU thresholds. Catches sub-threshold activity that never + # trips the system-wide gauge — a single Minecraft player uses 3-15% of + # one core (0.3-1.3% of a 12-thread host) which is pure noise in + # /proc/stat but dominant in the minecraft cgroup. + WATCHED_SERVICES = lib.concatStringsSep "," ( + lib.optional config.services.minecraft-servers.enable "minecraft-server-${service_configs.minecraft.server_name}:2" + ); }; }; + + # Pull auto-pause along whenever xmrig starts. After= on auto-pause ensures + # correct order; Wants= here ensures it actually starts. + systemd.services.xmrig.wants = [ "xmrig-auto-pause.service" ]; } diff --git a/services/monero/xmrig-auto-pause.py b/services/monero/xmrig-auto-pause.py index 4e11f84..636ada8 100644 --- a/services/monero/xmrig-auto-pause.py +++ b/services/monero/xmrig-auto-pause.py @@ -2,33 +2,54 @@ """ Auto-pause xmrig when other services need CPU. -Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19, -its CPU time lands in the 'nice' column and is excluded from the metric. -When real workload (user + system + irq + softirq) exceeds the stop -threshold, stops xmrig. When it drops below the resume threshold for -GRACE_PERIOD seconds, restarts xmrig. +Two independent signals drive the decision; either one can trigger a pause: -This replaces per-service pause scripts with a single general-purpose -monitor that handles any CPU-intensive workload (gitea workers, llama-cpp -inference, etc.) without needing to know about specific processes. +1. System-wide non-nice CPU from /proc/stat. Catches any CPU-heavy workload + including non-systemd user work (interactive sessions, ad-hoc jobs). + Since xmrig runs at Nice=19, its CPU time lands in the 'nice' column and + is excluded from the metric. + +2. Per-service CPU from cgroup cpu.stat usage_usec. Catches sub-threshold + service activity — a single Minecraft player drives the server JVM to + 3-15% of one core, which is noise system-wide (0.3-1.3% of total on a + 12-thread host) but dominant for the minecraft cgroup. + +When either signal crosses its stop threshold, writes 1 to +/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze. When both are quiet +for GRACE_PERIOD seconds, writes 0 to resume. + +Why direct cgroup.freeze instead of systemctl freeze: + systemd 256+ has a bug class where `systemctl freeze` followed by any + process death (SIGKILL, watchdog, OOM, segfault, shutdown) strands the + unit in FreezerState=frozen ActiveState=failed with no recovery short of + a reboot. See https://github.com/systemd/systemd/issues/38517. Writing + directly to cgroup.freeze keeps systemd's FreezerState at "running" the + whole time, so there is no state machine to get stuck: if xmrig dies + while frozen, systemd transitions it to inactive normally. Why scheduler priority alone isn't enough: Nice=19 / SCHED_IDLE only affects which thread gets the next time slice. - RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes - the shared 32MB L3 cache, and its memory access pattern saturates DRAM - bandwidth. Other services run slower even though they aren't denied CPU - time. The only fix is to stop xmrig entirely when real work is happening. + RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) holds about + 68% of the shared 32MB L3 cache on Zen 3, evicting hot lines from + interactive services. Measured on muffin: pointer-chase latency is 112ns + with xmrig running and 19ns with xmrig frozen — a 6x difference that + scheduler priority cannot address. Hysteresis: - The stop threshold is set higher than the resume threshold to prevent - oscillation. When xmrig runs, its L3 cache pressure makes other processes - appear ~3-8% busier. A single threshold trips on this indirect effect, - causing stop/start thrashing. Separate thresholds break the cycle: the - resume threshold confirms the system is truly idle, while the stop - threshold requires genuine workload above xmrig's indirect pressure. + The system-wide stop threshold sits higher than the resume threshold + because background services (qbittorrent, bitmagnet, postgres) produce + 15-25% non-nice CPU during normal operation, and xmrig's indirect cache + pressure inflates that by another few percent. A single threshold + thrashes on the floor; two thresholds break the cycle. + + Per-service thresholds are single-valued. Per-service CPU is a clean + signal without background noise to calibrate against, so idle_since is + reset whenever any watched service is at-or-above its threshold and the + grace period only advances when every watched service is below. """ import os +import signal import subprocess import sys import time @@ -37,19 +58,23 @@ POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3")) GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15")) # Percentage of total CPU ticks that non-nice processes must use to trigger # a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total. -# Default 15% requires roughly two busy cores, which avoids false positives -# from xmrig's L3 cache pressure inflating other processes' apparent CPU. CPU_STOP_THRESHOLD = float(os.environ.get("CPU_STOP_THRESHOLD", "15")) # Percentage below which the system is considered idle enough to resume # mining. Lower than the stop threshold to provide hysteresis. CPU_RESUME_THRESHOLD = float(os.environ.get("CPU_RESUME_THRESHOLD", "5")) -# After starting xmrig, ignore CPU spikes for this many seconds to let -# RandomX dataset initialization complete (~4s on the target hardware) -# without retriggering a stop. -STARTUP_COOLDOWN = float(os.environ.get("STARTUP_COOLDOWN", "10")) +# Per-service CPU thresholds parsed from "unit1:threshold1,unit2:threshold2". +# Thresholds are percentage of TOTAL CPU capacity (same frame as +# CPU_STOP_THRESHOLD). Empty / unset disables the per-service path. +WATCHED_SERVICES_RAW = os.environ.get("WATCHED_SERVICES", "") +# Path to xmrig's cgroup.freeze file. Direct write bypasses systemd's +# freezer state machine; see module docstring. +XMRIG_CGROUP_FREEZE = os.environ.get( + "XMRIG_CGROUP_FREEZE", + "/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze", +) # Directory for persisting pause state across script restarts. Without # this, a restart while xmrig is paused loses the paused_by_us flag and -# xmrig stays stopped permanently. +# xmrig stays frozen until something else thaws it. STATE_DIR = os.environ.get("STATE_DIR", "") _PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else "" @@ -58,6 +83,51 @@ def log(msg): print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True) +def _parse_watched(spec): + out = {} + for entry in filter(None, (s.strip() for s in spec.split(","))): + name, _, pct = entry.partition(":") + name = name.strip() + pct = pct.strip() + if not name or not pct: + log(f"WATCHED_SERVICES: ignoring malformed entry '{entry}'") + continue + try: + out[name] = float(pct) + except ValueError: + log(f"WATCHED_SERVICES: ignoring non-numeric threshold in '{entry}'") + return out + + +def _resolve_cgroup_cpustat(unit): + """Look up the unit's cgroup path via systemd. Returns cpu.stat path or + None if the unit has no cgroup (service not running, unknown unit).""" + result = subprocess.run( + ["systemctl", "show", "--value", "--property=ControlGroup", unit], + capture_output=True, + text=True, + ) + cg = result.stdout.strip() + if not cg: + return None + path = f"/sys/fs/cgroup{cg}/cpu.stat" + if not os.path.isfile(path): + return None + return path + + +def _read_service_usec(path): + """Cumulative cpu.stat usage_usec, or None if the cgroup has vanished.""" + try: + with open(path) as f: + for line in f: + if line.startswith("usage_usec "): + return int(line.split()[1]) + except FileNotFoundError: + return None + return None + + def read_cpu_ticks(): """Read CPU tick counters from /proc/stat. @@ -84,123 +154,241 @@ def is_active(unit): return result.returncode == 0 -def systemctl(action, unit): +def main_pid(unit): + """Return the unit's MainPID, or 0 if unit is not running.""" result = subprocess.run( - ["systemctl", action, unit], + ["systemctl", "show", "--value", "--property=MainPID", unit], capture_output=True, text=True, ) - if result.returncode != 0: - log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}") - return result.returncode == 0 + try: + return int(result.stdout.strip() or "0") + except ValueError: + return 0 -def _save_paused(paused): - """Persist pause flag so a script restart can resume where we left off.""" +def _freeze(frozen): + """Write 1 or 0 to xmrig's cgroup.freeze. Returns True on success. + + Direct kernel interface — bypasses systemd's freezer state tracking.""" + try: + with open(XMRIG_CGROUP_FREEZE, "w") as f: + f.write("1" if frozen else "0") + return True + except OSError as e: + action = "freeze" if frozen else "thaw" + log(f"cgroup.freeze {action} write failed: {e}") + return False + + +def _is_frozen(): + """Read the actual frozen state from cgroup.events. False if cgroup absent.""" + events_path = os.path.join(os.path.dirname(XMRIG_CGROUP_FREEZE), "cgroup.events") + try: + with open(events_path) as f: + for line in f: + if line.startswith("frozen "): + return line.split()[1] == "1" + except FileNotFoundError: + return False + return False + + +def _save_paused(pid): + """Persist the xmrig MainPID at the time of freeze. pid=0 clears claim.""" if not _PAUSE_FILE: return try: - if paused: - open(_PAUSE_FILE, "w").close() + if pid: + with open(_PAUSE_FILE, "w") as f: + f.write(str(pid)) else: - os.remove(_PAUSE_FILE) - except OSError: - pass + try: + os.remove(_PAUSE_FILE) + except FileNotFoundError: + pass + except OSError as e: + log(f"state file write failed: {e}") def _load_paused(): - """Check if a previous instance left xmrig paused.""" + """Return True iff our claim is still valid: same PID and still frozen. + + Restart of the xmrig unit gives it a new PID, which invalidates any + prior claim — we can't "own" a freeze we didn't perform on this + instance. Also confirms the cgroup is actually frozen so an external + thaw drops the claim. + """ if not _PAUSE_FILE: return False - return os.path.isfile(_PAUSE_FILE) + try: + with open(_PAUSE_FILE) as f: + saved = int(f.read().strip() or "0") + except (FileNotFoundError, ValueError): + return False + if not saved: + return False + if saved != main_pid("xmrig.service"): + return False + return _is_frozen() + + +def _cleanup(signum=None, frame=None): + """On SIGTERM/SIGINT: thaw xmrig and clear claim. Operators must never see + a frozen unit we owned after auto-pause exits.""" + if _is_frozen(): + _freeze(False) + _save_paused(0) + sys.exit(0) def main(): - paused_by_us = _load_paused() - idle_since = None - started_at = None # monotonic time when we last started xmrig - prev_total = None - prev_work = None + watched_services = _parse_watched(WATCHED_SERVICES_RAW) + watched_paths = {} + for name in watched_services: + path = _resolve_cgroup_cpustat(name) + if path is None: + log(f"WATCHED_SERVICES: {name} has no cgroup — ignoring until it starts") + watched_paths[name] = path + nproc = os.cpu_count() or 1 + + signal.signal(signal.SIGTERM, _cleanup) + signal.signal(signal.SIGINT, _cleanup) + + paused_by_us = _load_paused() if paused_by_us: log("Recovered pause state from previous instance") log( f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s " - f"stop={CPU_STOP_THRESHOLD}% resume={CPU_RESUME_THRESHOLD}% " - f"cooldown={STARTUP_COOLDOWN}s" + f"sys_stop={CPU_STOP_THRESHOLD}% sys_resume={CPU_RESUME_THRESHOLD}% " + f"watched={watched_services or '(none)'}" ) + idle_since = None + prev_total = None + prev_work = None + prev_monotonic = None + prev_service_usec = {} + while True: total, work = read_cpu_ticks() + now = time.monotonic() if prev_total is None: prev_total = total prev_work = work + prev_monotonic = now + # seed per-service baselines too + for name, path in watched_paths.items(): + if path is None: + # Re-resolve in case the service has started since startup + path = _resolve_cgroup_cpustat(name) + watched_paths[name] = path + if path is not None: + usec = _read_service_usec(path) + if usec is not None: + prev_service_usec[name] = usec time.sleep(POLL_INTERVAL) continue dt = total - prev_total - if dt <= 0: + dt_s = now - prev_monotonic + if dt <= 0 or dt_s <= 0: prev_total = total prev_work = work + prev_monotonic = now time.sleep(POLL_INTERVAL) continue real_work_pct = ((work - prev_work) / dt) * 100 + + # Per-service CPU percentages this window. Fraction of total CPU + # capacity used by this specific service, same frame as real_work_pct. + svc_pct = {} + for name in watched_services: + path = watched_paths.get(name) + if path is None: + # Unit wasn't running at startup; try resolving again in case + # it has started since. + path = _resolve_cgroup_cpustat(name) + watched_paths[name] = path + if path is None: + prev_service_usec.pop(name, None) + continue + cur = _read_service_usec(path) + if cur is None: + # Service stopped; drop prev so it doesn't compute a huge delta + # on next start. + prev_service_usec.pop(name, None) + watched_paths[name] = None # force re-resolution next poll + continue + if name in prev_service_usec: + delta_us = cur - prev_service_usec[name] + if delta_us >= 0: + svc_pct[name] = (delta_us / 1_000_000) / (dt_s * nproc) * 100 + prev_service_usec[name] = cur + prev_total = total prev_work = work + prev_monotonic = now - # Don't act during startup cooldown — RandomX dataset init causes - # a transient CPU spike that would immediately retrigger a stop. - if started_at is not None: - if time.monotonic() - started_at < STARTUP_COOLDOWN: - time.sleep(POLL_INTERVAL) - continue - # Cooldown expired — verify xmrig survived startup. If it - # crashed during init (hugepage failure, pool unreachable, etc.), - # re-enter the pause/retry cycle rather than silently leaving - # xmrig dead. - if not is_active("xmrig.service"): - log("xmrig died during startup cooldown — will retry") - paused_by_us = True - _save_paused(True) - started_at = None + above_stop_sys = real_work_pct > CPU_STOP_THRESHOLD + below_resume_sys = real_work_pct <= CPU_RESUME_THRESHOLD - above_stop = real_work_pct > CPU_STOP_THRESHOLD - below_resume = real_work_pct <= CPU_RESUME_THRESHOLD + busy_services = [ + n for n in watched_services if svc_pct.get(n, 0) > watched_services[n] + ] + any_svc_at_or_above = any( + svc_pct.get(n, 0) >= watched_services[n] for n in watched_services + ) - if above_stop: + stop_pressure = above_stop_sys or bool(busy_services) + fully_idle = below_resume_sys and not any_svc_at_or_above + + if stop_pressure: idle_since = None - if paused_by_us and is_active("xmrig.service"): - # Something else restarted xmrig (deploy, manual start, etc.) - # while we thought it was stopped. Reset ownership so we can - # manage it again. - log("xmrig was restarted externally while paused — reclaiming") + if paused_by_us and not _is_frozen(): + # Someone thawed xmrig while we believed it paused. Reclaim + # ownership so we can re-freeze. + log("xmrig was thawed externally while paused — reclaiming") paused_by_us = False - _save_paused(False) - if not paused_by_us: - # Only claim ownership if xmrig is actually running. - # If something else stopped it (e.g. UPS battery hook), - # don't interfere — we'd wrongly restart it later. - if is_active("xmrig.service"): - log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig") - if systemctl("stop", "xmrig.service"): - paused_by_us = True - _save_paused(True) + _save_paused(0) + if not paused_by_us and is_active("xmrig.service"): + # Only claim ownership if xmrig is actually running. If + # something else stopped it (e.g. UPS battery hook), don't + # interfere. + if busy_services: + reasons = ", ".join( + f"{n}={svc_pct[n]:.1f}%>{watched_services[n]:.1f}%" + for n in busy_services + ) + log(f"Stop: watched service(s) busy [{reasons}] — freezing xmrig") + else: + log( + f"Stop: system CPU {real_work_pct:.1f}% > " + f"{CPU_STOP_THRESHOLD:.1f}% — freezing xmrig" + ) + if _freeze(True): + paused_by_us = True + _save_paused(main_pid("xmrig.service")) elif paused_by_us: - if below_resume: + if fully_idle: if idle_since is None: idle_since = time.monotonic() elif time.monotonic() - idle_since >= GRACE_PERIOD: - log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig") - if systemctl("start", "xmrig.service"): + log( + f"Idle past grace period (system {real_work_pct:.1f}%) " + "— thawing xmrig" + ) + if _freeze(False): paused_by_us = False - _save_paused(False) - started_at = time.monotonic() + _save_paused(0) idle_since = None else: - # Between thresholds — not idle enough to resume. + # Between thresholds or a watched service is borderline — not + # idle enough to resume. idle_since = None time.sleep(POLL_INTERVAL) diff --git a/tests/xmrig-auto-pause.nix b/tests/xmrig-auto-pause.nix index ca52d77..cf0a32e 100644 --- a/tests/xmrig-auto-pause.nix +++ b/tests/xmrig-auto-pause.nix @@ -5,6 +5,15 @@ let script = ../services/monero/xmrig-auto-pause.py; python = pkgs.python3; + cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service"; + cgroupFreeze = "${cgroupDir}/cgroup.freeze"; + cgroupEvents = "${cgroupDir}/cgroup.events"; + # Inline ExecStop for the transient monitor: mirrors the production .nix + # ExecStop so the PartOf cascade test exercises the same code path. + thawScript = pkgs.writeShellScript "test-thaw-xmrig" '' + f=${cgroupFreeze} + [ -w "$f" ] && echo 0 > "$f" || true + ''; in pkgs.testers.runNixOSTest { name = "xmrig-auto-pause"; @@ -17,13 +26,18 @@ pkgs.testers.runNixOSTest { pkgs.procps ]; - # Mock xmrig as a nice'd sleep process that can be stopped/started. + # Mock xmrig as a nice'd sleep process. Runs in the real + # /sys/fs/cgroup/system.slice/xmrig.service cgroup, which is what the + # auto-pause script writes cgroup.freeze into. systemd.services.xmrig = { description = "Mock xmrig miner"; serviceConfig = { ExecStart = "${pkgs.coreutils}/bin/sleep infinity"; Type = "simple"; Nice = 19; + # Short timeout so the PartOf cascade test completes fast if the + # cascade is broken (would otherwise hit systemd's 90s default). + TimeoutStopSec = "10s"; }; wantedBy = [ "multi-user.target" ]; }; @@ -34,20 +48,39 @@ pkgs.testers.runNixOSTest { PYTHON = "${python}/bin/python3" SCRIPT = "${script}" + CGROUP_FREEZE = "${cgroupFreeze}" + CGROUP_EVENTS = "${cgroupEvents}" + THAW_SCRIPT = "${thawScript}" # Tuned for test VMs (1-2 cores). # POLL_INTERVAL=1 keeps detection latency low. - # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short - # enough that the full test completes in reasonable time. - # CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%) - # without triggering from normal VM noise. - # CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM. + # GRACE_PERIOD=5 is long enough to verify hysteresis, short enough for + # reasonable total test time. + # CPU_STOP_HIGH=999 effectively disables the system-wide path (a 1-core + # VM can never exceed 100% of total CPU) so per-service subtests exercise + # that path in isolation. CPU_STOP_LOW=20 catches a bash busy-loop on a + # 1-2 core VM without tripping on normal VM noise. POLL_INTERVAL = "1" - GRACE_PERIOD = "5" - CPU_STOP_THRESHOLD = "20" - CPU_RESUME_THRESHOLD = "10" - STARTUP_COOLDOWN = "4" + GRACE_PERIOD = "5" + CPU_STOP_HIGH = "999" + CPU_STOP_LOW = "20" + CPU_RESUME_HIGH = "950" + CPU_RESUME_LOW = "10" STATE_DIR = "/tmp/xap-state" + WATCHED_UNIT = "watched-burn" + WATCHED_THR = "5" + + def frozen(): + out = machine.succeed(f"cat {CGROUP_EVENTS}") + return "frozen 1" in out + + def thawed(): + out = machine.succeed(f"cat {CGROUP_EVENTS}") + return "frozen 0" in out + + def xmrig_pid(): + return machine.succeed("systemctl show xmrig -p MainPID --value").strip() + def start_cpu_load(name): """Start a non-nice CPU burn as a transient systemd unit.""" machine.succeed( @@ -58,20 +91,29 @@ pkgs.testers.runNixOSTest { def stop_cpu_load(name): machine.succeed(f"systemctl stop {name}") - def start_monitor(unit_name): - """Start the auto-pause monitor as a transient unit.""" - machine.succeed( - f"systemd-run --unit={unit_name} " - f"--setenv=POLL_INTERVAL={POLL_INTERVAL} " - f"--setenv=GRACE_PERIOD={GRACE_PERIOD} " - f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} " - f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} " - f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} " - f"--setenv=STATE_DIR={STATE_DIR} " - f"{PYTHON} {SCRIPT}" - ) - # Monitor needs two consecutive polls to compute a CPU delta. - time.sleep(3) + def start_monitor(unit_name, *, watched="", cpu_stop=CPU_STOP_HIGH, cpu_resume=CPU_RESUME_HIGH): + """Start the auto-pause monitor as a transient unit. + + watched="foo:5,bar:10" enables the per-service path. + cpu_stop/cpu_resume default to values that disable the system-wide + path (95/90) so per-service behaviour is tested in isolation. + """ + parts = [ + f"systemd-run --unit={unit_name}", + "--property=After=xmrig.service", + "--property=PartOf=xmrig.service", + f"--property=ExecStop={THAW_SCRIPT}", + f"--setenv=POLL_INTERVAL={POLL_INTERVAL}", + f"--setenv=GRACE_PERIOD={GRACE_PERIOD}", + f"--setenv=CPU_STOP_THRESHOLD={cpu_stop}", + f"--setenv=CPU_RESUME_THRESHOLD={cpu_resume}", + f"--setenv=STATE_DIR={STATE_DIR}", + f"--setenv=XMRIG_CGROUP_FREEZE={CGROUP_FREEZE}", + ] + if watched: + parts.append(f"--setenv=WATCHED_SERVICES={watched}") + parts.append(f"{PYTHON} {SCRIPT}") + machine.succeed(" ".join(parts)) # Monitor needs two consecutive polls to compute a CPU delta. time.sleep(3) @@ -80,127 +122,139 @@ pkgs.testers.runNixOSTest { machine.wait_for_unit("xmrig.service") machine.succeed(f"mkdir -p {STATE_DIR}") - with subtest("Start auto-pause monitor"): - start_monitor("xmrig-auto-pause") + # ------------------------------------------------------------------ + # Per-service path (primary signal) + # ------------------------------------------------------------------ - with subtest("xmrig stays running while system is idle"): - machine.succeed("systemctl is-active xmrig") + with subtest("Idle → xmrig stays thawed"): + start_monitor("ap-watched", watched=f"{WATCHED_UNIT}:{WATCHED_THR}") + assert thawed(), f"expected thawed, got: {machine.succeed(f'cat {CGROUP_EVENTS}')}" + pid0 = xmrig_pid() + assert pid0 and pid0 != "0", f"expected a real xmrig PID, got {pid0!r}" - with subtest("xmrig stopped when CPU load appears"): - start_cpu_load("cpu-load") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + with subtest("Watched service CPU load → xmrig frozen, PID preserved"): + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + assert xmrig_pid() == pid0, "PID must be preserved across freeze" - with subtest("xmrig remains stopped during grace period after load ends"): - stop_cpu_load("cpu-load") - # Load just stopped. Grace period is 5s. Check at 2s — well within. - time.sleep(2) - machine.fail("systemctl is-active xmrig") + with subtest("Load ends → xmrig thawed after grace period, same PID"): + stop_cpu_load(WATCHED_UNIT) + # Grace period is 5s; watched service drops to 0 immediately, so the + # idle timer starts right away. Expect thaw within GRACE + 2*POLL. + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) + assert xmrig_pid() == pid0, "PID must survive the whole cycle" - with subtest("xmrig resumes after grace period expires"): - # Already idle since previous subtest. Grace period (5s) plus - # detection delay (~2 polls) plus startup cooldown (4s) means - # xmrig should restart within ~12s. - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) - - with subtest("Intermittent load does not cause flapping"): - # First load — stop xmrig - start_cpu_load("cpu-load-1") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - stop_cpu_load("cpu-load-1") - - # Brief idle gap — shorter than grace period - time.sleep(2) - - # Second load arrives before grace period expires - start_cpu_load("cpu-load-2") + with subtest("Intermittent watched load does not cause flapping"): + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + stop_cpu_load(WATCHED_UNIT) + time.sleep(2) # shorter than grace period + start_cpu_load(WATCHED_UNIT) time.sleep(3) + assert frozen(), "xmrig must still be frozen during intermittent load" + stop_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) - # xmrig must still be stopped - machine.fail("systemctl is-active xmrig") + with subtest("Sustained watched load keeps xmrig frozen"): + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + time.sleep(int(GRACE_PERIOD) + 3) # past grace period + assert frozen(), "sustained load must keep xmrig frozen" + stop_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) - stop_cpu_load("cpu-load-2") - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) + with subtest("External thaw reclaimed while load present"): + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + # Someone manually thaws xmrig. Auto-pause must detect and re-freeze. + machine.succeed(f"echo 0 > {CGROUP_FREEZE}") + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + stop_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) - with subtest("Sustained load keeps xmrig stopped"): - start_cpu_load("cpu-load-3") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + with subtest("Monitor SIGTERM thaws xmrig"): + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + machine.succeed("systemctl stop ap-watched") + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=10) + stop_cpu_load(WATCHED_UNIT) + machine.succeed("systemctl reset-failed ap-watched 2>/dev/null || true") - # Stay busy longer than the grace period to prove continuous - # activity keeps xmrig stopped indefinitely. - time.sleep(8) - machine.fail("systemctl is-active xmrig") + # ------------------------------------------------------------------ + # Negative control + system-wide path + # ------------------------------------------------------------------ - stop_cpu_load("cpu-load-3") - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) + with subtest("Unwatched CPU burn does not trip per-service path"): + # High CPU_STOP_THRESHOLD + no watched service → no reason to freeze. + machine.succeed(f"rm -f {STATE_DIR}/paused") + start_monitor("ap-neg") + start_cpu_load("unwatched-neg") + time.sleep(int(GRACE_PERIOD) + 3) + assert thawed(), "unwatched load must not trip when system threshold is high and nothing is watched" + stop_cpu_load("unwatched-neg") + machine.succeed("systemctl stop ap-neg") + machine.succeed("systemctl reset-failed ap-neg 2>/dev/null || true") - with subtest("External restart detected and re-stopped under load"): - # Put system under load so auto-pause stops xmrig. - start_cpu_load("cpu-load-4") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + with subtest("System-wide CPU path freezes xmrig when threshold is low"): + machine.succeed(f"rm -f {STATE_DIR}/paused") + start_monitor("ap-sys", cpu_stop=CPU_STOP_LOW, cpu_resume=CPU_RESUME_LOW) + start_cpu_load("sys-burn") + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=20) + stop_cpu_load("sys-burn") + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) + machine.succeed("systemctl stop ap-sys") + machine.succeed("systemctl reset-failed ap-sys 2>/dev/null || true") - # Something external starts xmrig while load is active. - # The script should detect this and re-stop it. - machine.succeed("systemctl start xmrig") - machine.succeed("systemctl is-active xmrig") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + # ------------------------------------------------------------------ + # State persistence and operational edge cases + # ------------------------------------------------------------------ - stop_cpu_load("cpu-load-4") - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) + with subtest("Monitor crash preserves pause claim; next instance resumes"): + machine.succeed(f"rm -f {STATE_DIR}/paused") + start_monitor("ap-persist", watched=f"{WATCHED_UNIT}:{WATCHED_THR}") + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + # State file must contain the xmrig PID we claim to have frozen. + machine.succeed(f"test -s {STATE_DIR}/paused") + saved = machine.succeed(f"cat {STATE_DIR}/paused").strip() + assert saved == xmrig_pid(), f"state file PID {saved!r} != live xmrig PID {xmrig_pid()!r}" + # Hard-kill the monitor. ExecStop does NOT run on SIGKILL, so xmrig + # stays frozen. The state file persists. + machine.succeed("systemctl kill --signal=KILL ap-persist") + machine.succeed("systemctl reset-failed ap-persist 2>/dev/null || true") + assert frozen(), "xmrig must remain frozen after monitor SIGKILL" + # Fresh monitor picks up the state file, recognises the same PID + + # still-frozen cgroup, and continues owning the claim. Ending the + # load must thaw xmrig through the normal grace path. + start_monitor("ap-persist2", watched=f"{WATCHED_UNIT}:{WATCHED_THR}") + stop_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30) + # State file cleared after successful resume. + machine.fail(f"test -f {STATE_DIR}/paused") + machine.succeed("systemctl stop ap-persist2") + machine.succeed("systemctl reset-failed ap-persist2 2>/dev/null || true") - # --- State persistence and crash recovery --- - machine.succeed("systemctl stop xmrig-auto-pause") - - with subtest("xmrig recovers after crash during startup cooldown"): - machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}") - start_monitor("xmrig-auto-pause-crash") - - # Load -> xmrig stops - start_cpu_load("cpu-crash") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - - # End load -> xmrig restarts after grace period - stop_cpu_load("cpu-crash") - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) - - # Kill xmrig immediately — simulates crash during startup cooldown. - # The script should detect the failure when cooldown expires and - # re-enter the retry cycle. - machine.succeed("systemctl kill --signal=KILL xmrig") - machine.wait_until_fails("systemctl is-active xmrig", timeout=5) - - # After cooldown + grace period + restart, xmrig should be back. - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) - - machine.succeed("systemctl stop xmrig-auto-pause-crash") - machine.succeed("systemctl reset-failed xmrig.service || true") + with subtest("systemctl stop xmrig cascades via PartOf and completes quickly"): + machine.succeed(f"rm -f {STATE_DIR}/paused") + start_monitor("ap-cascade", watched=f"{WATCHED_UNIT}:{WATCHED_THR}") + start_cpu_load(WATCHED_UNIT) + machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15) + # Simulate apcupsd onbattery hook: `systemctl stop xmrig` while frozen. + # Without the PartOf cascade this would hang for TimeoutStopSec (10s + # in the mock config, 90s in production) and systemd's freezer bug + # class could strand the unit. With cascade: auto-pause stops first, + # its ExecStop thaws cgroup.freeze, xmrig's SIGTERM then succeeds. + t0 = time.monotonic() + machine.succeed("systemctl stop xmrig") + dt = time.monotonic() - t0 + assert dt < 5, f"systemctl stop xmrig took {dt:.1f}s, cascade broken" + machine.succeed("systemctl show xmrig -p ActiveState --value | grep -q inactive") + # auto-pause stopped as a PartOf dependent + machine.succeed("systemctl show ap-cascade -p ActiveState --value | grep -qE 'inactive|deactivating'") + # Bring xmrig back for any remaining subtests machine.succeed("systemctl start xmrig") machine.wait_for_unit("xmrig.service") - - with subtest("Script restart preserves pause state"): - machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}") - start_monitor("xmrig-auto-pause-persist") - - # Load -> xmrig stops - start_cpu_load("cpu-persist") - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - - # Kill the monitor while xmrig is paused (simulates script crash) - machine.succeed("systemctl stop xmrig-auto-pause-persist") - - # State file must exist — the monitor persisted the pause flag - machine.succeed(f"test -f {STATE_DIR}/paused") - - # Start a fresh monitor instance (reads state file on startup) - start_monitor("xmrig-auto-pause-persist2") - - # End load — the new monitor should pick up the paused state - # and restart xmrig after the grace period - stop_cpu_load("cpu-persist") - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) - - # State file should be cleaned up after successful restart - machine.fail(f"test -f {STATE_DIR}/paused") - - machine.succeed("systemctl stop xmrig-auto-pause-persist2") + stop_cpu_load(WATCHED_UNIT) + machine.succeed("systemctl reset-failed ap-cascade 2>/dev/null || true") ''; }