xmrig-auto-pause: use cgroup.freeze to freeze and thaw xmrig

This commit is contained in:
2026-04-21 14:30:03 -04:00
parent a8cf95c7dd
commit 018b590e0d
3 changed files with 492 additions and 218 deletions

View File

@@ -2,15 +2,33 @@
config, config,
lib, lib,
pkgs, pkgs,
service_configs,
... ...
}: }:
let
cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service";
cgroupFreeze = "${cgroupDir}/cgroup.freeze";
in
lib.mkIf config.services.xmrig.enable { lib.mkIf config.services.xmrig.enable {
systemd.services.xmrig-auto-pause = { systemd.services.xmrig-auto-pause = {
description = "Auto-pause xmrig when other services need CPU"; description = "Auto-pause xmrig via cgroup freezer when other services need CPU";
after = [ "xmrig.service" ]; after = [ "xmrig.service" ];
# PartOf cascades stop/restart: when xmrig stops (deploy, apcupsd battery,
# manual), systemd stops auto-pause first and ExecStop thaws xmrig so
# xmrig's own stop does not hang on a frozen cgroup.
partOf = [ "xmrig.service" ];
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
serviceConfig = { serviceConfig = {
ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}"; ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
# Safety net: any exit path (SIGTERM from PartOf cascade, systemctl stop,
# crash with Restart=) must leave xmrig thawed. The Python SIGTERM
# handler does the same thing; this covers SIGKILL / hard crash paths
# too. Idempotent.
ExecStop = pkgs.writeShellScript "xmrig-auto-pause-thaw" ''
f=${cgroupFreeze}
[ -w "$f" ] && echo 0 > "$f" || true
'';
Restart = "always"; Restart = "always";
RestartSec = "10s"; RestartSec = "10s";
NoNewPrivileges = true; NoNewPrivileges = true;
@@ -22,6 +40,9 @@ lib.mkIf config.services.xmrig.enable {
]; ];
MemoryDenyWriteExecute = true; MemoryDenyWriteExecute = true;
StateDirectory = "xmrig-auto-pause"; StateDirectory = "xmrig-auto-pause";
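# Required so the script can write to cgroup.freeze from inside the
# sandbox. NOTE(review): ProtectSystem=strict itself exempts /dev, /proc
# and /sys; the read-only /sys/fs/cgroup presumably comes from
# ProtectControlGroups= or ProtectKernelTunables= — confirm which one
# this unit sets.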
ReadWritePaths = [ cgroupDir ];
}; };
environment = { environment = {
POLL_INTERVAL = "3"; POLL_INTERVAL = "3";
@@ -32,8 +53,19 @@ lib.mkIf config.services.xmrig.enable {
# steady-state floor to avoid restarting xmrig while services are active. # steady-state floor to avoid restarting xmrig while services are active.
CPU_STOP_THRESHOLD = "40"; CPU_STOP_THRESHOLD = "40";
CPU_RESUME_THRESHOLD = "10"; CPU_RESUME_THRESHOLD = "10";
STARTUP_COOLDOWN = "10";
STATE_DIR = "/var/lib/xmrig-auto-pause"; STATE_DIR = "/var/lib/xmrig-auto-pause";
XMRIG_CGROUP_FREEZE = cgroupFreeze;
# Per-service CPU thresholds. Catches sub-threshold activity that never
# trips the system-wide gauge — a single Minecraft player uses 3-15% of
# one core (0.3-1.3% of a 12-thread host) which is pure noise in
# /proc/stat but dominant in the minecraft cgroup.
WATCHED_SERVICES = lib.concatStringsSep "," (
lib.optional config.services.minecraft-servers.enable "minecraft-server-${service_configs.minecraft.server_name}:2"
);
}; };
}; };
# Pull auto-pause along whenever xmrig starts. After= on auto-pause ensures
# correct order; Wants= here ensures it actually starts.
systemd.services.xmrig.wants = [ "xmrig-auto-pause.service" ];
} }

View File

@@ -2,33 +2,54 @@
""" """
Auto-pause xmrig when other services need CPU. Auto-pause xmrig when other services need CPU.
Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19, Two independent signals drive the decision; either one can trigger a pause:
its CPU time lands in the 'nice' column and is excluded from the metric.
When real workload (user + system + irq + softirq) exceeds the stop
threshold, stops xmrig. When it drops below the resume threshold for
GRACE_PERIOD seconds, restarts xmrig.
This replaces per-service pause scripts with a single general-purpose 1. System-wide non-nice CPU from /proc/stat. Catches any CPU-heavy workload
monitor that handles any CPU-intensive workload (gitea workers, llama-cpp including non-systemd user work (interactive sessions, ad-hoc jobs).
inference, etc.) without needing to know about specific processes. Since xmrig runs at Nice=19, its CPU time lands in the 'nice' column and
is excluded from the metric.
2. Per-service CPU from cgroup cpu.stat usage_usec. Catches sub-threshold
service activity — a single Minecraft player drives the server JVM to
3-15% of one core, which is noise system-wide (0.3-1.3% of total on a
12-thread host) but dominant for the minecraft cgroup.
When either signal crosses its stop threshold, writes 1 to
/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze. When both are quiet
for GRACE_PERIOD seconds, writes 0 to resume.
Why direct cgroup.freeze instead of systemctl freeze:
systemd 256+ has a bug class where `systemctl freeze` followed by any
process death (SIGKILL, watchdog, OOM, segfault, shutdown) strands the
unit in FreezerState=frozen ActiveState=failed with no recovery short of
a reboot. See https://github.com/systemd/systemd/issues/38517. Writing
directly to cgroup.freeze keeps systemd's FreezerState at "running" the
whole time, so there is no state machine to get stuck: if xmrig dies
while frozen, systemd transitions it to inactive normally.
Why scheduler priority alone isn't enough: Why scheduler priority alone isn't enough:
Nice=19 / SCHED_IDLE only affects which thread gets the next time slice. Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) holds about
the shared 32MB L3 cache, and its memory access pattern saturates DRAM 68% of the shared 32MB L3 cache on Zen 3, evicting hot lines from
bandwidth. Other services run slower even though they aren't denied CPU interactive services. Measured on muffin: pointer-chase latency is 112ns
time. The only fix is to stop xmrig entirely when real work is happening. with xmrig running and 19ns with xmrig frozen — a 6x difference that
scheduler priority cannot address.
Hysteresis: Hysteresis:
The stop threshold is set higher than the resume threshold to prevent The system-wide stop threshold sits higher than the resume threshold
oscillation. When xmrig runs, its L3 cache pressure makes other processes because background services (qbittorrent, bitmagnet, postgres) produce
appear ~3-8% busier. A single threshold trips on this indirect effect, 15-25% non-nice CPU during normal operation, and xmrig's indirect cache
causing stop/start thrashing. Separate thresholds break the cycle: the pressure inflates that by another few percent. A single threshold
resume threshold confirms the system is truly idle, while the stop thrashes on the floor; two thresholds break the cycle.
threshold requires genuine workload above xmrig's indirect pressure.
Per-service thresholds are single-valued. Per-service CPU is a clean
signal without background noise to calibrate against, so idle_since is
reset whenever any watched service is at-or-above its threshold and the
grace period only advances when every watched service is below.
""" """
import os import os
import signal
import subprocess import subprocess
import sys import sys
import time import time
@@ -37,19 +58,23 @@ POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15")) GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
# Percentage of total CPU ticks that non-nice processes must use to trigger # Percentage of total CPU ticks that non-nice processes must use to trigger
# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total. # a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
# Default 15% requires roughly two busy cores, which avoids false positives
# from xmrig's L3 cache pressure inflating other processes' apparent CPU.
CPU_STOP_THRESHOLD = float(os.environ.get("CPU_STOP_THRESHOLD", "15")) CPU_STOP_THRESHOLD = float(os.environ.get("CPU_STOP_THRESHOLD", "15"))
# Percentage below which the system is considered idle enough to resume # Percentage below which the system is considered idle enough to resume
# mining. Lower than the stop threshold to provide hysteresis. # mining. Lower than the stop threshold to provide hysteresis.
CPU_RESUME_THRESHOLD = float(os.environ.get("CPU_RESUME_THRESHOLD", "5")) CPU_RESUME_THRESHOLD = float(os.environ.get("CPU_RESUME_THRESHOLD", "5"))
# After starting xmrig, ignore CPU spikes for this many seconds to let # Per-service CPU thresholds parsed from "unit1:threshold1,unit2:threshold2".
# RandomX dataset initialization complete (~4s on the target hardware) # Thresholds are percentage of TOTAL CPU capacity (same frame as
# without retriggering a stop. # CPU_STOP_THRESHOLD). Empty / unset disables the per-service path.
STARTUP_COOLDOWN = float(os.environ.get("STARTUP_COOLDOWN", "10")) WATCHED_SERVICES_RAW = os.environ.get("WATCHED_SERVICES", "")
# Path to xmrig's cgroup.freeze file. Direct write bypasses systemd's
# freezer state machine; see module docstring.
XMRIG_CGROUP_FREEZE = os.environ.get(
"XMRIG_CGROUP_FREEZE",
"/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze",
)
# Directory for persisting pause state across script restarts. Without # Directory for persisting pause state across script restarts. Without
# this, a restart while xmrig is paused loses the paused_by_us flag and # this, a restart while xmrig is paused loses the paused_by_us flag and
# xmrig stays stopped permanently. # xmrig stays frozen until something else thaws it.
STATE_DIR = os.environ.get("STATE_DIR", "") STATE_DIR = os.environ.get("STATE_DIR", "")
_PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else "" _PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else ""
@@ -58,6 +83,51 @@ def log(msg):
print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True) print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True)
def _parse_watched(spec):
out = {}
for entry in filter(None, (s.strip() for s in spec.split(","))):
name, _, pct = entry.partition(":")
name = name.strip()
pct = pct.strip()
if not name or not pct:
log(f"WATCHED_SERVICES: ignoring malformed entry '{entry}'")
continue
try:
out[name] = float(pct)
except ValueError:
log(f"WATCHED_SERVICES: ignoring non-numeric threshold in '{entry}'")
return out
def _resolve_cgroup_cpustat(unit):
"""Look up the unit's cgroup path via systemd. Returns cpu.stat path or
None if the unit has no cgroup (service not running, unknown unit)."""
result = subprocess.run(
["systemctl", "show", "--value", "--property=ControlGroup", unit],
capture_output=True,
text=True,
)
cg = result.stdout.strip()
if not cg:
return None
path = f"/sys/fs/cgroup{cg}/cpu.stat"
if not os.path.isfile(path):
return None
return path
def _read_service_usec(path):
"""Cumulative cpu.stat usage_usec, or None if the cgroup has vanished."""
try:
with open(path) as f:
for line in f:
if line.startswith("usage_usec "):
return int(line.split()[1])
except FileNotFoundError:
return None
return None
def read_cpu_ticks(): def read_cpu_ticks():
"""Read CPU tick counters from /proc/stat. """Read CPU tick counters from /proc/stat.
@@ -84,123 +154,241 @@ def is_active(unit):
return result.returncode == 0 return result.returncode == 0
def systemctl(action, unit): def main_pid(unit):
"""Return the unit's MainPID, or 0 if unit is not running."""
result = subprocess.run( result = subprocess.run(
["systemctl", action, unit], ["systemctl", "show", "--value", "--property=MainPID", unit],
capture_output=True, capture_output=True,
text=True, text=True,
) )
if result.returncode != 0: try:
log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}") return int(result.stdout.strip() or "0")
return result.returncode == 0 except ValueError:
return 0
def _save_paused(paused): def _freeze(frozen):
"""Persist pause flag so a script restart can resume where we left off.""" """Write 1 or 0 to xmrig's cgroup.freeze. Returns True on success.
Direct kernel interface — bypasses systemd's freezer state tracking."""
try:
with open(XMRIG_CGROUP_FREEZE, "w") as f:
f.write("1" if frozen else "0")
return True
except OSError as e:
action = "freeze" if frozen else "thaw"
log(f"cgroup.freeze {action} write failed: {e}")
return False
def _is_frozen():
"""Read the actual frozen state from cgroup.events. False if cgroup absent."""
events_path = os.path.join(os.path.dirname(XMRIG_CGROUP_FREEZE), "cgroup.events")
try:
with open(events_path) as f:
for line in f:
if line.startswith("frozen "):
return line.split()[1] == "1"
except FileNotFoundError:
return False
return False
def _save_paused(pid):
"""Persist the xmrig MainPID at the time of freeze. pid=0 clears claim."""
if not _PAUSE_FILE: if not _PAUSE_FILE:
return return
try: try:
if paused: if pid:
open(_PAUSE_FILE, "w").close() with open(_PAUSE_FILE, "w") as f:
f.write(str(pid))
else: else:
try:
os.remove(_PAUSE_FILE) os.remove(_PAUSE_FILE)
except OSError: except FileNotFoundError:
pass pass
except OSError as e:
log(f"state file write failed: {e}")
def _load_paused(): def _load_paused():
"""Check if a previous instance left xmrig paused.""" """Return True iff our claim is still valid: same PID and still frozen.
Restart of the xmrig unit gives it a new PID, which invalidates any
prior claim — we can't "own" a freeze we didn't perform on this
instance. Also confirms the cgroup is actually frozen so an external
thaw drops the claim.
"""
if not _PAUSE_FILE: if not _PAUSE_FILE:
return False return False
return os.path.isfile(_PAUSE_FILE) try:
with open(_PAUSE_FILE) as f:
saved = int(f.read().strip() or "0")
except (FileNotFoundError, ValueError):
return False
if not saved:
return False
if saved != main_pid("xmrig.service"):
return False
return _is_frozen()
def _cleanup(signum=None, frame=None):
"""On SIGTERM/SIGINT: thaw xmrig and clear claim. Operators must never see
a frozen unit we owned after auto-pause exits."""
if _is_frozen():
_freeze(False)
_save_paused(0)
sys.exit(0)
def main(): def main():
paused_by_us = _load_paused() watched_services = _parse_watched(WATCHED_SERVICES_RAW)
idle_since = None watched_paths = {}
started_at = None # monotonic time when we last started xmrig for name in watched_services:
prev_total = None path = _resolve_cgroup_cpustat(name)
prev_work = None if path is None:
log(f"WATCHED_SERVICES: {name} has no cgroup — ignoring until it starts")
watched_paths[name] = path
nproc = os.cpu_count() or 1
signal.signal(signal.SIGTERM, _cleanup)
signal.signal(signal.SIGINT, _cleanup)
paused_by_us = _load_paused()
if paused_by_us: if paused_by_us:
log("Recovered pause state from previous instance") log("Recovered pause state from previous instance")
log( log(
f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s " f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s "
f"stop={CPU_STOP_THRESHOLD}% resume={CPU_RESUME_THRESHOLD}% " f"sys_stop={CPU_STOP_THRESHOLD}% sys_resume={CPU_RESUME_THRESHOLD}% "
f"cooldown={STARTUP_COOLDOWN}s" f"watched={watched_services or '(none)'}"
) )
idle_since = None
prev_total = None
prev_work = None
prev_monotonic = None
prev_service_usec = {}
while True: while True:
total, work = read_cpu_ticks() total, work = read_cpu_ticks()
now = time.monotonic()
if prev_total is None: if prev_total is None:
prev_total = total prev_total = total
prev_work = work prev_work = work
prev_monotonic = now
# seed per-service baselines too
for name, path in watched_paths.items():
if path is None:
# Re-resolve in case the service has started since startup
path = _resolve_cgroup_cpustat(name)
watched_paths[name] = path
if path is not None:
usec = _read_service_usec(path)
if usec is not None:
prev_service_usec[name] = usec
time.sleep(POLL_INTERVAL) time.sleep(POLL_INTERVAL)
continue continue
dt = total - prev_total dt = total - prev_total
if dt <= 0: dt_s = now - prev_monotonic
if dt <= 0 or dt_s <= 0:
prev_total = total prev_total = total
prev_work = work prev_work = work
prev_monotonic = now
time.sleep(POLL_INTERVAL) time.sleep(POLL_INTERVAL)
continue continue
real_work_pct = ((work - prev_work) / dt) * 100 real_work_pct = ((work - prev_work) / dt) * 100
# Per-service CPU percentages this window. Fraction of total CPU
# capacity used by this specific service, same frame as real_work_pct.
svc_pct = {}
for name in watched_services:
path = watched_paths.get(name)
if path is None:
# Unit wasn't running at startup; try resolving again in case
# it has started since.
path = _resolve_cgroup_cpustat(name)
watched_paths[name] = path
if path is None:
prev_service_usec.pop(name, None)
continue
cur = _read_service_usec(path)
if cur is None:
# Service stopped; drop prev so it doesn't compute a huge delta
# on next start.
prev_service_usec.pop(name, None)
watched_paths[name] = None # force re-resolution next poll
continue
if name in prev_service_usec:
delta_us = cur - prev_service_usec[name]
if delta_us >= 0:
svc_pct[name] = (delta_us / 1_000_000) / (dt_s * nproc) * 100
prev_service_usec[name] = cur
prev_total = total prev_total = total
prev_work = work prev_work = work
prev_monotonic = now
# Don't act during startup cooldown — RandomX dataset init causes above_stop_sys = real_work_pct > CPU_STOP_THRESHOLD
# a transient CPU spike that would immediately retrigger a stop. below_resume_sys = real_work_pct <= CPU_RESUME_THRESHOLD
if started_at is not None:
if time.monotonic() - started_at < STARTUP_COOLDOWN:
time.sleep(POLL_INTERVAL)
continue
# Cooldown expired — verify xmrig survived startup. If it
# crashed during init (hugepage failure, pool unreachable, etc.),
# re-enter the pause/retry cycle rather than silently leaving
# xmrig dead.
if not is_active("xmrig.service"):
log("xmrig died during startup cooldown — will retry")
paused_by_us = True
_save_paused(True)
started_at = None
above_stop = real_work_pct > CPU_STOP_THRESHOLD busy_services = [
below_resume = real_work_pct <= CPU_RESUME_THRESHOLD n for n in watched_services if svc_pct.get(n, 0) > watched_services[n]
]
any_svc_at_or_above = any(
svc_pct.get(n, 0) >= watched_services[n] for n in watched_services
)
if above_stop: stop_pressure = above_stop_sys or bool(busy_services)
fully_idle = below_resume_sys and not any_svc_at_or_above
if stop_pressure:
idle_since = None idle_since = None
if paused_by_us and is_active("xmrig.service"): if paused_by_us and not _is_frozen():
# Something else restarted xmrig (deploy, manual start, etc.) # Someone thawed xmrig while we believed it paused. Reclaim
# while we thought it was stopped. Reset ownership so we can # ownership so we can re-freeze.
# manage it again. log("xmrig was thawed externally while paused — reclaiming")
log("xmrig was restarted externally while paused — reclaiming")
paused_by_us = False paused_by_us = False
_save_paused(False) _save_paused(0)
if not paused_by_us: if not paused_by_us and is_active("xmrig.service"):
# Only claim ownership if xmrig is actually running. # Only claim ownership if xmrig is actually running. If
# If something else stopped it (e.g. UPS battery hook), # something else stopped it (e.g. UPS battery hook), don't
# don't interfere — we'd wrongly restart it later. # interfere.
if is_active("xmrig.service"): if busy_services:
log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig") reasons = ", ".join(
if systemctl("stop", "xmrig.service"): f"{n}={svc_pct[n]:.1f}%>{watched_services[n]:.1f}%"
for n in busy_services
)
log(f"Stop: watched service(s) busy [{reasons}] — freezing xmrig")
else:
log(
f"Stop: system CPU {real_work_pct:.1f}% > "
f"{CPU_STOP_THRESHOLD:.1f}% — freezing xmrig"
)
if _freeze(True):
paused_by_us = True paused_by_us = True
_save_paused(True) _save_paused(main_pid("xmrig.service"))
elif paused_by_us: elif paused_by_us:
if below_resume: if fully_idle:
if idle_since is None: if idle_since is None:
idle_since = time.monotonic() idle_since = time.monotonic()
elif time.monotonic() - idle_since >= GRACE_PERIOD: elif time.monotonic() - idle_since >= GRACE_PERIOD:
log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig") log(
if systemctl("start", "xmrig.service"): f"Idle past grace period (system {real_work_pct:.1f}%) "
"— thawing xmrig"
)
if _freeze(False):
paused_by_us = False paused_by_us = False
_save_paused(False) _save_paused(0)
started_at = time.monotonic()
idle_since = None idle_since = None
else: else:
# Between thresholds — not idle enough to resume. # Between thresholds or a watched service is borderline — not
# idle enough to resume.
idle_since = None idle_since = None
time.sleep(POLL_INTERVAL) time.sleep(POLL_INTERVAL)

View File

@@ -5,6 +5,15 @@
let let
script = ../services/monero/xmrig-auto-pause.py; script = ../services/monero/xmrig-auto-pause.py;
python = pkgs.python3; python = pkgs.python3;
cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service";
cgroupFreeze = "${cgroupDir}/cgroup.freeze";
cgroupEvents = "${cgroupDir}/cgroup.events";
# Inline ExecStop for the transient monitor: mirrors the production .nix
# ExecStop so the PartOf cascade test exercises the same code path.
thawScript = pkgs.writeShellScript "test-thaw-xmrig" ''
f=${cgroupFreeze}
[ -w "$f" ] && echo 0 > "$f" || true
'';
in in
pkgs.testers.runNixOSTest { pkgs.testers.runNixOSTest {
name = "xmrig-auto-pause"; name = "xmrig-auto-pause";
@@ -17,13 +26,18 @@ pkgs.testers.runNixOSTest {
pkgs.procps pkgs.procps
]; ];
# Mock xmrig as a nice'd sleep process that can be stopped/started. # Mock xmrig as a nice'd sleep process. Runs in the real
# /sys/fs/cgroup/system.slice/xmrig.service cgroup, which is what the
# auto-pause script writes cgroup.freeze into.
systemd.services.xmrig = { systemd.services.xmrig = {
description = "Mock xmrig miner"; description = "Mock xmrig miner";
serviceConfig = { serviceConfig = {
ExecStart = "${pkgs.coreutils}/bin/sleep infinity"; ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
Type = "simple"; Type = "simple";
Nice = 19; Nice = 19;
# Short timeout so the PartOf cascade test completes fast if the
# cascade is broken (would otherwise hit systemd's 90s default).
TimeoutStopSec = "10s";
}; };
wantedBy = [ "multi-user.target" ]; wantedBy = [ "multi-user.target" ];
}; };
@@ -34,20 +48,39 @@ pkgs.testers.runNixOSTest {
PYTHON = "${python}/bin/python3" PYTHON = "${python}/bin/python3"
SCRIPT = "${script}" SCRIPT = "${script}"
CGROUP_FREEZE = "${cgroupFreeze}"
CGROUP_EVENTS = "${cgroupEvents}"
THAW_SCRIPT = "${thawScript}"
# Tuned for test VMs (1-2 cores). # Tuned for test VMs (1-2 cores).
# POLL_INTERVAL=1 keeps detection latency low. # POLL_INTERVAL=1 keeps detection latency low.
# GRACE_PERIOD=5 is long enough to verify "stays stopped" but short # GRACE_PERIOD=5 is long enough to verify hysteresis, short enough for
# enough that the full test completes in reasonable time. # reasonable total test time.
# CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%) # CPU_STOP_HIGH=999 effectively disables the system-wide path (a 1-core
# without triggering from normal VM noise. # VM can never exceed 100% of total CPU) so per-service subtests exercise
# CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM. # that path in isolation. CPU_STOP_LOW=20 catches a bash busy-loop on a
# 1-2 core VM without tripping on normal VM noise.
POLL_INTERVAL = "1" POLL_INTERVAL = "1"
GRACE_PERIOD = "5" GRACE_PERIOD = "5"
CPU_STOP_THRESHOLD = "20" CPU_STOP_HIGH = "999"
CPU_RESUME_THRESHOLD = "10" CPU_STOP_LOW = "20"
STARTUP_COOLDOWN = "4" CPU_RESUME_HIGH = "950"
CPU_RESUME_LOW = "10"
STATE_DIR = "/tmp/xap-state" STATE_DIR = "/tmp/xap-state"
WATCHED_UNIT = "watched-burn"
WATCHED_THR = "5"
def frozen():
out = machine.succeed(f"cat {CGROUP_EVENTS}")
return "frozen 1" in out
def thawed():
out = machine.succeed(f"cat {CGROUP_EVENTS}")
return "frozen 0" in out
def xmrig_pid():
return machine.succeed("systemctl show xmrig -p MainPID --value").strip()
def start_cpu_load(name): def start_cpu_load(name):
"""Start a non-nice CPU burn as a transient systemd unit.""" """Start a non-nice CPU burn as a transient systemd unit."""
machine.succeed( machine.succeed(
@@ -58,20 +91,29 @@ pkgs.testers.runNixOSTest {
def stop_cpu_load(name): def stop_cpu_load(name):
machine.succeed(f"systemctl stop {name}") machine.succeed(f"systemctl stop {name}")
def start_monitor(unit_name): def start_monitor(unit_name, *, watched="", cpu_stop=CPU_STOP_HIGH, cpu_resume=CPU_RESUME_HIGH):
"""Start the auto-pause monitor as a transient unit.""" """Start the auto-pause monitor as a transient unit.
machine.succeed(
f"systemd-run --unit={unit_name} " watched="foo:5,bar:10" enables the per-service path.
f"--setenv=POLL_INTERVAL={POLL_INTERVAL} " cpu_stop/cpu_resume default to values that disable the system-wide
f"--setenv=GRACE_PERIOD={GRACE_PERIOD} " path (999/950, i.e. CPU_STOP_HIGH/CPU_RESUME_HIGH) so per-service behaviour is tested in isolation.
f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} " """
f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} " parts = [
f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} " f"systemd-run --unit={unit_name}",
f"--setenv=STATE_DIR={STATE_DIR} " "--property=After=xmrig.service",
f"{PYTHON} {SCRIPT}" "--property=PartOf=xmrig.service",
) f"--property=ExecStop={THAW_SCRIPT}",
# Monitor needs two consecutive polls to compute a CPU delta. f"--setenv=POLL_INTERVAL={POLL_INTERVAL}",
time.sleep(3) f"--setenv=GRACE_PERIOD={GRACE_PERIOD}",
f"--setenv=CPU_STOP_THRESHOLD={cpu_stop}",
f"--setenv=CPU_RESUME_THRESHOLD={cpu_resume}",
f"--setenv=STATE_DIR={STATE_DIR}",
f"--setenv=XMRIG_CGROUP_FREEZE={CGROUP_FREEZE}",
]
if watched:
parts.append(f"--setenv=WATCHED_SERVICES={watched}")
parts.append(f"{PYTHON} {SCRIPT}")
machine.succeed(" ".join(parts))
# Monitor needs two consecutive polls to compute a CPU delta. # Monitor needs two consecutive polls to compute a CPU delta.
time.sleep(3) time.sleep(3)
@@ -80,127 +122,139 @@ pkgs.testers.runNixOSTest {
machine.wait_for_unit("xmrig.service") machine.wait_for_unit("xmrig.service")
machine.succeed(f"mkdir -p {STATE_DIR}") machine.succeed(f"mkdir -p {STATE_DIR}")
with subtest("Start auto-pause monitor"): # ------------------------------------------------------------------
start_monitor("xmrig-auto-pause") # Per-service path (primary signal)
# ------------------------------------------------------------------
with subtest("xmrig stays running while system is idle"): with subtest("Idle xmrig stays thawed"):
machine.succeed("systemctl is-active xmrig") start_monitor("ap-watched", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
assert thawed(), f"expected thawed, got: {machine.succeed(f'cat {CGROUP_EVENTS}')}"
pid0 = xmrig_pid()
assert pid0 and pid0 != "0", f"expected a real xmrig PID, got {pid0!r}"
with subtest("xmrig stopped when CPU load appears"): with subtest("Watched service CPU load xmrig frozen, PID preserved"):
start_cpu_load("cpu-load") start_cpu_load(WATCHED_UNIT)
machine.wait_until_fails("systemctl is-active xmrig", timeout=20) machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
assert xmrig_pid() == pid0, "PID must be preserved across freeze"
with subtest("xmrig remains stopped during grace period after load ends"): with subtest("Load ends xmrig thawed after grace period, same PID"):
stop_cpu_load("cpu-load") stop_cpu_load(WATCHED_UNIT)
# Load just stopped. Grace period is 5s. Check at 2s well within. # Grace period is 5s; watched service drops to 0 immediately, so the
time.sleep(2) # idle timer starts right away. Expect thaw within GRACE + 2*POLL.
machine.fail("systemctl is-active xmrig") machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
assert xmrig_pid() == pid0, "PID must survive the whole cycle"
with subtest("xmrig resumes after grace period expires"): with subtest("Intermittent watched load does not cause flapping"):
# Already idle since previous subtest. Grace period (5s) plus start_cpu_load(WATCHED_UNIT)
# detection delay (~2 polls) plus startup cooldown (4s) means machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# xmrig should restart within ~12s. stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) time.sleep(2) # shorter than grace period
start_cpu_load(WATCHED_UNIT)
with subtest("Intermittent load does not cause flapping"):
# First load stop xmrig
start_cpu_load("cpu-load-1")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
stop_cpu_load("cpu-load-1")
# Brief idle gap shorter than grace period
time.sleep(2)
# Second load arrives before grace period expires
start_cpu_load("cpu-load-2")
time.sleep(3) time.sleep(3)
assert frozen(), "xmrig must still be frozen during intermittent load"
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
# xmrig must still be stopped with subtest("Sustained watched load keeps xmrig frozen"):
machine.fail("systemctl is-active xmrig") start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
time.sleep(int(GRACE_PERIOD) + 3) # past grace period
assert frozen(), "sustained load must keep xmrig frozen"
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
stop_cpu_load("cpu-load-2") with subtest("External thaw reclaimed while load present"):
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# Someone manually thaws xmrig. Auto-pause must detect and re-freeze.
machine.succeed(f"echo 0 > {CGROUP_FREEZE}")
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
with subtest("Sustained load keeps xmrig stopped"): with subtest("Monitor SIGTERM thaws xmrig"):
start_cpu_load("cpu-load-3") start_cpu_load(WATCHED_UNIT)
machine.wait_until_fails("systemctl is-active xmrig", timeout=20) machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
machine.succeed("systemctl stop ap-watched")
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=10)
stop_cpu_load(WATCHED_UNIT)
machine.succeed("systemctl reset-failed ap-watched 2>/dev/null || true")
# Stay busy longer than the grace period to prove continuous # ------------------------------------------------------------------
# activity keeps xmrig stopped indefinitely. # Negative control + system-wide path
time.sleep(8) # ------------------------------------------------------------------
machine.fail("systemctl is-active xmrig")
stop_cpu_load("cpu-load-3") with subtest("Unwatched CPU burn does not trip per-service path"):
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) # High CPU_STOP_THRESHOLD + no watched service no reason to freeze.
machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-neg")
start_cpu_load("unwatched-neg")
time.sleep(int(GRACE_PERIOD) + 3)
assert thawed(), "unwatched load must not trip when system threshold is high and nothing is watched"
stop_cpu_load("unwatched-neg")
machine.succeed("systemctl stop ap-neg")
machine.succeed("systemctl reset-failed ap-neg 2>/dev/null || true")
with subtest("External restart detected and re-stopped under load"): with subtest("System-wide CPU path freezes xmrig when threshold is low"):
# Put system under load so auto-pause stops xmrig. machine.succeed(f"rm -f {STATE_DIR}/paused")
start_cpu_load("cpu-load-4") start_monitor("ap-sys", cpu_stop=CPU_STOP_LOW, cpu_resume=CPU_RESUME_LOW)
machine.wait_until_fails("systemctl is-active xmrig", timeout=20) start_cpu_load("sys-burn")
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=20)
stop_cpu_load("sys-burn")
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
machine.succeed("systemctl stop ap-sys")
machine.succeed("systemctl reset-failed ap-sys 2>/dev/null || true")
# Something external starts xmrig while load is active. # ------------------------------------------------------------------
# The script should detect this and re-stop it. # State persistence and operational edge cases
machine.succeed("systemctl start xmrig") # ------------------------------------------------------------------
machine.succeed("systemctl is-active xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
stop_cpu_load("cpu-load-4") with subtest("Monitor crash preserves pause claim; next instance resumes"):
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20) machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-persist", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# State file must contain the xmrig PID we claim to have frozen.
machine.succeed(f"test -s {STATE_DIR}/paused")
saved = machine.succeed(f"cat {STATE_DIR}/paused").strip()
assert saved == xmrig_pid(), f"state file PID {saved!r} != live xmrig PID {xmrig_pid()!r}"
# Hard-kill the monitor. ExecStop does NOT run on SIGKILL, so xmrig
# stays frozen. The state file persists.
machine.succeed("systemctl kill --signal=KILL ap-persist")
machine.succeed("systemctl reset-failed ap-persist 2>/dev/null || true")
assert frozen(), "xmrig must remain frozen after monitor SIGKILL"
# Fresh monitor picks up the state file, recognises the same PID +
# still-frozen cgroup, and continues owning the claim. Ending the
# load must thaw xmrig through the normal grace path.
start_monitor("ap-persist2", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
# State file cleared after successful resume.
machine.fail(f"test -f {STATE_DIR}/paused")
machine.succeed("systemctl stop ap-persist2")
machine.succeed("systemctl reset-failed ap-persist2 2>/dev/null || true")
# --- State persistence and crash recovery --- with subtest("systemctl stop xmrig cascades via PartOf and completes quickly"):
machine.succeed("systemctl stop xmrig-auto-pause") machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-cascade", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
with subtest("xmrig recovers after crash during startup cooldown"): start_cpu_load(WATCHED_UNIT)
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}") machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
start_monitor("xmrig-auto-pause-crash") # Simulate apcupsd onbattery hook: `systemctl stop xmrig` while frozen.
# Without the PartOf cascade this would hang for TimeoutStopSec (10s
# Load -> xmrig stops # in the mock config, 90s in production) and systemd's freezer bug
start_cpu_load("cpu-crash") # class could strand the unit. With cascade: auto-pause stops first,
machine.wait_until_fails("systemctl is-active xmrig", timeout=20) # its ExecStop thaws cgroup.freeze, xmrig's SIGTERM then succeeds.
t0 = time.monotonic()
# End load -> xmrig restarts after grace period machine.succeed("systemctl stop xmrig")
stop_cpu_load("cpu-crash") dt = time.monotonic() - t0
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30) assert dt < 5, f"systemctl stop xmrig took {dt:.1f}s, cascade broken"
machine.succeed("systemctl show xmrig -p ActiveState --value | grep -q inactive")
# Kill xmrig immediately simulates crash during startup cooldown. # auto-pause stopped as a PartOf dependent
# The script should detect the failure when cooldown expires and machine.succeed("systemctl show ap-cascade -p ActiveState --value | grep -qE 'inactive|deactivating'")
# re-enter the retry cycle. # Bring xmrig back for any remaining subtests
machine.succeed("systemctl kill --signal=KILL xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=5)
# After cooldown + grace period + restart, xmrig should be back.
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
machine.succeed("systemctl stop xmrig-auto-pause-crash")
machine.succeed("systemctl reset-failed xmrig.service || true")
machine.succeed("systemctl start xmrig") machine.succeed("systemctl start xmrig")
machine.wait_for_unit("xmrig.service") machine.wait_for_unit("xmrig.service")
stop_cpu_load(WATCHED_UNIT)
with subtest("Script restart preserves pause state"): machine.succeed("systemctl reset-failed ap-cascade 2>/dev/null || true")
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
start_monitor("xmrig-auto-pause-persist")
# Load -> xmrig stops
start_cpu_load("cpu-persist")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# Kill the monitor while xmrig is paused (simulates script crash)
machine.succeed("systemctl stop xmrig-auto-pause-persist")
# State file must exist the monitor persisted the pause flag
machine.succeed(f"test -f {STATE_DIR}/paused")
# Start a fresh monitor instance (reads state file on startup)
start_monitor("xmrig-auto-pause-persist2")
# End load the new monitor should pick up the paused state
# and restart xmrig after the grace period
stop_cpu_load("cpu-persist")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
# State file should be cleaned up after successful restart
machine.fail(f"test -f {STATE_DIR}/paused")
machine.succeed("systemctl stop xmrig-auto-pause-persist2")
''; '';
} }