xmrig-auto-pause: use cgroup.freeze and thaws

This commit is contained in:
2026-04-21 14:30:03 -04:00
parent a8cf95c7dd
commit 018b590e0d
3 changed files with 492 additions and 218 deletions

View File

@@ -2,15 +2,33 @@
config,
lib,
pkgs,
service_configs,
...
}:
let
cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service";
cgroupFreeze = "${cgroupDir}/cgroup.freeze";
in
lib.mkIf config.services.xmrig.enable {
systemd.services.xmrig-auto-pause = {
description = "Auto-pause xmrig when other services need CPU";
description = "Auto-pause xmrig via cgroup freezer when other services need CPU";
after = [ "xmrig.service" ];
# PartOf cascades stop/restart: when xmrig stops (deploy, apcupsd battery,
# manual), systemd stops auto-pause first and ExecStop thaws xmrig so
# xmrig's own stop does not hang on a frozen cgroup.
partOf = [ "xmrig.service" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
# Safety net: any exit path (SIGTERM from PartOf cascade, systemctl stop,
# crash with Restart=) must leave xmrig thawed. The Python SIGTERM
# handler does the same thing; this covers SIGKILL / hard crash paths
# too. Idempotent.
ExecStop = pkgs.writeShellScript "xmrig-auto-pause-thaw" ''
f=${cgroupFreeze}
[ -w "$f" ] && echo 0 > "$f" || true
'';
Restart = "always";
RestartSec = "10s";
NoNewPrivileges = true;
@@ -22,6 +40,9 @@ lib.mkIf config.services.xmrig.enable {
];
MemoryDenyWriteExecute = true;
StateDirectory = "xmrig-auto-pause";
# Required so the script can write to cgroup.freeze under
# ProtectSystem=strict (which makes /sys read-only by default).
ReadWritePaths = [ cgroupDir ];
};
environment = {
POLL_INTERVAL = "3";
@@ -32,8 +53,19 @@ lib.mkIf config.services.xmrig.enable {
# steady-state floor to avoid restarting xmrig while services are active.
CPU_STOP_THRESHOLD = "40";
CPU_RESUME_THRESHOLD = "10";
STARTUP_COOLDOWN = "10";
STATE_DIR = "/var/lib/xmrig-auto-pause";
XMRIG_CGROUP_FREEZE = cgroupFreeze;
# Per-service CPU thresholds. Catches sub-threshold activity that never
# trips the system-wide gauge — a single Minecraft player uses 3-15% of
# one core (0.3-1.3% of a 12-thread host) which is pure noise in
# /proc/stat but dominant in the minecraft cgroup.
WATCHED_SERVICES = lib.concatStringsSep "," (
lib.optional config.services.minecraft-servers.enable "minecraft-server-${service_configs.minecraft.server_name}:2"
);
};
};
# Pull auto-pause along whenever xmrig starts. After= on auto-pause ensures
# correct order; Wants= here ensures it actually starts.
systemd.services.xmrig.wants = [ "xmrig-auto-pause.service" ];
}

View File

@@ -2,33 +2,54 @@
"""
Auto-pause xmrig when other services need CPU.
Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19,
its CPU time lands in the 'nice' column and is excluded from the metric.
When real workload (user + system + irq + softirq) exceeds the stop
threshold, stops xmrig. When it drops below the resume threshold for
GRACE_PERIOD seconds, restarts xmrig.
Two independent signals drive the decision; either one can trigger a pause:
This replaces per-service pause scripts with a single general-purpose
monitor that handles any CPU-intensive workload (gitea workers, llama-cpp
inference, etc.) without needing to know about specific processes.
1. System-wide non-nice CPU from /proc/stat. Catches any CPU-heavy workload
including non-systemd user work (interactive sessions, ad-hoc jobs).
Since xmrig runs at Nice=19, its CPU time lands in the 'nice' column and
is excluded from the metric.
2. Per-service CPU from cgroup cpu.stat usage_usec. Catches sub-threshold
service activity — a single Minecraft player drives the server JVM to
3-15% of one core, which is noise system-wide (0.3-1.3% of total on a
12-thread host) but dominant for the minecraft cgroup.
When either signal crosses its stop threshold, writes 1 to
/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze. When both are quiet
for GRACE_PERIOD seconds, writes 0 to resume.
Why direct cgroup.freeze instead of systemctl freeze:
systemd 256+ has a bug class where `systemctl freeze` followed by any
process death (SIGKILL, watchdog, OOM, segfault, shutdown) strands the
unit in FreezerState=frozen ActiveState=failed with no recovery short of
a reboot. See https://github.com/systemd/systemd/issues/38517. Writing
directly to cgroup.freeze keeps systemd's FreezerState at "running" the
whole time, so there is no state machine to get stuck: if xmrig dies
while frozen, systemd transitions it to inactive normally.
Why scheduler priority alone isn't enough:
Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes
the shared 32MB L3 cache, and its memory access pattern saturates DRAM
bandwidth. Other services run slower even though they aren't denied CPU
time. The only fix is to stop xmrig entirely when real work is happening.
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) holds about
68% of the shared 32MB L3 cache on Zen 3, evicting hot lines from
interactive services. Measured on muffin: pointer-chase latency is 112ns
with xmrig running and 19ns with xmrig frozen — a 6x difference that
scheduler priority cannot address.
Hysteresis:
The stop threshold is set higher than the resume threshold to prevent
oscillation. When xmrig runs, its L3 cache pressure makes other processes
appear ~3-8% busier. A single threshold trips on this indirect effect,
causing stop/start thrashing. Separate thresholds break the cycle: the
resume threshold confirms the system is truly idle, while the stop
threshold requires genuine workload above xmrig's indirect pressure.
The system-wide stop threshold sits higher than the resume threshold
because background services (qbittorrent, bitmagnet, postgres) produce
15-25% non-nice CPU during normal operation, and xmrig's indirect cache
pressure inflates that by another few percent. A single threshold
thrashes on the floor; two thresholds break the cycle.
Per-service thresholds are single-valued. Per-service CPU is a clean
signal without background noise to calibrate against, so idle_since is
reset whenever any watched service is at-or-above its threshold and the
grace period only advances when every watched service is below.
"""
import os
import signal
import subprocess
import sys
import time
@@ -37,19 +58,23 @@ POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
# Percentage of total CPU ticks that non-nice processes must use to trigger
# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
# Default 15% requires roughly two busy cores, which avoids false positives
# from xmrig's L3 cache pressure inflating other processes' apparent CPU.
CPU_STOP_THRESHOLD = float(os.environ.get("CPU_STOP_THRESHOLD", "15"))
# Percentage below which the system is considered idle enough to resume
# mining. Lower than the stop threshold to provide hysteresis.
CPU_RESUME_THRESHOLD = float(os.environ.get("CPU_RESUME_THRESHOLD", "5"))
# After starting xmrig, ignore CPU spikes for this many seconds to let
# RandomX dataset initialization complete (~4s on the target hardware)
# without retriggering a stop.
STARTUP_COOLDOWN = float(os.environ.get("STARTUP_COOLDOWN", "10"))
# Per-service CPU thresholds parsed from "unit1:threshold1,unit2:threshold2".
# Thresholds are percentage of TOTAL CPU capacity (same frame as
# CPU_STOP_THRESHOLD). Empty / unset disables the per-service path.
WATCHED_SERVICES_RAW = os.environ.get("WATCHED_SERVICES", "")
# Path to xmrig's cgroup.freeze file. Direct write bypasses systemd's
# freezer state machine; see module docstring.
XMRIG_CGROUP_FREEZE = os.environ.get(
"XMRIG_CGROUP_FREEZE",
"/sys/fs/cgroup/system.slice/xmrig.service/cgroup.freeze",
)
# Directory for persisting pause state across script restarts. Without
# this, a restart while xmrig is paused loses the paused_by_us flag and
# xmrig stays stopped permanently.
# xmrig stays frozen until something else thaws it.
STATE_DIR = os.environ.get("STATE_DIR", "")
_PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else ""
@@ -58,6 +83,51 @@ def log(msg):
print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True)
def _parse_watched(spec):
out = {}
for entry in filter(None, (s.strip() for s in spec.split(","))):
name, _, pct = entry.partition(":")
name = name.strip()
pct = pct.strip()
if not name or not pct:
log(f"WATCHED_SERVICES: ignoring malformed entry '{entry}'")
continue
try:
out[name] = float(pct)
except ValueError:
log(f"WATCHED_SERVICES: ignoring non-numeric threshold in '{entry}'")
return out
def _resolve_cgroup_cpustat(unit):
"""Look up the unit's cgroup path via systemd. Returns cpu.stat path or
None if the unit has no cgroup (service not running, unknown unit)."""
result = subprocess.run(
["systemctl", "show", "--value", "--property=ControlGroup", unit],
capture_output=True,
text=True,
)
cg = result.stdout.strip()
if not cg:
return None
path = f"/sys/fs/cgroup{cg}/cpu.stat"
if not os.path.isfile(path):
return None
return path
def _read_service_usec(path):
"""Cumulative cpu.stat usage_usec, or None if the cgroup has vanished."""
try:
with open(path) as f:
for line in f:
if line.startswith("usage_usec "):
return int(line.split()[1])
except FileNotFoundError:
return None
return None
def read_cpu_ticks():
"""Read CPU tick counters from /proc/stat.
@@ -84,123 +154,241 @@ def is_active(unit):
return result.returncode == 0
def systemctl(action, unit):
def main_pid(unit):
"""Return the unit's MainPID, or 0 if unit is not running."""
result = subprocess.run(
["systemctl", action, unit],
["systemctl", "show", "--value", "--property=MainPID", unit],
capture_output=True,
text=True,
)
if result.returncode != 0:
log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
return result.returncode == 0
try:
return int(result.stdout.strip() or "0")
except ValueError:
return 0
def _save_paused(paused):
"""Persist pause flag so a script restart can resume where we left off."""
def _freeze(frozen):
"""Write 1 or 0 to xmrig's cgroup.freeze. Returns True on success.
Direct kernel interface — bypasses systemd's freezer state tracking."""
try:
with open(XMRIG_CGROUP_FREEZE, "w") as f:
f.write("1" if frozen else "0")
return True
except OSError as e:
action = "freeze" if frozen else "thaw"
log(f"cgroup.freeze {action} write failed: {e}")
return False
def _is_frozen():
"""Read the actual frozen state from cgroup.events. False if cgroup absent."""
events_path = os.path.join(os.path.dirname(XMRIG_CGROUP_FREEZE), "cgroup.events")
try:
with open(events_path) as f:
for line in f:
if line.startswith("frozen "):
return line.split()[1] == "1"
except FileNotFoundError:
return False
return False
def _save_paused(pid):
"""Persist the xmrig MainPID at the time of freeze. pid=0 clears claim."""
if not _PAUSE_FILE:
return
try:
if paused:
open(_PAUSE_FILE, "w").close()
if pid:
with open(_PAUSE_FILE, "w") as f:
f.write(str(pid))
else:
os.remove(_PAUSE_FILE)
except OSError:
pass
try:
os.remove(_PAUSE_FILE)
except FileNotFoundError:
pass
except OSError as e:
log(f"state file write failed: {e}")
def _load_paused():
"""Check if a previous instance left xmrig paused."""
"""Return True iff our claim is still valid: same PID and still frozen.
Restart of the xmrig unit gives it a new PID, which invalidates any
prior claim — we can't "own" a freeze we didn't perform on this
instance. Also confirms the cgroup is actually frozen so an external
thaw drops the claim.
"""
if not _PAUSE_FILE:
return False
return os.path.isfile(_PAUSE_FILE)
try:
with open(_PAUSE_FILE) as f:
saved = int(f.read().strip() or "0")
except (FileNotFoundError, ValueError):
return False
if not saved:
return False
if saved != main_pid("xmrig.service"):
return False
return _is_frozen()
def _cleanup(signum=None, frame=None):
"""On SIGTERM/SIGINT: thaw xmrig and clear claim. Operators must never see
a frozen unit we owned after auto-pause exits."""
if _is_frozen():
_freeze(False)
_save_paused(0)
sys.exit(0)
def main():
paused_by_us = _load_paused()
idle_since = None
started_at = None # monotonic time when we last started xmrig
prev_total = None
prev_work = None
watched_services = _parse_watched(WATCHED_SERVICES_RAW)
watched_paths = {}
for name in watched_services:
path = _resolve_cgroup_cpustat(name)
if path is None:
log(f"WATCHED_SERVICES: {name} has no cgroup — ignoring until it starts")
watched_paths[name] = path
nproc = os.cpu_count() or 1
signal.signal(signal.SIGTERM, _cleanup)
signal.signal(signal.SIGINT, _cleanup)
paused_by_us = _load_paused()
if paused_by_us:
log("Recovered pause state from previous instance")
log(
f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s "
f"stop={CPU_STOP_THRESHOLD}% resume={CPU_RESUME_THRESHOLD}% "
f"cooldown={STARTUP_COOLDOWN}s"
f"sys_stop={CPU_STOP_THRESHOLD}% sys_resume={CPU_RESUME_THRESHOLD}% "
f"watched={watched_services or '(none)'}"
)
idle_since = None
prev_total = None
prev_work = None
prev_monotonic = None
prev_service_usec = {}
while True:
total, work = read_cpu_ticks()
now = time.monotonic()
if prev_total is None:
prev_total = total
prev_work = work
prev_monotonic = now
# seed per-service baselines too
for name, path in watched_paths.items():
if path is None:
# Re-resolve in case the service has started since startup
path = _resolve_cgroup_cpustat(name)
watched_paths[name] = path
if path is not None:
usec = _read_service_usec(path)
if usec is not None:
prev_service_usec[name] = usec
time.sleep(POLL_INTERVAL)
continue
dt = total - prev_total
if dt <= 0:
dt_s = now - prev_monotonic
if dt <= 0 or dt_s <= 0:
prev_total = total
prev_work = work
prev_monotonic = now
time.sleep(POLL_INTERVAL)
continue
real_work_pct = ((work - prev_work) / dt) * 100
# Per-service CPU percentages this window. Fraction of total CPU
# capacity used by this specific service, same frame as real_work_pct.
svc_pct = {}
for name in watched_services:
path = watched_paths.get(name)
if path is None:
# Unit wasn't running at startup; try resolving again in case
# it has started since.
path = _resolve_cgroup_cpustat(name)
watched_paths[name] = path
if path is None:
prev_service_usec.pop(name, None)
continue
cur = _read_service_usec(path)
if cur is None:
# Service stopped; drop prev so it doesn't compute a huge delta
# on next start.
prev_service_usec.pop(name, None)
watched_paths[name] = None # force re-resolution next poll
continue
if name in prev_service_usec:
delta_us = cur - prev_service_usec[name]
if delta_us >= 0:
svc_pct[name] = (delta_us / 1_000_000) / (dt_s * nproc) * 100
prev_service_usec[name] = cur
prev_total = total
prev_work = work
prev_monotonic = now
# Don't act during startup cooldown — RandomX dataset init causes
# a transient CPU spike that would immediately retrigger a stop.
if started_at is not None:
if time.monotonic() - started_at < STARTUP_COOLDOWN:
time.sleep(POLL_INTERVAL)
continue
# Cooldown expired — verify xmrig survived startup. If it
# crashed during init (hugepage failure, pool unreachable, etc.),
# re-enter the pause/retry cycle rather than silently leaving
# xmrig dead.
if not is_active("xmrig.service"):
log("xmrig died during startup cooldown — will retry")
paused_by_us = True
_save_paused(True)
started_at = None
above_stop_sys = real_work_pct > CPU_STOP_THRESHOLD
below_resume_sys = real_work_pct <= CPU_RESUME_THRESHOLD
above_stop = real_work_pct > CPU_STOP_THRESHOLD
below_resume = real_work_pct <= CPU_RESUME_THRESHOLD
busy_services = [
n for n in watched_services if svc_pct.get(n, 0) > watched_services[n]
]
any_svc_at_or_above = any(
svc_pct.get(n, 0) >= watched_services[n] for n in watched_services
)
if above_stop:
stop_pressure = above_stop_sys or bool(busy_services)
fully_idle = below_resume_sys and not any_svc_at_or_above
if stop_pressure:
idle_since = None
if paused_by_us and is_active("xmrig.service"):
# Something else restarted xmrig (deploy, manual start, etc.)
# while we thought it was stopped. Reset ownership so we can
# manage it again.
log("xmrig was restarted externally while paused — reclaiming")
if paused_by_us and not _is_frozen():
# Someone thawed xmrig while we believed it paused. Reclaim
# ownership so we can re-freeze.
log("xmrig was thawed externally while paused — reclaiming")
paused_by_us = False
_save_paused(False)
if not paused_by_us:
# Only claim ownership if xmrig is actually running.
# If something else stopped it (e.g. UPS battery hook),
# don't interfere — we'd wrongly restart it later.
if is_active("xmrig.service"):
log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
if systemctl("stop", "xmrig.service"):
paused_by_us = True
_save_paused(True)
_save_paused(0)
if not paused_by_us and is_active("xmrig.service"):
# Only claim ownership if xmrig is actually running. If
# something else stopped it (e.g. UPS battery hook), don't
# interfere.
if busy_services:
reasons = ", ".join(
f"{n}={svc_pct[n]:.1f}%>{watched_services[n]:.1f}%"
for n in busy_services
)
log(f"Stop: watched service(s) busy [{reasons}] — freezing xmrig")
else:
log(
f"Stop: system CPU {real_work_pct:.1f}% > "
f"{CPU_STOP_THRESHOLD:.1f}% — freezing xmrig"
)
if _freeze(True):
paused_by_us = True
_save_paused(main_pid("xmrig.service"))
elif paused_by_us:
if below_resume:
if fully_idle:
if idle_since is None:
idle_since = time.monotonic()
elif time.monotonic() - idle_since >= GRACE_PERIOD:
log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
if systemctl("start", "xmrig.service"):
log(
f"Idle past grace period (system {real_work_pct:.1f}%) "
"— thawing xmrig"
)
if _freeze(False):
paused_by_us = False
_save_paused(False)
started_at = time.monotonic()
_save_paused(0)
idle_since = None
else:
# Between thresholds — not idle enough to resume.
# Between thresholds or a watched service is borderline — not
# idle enough to resume.
idle_since = None
time.sleep(POLL_INTERVAL)

View File

@@ -5,6 +5,15 @@
let
script = ../services/monero/xmrig-auto-pause.py;
python = pkgs.python3;
cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service";
cgroupFreeze = "${cgroupDir}/cgroup.freeze";
cgroupEvents = "${cgroupDir}/cgroup.events";
# Inline ExecStop for the transient monitor: mirrors the production .nix
# ExecStop so the PartOf cascade test exercises the same code path.
thawScript = pkgs.writeShellScript "test-thaw-xmrig" ''
f=${cgroupFreeze}
[ -w "$f" ] && echo 0 > "$f" || true
'';
in
pkgs.testers.runNixOSTest {
name = "xmrig-auto-pause";
@@ -17,13 +26,18 @@ pkgs.testers.runNixOSTest {
pkgs.procps
];
# Mock xmrig as a nice'd sleep process that can be stopped/started.
# Mock xmrig as a nice'd sleep process. Runs in the real
# /sys/fs/cgroup/system.slice/xmrig.service cgroup, which is what the
# auto-pause script writes cgroup.freeze into.
systemd.services.xmrig = {
description = "Mock xmrig miner";
serviceConfig = {
ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
Type = "simple";
Nice = 19;
# Short timeout so the PartOf cascade test completes fast if the
# cascade is broken (would otherwise hit systemd's 90s default).
TimeoutStopSec = "10s";
};
wantedBy = [ "multi-user.target" ];
};
@@ -34,20 +48,39 @@ pkgs.testers.runNixOSTest {
PYTHON = "${python}/bin/python3"
SCRIPT = "${script}"
CGROUP_FREEZE = "${cgroupFreeze}"
CGROUP_EVENTS = "${cgroupEvents}"
THAW_SCRIPT = "${thawScript}"
# Tuned for test VMs (1-2 cores).
# POLL_INTERVAL=1 keeps detection latency low.
# GRACE_PERIOD=5 is long enough to verify "stays stopped" but short
# enough that the full test completes in reasonable time.
# CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%)
# without triggering from normal VM noise.
# CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM.
# GRACE_PERIOD=5 is long enough to verify hysteresis, short enough for
# reasonable total test time.
# CPU_STOP_HIGH=999 effectively disables the system-wide path (a 1-core
# VM can never exceed 100% of total CPU) so per-service subtests exercise
# that path in isolation. CPU_STOP_LOW=20 catches a bash busy-loop on a
# 1-2 core VM without tripping on normal VM noise.
POLL_INTERVAL = "1"
GRACE_PERIOD = "5"
CPU_STOP_THRESHOLD = "20"
CPU_RESUME_THRESHOLD = "10"
STARTUP_COOLDOWN = "4"
GRACE_PERIOD = "5"
CPU_STOP_HIGH = "999"
CPU_STOP_LOW = "20"
CPU_RESUME_HIGH = "950"
CPU_RESUME_LOW = "10"
STATE_DIR = "/tmp/xap-state"
WATCHED_UNIT = "watched-burn"
WATCHED_THR = "5"
def frozen():
out = machine.succeed(f"cat {CGROUP_EVENTS}")
return "frozen 1" in out
def thawed():
out = machine.succeed(f"cat {CGROUP_EVENTS}")
return "frozen 0" in out
def xmrig_pid():
return machine.succeed("systemctl show xmrig -p MainPID --value").strip()
def start_cpu_load(name):
"""Start a non-nice CPU burn as a transient systemd unit."""
machine.succeed(
@@ -58,20 +91,29 @@ pkgs.testers.runNixOSTest {
def stop_cpu_load(name):
machine.succeed(f"systemctl stop {name}")
def start_monitor(unit_name):
"""Start the auto-pause monitor as a transient unit."""
machine.succeed(
f"systemd-run --unit={unit_name} "
f"--setenv=POLL_INTERVAL={POLL_INTERVAL} "
f"--setenv=GRACE_PERIOD={GRACE_PERIOD} "
f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} "
f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} "
f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} "
f"--setenv=STATE_DIR={STATE_DIR} "
f"{PYTHON} {SCRIPT}"
)
# Monitor needs two consecutive polls to compute a CPU delta.
time.sleep(3)
def start_monitor(unit_name, *, watched="", cpu_stop=CPU_STOP_HIGH, cpu_resume=CPU_RESUME_HIGH):
"""Start the auto-pause monitor as a transient unit.
watched="foo:5,bar:10" enables the per-service path.
cpu_stop/cpu_resume default to values that disable the system-wide
path (999/950) so per-service behaviour is tested in isolation.
"""
parts = [
f"systemd-run --unit={unit_name}",
"--property=After=xmrig.service",
"--property=PartOf=xmrig.service",
f"--property=ExecStop={THAW_SCRIPT}",
f"--setenv=POLL_INTERVAL={POLL_INTERVAL}",
f"--setenv=GRACE_PERIOD={GRACE_PERIOD}",
f"--setenv=CPU_STOP_THRESHOLD={cpu_stop}",
f"--setenv=CPU_RESUME_THRESHOLD={cpu_resume}",
f"--setenv=STATE_DIR={STATE_DIR}",
f"--setenv=XMRIG_CGROUP_FREEZE={CGROUP_FREEZE}",
]
if watched:
parts.append(f"--setenv=WATCHED_SERVICES={watched}")
parts.append(f"{PYTHON} {SCRIPT}")
machine.succeed(" ".join(parts))
# Monitor needs two consecutive polls to compute a CPU delta.
time.sleep(3)
@@ -80,127 +122,139 @@ pkgs.testers.runNixOSTest {
machine.wait_for_unit("xmrig.service")
machine.succeed(f"mkdir -p {STATE_DIR}")
with subtest("Start auto-pause monitor"):
start_monitor("xmrig-auto-pause")
# ------------------------------------------------------------------
# Per-service path (primary signal)
# ------------------------------------------------------------------
with subtest("xmrig stays running while system is idle"):
machine.succeed("systemctl is-active xmrig")
with subtest("Idle xmrig stays thawed"):
start_monitor("ap-watched", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
assert thawed(), f"expected thawed, got: {machine.succeed(f'cat {CGROUP_EVENTS}')}"
pid0 = xmrig_pid()
assert pid0 and pid0 != "0", f"expected a real xmrig PID, got {pid0!r}"
with subtest("xmrig stopped when CPU load appears"):
start_cpu_load("cpu-load")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
with subtest("Watched service CPU load xmrig frozen, PID preserved"):
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
assert xmrig_pid() == pid0, "PID must be preserved across freeze"
with subtest("xmrig remains stopped during grace period after load ends"):
stop_cpu_load("cpu-load")
# Load just stopped. Grace period is 5s. Check at 2s — well within.
time.sleep(2)
machine.fail("systemctl is-active xmrig")
with subtest("Load ends xmrig thawed after grace period, same PID"):
stop_cpu_load(WATCHED_UNIT)
# Grace period is 5s; watched service drops to 0 immediately, so the
# idle timer starts right away. Expect thaw within GRACE + 2*POLL.
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
assert xmrig_pid() == pid0, "PID must survive the whole cycle"
with subtest("xmrig resumes after grace period expires"):
# Already idle since previous subtest. Grace period (5s) plus
# detection delay (~2 polls) plus startup cooldown (4s) means
# xmrig should restart within ~12s.
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("Intermittent load does not cause flapping"):
# First load → stop xmrig
start_cpu_load("cpu-load-1")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
stop_cpu_load("cpu-load-1")
# Brief idle gap shorter than grace period
time.sleep(2)
# Second load arrives before grace period expires
start_cpu_load("cpu-load-2")
with subtest("Intermittent watched load does not cause flapping"):
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
stop_cpu_load(WATCHED_UNIT)
time.sleep(2) # shorter than grace period
start_cpu_load(WATCHED_UNIT)
time.sleep(3)
assert frozen(), "xmrig must still be frozen during intermittent load"
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
# xmrig must still be stopped
machine.fail("systemctl is-active xmrig")
with subtest("Sustained watched load keeps xmrig frozen"):
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
time.sleep(int(GRACE_PERIOD) + 3) # past grace period
assert frozen(), "sustained load must keep xmrig frozen"
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
stop_cpu_load("cpu-load-2")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("External thaw reclaimed while load present"):
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# Someone manually thaws xmrig. Auto-pause must detect and re-freeze.
machine.succeed(f"echo 0 > {CGROUP_FREEZE}")
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
with subtest("Sustained load keeps xmrig stopped"):
start_cpu_load("cpu-load-3")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
with subtest("Monitor SIGTERM thaws xmrig"):
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
machine.succeed("systemctl stop ap-watched")
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=10)
stop_cpu_load(WATCHED_UNIT)
machine.succeed("systemctl reset-failed ap-watched 2>/dev/null || true")
# Stay busy longer than the grace period to prove continuous
# activity keeps xmrig stopped indefinitely.
time.sleep(8)
machine.fail("systemctl is-active xmrig")
# ------------------------------------------------------------------
# Negative control + system-wide path
# ------------------------------------------------------------------
stop_cpu_load("cpu-load-3")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("Unwatched CPU burn does not trip per-service path"):
# High CPU_STOP_THRESHOLD + no watched service → no reason to freeze.
machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-neg")
start_cpu_load("unwatched-neg")
time.sleep(int(GRACE_PERIOD) + 3)
assert thawed(), "unwatched load must not trip when system threshold is high and nothing is watched"
stop_cpu_load("unwatched-neg")
machine.succeed("systemctl stop ap-neg")
machine.succeed("systemctl reset-failed ap-neg 2>/dev/null || true")
with subtest("External restart detected and re-stopped under load"):
# Put system under load so auto-pause stops xmrig.
start_cpu_load("cpu-load-4")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
with subtest("System-wide CPU path freezes xmrig when threshold is low"):
machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-sys", cpu_stop=CPU_STOP_LOW, cpu_resume=CPU_RESUME_LOW)
start_cpu_load("sys-burn")
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=20)
stop_cpu_load("sys-burn")
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
machine.succeed("systemctl stop ap-sys")
machine.succeed("systemctl reset-failed ap-sys 2>/dev/null || true")
# Something external starts xmrig while load is active.
# The script should detect this and re-stop it.
machine.succeed("systemctl start xmrig")
machine.succeed("systemctl is-active xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# ------------------------------------------------------------------
# State persistence and operational edge cases
# ------------------------------------------------------------------
stop_cpu_load("cpu-load-4")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("Monitor crash preserves pause claim; next instance resumes"):
machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-persist", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# State file must contain the xmrig PID we claim to have frozen.
machine.succeed(f"test -s {STATE_DIR}/paused")
saved = machine.succeed(f"cat {STATE_DIR}/paused").strip()
assert saved == xmrig_pid(), f"state file PID {saved!r} != live xmrig PID {xmrig_pid()!r}"
# Hard-kill the monitor. ExecStop does NOT run on SIGKILL, so xmrig
# stays frozen. The state file persists.
machine.succeed("systemctl kill --signal=KILL ap-persist")
machine.succeed("systemctl reset-failed ap-persist 2>/dev/null || true")
assert frozen(), "xmrig must remain frozen after monitor SIGKILL"
# Fresh monitor picks up the state file, recognises the same PID +
# still-frozen cgroup, and continues owning the claim. Ending the
# load must thaw xmrig through the normal grace path.
start_monitor("ap-persist2", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
stop_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
# State file cleared after successful resume.
machine.fail(f"test -f {STATE_DIR}/paused")
machine.succeed("systemctl stop ap-persist2")
machine.succeed("systemctl reset-failed ap-persist2 2>/dev/null || true")
# --- State persistence and crash recovery ---
machine.succeed("systemctl stop xmrig-auto-pause")
with subtest("xmrig recovers after crash during startup cooldown"):
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
start_monitor("xmrig-auto-pause-crash")
# Load -> xmrig stops
start_cpu_load("cpu-crash")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# End load -> xmrig restarts after grace period
stop_cpu_load("cpu-crash")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
# Kill xmrig immediately — simulates crash during startup cooldown.
# The script should detect the failure when cooldown expires and
# re-enter the retry cycle.
machine.succeed("systemctl kill --signal=KILL xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=5)
# After cooldown + grace period + restart, xmrig should be back.
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
machine.succeed("systemctl stop xmrig-auto-pause-crash")
machine.succeed("systemctl reset-failed xmrig.service || true")
with subtest("systemctl stop xmrig cascades via PartOf and completes quickly"):
machine.succeed(f"rm -f {STATE_DIR}/paused")
start_monitor("ap-cascade", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
start_cpu_load(WATCHED_UNIT)
machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
# Simulate apcupsd onbattery hook: `systemctl stop xmrig` while frozen.
# Without the PartOf cascade this would hang for TimeoutStopSec (10s
# in the mock config, 90s in production) and systemd's freezer bug
# class could strand the unit. With cascade: auto-pause stops first,
# its ExecStop thaws cgroup.freeze, xmrig's SIGTERM then succeeds.
t0 = time.monotonic()
machine.succeed("systemctl stop xmrig")
dt = time.monotonic() - t0
assert dt < 5, f"systemctl stop xmrig took {dt:.1f}s, cascade broken"
machine.succeed("systemctl show xmrig -p ActiveState --value | grep -q inactive")
# auto-pause stopped as a PartOf dependent
machine.succeed("systemctl show ap-cascade -p ActiveState --value | grep -qE 'inactive|deactivating'")
# Bring xmrig back for any remaining subtests
machine.succeed("systemctl start xmrig")
machine.wait_for_unit("xmrig.service")
with subtest("Script restart preserves pause state"):
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
start_monitor("xmrig-auto-pause-persist")
# Load -> xmrig stops
start_cpu_load("cpu-persist")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# Kill the monitor while xmrig is paused (simulates script crash)
machine.succeed("systemctl stop xmrig-auto-pause-persist")
# State file must exist — the monitor persisted the pause flag
machine.succeed(f"test -f {STATE_DIR}/paused")
# Start a fresh monitor instance (reads state file on startup)
start_monitor("xmrig-auto-pause-persist2")
# End load — the new monitor should pick up the paused state
# and restart xmrig after the grace period
stop_cpu_load("cpu-persist")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
# State file should be cleaned up after successful restart
machine.fail(f"test -f {STATE_DIR}/paused")
machine.succeed("systemctl stop xmrig-auto-pause-persist2")
stop_cpu_load(WATCHED_UNIT)
machine.succeed("systemctl reset-failed ap-cascade 2>/dev/null || true")
'';
}