{
  pkgs,
  ...
}:
let
  # Auto-pause monitor script under test (path relative to this file).
  script = ../services/monero/xmrig-auto-pause.py;
  # Interpreter used to launch the monitor script inside the VM.
  python = pkgs.python3;
  # cgroup directory of the mock xmrig.service unit and the two control files
  # the test touches: cgroup.freeze is written to freeze/thaw the unit,
  # cgroup.events is read to observe its `frozen` state.
  cgroupDir = "/sys/fs/cgroup/system.slice/xmrig.service";
  cgroupFreeze = "${cgroupDir}/cgroup.freeze";
  cgroupEvents = "${cgroupDir}/cgroup.events";
  # ExecStop helper attached to every transient monitor unit. It mirrors the
  # production .nix ExecStop so the PartOf cascade test exercises the same
  # code path. Best-effort: writes 0 to cgroup.freeze when that file is
  # writable and always exits successfully.
  thawScript = pkgs.writeShellScript "test-thaw-xmrig" ''
    f=${cgroupFreeze}
    if [ -w "$f" ]; then
      echo 0 > "$f" || true
    fi
  '';
in
pkgs.testers.runNixOSTest {
  name = "xmrig-auto-pause";

  nodes.machine =
    { pkgs, ... }:
    {
      environment.systemPackages = with pkgs; [
        python3
        procps
      ];

      # Stand-in for the real miner: a nice'd `sleep infinity`. Because the
      # unit is named xmrig.service it lives in the real
      # /sys/fs/cgroup/system.slice/xmrig.service cgroup, which is where the
      # auto-pause script writes cgroup.freeze.
      systemd.services.xmrig = {
        description = "Mock xmrig miner";
        wantedBy = [ "multi-user.target" ];
        serviceConfig = {
          Type = "simple";
          ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
          Nice = 19;
          # Keep the stop timeout short: if the PartOf cascade is broken the
          # cascade subtest fails after 10s instead of waiting out systemd's
          # 90s default.
          TimeoutStopSec = "10s";
        };
      };
    };

  testScript = ''
    import time

    # Paths filled in by the enclosing Nix expression: the interpreter, the
    # monitor script under test, the xmrig cgroup control files and the thaw
    # helper used as ExecStop for the transient monitor units.
    PYTHON = "${python}/bin/python3"
    SCRIPT = "${script}"
    CGROUP_FREEZE = "${cgroupFreeze}"
    CGROUP_EVENTS = "${cgroupEvents}"
    THAW_SCRIPT = "${thawScript}"

    # Tuned for test VMs (1-2 cores).
    # POLL_INTERVAL=1 keeps detection latency low.
    # GRACE_PERIOD=5 is long enough to verify hysteresis, short enough for
    # reasonable total test time.
    # CPU_STOP_HIGH=999 effectively disables the system-wide path (a 1-core
    # VM can never exceed 100% of total CPU) so per-service subtests exercise
    # that path in isolation. CPU_STOP_LOW=20 catches a bash busy-loop on a
    # 1-2 core VM without tripping on normal VM noise.
    # All values are strings because they are interpolated into systemd-run
    # command lines; GRACE_PERIOD is converted with int() where arithmetic is
    # needed.
    POLL_INTERVAL = "1"
    GRACE_PERIOD  = "5"
    CPU_STOP_HIGH = "999"
    CPU_STOP_LOW  = "20"
    # Resume thresholds paired with the stop thresholds above (HIGH with HIGH,
    # LOW with LOW); exported to the monitor as CPU_RESUME_THRESHOLD.
    CPU_RESUME_HIGH = "950"
    CPU_RESUME_LOW  = "10"
    # Directory handed to the monitor as STATE_DIR; the subtests inspect and
    # remove the `paused` file inside it.
    STATE_DIR = "/tmp/xap-state"
    # Transient unit used as the watched CPU burner, and the threshold joined
    # to it as "unit:threshold" in WATCHED_SERVICES.
    # NOTE(review): threshold is presumably a CPU percentage; confirm against
    # the monitor script.
    WATCHED_UNIT = "watched-burn"
    WATCHED_THR  = "5"

    def frozen():
        """Return True when the xmrig cgroup.events file contains `frozen 1`."""
        return "frozen 1" in machine.succeed(f"cat {CGROUP_EVENTS}")

    def thawed():
        """Return True when the xmrig cgroup.events file contains `frozen 0`."""
        return "frozen 0" in machine.succeed(f"cat {CGROUP_EVENTS}")

    def xmrig_pid():
        """Return the MainPID systemd reports for xmrig, as a stripped string."""
        reported = machine.succeed("systemctl show xmrig -p MainPID --value")
        return reported.strip()

    def start_cpu_load(name):
        """Launch transient unit `name` running an endless bash busy-loop.

        No Nice property is set on the unit, so the burn is not nice'd.
        """
        busy_loop = "bash -c 'while true; do :; done'"
        machine.succeed(f"systemd-run --unit={name} --property=Type=exec {busy_loop}")

    def stop_cpu_load(name):
        """Stop the transient load unit `name` with `systemctl stop`."""
        stop_command = f"systemctl stop {name}"
        machine.succeed(stop_command)

    def start_monitor(unit_name, *, watched="", cpu_stop=CPU_STOP_HIGH, cpu_resume=CPU_RESUME_HIGH):
        """Start the auto-pause monitor as a transient systemd unit.

        unit_name: name passed to systemd-run for the transient unit.
        watched: WATCHED_SERVICES value such as "foo:5,bar:10"; enables the
            per-service path. When empty the variable is not exported at all.
        cpu_stop / cpu_resume: exported as CPU_STOP_THRESHOLD and
            CPU_RESUME_THRESHOLD. The defaults are CPU_STOP_HIGH and
            CPU_RESUME_HIGH (currently 999/950), chosen to disable the
            system-wide path so per-service behaviour is tested in isolation.

        The unit is ordered After= and is PartOf= xmrig.service, and uses
        THAW_SCRIPT as its ExecStop. The call blocks for a short settle period
        before returning so the monitor has completed its first polls.
        """
        parts = [
            f"systemd-run --unit={unit_name}",
            "--property=After=xmrig.service",
            "--property=PartOf=xmrig.service",
            f"--property=ExecStop={THAW_SCRIPT}",
            f"--setenv=POLL_INTERVAL={POLL_INTERVAL}",
            f"--setenv=GRACE_PERIOD={GRACE_PERIOD}",
            f"--setenv=CPU_STOP_THRESHOLD={cpu_stop}",
            f"--setenv=CPU_RESUME_THRESHOLD={cpu_resume}",
            f"--setenv=STATE_DIR={STATE_DIR}",
            f"--setenv=XMRIG_CGROUP_FREEZE={CGROUP_FREEZE}",
        ]
        if watched:
            parts.append(f"--setenv=WATCHED_SERVICES={watched}")
        # The command to run goes last, after all systemd-run options.
        parts.append(f"{PYTHON} {SCRIPT}")
        machine.succeed(" ".join(parts))
        # Monitor needs two consecutive polls to compute a CPU delta, so wait
        # two poll intervals plus one second of slack (3s at POLL_INTERVAL=1).
        time.sleep(2 * int(POLL_INTERVAL) + 1)

    # Boot the VM and wait until the mock xmrig unit is up before any subtest
    # runs.
    start_all()
    machine.wait_for_unit("multi-user.target")
    machine.wait_for_unit("xmrig.service")
    # Create the directory that is handed to every monitor as STATE_DIR.
    machine.succeed(f"mkdir -p {STATE_DIR}")

    # ------------------------------------------------------------------
    # Per-service path (primary signal)
    # ------------------------------------------------------------------

    with subtest("Idle → xmrig stays thawed"):
        # Starts the "ap-watched" monitor. The following per-service subtests
        # keep using it until the SIGTERM subtest stops it.
        start_monitor("ap-watched", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
        assert thawed(), f"expected thawed, got: {machine.succeed(f'cat {CGROUP_EVENTS}')}"
        # Remember the PID so later subtests can check xmrig was never
        # restarted; a MainPID of "0" or an empty value is rejected here.
        pid0 = xmrig_pid()
        assert pid0 and pid0 != "0", f"expected a real xmrig PID, got {pid0!r}"

    with subtest("Watched service CPU load → xmrig frozen, PID preserved"):
        # Burn CPU in the watched unit and wait (up to 15s) for cgroup.events
        # to report `frozen 1`.
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Same MainPID as in the idle subtest means xmrig was not restarted.
        assert xmrig_pid() == pid0, "PID must be preserved across freeze"

    with subtest("Load ends → xmrig thawed after grace period, same PID"):
        stop_cpu_load(WATCHED_UNIT)
        # Grace period is 5s; watched service drops to 0 immediately, so the
        # idle timer starts right away. Expect thaw within GRACE + 2*POLL.
        # The 30s timeout leaves generous slack on top of that estimate.
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
        assert xmrig_pid() == pid0, "PID must survive the whole cycle"

    with subtest("Intermittent watched load does not cause flapping"):
        # Freeze first, then drop the load for less than GRACE_PERIOD and
        # bring it back: xmrig must stay frozen across the gap.
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        stop_cpu_load(WATCHED_UNIT)
        time.sleep(2)  # shorter than grace period
        start_cpu_load(WATCHED_UNIT)
        # Give the monitor a few polls with the load back before checking.
        time.sleep(3)
        assert frozen(), "xmrig must still be frozen during intermittent load"
        # Clean up: remove the load and wait for the thaw so the next subtest
        # starts from a thawed state.
        stop_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)

    with subtest("Sustained watched load keeps xmrig frozen"):
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Keep the load running for longer than GRACE_PERIOD; the grace timer
        # must not thaw xmrig while the watched unit is still busy.
        time.sleep(int(GRACE_PERIOD) + 3)  # past grace period
        assert frozen(), "sustained load must keep xmrig frozen"
        # Clean up: remove the load and wait for the thaw.
        stop_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)

    with subtest("External thaw reclaimed while load present"):
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Someone manually thaws xmrig. Auto-pause must detect and re-freeze.
        # The manual thaw is simulated by writing 0 to cgroup.freeze directly.
        machine.succeed(f"echo 0 > {CGROUP_FREEZE}")
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Clean up: remove the load and wait for the thaw.
        stop_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)

    with subtest("Monitor SIGTERM thaws xmrig"):
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Stop the monitor while the load is still running, so a thaw here
        # cannot come from the grace path. The unit was started with
        # ExecStop=THAW_SCRIPT, so that helper is part of the stop sequence.
        machine.succeed("systemctl stop ap-watched")
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=10)
        stop_cpu_load(WATCHED_UNIT)
        # Best-effort cleanup of any failed state left by the transient unit;
        # errors are ignored.
        machine.succeed("systemctl reset-failed ap-watched 2>/dev/null || true")

    # ------------------------------------------------------------------
    # Negative control + system-wide path
    # ------------------------------------------------------------------

    with subtest("Unwatched CPU burn does not trip per-service path"):
        # High CPU_STOP_THRESHOLD + no watched service → no reason to freeze.
        # Remove any leftover state file so the new monitor starts clean.
        machine.succeed(f"rm -f {STATE_DIR}/paused")
        # No `watched` argument: WATCHED_SERVICES is not exported and the
        # default (high) CPU thresholds apply.
        start_monitor("ap-neg")
        start_cpu_load("unwatched-neg")
        # Wait past the grace period, then confirm no freeze ever happened.
        time.sleep(int(GRACE_PERIOD) + 3)
        assert thawed(), "unwatched load must not trip when system threshold is high and nothing is watched"
        stop_cpu_load("unwatched-neg")
        machine.succeed("systemctl stop ap-neg")
        # Best-effort cleanup; errors are ignored.
        machine.succeed("systemctl reset-failed ap-neg 2>/dev/null || true")

    with subtest("System-wide CPU path freezes xmrig when threshold is low"):
        # Remove any leftover state file so the new monitor starts clean.
        machine.succeed(f"rm -f {STATE_DIR}/paused")
        # Low stop/resume thresholds and no watched services: only the
        # system-wide path can cause the freeze observed below.
        start_monitor("ap-sys", cpu_stop=CPU_STOP_LOW, cpu_resume=CPU_RESUME_LOW)
        start_cpu_load("sys-burn")
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=20)
        stop_cpu_load("sys-burn")
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
        machine.succeed("systemctl stop ap-sys")
        # Best-effort cleanup; errors are ignored.
        machine.succeed("systemctl reset-failed ap-sys 2>/dev/null || true")

    # ------------------------------------------------------------------
    # State persistence and operational edge cases
    # ------------------------------------------------------------------

    with subtest("Monitor crash preserves pause claim; next instance resumes"):
        # Start from a clean state directory, then get xmrig frozen by a
        # watched load under the first monitor instance.
        machine.succeed(f"rm -f {STATE_DIR}/paused")
        start_monitor("ap-persist", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # State file must contain the xmrig PID we claim to have frozen.
        # `test -s` checks that the file exists and is non-empty.
        machine.succeed(f"test -s {STATE_DIR}/paused")
        saved = machine.succeed(f"cat {STATE_DIR}/paused").strip()
        assert saved == xmrig_pid(), f"state file PID {saved!r} != live xmrig PID {xmrig_pid()!r}"
        # Hard-kill the monitor. ExecStop does NOT run on SIGKILL, so xmrig
        # stays frozen. The state file persists.
        machine.succeed("systemctl kill --signal=KILL ap-persist")
        machine.succeed("systemctl reset-failed ap-persist 2>/dev/null || true")
        assert frozen(), "xmrig must remain frozen after monitor SIGKILL"
        # Fresh monitor picks up the state file, recognises the same PID +
        # still-frozen cgroup, and continues owning the claim. Ending the
        # load must thaw xmrig through the normal grace path.
        start_monitor("ap-persist2", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
        stop_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 0' {CGROUP_EVENTS}", timeout=30)
        # State file cleared after successful resume.
        machine.fail(f"test -f {STATE_DIR}/paused")
        # Tear down the second monitor; reset-failed is best-effort.
        machine.succeed("systemctl stop ap-persist2")
        machine.succeed("systemctl reset-failed ap-persist2 2>/dev/null || true")

    with subtest("systemctl stop xmrig cascades via PartOf and completes quickly"):
        # Start from a clean state directory and get xmrig frozen under a
        # monitor that is PartOf= xmrig.service.
        machine.succeed(f"rm -f {STATE_DIR}/paused")
        start_monitor("ap-cascade", watched=f"{WATCHED_UNIT}:{WATCHED_THR}")
        start_cpu_load(WATCHED_UNIT)
        machine.wait_until_succeeds(f"grep -q '^frozen 1' {CGROUP_EVENTS}", timeout=15)
        # Simulate apcupsd onbattery hook: `systemctl stop xmrig` while frozen.
        # Without the PartOf cascade this would hang for TimeoutStopSec (10s
        # in the mock config, 90s in production) and systemd's freezer bug
        # class could strand the unit. With cascade: auto-pause stops first,
        # its ExecStop thaws cgroup.freeze, xmrig's SIGTERM then succeeds.
        # The stop is timed with the test driver's clock; the 5s bound sits
        # well below the 10s TimeoutStopSec of the mock unit.
        t0 = time.monotonic()
        machine.succeed("systemctl stop xmrig")
        dt = time.monotonic() - t0
        assert dt < 5, f"systemctl stop xmrig took {dt:.1f}s, cascade broken"
        machine.succeed("systemctl show xmrig -p ActiveState --value | grep -q inactive")
        # auto-pause stopped as a PartOf dependent
        machine.succeed("systemctl show ap-cascade -p ActiveState --value | grep -qE 'inactive|deactivating'")
        # Bring xmrig back for any remaining subtests
        machine.succeed("systemctl start xmrig")
        machine.wait_for_unit("xmrig.service")
        # Clean up the load unit; reset-failed on the monitor is best-effort.
        stop_cpu_load(WATCHED_UNIT)
        machine.succeed("systemctl reset-failed ap-cascade 2>/dev/null || true")
  '';
}