fix xmrig pause

2026-04-03 14:39:20 -04:00
parent d4d01d63f1
commit daf82c16ba
8 changed files with 263 additions and 298 deletions
--- a/configuration.nix
+++ b/configuration.nix
@@ -63,6 +63,7 @@
    ./services/monero.nix
    ./services/p2pool.nix
    ./services/xmrig.nix
+    ./services/xmrig-auto-pause.nix

    ./services/graphing-calculator.nix

--- a/services/llama-cpp/default.nix
+++ b/services/llama-cpp/default.nix
@@ -1,6 +1,5 @@
 {
  imports = [
    ./llama-cpp.nix
-    ./llama-cpp-xmrig-pause.nix
  ];
 }
--- a/services/llama-cpp/llama-cpp-xmrig-pause.py
+++ b/services/llama-cpp/llama-cpp-xmrig-pause.py
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-"""
-Pause xmrig while llama-cpp is processing inference requests.
-
-Checks if the llama-server process is actively using CPU by reading
-/proc/<pid>/stat. When CPU usage exceeds the threshold, stops xmrig.
-When CPU drops below threshold for GRACE_PERIOD seconds, restarts xmrig.
-"""
-
-import glob
-import os
-import subprocess
-import sys
-import time
-
-POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
-GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10"))
-# CPU percentage (per-core) above which llama-server is considered busy.
-# Idle llama-server uses ~0% CPU; active inference saturates multiple cores.
-CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50"))
-
-
-def log(msg):
-    print(f"[llama-cpp-xmrig-pause] {msg}", file=sys.stderr, flush=True)
-
-
-def find_llama_pid():
-    """Find the PID of the llama-server process."""
-    for path in glob.glob("/proc/[0-9]*/comm"):
-        try:
-            with open(path) as f:
-                if f.read().strip() == "llama-server":
-                    return int(path.split("/")[2])
-        except (OSError, ValueError):
-            continue
-    return None
-
-
-def get_cpu_times(pid):
-    """Read utime + stime from /proc/<pid>/stat. Returns total ticks or None."""
-    try:
-        with open(f"/proc/{pid}/stat") as f:
-            fields = f.read().split(")")[-1].split()
-            # fields[11] = utime, fields[12] = stime (0-indexed after ')')
-            return int(fields[11]) + int(fields[12])
-    except (OSError, IndexError, ValueError):
-        return None
-
-
-def systemctl(action, unit):
-    result = subprocess.run(
-        ["systemctl", action, unit],
-        capture_output=True,
-        text=True,
-    )
-    if result.returncode != 0:
-        log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
-    return result.returncode == 0
-
-
-def main():
-    xmrig_paused = False
-    idle_since = None
-    prev_ticks = None
-    prev_time = None
-    hz = os.sysconf("SC_CLK_TCK")
-
-    log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")
-
-    while True:
-        pid = find_llama_pid()
-        if pid is None:
-            # llama-server not running
-            idle_since = None
-            prev_ticks = None
-            prev_time = None
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        ticks = get_cpu_times(pid)
-        now = time.monotonic()
-
-        if ticks is None or prev_ticks is None or prev_time is None:
-            prev_ticks = ticks
-            prev_time = now
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        dt = now - prev_time
-        if dt <= 0:
-            prev_ticks = ticks
-            prev_time = now
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        # CPU% = (delta_ticks / hz) / delta_seconds * 100
-        cpu_pct = ((ticks - prev_ticks) / hz) / dt * 100
-        prev_ticks = ticks
-        prev_time = now
-
-        busy = cpu_pct > CPU_THRESHOLD
-
-        if busy:
-            idle_since = None
-            if not xmrig_paused:
-                log(f"llama-server busy ({cpu_pct:.0f}% CPU) — stopping xmrig")
-                if systemctl("stop", "xmrig"):
-                    xmrig_paused = True
-        else:
-            if xmrig_paused:
-                if idle_since is None:
-                    idle_since = now
-                elif now - idle_since >= GRACE_PERIOD:
-                    log(f"llama-server idle ({cpu_pct:.0f}% CPU) past grace period — starting xmrig")
-                    if systemctl("start", "xmrig"):
-                        xmrig_paused = False
-                    idle_since = None
-
-        time.sleep(POLL_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()
--- a/services/llama-cpp/llama-cpp-xmrig-pause.nix
+++ b/services/llama-cpp/llama-cpp-xmrig-pause.nix
@@ -4,19 +4,15 @@
  pkgs,
  ...
 }:
-lib.mkIf config.services.llama-cpp.enable {
-  systemd.services.llama-cpp-xmrig-pause = {
-    description = "Pause xmrig while llama-cpp is processing requests";
-    after = [
-      "llama-cpp.service"
-      "xmrig.service"
-    ];
+lib.mkIf config.services.xmrig.enable {
+  systemd.services.xmrig-auto-pause = {
+    description = "Auto-pause xmrig when other services need CPU";
+    after = [ "xmrig.service" ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
-      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";
+      ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
      Restart = "always";
      RestartSec = "10s";
-      # Needs /proc access (default) and AF_UNIX for systemctl
      NoNewPrivileges = true;
      ProtectHome = true;
      ProtectSystem = "strict";
@@ -28,8 +24,8 @@ lib.mkIf config.services.llama-cpp.enable {
    };
    environment = {
      POLL_INTERVAL = "3";
-      GRACE_PERIOD = "10";
-      CPU_THRESHOLD = "50";
+      GRACE_PERIOD = "15";
+      CPU_THRESHOLD = "5";
    };
  };
 }
--- a/services/xmrig-auto-pause.py
+++ b/services/xmrig-auto-pause.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+"""
+Auto-pause xmrig when other services need CPU.
+
+Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19,
+its CPU time lands in the 'nice' column and is excluded from the metric.
+When real workload (user + system + irq + softirq) exceeds the threshold,
+stops xmrig. When it drops below threshold for GRACE_PERIOD seconds,
+restarts xmrig.
+
+This replaces per-service pause scripts with a single general-purpose
+monitor that handles any CPU-intensive workload (gitea workers, llama-cpp
+inference, etc.) without needing to know about specific processes.
+
+Why scheduler priority alone isn't enough:
+  Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
+  RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes
+  the shared 32MB L3 cache, and its memory access pattern saturates DRAM
+  bandwidth. Other services run slower even though they aren't denied CPU
+  time. The only fix is to stop xmrig entirely when real work is happening.
+"""
+
+import os
+import subprocess
+import sys
+import time
+
+# Seconds to sleep between /proc/stat samples. Parsed with int(), so the
+# environment value must be a whole number.
+POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
+# Seconds the load must stay at or below CPU_THRESHOLD before a paused xmrig
+# is started again.
+GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
+# Percentage of total CPU ticks that non-nice processes must use to trigger
+# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
+# Default 5% catches anything using more than ~60% of a single core.
+CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "5"))
+
+
+def log(msg):
+    print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True)
+
+
+def read_cpu_ticks():
+    """Read CPU tick counters from /proc/stat.
+
+    Returns (total_ticks, real_work_ticks) where real_work excludes the
+    'nice' column (xmrig) and idle/iowait.
+    """
+    with open("/proc/stat") as f:
+        parts = f.readline().split()
+    # cpu  user nice system idle iowait irq softirq steal
+    user, nice, system, idle, iowait, irq, softirq, steal = (
+        int(x) for x in parts[1:9]
+    )
+    total = user + nice + system + idle + iowait + irq + softirq + steal
+    real_work = user + system + irq + softirq
+    return total, real_work
+
+
+def is_active(unit):
+    """Check if a systemd unit is currently active."""
+    result = subprocess.run(
+        ["systemctl", "is-active", "--quiet", unit],
+        capture_output=True,
+    )
+    return result.returncode == 0
+
+
+def systemctl(action, unit):
+    result = subprocess.run(
+        ["systemctl", action, unit],
+        capture_output=True,
+        text=True,
+    )
+    if result.returncode != 0:
+        log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
+    return result.returncode == 0
+
+
+def main():
+    paused_by_us = False
+    idle_since = None
+    prev_total = None
+    prev_work = None
+
+    log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")
+
+    while True:
+        total, work = read_cpu_ticks()
+
+        if prev_total is None:
+            prev_total = total
+            prev_work = work
+            time.sleep(POLL_INTERVAL)
+            continue
+
+        dt = total - prev_total
+        if dt <= 0:
+            prev_total = total
+            prev_work = work
+            time.sleep(POLL_INTERVAL)
+            continue
+
+        real_work_pct = ((work - prev_work) / dt) * 100
+        prev_total = total
+        prev_work = work
+
+        busy = real_work_pct > CPU_THRESHOLD
+
+        if busy:
+            idle_since = None
+            if not paused_by_us:
+                # Only claim ownership if xmrig is actually running.
+                # If something else stopped it (e.g. UPS battery hook),
+                # don't interfere — we'd wrongly restart it later.
+                if is_active("xmrig.service"):
+                    log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
+                    if systemctl("stop", "xmrig.service"):
+                        paused_by_us = True
+        else:
+            if paused_by_us:
+                if idle_since is None:
+                    idle_since = time.monotonic()
+                elif time.monotonic() - idle_since >= GRACE_PERIOD:
+                    log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
+                    if systemctl("start", "xmrig.service"):
+                        paused_by_us = False
+                    idle_since = None
+
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
--- a/tests/llama-cpp-xmrig-pause.nix
+++ b/tests/llama-cpp-xmrig-pause.nix
@@ -1,162 +0,0 @@
-{
-  pkgs,
-  ...
-}:
-let
-  script = ../services/llama-cpp/llama-cpp-xmrig-pause.py;
-  python = pkgs.python3;
-
-  # SmolLM-135M Q2_K: 85MB, modern GGUFv3, generates ~30 tok/s on one CPU
-  # thread — slow enough that a 200-token request keeps the process busy for
-  # several seconds, fast enough that tests don't crawl.
-  tinyModel = pkgs.fetchurl {
-    url = "https://huggingface.co/QuantFactory/SmolLM-135M-GGUF/resolve/main/SmolLM-135M.Q2_K.gguf";
-    hash = "sha256-DX46drPNJILNba21xfY2tyE0/yPWgOhz43gJdeSYKh4=";
-  };
-in
-pkgs.testers.runNixOSTest {
-  name = "llama-cpp-xmrig-pause";
-
-  nodes.machine =
-    { pkgs, ... }:
-    {
-      environment.systemPackages = [
-        pkgs.python3
-        pkgs.procps
-        pkgs.curl
-        pkgs.llama-cpp
-      ];
-
-      # Mock xmrig as a simple sleep process that can be stopped/started.
-      systemd.services.xmrig = {
-        description = "Mock xmrig miner";
-        serviceConfig = {
-          ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
-          Type = "simple";
-        };
-        wantedBy = [ "multi-user.target" ];
-      };
-    };
-
-  testScript = ''
-    import time
-
-    PORT = 18088
-    MODEL = "${tinyModel}"
-    PYTHON = "${python}/bin/python3"
-    SCRIPT = "${script}"
-
-    # Tuned for test speed while remaining realistic.
-    # POLL_INTERVAL=1 keeps detection latency low.
-    # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short enough
-    # that the full test completes in ~2 minutes.
-    # CPU_THRESHOLD=10 is low because the VM has limited cores and the model
-    # is small — but any active inference still saturates a core.
-    POLL_INTERVAL = "1"
-    GRACE_PERIOD = "5"
-    CPU_THRESHOLD = "10"
-
-    infer_counter = 0
-
-    def send_completion(n_predict=200):
-        """Fire a completion request in the background via a transient systemd unit."""
-        global infer_counter
-        infer_counter += 1
-        name = f"infer-{infer_counter}"
-        machine.succeed(
-            f"systemd-run --unit={name} --property=Type=exec "
-            f"curl -sf -X POST http://127.0.0.1:{PORT}/completion "
-            f"-H 'Content-Type: application/json' "
-            f"-d '{{\"prompt\": \"Once upon a time in a land far away there lived\", \"n_predict\": {n_predict}}}'"
-        )
-        return name
-
-    def wait_inference_done(unit_name, timeout=60):
-        """Wait for a background inference request to finish."""
-        machine.wait_until_fails(
-            f"systemctl is-active {unit_name}",
-            timeout=timeout,
-        )
-
-    start_all()
-    machine.wait_for_unit("multi-user.target")
-    machine.wait_for_unit("xmrig.service")
-
-    with subtest("Start llama-server"):
-        machine.succeed(
-            f"systemd-run --unit=llama-server "
-            # Single inference thread to maximise per-core CPU%, which is
-            # what the monitor measures.  Keeps token generation slow enough
-            # (~30 tok/s) that a 200-token request sustains load for seconds.
-            f"llama-server --model {MODEL} --port {PORT} --ctx-size 512 -t 1 -np 1"
-        )
-        machine.wait_until_succeeds(
-            f"curl -sf http://127.0.0.1:{PORT}/health",
-            timeout=30,
-        )
-        machine.succeed("pgrep -x llama-server")
-
-    with subtest("Start pause monitor"):
-        machine.succeed(
-            f"systemd-run --unit=llama-xmrig-pause "
-            f"--setenv=POLL_INTERVAL={POLL_INTERVAL} "
-            f"--setenv=GRACE_PERIOD={GRACE_PERIOD} "
-            f"--setenv=CPU_THRESHOLD={CPU_THRESHOLD} "
-            f"{PYTHON} {SCRIPT}"
-        )
-        # The monitor needs two consecutive polls to compute a CPU delta.
-        # Wait for baseline to stabilise.
-        time.sleep(3)
-
-    with subtest("xmrig stays running while llama-server is idle"):
-        machine.succeed("systemctl is-active xmrig")
-
-    with subtest("xmrig stopped during prompt processing"):
-        unit = send_completion(n_predict=200)
-        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
-
-    with subtest("xmrig remains stopped during grace period after inference ends"):
-        wait_inference_done(unit)
-        # Inference just finished.  The monitor will need 1-2 polls to detect
-        # idle, then the grace period starts.  Checking 2s after completion
-        # is well within the 5s grace window.
-        time.sleep(2)
-        machine.fail("systemctl is-active xmrig")
-
-    with subtest("xmrig resumes after grace period expires"):
-        # Already idle since previous subtest.  Grace period (5s) plus
-        # detection delay (~2 polls) means xmrig should restart within ~8s.
-        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
-
-    with subtest("Sequential prompts do not cause xmrig flapping"):
-        # First prompt — stop xmrig
-        unit1 = send_completion(n_predict=200)
-        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
-        wait_inference_done(unit1)
-
-        # Brief idle gap — shorter than grace period
-        time.sleep(2)
-
-        # Second prompt arrives before grace period expires, resetting it
-        unit2 = send_completion(n_predict=200)
-        time.sleep(3)
-
-        # xmrig must still be stopped
-        machine.fail("systemctl is-active xmrig")
-
-        wait_inference_done(unit2)
-        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
-
-    with subtest("xmrig stays stopped during sustained inference"):
-        unit = send_completion(n_predict=500)
-        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
-
-        # Stay busy longer than the grace period to prove continuous
-        # activity keeps xmrig stopped indefinitely.
-        time.sleep(8)
-        machine.fail("systemctl is-active xmrig")
-
-        wait_inference_done(unit)
-        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
-  '';
-}
--- a/tests/tests.nix
+++ b/tests/tests.nix
@@ -30,7 +30,9 @@ in

  # llama-cpp tests
  llamaCppAnnotationsTest = handleTest ./llama-cpp-annotations.nix;
-  llamaCppXmrigPauseTest = handleTest ./llama-cpp-xmrig-pause.nix;
+
+  # xmrig auto-pause test
+  xmrigAutoPauseTest = handleTest ./xmrig-auto-pause.nix;
  # ntfy alerts test
  ntfyAlertsTest = handleTest ./ntfy-alerts.nix;

--- a/tests/xmrig-auto-pause.nix
+++ b/tests/xmrig-auto-pause.nix
@@ -0,0 +1,121 @@
+# NixOS VM test for ../services/xmrig-auto-pause.py.
+#
+# Boots a machine with a mock xmrig unit (a nice'd `sleep infinity`), runs the
+# monitor script as a transient unit, and generates load with a bash busy loop
+# to check that xmrig is stopped under load, stays stopped through the grace
+# period, and is started again afterwards.
+{
+  pkgs,
+  ...
+}:
+let
+  # Monitor script under test and the interpreter used to run it in the VM.
+  script = ../services/xmrig-auto-pause.py;
+  python = pkgs.python3;
+in
+pkgs.testers.runNixOSTest {
+  name = "xmrig-auto-pause";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = [
+        pkgs.python3
+        pkgs.procps
+      ];
+
+      # Mock xmrig as a nice'd sleep process that can be stopped/started.
+      systemd.services.xmrig = {
+        description = "Mock xmrig miner";
+        serviceConfig = {
+          ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
+          Type = "simple";
+          Nice = 19;
+        };
+        wantedBy = [ "multi-user.target" ];
+      };
+    };
+
+  # Python test-driver script; the interpreter and script paths are
+  # interpolated by Nix.
+  testScript = ''
+    import time
+
+    PYTHON = "${python}/bin/python3"
+    SCRIPT = "${script}"
+
+    # Tuned for test VMs (1-2 cores).
+    # POLL_INTERVAL=1 keeps detection latency low.
+    # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short
+    # enough that the full test completes in reasonable time.
+    # CPU_THRESHOLD=10 catches a single busy-loop on a 1-2 core VM.
+    POLL_INTERVAL = "1"
+    GRACE_PERIOD = "5"
+    CPU_THRESHOLD = "10"
+
+    def start_cpu_load(name):
+        """Start a non-nice CPU burn as a transient systemd unit."""
+        machine.succeed(
+            f"systemd-run --unit={name} --property=Type=exec "
+            f"bash -c 'while true; do :; done'"
+        )
+
+    def stop_cpu_load(name):
+        machine.succeed(f"systemctl stop {name}")
+
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+    machine.wait_for_unit("xmrig.service")
+
+    with subtest("Start auto-pause monitor"):
+        machine.succeed(
+            f"systemd-run --unit=xmrig-auto-pause "
+            f"--setenv=POLL_INTERVAL={POLL_INTERVAL} "
+            f"--setenv=GRACE_PERIOD={GRACE_PERIOD} "
+            f"--setenv=CPU_THRESHOLD={CPU_THRESHOLD} "
+            f"{PYTHON} {SCRIPT}"
+        )
+        # Monitor needs two consecutive polls to compute a CPU delta.
+        time.sleep(3)
+
+    with subtest("xmrig stays running while system is idle"):
+        machine.succeed("systemctl is-active xmrig")
+
+    with subtest("xmrig stopped when CPU load appears"):
+        start_cpu_load("cpu-load")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+    with subtest("xmrig remains stopped during grace period after load ends"):
+        stop_cpu_load("cpu-load")
+        # Load just stopped. Grace period is 5s. Check at 2s — well within.
+        time.sleep(2)
+        machine.fail("systemctl is-active xmrig")
+
+    with subtest("xmrig resumes after grace period expires"):
+        # Already idle since previous subtest. Grace period (5s) plus
+        # detection delay (~2 polls) means xmrig should restart within ~8s.
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
+
+    with subtest("Intermittent load does not cause flapping"):
+        # First load — stop xmrig
+        start_cpu_load("cpu-load-1")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+        stop_cpu_load("cpu-load-1")
+
+        # Brief idle gap — shorter than grace period
+        time.sleep(2)
+
+        # Second load arrives before grace period expires
+        start_cpu_load("cpu-load-2")
+        time.sleep(3)
+
+        # xmrig must still be stopped
+        machine.fail("systemctl is-active xmrig")
+
+        stop_cpu_load("cpu-load-2")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
+
+    with subtest("Sustained load keeps xmrig stopped"):
+        start_cpu_load("cpu-load-3")
+        machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
+
+        # Stay busy longer than the grace period to prove continuous
+        # activity keeps xmrig stopped indefinitely.
+        time.sleep(8)
+        machine.fail("systemctl is-active xmrig")
+
+        stop_cpu_load("cpu-load-3")
+        machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15)
+  '';
+}