fix xmrig pause
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
{
|
||||
imports = [
|
||||
./llama-cpp.nix
|
||||
./llama-cpp-xmrig-pause.nix
|
||||
];
|
||||
}
|
||||
|
||||
@@ -1,123 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Pause xmrig while llama-cpp is processing inference requests.
|
||||
|
||||
Checks if the llama-server process is actively using CPU by reading
|
||||
/proc/<pid>/stat. When CPU usage exceeds the threshold, stops xmrig.
|
||||
When CPU drops below threshold for GRACE_PERIOD seconds, restarts xmrig.
|
||||
"""
|
||||
|
||||
import glob
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
|
||||
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10"))
|
||||
# CPU percentage (per-core) above which llama-server is considered busy.
|
||||
# Idle llama-server uses ~0% CPU; active inference saturates multiple cores.
|
||||
CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50"))
|
||||
|
||||
|
||||
def log(msg):
    """Write a prefixed status line to stderr, flushed immediately."""
    sys.stderr.write(f"[llama-cpp-xmrig-pause] {msg}\n")
    sys.stderr.flush()
|
||||
|
||||
|
||||
def find_llama_pid():
    """Return the PID of the running llama-server process, or None.

    Scans every /proc/<pid>/comm entry; processes that exit mid-scan
    (OSError) or yield an unparsable path component are skipped.
    """
    for comm_path in glob.glob("/proc/[0-9]*/comm"):
        try:
            with open(comm_path) as fh:
                name = fh.read().strip()
            if name == "llama-server":
                return int(comm_path.split("/")[2])
        except (OSError, ValueError):
            continue
    return None
|
||||
|
||||
|
||||
def get_cpu_times(pid):
    """Return utime + stime (in clock ticks) for *pid*, or None on failure.

    The stat line is split after the final ')' so a process name that
    itself contains ')' or spaces cannot shift the field offsets.
    """
    try:
        with open(f"/proc/{pid}/stat") as fh:
            after_comm = fh.read().rsplit(")", 1)[-1].split()
        # After the ')': index 11 = utime, index 12 = stime.
        return int(after_comm[11]) + int(after_comm[12])
    except (OSError, IndexError, ValueError):
        return None
|
||||
|
||||
|
||||
def systemctl(action, unit):
    """Run `systemctl <action> <unit>`; log on failure, return success bool."""
    proc = subprocess.run(
        ["systemctl", action, unit],
        capture_output=True,
        text=True,
    )
    ok = proc.returncode == 0
    if not ok:
        log(f"systemctl {action} {unit} failed (rc={proc.returncode}): {proc.stderr.strip()}")
    return ok
|
||||
|
||||
|
||||
def main():
    """Poll llama-server CPU usage forever; stop xmrig while busy, restart after grace."""
    xmrig_paused = False  # True only while WE have stopped xmrig
    idle_since = None     # monotonic time when CPU first dropped below threshold
    prev_ticks = None     # utime+stime sample from the previous poll
    prev_time = None      # monotonic timestamp of the previous sample
    hz = os.sysconf("SC_CLK_TCK")  # clock ticks per second (tick -> seconds)

    log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")

    while True:
        pid = find_llama_pid()
        if pid is None:
            # llama-server not running — drop all sampling state so a
            # restarted server re-primes the window cleanly.
            idle_since = None
            prev_ticks = None
            prev_time = None
            time.sleep(POLL_INTERVAL)
            continue

        ticks = get_cpu_times(pid)
        now = time.monotonic()

        # First sample after (re)start, or a failed stat read: prime only.
        if ticks is None or prev_ticks is None or prev_time is None:
            prev_ticks = ticks
            prev_time = now
            time.sleep(POLL_INTERVAL)
            continue

        dt = now - prev_time
        if dt <= 0:
            # Clock did not advance; re-prime and retry next poll.
            prev_ticks = ticks
            prev_time = now
            time.sleep(POLL_INTERVAL)
            continue

        # CPU% = (delta_ticks / hz) / delta_seconds * 100
        cpu_pct = ((ticks - prev_ticks) / hz) / dt * 100
        prev_ticks = ticks
        prev_time = now

        busy = cpu_pct > CPU_THRESHOLD

        if busy:
            # Any busy sample resets the idle grace timer.
            idle_since = None
            if not xmrig_paused:
                log(f"llama-server busy ({cpu_pct:.0f}% CPU) — stopping xmrig")
                if systemctl("stop", "xmrig"):
                    xmrig_paused = True
        else:
            if xmrig_paused:
                if idle_since is None:
                    idle_since = now
                elif now - idle_since >= GRACE_PERIOD:
                    # Idle long enough — hand the CPU back to the miner.
                    log(f"llama-server idle ({cpu_pct:.0f}% CPU) past grace period — starting xmrig")
                    if systemctl("start", "xmrig"):
                        xmrig_paused = False
                        idle_since = None

        time.sleep(POLL_INTERVAL)
|
||||
|
||||
|
||||
# Entry point when executed as a script (e.g. via systemd ExecStart).
if __name__ == "__main__":
    main()
|
||||
@@ -4,19 +4,15 @@
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
lib.mkIf config.services.llama-cpp.enable {
|
||||
systemd.services.llama-cpp-xmrig-pause = {
|
||||
description = "Pause xmrig while llama-cpp is processing requests";
|
||||
after = [
|
||||
"llama-cpp.service"
|
||||
"xmrig.service"
|
||||
];
|
||||
lib.mkIf config.services.xmrig.enable {
|
||||
systemd.services.xmrig-auto-pause = {
|
||||
description = "Auto-pause xmrig when other services need CPU";
|
||||
after = [ "xmrig.service" ];
|
||||
wantedBy = [ "multi-user.target" ];
|
||||
serviceConfig = {
|
||||
ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";
|
||||
ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
|
||||
Restart = "always";
|
||||
RestartSec = "10s";
|
||||
# Needs /proc access (default) and AF_UNIX for systemctl
|
||||
NoNewPrivileges = true;
|
||||
ProtectHome = true;
|
||||
ProtectSystem = "strict";
|
||||
@@ -28,8 +24,8 @@ lib.mkIf config.services.llama-cpp.enable {
|
||||
};
|
||||
environment = {
|
||||
POLL_INTERVAL = "3";
|
||||
GRACE_PERIOD = "10";
|
||||
CPU_THRESHOLD = "50";
|
||||
GRACE_PERIOD = "15";
|
||||
CPU_THRESHOLD = "5";
|
||||
};
|
||||
};
|
||||
}
|
||||
131
services/xmrig-auto-pause.py
Normal file
131
services/xmrig-auto-pause.py
Normal file
@@ -0,0 +1,131 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Auto-pause xmrig when other services need CPU.
|
||||
|
||||
Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19,
|
||||
its CPU time lands in the 'nice' column and is excluded from the metric.
|
||||
When real workload (user + system + irq + softirq) exceeds the threshold,
|
||||
stops xmrig. When it drops below threshold for GRACE_PERIOD seconds,
|
||||
restarts xmrig.
|
||||
|
||||
This replaces per-service pause scripts with a single general-purpose
|
||||
monitor that handles any CPU-intensive workload (gitea workers, llama-cpp
|
||||
inference, etc.) without needing to know about specific processes.
|
||||
|
||||
Why scheduler priority alone isn't enough:
|
||||
Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
|
||||
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes
|
||||
the shared 32MB L3 cache, and its memory access pattern saturates DRAM
|
||||
bandwidth. Other services run slower even though they aren't denied CPU
|
||||
time. The only fix is to stop xmrig entirely when real work is happening.
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
|
||||
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
|
||||
# Percentage of total CPU ticks that non-nice processes must use to trigger
|
||||
# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
|
||||
# Default 5% catches anything using more than ~60% of a single core.
|
||||
CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "5"))
|
||||
|
||||
|
||||
def log(msg):
    """Emit a prefixed status line on stderr, flushed immediately."""
    sys.stderr.write(f"[xmrig-auto-pause] {msg}\n")
    sys.stderr.flush()
|
||||
|
||||
|
||||
def read_cpu_ticks():
    """Sample the aggregate 'cpu' line of /proc/stat.

    Returns (total, real_work) tick counts. real_work is
    user + system + irq + softirq — everything except the 'nice'
    column (where Nice=19 xmrig accrues time), idle, iowait and steal.
    """
    with open("/proc/stat") as fh:
        cols = fh.readline().split()
    # Column order: cpu user nice system idle iowait irq softirq steal
    user, nice, system, idle, iowait, irq, softirq, steal = map(int, cols[1:9])
    total = user + nice + system + idle + iowait + irq + softirq + steal
    real_work = user + system + irq + softirq
    return total, real_work
|
||||
|
||||
|
||||
def is_active(unit):
    """Return True iff systemd reports *unit* as currently active."""
    rc = subprocess.run(
        ["systemctl", "is-active", "--quiet", unit],
        capture_output=True,
    ).returncode
    return rc == 0
|
||||
|
||||
|
||||
def systemctl(action, unit):
    """Run `systemctl <action> <unit>`; log on failure, return success bool."""
    proc = subprocess.run(
        ["systemctl", action, unit],
        capture_output=True,
        text=True,
    )
    ok = proc.returncode == 0
    if not ok:
        log(f"systemctl {action} {unit} failed (rc={proc.returncode}): {proc.stderr.strip()}")
    return ok
|
||||
|
||||
|
||||
def main():
    """Poll /proc/stat forever; stop xmrig under real load, restart after grace."""
    paused_by_us = False  # True only while WE stopped xmrig (ownership flag)
    idle_since = None     # monotonic time when load first dropped below threshold
    prev_total = None     # total tick count from the previous sample
    prev_work = None      # real-work tick count from the previous sample

    log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")

    while True:
        total, work = read_cpu_ticks()

        # First iteration: prime the sampling window, nothing to compare yet.
        if prev_total is None:
            prev_total = total
            prev_work = work
            time.sleep(POLL_INTERVAL)
            continue

        dt = total - prev_total
        if dt <= 0:
            # Counters did not advance (or wrapped); re-prime and retry.
            prev_total = total
            prev_work = work
            time.sleep(POLL_INTERVAL)
            continue

        # Share of all CPU ticks this interval spent on non-nice work;
        # dt is in ticks too, so no HZ conversion is needed.
        real_work_pct = ((work - prev_work) / dt) * 100
        prev_total = total
        prev_work = work

        busy = real_work_pct > CPU_THRESHOLD

        if busy:
            # Any busy sample resets the idle grace timer.
            idle_since = None
            if not paused_by_us:
                # Only claim ownership if xmrig is actually running.
                # If something else stopped it (e.g. UPS battery hook),
                # don't interfere — we'd wrongly restart it later.
                if is_active("xmrig.service"):
                    log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
                    if systemctl("stop", "xmrig.service"):
                        paused_by_us = True
        else:
            if paused_by_us:
                if idle_since is None:
                    idle_since = time.monotonic()
                elif time.monotonic() - idle_since >= GRACE_PERIOD:
                    # Idle long enough — hand the CPU back to the miner.
                    log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
                    if systemctl("start", "xmrig.service"):
                        paused_by_us = False
                        idle_since = None

        time.sleep(POLL_INTERVAL)
|
||||
|
||||
|
||||
# Entry point when executed as a script (e.g. via systemd ExecStart).
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user