fix xmrig pause

2026-04-03 14:39:20 -04:00
parent d4d01d63f1
commit daf82c16ba
8 changed files with 263 additions and 298 deletions
--- a/services/llama-cpp/default.nix
+++ b/services/llama-cpp/default.nix
@@ -1,6 +1,5 @@
 {
  imports = [
    ./llama-cpp.nix
-    ./llama-cpp-xmrig-pause.nix
  ];
 }
--- a/services/llama-cpp/llama-cpp-xmrig-pause.nix
+++ b/services/llama-cpp/llama-cpp-xmrig-pause.nix
@@ -1,35 +0,0 @@
-{
-  config,
-  lib,
-  pkgs,
-  ...
-}:
-lib.mkIf config.services.llama-cpp.enable {
-  systemd.services.llama-cpp-xmrig-pause = {
-    description = "Pause xmrig while llama-cpp is processing requests";
-    after = [
-      "llama-cpp.service"
-      "xmrig.service"
-    ];
-    wantedBy = [ "multi-user.target" ];
-    serviceConfig = {
-      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";
-      Restart = "always";
-      RestartSec = "10s";
-      # Needs /proc access (default) and AF_UNIX for systemctl
-      NoNewPrivileges = true;
-      ProtectHome = true;
-      ProtectSystem = "strict";
-      PrivateTmp = true;
-      RestrictAddressFamilies = [
-        "AF_UNIX" # systemctl talks to systemd over D-Bus unix socket
-      ];
-      MemoryDenyWriteExecute = true;
-    };
-    environment = {
-      POLL_INTERVAL = "3";
-      GRACE_PERIOD = "10";
-      CPU_THRESHOLD = "50";
-    };
-  };
-}
--- a/services/llama-cpp/llama-cpp-xmrig-pause.py
+++ b/services/llama-cpp/llama-cpp-xmrig-pause.py
@@ -1,123 +0,0 @@
-#!/usr/bin/env python3
-"""
-Pause xmrig while llama-cpp is processing inference requests.
-
-Checks if the llama-server process is actively using CPU by reading
-/proc/<pid>/stat. When CPU usage exceeds the threshold, stops xmrig.
-When CPU drops below threshold for GRACE_PERIOD seconds, restarts xmrig.
-"""
-
-import glob
-import os
-import subprocess
-import sys
-import time
-
-POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
-GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10"))
-# CPU percentage (per-core) above which llama-server is considered busy.
-# Idle llama-server uses ~0% CPU; active inference saturates multiple cores.
-CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50"))
-
-
-def log(msg):
-    print(f"[llama-cpp-xmrig-pause] {msg}", file=sys.stderr, flush=True)
-
-
-def find_llama_pid():
-    """Find the PID of the llama-server process."""
-    for path in glob.glob("/proc/[0-9]*/comm"):
-        try:
-            with open(path) as f:
-                if f.read().strip() == "llama-server":
-                    return int(path.split("/")[2])
-        except (OSError, ValueError):
-            continue
-    return None
-
-
-def get_cpu_times(pid):
-    """Read utime + stime from /proc/<pid>/stat. Returns total ticks or None."""
-    try:
-        with open(f"/proc/{pid}/stat") as f:
-            fields = f.read().split(")")[-1].split()
-            # fields[11] = utime, fields[12] = stime (0-indexed after ')')
-            return int(fields[11]) + int(fields[12])
-    except (OSError, IndexError, ValueError):
-        return None
-
-
-def systemctl(action, unit):
-    result = subprocess.run(
-        ["systemctl", action, unit],
-        capture_output=True,
-        text=True,
-    )
-    if result.returncode != 0:
-        log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
-    return result.returncode == 0
-
-
-def main():
-    xmrig_paused = False
-    idle_since = None
-    prev_ticks = None
-    prev_time = None
-    hz = os.sysconf("SC_CLK_TCK")
-
-    log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")
-
-    while True:
-        pid = find_llama_pid()
-        if pid is None:
-            # llama-server not running
-            idle_since = None
-            prev_ticks = None
-            prev_time = None
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        ticks = get_cpu_times(pid)
-        now = time.monotonic()
-
-        if ticks is None or prev_ticks is None or prev_time is None:
-            prev_ticks = ticks
-            prev_time = now
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        dt = now - prev_time
-        if dt <= 0:
-            prev_ticks = ticks
-            prev_time = now
-            time.sleep(POLL_INTERVAL)
-            continue
-
-        # CPU% = (delta_ticks / hz) / delta_seconds * 100
-        cpu_pct = ((ticks - prev_ticks) / hz) / dt * 100
-        prev_ticks = ticks
-        prev_time = now
-
-        busy = cpu_pct > CPU_THRESHOLD
-
-        if busy:
-            idle_since = None
-            if not xmrig_paused:
-                log(f"llama-server busy ({cpu_pct:.0f}% CPU) — stopping xmrig")
-                if systemctl("stop", "xmrig"):
-                    xmrig_paused = True
-        else:
-            if xmrig_paused:
-                if idle_since is None:
-                    idle_since = now
-                elif now - idle_since >= GRACE_PERIOD:
-                    log(f"llama-server idle ({cpu_pct:.0f}% CPU) past grace period — starting xmrig")
-                    if systemctl("start", "xmrig"):
-                        xmrig_paused = False
-                    idle_since = None
-
-        time.sleep(POLL_INTERVAL)
-
-
-if __name__ == "__main__":
-    main()