From daf82c16bacb8805eed3c0f8f53e58e4cddc9fc0 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Fri, 3 Apr 2026 14:39:20 -0400 Subject: [PATCH] fix xmrig pause --- configuration.nix | 1 + services/llama-cpp/default.nix | 1 - services/llama-cpp/llama-cpp-xmrig-pause.py | 123 ------------- ...p-xmrig-pause.nix => xmrig-auto-pause.nix} | 18 +- services/xmrig-auto-pause.py | 131 ++++++++++++++ tests/llama-cpp-xmrig-pause.nix | 162 ------------------ tests/tests.nix | 4 +- tests/xmrig-auto-pause.nix | 121 +++++++++++++ 8 files changed, 263 insertions(+), 298 deletions(-) delete mode 100644 services/llama-cpp/llama-cpp-xmrig-pause.py rename services/{llama-cpp/llama-cpp-xmrig-pause.nix => xmrig-auto-pause.nix} (52%) create mode 100644 services/xmrig-auto-pause.py delete mode 100644 tests/llama-cpp-xmrig-pause.nix create mode 100644 tests/xmrig-auto-pause.nix diff --git a/configuration.nix b/configuration.nix index b14678c..c2b99c3 100644 --- a/configuration.nix +++ b/configuration.nix @@ -63,6 +63,7 @@ ./services/monero.nix ./services/p2pool.nix ./services/xmrig.nix + ./services/xmrig-auto-pause.nix ./services/graphing-calculator.nix diff --git a/services/llama-cpp/default.nix b/services/llama-cpp/default.nix index cce8a0a..0cd110d 100644 --- a/services/llama-cpp/default.nix +++ b/services/llama-cpp/default.nix @@ -1,6 +1,5 @@ { imports = [ ./llama-cpp.nix - ./llama-cpp-xmrig-pause.nix ]; } diff --git a/services/llama-cpp/llama-cpp-xmrig-pause.py b/services/llama-cpp/llama-cpp-xmrig-pause.py deleted file mode 100644 index 9426be2..0000000 --- a/services/llama-cpp/llama-cpp-xmrig-pause.py +++ /dev/null @@ -1,123 +0,0 @@ -#!/usr/bin/env python3 -""" -Pause xmrig while llama-cpp is processing inference requests. - -Checks if the llama-server process is actively using CPU by reading -/proc/<pid>/stat. When CPU usage exceeds the threshold, stops xmrig. -When CPU drops below threshold for GRACE_PERIOD seconds, restarts xmrig. 
-""" - -import glob -import os -import subprocess -import sys -import time - -POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3")) -GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10")) -# CPU percentage (per-core) above which llama-server is considered busy. -# Idle llama-server uses ~0% CPU; active inference saturates multiple cores. -CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50")) - - -def log(msg): - print(f"[llama-cpp-xmrig-pause] {msg}", file=sys.stderr, flush=True) - - -def find_llama_pid(): - """Find the PID of the llama-server process.""" - for path in glob.glob("/proc/[0-9]*/comm"): - try: - with open(path) as f: - if f.read().strip() == "llama-server": - return int(path.split("/")[2]) - except (OSError, ValueError): - continue - return None - - -def get_cpu_times(pid): - """Read utime + stime from /proc/<pid>/stat. Returns total ticks or None.""" - try: - with open(f"/proc/{pid}/stat") as f: - fields = f.read().split(")")[-1].split() - # fields[11] = utime, fields[12] = stime (0-indexed after ')') - return int(fields[11]) + int(fields[12]) - except (OSError, IndexError, ValueError): - return None - - -def systemctl(action, unit): - result = subprocess.run( - ["systemctl", action, unit], - capture_output=True, - text=True, - ) - if result.returncode != 0: - log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}") - return result.returncode == 0 - - -def main(): - xmrig_paused = False - idle_since = None - prev_ticks = None - prev_time = None - hz = os.sysconf("SC_CLK_TCK") - - log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%") - - while True: - pid = find_llama_pid() - if pid is None: - # llama-server not running - idle_since = None - prev_ticks = None - prev_time = None - time.sleep(POLL_INTERVAL) - continue - - ticks = get_cpu_times(pid) - now = time.monotonic() - - if ticks is None or prev_ticks is None or prev_time is None: - prev_ticks = ticks - prev_time = now 
- time.sleep(POLL_INTERVAL) - continue - - dt = now - prev_time - if dt <= 0: - prev_ticks = ticks - prev_time = now - time.sleep(POLL_INTERVAL) - continue - - # CPU% = (delta_ticks / hz) / delta_seconds * 100 - cpu_pct = ((ticks - prev_ticks) / hz) / dt * 100 - prev_ticks = ticks - prev_time = now - - busy = cpu_pct > CPU_THRESHOLD - - if busy: - idle_since = None - if not xmrig_paused: - log(f"llama-server busy ({cpu_pct:.0f}% CPU) — stopping xmrig") - if systemctl("stop", "xmrig"): - xmrig_paused = True - else: - if xmrig_paused: - if idle_since is None: - idle_since = now - elif now - idle_since >= GRACE_PERIOD: - log(f"llama-server idle ({cpu_pct:.0f}% CPU) past grace period — starting xmrig") - if systemctl("start", "xmrig"): - xmrig_paused = False - idle_since = None - - time.sleep(POLL_INTERVAL) - - -if __name__ == "__main__": - main() diff --git a/services/llama-cpp/llama-cpp-xmrig-pause.nix b/services/xmrig-auto-pause.nix similarity index 52% rename from services/llama-cpp/llama-cpp-xmrig-pause.nix rename to services/xmrig-auto-pause.nix index c694bad..12dc475 100644 --- a/services/llama-cpp/llama-cpp-xmrig-pause.nix +++ b/services/xmrig-auto-pause.nix @@ -4,19 +4,15 @@ pkgs, ... 
}: -lib.mkIf config.services.llama-cpp.enable { - systemd.services.llama-cpp-xmrig-pause = { - description = "Pause xmrig while llama-cpp is processing requests"; - after = [ - "llama-cpp.service" - "xmrig.service" - ]; +lib.mkIf config.services.xmrig.enable { + systemd.services.xmrig-auto-pause = { + description = "Auto-pause xmrig when other services need CPU"; + after = [ "xmrig.service" ]; wantedBy = [ "multi-user.target" ]; serviceConfig = { - ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}"; + ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}"; Restart = "always"; RestartSec = "10s"; - # Needs /proc access (default) and AF_UNIX for systemctl NoNewPrivileges = true; ProtectHome = true; ProtectSystem = "strict"; @@ -28,8 +24,8 @@ lib.mkIf config.services.llama-cpp.enable { }; environment = { POLL_INTERVAL = "3"; - GRACE_PERIOD = "10"; - CPU_THRESHOLD = "50"; + GRACE_PERIOD = "15"; + CPU_THRESHOLD = "5"; }; }; } diff --git a/services/xmrig-auto-pause.py b/services/xmrig-auto-pause.py new file mode 100644 index 0000000..2aafd7e --- /dev/null +++ b/services/xmrig-auto-pause.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Auto-pause xmrig when other services need CPU. + +Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19, +its CPU time lands in the 'nice' column and is excluded from the metric. +When real workload (user + system + irq + softirq) exceeds the threshold, +stops xmrig. When it drops below threshold for GRACE_PERIOD seconds, +restarts xmrig. + +This replaces per-service pause scripts with a single general-purpose +monitor that handles any CPU-intensive workload (gitea workers, llama-cpp +inference, etc.) without needing to know about specific processes. + +Why scheduler priority alone isn't enough: + Nice=19 / SCHED_IDLE only affects which thread gets the next time slice. 
+ RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes + the shared 32MB L3 cache, and its memory access pattern saturates DRAM + bandwidth. Other services run slower even though they aren't denied CPU + time. The only fix is to stop xmrig entirely when real work is happening. +""" + +import os +import subprocess +import sys +import time + +POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3")) +GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15")) +# Percentage of total CPU ticks that non-nice processes must use to trigger +# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total. +# Default 5% catches anything using more than ~60% of a single core. +CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "5")) + + +def log(msg): + print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True) + + +def read_cpu_ticks(): + """Read CPU tick counters from /proc/stat. + + Returns (total_ticks, real_work_ticks) where real_work excludes the + 'nice' column (xmrig) and idle/iowait. 
+ """ + with open("/proc/stat") as f: + parts = f.readline().split() + # cpu user nice system idle iowait irq softirq steal + user, nice, system, idle, iowait, irq, softirq, steal = ( + int(x) for x in parts[1:9] + ) + total = user + nice + system + idle + iowait + irq + softirq + steal + real_work = user + system + irq + softirq + return total, real_work + + +def is_active(unit): + """Check if a systemd unit is currently active.""" + result = subprocess.run( + ["systemctl", "is-active", "--quiet", unit], + capture_output=True, + ) + return result.returncode == 0 + + +def systemctl(action, unit): + result = subprocess.run( + ["systemctl", action, unit], + capture_output=True, + text=True, + ) + if result.returncode != 0: + log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}") + return result.returncode == 0 + + +def main(): + paused_by_us = False + idle_since = None + prev_total = None + prev_work = None + + log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%") + + while True: + total, work = read_cpu_ticks() + + if prev_total is None: + prev_total = total + prev_work = work + time.sleep(POLL_INTERVAL) + continue + + dt = total - prev_total + if dt <= 0: + prev_total = total + prev_work = work + time.sleep(POLL_INTERVAL) + continue + + real_work_pct = ((work - prev_work) / dt) * 100 + prev_total = total + prev_work = work + + busy = real_work_pct > CPU_THRESHOLD + + if busy: + idle_since = None + if not paused_by_us: + # Only claim ownership if xmrig is actually running. + # If something else stopped it (e.g. UPS battery hook), + # don't interfere — we'd wrongly restart it later. 
+ if is_active("xmrig.service"): + log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig") + if systemctl("stop", "xmrig.service"): + paused_by_us = True + else: + if paused_by_us: + if idle_since is None: + idle_since = time.monotonic() + elif time.monotonic() - idle_since >= GRACE_PERIOD: + log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig") + if systemctl("start", "xmrig.service"): + paused_by_us = False + idle_since = None + + time.sleep(POLL_INTERVAL) + + +if __name__ == "__main__": + main() diff --git a/tests/llama-cpp-xmrig-pause.nix b/tests/llama-cpp-xmrig-pause.nix deleted file mode 100644 index 78a272c..0000000 --- a/tests/llama-cpp-xmrig-pause.nix +++ /dev/null @@ -1,162 +0,0 @@ -{ - pkgs, - ... -}: -let - script = ../services/llama-cpp/llama-cpp-xmrig-pause.py; - python = pkgs.python3; - - # SmolLM-135M Q2_K: 85MB, modern GGUFv3, generates ~30 tok/s on one CPU - # thread — slow enough that a 200-token request keeps the process busy for - # several seconds, fast enough that tests don't crawl. - tinyModel = pkgs.fetchurl { - url = "https://huggingface.co/QuantFactory/SmolLM-135M-GGUF/resolve/main/SmolLM-135M.Q2_K.gguf"; - hash = "sha256-DX46drPNJILNba21xfY2tyE0/yPWgOhz43gJdeSYKh4="; - }; -in -pkgs.testers.runNixOSTest { - name = "llama-cpp-xmrig-pause"; - - nodes.machine = - { pkgs, ... }: - { - environment.systemPackages = [ - pkgs.python3 - pkgs.procps - pkgs.curl - pkgs.llama-cpp - ]; - - # Mock xmrig as a simple sleep process that can be stopped/started. - systemd.services.xmrig = { - description = "Mock xmrig miner"; - serviceConfig = { - ExecStart = "${pkgs.coreutils}/bin/sleep infinity"; - Type = "simple"; - }; - wantedBy = [ "multi-user.target" ]; - }; - }; - - testScript = '' - import time - - PORT = 18088 - MODEL = "${tinyModel}" - PYTHON = "${python}/bin/python3" - SCRIPT = "${script}" - - # Tuned for test speed while remaining realistic. - # POLL_INTERVAL=1 keeps detection latency low. 
- # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short enough - # that the full test completes in ~2 minutes. - # CPU_THRESHOLD=10 is low because the VM has limited cores and the model - # is small — but any active inference still saturates a core. - POLL_INTERVAL = "1" - GRACE_PERIOD = "5" - CPU_THRESHOLD = "10" - - infer_counter = 0 - - def send_completion(n_predict=200): - """Fire a completion request in the background via a transient systemd unit.""" - global infer_counter - infer_counter += 1 - name = f"infer-{infer_counter}" - machine.succeed( - f"systemd-run --unit={name} --property=Type=exec " - f"curl -sf -X POST http://127.0.0.1:{PORT}/completion " - f"-H 'Content-Type: application/json' " - f"-d '{{\"prompt\": \"Once upon a time in a land far away there lived\", \"n_predict\": {n_predict}}}'" - ) - return name - - def wait_inference_done(unit_name, timeout=60): - """Wait for a background inference request to finish.""" - machine.wait_until_fails( - f"systemctl is-active {unit_name}", - timeout=timeout, - ) - - start_all() - machine.wait_for_unit("multi-user.target") - machine.wait_for_unit("xmrig.service") - - with subtest("Start llama-server"): - machine.succeed( - f"systemd-run --unit=llama-server " - # Single inference thread to maximise per-core CPU%, which is - # what the monitor measures. Keeps token generation slow enough - # (~30 tok/s) that a 200-token request sustains load for seconds. - f"llama-server --model {MODEL} --port {PORT} --ctx-size 512 -t 1 -np 1" - ) - machine.wait_until_succeeds( - f"curl -sf http://127.0.0.1:{PORT}/health", - timeout=30, - ) - machine.succeed("pgrep -x llama-server") - - with subtest("Start pause monitor"): - machine.succeed( - f"systemd-run --unit=llama-xmrig-pause " - f"--setenv=POLL_INTERVAL={POLL_INTERVAL} " - f"--setenv=GRACE_PERIOD={GRACE_PERIOD} " - f"--setenv=CPU_THRESHOLD={CPU_THRESHOLD} " - f"{PYTHON} {SCRIPT}" - ) - # The monitor needs two consecutive polls to compute a CPU delta. 
- # Wait for baseline to stabilise. - time.sleep(3) - - with subtest("xmrig stays running while llama-server is idle"): - machine.succeed("systemctl is-active xmrig") - - with subtest("xmrig stopped during prompt processing"): - unit = send_completion(n_predict=200) - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - - with subtest("xmrig remains stopped during grace period after inference ends"): - wait_inference_done(unit) - # Inference just finished. The monitor will need 1-2 polls to detect - # idle, then the grace period starts. Checking 2s after completion - # is well within the 5s grace window. - time.sleep(2) - machine.fail("systemctl is-active xmrig") - - with subtest("xmrig resumes after grace period expires"): - # Already idle since previous subtest. Grace period (5s) plus - # detection delay (~2 polls) means xmrig should restart within ~8s. - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) - - with subtest("Sequential prompts do not cause xmrig flapping"): - # First prompt — stop xmrig - unit1 = send_completion(n_predict=200) - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - wait_inference_done(unit1) - - # Brief idle gap — shorter than grace period - time.sleep(2) - - # Second prompt arrives before grace period expires, resetting it - unit2 = send_completion(n_predict=200) - time.sleep(3) - - # xmrig must still be stopped - machine.fail("systemctl is-active xmrig") - - wait_inference_done(unit2) - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) - - with subtest("xmrig stays stopped during sustained inference"): - unit = send_completion(n_predict=500) - machine.wait_until_fails("systemctl is-active xmrig", timeout=20) - - # Stay busy longer than the grace period to prove continuous - # activity keeps xmrig stopped indefinitely. 
- time.sleep(8) - machine.fail("systemctl is-active xmrig") - - wait_inference_done(unit) - machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) - ''; -} diff --git a/tests/tests.nix b/tests/tests.nix index 2dcec9a..c5e6cd4 100644 --- a/tests/tests.nix +++ b/tests/tests.nix @@ -30,7 +30,9 @@ in # llama-cpp tests llamaCppAnnotationsTest = handleTest ./llama-cpp-annotations.nix; - llamaCppXmrigPauseTest = handleTest ./llama-cpp-xmrig-pause.nix; + + # xmrig auto-pause test + xmrigAutoPauseTest = handleTest ./xmrig-auto-pause.nix; # ntfy alerts test ntfyAlertsTest = handleTest ./ntfy-alerts.nix; diff --git a/tests/xmrig-auto-pause.nix b/tests/xmrig-auto-pause.nix new file mode 100644 index 0000000..6f5daa6 --- /dev/null +++ b/tests/xmrig-auto-pause.nix @@ -0,0 +1,121 @@ +{ + pkgs, + ... +}: +let + script = ../services/xmrig-auto-pause.py; + python = pkgs.python3; +in +pkgs.testers.runNixOSTest { + name = "xmrig-auto-pause"; + + nodes.machine = + { pkgs, ... }: + { + environment.systemPackages = [ + pkgs.python3 + pkgs.procps + ]; + + # Mock xmrig as a nice'd sleep process that can be stopped/started. + systemd.services.xmrig = { + description = "Mock xmrig miner"; + serviceConfig = { + ExecStart = "${pkgs.coreutils}/bin/sleep infinity"; + Type = "simple"; + Nice = 19; + }; + wantedBy = [ "multi-user.target" ]; + }; + }; + + testScript = '' + import time + + PYTHON = "${python}/bin/python3" + SCRIPT = "${script}" + + # Tuned for test VMs (1-2 cores). + # POLL_INTERVAL=1 keeps detection latency low. + # GRACE_PERIOD=5 is long enough to verify "stays stopped" but short + # enough that the full test completes in reasonable time. + # CPU_THRESHOLD=10 catches a single busy-loop on a 1-2 core VM. 
+ POLL_INTERVAL = "1" + GRACE_PERIOD = "5" + CPU_THRESHOLD = "10" + + def start_cpu_load(name): + """Start a non-nice CPU burn as a transient systemd unit.""" + machine.succeed( + f"systemd-run --unit={name} --property=Type=exec " + f"bash -c 'while true; do :; done'" + ) + + def stop_cpu_load(name): + machine.succeed(f"systemctl stop {name}") + + start_all() + machine.wait_for_unit("multi-user.target") + machine.wait_for_unit("xmrig.service") + + with subtest("Start auto-pause monitor"): + machine.succeed( + f"systemd-run --unit=xmrig-auto-pause " + f"--setenv=POLL_INTERVAL={POLL_INTERVAL} " + f"--setenv=GRACE_PERIOD={GRACE_PERIOD} " + f"--setenv=CPU_THRESHOLD={CPU_THRESHOLD} " + f"{PYTHON} {SCRIPT}" + ) + # Monitor needs two consecutive polls to compute a CPU delta. + time.sleep(3) + + with subtest("xmrig stays running while system is idle"): + machine.succeed("systemctl is-active xmrig") + + with subtest("xmrig stopped when CPU load appears"): + start_cpu_load("cpu-load") + machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + + with subtest("xmrig remains stopped during grace period after load ends"): + stop_cpu_load("cpu-load") + # Load just stopped. Grace period is 5s. Check at 2s — well within. + time.sleep(2) + machine.fail("systemctl is-active xmrig") + + with subtest("xmrig resumes after grace period expires"): + # Already idle since previous subtest. Grace period (5s) plus + # detection delay (~2 polls) means xmrig should restart within ~8s. 
+ machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) + + with subtest("Intermittent load does not cause flapping"): + # First load — stop xmrig + start_cpu_load("cpu-load-1") + machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + stop_cpu_load("cpu-load-1") + + # Brief idle gap — shorter than grace period + time.sleep(2) + + # Second load arrives before grace period expires + start_cpu_load("cpu-load-2") + time.sleep(3) + + # xmrig must still be stopped + machine.fail("systemctl is-active xmrig") + + stop_cpu_load("cpu-load-2") + machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) + + with subtest("Sustained load keeps xmrig stopped"): + start_cpu_load("cpu-load-3") + machine.wait_until_fails("systemctl is-active xmrig", timeout=20) + + # Stay busy longer than the grace period to prove continuous + # activity keeps xmrig stopped indefinitely. + time.sleep(8) + machine.fail("systemctl is-active xmrig") + + stop_cpu_load("cpu-load-3") + machine.wait_until_succeeds("systemctl is-active xmrig", timeout=15) + ''; +}