server-config/services/llama-cpp-xmrig-pause.py

#!/usr/bin/env python3
"""
Pause xmrig while llama-cpp is processing inference requests.

Polls llama-cpp /slots endpoint. When any slot is busy, stops xmrig.
When all slots are idle for GRACE_PERIOD seconds, restarts xmrig.
If llama-cpp is unreachable, does nothing (leaves xmrig in its current state).
"""

import json
import os
import subprocess
import sys
import time
import urllib.request

LLAMA_CPP_URL = os.environ["LLAMA_CPP_URL"].rstrip("/")
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10"))


def log(msg):
    print(f"[llama-cpp-xmrig-pause] {msg}", file=sys.stderr, flush=True)


def get_slots():
    """Fetch /slots from llama-cpp. Returns list of slot dicts, or None on error."""
    req = urllib.request.Request(f"{LLAMA_CPP_URL}/slots")
    try:
        with urllib.request.urlopen(req, timeout=5) as resp:
            return json.loads(resp.read())
    except (urllib.error.URLError, OSError, json.JSONDecodeError, ValueError) as exc:
        log(f"Cannot reach llama-cpp: {exc}")
        return None


def any_slot_busy(slots):
    return any(s.get("is_processing", False) for s in slots)


def systemctl(action, unit):
    result = subprocess.run(
        ["systemctl", action, unit],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
    return result.returncode == 0


def main():
    xmrig_paused = False
    idle_since = None  # monotonic timestamp when slots first went idle

    log(f"Starting: url={LLAMA_CPP_URL} poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s")

    while True:
        slots = get_slots()

        if slots is None:
            # llama-cpp unreachable — leave xmrig alone, reset idle timer
            idle_since = None
            time.sleep(POLL_INTERVAL)
            continue

        busy = any_slot_busy(slots)

        if busy:
            idle_since = None
            if not xmrig_paused:
                log("Slot busy — stopping xmrig")
                if systemctl("stop", "xmrig"):
                    xmrig_paused = True
        else:
            # All slots idle
            if xmrig_paused:
                now = time.monotonic()
                if idle_since is None:
                    idle_since = now
                elif now - idle_since >= GRACE_PERIOD:
                    log("Slots idle past grace period — starting xmrig")
                    if systemctl("start", "xmrig"):
                        xmrig_paused = False
                    idle_since = None

        time.sleep(POLL_INTERVAL)


if __name__ == "__main__":
    main()