llama-cpp: pause xmrig during active inference requests

Add a sidecar service that polls the llama-cpp /slots endpoint every 3s. When any slot is processing, it stops xmrig. Once all slots have been idle for a 10s grace period, it restarts xmrig. If llama-cpp is unreachable, the service handles it gracefully and leaves xmrig untouched.
2026-04-02 17:43:07 -04:00
parent 50453cf0b5
commit df15be01ea
3 changed files with 128 additions and 0 deletions
--- a/services/llama-cpp-xmrig-pause.nix
+++ b/services/llama-cpp-xmrig-pause.nix
@@ -0,0 +1,52 @@
+# Sidecar unit that runs llama-cpp-xmrig-pause.py, whose job is to pause xmrig
+# while llama-cpp is processing requests.
+#
+# Module arguments:
+#   pkgs            - package set; supplies the python3 interpreter.
+#   service_configs - project-wide settings; only
+#                     ports.private.llama_cpp.port is read here.
+{
+  pkgs,
+  service_configs,
+  ...
+}:
+{
+  systemd.services.llama-cpp-xmrig-pause = {
+    description = "Pause xmrig while llama-cpp is processing requests";
+    # Ordering only: no Wants=/Requires= dependency on these units is
+    # declared here.
+    after = [
+      "network.target"
+      "llama-cpp.service"
+      "xmrig.service"
+    ];
+    wantedBy = [ "multi-user.target" ];
+    serviceConfig = {
+      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";
+      # Long-running poller: bring it back 10s after any exit.
+      Restart = "always";
+      RestartSec = "10s";
+      NoNewPrivileges = true;
+      ProtectHome = true;
+      ProtectSystem = "strict";
+      PrivateTmp = true;
+      RestrictAddressFamilies = [
+        # HTTP requests to LLAMA_CPP_URL.
+        "AF_INET"
+        "AF_INET6"
+        # Stopping/starting xmrig.service means talking to the systemd
+        # manager, which is reached over a UNIX-domain socket. Without
+        # AF_UNIX that socket() call fails with EAFNOSUPPORT.
+        # NOTE(review): assumes the script uses systemctl/D-Bus — confirm.
+        "AF_UNIX"
+      ];
+      MemoryDenyWriteExecute = true;
+    };
+    # Environment variables set for the service process.
+    environment = {
+      LLAMA_CPP_URL = "http://127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}";
+      POLL_INTERVAL = "3"; # seconds between polls
+      GRACE_PERIOD = "10"; # seconds of idle before xmrig is restarted
+    };
+  };
+}