llama-cpp: pause xmrig during active inference requests
Add a sidecar service that polls the llama-cpp /slots endpoint every 3s. When any slot is processing, it stops xmrig; once all slots are idle, it restarts xmrig after a 10s grace period. An unreachable llama-cpp is handled gracefully (xmrig is left untouched).
This commit is contained in:
35
services/llama-cpp-xmrig-pause.nix
Normal file
35
services/llama-cpp-xmrig-pause.nix
Normal file
@@ -0,0 +1,35 @@
|
||||
# NixOS module: sidecar unit that pauses xmrig while llama-cpp is busy.
#
# Arguments:
#   pkgs            - package set; supplies the python3 interpreter.
#   service_configs - project-wide service settings; only
#                     ports.private.llama_cpp.port is read here.
{
  pkgs,
  service_configs,
  ...
}:
{
  systemd.services.llama-cpp-xmrig-pause = {
    description = "Pause xmrig while llama-cpp is processing requests";

    # Ordering only: no Requires=/Wants= is declared, so this unit still
    # starts when llama-cpp or xmrig are absent or stopped.
    after = [
      "network.target"
      "llama-cpp.service"
      "xmrig.service"
    ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      # The path interpolation copies the script next to this file into the
      # Nix store and substitutes its store path.
      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";

      # Keep the watcher alive: restart on any exit, after a 10s back-off.
      Restart = "always";
      RestartSec = "10s";

      # Sandboxing.
      NoNewPrivileges = true;
      ProtectHome = true;
      ProtectSystem = "strict";
      PrivateTmp = true;
      RestrictAddressFamilies = [
        # HTTP polling of the llama-cpp endpoint on loopback.
        "AF_INET"
        "AF_INET6"
        # NOTE(review): the script is not visible from this file; it is
        # presumed to stop/start xmrig through systemctl (or D-Bus), both of
        # which reach the service manager over a unix-domain socket. Without
        # AF_UNIX in this allow-list, socket() fails with EAFNOSUPPORT and
        # xmrig could never be paused or resumed. Confirm against the script.
        "AF_UNIX"
      ];
      MemoryDenyWriteExecute = true;
    };

    # Settings handed to the script through its environment; environment
    # values must be strings.
    environment = {
      LLAMA_CPP_URL = "http://127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}";
      # Seconds between polls.
      POLL_INTERVAL = "3";
      # Seconds llama-cpp must stay idle before xmrig is started again.
      GRACE_PERIOD = "10";
    };
  };
}
|
||||
Reference in New Issue
Block a user