better organize related monero and matrix services
All checks were successful
Build and Deploy / deploy (push) Successful in 2m48s

This commit is contained in:
2026-04-04 14:32:26 -04:00
parent 8ea96c8b8e
commit 324a9123db
12 changed files with 19 additions and 9 deletions

View File

@@ -0,0 +1,8 @@
{
imports = [
./monero.nix
./p2pool.nix
./xmrig.nix
./xmrig-auto-pause.nix
];
}

View File

@@ -0,0 +1,37 @@
{
service_configs,
lib,
...
}:
{
imports = [
(lib.serviceMountWithZpool "monero" service_configs.zpool_ssds [
service_configs.monero.dataDir
])
(lib.serviceFilePerms "monero" [
"Z ${service_configs.monero.dataDir} 0700 monero monero"
])
];
services.monero = {
enable = true;
dataDir = service_configs.monero.dataDir;
rpc = {
address = "0.0.0.0";
port = service_configs.ports.public.monero_rpc.port;
restricted = true;
};
extraConfig = ''
p2p-bind-port=${builtins.toString service_configs.ports.public.monero.port}
zmq-pub=tcp://127.0.0.1:${builtins.toString service_configs.ports.private.monero_zmq.port}
db-sync-mode=fast:async:1000000000bytes
public-node=1
confirm-external-bind=1
'';
};
networking.firewall.allowedTCPPorts = [
service_configs.ports.public.monero.port
service_configs.ports.public.monero_rpc.port
];
}

View File

@@ -0,0 +1,45 @@
{
config,
service_configs,
lib,
...
}:
{
imports = [
(lib.serviceMountWithZpool "p2pool" service_configs.zpool_ssds [
service_configs.p2pool.dataDir
])
(lib.serviceFilePerms "p2pool" [
"Z ${service_configs.p2pool.dataDir} 0700 p2pool p2pool"
])
];
services.p2pool = {
enable = true;
dataDir = service_configs.p2pool.dataDir;
walletAddress = service_configs.p2pool.walletAddress;
sidechain = "nano";
host = "127.0.0.1";
rpcPort = service_configs.ports.public.monero_rpc.port;
zmqPort = service_configs.ports.private.monero_zmq.port;
extraArgs = [
" --stratum 0.0.0.0:${builtins.toString service_configs.ports.private.p2pool_stratum.port}"
];
};
# Ensure p2pool starts after monero is ready
systemd.services.p2pool = {
after = [ "monero.service" ];
wants = [ "monero.service" ];
};
# Stop p2pool on UPS battery to conserve power
services.apcupsd.hooks = lib.mkIf config.services.apcupsd.enable {
onbattery = "systemctl stop p2pool";
offbattery = "systemctl start p2pool";
};
networking.firewall.allowedTCPPorts = [
service_configs.ports.public.p2pool_p2p.port
];
}

View File

@@ -0,0 +1,31 @@
{
config,
lib,
pkgs,
...
}:
lib.mkIf config.services.xmrig.enable {
systemd.services.xmrig-auto-pause = {
description = "Auto-pause xmrig when other services need CPU";
after = [ "xmrig.service" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
Restart = "always";
RestartSec = "10s";
NoNewPrivileges = true;
ProtectHome = true;
ProtectSystem = "strict";
PrivateTmp = true;
RestrictAddressFamilies = [
"AF_UNIX" # systemctl talks to systemd over D-Bus unix socket
];
MemoryDenyWriteExecute = true;
};
environment = {
POLL_INTERVAL = "3";
GRACE_PERIOD = "15";
CPU_THRESHOLD = "5";
};
};
}

View File

@@ -0,0 +1,131 @@
#!/usr/bin/env python3
"""
Auto-pause xmrig when other services need CPU.
Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19,
its CPU time lands in the 'nice' column and is excluded from the metric.
When real workload (user + system + irq + softirq) exceeds the threshold,
stops xmrig. When it drops below threshold for GRACE_PERIOD seconds,
restarts xmrig.
This replaces per-service pause scripts with a single general-purpose
monitor that handles any CPU-intensive workload (gitea workers, llama-cpp
inference, etc.) without needing to know about specific processes.
Why scheduler priority alone isn't enough:
Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes
the shared 32MB L3 cache, and its memory access pattern saturates DRAM
bandwidth. Other services run slower even though they aren't denied CPU
time. The only fix is to stop xmrig entirely when real work is happening.
"""
import os
import subprocess
import sys
import time
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
# Percentage of total CPU ticks that non-nice processes must use to trigger
# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
# Default 5% catches anything using more than ~60% of a single core.
CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "5"))
def log(msg):
print(f"[xmrig-auto-pause] {msg}", file=sys.stderr, flush=True)
def read_cpu_ticks():
"""Read CPU tick counters from /proc/stat.
Returns (total_ticks, real_work_ticks) where real_work excludes the
'nice' column (xmrig) and idle/iowait.
"""
with open("/proc/stat") as f:
parts = f.readline().split()
# cpu user nice system idle iowait irq softirq steal
user, nice, system, idle, iowait, irq, softirq, steal = (
int(x) for x in parts[1:9]
)
total = user + nice + system + idle + iowait + irq + softirq + steal
real_work = user + system + irq + softirq
return total, real_work
def is_active(unit):
"""Check if a systemd unit is currently active."""
result = subprocess.run(
["systemctl", "is-active", "--quiet", unit],
capture_output=True,
)
return result.returncode == 0
def systemctl(action, unit):
result = subprocess.run(
["systemctl", action, unit],
capture_output=True,
text=True,
)
if result.returncode != 0:
log(f"systemctl {action} {unit} failed (rc={result.returncode}): {result.stderr.strip()}")
return result.returncode == 0
def main():
paused_by_us = False
idle_since = None
prev_total = None
prev_work = None
log(f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s threshold={CPU_THRESHOLD}%")
while True:
total, work = read_cpu_ticks()
if prev_total is None:
prev_total = total
prev_work = work
time.sleep(POLL_INTERVAL)
continue
dt = total - prev_total
if dt <= 0:
prev_total = total
prev_work = work
time.sleep(POLL_INTERVAL)
continue
real_work_pct = ((work - prev_work) / dt) * 100
prev_total = total
prev_work = work
busy = real_work_pct > CPU_THRESHOLD
if busy:
idle_since = None
if not paused_by_us:
# Only claim ownership if xmrig is actually running.
# If something else stopped it (e.g. UPS battery hook),
# don't interfere — we'd wrongly restart it later.
if is_active("xmrig.service"):
log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
if systemctl("stop", "xmrig.service"):
paused_by_us = True
else:
if paused_by_us:
if idle_since is None:
idle_since = time.monotonic()
elif time.monotonic() - idle_since >= GRACE_PERIOD:
log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
if systemctl("start", "xmrig.service"):
paused_by_us = False
idle_since = None
time.sleep(POLL_INTERVAL)
if __name__ == "__main__":
main()

59
services/monero/xmrig.nix Normal file
View File

@@ -0,0 +1,59 @@
{
config,
lib,
pkgs,
service_configs,
...
}:
let
threadCount = 12;
in
{
services.xmrig = {
enable = true;
package = pkgs.xmrig;
settings = {
autosave = true;
cpu = {
enabled = true;
huge-pages = true;
hw-aes = true;
rx = lib.range 0 (threadCount - 1);
};
randomx = {
"1gb-pages" = true;
};
opencl = false;
cuda = false;
pools = [
{
url = "127.0.0.1:${builtins.toString service_configs.ports.private.p2pool_stratum.port}";
tls = false;
}
];
};
};
systemd.services.xmrig.serviceConfig = {
Nice = 19;
CPUSchedulingPolicy = "idle";
IOSchedulingClass = "idle";
};
# Stop mining on UPS battery to conserve power
services.apcupsd.hooks = lib.mkIf config.services.apcupsd.enable {
onbattery = "systemctl stop xmrig";
offbattery = "systemctl start xmrig";
};
# Reserve 1GB huge pages for RandomX (dataset is ~2GB)
boot.kernelParams = [
"hugepagesz=1G"
"hugepages=3"
];
}