Compare commits

..

2 Commits

Author SHA1 Message Date
7ff404b032 llama-cpp: add API key auth via --api-key-file
Some checks failed
Build and Deploy / deploy (push) Failing after 1m18s
Generate and encrypt a Bearer token for llama-cpp's built-in auth.
Remove caddy_auth from the vhost since basic auth blocks Bearer-only
clients. Internal sidecars (xmrig-pause, annotations) connect
directly to localhost and are unaffected (/slots is public).
2026-04-02 17:54:51 -04:00
b8dd129bea monitoring: add zpool and boot partition usage metrics
Add textfile collector for ZFS pool utilization (tank, hdds) and
boot drive partitions (/boot, /persistent, /nix). Runs every 60s.
Add two Grafana dashboard panels: ZFS Pool Utilization and Boot
Drive Partitions as Row 5.
2026-04-02 17:44:11 -04:00
88 changed files with 1480 additions and 2841 deletions

View File

@@ -112,7 +112,6 @@ Each service file in `services/` follows this structure:
- **Hugepages**: Services needing large pages declare their budget in `service-configs.nix` under `hugepages_2m.services`. The kernel sysctl is set automatically from the total.
- **Domain**: Primary domain is `sigkill.computer`. Old domain `gardling.com` redirects automatically.
- **Hardened kernel**: Uses `_hardened` kernel. Security-sensitive defaults apply.
- **PostgreSQL as central database**: All services that support PostgreSQL MUST use it instead of embedded databases (H2, SQLite, etc.). Connect via Unix socket with peer auth when possible (JDBC services can use junixsocket). The PostgreSQL instance is declared in `services/postgresql.nix` with ZFS-backed storage. Use `ensureDatabases`/`ensureUsers` to auto-create databases and roles.
### Test Pattern
Tests use `pkgs.testers.runNixOSTest` (NixOS VM tests):

View File

@@ -1,15 +0,0 @@
# server-config (archived)
This repository has been unified with its sibling `dotfiles` into
[**titaniumtown/nixos**](https://git.sigkill.computer/titaniumtown/nixos).
The final pre-unification commit is tagged `final-before-unify`.
See the new repo's `README.md` and `AGENTS.md` for:
- current flake layout (hosts: mreow, yarn, muffin)
- deploy workflow
- git-crypt / agenix setup
Do **not** push new commits here — CI has been disabled, and muffin's harmonia
binary-cache no longer serves paths from `/var/lib/dotfiles-deploy/`.

View File

@@ -23,8 +23,8 @@
./modules/power.nix
./services/postgresql.nix
./services/jellyfin
./services/caddy
./services/jellyfin.nix
./services/caddy.nix
./services/immich.nix
./services/gitea.nix
./services/gitea-actions-runner.nix
@@ -32,6 +32,7 @@
./services/wg.nix
./services/qbittorrent.nix
./services/jellyfin-qbittorrent-monitor.nix
./services/bitmagnet.nix
./services/arr/prowlarr.nix
@@ -46,19 +47,30 @@
./services/soulseek.nix
# ./services/llama-cpp.nix
./services/llama-cpp.nix
./services/llama-cpp-annotations.nix
./services/trilium.nix
./services/ups.nix
./services/grafana
./services/monitoring.nix
./services/jellyfin-annotations.nix
./services/zfs-scrub-annotations.nix
./services/bitwarden.nix
./services/firefox-syncserver.nix
./services/matrix
./services/matrix.nix
./services/coturn.nix
./services/livekit.nix
./services/monero
./services/monero.nix
./services/p2pool.nix
./services/xmrig.nix
./services/llama-cpp-xmrig-pause.nix
# KEEP UNTIL 2028
./services/caddy_senior_project.nix
./services/graphing-calculator.nix
@@ -66,13 +78,10 @@
./services/syncthing.nix
./services/ntfy
./services/ntfy.nix
./services/ntfy-alerts.nix
./services/mollysocket.nix
./services/harmonia.nix
./services/ddns-updater.nix
];
# Hosts entries for CI/CD deploy targets
@@ -122,21 +131,14 @@
};
};
# Intel Arc A380 (DG2, 56a5) uses the i915 driver on kernel 6.12.
# The xe driver's iHD media driver integration has buffer mapping
# failures on this GPU/kernel combination. i915 works correctly for
# VAAPI transcode as long as ASPM deep states are disabled for the
# GPU (see modules/power.nix).
hardware.intelgpu.driver = "i915";
hardware.intelgpu.driver = "xe";
# Per-service 2MB hugepage budget calculated in service-configs.nix.
boot.kernel.sysctl."vm.nr_hugepages" = service_configs.hugepages_2m.total_pages;
boot = {
# 6.12 LTS until 2027-03. Kernel 6.18 causes a reproducible ZFS deadlock
# in dbuf_evict due to page allocator changes (__free_frozen_pages).
# https://github.com/openzfs/zfs/issues/18426
kernelPackages = pkgs.linuxPackages_6_12;
# 6.12 LTS until 2026
kernelPackages = pkgs.linuxPackages_6_12_hardened;
loader = {
# Use the systemd-boot EFI boot loader.

214
flake.lock generated
View File

@@ -27,17 +27,16 @@
},
"arr-init": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1776401121,
"narHash": "sha256-BELV1YMBuLL0aQNQ3SLvSLq8YN5h2o1jcrwz1+Zt32Q=",
"lastModified": 1774681523,
"narHash": "sha256-K49RohIwbgzVeOdStfVDO83qy5K5ZLKWk4EsHJKj/k4=",
"ref": "refs/heads/main",
"rev": "6dde2a3e0d087208b8084b61113707c5533c4c2d",
"revCount": 19,
"rev": "f8475f6cb4d4d4df99002d07cf9583fb33b87876",
"revCount": 11,
"type": "git",
"url": "ssh://gitea@git.gardling.com/titaniumtown/arr-init"
},
@@ -103,29 +102,6 @@
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": [
"qbittorrent-metrics-exporter",
"naersk",
"nixpkgs"
],
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1752475459,
"narHash": "sha256-z6QEu4ZFuHiqdOPbYss4/Q8B0BFhacR8ts6jO/F/aOU=",
"owner": "nix-community",
"repo": "fenix",
"rev": "bf0d6f70f4c9a9cf8845f992105652173f4b617f",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
@@ -194,25 +170,7 @@
},
"flake-utils": {
"inputs": {
"systems": "systems_2"
},
"locked": {
"lastModified": 1731533236,
"narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
},
"flake-utils_2": {
"inputs": {
"systems": "systems_6"
"systems": "systems_4"
},
"locked": {
"lastModified": 1731533236,
@@ -257,11 +215,11 @@
]
},
"locked": {
"lastModified": 1775425411,
"narHash": "sha256-KY6HsebJHEe5nHOWP7ur09mb0drGxYSzE3rQxy62rJo=",
"lastModified": 1774875830,
"narHash": "sha256-WPYlTmZvVa9dWlAziFkVjBdv1Z6giNIq40O1DxsBmiI=",
"owner": "nix-community",
"repo": "home-manager",
"rev": "0d02ec1d0a05f88ef9e74b516842900c41f0f2fe",
"rev": "7afd8cebb99e25a64a745765920e663478eb8830",
"type": "github"
},
"original": {
@@ -323,11 +281,11 @@
"rust-overlay": "rust-overlay"
},
"locked": {
"lastModified": 1776248416,
"narHash": "sha256-TC6yzbCAex1pDfqUZv9u8fVm8e17ft5fNrcZ0JRDOIQ=",
"lastModified": 1774858933,
"narHash": "sha256-rgHUoE4QhOvK3Rcl9cbuIVdjPjFjfhcTm/uPs8Y7+2w=",
"owner": "nix-community",
"repo": "lanzaboote",
"rev": "18e9e64bae15b828c092658335599122a6db939b",
"rev": "45338aab3013924c75305f5cb3543b9cda993183",
"type": "github"
},
"original": {
@@ -344,11 +302,11 @@
]
},
"locked": {
"lastModified": 1776301820,
"narHash": "sha256-Yr3JRZ05PNmX4sR2Ak7e0jT+oCQgTAAML7FUoyTmitk=",
"lastModified": 1775101360,
"narHash": "sha256-X1cyWED8lmsGKFc7Pb6nGJ8EVzpPqi5iKcyL8NVVIe8=",
"owner": "TheTom",
"repo": "llama-cpp-turboquant",
"rev": "1073622985bb68075472474b4b0fdfcdabcfc9d0",
"rev": "04eeabb0d344b54ca12d4140b8af8c236ffe7beb",
"type": "github"
},
"original": {
@@ -358,40 +316,20 @@
"type": "github"
}
},
"naersk": {
"inputs": {
"fenix": "fenix",
"nixpkgs": "nixpkgs_2"
},
"locked": {
"lastModified": 1763384566,
"narHash": "sha256-r+wgI+WvNaSdxQmqaM58lVNvJYJ16zoq+tKN20cLst4=",
"owner": "nix-community",
"repo": "naersk",
"rev": "d4155d6ebb70fbe2314959842f744aa7cabbbf6a",
"type": "github"
},
"original": {
"owner": "nix-community",
"ref": "master",
"repo": "naersk",
"type": "github"
}
},
"nix-minecraft": {
"inputs": {
"flake-compat": "flake-compat_3",
"nixpkgs": [
"nixpkgs"
],
"systems": "systems_4"
"systems": "systems_3"
},
"locked": {
"lastModified": 1776310483,
"narHash": "sha256-xMFl+umxGmo5VEgcZcXT5Dk9sXU5WyTRz1Olpywr/60=",
"lastModified": 1775014230,
"narHash": "sha256-oqRN8daUQrUPIjdoc8+bXgy+MVLXt3pa02UeFE/0Eus=",
"owner": "Infinidoge",
"repo": "nix-minecraft",
"rev": "74abd91054e2655d6c392428a27e5d27edd5e6bf",
"rev": "253331438df9aaa637c4b13fbac7cce5f6d04775",
"type": "github"
},
"original": {
@@ -402,11 +340,11 @@
},
"nixos-hardware": {
"locked": {
"lastModified": 1775490113,
"narHash": "sha256-2ZBhDNZZwYkRmefK5XLOusCJHnoeKkoN95hoSGgMxWM=",
"lastModified": 1774933469,
"narHash": "sha256-OrnCQeUO2bqaWUl0lkDWyGWjKsOhtCyd7JSfTedQNUE=",
"owner": "NixOS",
"repo": "nixos-hardware",
"rev": "c775c2772ba56e906cbeb4e0b2db19079ef11ff7",
"rev": "f4c4c2c0c923d7811ac2a63ccc154767e4195337",
"type": "github"
},
"original": {
@@ -418,11 +356,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1776221942,
"narHash": "sha256-FbQAeVNi7G4v3QCSThrSAAvzQTmrmyDLiHNPvTF2qFM=",
"lastModified": 1775002709,
"narHash": "sha256-d3Yx83vSrN+2z/loBh4mJpyRqr9aAJqlke4TkpFmRJA=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "1766437c5509f444c1b15331e82b8b6a9b967000",
"rev": "bcd464ccd2a1a7cd09aa2f8d4ffba83b761b1d0e",
"type": "github"
},
"original": {
@@ -462,22 +400,6 @@
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1752077645,
"narHash": "sha256-HM791ZQtXV93xtCY+ZxG1REzhQenSQO020cu6rHtAPk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "be9e214982e20b8310878ac2baa063a961c1bdf6",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_3": {
"locked": {
"lastModified": 1764517877,
"narHash": "sha256-pp3uT4hHijIC8JUK5MEqeAWmParJrgBVzHLNfJDZxg4=",
@@ -516,28 +438,6 @@
"type": "github"
}
},
"qbittorrent-metrics-exporter": {
"inputs": {
"naersk": "naersk",
"nixpkgs": [
"nixpkgs"
],
"systems": "systems_5"
},
"locked": {
"lastModified": 1771989937,
"narHash": "sha256-bPUV4gVvSbF4VMkbLKYrfwVwzTeS+Sr41wucDj1///g=",
"ref": "refs/heads/main",
"rev": "cb94f866b7a2738532b1cae31d0b9f89adecbd54",
"revCount": 112,
"type": "git",
"url": "https://codeberg.org/anriha/qbittorrent-metrics-exporter"
},
"original": {
"type": "git",
"url": "https://codeberg.org/anriha/qbittorrent-metrics-exporter"
}
},
"root": {
"inputs": {
"agenix": "agenix",
@@ -552,7 +452,6 @@
"nixos-hardware": "nixos-hardware",
"nixpkgs": "nixpkgs",
"nixpkgs-p2pool-module": "nixpkgs-p2pool-module",
"qbittorrent-metrics-exporter": "qbittorrent-metrics-exporter",
"senior_project-website": "senior_project-website",
"srvos": "srvos",
"trackerlist": "trackerlist",
@@ -561,23 +460,6 @@
"ytbn-graphing-software": "ytbn-graphing-software"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1752428706,
"narHash": "sha256-EJcdxw3aXfP8Ex1Nm3s0awyH9egQvB2Gu+QEnJn2Sfg=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "591e3b7624be97e4443ea7b5542c191311aa141d",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
@@ -643,11 +525,11 @@
]
},
"locked": {
"lastModified": 1776306894,
"narHash": "sha256-l4N3O1cfXiQCHJGspAkg6WlZyOFBTbLXhi8Anf8jB0g=",
"lastModified": 1774909327,
"narHash": "sha256-P0L3fYEiQHp2bKrBF+H9GCPYKhLohE32Bu5OgnGYh7o=",
"owner": "nix-community",
"repo": "srvos",
"rev": "01d98209264c78cb323b636d7ab3fe8e7a8b60c7",
"rev": "154666bca66525a3f6cc206df1cc5ae84e1450b6",
"type": "github"
},
"original": {
@@ -716,44 +598,14 @@
"type": "github"
}
},
"systems_5": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"systems_6": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"trackerlist": {
"flake": false,
"locked": {
"lastModified": 1776290985,
"narHash": "sha256-eNWDOLBA0vk1TiKqse71siIAgLycjvBFDw35eAtnUPs=",
"lastModified": 1774994979,
"narHash": "sha256-fYUw6SA2qvG2K5O1NN087EaP3fAPhFZM/9YHINyjaxc=",
"owner": "ngosang",
"repo": "trackerslist",
"rev": "9bb380b3c2a641a3289f92dedef97016f2e47f36",
"rev": "37bdb0abc56c990797b1bf8387c9691778ee2a74",
"type": "github"
},
"original": {
@@ -764,7 +616,7 @@
},
"utils": {
"inputs": {
"systems": "systems_3"
"systems": "systems_2"
},
"locked": {
"lastModified": 1731533236,
@@ -813,8 +665,8 @@
},
"ytbn-graphing-software": {
"inputs": {
"flake-utils": "flake-utils_2",
"nixpkgs": "nixpkgs_3",
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs_2",
"rust-overlay": "rust-overlay_2"
},
"locked": {

View File

@@ -83,11 +83,6 @@
url = "github:JacoMalan1/nixpkgs/create-p2pool-service";
flake = false;
};
qbittorrent-metrics-exporter = {
url = "git+https://codeberg.org/anriha/qbittorrent-metrics-exporter";
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs =
@@ -123,7 +118,7 @@
name = "nixpkgs-patched";
src = nixpkgs;
patches = [
./patches/nixpkgs/0001-firefox-syncserver-add-postgresql-backend-support.patch
./patches/0001-firefox-syncserver-add-postgresql-backend-support.patch
];
};

View File

@@ -46,22 +46,6 @@
group = "caddy";
};
# Njalla API token (NJALLA_API_TOKEN=...) for Caddy DNS-01 challenge
njalla-api-token-env = {
file = ../secrets/njalla-api-token-env.age;
mode = "0400";
owner = "caddy";
group = "caddy";
};
# ddns-updater config.json with Njalla provider credentials
ddns-updater-config = {
file = ../secrets/ddns-updater-config.age;
mode = "0400";
owner = "ddns-updater";
group = "ddns-updater";
};
jellyfin-api-key = {
file = ../secrets/jellyfin-api-key.age;
mode = "0400";
@@ -156,8 +140,8 @@
git-crypt-key-dotfiles = {
file = ../secrets/git-crypt-key-dotfiles.age;
mode = "0400";
owner = "gitea-runner";
group = "gitea-runner";
owner = "root";
group = "root";
};
# Git-crypt symmetric key for server-config repo
@@ -168,15 +152,6 @@
group = "gitea-runner";
};
# Git-crypt symmetric key for the new unified nixos repo (Phase 5 of the unify migration).
# Added additively here so muffin can decrypt nixos's secrets once Phase 6 cuts CI over.
git-crypt-key-nixos = {
file = ../secrets/git-crypt-key-nixos.age;
mode = "0400";
owner = "gitea-runner";
group = "gitea-runner";
};
# Gitea Actions runner registration token
gitea-runner-token = {
file = ../secrets/gitea-runner-token.age;
@@ -192,21 +167,5 @@
owner = "root";
group = "root";
};
# Harmonia binary cache signing key
harmonia-sign-key = {
file = ../secrets/harmonia-sign-key.age;
mode = "0400";
owner = "harmonia";
group = "harmonia";
};
# Caddy basic auth for nix binary cache (separate from main caddy_auth)
nix-cache-auth = {
file = ../secrets/nix-cache-auth.age;
mode = "0400";
owner = "caddy";
group = "caddy";
};
};
}

View File

@@ -12,7 +12,7 @@ let
parent=''${1%%[0-9]*}
dev="/sys/block/$parent"
[ -d "$dev/queue/iosched" ] || exit 0
echo 500 > "$dev/queue/iosched/read_expire"
echo 15000 > "$dev/queue/iosched/read_expire"
echo 15000 > "$dev/queue/iosched/write_expire"
echo 128 > "$dev/queue/iosched/fifo_batch"
echo 16 > "$dev/queue/iosched/writes_starved"
@@ -36,17 +36,11 @@ in
hardware.cpu.amd.updateMicrocode = true;
hardware.enableRedistributableFirmware = true;
# HDD I/O tuning for torrent seeding workload (high-concurrency random reads)
# sharing the pool with latency-sensitive sequential reads (Jellyfin playback).
# HDD I/O tuning for torrent seeding workload (high-concurrency random reads).
#
# mq-deadline sorts requests into elevator sweeps, reducing seek distance.
# read_expire=500ms keeps reads bounded so a Jellyfin segment can't queue for
# seconds behind a torrent burst; write_expire=15s lets the scheduler batch
# writes for coalescence (torrent writes are async and tolerate delay).
# The bulk of read coalescence already happens above the scheduler via ZFS
# aggregation (zfs_vdev_aggregation_limit=4M, read_gap_limit=128K,
# async_read_max=32), so the scheduler deadline only needs to be large enough
# to keep the elevator sweep coherent -- 500ms is plenty on rotational disks.
# Long deadlines (15s) let the scheduler accumulate more ops before dispatching,
# maximizing coalescence — latency is irrelevant since torrent peers tolerate 30-60s.
# fifo_batch=128 keeps sweeps long; writes_starved=16 heavily favors reads.
# 4 MiB readahead matches libtorrent piece extent affinity for sequential prefetch.
#

View File

@@ -10,16 +10,20 @@ inputs.nixpkgs.lib.extend (
lib = prev;
in
{
# stolen from: https://stackoverflow.com/a/42398526
optimizeWithFlags =
pkg: flags:
pkg.overrideAttrs (old: {
env = (old.env or { }) // {
NIX_CFLAGS_COMPILE =
(old.env.NIX_CFLAGS_COMPILE or old.NIX_CFLAGS_COMPILE or "")
+ " "
+ (lib.concatStringsSep " " flags);
};
});
lib.overrideDerivation pkg (
old:
let
newflags = lib.foldl' (acc: x: "${acc} ${x}") "" flags;
oldflags = if (lib.hasAttr "NIX_CFLAGS_COMPILE" old) then "${old.NIX_CFLAGS_COMPILE}" else "";
in
{
NIX_CFLAGS_COMPILE = "${oldflags} ${newflags}";
# stdenv = pkgs.clang19Stdenv;
}
);
optimizePackage =
pkg:
@@ -59,12 +63,8 @@ inputs.nixpkgs.lib.extend (
{ pkgs, config, ... }:
{
systemd.services."${serviceName}-mounts" = {
wants = [
"zfs.target"
"zfs-mount.service"
]
++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
after = [ "zfs-mount.service" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
wants = [ "zfs.target" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
after = lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
before = [ "${serviceName}.service" ];
serviceConfig = {
@@ -180,108 +180,5 @@ inputs.nixpkgs.lib.extend (
after = [ "${serviceName}-file-perms.service" ];
};
};
# Creates a Caddy virtualHost with reverse_proxy to a local or VPN-namespaced port.
# Use `subdomain` for "<name>.${domain}" or `domain` for a full custom domain.
# Exactly one of `subdomain` or `domain` must be provided.
mkCaddyReverseProxy =
{
subdomain ? null,
domain ? null,
port,
auth ? false,
vpn ? false,
}:
assert (subdomain != null) != (domain != null);
{ config, ... }:
let
vhostDomain = if domain != null then domain else "${subdomain}.${service_configs.https.domain}";
upstream =
if vpn then
"${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString port}"
else
":${builtins.toString port}";
in
{
services.caddy.virtualHosts."${vhostDomain}".extraConfig = lib.concatStringsSep "\n" (
lib.optional auth "import ${config.age.secrets.caddy_auth.path}" ++ [ "reverse_proxy ${upstream}" ]
);
};
# Builds a NixOS module defining an enabled fail2ban jail called `name` that
# uses the systemd journal backend and targets the http/https ports.
# Journal entries are matched by unit: `unitName` defaults to "<name>.service".
# `failregex` is passed through to the filter unchanged; ignoreregex is empty.
mkFail2banJail =
  {
    name,
    unitName ? "${name}.service",
    failregex,
  }:
  { ... }:
  let
    jailSettings = {
      backend = "systemd";
      port = "http,https";
      # defaults: maxretry=5, findtime=10m, bantime=10m
    };

    # Restrict the filter to journal entries from the watched unit.
    filterDefinition = {
      inherit failregex;
      ignoreregex = "";
      journalmatch = "_SYSTEMD_UNIT=${unitName}";
    };
  in
  {
    services.fail2ban.jails.${name} = {
      enabled = true;
      settings = jailSettings;
      filter.Definition = filterDefinition;
    };
  };
# Builds the systemd unit "<name>-annotations": a sandboxed DynamicUser daemon
# that runs `script` with python3 and is restarted whenever it exits.
#   after          - extra units appended to the base ordering (network.target, grafana.service)
#   environment    - merged over the built-in GRAFANA_URL / STATE_FILE; caller values win
#   loadCredential - when non-null, set as the unit's LoadCredential
mkGrafanaAnnotationService =
  {
    name,
    description,
    script,
    after ? [ ],
    environment ? { },
    loadCredential ? null,
  }:
  let
    baseOrdering = [
      "network.target"
      "grafana.service"
    ];

    # How the daemon is launched, supervised and given its state directory.
    runtime = {
      ExecStart = "${pkgs.python3}/bin/python3 ${script}";
      Restart = "always";
      RestartSec = "10s";
      DynamicUser = true;
      StateDirectory = "${name}-annotations";
    };

    # Sandboxing knobs applied to every annotation daemon.
    sandbox = {
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
      MemoryDenyWriteExecute = true;
    };

    credentials = lib.optionalAttrs (loadCredential != null) {
      LoadCredential = loadCredential;
    };

    defaultEnvironment = {
      GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
      STATE_FILE = "/var/lib/${name}-annotations/state.json";
    };
  in
  {
    systemd.services."${name}-annotations" = {
      inherit description;
      wantedBy = [ "multi-user.target" ];
      after = baseOrdering ++ after;
      serviceConfig = runtime // sandbox // credentials;
      environment = defaultEnvironment // environment;
    };
  };
# Produces a shell command string that prints the contents of the <ApiKey>
# element of the *arr config.xml at `configXmlPath`.
# Intended to be embedded in $() command substitution inside shell scripts.
extractArrApiKey =
  configXmlPath:
  let
    grep = lib.getExe pkgs.gnugrep;
  in
  "${grep} -oP '(?<=<ApiKey>)[^<]+' ${configXmlPath}";
}
)

View File

@@ -43,36 +43,4 @@ final: prev: {
}
);
};
# Packages rebelcore/jellyfin_exporter from a pinned commit as a Go module.
jellyfin-exporter =
  let
    upstream = prev.fetchFromGitHub {
      owner = "rebelcore";
      repo = "jellyfin_exporter";
      rev = "8e3970cb1bdf3cb21fac099c13072bb7c1b20cf9";
      hash = "sha256-wDnhepYj1MyLRZlwKfmwf4xiEEL3mgQY6V+7TnBd0MY=";
    };
  in
  prev.buildGoModule {
    pname = "jellyfin-exporter";
    version = "unstable-2025-03-27";
    src = upstream;
    vendorHash = "sha256-e08u10e/wNapNZSsD/fGVN9ybMHe3sW0yDIOqI8ZcYs=";

    # upstream tests require a running Jellyfin instance
    doCheck = false;

    meta = {
      mainProgram = "jellyfin_exporter";
    };
  };
# Packages mike1808/igpu-exporter from a pinned commit as a Go module.
igpu-exporter =
  let
    upstream = prev.fetchFromGitHub {
      owner = "mike1808";
      repo = "igpu-exporter";
      rev = "db2dace1a895c2b950f6d3ba1a2e46729251d124";
      hash = "sha256-xWTiu26UzTZIK/6jeda+x6VePUgoWTS0AekejFdgFWs=";
    };
  in
  prev.buildGoModule {
    pname = "igpu-exporter";
    version = "unstable-2025-03-27";
    src = upstream;
    vendorHash = "sha256-oeCSKwDKVwvYQ1fjXXTwQSXNl/upDE3WAAk680vqh3U=";

    # Only the "cmd" subpackage is built; its binary is installed as "cmd",
    # so rename it to match meta.mainProgram.
    subPackages = [ "cmd" ];
    postInstall = ''
      mv $out/bin/cmd $out/bin/igpu-exporter
    '';

    meta = {
      mainProgram = "igpu-exporter";
    };
  };
}

View File

@@ -1,9 +1,12 @@
{
lib,
pkgs,
...
}:
{
powerManagement = {
enable = true;
powertop.enable = true;
cpuFreqGovernor = "powersave";
};
@@ -26,6 +29,14 @@
# work items -- irrelevant for a server whose latency-sensitive paths are
# all in userspace (caddy, jellyfin).
"workqueue.power_efficient=1"
# Force PCIe ASPM on even if the BIOS doesn't advertise support. ASRock
# B550M Pro4 BIOS defaults are conservative; the Zen 3 root complex and
# all downstream devices (NVMe, AHCI, Intel NIC) support L1 substates.
# powertop auto-tune sets the policy to powersupersave at runtime, but
# without `force` the kernel may refuse to enable ASPM at all if the BIOS
# opted out, making the policy write a no-op.
"pcie_aspm=force"
];
boot.kernel.sysctl = {
@@ -34,8 +45,49 @@
"kernel.nmi_watchdog" = 0;
};
# Server has no audio consumers. Power-gate the HDA codec at module load.
# Server has no audio consumers. Power-gate the HDA codec at module load
# rather than waiting for powertop auto-tune to do it after boot.
boot.extraModprobeConfig = ''
options snd_hda_intel power_save=1 power_save_controller=Y
'';
# Apply sysfs power knobs that powertop --auto-tune cannot reach (hardened
# kernel blocks debugfs mount, so powertop silently skips ASPM policy and
# may only lower EPP to balance_power instead of power).
#
# AMD pstate EPP "power": deepest P-states, fastest core parking. Safe because:
# - xmrig runs at Nice=19 / CPUSchedulingPolicy=idle and tolerates latency
# - web services (caddy, jellyfin) are I/O-bound; the ~50 us extra C-state
# exit latency is invisible behind network RTT
# - Minecraft server benefits from single-thread boost, which pstate still
# provides on demand even in "power" mode (just with slightly slower ramp)
#
# ASPM powersupersave: deepest PCIe link power states (L1.1/L1.2). The
# pcie_aspm=force boot param enables ASPM, but the runtime policy defaults
# to "default" which only uses L0s. powersupersave adds L1 substates for
# all downstream devices (NVMe, AHCI, NIC).
# One-shot unit, ordered after multi-user.target, that writes the EPP and
# ASPM-policy sysfs knobs once and then stays "active" (RemainAfterExit).
systemd.services.power-tune = {
  description = "Apply power-saving sysfs knobs (EPP, ASPM policy)";
  after = [ "multi-user.target" ];
  wantedBy = [ "multi-user.target" ];
  serviceConfig = {
    Type = "oneshot";
    RemainAfterExit = true;
    ExecStart = lib.getExe (
      pkgs.writeShellApplication {
        name = "power-tune";
        # A script's exit status is that of the last command it ran. With the
        # `[ -f … ] && echo …` form, a missing sysfs file left the failed test
        # as that last command, so the script exited 1 and the unit was marked
        # failed. `if` guards skip an absent knob cleanly, while a write that
        # actually fails still aborts the script under errexit.
        text = ''
          # AMD pstate energy performance preference
          for epp in /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference; do
            # An unmatched glob stays literal; the -f test skips it.
            if [ -f "$epp" ]; then
              echo power > "$epp"
            fi
          done

          # PCIe ASPM policy
          aspm=/sys/module/pcie_aspm/parameters/policy
          if [ -f "$aspm" ]; then
            echo powersupersave > "$aspm"
          fi
        '';
      }
    );
  };
};
}

View File

@@ -13,89 +13,6 @@
# disable coredumps
systemd.coredump.enable = false;
# Needed for Nix sandbox UID/GID mapping inside derivation builds.
# See https://github.com/NixOS/nixpkgs/issues/287194
security.unprivilegedUsernsClone = true;
# Disable kexec to prevent replacing the running kernel at runtime.
security.protectKernelImage = true;
# Kernel hardening boot parameters. These recover most of the runtime-
# configurable protections that the linux-hardened patchset provided.
boot.kernelParams = [
# Zero all page allocator pages on free / alloc. Prevents info leaks
# and use-after-free from seeing stale data. Modest CPU overhead.
"init_on_alloc=1"
"init_on_free=1"
# Prevent SLUB allocator from merging caches with similar size/flags.
# Keeps different kernel object types in separate slabs, making heap
# exploitation (type confusion, spray, use-after-free) significantly harder.
"slab_nomerge"
# Randomize order of pages returned by the buddy allocator.
"page_alloc.shuffle=1"
# Disable debugfs entirely (exposes kernel internals).
"debugfs=off"
# Disable legacy vsyscall emulation (unused by any modern glibc).
"vsyscall=none"
# Strict IOMMU TLB invalidation (no batching). Prevents DMA-capable
# devices from accessing stale mappings after unmap.
"iommu.strict=1"
];
boot.kernel.sysctl = {
# Immediately reboot on kernel panic (don't leave a compromised
# kernel running). Negative value = reboot without delay. An oops only
# triggers this if it escalates to a panic (kernel.panic_on_oops).
"kernel.panic" = -1;
# Hide kernel pointers from all processes, including CAP_SYSLOG.
# Prevents info leaks used to defeat KASLR.
"kernel.kptr_restrict" = 2;
# Disable bpf() JIT compiler (eliminates JIT spray attack vector).
"net.core.bpf_jit_enable" = false;
# Disable ftrace (kernel function tracer) at runtime.
"kernel.ftrace_enabled" = false;
# Strict reverse-path filtering: drop packets arriving on an interface
# where the source address isn't routable back via that interface.
"net.ipv4.conf.all.rp_filter" = 1;
"net.ipv4.conf.default.rp_filter" = 1;
"net.ipv4.conf.all.log_martians" = true;
"net.ipv4.conf.default.log_martians" = true;
# Ignore ICMP redirects (prevents route table poisoning).
"net.ipv4.conf.all.accept_redirects" = false;
"net.ipv4.conf.all.secure_redirects" = false;
"net.ipv4.conf.default.accept_redirects" = false;
"net.ipv4.conf.default.secure_redirects" = false;
"net.ipv6.conf.all.accept_redirects" = false;
"net.ipv6.conf.default.accept_redirects" = false;
# Don't send ICMP redirects (we are not a router).
"net.ipv4.conf.all.send_redirects" = false;
"net.ipv4.conf.default.send_redirects" = false;
# Ignore broadcast ICMP (SMURF amplification mitigation).
"net.ipv4.icmp_echo_ignore_broadcasts" = true;
# Filesystem hardening: prevent hardlink/symlink-based attacks.
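# protected_hardlinks: block unprivileged creation of hardlinks to files the
# user doesn't own or can't read and write (prevents TOCTOU privilege
# escalation). Also required for systemd-tmpfiles to chmod hardlinked files.
# protected_symlinks: only follow symlinks in world-writable sticky
# directories when the follower or the directory owner owns the symlink.
# protected_fifos/regular (level 2): refuse O_CREAT opens of FIFOs and regular
# files the opener doesn't own in world-writable or group-writable sticky
# directories, unless they are owned by the directory's owner.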
"fs.protected_hardlinks" = true;
"fs.protected_symlinks" = true;
"fs.protected_fifos" = 2;
"fs.protected_regular" = 2;
};
services = {
dbus.implementation = "broker";
/*

View File

@@ -1,39 +1,15 @@
{
config,
lib,
service_configs,
pkgs,
...
}:
let
# Total RAM in bytes (from /proc/meminfo: 65775836 KiB).
totalRamBytes = 65775836 * 1024;
# Hugepage reservations that the kernel carves out before ZFS can use them.
hugepages2mBytes = service_configs.hugepages_2m.total_pages * 2 * 1024 * 1024;
hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3x 1G pages for RandomX (xmrig.nix)
totalHugepageBytes = hugepages2mBytes + hugepages1gBytes;
# ARC max: 60% of RAM remaining after hugepages. Leaves headroom for
# application RSS (PostgreSQL, qBittorrent, Jellyfin, Grafana, etc.),
# kernel slabs, and page cache.
arcMaxBytes = (totalRamBytes - totalHugepageBytes) * 60 / 100;
in
{
boot.zfs.package = pkgs.zfs_2_4;
boot.zfs.package = pkgs.zfs;
boot.initrd.kernelModules = [ "zfs" ];
boot.kernelParams = [
# 120s TXG timeout: batch more dirty data per transaction group so the
# HDD pool (hdds) writes larger, sequential I/Os instead of many small syncs.
# This is a global setting (no per-pool control); the SSD pool (tank) syncs
# infrequently but handles it fine since SSDs don't suffer from seek overhead.
"zfs.zfs_txg_timeout=120"
# Cap ARC to prevent it from claiming memory reserved for hugepages.
# Without this, ZFS auto-sizes c_max to ~62 GiB on a 64 GiB system,
# ignoring the 11.5 GiB of hugepage reservations.
"zfs.zfs_arc_max=${toString arcMaxBytes}"
"zfs.zfs_txg_timeout=120" # longer TXG open time = larger sequential writes
# vdev I/O scheduler: feed more concurrent reads to the block scheduler so
# mq-deadline has a larger pool of requests to sort and merge into elevator sweeps.

View File

@@ -1,443 +0,0 @@
From f0582558f0a8b0ef543b3251c4a07afab89fde63 Mon Sep 17 00:00:00 2001
From: Simon Gardling <titaniumtown@proton.me>
Date: Fri, 17 Apr 2026 19:37:11 -0400
Subject: [PATCH] nixos/jellyfin: add declarative network.xml options
Adds services.jellyfin.network.* (baseUrl, ports, IPv4/6, LAN subnets,
known proxies, remote IP filter, etc.) and services.jellyfin.forceNetworkConfig,
mirroring the existing hardwareAcceleration / forceEncodingConfig pattern.
Motivation: running Jellyfin behind a reverse proxy requires configuring
KnownProxies (so the real client IP is extracted from X-Forwarded-For)
and LocalNetworkSubnets (so LAN clients are correctly classified and not
subject to RemoteClientBitrateLimit). These settings previously had no
declarative option -- they could only be set via the web dashboard or
by hand-editing network.xml, with no guarantee they would survive a
reinstall or be consistent across deployments.
Implementation:
- Adds a networkXmlText template alongside the existing encodingXmlText.
- Factors the force-vs-soft install logic out of preStart into a
small 'manage_config_xml' shell helper; encoding.xml and network.xml
now share the same install/backup semantics.
- Extends the VM test with a machineWithNetworkConfig node and a
subtest that verifies the declared values land in network.xml,
Jellyfin parses them at startup, and the backup-on-overwrite path
works (same shape as the existing 'Force encoding config' subtest).
---
nixos/modules/services/misc/jellyfin.nix | 303 ++++++++++++++++++++---
nixos/tests/jellyfin.nix | 50 ++++
2 files changed, 317 insertions(+), 36 deletions(-)
diff --git a/nixos/modules/services/misc/jellyfin.nix b/nixos/modules/services/misc/jellyfin.nix
index 5c08fc478e45..387da907c652 100644
--- a/nixos/modules/services/misc/jellyfin.nix
+++ b/nixos/modules/services/misc/jellyfin.nix
@@ -26,8 +26,10 @@ let
bool
enum
ints
+ listOf
nullOr
path
+ port
str
submodule
;
@@ -68,6 +70,41 @@ let
</EncodingOptions>
'';
encodingXmlFile = pkgs.writeText "encoding.xml" encodingXmlText;
+ stringListToXml =
+ tag: items:
+ if items == [ ] then
+ "<${tag} />"
+ else
+ "<${tag}>\n ${
+ concatMapStringsSep "\n " (item: "<string>${escapeXML item}</string>") items
+ }\n </${tag}>";
+ networkXmlText = ''
+ <?xml version="1.0" encoding="utf-8"?>
+ <NetworkConfiguration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
+ <BaseUrl>${escapeXML cfg.network.baseUrl}</BaseUrl>
+ <EnableHttps>${boolToString cfg.network.enableHttps}</EnableHttps>
+ <RequireHttps>${boolToString cfg.network.requireHttps}</RequireHttps>
+ <InternalHttpPort>${toString cfg.network.internalHttpPort}</InternalHttpPort>
+ <InternalHttpsPort>${toString cfg.network.internalHttpsPort}</InternalHttpsPort>
+ <PublicHttpPort>${toString cfg.network.publicHttpPort}</PublicHttpPort>
+ <PublicHttpsPort>${toString cfg.network.publicHttpsPort}</PublicHttpsPort>
+ <AutoDiscovery>${boolToString cfg.network.autoDiscovery}</AutoDiscovery>
+ <EnableUPnP>${boolToString cfg.network.enableUPnP}</EnableUPnP>
+ <EnableIPv4>${boolToString cfg.network.enableIPv4}</EnableIPv4>
+ <EnableIPv6>${boolToString cfg.network.enableIPv6}</EnableIPv6>
+ <EnableRemoteAccess>${boolToString cfg.network.enableRemoteAccess}</EnableRemoteAccess>
+ ${stringListToXml "LocalNetworkSubnets" cfg.network.localNetworkSubnets}
+ ${stringListToXml "LocalNetworkAddresses" cfg.network.localNetworkAddresses}
+ ${stringListToXml "KnownProxies" cfg.network.knownProxies}
+ <IgnoreVirtualInterfaces>${boolToString cfg.network.ignoreVirtualInterfaces}</IgnoreVirtualInterfaces>
+ ${stringListToXml "VirtualInterfaceNames" cfg.network.virtualInterfaceNames}
+ <EnablePublishedServerUriByRequest>${boolToString cfg.network.enablePublishedServerUriByRequest}</EnablePublishedServerUriByRequest>
+ ${stringListToXml "PublishedServerUriBySubnet" cfg.network.publishedServerUriBySubnet}
+ ${stringListToXml "RemoteIPFilter" cfg.network.remoteIPFilter}
+ <IsRemoteIPFilterBlacklist>${boolToString cfg.network.isRemoteIPFilterBlacklist}</IsRemoteIPFilterBlacklist>
+ </NetworkConfiguration>
+ '';
+ networkXmlFile = pkgs.writeText "network.xml" networkXmlText;
codecListToType =
desc: list:
submodule {
@@ -205,6 +242,196 @@ in
'';
};
+ network = {
+ baseUrl = mkOption {
+ type = str;
+ default = "";
+ example = "/jellyfin";
+ description = ''
+ Prefix added to Jellyfin's internal URLs when it sits behind a reverse proxy at a sub-path.
+ Leave empty when Jellyfin is served at the root of its host.
+ '';
+ };
+
+ enableHttps = mkOption {
+ type = bool;
+ default = false;
+ description = ''
+ Serve HTTPS directly from Jellyfin. Usually unnecessary when terminating TLS in a reverse proxy.
+ '';
+ };
+
+ requireHttps = mkOption {
+ type = bool;
+ default = false;
+ description = ''
+ Redirect plaintext HTTP requests to HTTPS. Only meaningful when {option}`enableHttps` is true.
+ '';
+ };
+
+ internalHttpPort = mkOption {
+ type = port;
+ default = 8096;
+ description = "TCP port Jellyfin binds for HTTP.";
+ };
+
+ internalHttpsPort = mkOption {
+ type = port;
+ default = 8920;
+ description = "TCP port Jellyfin binds for HTTPS. Only used when {option}`enableHttps` is true.";
+ };
+
+ publicHttpPort = mkOption {
+ type = port;
+ default = 8096;
+ description = "HTTP port Jellyfin advertises in server discovery responses and published URIs.";
+ };
+
+ publicHttpsPort = mkOption {
+ type = port;
+ default = 8920;
+ description = "HTTPS port Jellyfin advertises in server discovery responses and published URIs.";
+ };
+
+ autoDiscovery = mkOption {
+ type = bool;
+ default = true;
+ description = "Respond to LAN client auto-discovery broadcasts (UDP 7359).";
+ };
+
+ enableUPnP = mkOption {
+ type = bool;
+ default = false;
+ description = "Attempt to open the public ports on the router via UPnP.";
+ };
+
+ enableIPv4 = mkOption {
+ type = bool;
+ default = true;
+ description = "Listen on IPv4.";
+ };
+
+ enableIPv6 = mkOption {
+ type = bool;
+ default = true;
+ description = "Listen on IPv6.";
+ };
+
+ enableRemoteAccess = mkOption {
+ type = bool;
+ default = true;
+ description = ''
+ Allow connections from clients outside the subnets listed in {option}`localNetworkSubnets`.
+ When false, Jellyfin rejects non-local requests regardless of reverse proxy configuration.
+ '';
+ };
+
+ localNetworkSubnets = mkOption {
+ type = listOf str;
+ default = [ ];
+ example = [
+ "192.168.1.0/24"
+ "10.0.0.0/8"
+ ];
+ description = ''
+ CIDR ranges (or bare IPs) that Jellyfin classifies as the local network.
+ Clients originating from these ranges -- as seen after {option}`knownProxies` X-Forwarded-For
+ unwrapping -- are not subject to {option}`services.jellyfin` remote-client bitrate limits.
+ '';
+ };
+
+ localNetworkAddresses = mkOption {
+ type = listOf str;
+ default = [ ];
+ example = [ "192.168.1.50" ];
+ description = ''
+ Specific interface addresses Jellyfin binds to. Leave empty to bind all interfaces.
+ '';
+ };
+
+ knownProxies = mkOption {
+ type = listOf str;
+ default = [ ];
+ example = [ "127.0.0.1" ];
+ description = ''
+ Addresses of reverse proxies trusted to forward the real client IP via `X-Forwarded-For`.
+ Without this, Jellyfin sees the proxy's address for every request and cannot apply
+ {option}`localNetworkSubnets` classification to the true client.
+ '';
+ };
+
+ ignoreVirtualInterfaces = mkOption {
+ type = bool;
+ default = true;
+ description = "Skip virtual network interfaces (matching {option}`virtualInterfaceNames`) during auto-bind.";
+ };
+
+ virtualInterfaceNames = mkOption {
+ type = listOf str;
+ default = [ "veth" ];
+ description = "Interface name prefixes treated as virtual when {option}`ignoreVirtualInterfaces` is true.";
+ };
+
+ enablePublishedServerUriByRequest = mkOption {
+ type = bool;
+ default = false;
+ description = ''
+ Derive the server's public URI from the incoming request's Host header instead of any
+ configured {option}`publishedServerUriBySubnet` entry.
+ '';
+ };
+
+ publishedServerUriBySubnet = mkOption {
+ type = listOf str;
+ default = [ ];
+ example = [ "192.168.1.0/24=http://jellyfin.lan:8096" ];
+ description = ''
+ Per-subnet overrides for the URI Jellyfin advertises to clients, in `subnet=uri` form.
+ '';
+ };
+
+ remoteIPFilter = mkOption {
+ type = listOf str;
+ default = [ ];
+ example = [ "203.0.113.0/24" ];
+ description = ''
+ IPs or CIDRs used as the allow- or denylist for remote access.
+ Behaviour is controlled by {option}`isRemoteIPFilterBlacklist`.
+ '';
+ };
+
+ isRemoteIPFilterBlacklist = mkOption {
+ type = bool;
+ default = false;
+ description = ''
+ When true, {option}`remoteIPFilter` is a denylist; when false, it is an allowlist
+ (and an empty list allows all remote addresses).
+ '';
+ };
+ };
+
+ forceNetworkConfig = mkOption {
+ type = bool;
+ default = false;
+ description = ''
+ Whether to overwrite Jellyfin's `network.xml` configuration file on each service start.
+
+ When enabled, the network configuration specified in {option}`services.jellyfin.network`
+ is applied on every service restart. A backup of the existing `network.xml` will be
+ created at `network.xml.backup-$timestamp`.
+
+ ::: {.warning}
+ Enabling this option means that any changes made to networking settings through
+ Jellyfin's web dashboard will be lost on the next service restart. The NixOS configuration
+ becomes the single source of truth for network settings.
+ :::
+
+ When disabled (the default), the network configuration is only written if no `network.xml`
+ exists yet. This allows settings to be changed through Jellyfin's web dashboard and persist
+ across restarts, but means the NixOS configuration options will be ignored after the initial setup.
+ '';
+ };
+
transcoding = {
maxConcurrentStreams = mkOption {
type = nullOr ints.positive;
@@ -384,46 +611,50 @@ in
wants = [ "network-online.target" ];
wantedBy = [ "multi-user.target" ];
- preStart = mkIf cfg.hardwareAcceleration.enable (
- ''
- configDir=${escapeShellArg cfg.configDir}
- encodingXml="$configDir/encoding.xml"
- ''
- + (
- if cfg.forceEncodingConfig then
- ''
- if [[ -e $encodingXml ]]; then
+ preStart =
+ let
+ # manage_config_xml <source> <destination> <force> <description>
+ #
+ # Installs a NixOS-declared XML config at <destination>, preserving
+ # any existing file as a timestamped backup when <force> is true.
+ # With <force>=false, leaves existing files untouched and warns if
+ # the on-disk content differs from the declared content.
+ helper = ''
+ manage_config_xml() {
+ local src="$1" dest="$2" force="$3" desc="$4"
+ if [[ -e "$dest" ]]; then
# this intentionally removes trailing newlines
- currentText="$(<"$encodingXml")"
- configuredText="$(<${encodingXmlFile})"
- if [[ $currentText == "$configuredText" ]]; then
- # don't need to do anything
- exit 0
- else
- encodingXmlBackup="$configDir/encoding.xml.backup-$(date -u +"%FT%H_%M_%SZ")"
- mv --update=none-fail -T "$encodingXml" "$encodingXmlBackup"
+ local currentText configuredText
+ currentText="$(<"$dest")"
+ configuredText="$(<"$src")"
+ if [[ "$currentText" == "$configuredText" ]]; then
+ return 0
fi
- fi
- cp --update=none-fail -T ${encodingXmlFile} "$encodingXml"
- chmod u+w "$encodingXml"
- ''
- else
- ''
- if [[ -e $encodingXml ]]; then
- # this intentionally removes trailing newlines
- currentText="$(<"$encodingXml")"
- configuredText="$(<${encodingXmlFile})"
- if [[ $currentText != "$configuredText" ]]; then
- echo "WARN: $encodingXml already exists and is different from the configured settings. transcoding options NOT applied." >&2
- echo "WARN: Set config.services.jellyfin.forceEncodingConfig = true to override." >&2
+ if [[ "$force" == true ]]; then
+ local backup
+ backup="$dest.backup-$(date -u +"%FT%H_%M_%SZ")"
+ mv --update=none-fail -T "$dest" "$backup"
+ else
+ echo "WARN: $dest already exists and is different from the configured settings. $desc options NOT applied." >&2
+ echo "WARN: Set the corresponding force*Config option to override." >&2
+ return 0
fi
- else
- cp --update=none-fail -T ${encodingXmlFile} "$encodingXml"
- chmod u+w "$encodingXml"
fi
- ''
- )
- );
+ cp --update=none-fail -T "$src" "$dest"
+ chmod u+w "$dest"
+ }
+ configDir=${escapeShellArg cfg.configDir}
+ '';
+ in
+ (
+ helper
+ + optionalString cfg.hardwareAcceleration.enable ''
+ manage_config_xml ${encodingXmlFile} "$configDir/encoding.xml" ${boolToString cfg.forceEncodingConfig} transcoding
+ ''
+ + ''
+ manage_config_xml ${networkXmlFile} "$configDir/network.xml" ${boolToString cfg.forceNetworkConfig} network
+ ''
+ );
# This is mostly follows: https://github.com/jellyfin/jellyfin/blob/master/fedora/jellyfin.service
# Upstream also disable some hardenings when running in LXC, we do the same with the isContainer option
diff --git a/nixos/tests/jellyfin.nix b/nixos/tests/jellyfin.nix
index 4896c13d4eca..0c9191960f78 100644
--- a/nixos/tests/jellyfin.nix
+++ b/nixos/tests/jellyfin.nix
@@ -63,6 +63,26 @@
environment.systemPackages = with pkgs; [ ffmpeg ];
virtualisation.diskSize = 3 * 1024;
};
+
+ machineWithNetworkConfig = {
+ services.jellyfin = {
+ enable = true;
+ forceNetworkConfig = true;
+ network = {
+ localNetworkSubnets = [
+ "192.168.1.0/24"
+ "10.0.0.0/8"
+ ];
+ knownProxies = [ "127.0.0.1" ];
+ enableUPnP = false;
+ enableIPv6 = false;
+ remoteIPFilter = [ "203.0.113.5" ];
+ isRemoteIPFilterBlacklist = true;
+ };
+ };
+ environment.systemPackages = with pkgs; [ ffmpeg ];
+ virtualisation.diskSize = 3 * 1024;
+ };
};
# Documentation of the Jellyfin API: https://api.jellyfin.org/
@@ -122,6 +142,36 @@
# Verify the new encoding.xml does not have the marker (was overwritten)
machineWithForceConfig.fail("grep -q 'MARKER' /var/lib/jellyfin/config/encoding.xml")
+ # Test forceNetworkConfig and network.xml generation
+ with subtest("Force network config writes declared values and backs up on overwrite"):
+ wait_for_jellyfin(machineWithNetworkConfig)
+
+ # Verify network.xml exists and contains the declared values
+ machineWithNetworkConfig.succeed("test -f /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<string>192.168.1.0/24</string>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<string>10.0.0.0/8</string>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<string>127.0.0.1</string>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<string>203.0.113.5</string>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<IsRemoteIPFilterBlacklist>true</IsRemoteIPFilterBlacklist>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<EnableIPv6>false</EnableIPv6>' /var/lib/jellyfin/config/network.xml")
+ machineWithNetworkConfig.succeed("grep -F '<EnableUPnP>false</EnableUPnP>' /var/lib/jellyfin/config/network.xml")
+
+ # Stop service before modifying config
+ machineWithNetworkConfig.succeed("systemctl stop jellyfin.service")
+
+ # Plant a marker so we can prove the backup-and-overwrite path runs
+ machineWithNetworkConfig.succeed("echo '<!-- NETMARKER -->' > /var/lib/jellyfin/config/network.xml")
+
+ # Restart the service to trigger the backup
+ machineWithNetworkConfig.succeed("systemctl restart jellyfin.service")
+ wait_for_jellyfin(machineWithNetworkConfig)
+
+ # Verify the marked content was preserved as a timestamped backup
+ machineWithNetworkConfig.succeed("grep -q 'NETMARKER' /var/lib/jellyfin/config/network.xml.backup-*")
+
+ # Verify the new network.xml does not have the marker (was overwritten)
+ machineWithNetworkConfig.fail("grep -q 'NETMARKER' /var/lib/jellyfin/config/network.xml")
+
auth_header = 'MediaBrowser Client="NixOS Integration Tests", DeviceId="1337", Device="Apple II", Version="20.09"'
--
2.53.0

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@@ -81,12 +81,6 @@ rec {
port = 6011;
proto = "tcp";
};
# Webhook receiver for the Jellyfin-qBittorrent monitor — Jellyfin pushes
# playback events here so throttling reacts without waiting for the poll.
jellyfin_qbittorrent_monitor_webhook = {
port = 9898;
proto = "tcp";
};
bitmagnet = {
port = 3333;
proto = "tcp";
@@ -183,26 +177,6 @@ rec {
port = 8787;
proto = "tcp";
};
jellyfin_exporter = {
port = 9594;
proto = "tcp";
};
qbittorrent_exporter = {
port = 9561;
proto = "tcp";
};
igpu_exporter = {
port = 9563;
proto = "tcp";
};
prometheus_zfs = {
port = 9134;
proto = "tcp";
};
harmonia = {
port = 5500;
proto = "tcp";
};
};
};

View File

@@ -1,6 +1,5 @@
{
pkgs,
lib,
service_configs,
...
}:
@@ -13,6 +12,7 @@ let
curl = "${pkgs.curl}/bin/curl";
jq = "${pkgs.jq}/bin/jq";
grep = "${pkgs.gnugrep}/bin/grep";
# Max items to search per cycle per category (missing + cutoff) per app
maxPerCycle = 5;
@@ -20,8 +20,8 @@ let
searchScript = pkgs.writeShellScript "arr-search" ''
set -euo pipefail
RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
RADARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
SONARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})
search_radarr() {
local endpoint="$1"

View File

@@ -16,11 +16,6 @@
(lib.serviceFilePerms "bazarr" [
"Z ${service_configs.bazarr.dataDir} 0700 ${config.services.bazarr.user} ${config.services.bazarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "bazarr";
port = service_configs.ports.private.bazarr.port;
auth = true;
})
];
services.bazarr = {
@@ -28,6 +23,11 @@
listenPort = service_configs.ports.private.bazarr.port;
};
services.caddy.virtualHosts."bazarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.bazarr.port}
'';
users.users.${config.services.bazarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -8,26 +8,13 @@
dataDir = service_configs.prowlarr.dataDir;
apiVersion = "v1";
networkNamespacePath = "/run/netns/wg";
networkNamespaceService = "wg";
# Guarantee critical config.xml elements before startup. Prowlarr has a
# history of losing <Port> from config.xml, causing the service to run
# without binding any socket. See arr-init's configXml for details.
configXml = {
Port = service_configs.ports.private.prowlarr.port;
BindAddress = "*";
EnableSsl = false;
};
# Prowlarr runs in the wg netns; Sonarr/Radarr in the host netns.
# From host netns, Prowlarr is reachable at the wg namespace address,
# not at localhost (which resolves to the host's own netns).
# Health checks can now run — the reverse-connect is reachable.
healthChecks = true;
syncedApps = [
{
name = "Sonarr";
implementation = "Sonarr";
configContract = "SonarrSettings";
prowlarrUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}";
prowlarrUrl = "http://localhost:${builtins.toString service_configs.ports.private.prowlarr.port}";
baseUrl = "http://${config.vpnNamespaces.wg.bridgeAddress}:${builtins.toString service_configs.ports.private.sonarr.port}";
apiKeyFrom = "${service_configs.sonarr.dataDir}/config.xml";
serviceName = "sonarr";
@@ -36,7 +23,7 @@
name = "Radarr";
implementation = "Radarr";
configContract = "RadarrSettings";
prowlarrUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}";
prowlarrUrl = "http://localhost:${builtins.toString service_configs.ports.private.prowlarr.port}";
baseUrl = "http://${config.vpnNamespaces.wg.bridgeAddress}:${builtins.toString service_configs.ports.private.radarr.port}";
apiKeyFrom = "${service_configs.radarr.dataDir}/config.xml";
serviceName = "radarr";
@@ -50,11 +37,6 @@
port = service_configs.ports.private.sonarr.port;
dataDir = service_configs.sonarr.dataDir;
healthChecks = true;
configXml = {
Port = service_configs.ports.private.sonarr.port;
BindAddress = "*";
EnableSsl = false;
};
rootFolders = [ service_configs.media.tvDir ];
naming = {
renameEpisodes = true;
@@ -87,11 +69,6 @@
port = service_configs.ports.private.radarr.port;
dataDir = service_configs.radarr.dataDir;
healthChecks = true;
configXml = {
Port = service_configs.ports.private.radarr.port;
BindAddress = "*";
EnableSsl = false;
};
rootFolders = [ service_configs.media.moviesDir ];
naming = {
renameMovies = true;
@@ -133,21 +110,4 @@
serviceName = "radarr";
};
};
services.jellyseerrInit = {
enable = true;
configDir = service_configs.jellyseerr.configDir;
radarr = {
profileName = "Remux + WEB 2160p";
dataDir = service_configs.radarr.dataDir;
port = service_configs.ports.private.radarr.port;
serviceName = "radarr";
};
sonarr = {
profileName = "WEB-2160p";
dataDir = service_configs.sonarr.dataDir;
port = service_configs.ports.private.sonarr.port;
serviceName = "sonarr";
};
};
}

View File

@@ -13,10 +13,6 @@
(lib.serviceFilePerms "jellyseerr" [
"Z ${service_configs.jellyseerr.configDir} 0700 jellyseerr jellyseerr"
])
(lib.mkCaddyReverseProxy {
subdomain = "jellyseerr";
port = service_configs.ports.private.jellyseerr.port;
})
];
services.jellyseerr = {
@@ -40,4 +36,8 @@
users.groups.jellyseerr = { };
services.caddy.virtualHosts."jellyseerr.${service_configs.https.domain}".extraConfig = ''
# import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.jellyseerr.port}
'';
}

View File

@@ -14,12 +14,6 @@
(lib.serviceFilePerms "prowlarr" [
"Z ${service_configs.prowlarr.dataDir} 0700 prowlarr prowlarr"
])
(lib.mkCaddyReverseProxy {
subdomain = "prowlarr";
port = service_configs.ports.private.prowlarr.port;
auth = true;
vpn = true;
})
];
services.prowlarr = {
@@ -57,4 +51,8 @@
ExecStart = lib.mkForce "${lib.getExe pkgs.prowlarr} -nobrowser -data=${service_configs.prowlarr.dataDir}";
};
services.caddy.virtualHosts."prowlarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}
'';
}

View File

@@ -16,11 +16,6 @@
(lib.serviceFilePerms "radarr" [
"Z ${service_configs.radarr.dataDir} 0700 ${config.services.radarr.user} ${config.services.radarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "radarr";
port = service_configs.ports.private.radarr.port;
auth = true;
})
];
services.radarr = {
@@ -30,6 +25,11 @@
settings.update.mechanism = "external";
};
services.caddy.virtualHosts."radarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.radarr.port}
'';
users.users.${config.services.radarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -13,8 +13,8 @@ let
# Runs as root (via + prefix) after the NixOS module writes config.json.
# Extracts API keys from radarr/sonarr config.xml and injects them via jq.
injectApiKeys = pkgs.writeShellScript "recyclarr-inject-api-keys" ''
RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
RADARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
SONARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})
${pkgs.jq}/bin/jq \
--arg rk "$RADARR_KEY" \
--arg sk "$SONARR_KEY" \
@@ -46,69 +46,50 @@ in
radarr.movies = {
base_url = "http://localhost:${builtins.toString service_configs.ports.private.radarr.port}";
# Recyclarr is the sole authority for custom formats and scores.
# Overwrite any manually-created CFs and delete stale ones.
replace_existing_custom_formats = true;
delete_old_custom_formats = true;
include = [
{ template = "radarr-quality-definition-movie"; }
{ template = "radarr-quality-profile-remux-web-2160p"; }
{ template = "radarr-custom-formats-remux-web-2160p"; }
];
# Group WEB 2160p with 1080p in the same quality tier so custom
# format scores -- not quality ranking -- decide the winner.
# Native 4K with HDR/DV from good release groups scores high and
# wins; AI upscales get -10000 from the Upscaled CF and are
# blocked by min_format_score. Untagged upscales from unknown
# groups (score ~0) lose to well-scored 1080p (Tier 01 = +1750).
quality_profiles = [
{
name = "Remux + WEB 2160p";
min_format_score = 0;
reset_unmatched_scores.enabled = true;
reset_unmatched_scores = {
enabled = true;
};
upgrade = {
allowed = true;
until_quality = "Remux-2160p";
until_score = 10000;
};
quality_sort = "top";
qualities = [
{ name = "Remux-2160p"; }
{
name = "WEB/Bluray";
name = "WEB 2160p";
qualities = [
"WEBDL-2160p"
"WEBRip-2160p"
"Remux-1080p"
"Bluray-1080p"
];
}
{ name = "Remux-1080p"; }
{ name = "Bluray-1080p"; }
{
name = "WEB 1080p";
qualities = [
"WEBDL-1080p"
"WEBRip-1080p"
];
}
{ name = "HDTV-1080p"; }
{ name = "Bluray-720p"; }
{
name = "WEB 720p";
qualities = [
"WEBDL-720p"
"WEBRip-720p"
];
}
{ name = "HDTV-720p"; }
];
}
];
custom_formats = [
# DV (w/o HDR fallback) - block releases with DV that lack HDR10 fallback
{
trash_ids = [ "923b6abef9b17f937fab56cfcf89e1f1" ];
assign_scores_to = [
{ name = "Remux + WEB 2160p"; }
];
}
# Upscaled - block AI upscales and other upscaled-to-2160p releases
# Upscaled
{
trash_ids = [ "bfd8eb01832d646a0a89c4deb46f8564" ];
assign_scores_to = [
@@ -118,74 +99,97 @@ in
}
];
}
# x265 (HD) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "dc98083864ea246d05a42df0d05f81cc" ];
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 0;
}
];
}
# x265 (no HDR/DV) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "839bea857ed2c0a8e084f3cbdbd65ecb" ];
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 0;
}
];
}
# Codec ranking: AV1 (20) > HEVC (10) > AVC (0)
#
# Positive scores only -- nothing drops below min_format_score.
# AVC stays at 0 implicitly (no custom format adds or removes score).
{
trash_ids = [ "cae4ca30163749b891686f95532519bd" ]; # AV1
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 20;
}
];
}
{
trash_ids = [ "9170d55c319f4fe40da8711ba9d8050d" ]; # x265
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 10;
}
];
}
];
};
sonarr.series = {
base_url = "http://localhost:${builtins.toString service_configs.ports.private.sonarr.port}";
# Recyclarr is the sole authority for custom formats and scores.
# Overwrite any manually-created CFs and delete stale ones.
replace_existing_custom_formats = true;
delete_old_custom_formats = true;
include = [
{ template = "sonarr-quality-definition-series"; }
{ template = "sonarr-v4-quality-profile-web-2160p"; }
{ template = "sonarr-v4-custom-formats-web-2160p"; }
];
# Group WEB 2160p with 1080p in the same quality tier so custom
# format scores -- not quality ranking -- decide the winner.
# Native 4K with HDR/DV from good release groups scores high and
# wins; AI upscales get -10000 from the Upscaled CF and are
# blocked by min_format_score. Untagged upscales from unknown
# groups (score ~0) lose to well-scored 1080p (Tier 01 = +1750).
quality_profiles = [
{
name = "WEB-2160p";
min_format_score = 0;
reset_unmatched_scores.enabled = true;
reset_unmatched_scores = {
enabled = true;
};
upgrade = {
allowed = true;
until_quality = "WEB/Bluray";
until_quality = "WEB 2160p";
until_score = 10000;
};
quality_sort = "top";
qualities = [
{
name = "WEB/Bluray";
name = "WEB 2160p";
qualities = [
"WEBDL-2160p"
"WEBRip-2160p"
"Bluray-1080p Remux"
"Bluray-1080p"
];
}
{ name = "Bluray-1080p Remux"; }
{ name = "Bluray-1080p"; }
{
name = "WEB 1080p";
qualities = [
"WEBDL-1080p"
"WEBRip-1080p"
];
}
{ name = "HDTV-1080p"; }
{ name = "Bluray-720p"; }
{
name = "WEB 720p";
qualities = [
"WEBDL-720p"
"WEBRip-720p"
];
}
{ name = "HDTV-720p"; }
];
}
];
custom_formats = [
# DV (w/o HDR fallback) - block releases with DV that lack HDR10 fallback
{
trash_ids = [ "9b27ab6498ec0f31a3353992e19434ca" ];
assign_scores_to = [
{ name = "WEB-2160p"; }
];
}
# Upscaled - block AI upscales and other upscaled-to-2160p releases
# Upscaled
{
trash_ids = [ "23297a736ca77c0fc8e70f8edd7ee56c" ];
assign_scores_to = [
@@ -195,24 +199,56 @@ in
}
];
}
# x265 (HD) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "47435ece6b99a0b477caf360e79ba0bb" ];
assign_scores_to = [
{
name = "WEB-2160p";
score = 0;
}
];
}
# x265 (no HDR/DV) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "9b64dff695c2115facf1b6ea59c9bd07" ];
assign_scores_to = [
{
name = "WEB-2160p";
score = 0;
}
];
}
# Codec ranking: AV1 (20) > HEVC (10) > AVC (0)
#
# Positive scores only -- nothing drops below min_format_score.
# AVC stays at 0 implicitly (no custom format adds or removes score).
{
trash_ids = [ "15a05bc7c1a36e2b57fd628f8977e2fc" ]; # AV1
assign_scores_to = [
{
name = "WEB-2160p";
score = 20;
}
];
}
{
trash_ids = [ "c9eafd50846d299b862ca9bb6ea91950" ]; # x265
assign_scores_to = [
{
name = "WEB-2160p";
score = 10;
}
];
}
];
};
};
};
# Trigger immediate sync on deploy when recyclarr config changes.
# restartTriggers on the oneshot service are unreliable (systemd may
# no-op a restart of an inactive oneshot). Instead, embed a config
# hash in the timer unit -- NixOS restarts changed timers reliably,
# and OnActiveSec fires the sync within seconds.
systemd.timers.recyclarr = {
timerConfig.OnActiveSec = "5s";
unitConfig.X-ConfigHash = builtins.hashString "sha256" (
builtins.toJSON config.services.recyclarr.configuration
);
};
# Re-sync immediately on deploy when the recyclarr config changes
systemd.services.recyclarr = {
restartTriggers = [ (builtins.toJSON config.services.recyclarr.configuration) ];
after = [
"network-online.target"
"radarr.service"

View File

@@ -16,11 +16,6 @@
(lib.serviceFilePerms "sonarr" [
"Z ${service_configs.sonarr.dataDir} 0700 ${config.services.sonarr.user} ${config.services.sonarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "sonarr";
port = service_configs.ports.private.sonarr.port;
auth = true;
})
];
systemd.tmpfiles.rules = [
@@ -36,6 +31,11 @@
settings.update.mechanism = "external";
};
services.caddy.virtualHosts."sonarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.sonarr.port}
'';
users.users.${config.services.sonarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -5,66 +5,9 @@
lib,
...
}:
let
prowlarrPort = toString service_configs.ports.private.prowlarr.port;
sonarrPort = toString service_configs.ports.private.sonarr.port;
radarrPort = toString service_configs.ports.private.radarr.port;
bitmagnetPort = toString service_configs.ports.private.bitmagnet.port;
bridgeAddr = config.vpnNamespaces.wg.bridgeAddress;
prowlarrConfigXml = "${service_configs.prowlarr.dataDir}/config.xml";
sonarrConfigXml = "${service_configs.sonarr.dataDir}/config.xml";
radarrConfigXml = "${service_configs.radarr.dataDir}/config.xml";
curl = "${pkgs.curl}/bin/curl";
jq = "${pkgs.jq}/bin/jq";
# Clears the escalating failure backoff for the Bitmagnet indexer across
# Prowlarr, Sonarr, and Radarr so searches resume immediately after
# Bitmagnet restarts instead of waiting hours for disable timers to expire.
recoveryScript = pkgs.writeShellScript "prowlarr-bitmagnet-recovery" ''
set -euo pipefail
wait_for() {
for _ in $(seq 1 "$2"); do
${curl} -sf --max-time 5 "$1" > /dev/null && return 0
sleep 5
done
echo "$1 not reachable, aborting" >&2; exit 1
}
# Test a Bitmagnet-named indexer to clear its failure status.
# A successful test triggers RecordSuccess() which resets the backoff.
clear_status() {
local key indexer
key=$(${lib.extractArrApiKey ''"$3"''}) || return 0
indexer=$(${curl} -sf --max-time 10 \
-H "X-Api-Key: $key" "$2/api/$1/indexer" | \
${jq} 'first(.[] | select(.name | test("Bitmagnet"; "i")))') || return 0
[ -n "$indexer" ] && [ "$indexer" != "null" ] || return 0
${curl} -sf --max-time 30 \
-H "X-Api-Key: $key" -H "Content-Type: application/json" \
-X POST "$2/api/$1/indexer/test" -d "$indexer" > /dev/null
}
wait_for "http://localhost:${bitmagnetPort}" 12
wait_for "http://localhost:${prowlarrPort}/ping" 6
# Prowlarr first: downstream apps route searches through it.
clear_status v1 "http://localhost:${prowlarrPort}" "${prowlarrConfigXml}" || true
clear_status v3 "http://${bridgeAddr}:${sonarrPort}" "${sonarrConfigXml}" || true
clear_status v3 "http://${bridgeAddr}:${radarrPort}" "${radarrConfigXml}" || true
'';
in
{
imports = [
(lib.vpnNamespaceOpenPort service_configs.ports.private.bitmagnet.port "bitmagnet")
(lib.mkCaddyReverseProxy {
subdomain = "bitmagnet";
port = service_configs.ports.private.bitmagnet.port;
auth = true;
vpn = true;
})
];
services.bitmagnet = {
@@ -76,38 +19,13 @@ in
};
http_server = {
# TODO! make issue about this being a string and not a `port` type
port = ":" + (toString service_configs.ports.private.bitmagnet.port);
port = ":" + (builtins.toString service_configs.ports.private.bitmagnet.port);
};
};
};
# The upstream default (Restart=on-failure) leaves Bitmagnet dead after
# clean exits (e.g. systemd stop during deploy). Always restart it.
systemd.services.bitmagnet.serviceConfig = {
Restart = lib.mkForce "always";
RestartSec = 10;
};
# After Bitmagnet restarts, clear the escalating failure backoff across
# Prowlarr, Sonarr, and Radarr so searches resume immediately instead of
# waiting hours for the disable timers to expire.
systemd.services.prowlarr-bitmagnet-recovery = {
description = "Clear Prowlarr/Sonarr/Radarr failure status for Bitmagnet indexer";
after = [
"bitmagnet.service"
"prowlarr.service"
"sonarr.service"
"radarr.service"
];
bindsTo = [ "bitmagnet.service" ];
wantedBy = [ "bitmagnet.service" ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
ExecStart = recoveryScript;
# Same VPN namespace as Bitmagnet and Prowlarr.
NetworkNamespacePath = "/run/netns/wg";
};
};
services.caddy.virtualHosts."bitmagnet.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.bitmagnet.port}
'';
}

View File

@@ -13,10 +13,6 @@
(lib.serviceFilePerms "vaultwarden" [
"Z ${service_configs.vaultwarden.path} 0700 vaultwarden vaultwarden"
])
(lib.mkFail2banJail {
name = "vaultwarden";
failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
})
];
services.vaultwarden = {
@@ -42,4 +38,18 @@
}
'';
# Protect Vaultwarden login from brute force attacks
services.fail2ban.jails.vaultwarden = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=vaultwarden.service";
};
};
}

View File

@@ -56,19 +56,9 @@ in
enable = true;
email = "titaniumtown@proton.me";
# Build with Njalla DNS provider for DNS-01 ACME challenges (wildcard certs)
package = pkgs.caddy.withPlugins {
plugins = [ "github.com/caddy-dns/njalla@v0.0.0-20250823094507-f709141f1fe6" ];
hash = "sha256-rrOAR6noTDpV/I/hZXxhz0OXVJKu0mFQRq87RUrpmzw=";
};
# Enable on-demand TLS for old domain redirects
# Certs are issued dynamically when subdomains are accessed
globalConfig = ''
# Wildcard cert for *.${newDomain} via DNS-01 challenge
acme_dns njalla {
api_token {env.NJALLA_API_TOKEN}
}
# On-demand TLS for old domain redirects
on_demand_tls {
ask http://localhost:9123/check
}
@@ -116,9 +106,6 @@ in
};
};
# Inject Njalla API token for DNS-01 challenge
systemd.services.caddy.serviceConfig.EnvironmentFile = config.age.secrets.njalla-api-token-env.path;
systemd.tmpfiles.rules = [
"d ${config.services.caddy.dataDir} 700 ${config.services.caddy.user} ${config.services.caddy.group}"
];

View File

@@ -1,7 +0,0 @@
{
imports = [
./caddy.nix
# KEEP UNTIL 2028
./caddy_senior_project.nix
];
}

View File

@@ -1,27 +0,0 @@
{
config,
lib,
...
}:
{
services.ddns-updater = {
enable = true;
environment = {
PERIOD = "5m";
# ddns-updater reads config from this path at runtime
CONFIG_FILEPATH = config.age.secrets.ddns-updater-config.path;
};
};
users.users.ddns-updater = {
isSystemUser = true;
group = "ddns-updater";
};
users.groups.ddns-updater = { };
systemd.services.ddns-updater.serviceConfig = {
DynamicUser = lib.mkForce false;
User = "ddns-updater";
Group = "ddns-updater";
};
}

View File

@@ -0,0 +1,44 @@
#!/usr/bin/env bash
# Collects ZFS pool utilization and boot partition usage for Prometheus textfile collector
#
# Environment:
#   TEXTFILE  (required) path of the .prom file to publish. Output is staged in
#             "${TEXTFILE}.<pid>" and renamed over TEXTFILE once complete, so a
#             reader never observes a half-written file.
set -euo pipefail

TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
TMP="${TEXTFILE}.$$"

# With `set -e` any failing command aborts the script before the final `mv`.
# Remove the staging file in that case so failed runs do not pile up
# "<TEXTFILE>.<pid>" leftovers. After a successful `mv` this is a no-op.
trap 'rm -f -- "$TMP"' EXIT

# Sample lines are accumulated per metric so that each metric's HELP/TYPE
# header and all of its samples are written as one contiguous group.
zpool_size=""
zpool_used=""
zpool_free=""

# -Hp: scripting mode, parseable, bytes
pools="$(zpool list -Hp -o name,size,alloc,free)"
while IFS=$'\t' read -r name size alloc free; do
  # The here-string yields a single empty line when no pools are listed.
  [[ -n "$name" ]] || continue
  zpool_size+="zpool_size_bytes{pool=\"${name}\"} ${size}"$'\n'
  zpool_used+="zpool_used_bytes{pool=\"${name}\"} ${alloc}"$'\n'
  zpool_free+="zpool_free_bytes{pool=\"${name}\"} ${free}"$'\n'
done <<< "$pools"

part_size=""
part_used=""
part_free=""

# Boot drive partitions: /boot (ESP), /persistent, /nix
for mount in /boot /persistent /nix; do
  # Paths that are not mountpoints on this host are silently skipped.
  if mountpoint -q "$mount" 2>/dev/null; then
    # -B1 makes df report sizes in bytes; `tail -1` drops the header row.
    read -r size used avail _ <<< "$(df -B1 --output=size,used,avail "$mount" | tail -1)"
    part_size+="partition_size_bytes{mount=\"${mount}\"} ${size}"$'\n'
    part_used+="partition_used_bytes{mount=\"${mount}\"} ${used}"$'\n'
    part_free+="partition_free_bytes{mount=\"${mount}\"} ${avail}"$'\n'
  fi
done

{
  echo '# HELP zpool_size_bytes Total size of ZFS pool in bytes'
  echo '# TYPE zpool_size_bytes gauge'
  printf '%s' "$zpool_size"
  echo '# HELP zpool_used_bytes Used space in ZFS pool in bytes'
  echo '# TYPE zpool_used_bytes gauge'
  printf '%s' "$zpool_used"
  echo '# HELP zpool_free_bytes Free space in ZFS pool in bytes'
  echo '# TYPE zpool_free_bytes gauge'
  printf '%s' "$zpool_free"

  echo '# HELP partition_size_bytes Total size of partition in bytes'
  echo '# TYPE partition_size_bytes gauge'
  printf '%s' "$part_size"
  echo '# HELP partition_used_bytes Used space on partition in bytes'
  echo '# TYPE partition_used_bytes gauge'
  printf '%s' "$part_used"
  echo '# HELP partition_free_bytes Free space on partition in bytes'
  echo '# TYPE partition_free_bytes gauge'
  printf '%s' "$part_free"
} > "$TMP"

# Same directory as TEXTFILE, so the rename replaces the old file in one step.
mv "$TMP" "$TEXTFILE"

View File

@@ -6,13 +6,6 @@
...
}:
{
imports = [
(lib.mkCaddyReverseProxy {
domain = service_configs.firefox_syncserver.domain;
port = service_configs.ports.private.firefox_syncserver.port;
})
];
services.firefox-syncserver = {
enable = true;
database = {
@@ -40,4 +33,7 @@
];
};
services.caddy.virtualHosts."${service_configs.firefox_syncserver.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.firefox_syncserver.port}
'';
}

View File

@@ -29,17 +29,13 @@
settings = {
runner = {
capacity = 1;
timeout = "6h";
timeout = "3h";
};
};
};
# Override DynamicUser to use our static gitea-runner user, and ensure
# the runner doesn't start before the co-located gitea instance is ready
# (upstream can't assume locality, so this dependency is ours to add).
# Override DynamicUser to use our static gitea-runner user
systemd.services."gitea-runner-muffin" = {
requires = [ "gitea.service" ];
after = [ "gitea.service" ];
serviceConfig = {
DynamicUser = lib.mkForce false;
User = "gitea-runner";

View File

@@ -11,14 +11,6 @@
(lib.serviceFilePerms "gitea" [
"Z ${config.services.gitea.stateDir} 0700 ${config.services.gitea.user} ${config.services.gitea.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.gitea.domain;
port = service_configs.ports.private.gitea.port;
})
(lib.mkFail2banJail {
name = "gitea";
failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
})
];
services.gitea = {
@@ -49,6 +41,10 @@
};
};
services.caddy.virtualHosts."${service_configs.gitea.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.gitea.settings.server.HTTP_PORT}
'';
services.postgresql = {
ensureDatabases = [ config.services.gitea.user ];
ensureUsers = [
@@ -62,4 +58,18 @@
services.openssh.settings.AllowUsers = [ config.services.gitea.user ];
# Protect Gitea login from brute force attacks
services.fail2ban.jails.gitea = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=gitea.service";
};
};
}

View File

@@ -1,10 +0,0 @@
{
imports = [
./grafana.nix
./prometheus.nix
./dashboard.nix
./exporters.nix
./jellyfin-annotations.nix
./zfs-scrub-annotations.nix
];
}

View File

@@ -1,112 +0,0 @@
{
config,
pkgs,
inputs,
service_configs,
lib,
...
}:
let
jellyfinExporterPort = service_configs.ports.private.jellyfin_exporter.port;
qbitExporterPort = service_configs.ports.private.qbittorrent_exporter.port;
igpuExporterPort = service_configs.ports.private.igpu_exporter.port;
in
{
# -- Jellyfin Prometheus Exporter --
# Replaces custom jellyfin-collector.nix textfile timer.
# Exposes per-session metrics (jellyfin_now_playing_state) and library stats.
systemd.services.jellyfin-exporter =
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable)
{
description = "Prometheus exporter for Jellyfin";
after = [
"network.target"
"jellyfin.service"
];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = lib.getExe (
pkgs.writeShellApplication {
name = "jellyfin-exporter-wrapper";
runtimeInputs = [ pkgs.jellyfin-exporter ];
text = ''
exec jellyfin_exporter \
--jellyfin.address=http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port} \
--jellyfin.token="$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")" \
--web.listen-address=127.0.0.1:${toString jellyfinExporterPort}
'';
}
);
Restart = "on-failure";
RestartSec = "10s";
DynamicUser = true;
NoNewPrivileges = true;
ProtectSystem = "strict";
ProtectHome = true;
PrivateTmp = true;
MemoryDenyWriteExecute = true;
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
};
};
# -- qBittorrent Prometheus Exporter --
# Replaces custom qbittorrent-collector.nix textfile timer.
# Exposes per-torrent metrics (qbit_dlspeed, qbit_upspeed) and aggregate stats.
# qBittorrent runs in a VPN namespace; the exporter reaches it via namespace address.
systemd.services.qbittorrent-exporter =
  let
    # qBittorrent WebUI as reached through the VPN namespace address.
    qbitUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}";
  in
  # Only defined when both Grafana and qBittorrent are enabled.
  lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
    description = "Prometheus exporter for qBittorrent";
    after = [
      "network.target"
      "qbittorrent.service"
    ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      # `pkgs.system` is a deprecated alias; read the platform from stdenv.
      ExecStart =
        lib.getExe' inputs.qbittorrent-metrics-exporter.packages.${pkgs.stdenv.hostPlatform.system}.default
          "qbittorrent-metrics-exporter";
      Restart = "on-failure";
      RestartSec = "10s";
      DynamicUser = true;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
    };

    environment = {
      HOST = "127.0.0.1";
      PORT = toString qbitExporterPort;
      SCRAPE_INTERVAL = "15";
      BACKEND = "in-memory";
      # qBittorrent has AuthSubnetWhitelist=0.0.0.0/0, so no real password needed.
      # The exporter still expects the env var to be set.
      QBITTORRENT_PASSWORD = "unused";
      QBITTORRENT_USERNAME = "admin";
      # The same URL is given on both sides of the "|" separator.
      TORRENT_HOSTS = "qbit:main=${qbitUrl}|${qbitUrl}";
      RUST_LOG = "warn";
    };
  };
# -- Intel GPU Prometheus Exporter --
# Replaces custom intel-gpu-collector.nix + intel-gpu-collector.py textfile timer.
# Exposes engine busy%, frequency, and RC6 metrics via /metrics.
# Requires privileged access to GPU debug interfaces (intel_gpu_top).
systemd.services.igpu-exporter = lib.mkIf config.services.grafana.enable {
description = "Prometheus exporter for Intel integrated GPU";
wantedBy = [ "multi-user.target" ];
path = [ pkgs.intel-gpu-tools ];
serviceConfig = {
ExecStart = lib.getExe pkgs.igpu-exporter;
Restart = "on-failure";
RestartSec = "10s";
# intel_gpu_top requires root-level access to GPU debug interfaces
ProtectHome = true;
PrivateTmp = true;
};
environment = {
PORT = toString igpuExporterPort;
REFRESH_PERIOD_MS = "30000";
};
};
}

View File

@@ -1,103 +0,0 @@
{
config,
service_configs,
lib,
...
}:
{
imports = [
(lib.serviceMountWithZpool "grafana" service_configs.zpool_ssds [
service_configs.grafana.dir
])
(lib.serviceFilePerms "grafana" [
"Z ${service_configs.grafana.dir} 0700 grafana grafana"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.grafana.domain;
port = service_configs.ports.private.grafana.port;
auth = true;
})
];
services.grafana = {
enable = true;
dataDir = service_configs.grafana.dir;
settings = {
server = {
http_addr = "127.0.0.1";
http_port = service_configs.ports.private.grafana.port;
domain = service_configs.grafana.domain;
root_url = "https://${service_configs.grafana.domain}";
};
database = {
type = "postgres";
host = service_configs.postgres.socket;
user = "grafana";
};
"auth.anonymous" = {
enabled = true;
org_role = "Admin";
};
"auth.basic".enabled = false;
"auth".disable_login_form = true;
analytics.reporting_enabled = false;
feature_toggles.enable = "dataConnectionsConsole=false";
users.default_theme = "dark";
# Disable unused built-in integrations
alerting.enabled = false;
"unified_alerting".enabled = false;
explore.enabled = false;
news.news_feed_enabled = false;
plugins = {
enable_alpha = false;
plugin_admin_enabled = false;
};
};
provision = {
datasources.settings = {
apiVersion = 1;
datasources = [
{
name = "Prometheus";
type = "prometheus";
url = "http://127.0.0.1:${toString service_configs.ports.private.prometheus.port}";
access = "proxy";
isDefault = true;
editable = false;
uid = "prometheus";
}
];
};
dashboards.settings.providers = [
{
name = "system";
type = "file";
options.path = "/etc/grafana-dashboards";
disableDeletion = true;
updateIntervalSeconds = 60;
}
];
};
};
services.postgresql = {
ensureDatabases = [ "grafana" ];
ensureUsers = [
{
name = "grafana";
ensureDBOwnership = true;
ensureClauses.login = true;
}
];
};
}

View File

@@ -1,18 +0,0 @@
{
config,
service_configs,
lib,
...
}:
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) (
lib.mkGrafanaAnnotationService {
name = "jellyfin";
description = "Jellyfin stream annotation service for Grafana";
script = ./jellyfin-annotations.py;
environment = {
JELLYFIN_URL = "http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}";
POLL_INTERVAL = "30";
};
loadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
}
)

View File

@@ -1,110 +0,0 @@
{
service_configs,
lib,
...
}:
let
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
in
{
imports = [
(lib.serviceMountWithZpool "prometheus" service_configs.zpool_ssds [
"/var/lib/prometheus"
])
(lib.serviceFilePerms "prometheus" [
"Z /var/lib/prometheus 0700 prometheus prometheus"
])
];
# Prometheus server plus its local exporters. Everything listens on loopback
# only; every scrape job targets a single loopback port.
services.prometheus = {
  enable = true;
  port = service_configs.ports.private.prometheus.port;
  listenAddress = "127.0.0.1";
  stateDir = "prometheus";
  # Intent: keep metrics forever. A zero retention time does NOT do that:
  # Prometheus treats it as "unset" and, with no size limit configured, falls
  # back to its 15d default. 100y is the largest retention it accepts.
  retentionTime = "100y";

  exporters = {
    node = {
      enable = true;
      port = service_configs.ports.private.prometheus_node.port;
      listenAddress = "127.0.0.1";
      enabledCollectors = [
        "hwmon"
        "systemd"
        "textfile"
      ];
      extraFlags = [
        "--collector.textfile.directory=${textfileDir}"
      ];
    };

    apcupsd = {
      enable = true;
      port = service_configs.ports.private.prometheus_apcupsd.port;
      listenAddress = "127.0.0.1";
      apcupsdAddress = "127.0.0.1:3551";
    };

    zfs = {
      enable = true;
      port = service_configs.ports.private.prometheus_zfs.port;
      listenAddress = "127.0.0.1";
    };
  };

  scrapeConfigs =
    let
      ports = service_configs.ports.private;
      # One static scrape job for a single loopback target.
      localJob = job_name: port: {
        inherit job_name;
        static_configs = [
          { targets = [ "127.0.0.1:${toString port}" ]; }
        ];
      };
    in
    [
      (localJob "prometheus" ports.prometheus.port)
      (localJob "node" ports.prometheus_node.port)
      (localJob "apcupsd" ports.prometheus_apcupsd.port)
      (localJob "llama-cpp" ports.llama_cpp.port)
      (localJob "jellyfin" ports.jellyfin_exporter.port)
      (localJob "qbittorrent" ports.qbittorrent_exporter.port)
      (localJob "igpu" ports.igpu_exporter.port)
      (localJob "zfs" ports.prometheus_zfs.port)
    ];
};
systemd.tmpfiles.rules = [
"d ${textfileDir} 0755 root root -"
];
}

View File

@@ -1,38 +0,0 @@
{
config,
lib,
service_configs,
...
}:
{
imports = [
(lib.serviceFilePerms "harmonia" [
"Z /run/agenix/harmonia-sign-key 0400 harmonia harmonia"
])
];
services.harmonia = {
enable = true;
signKeyPaths = [ config.age.secrets.harmonia-sign-key.path ];
settings.bind = "127.0.0.1:${toString service_configs.ports.private.harmonia.port}";
};
# serve latest deploy store paths (unauthenticated — just a path string)
# CI writes to /var/lib/dotfiles-deploy/<hostname> after building
services.caddy.virtualHosts."nix-cache.${service_configs.https.domain}".extraConfig = ''
handle_path /deploy/* {
root * /var/lib/dotfiles-deploy
file_server
}
handle {
import ${config.age.secrets.nix-cache-auth.path}
reverse_proxy :${toString service_configs.ports.private.harmonia.port}
}
'';
# directory for CI to record latest deploy store paths
systemd.tmpfiles.rules = [
"d /var/lib/dotfiles-deploy 0755 gitea-runner gitea-runner"
];
}

View File

@@ -16,15 +16,6 @@
(lib.serviceFilePerms "immich-server" [
"Z ${config.services.immich.mediaLocation} 0770 ${config.services.immich.user} ${config.services.immich.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "immich";
port = service_configs.ports.private.immich.port;
})
(lib.mkFail2banJail {
name = "immich";
unitName = "immich-server.service";
failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
})
];
services.immich = {
@@ -38,6 +29,10 @@
};
};
services.caddy.virtualHosts."immich.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.immich.port}
'';
environment.systemPackages = with pkgs; [
immich-go
];
@@ -47,4 +42,18 @@
"render"
];
# Protect Immich login from brute force attacks
services.fail2ban.jails.immich = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=immich-server.service";
};
};
}

View File

@@ -0,0 +1,107 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
import time
TEXTFILE = os.environ.get(
"TEXTFILE",
"/var/lib/prometheus-node-exporter-textfiles/intel-gpu.prom",
)
def read_one_sample(timeout=8.0):
    """Run ``intel_gpu_top -J`` and return its first JSON object.

    The tool's stdout is scanned for the first balanced ``{...}`` group, which
    is then decoded with ``json.loads``. Anything before the first ``{`` is
    ignored. Brace counting is purely byte-based, so braces inside JSON string
    values would confuse it.

    Args:
        timeout: Maximum number of seconds to wait for a complete object.
            Defaults to 8.0, the previously hard-coded deadline.

    Returns:
        The decoded sample, or ``None`` if the tool could not be started,
        produced no object before the deadline or EOF, or produced output
        that is not valid JSON.
    """
    # Function-scope import: `select` is not among this file's top-level imports.
    import select

    open_brace = ord("{")
    close_brace = ord("}")

    try:
        proc = subprocess.Popen(
            ["intel_gpu_top", "-J", "-s", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        # Read from the raw descriptor: select() cannot see data already
        # pulled into the BufferedReader wrapping proc.stdout.
        fd = proc.stdout.fileno()
        buf = bytearray()
        depth = 0
        in_obj = False
        complete = False
        deadline = time.monotonic() + timeout
        try:
            while not complete:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                # Wait for data but never past the deadline; a blocking read
                # here would hang forever if the tool stays silent.
                readable, _, _ = select.select([fd], [], [], remaining)
                if not readable:
                    break
                chunk = os.read(fd, 4096)
                if not chunk:
                    break  # EOF: the tool exited
                for byte in chunk:
                    if byte == open_brace:
                        in_obj = True
                        depth += 1
                    if in_obj:
                        buf.append(byte)
                        if byte == close_brace:
                            depth -= 1
                            if depth == 0:
                                # First object is complete; ignore the rest.
                                complete = True
                                break
        finally:
            proc.terminate()
            try:
                proc.wait(timeout=2.0)
            except subprocess.TimeoutExpired:
                # SIGTERM was not enough; do not block the collector on it.
                proc.kill()
                proc.wait()
            proc.stdout.close()

        if not buf:
            return None
        try:
            return json.loads(buf)
        except json.JSONDecodeError:
            print("Malformed JSON from intel_gpu_top", file=sys.stderr)
            return None
    except Exception as e:
        # Best effort: any failure to run the tool is reported as "no sample".
        print(f"intel_gpu_top unavailable: {e}", file=sys.stderr)
        return None
def _escape_label_value(value):
    """Escape backslash, double quote and newline for use inside a label value."""
    return (
        str(value)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
    )


def _write_textfile(lines):
    """Write ``lines`` to TEXTFILE, one per line, replacing it in a single step.

    The content goes to ``TEXTFILE + ".tmp"`` first and is then moved over
    TEXTFILE with ``os.replace``.
    """
    tmp = TEXTFILE + ".tmp"
    with open(tmp, "w") as f:
        f.write("\n".join(lines) + "\n")
    os.replace(tmp, TEXTFILE)


def write_empty_metrics():
    """Write zero-valued metrics so Prometheus doesn't see stale data.

    Frequency and RC6 are written as 0. The per-engine metric keeps only its
    HELP/TYPE header because no engine names are known without a sample.
    """
    _write_textfile(
        [
            "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
            "# TYPE intel_gpu_engine_busy_percent gauge",
            "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
            "# TYPE intel_gpu_frequency_mhz gauge",
            "intel_gpu_frequency_mhz 0",
            "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
            "# TYPE intel_gpu_rc6_percent gauge",
            "intel_gpu_rc6_percent 0",
        ]
    )


def write_metrics(sample):
    """Render one decoded ``intel_gpu_top`` sample into TEXTFILE.

    Args:
        sample: Mapping read as ``sample["engines"][name]["busy"]``,
            ``sample["frequency"]["actual"]`` and ``sample["rc6"]["value"]``.
            Every missing key falls back to 0 (or to no engines at all).
    """
    lines = [
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
        "# TYPE intel_gpu_engine_busy_percent gauge",
    ]
    for engine, data in sample.get("engines", {}).items():
        # Engine names come from the tool's output; escape them so an
        # unexpected quote or backslash cannot corrupt the whole file.
        label = _escape_label_value(engine)
        lines.append(
            f'intel_gpu_engine_busy_percent{{engine="{label}"}} {data.get("busy", 0)}'
        )
    freq = sample.get("frequency", {})
    lines += [
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
        "# TYPE intel_gpu_frequency_mhz gauge",
        f'intel_gpu_frequency_mhz {freq.get("actual", 0)}',
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
        "# TYPE intel_gpu_rc6_percent gauge",
        f'intel_gpu_rc6_percent {sample.get("rc6", {}).get("value", 0)}',
    ]
    _write_textfile(lines)
def main():
    """Collect one GPU sample and publish it to the textfile.

    If no sample could be read, a message is printed to stderr, the
    zero-valued fallback metrics are written, and the process exits with
    status 0.
    """
    sample = read_one_sample()
    if sample is not None:
        write_metrics(sample)
        return
    print("Failed to read intel_gpu_top sample", file=sys.stderr)
    write_empty_metrics()
    sys.exit(0)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,40 @@
# Runs jellyfin-annotations.py as a sandboxed, always-restarting systemd
# service. The script gets its endpoints, state file location and poll
# interval from the environment and the Jellyfin API key as a systemd
# credential.
{
  config,
  pkgs,
  service_configs,
  lib,
  ...
}:
let
  unit = "jellyfin-annotations";
  ports = service_configs.ports.private;
  # Both upstreams are reached over loopback.
  loopbackUrl = port: "http://127.0.0.1:${toString port}";
in
{
  systemd.services.${unit} = {
    description = "Jellyfin stream annotation service for Grafana";
    wantedBy = [ "multi-user.target" ];
    after = [
      "network.target"
      "grafana.service"
    ];

    environment = {
      JELLYFIN_URL = loopbackUrl ports.jellyfin.port;
      GRAFANA_URL = loopbackUrl ports.grafana.port;
      # Lives under the StateDirectory declared below.
      STATE_FILE = "/var/lib/${unit}/state.json";
      POLL_INTERVAL = "30";
    };

    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./jellyfin-annotations.py}";
      Restart = "always";
      RestartSec = "10s";

      # Exposed to the process under $CREDENTIALS_DIRECTORY.
      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";

      # Sandboxing
      DynamicUser = true;
      StateDirectory = unit;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      MemoryDenyWriteExecute = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
    };
  };
}

View File

@@ -0,0 +1,57 @@
# Runs jellyfin-qbittorrent-monitor.py as a hardened systemd service. The
# script is configured entirely through environment variables; the Jellyfin
# API key is loaded as a systemd credential and exported by the start wrapper.
{
  pkgs,
  service_configs,
  config,
  ...
}:
{
  systemd.services."jellyfin-qbittorrent-monitor" = {
    description = "Monitor Jellyfin streaming and control qBittorrent rate limits";
    after = [
      "network.target"
      "jellyfin.service"
      "qbittorrent.service"
    ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
      Type = "simple";
      # Wrapper that turns the loaded credential into JELLYFIN_API_KEY before
      # exec'ing the monitor. Both expansions are quoted so the path and the
      # key are always passed through as single words.
      ExecStart = pkgs.writeShellScript "jellyfin-monitor-start" ''
        export JELLYFIN_API_KEY="$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")"
        exec ${
          pkgs.python3.withPackages (ps: with ps; [ requests ])
        }/bin/python ${./jellyfin-qbittorrent-monitor.py}
      '';
      Restart = "always";
      RestartSec = "10s";

      # Security hardening
      DynamicUser = true;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      ProtectKernelTunables = true;
      ProtectKernelModules = true;
      ProtectControlGroups = true;
      MemoryDenyWriteExecute = true;
      RestrictRealtime = true;
      RestrictSUIDSGID = true;
      RemoveIPC = true;

      # Load credentials from agenix secrets
      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
    };

    environment = {
      JELLYFIN_URL = "http://localhost:${builtins.toString service_configs.ports.private.jellyfin.port}";
      # qBittorrent is addressed via the VPN namespace address, not loopback.
      QBITTORRENT_URL = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.torrent.port}";
      CHECK_INTERVAL = "30";

      # Bandwidth budget configuration
      TOTAL_BANDWIDTH_BUDGET = "30000000"; # 30 Mbps in bits per second
      SERVICE_BUFFER = "5000000"; # 5 Mbps reserved for other services (bps)
      DEFAULT_STREAM_BITRATE = "10000000"; # 10 Mbps fallback when bitrate unknown (bps)
      MIN_TORRENT_SPEED = "100"; # KB/s - below this, pause torrents instead
      STREAM_BITRATE_HEADROOM = "1.1"; # multiplier per stream for bitrate fluctuations
    };
  };
}

View File

@@ -7,8 +7,6 @@ import sys
import signal
import json
import ipaddress
import threading
from http.server import HTTPServer, BaseHTTPRequestHandler
logging.basicConfig(
level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -36,8 +34,6 @@ class JellyfinQBittorrentMonitor:
default_stream_bitrate=10000000,
min_torrent_speed=100,
stream_bitrate_headroom=1.1,
webhook_port=0,
webhook_bind="127.0.0.1",
):
self.jellyfin_url = jellyfin_url
self.qbittorrent_url = qbittorrent_url
@@ -61,12 +57,6 @@ class JellyfinQBittorrentMonitor:
self.streaming_stop_delay = streaming_stop_delay
self.last_state_change = 0
# Webhook receiver: allows Jellyfin to push events instead of waiting for the poll
self.webhook_port = webhook_port
self.webhook_bind = webhook_bind
self.wake_event = threading.Event()
self.webhook_server = None
# Local network ranges (RFC 1918 private networks + localhost)
self.local_networks = [
ipaddress.ip_network("10.0.0.0/8"),
@@ -89,56 +79,9 @@ class JellyfinQBittorrentMonitor:
def signal_handler(self, signum, frame):
logger.info("Received shutdown signal, cleaning up...")
self.running = False
if self.webhook_server is not None:
# shutdown() blocks until serve_forever returns; run from a thread so we don't deadlock
threading.Thread(target=self.webhook_server.shutdown, daemon=True).start()
self.restore_normal_limits()
sys.exit(0)
def wake(self) -> None:
"""Signal the main loop to re-evaluate state immediately."""
self.wake_event.set()
def sleep_or_wake(self, seconds: float) -> None:
"""Wait up to `seconds`, returning early if a webhook wakes the loop."""
self.wake_event.wait(seconds)
self.wake_event.clear()
def start_webhook_server(self) -> None:
"""Start a background HTTP server that wakes the monitor on any POST."""
if not self.webhook_port:
return
monitor = self
class WebhookHandler(BaseHTTPRequestHandler):
def do_POST(self): # noqa: N802
length = int(self.headers.get("Content-Length", "0") or "0")
body = self.rfile.read(min(length, 65536)) if length else b""
event = "unknown"
try:
if body:
event = json.loads(body).get("NotificationType", "unknown")
except (json.JSONDecodeError, ValueError):
pass
logger.info(f"Webhook received: {event}")
self.send_response(204)
self.end_headers()
monitor.wake()
def log_message(self, format, *args):
return # suppress default access log
self.webhook_server = HTTPServer(
(self.webhook_bind, self.webhook_port), WebhookHandler
)
threading.Thread(
target=self.webhook_server.serve_forever, daemon=True, name="webhook-server"
).start()
logger.info(
f"Webhook receiver listening on http://{self.webhook_bind}:{self.webhook_port}"
)
def check_jellyfin_sessions(self) -> list[dict]:
headers = (
{"X-Emby-Token": self.jellyfin_api_key} if self.jellyfin_api_key else {}
@@ -354,14 +297,10 @@ class JellyfinQBittorrentMonitor:
logger.info(f"Default stream bitrate: {self.default_stream_bitrate} bps")
logger.info(f"Minimum torrent speed: {self.min_torrent_speed} KB/s")
logger.info(f"Stream bitrate headroom: {self.stream_bitrate_headroom}x")
if self.webhook_port:
logger.info(f"Webhook receiver: {self.webhook_bind}:{self.webhook_port}")
signal.signal(signal.SIGINT, self.signal_handler)
signal.signal(signal.SIGTERM, self.signal_handler)
self.start_webhook_server()
while self.running:
try:
self.sync_qbittorrent_state()
@@ -370,7 +309,7 @@ class JellyfinQBittorrentMonitor:
active_streams = self.check_jellyfin_sessions()
except ServiceUnavailable:
logger.warning("Jellyfin unavailable, maintaining current state")
self.sleep_or_wake(self.check_interval)
time.sleep(self.check_interval)
continue
streaming_active = len(active_streams) > 0
@@ -455,13 +394,13 @@ class JellyfinQBittorrentMonitor:
self.current_state = desired_state
self.last_active_streams = active_streams
self.sleep_or_wake(self.check_interval)
time.sleep(self.check_interval)
except KeyboardInterrupt:
break
except Exception as e:
logger.error(f"Unexpected error in monitoring loop: {e}")
self.sleep_or_wake(self.check_interval)
time.sleep(self.check_interval)
self.restore_normal_limits()
logger.info("Monitor stopped")
@@ -482,8 +421,6 @@ if __name__ == "__main__":
default_stream_bitrate = int(os.getenv("DEFAULT_STREAM_BITRATE", "10000000"))
min_torrent_speed = int(os.getenv("MIN_TORRENT_SPEED", "100"))
stream_bitrate_headroom = float(os.getenv("STREAM_BITRATE_HEADROOM", "1.1"))
webhook_port = int(os.getenv("WEBHOOK_PORT", "0"))
webhook_bind = os.getenv("WEBHOOK_BIND", "127.0.0.1")
monitor = JellyfinQBittorrentMonitor(
jellyfin_url=jellyfin_url,
@@ -497,8 +434,6 @@ if __name__ == "__main__":
default_stream_bitrate=default_stream_bitrate,
min_torrent_speed=min_torrent_speed,
stream_bitrate_headroom=stream_bitrate_headroom,
webhook_port=webhook_port,
webhook_bind=webhook_bind,
)
monitor.run()

View File

@@ -26,14 +26,6 @@
services.caddy.virtualHosts."jellyfin.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.jellyfin.port} {
# Disable response buffering for streaming. Caddy's default partial
# buffering delays fMP4-HLS segments and direct-play responses where
# Content-Length is known (so auto-flush doesn't trigger).
flush_interval -1
transport http {
# Localhost: compression wastes CPU re-encoding already-compressed media.
compression off
}
header_up X-Real-IP {remote_host}
header_up X-Forwarded-For {remote_host}
header_up X-Forwarded-Proto {scheme}

View File

@@ -1,6 +0,0 @@
{
imports = [
./jellyfin.nix
./jellyfin-qbittorrent-monitor.nix
];
}

View File

@@ -1,127 +0,0 @@
{
pkgs,
service_configs,
config,
lib,
...
}:
let
webhookPlugin = import ./jellyfin-webhook-plugin.nix { inherit pkgs lib; };
jellyfinPort = service_configs.ports.private.jellyfin.port;
webhookPort = service_configs.ports.private.jellyfin_qbittorrent_monitor_webhook.port;
in
lib.mkIf config.services.jellyfin.enable {
# Materialise the Jellyfin Webhook plugin into Jellyfin's plugins dir before
# Jellyfin starts. Jellyfin rewrites meta.json at runtime, so a read-only
# nix-store symlink would EACCES -- we copy instead.
#
# `wantedBy = [ "jellyfin.service" ]` alone is insufficient on initial rollout:
# if jellyfin is already running at activation time, systemd won't start the
# oneshot until the next jellyfin restart. `restartTriggers` on jellyfin pinned
# to the plugin package + install script forces that restart whenever either
# changes, which invokes this unit via the `before`/`wantedBy` chain.
systemd.services.jellyfin-webhook-install = {
before = [ "jellyfin.service" ];
wantedBy = [ "jellyfin.service" ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = config.services.jellyfin.user;
Group = config.services.jellyfin.group;
ExecStart = webhookPlugin.mkInstallScript {
pluginsDir = "${config.services.jellyfin.dataDir}/plugins";
};
};
};
systemd.services.jellyfin.restartTriggers = [
webhookPlugin.package
(webhookPlugin.mkInstallScript {
pluginsDir = "${config.services.jellyfin.dataDir}/plugins";
})
];
# After Jellyfin starts, POST the plugin configuration so the webhook
# targets the monitor's receiver. Idempotent; runs on every boot.
systemd.services.jellyfin-webhook-configure = {
after = [ "jellyfin.service" ];
wants = [ "jellyfin.service" ];
before = [ "jellyfin-qbittorrent-monitor.service" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
DynamicUser = true;
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
ExecStart = webhookPlugin.mkConfigureScript {
jellyfinUrl = "http://127.0.0.1:${toString jellyfinPort}";
webhooks = [
{
name = "qBittorrent Monitor";
uri = "http://127.0.0.1:${toString webhookPort}/";
notificationTypes = [
"PlaybackStart"
"PlaybackProgress"
"PlaybackStop"
];
}
];
};
};
};
systemd.services."jellyfin-qbittorrent-monitor" = {
description = "Monitor Jellyfin streaming and control qBittorrent rate limits";
after = [
"network.target"
"jellyfin.service"
"qbittorrent.service"
"jellyfin-webhook-configure.service"
];
wants = [ "jellyfin-webhook-configure.service" ];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "simple";
ExecStart = pkgs.writeShellScript "jellyfin-monitor-start" ''
export JELLYFIN_API_KEY=$(cat $CREDENTIALS_DIRECTORY/jellyfin-api-key)
exec ${
pkgs.python3.withPackages (ps: with ps; [ requests ])
}/bin/python ${./jellyfin-qbittorrent-monitor.py}
'';
Restart = "always";
RestartSec = "10s";
# Security hardening
DynamicUser = true;
NoNewPrivileges = true;
ProtectSystem = "strict";
ProtectHome = true;
ProtectKernelTunables = true;
ProtectKernelModules = true;
ProtectControlGroups = true;
MemoryDenyWriteExecute = true;
RestrictRealtime = true;
RestrictSUIDSGID = true;
RemoveIPC = true;
# Load credentials from agenix secrets
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
};
environment = {
JELLYFIN_URL = "http://localhost:${builtins.toString jellyfinPort}";
QBITTORRENT_URL = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.torrent.port}";
CHECK_INTERVAL = "30";
# Bandwidth budget configuration
TOTAL_BANDWIDTH_BUDGET = "30000000"; # 30 Mbps in bits per second
SERVICE_BUFFER = "5000000"; # 5 Mbps reserved for other services (bps)
DEFAULT_STREAM_BITRATE = "10000000"; # 10 Mbps fallback when bitrate unknown (bps)
MIN_TORRENT_SPEED = "100"; # KB/s - below this, pause torrents instead
STREAM_BITRATE_HEADROOM = "1.1"; # multiplier per stream for bitrate fluctuations
# Webhook receiver: Jellyfin Webhook plugin POSTs events here to throttle immediately.
WEBHOOK_BIND = "127.0.0.1";
WEBHOOK_PORT = toString webhookPort;
};
};
}

View File

@@ -1,105 +0,0 @@
{ pkgs, lib }:
let
pluginVersion = "18.0.0.0";
# GUID from the plugin's meta.json; addresses it on /Plugins/<guid>/Configuration.
pluginGuid = "71552a5a-5c5c-4350-a2ae-ebe451a30173";
package = pkgs.stdenvNoCC.mkDerivation {
pname = "jellyfin-plugin-webhook";
version = pluginVersion;
src = pkgs.fetchurl {
url = "https://repo.jellyfin.org/files/plugin/webhook/webhook_${pluginVersion}.zip";
hash = "sha256-LFFojiPnBGl9KJ0xVyPBnCmatcaeVbllRwRkz5Z3dqI=";
};
nativeBuildInputs = [ pkgs.unzip ];
unpackPhase = ''unzip "$src"'';
installPhase = ''
mkdir -p "$out"
cp *.dll meta.json "$out/"
'';
dontFixup = true; # managed .NET assemblies must not be patched
};
# Minimal Handlebars template, base64 encoded. The monitor only needs the POST;
# NotificationType is parsed for the debug log line.
# Decoded: {"NotificationType":"{{NotificationType}}"}
templateB64 = "eyJOb3RpZmljYXRpb25UeXBlIjoie3tOb3RpZmljYXRpb25UeXBlfX0ifQ==";
# Build a PluginConfiguration payload accepted by Jellyfin's JSON deserializer.
# Each webhook is `{ name, uri, notificationTypes }`.
mkConfigJson =
webhooks:
builtins.toJSON {
ServerUrl = "";
GenericOptions = map (w: {
NotificationTypes = w.notificationTypes;
WebhookName = w.name;
WebhookUri = w.uri;
EnableMovies = true;
EnableEpisodes = true;
EnableVideos = true;
EnableWebhook = true;
Template = templateB64;
Headers = [
{
Key = "Content-Type";
Value = "application/json";
}
];
}) webhooks;
};
# Oneshot that POSTs the plugin configuration. Retries past the window
# between Jellyfin API health and plugin registration.
mkConfigureScript =
{ jellyfinUrl, webhooks }:
pkgs.writeShellScript "jellyfin-webhook-configure" ''
set -euo pipefail
export PATH=${
lib.makeBinPath [
pkgs.coreutils
pkgs.curl
]
}
URL=${lib.escapeShellArg jellyfinUrl}
AUTH="Authorization: MediaBrowser Token=\"$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")\""
CONFIG=${lib.escapeShellArg (mkConfigJson webhooks)}
for _ in $(seq 1 120); do curl -sf -o /dev/null "$URL/health" && break; sleep 1; done
curl -sf -o /dev/null "$URL/health"
for _ in $(seq 1 60); do
if printf '%s' "$CONFIG" | curl -sf -X POST \
-H "$AUTH" -H "Content-Type: application/json" --data-binary @- \
"$URL/Plugins/${pluginGuid}/Configuration"; then
echo "Jellyfin webhook plugin configured"; exit 0
fi
sleep 1
done
echo "Failed to configure webhook plugin" >&2; exit 1
'';
# Build the script that materialises a writable copy of the plugin. Jellyfin
# rewrites meta.json at runtime, so a read-only nix-store symlink would EACCES.
# Argument: `pluginsDir`, the directory that holds Jellyfin's plugin folders.
# The target is `<pluginsDir>/Webhook_<pluginVersion>`. It is deleted and
# recreated on every run, so anything previously written inside it is
# discarded; the *.dll files and meta.json are then copied from `package` and
# made owner-readable/writable.
# NOTE(review): folders for other plugin versions under pluginsDir are not
# touched here — confirm whether stale Webhook_* directories need cleanup.
mkInstallScript =
{ pluginsDir }:
pkgs.writeShellScript "jellyfin-webhook-install" ''
set -euo pipefail
export PATH=${lib.makeBinPath [ pkgs.coreutils ]}
dst=${lib.escapeShellArg "${pluginsDir}/Webhook_${pluginVersion}"}
mkdir -p ${lib.escapeShellArg pluginsDir}
rm -rf "$dst" && mkdir -p "$dst"
cp ${package}/*.dll ${package}/meta.json "$dst/"
chmod u+rw "$dst"/*
'';
in
{
inherit
package
pluginVersion
pluginGuid
mkConfigureScript
mkInstallScript
;
}

View File

@@ -3,7 +3,7 @@
...
}:
let
keyFile = ../../secrets/livekit_keys;
keyFile = ../secrets/livekit_keys;
in
{
services.livekit = {

View File

@@ -0,0 +1,40 @@
# NixOS module: runs llama-cpp-annotations.py as a sandboxed systemd service.
# The script is configured entirely through the environment set below.
{
  config,
  pkgs,
  service_configs,
  lib,
  ...
}:
let
  unitName = "llama-cpp-annotations";
  privatePorts = service_configs.ports.private;
  # Loopback URL for a service listening on `port`.
  loopback = port: "http://127.0.0.1:${toString port}";
in
{
  systemd.services.${unitName} = {
    description = "LLM request annotation service for Grafana";
    wantedBy = [ "multi-user.target" ];
    after = [
      "network.target"
      "grafana.service"
      "llama-cpp.service"
    ];

    environment = {
      LLAMA_CPP_URL = loopback privatePorts.llama_cpp.port;
      GRAFANA_URL = loopback privatePorts.grafana.port;
      # Lives inside the StateDirectory declared below.
      STATE_FILE = "/var/lib/${unitName}/state.json";
      POLL_INTERVAL = "5";
    };

    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-annotations.py}";
      Restart = "always";
      RestartSec = "10s";

      # Sandboxing
      DynamicUser = true;
      StateDirectory = unitName;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      MemoryDenyWriteExecute = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
    };
  };
}

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import json
import os
import sys
import time
import urllib.request
# Base URL of the llama-cpp server whose /slots endpoint is polled.
LLAMA_CPP_URL = os.environ.get("LLAMA_CPP_URL", "http://127.0.0.1:6688")
# Base URL of the Grafana instance that receives the annotations.
GRAFANA_URL = os.environ.get("GRAFANA_URL", "http://127.0.0.1:3000")
# JSON file recording the open annotation for each busy slot, so they can
# still be closed after this script restarts.
STATE_FILE = os.environ.get("STATE_FILE", "/var/lib/llama-cpp-annotations/state.json")
# Seconds to sleep between polls.
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "5"))
def http_json(method, url, body=None):
    """Send one HTTP request and return the JSON-decoded response body.

    Args:
        method: HTTP verb, e.g. "POST" or "PATCH".
        url: Absolute URL to call.
        body: Optional JSON-serialisable payload; omitted when None.

    Raises:
        Whatever urllib or json raise on transport, HTTP-status or decode
        errors. Callers are expected to catch them.
    """
    payload = None if body is None else json.dumps(body).encode()
    request = urllib.request.Request(url, data=payload, method=method)
    request.add_header("Content-Type", "application/json")
    request.add_header("Accept", "application/json")
    with urllib.request.urlopen(request, timeout=5) as response:
        raw = response.read()
    return json.loads(raw)
def get_slots():
    """Fetch the llama-cpp /slots payload.

    Returns the decoded JSON, or None after printing the error to stderr when
    anything at all goes wrong (connection, HTTP status, invalid JSON).
    """
    try:
        request = urllib.request.Request(
            f"{LLAMA_CPP_URL}/slots",
            headers={"Accept": "application/json"},
        )
        with urllib.request.urlopen(request, timeout=5) as response:
            payload = response.read()
        return json.loads(payload)
    except Exception as e:
        print(f"Error fetching slots: {e}", file=sys.stderr)
        return None
def load_state():
    """Load the persisted slot -> annotation mapping from STATE_FILE.

    Returns an empty dict when the file is missing, is not decodable as JSON,
    or holds JSON that is not an object. The last two cases used to either
    raise or hand a non-dict to main(), which then crashed on every restart
    because the bad file stays on disk.
    """
    try:
        with open(STATE_FILE) as f:
            state = json.load(f)
    except FileNotFoundError:
        return {}
    except ValueError as e:
        # Covers json.JSONDecodeError and UnicodeDecodeError (both ValueError).
        print(f"Ignoring unreadable state file {STATE_FILE}: {e}", file=sys.stderr)
        return {}
    if not isinstance(state, dict):
        print(
            f"Ignoring state file {STATE_FILE}: expected a JSON object, "
            f"got {type(state).__name__}",
            file=sys.stderr,
        )
        return {}
    return state
def save_state(state):
    """Persist `state` to STATE_FILE as JSON.

    The data is written to a sibling ".tmp" file first and then moved over
    the real file with os.replace, so a reader never observes a partially
    written state file.
    """
    state_dir = os.path.dirname(STATE_FILE)
    os.makedirs(state_dir, exist_ok=True)

    scratch_path = STATE_FILE + ".tmp"
    with open(scratch_path, "w") as handle:
        json.dump(state, handle)
    os.replace(scratch_path, STATE_FILE)
def grafana_post(text, start_ms):
    """Create a Grafana annotation tagged "llama-cpp".

    Args:
        text: Annotation text.
        start_ms: Start time in epoch milliseconds.

    Returns:
        The "id" field of Grafana's response, or None when the request fails
        (the error is printed to stderr).
    """
    annotation = {"time": start_ms, "text": text, "tags": ["llama-cpp"]}
    endpoint = f"{GRAFANA_URL}/api/annotations"
    try:
        created = http_json("POST", endpoint, annotation)
        return created.get("id")
    except Exception as e:
        print(f"Error posting annotation: {e}", file=sys.stderr)
        return None
def grafana_close(grafana_id, end_ms, text=None):
    """Set the end time of an existing Grafana annotation.

    Args:
        grafana_id: Id returned when the annotation was created.
        end_ms: End time in epoch milliseconds.
        text: Optional replacement text; left unchanged when None.

    Failures are printed to stderr and otherwise ignored; nothing is returned.
    """
    patch = {"timeEnd": end_ms}
    if text is not None:
        patch["text"] = text
    endpoint = f"{GRAFANA_URL}/api/annotations/{grafana_id}"
    try:
        http_json("PATCH", endpoint, patch)
    except Exception as e:
        print(f"Error closing annotation {grafana_id}: {e}", file=sys.stderr)
def _closing_text(slot_id, slots):
    """Build the final annotation text for a slot that stopped processing.

    Appends the decoded-token count when the slot still reports a positive
    one, separated from the label by ": ".
    """
    text = f"LLM request (slot {slot_id})"
    for slot in slots:
        if str(slot["id"]) == slot_id:
            n_decoded = slot.get("next_token", {}).get("n_decoded")
            if n_decoded is not None and n_decoded > 0:
                # The separator was missing, yielding "(slot 0)42 tokens".
                text += f": {n_decoded} tokens"
            break
    return text


def main():
    """Poll llama-cpp forever and mirror busy slots as Grafana annotations.

    An annotation is opened when a slot is first seen processing and closed
    on the first poll that no longer reports it as processing. `state` maps
    the slot id (as a string) to {"grafana_id", "start_ms"} and is written to
    disk after every change so open annotations can still be closed after a
    restart. Polls where /slots cannot be fetched are skipped. Never returns.
    """
    state = load_state()
    while True:
        now_ms = int(time.time() * 1000)
        slots = get_slots()
        if slots is not None:
            # Track which slots are currently processing
            processing_ids = set()
            for slot in slots:
                slot_id = str(slot["id"])
                if not slot.get("is_processing", False):
                    continue
                processing_ids.add(slot_id)
                if slot_id in state:
                    continue  # annotation already open for this slot
                grafana_id = grafana_post(f"LLM request (slot {slot['id']})", now_ms)
                # A failed POST leaves the slot untracked, so it is retried
                # on the next poll while the slot is still busy.
                if grafana_id is not None:
                    state[slot_id] = {
                        "grafana_id": grafana_id,
                        "start_ms": now_ms,
                    }
                    save_state(state)

            # Close annotations for slots that stopped processing
            for slot_id in [k for k in state if k not in processing_ids]:
                info = state.pop(slot_id)
                grafana_close(info["grafana_id"], now_ms, _closing_text(slot_id, slots))
                save_state(state)
        time.sleep(POLL_INTERVAL)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,35 @@
# NixOS module: runs llama-cpp-xmrig-pause.py, which polls llama-cpp over HTTP
# and stops/starts xmrig by invoking `systemctl`.
{
  pkgs,
  service_configs,
  ...
}:
{
  systemd.services.llama-cpp-xmrig-pause = {
    description = "Pause xmrig while llama-cpp is processing requests";
    after = [
      "network.target"
      "llama-cpp.service"
      "xmrig.service"
    ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-xmrig-pause.py}";
      Restart = "always";
      RestartSec = "10s";
      NoNewPrivileges = true;
      ProtectHome = true;
      ProtectSystem = "strict";
      PrivateTmp = true;
      RestrictAddressFamilies = [
        # HTTP polling of llama-cpp on loopback.
        "AF_INET"
        "AF_INET6"
        # systemctl talks to systemd over a unix socket. Without AF_UNIX the
        # address-family filter rejects the socket() call, so every
        # stop/start of xmrig fails.
        "AF_UNIX"
      ];
      MemoryDenyWriteExecute = true;
    };
    environment = {
      LLAMA_CPP_URL = "http://127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}";
      POLL_INTERVAL = "3";
      GRACE_PERIOD = "10";
    };
  };
}

View File

@@ -0,0 +1,91 @@
#!/usr/bin/env python3
"""
Pause xmrig while llama-cpp is processing inference requests.
Polls llama-cpp /slots endpoint. When any slot is busy, stops xmrig.
When all slots are idle for GRACE_PERIOD seconds, restarts xmrig.
If llama-cpp is unreachable, does nothing (leaves xmrig in its current state).
"""
import json
import os
import subprocess
import sys
import time
import urllib.request
# Base URL of llama-cpp. Required: a missing variable raises KeyError at
# import time. Trailing slashes are stripped so "/slots" can be appended.
LLAMA_CPP_URL = os.environ["LLAMA_CPP_URL"].rstrip("/")
# Seconds to sleep between polls of /slots.
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
# Seconds all slots must stay idle before xmrig is started again.
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "10"))
def log(msg):
    """Write one prefixed line to stderr and flush it immediately."""
    sys.stderr.write(f"[llama-cpp-xmrig-pause] {msg}\n")
    sys.stderr.flush()
def get_slots():
    """Query llama-cpp's /slots endpoint.

    Returns the decoded JSON payload, or None (after logging the reason) when
    the request fails or the body is not valid JSON.
    """
    request = urllib.request.Request(f"{LLAMA_CPP_URL}/slots")
    try:
        with urllib.request.urlopen(request, timeout=5) as response:
            body = response.read()
        return json.loads(body)
    except (urllib.error.URLError, OSError, json.JSONDecodeError, ValueError) as exc:
        log(f"Cannot reach llama-cpp: {exc}")
        return None
def any_slot_busy(slots):
    """Return True when at least one slot dict has a truthy "is_processing".

    A slot without the key counts as idle; an empty list yields False.
    """
    for slot in slots:
        if slot.get("is_processing", False):
            return True
    return False
def systemctl(action, unit):
    """Run `systemctl <action> <unit>`.

    Returns True when the command exits with status 0. A non-zero exit is
    logged together with the captured stderr and yields False.
    """
    proc = subprocess.run(["systemctl", action, unit], capture_output=True, text=True)
    succeeded = proc.returncode == 0
    if not succeeded:
        log(f"systemctl {action} {unit} failed (rc={proc.returncode}): {proc.stderr.strip()}")
    return succeeded
def main():
"""Poll llama-cpp forever, stopping xmrig while any slot is busy.

xmrig is started again only if this process stopped it and the slots have
been reported idle continuously for GRACE_PERIOD seconds. Never returns.
"""
# True only while xmrig is stopped because this process stopped it.
# NOTE(review): the flag lives in memory only, so a restart of this script
# while xmrig is stopped resets it to False and nothing below starts xmrig
# again — confirm that is acceptable.
xmrig_paused = False
idle_since = None # monotonic timestamp when slots first went idle
log(f"Starting: url={LLAMA_CPP_URL} poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s")
while True:
slots = get_slots()
if slots is None:
# llama-cpp unreachable — leave xmrig alone, reset idle timer
idle_since = None
time.sleep(POLL_INTERVAL)
continue
busy = any_slot_busy(slots)
if busy:
# Any activity cancels a grace period that was counting down.
idle_since = None
if not xmrig_paused:
log("Slot busy — stopping xmrig")
# Only record the pause when the stop succeeded, so a failed stop
# is retried on the next busy poll.
if systemctl("stop", "xmrig"):
xmrig_paused = True
else:
# All slots idle
if xmrig_paused:
now = time.monotonic()
if idle_since is None:
# First idle poll: start the grace timer.
idle_since = now
elif now - idle_since >= GRACE_PERIOD:
log("Slots idle past grace period — starting xmrig")
if systemctl("start", "xmrig"):
xmrig_paused = False
idle_since = None
time.sleep(POLL_INTERVAL)
if __name__ == "__main__":
main()

View File

@@ -4,100 +4,43 @@
config,
inputs,
lib,
utils,
...
}:
let
cfg = config.services.llama-cpp;
modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-IQ2_M.gguf";
modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
in
{
imports = [
(lib.mkCaddyReverseProxy {
subdomain = "llm";
port = service_configs.ports.private.llama_cpp.port;
})
];
services.llama-cpp = {
enable = true;
model = toString (
pkgs.fetchurl {
url = modelUrl;
sha256 = "17e869ac54d0e59faa884d5319fc55ad84cd866f50f0b3073fbb25accc875a23";
url = "https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF/resolve/main/Qwen3.5-9B.Q4_K_M.gguf";
sha256 = "8fbbc7b04a7d4b052d14b7aa97c8bf2014d39ceca8c2baaa043711712ba71ccc";
}
);
port = service_configs.ports.private.llama_cpp.port;
host = "0.0.0.0";
package = lib.optimizePackage (
inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
patches = (old.patches or [ ]) ++ [
];
})
);
package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.system}.default);
extraFlags = [
"-ngl"
"999"
# "-ngl"
# "12"
"-c"
"65536"
"32768"
"-ctk"
"turbo3"
"q8_0"
"-ctv"
"turbo3"
"turbo4"
"-fa"
"on"
"--api-key-file"
config.age.secrets.llama-cpp-api-key.path
"--metrics"
"--alias"
modelAlias
"-b"
"4096"
"-ub"
"4096"
"--parallel"
"2"
];
};
# have to do this in order to get vulkan to work
systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
# ANV driver's turbo3 shader compilation exceeds the default 8 MB thread stack.
systemd.services.llama-cpp.serviceConfig.LimitSTACK = lib.mkForce "67108864"; # 64 MB soft+hard
# llama-server tries to create ~/.cache; ProtectSystem=strict + impermanent
# root make /root read-only. Give it a writable cache dir and point HOME there.
systemd.services.llama-cpp.serviceConfig.CacheDirectory = "llama-cpp";
systemd.services.llama-cpp.environment.HOME = "/var/cache/llama-cpp";
# turbo3 KV cache quantization runs a 14-barrier WHT butterfly per 128-element
# workgroup in SET_ROWS. With 4 concurrent slots and batch=4096, the combined
# GPU dispatch can exceed the default i915 CCS engine preempt timeout (7.5s),
# causing GPU HANG -> ErrorDeviceLost. Increase compute engine timeouts.
# Note: batch<4096 is not viable -- GDN chunked mode needs a larger compute
# buffer at smaller batch sizes, exceeding the A380's 6 GB VRAM.
# '+' prefix runs as root regardless of service User=.
systemd.services.llama-cpp.serviceConfig.ExecStartPre = [
"+${pkgs.writeShellScript "set-gpu-compute-timeout" ''
for f in /sys/class/drm/card*/engine/ccs*/preempt_timeout_ms; do
[ -w "$f" ] && echo 30000 > "$f"
done
for f in /sys/class/drm/card*/engine/ccs*/heartbeat_interval_ms; do
[ -w "$f" ] && echo 10000 > "$f"
done
''}"
];
# upstream module hardcodes --log-disable; override ExecStart to keep logs
# so we can see prompt processing progress via journalctl
systemd.services.llama-cpp.serviceConfig.ExecStart = lib.mkForce (
"${cfg.package}/bin/llama-server"
+ " --host ${cfg.host}"
+ " --port ${toString cfg.port}"
+ " -m ${cfg.model}"
+ " ${utils.escapeSystemdExecArgs cfg.extraFlags}"
);
# Auth handled by llama-cpp --api-key-file (Bearer token).
# No caddy_auth — the API key is the auth layer, and caddy_auth's basic
# auth would block Bearer-only clients like oh-my-pi.
services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${toString config.services.llama-cpp.port}
'';
}

View File

@@ -12,10 +12,6 @@
(lib.serviceFilePerms "continuwuity" [
"Z /var/lib/private/continuwuity 0770 ${config.services.matrix-continuwuity.user} ${config.services.matrix-continuwuity.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.matrix.domain;
port = service_configs.ports.private.matrix.port;
})
];
services.matrix-continuwuity = {
@@ -57,6 +53,10 @@
respond /.well-known/matrix/client `{"m.server":{"base_url":"https://${service_configs.matrix.domain}"},"m.homeserver":{"base_url":"https://${service_configs.matrix.domain}"},"org.matrix.msc3575.proxy":{"base_url":"https://${config.services.matrix-continuwuity.settings.global.server_name}"},"org.matrix.msc4143.rtc_foci":[{"type":"livekit","livekit_service_url":"https://${service_configs.livekit.domain}"}]}`
'';
services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.matrix.port}
'';
# Exact duplicate for federation port
services.caddy.virtualHosts."${service_configs.matrix.domain}:${builtins.toString service_configs.ports.public.matrix_federation.port}".extraConfig =
config.services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig;

View File

@@ -1,7 +0,0 @@
# Aggregator module: importing this file imports the three sibling modules
# listed below.
{
  imports = [ ./matrix.nix ./coturn.nix ./livekit.nix ];
}

View File

@@ -37,21 +37,15 @@
servers.${service_configs.minecraft.server_name} = {
enable = true;
package = pkgs.fabricServers.fabric-26_1_2.override { jre_headless = pkgs.openjdk25_headless; };
package = pkgs.fabricServers.fabric-1_21_11;
jvmOpts = lib.concatStringsSep " " [
# Memory
"-Xmx${builtins.toString service_configs.minecraft.memory.heap_size_m}M"
"-Xms${builtins.toString service_configs.minecraft.memory.heap_size_m}M"
# GC
"-XX:+UseZGC"
"-XX:+ZGenerational"
# added in new minecraft version
"-XX:+UseCompactObjectHeaders"
"-XX:+UseStringDeduplication"
# Base JVM optimizations (brucethemoose/Minecraft-Performance-Flags-Benchmarks)
"-XX:+UnlockExperimentalVMOptions"
"-XX:+UnlockDiagnosticVMOptions"
@@ -73,7 +67,6 @@
"-XX:NonProfiledCodeHeapSize=194M"
"-XX:NmethodSweepActivity=1"
"-XX:+UseVectorCmov"
# Large pages (requires vm.nr_hugepages sysctl)
"-XX:+UseLargePages"
"-XX:LargePageSizeInBytes=${builtins.toString service_configs.minecraft.memory.large_page_size_m}M"
@@ -99,68 +92,71 @@
with pkgs;
builtins.attrValues {
FabricApi = fetchurl {
url = "https://cdn.modrinth.com/data/P7dR8mSH/versions/fm7UYECV/fabric-api-0.145.4%2B26.1.2.jar";
sha512 = "ffd5ef62a745f76cd2e5481252cb7bc67006c809b4f436827d05ea22c01d19279e94a3b24df3d57e127af1cd08440b5de6a92a4ea8f39b2dcbbe1681275564c3";
url = "https://cdn.modrinth.com/data/P7dR8mSH/versions/i5tSkVBH/fabric-api-0.141.3%2B1.21.11.jar";
sha512 = "c20c017e23d6d2774690d0dd774cec84c16bfac5461da2d9345a1cd95eee495b1954333c421e3d1c66186284d24a433f6b0cced8021f62e0bfa617d2384d0471";
};
# No 26.1.2 version available
# FerriteCore = fetchurl {
# url = "https://cdn.modrinth.com/data/uXXizFIs/versions/d5ddUdiB/ferritecore-9.0.0-fabric.jar";
# sha512 = "d81fa97e11784c19d42f89c2f433831d007603dd7193cee45fa177e4a6a9c52b384b198586e04a0f7f63cd996fed713322578bde9a8db57e1188854ae5cbe584";
# };
FerriteCore = fetchurl {
url = "https://cdn.modrinth.com/data/uXXizFIs/versions/Ii0gP3D8/ferritecore-8.2.0-fabric.jar";
sha512 = "3210926a82eb32efd9bcebabe2f6c053daf5c4337eebc6d5bacba96d283510afbde646e7e195751de795ec70a2ea44fef77cb54bf22c8e57bb832d6217418869";
};
Lithium = fetchurl {
url = "https://cdn.modrinth.com/data/gvQqBUqZ/versions/v2xoRvRP/lithium-fabric-0.24.1%2Bmc26.1.2.jar";
sha512 = "8711bc8c6f39be4c8511becb7a68e573ced56777bd691639f2fc62299b35bb4ccd2efe4a39bd9c308084b523be86a5f5c4bf921ab85f7a22bf075d8ea2359621";
url = "https://cdn.modrinth.com/data/gvQqBUqZ/versions/Ow7wA0kG/lithium-fabric-0.21.4%2Bmc1.21.11.jar";
sha512 = "f14a5c3d2fad786347ca25083f902139694f618b7c103947f2fd067a7c5ee88a63e1ef8926f7d693ea79ed7d00f57317bae77ef9c2d630bf5ed01ac97a752b94";
};
NoChatReports = fetchurl {
url = "https://cdn.modrinth.com/data/qQyHxfxd/versions/2yrLNE3S/NoChatReports-FABRIC-26.1-v2.19.0.jar";
sha512 = "94d58a1a4cde4e3b1750bdf724e65c5f4ff3436c2532f36a465d497d26bf59f5ac996cddbff8ecdfed770c319aa2f2dcc9c7b2d19a35651c2a7735c5b2124dad";
url = "https://cdn.modrinth.com/data/qQyHxfxd/versions/rhykGstm/NoChatReports-FABRIC-1.21.11-v2.18.0.jar";
sha512 = "d2c35cc8d624616f441665aff67c0e366e4101dba243bad25ed3518170942c1a3c1a477b28805cd1a36c44513693b1c55e76bea627d3fced13927a3d67022ccc";
};
squaremap = fetchurl {
url = "https://cdn.modrinth.com/data/PFb7ZqK6/versions/UBN6MFvH/squaremap-fabric-mc26.1.2-1.3.13.jar";
sha512 = "97bc130184b5d0ddc4ff98a15acef6203459d982e0e2afbd49a2976d546c55a86ef22b841378b51dd782be9b2cfbe4cfa197717f2b7f6800fd8b4ff4df6e564f";
url = "https://cdn.modrinth.com/data/PFb7ZqK6/versions/BW8lMXBi/squaremap-fabric-mc1.21.11-1.3.12.jar";
sha512 = "f62eb791a3f5812eb174565d318f2e6925353f846ef8ac56b4e595f481494e0c281f26b9e9fcfdefa855093c96b735b12f67ee17c07c2477aa7a3439238670d9";
};
scalablelux = fetchurl {
url = "https://cdn.modrinth.com/data/Ps1zyz6x/versions/gYbHVCz8/ScalableLux-0.2.0%2Bfabric.2b63825-all.jar";
sha512 = "48565a4d8a1cbd623f0044086d971f2c0cf1c40e1d0b6636a61d41512f4c1c1ddff35879d9dba24b088a670ee254e2d5842d13a30b6d76df23706fa94ea4a58b";
url = "https://cdn.modrinth.com/data/Ps1zyz6x/versions/PV9KcrYQ/ScalableLux-0.1.6%2Bfabric.c25518a-all.jar";
sha512 = "729515c1e75cf8d9cd704f12b3487ddb9664cf9928e7b85b12289c8fbbc7ed82d0211e1851375cbd5b385820b4fedbc3f617038fff5e30b302047b0937042ae7";
};
c2me = fetchurl {
url = "https://cdn.modrinth.com/data/VSNURh3q/versions/yrNQQ1AQ/c2me-fabric-mc26.1.2-0.3.7%2Balpha.0.65.jar";
sha512 = "6666ebaa3bfa403e386776590fc845b7c306107d37ebc7b1be3b057893fbf9f933abb2314c171d7fe19c177cf8823cb47fdc32040d34a9704f5ab656dd5d93f8";
url = "https://cdn.modrinth.com/data/VSNURh3q/versions/QdLiMUjx/c2me-fabric-mc1.21.11-0.3.7%2Balpha.0.7.jar";
sha512 = "f9543febe2d649a82acd6d5b66189b6a3d820cf24aa503ba493fdb3bbd4e52e30912c4c763fe50006f9a46947ae8cd737d420838c61b93429542573ed67f958e";
};
# No 26.1 version available
# krypton = fetchurl {
# url = "https://cdn.modrinth.com/data/fQEb0iXm/versions/O9LmWYR7/krypton-0.2.10.jar";
# sha512 = "4dcd7228d1890ddfc78c99ff284b45f9cf40aae77ef6359308e26d06fa0d938365255696af4cc12d524c46c4886cdcd19268c165a2bf0a2835202fe857da5cab";
# };
krypton = fetchurl {
url = "https://cdn.modrinth.com/data/fQEb0iXm/versions/O9LmWYR7/krypton-0.2.10.jar";
sha512 = "4dcd7228d1890ddfc78c99ff284b45f9cf40aae77ef6359308e26d06fa0d938365255696af4cc12d524c46c4886cdcd19268c165a2bf0a2835202fe857da5cab";
};
# No 26.1.2 version available
# disconnect-packet-fix = fetchurl {
# url = "https://cdn.modrinth.com/data/rd9rKuJT/versions/x9gVeaTU/disconnect-packet-fix-fabric-2.1.0.jar";
# sha512 = "bf84d02bdcd737706df123e452dd31ef535580fa4ced6af1e4ceea022fef94e4764775253e970b8caa1292e2fa00eb470557f70b290fafdb444479fa801b07a1";
# };
better-fabric-console = fetchurl {
url = "https://cdn.modrinth.com/data/Y8o1j1Sf/versions/6aIKl5wy/better-fabric-console-mc1.21.11-1.2.9.jar";
sha512 = "427247dafd99df202ee10b4bf60ffcbbecbabfadb01c167097ffb5b85670edb811f4d061c2551be816295cbbc6b8ec5ec464c14a6ff41912ef1f6c57b038d320";
};
disconnect-packet-fix = fetchurl {
url = "https://cdn.modrinth.com/data/rd9rKuJT/versions/Gv74xveQ/disconnect-packet-fix-fabric-2.0.0.jar";
sha512 = "1fd6f09a41ce36284e1a8e9def53f3f6834d7201e69e54e24933be56445ba569fbc26278f28300d36926ba92db6f4f9c0ae245d23576aaa790530345587316db";
};
packet-fixer = fetchurl {
url = "https://cdn.modrinth.com/data/c7m1mi73/versions/M8PqPQr4/packetfixer-fabric-3.3.4-26.1.2.jar";
sha512 = "698020edba2a1fd80bb282bfd4832a00d6447b08eaafbc2e16a8f3bf89e187fc9a622c92dfe94ae140dd485fc0220a86890f12158ec08054e473fef8337829bc";
url = "https://cdn.modrinth.com/data/c7m1mi73/versions/CUh1DWeO/packetfixer-fabric-3.3.4-1.21.11.jar";
sha512 = "33331b16cb40c5e6fbaade3cacc26f3a0e8fa5805a7186f94d7366a0e14dbeee9de2d2e8c76fa71f5e9dd24eb1c261667c35447e32570ea965ca0f154fdfba0a";
};
# mVUS fork: upstream ModernFix no longer ships Fabric builds
# fork of Modernfix for 1.21.11 (upstream will support 26.1)
modernfix = fetchurl {
url = "https://cdn.modrinth.com/data/TjSm1wrD/versions/dqQ7mabN/modernfix-5.26.2-build.1.jar";
sha512 = "fbef93c2dabf7bcd0ccd670226dfc4958f7ebe5d8c2b1158e88a65e6954a40f595efd58401d2a3dbb224660dca5952199cf64df29100e7bd39b1b1941290b57b";
url = "https://cdn.modrinth.com/data/TjSm1wrD/versions/JwSO8JCN/modernfix-5.25.2-build.4.jar";
sha512 = "0d65c05ac0475408c58ef54215714e6301113101bf98bfe4bb2ba949fbfddd98225ac4e2093a5f9206a9e01ba80a931424b237bdfa3b6e178c741ca6f7f8c6a3";
};
debugify = fetchurl {
url = "https://cdn.modrinth.com/data/QwxR6Gcd/versions/mfTTfiKn/debugify-26.1.2%2B1.0.jar";
sha512 = "63db82f2163b9f7fc27ebea999ffcd7a961054435b3ed7d8bf32d905b5f60ce81715916b7fd4e9509dd23703d5492059f3ce7e5f176402f8ed4f985a415553f4";
url = "https://cdn.modrinth.com/data/QwxR6Gcd/versions/8Q49lnaU/debugify-1.21.11%2B1.0.jar";
sha512 = "04d82dd33f44ced37045f1f9a54ad4eacd70861ff74a8800f2d2df358579e6cb0ea86a34b0086b3e87026b1a0691dd6594b4fdc49f89106466eea840518beb03";
};
}
);
};

View File

@@ -1,8 +0,0 @@
# Aggregator module: importing this file imports the four sibling modules
# listed below.
{
  imports = [ ./monero.nix ./p2pool.nix ./xmrig.nix ./xmrig-auto-pause.nix ];
}

View File

@@ -1,39 +0,0 @@
# NixOS module: runs xmrig-auto-pause.py as a systemd service. The whole
# module is a no-op unless services.xmrig is enabled.
{
  config,
  lib,
  pkgs,
  ...
}:
let
  unitName = "xmrig-auto-pause";
  # Matches the StateDirectory declared below.
  stateDir = "/var/lib/${unitName}";
in
lib.mkIf config.services.xmrig.enable {
  systemd.services.${unitName} = {
    description = "Auto-pause xmrig when other services need CPU";
    after = [ "xmrig.service" ];
    wantedBy = [ "multi-user.target" ];

    environment = {
      POLL_INTERVAL = "3";
      GRACE_PERIOD = "15";
      # Background services (qbittorrent, bitmagnet, postgresql, etc.) produce
      # 15-25% non-nice CPU during normal operation. The stop threshold must
      # sit above transient spikes; the resume threshold must be below the
      # steady-state floor to avoid restarting xmrig while services are active.
      CPU_STOP_THRESHOLD = "40";
      CPU_RESUME_THRESHOLD = "10";
      STARTUP_COOLDOWN = "10";
      STATE_DIR = stateDir;
    };

    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./xmrig-auto-pause.py}";
      Restart = "always";
      RestartSec = "10s";
      StateDirectory = unitName;

      # Sandboxing
      NoNewPrivileges = true;
      ProtectHome = true;
      ProtectSystem = "strict";
      PrivateTmp = true;
      MemoryDenyWriteExecute = true;
      RestrictAddressFamilies = [
        "AF_UNIX" # systemctl talks to systemd over D-Bus unix socket
      ];
    };
  };
}

View File

@@ -1,210 +0,0 @@
#!/usr/bin/env python3
"""
Auto-pause xmrig when other services need CPU.
Monitors non-nice CPU usage from /proc/stat. Since xmrig runs at Nice=19,
its CPU time lands in the 'nice' column and is excluded from the metric.
When real workload (user + system + irq + softirq) exceeds the stop
threshold, stops xmrig. When it drops below the resume threshold for
GRACE_PERIOD seconds, restarts xmrig.
This replaces per-service pause scripts with a single general-purpose
monitor that handles any CPU-intensive workload (gitea workers, llama-cpp
inference, etc.) without needing to know about specific processes.
Why scheduler priority alone isn't enough:
Nice=19 / SCHED_IDLE only affects which thread gets the next time slice.
RandomX's 2MB-per-thread scratchpad (24MB across 12 threads) pollutes
the shared 32MB L3 cache, and its memory access pattern saturates DRAM
bandwidth. Other services run slower even though they aren't denied CPU
time. The only fix is to stop xmrig entirely when real work is happening.
Hysteresis:
The stop threshold is set higher than the resume threshold to prevent
oscillation. When xmrig runs, its L3 cache pressure makes other processes
appear ~3-8% busier. A single threshold trips on this indirect effect,
causing stop/start thrashing. Separate thresholds break the cycle: the
resume threshold confirms the system is truly idle, while the stop
threshold requires genuine workload above xmrig's indirect pressure.
"""
import os
import subprocess
import sys
import time
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "3"))
GRACE_PERIOD = float(os.environ.get("GRACE_PERIOD", "15"))
# Percentage of total CPU ticks that non-nice processes must use to trigger
# a pause. On a 12-thread system, one fully loaded core ≈ 8.3% of total.
# Default 15% requires roughly two busy cores, which avoids false positives
# from xmrig's L3 cache pressure inflating other processes' apparent CPU.
CPU_STOP_THRESHOLD = float(os.environ.get("CPU_STOP_THRESHOLD", "15"))
# Percentage below which the system is considered idle enough to resume
# mining. Lower than the stop threshold to provide hysteresis.
CPU_RESUME_THRESHOLD = float(os.environ.get("CPU_RESUME_THRESHOLD", "5"))
# After starting xmrig, ignore CPU spikes for this many seconds to let
# RandomX dataset initialization complete (~4s on the target hardware)
# without retriggering a stop.
STARTUP_COOLDOWN = float(os.environ.get("STARTUP_COOLDOWN", "10"))
# Directory for persisting pause state across script restarts. Without
# this, a restart while xmrig is paused loses the paused_by_us flag and
# xmrig stays stopped permanently.
STATE_DIR = os.environ.get("STATE_DIR", "")
_PAUSE_FILE = os.path.join(STATE_DIR, "paused") if STATE_DIR else ""
def log(msg):
    """Write one prefixed line to stderr and flush it immediately."""
    sys.stderr.write(f"[xmrig-auto-pause] {msg}\n")
    sys.stderr.flush()
def read_cpu_ticks():
    """Sample the aggregate CPU tick counters from /proc/stat.

    Returns:
        (total_ticks, real_work_ticks). `total_ticks` sums the first eight
        counters of the first line; `real_work_ticks` is
        user + system + irq + softirq, i.e. it leaves out the 'nice' column
        (xmrig) as well as idle, iowait and steal.
    """
    with open("/proc/stat") as stat:
        fields = stat.readline().split()
    # cpu user nice system idle iowait irq softirq steal
    counters = [int(value) for value in fields[1:9]]
    user, nice, system, idle, iowait, irq, softirq, steal = counters
    return sum(counters), user + system + irq + softirq
def is_active(unit):
    """Return True when `systemctl is-active --quiet <unit>` exits with 0."""
    probe = subprocess.run(
        ["systemctl", "is-active", "--quiet", unit],
        capture_output=True,
    )
    return not probe.returncode
def systemctl(action, unit):
    """Run `systemctl <action> <unit>`.

    Returns True when the command exits with status 0. A non-zero exit is
    logged together with the captured stderr and yields False.
    """
    proc = subprocess.run(["systemctl", action, unit], capture_output=True, text=True)
    succeeded = proc.returncode == 0
    if not succeeded:
        log(f"systemctl {action} {unit} failed (rc={proc.returncode}): {proc.stderr.strip()}")
    return succeeded
def _save_paused(paused):
    """Persist pause flag so a script restart can resume where we left off.

    The flag is the mere existence of _PAUSE_FILE: it is created when
    `paused` is true and removed otherwise. Does nothing when no STATE_DIR is
    configured. Still best-effort (never raises on I/O errors), but failures
    are now logged instead of silently dropped, because a lost flag means a
    restart of this script can leave xmrig stopped.
    """
    if not _PAUSE_FILE:
        return
    try:
        if paused:
            with open(_PAUSE_FILE, "w"):
                pass
        else:
            os.remove(_PAUSE_FILE)
    except OSError as exc:
        # Clearing a flag that is already absent is the normal case and not
        # worth a log line; everything else is.
        if paused or not isinstance(exc, FileNotFoundError):
            log(f"Cannot persist pause state to {_PAUSE_FILE}: {exc}")
def _load_paused():
    """Report whether a previous instance left the pause flag file behind.

    Always False when no STATE_DIR is configured.
    """
    return bool(_PAUSE_FILE) and os.path.isfile(_PAUSE_FILE)
def main():
"""Sample CPU usage forever and stop/start xmrig.service with hysteresis.

Every POLL_INTERVAL seconds the share of non-nice CPU ticks since the
previous sample is computed. Above CPU_STOP_THRESHOLD a running xmrig is
stopped; once usage has stayed at or below CPU_RESUME_THRESHOLD for
GRACE_PERIOD seconds, an xmrig that this script stopped is started again.
Never returns.
"""
# True while xmrig is stopped because this script stopped it; recovered
# from the flag file so a restart of this script does not lose it.
paused_by_us = _load_paused()
idle_since = None
started_at = None # monotonic time when we last started xmrig
prev_total = None
prev_work = None
if paused_by_us:
log("Recovered pause state from previous instance")
log(
f"Starting: poll={POLL_INTERVAL}s grace={GRACE_PERIOD}s "
f"stop={CPU_STOP_THRESHOLD}% resume={CPU_RESUME_THRESHOLD}% "
f"cooldown={STARTUP_COOLDOWN}s"
)
while True:
total, work = read_cpu_ticks()
# First iteration: there is no earlier sample to diff against yet.
if prev_total is None:
prev_total = total
prev_work = work
time.sleep(POLL_INTERVAL)
continue
dt = total - prev_total
# No ticks elapsed between samples: skip to avoid dividing by zero.
if dt <= 0:
prev_total = total
prev_work = work
time.sleep(POLL_INTERVAL)
continue
# Percentage of all ticks in this interval spent on non-nice work.
real_work_pct = ((work - prev_work) / dt) * 100
prev_total = total
prev_work = work
# Don't act during startup cooldown — RandomX dataset init causes
# a transient CPU spike that would immediately retrigger a stop.
if started_at is not None:
if time.monotonic() - started_at < STARTUP_COOLDOWN:
time.sleep(POLL_INTERVAL)
continue
# Cooldown expired — verify xmrig survived startup. If it
# crashed during init (hugepage failure, pool unreachable, etc.),
# re-enter the pause/retry cycle rather than silently leaving
# xmrig dead.
if not is_active("xmrig.service"):
log("xmrig died during startup cooldown — will retry")
paused_by_us = True
_save_paused(True)
started_at = None
above_stop = real_work_pct > CPU_STOP_THRESHOLD
below_resume = real_work_pct <= CPU_RESUME_THRESHOLD
if above_stop:
idle_since = None
if paused_by_us and is_active("xmrig.service"):
# Something else restarted xmrig (deploy, manual start, etc.)
# while we thought it was stopped. Reset ownership so we can
# manage it again.
log("xmrig was restarted externally while paused — reclaiming")
paused_by_us = False
_save_paused(False)
if not paused_by_us:
# Only claim ownership if xmrig is actually running.
# If something else stopped it (e.g. UPS battery hook),
# don't interfere — we'd wrongly restart it later.
if is_active("xmrig.service"):
log(f"Real workload detected ({real_work_pct:.1f}% CPU) — stopping xmrig")
if systemctl("stop", "xmrig.service"):
paused_by_us = True
_save_paused(True)
elif paused_by_us:
if below_resume:
if idle_since is None:
# First quiet sample: start the grace timer.
idle_since = time.monotonic()
elif time.monotonic() - idle_since >= GRACE_PERIOD:
log(f"Workload ended ({real_work_pct:.1f}% CPU) past grace period — starting xmrig")
if systemctl("start", "xmrig.service"):
paused_by_us = False
_save_paused(False)
started_at = time.monotonic()
idle_since = None
else:
# Between thresholds — not idle enough to resume.
idle_since = None
time.sleep(POLL_INTERVAL)
if __name__ == "__main__":
main()

View File

@@ -1,12 +1,95 @@
{
config,
pkgs,
service_configs,
lib,
...
}:
let
# Directory the shell collectors defined below write their *.prom files into.
# Presumably also handed to node_exporter's textfile collector — that wiring
# is not visible here; verify.
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
# Datasource reference used by the Prometheus-backed dashboard targets.
promDs = {
type = "prometheus";
uid = "prometheus";
};
# Textfile collector for Jellyfin: writes the gauge `jellyfin_active_streams`
# (sessions that have a NowPlayingItem) to ${textfileDir}/jellyfin.prom.
# The API key is read from $CREDENTIALS_DIRECTORY/jellyfin-api-key and sent in
# the Authorization header rather than as an `api_key` query parameter, so the
# secret stays out of request URLs and any logs that record them.
# When Jellyfin cannot be reached the gauge is reported as 0.
jellyfinCollector = pkgs.writeShellApplication {
  name = "jellyfin-metrics-collector";
  runtimeInputs = with pkgs; [
    curl
    jq
  ];
  text = ''
    API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
    JELLYFIN="http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}"
    AUTH="Authorization: MediaBrowser Token=\"''${API_KEY}\""

    if response=$(curl -sf --max-time 5 -H "$AUTH" "''${JELLYFIN}/Sessions"); then
      active_streams=$(echo "$response" | jq '[.[] | select(.NowPlayingItem != null)] | length')
    else
      active_streams=0
    fi

    # Write to a per-process temp file and rename, so a scrape never sees a
    # half-written file.
    {
      echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
      echo '# TYPE jellyfin_active_streams gauge'
      echo "jellyfin_active_streams $active_streams"
    } > "${textfileDir}/jellyfin.prom.$$.tmp"
    mv "${textfileDir}/jellyfin.prom.$$.tmp" "${textfileDir}/jellyfin.prom"
  '';
};
# Wrapper that execs intel-gpu-collector.py with python3 and intel-gpu-tools
# on PATH.
intelGpuCollector = pkgs.writeShellApplication {
  name = "intel-gpu-collector";
  runtimeInputs = [
    pkgs.python3
    pkgs.intel-gpu-tools
  ];
  text = ''
    exec python3 ${./intel-gpu-collector.py}
  '';
};
# Textfile collector for qBittorrent: queries /api/v2/transfer/info on the
# WebUI inside the `wg` VPN namespace and writes the current download/upload
# speeds to ${textfileDir}/qbittorrent.prom. Both gauges are reported as 0
# when the WebUI cannot be reached.
qbittorrentCollector = pkgs.writeShellApplication {
  name = "qbittorrent-collector";
  runtimeInputs = [
    pkgs.curl
    pkgs.jq
  ];
  text = ''
    QBIT="http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}"
    OUT="${textfileDir}/qbittorrent.prom"

    # Defaults used when the WebUI is unreachable.
    dl=0
    ul=0
    if info=$(curl -sf --max-time 5 "''${QBIT}/api/v2/transfer/info"); then
      dl=$(echo "$info" | jq '.dl_info_speed')
      ul=$(echo "$info" | jq '.up_info_speed')
    fi

    # Write to a temp file and rename, so a scrape never sees a partial file.
    {
      echo '# HELP qbittorrent_download_bytes_per_second Current download speed in bytes/s'
      echo '# TYPE qbittorrent_download_bytes_per_second gauge'
      echo "qbittorrent_download_bytes_per_second $dl"
      echo '# HELP qbittorrent_upload_bytes_per_second Current upload speed in bytes/s'
      echo '# TYPE qbittorrent_upload_bytes_per_second gauge'
      echo "qbittorrent_upload_bytes_per_second $ul"
    } > "''${OUT}.tmp"
    mv "''${OUT}.tmp" "$OUT"
  '';
};
# Wraps disk-usage-collector.sh as a shell application with the tools it
# needs on PATH; the ZFS package is the one the system itself is configured
# with.
diskUsageCollector = pkgs.writeShellApplication {
  name = "disk-usage-collector";
  runtimeInputs = [
    pkgs.coreutils
    pkgs.gawk
    config.boot.zfs.package
    pkgs.util-linux # for mountpoint
  ];
  text = builtins.readFile ./disk-usage-collector.sh;
};
dashboard = {
editable = true;
graphTooltip = 1;
@@ -50,17 +133,15 @@ let
}
{
name = "LLM Requests";
datasource = promDs;
datasource = {
type = "grafana";
uid = "-- Grafana --";
};
enable = true;
iconColor = "purple";
target = {
datasource = promDs;
expr = "llamacpp:requests_processing > 0";
instant = false;
range = true;
refId = "A";
};
titleFormat = "LLM inference";
showIn = 0;
type = "tags";
tags = [ "llama-cpp" ];
}
];
@@ -389,7 +470,7 @@ let
targets = [
{
datasource = promDs;
expr = "count(jellyfin_now_playing_state) or vector(0)";
expr = "jellyfin_active_streams";
refId = "A";
}
];
@@ -441,25 +522,25 @@ let
targets = [
{
datasource = promDs;
expr = "sum(qbit_dlspeed) or vector(0)";
expr = "qbittorrent_download_bytes_per_second";
legendFormat = "Download";
refId = "A";
}
{
datasource = promDs;
expr = "sum(qbit_upspeed) or vector(0)";
expr = "qbittorrent_upload_bytes_per_second";
legendFormat = "Upload";
refId = "B";
}
{
datasource = promDs;
expr = "avg_over_time((sum(qbit_dlspeed) or vector(0))[10m:])";
expr = "avg_over_time(qbittorrent_download_bytes_per_second[10m:])";
legendFormat = "Download (10m avg)";
refId = "C";
}
{
datasource = promDs;
expr = "avg_over_time((sum(qbit_upspeed) or vector(0))[10m:])";
expr = "avg_over_time(qbittorrent_upload_bytes_per_second[10m:])";
legendFormat = "Upload (10m avg)";
refId = "D";
}
@@ -579,7 +660,7 @@ let
targets = [
{
datasource = promDs;
expr = "igpu_engines_busy_percent";
expr = "intel_gpu_engine_busy_percent";
legendFormat = "{{engine}}";
refId = "A";
}
@@ -615,13 +696,13 @@ let
targets = [
{
datasource = promDs;
expr = "zfs_pool_allocated_bytes{pool=\"tank\"} / zfs_pool_size_bytes{pool=\"tank\"} * 100";
expr = "zpool_used_bytes{pool=\"tank\"} / zpool_size_bytes{pool=\"tank\"} * 100";
legendFormat = "tank";
refId = "A";
}
{
datasource = promDs;
expr = "zfs_pool_allocated_bytes{pool=\"hdds\"} / zfs_pool_size_bytes{pool=\"hdds\"} * 100";
expr = "zpool_used_bytes{pool=\"hdds\"} / zpool_size_bytes{pool=\"hdds\"} * 100";
legendFormat = "hdds";
refId = "B";
}
@@ -655,19 +736,19 @@ let
targets = [
{
datasource = promDs;
expr = "(node_filesystem_size_bytes{mountpoint=\"/boot\"} - node_filesystem_avail_bytes{mountpoint=\"/boot\"}) / node_filesystem_size_bytes{mountpoint=\"/boot\"} * 100";
expr = "partition_used_bytes{mount=\"/boot\"} / partition_size_bytes{mount=\"/boot\"} * 100";
legendFormat = "/boot";
refId = "A";
}
{
datasource = promDs;
expr = "(node_filesystem_size_bytes{mountpoint=\"/persistent\"} - node_filesystem_avail_bytes{mountpoint=\"/persistent\"}) / node_filesystem_size_bytes{mountpoint=\"/persistent\"} * 100";
expr = "partition_used_bytes{mount=\"/persistent\"} / partition_size_bytes{mount=\"/persistent\"} * 100";
legendFormat = "/persistent";
refId = "B";
}
{
datasource = promDs;
expr = "(node_filesystem_size_bytes{mountpoint=\"/nix\"} - node_filesystem_avail_bytes{mountpoint=\"/nix\"}) / node_filesystem_size_bytes{mountpoint=\"/nix\"} * 100";
expr = "partition_used_bytes{mount=\"/nix\"} / partition_size_bytes{mount=\"/nix\"} * 100";
legendFormat = "/nix";
refId = "C";
}
@@ -691,8 +772,227 @@ let
};
in
{
# Storage wiring for the two stateful services in this module.
# NOTE(review): serviceMountWithZpool / serviceFilePerms are project helpers not
# visible here; presumably they tie each unit to the given zpool paths and apply
# the listed tmpfiles-style permission rules — confirm against their definitions.
imports = [
(lib.serviceMountWithZpool "grafana" service_configs.zpool_ssds [
service_configs.grafana.dir
])
(lib.serviceFilePerms "grafana" [
"Z ${service_configs.grafana.dir} 0700 grafana grafana"
])
(lib.serviceMountWithZpool "prometheus" service_configs.zpool_ssds [
"/var/lib/prometheus"
])
(lib.serviceFilePerms "prometheus" [
"Z /var/lib/prometheus 0700 prometheus prometheus"
])
];
# -- Prometheus --
# Prometheus server plus the node and apcupsd exporters. Everything listens on
# 127.0.0.1 only and is scraped over loopback.
services.prometheus = {
enable = true;
port = service_configs.ports.private.prometheus.port;
listenAddress = "127.0.0.1";
stateDir = "prometheus";
# Keep 90 days of samples.
retentionTime = "90d";
exporters = {
node = {
enable = true;
port = service_configs.ports.private.prometheus_node.port;
listenAddress = "127.0.0.1";
enabledCollectors = [
"hwmon"
"systemd"
"textfile"
];
# The textfile collector reads the *.prom files that the collector units in
# this module write into textfileDir.
extraFlags = [
"--collector.textfile.directory=${textfileDir}"
];
};
apcupsd = {
enable = true;
port = service_configs.ports.private.prometheus_apcupsd.port;
listenAddress = "127.0.0.1";
# Address the exporter queries apcupsd on.
apcupsdAddress = "127.0.0.1:3551";
};
};
# One scrape job per local endpoint: Prometheus itself, the node exporter and
# the apcupsd exporter.
scrapeConfigs = [
{
job_name = "prometheus";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus.port}" ]; }
];
}
{
job_name = "node";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_node.port}" ]; }
];
}
{
job_name = "apcupsd";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_apcupsd.port}" ]; }
];
}
];
};
# -- Grafana --
# Grafana bound to loopback, with a provisioned Prometheus datasource and
# file-provisioned dashboards.
services.grafana = {
enable = true;
dataDir = service_configs.grafana.dir;
settings = {
server = {
http_addr = "127.0.0.1";
http_port = service_configs.ports.private.grafana.port;
domain = service_configs.grafana.domain;
root_url = "https://${service_configs.grafana.domain}";
};
# Every visitor is an anonymous Admin; Grafana's own login form and basic auth
# are switched off.
# NOTE(review): this presumably relies on the Caddy vhost for this domain
# (which imports caddy_auth) as the only access control — verify nothing else
# can reach the Grafana port.
"auth.anonymous" = {
enabled = true;
org_role = "Admin";
};
"auth.basic".enabled = false;
"auth".disable_login_form = true;
analytics.reporting_enabled = false;
feature_toggles.enable = "dataConnectionsConsole=false";
users.default_theme = "dark";
# Disable unused built-in integrations
alerting.enabled = false;
"unified_alerting".enabled = false;
explore.enabled = false;
news.news_feed_enabled = false;
plugins = {
enable_alpha = false;
plugin_admin_enabled = false;
};
};
provision = {
datasources.settings = {
apiVersion = 1;
datasources = [
{
name = "Prometheus";
type = "prometheus";
url = "http://127.0.0.1:${toString service_configs.ports.private.prometheus.port}";
access = "proxy";
isDefault = true;
editable = false;
# Fixed uid for the provisioned datasource.
uid = "prometheus";
}
];
};
# Dashboards are loaded from /etc/grafana-dashboards and rescanned every 60s.
dashboards.settings.providers = [
{
name = "system";
type = "file";
options.path = "/etc/grafana-dashboards";
disableDeletion = true;
updateIntervalSeconds = 60;
}
];
};
};
# Render the `dashboard` attrset to JSON and install it world-readable under
# /etc/grafana-dashboards.
environment.etc."grafana-dashboards/system-overview.json".mode = "0444";
environment.etc."grafana-dashboards/system-overview.json".text = builtins.toJSON dashboard;
# Caddy vhost for Grafana: pull in the shared auth snippet, then proxy to the
# local Grafana port.
services.caddy.virtualHosts.${service_configs.grafana.domain}.extraConfig = ''
  import ${config.age.secrets.caddy_auth.path}
  reverse_proxy :${toString service_configs.ports.private.grafana.port}
'';
# -- Jellyfin active-stream prometheus textfile collector --
# Oneshot unit that runs jellyfinCollector; the timer below triggers it.
systemd.services.jellyfin-metrics-collector = {
description = "Collect Jellyfin metrics for Prometheus";
after = [ "network.target" ];
serviceConfig = {
Type = "oneshot";
ExecStart = lib.getExe jellyfinCollector;
# Hand the agenix-managed API key to the unit as the systemd credential
# "jellyfin-api-key" instead of exposing the secret path directly.
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
};
};
systemd.timers.jellyfin-metrics-collector = {
wantedBy = [ "timers.target" ];
timerConfig = {
# Seconds field 0/30: fires at :00 and :30 of every minute.
OnCalendar = "*:*:0/30";
# Add up to 5s of random delay to each activation.
RandomizedDelaySec = "5s";
};
};
# -- Intel GPU textfile collector --
# Oneshot run of intelGpuCollector; TEXTFILE names the .prom file inside the
# node exporter's textfile directory (presumably the script's output path).
systemd.services.intel-gpu-collector = {
  description = "Collect Intel GPU metrics for Prometheus";
  environment = {
    TEXTFILE = "${textfileDir}/intel-gpu.prom";
  };
  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = lib.getExe intelGpuCollector;
};
# Trigger the collector at :00 and :30 of every minute, with up to 10s of jitter.
systemd.timers.intel-gpu-collector = {
  timerConfig.OnCalendar = "*:*:0/30";
  timerConfig.RandomizedDelaySec = "10s";
  wantedBy = [ "timers.target" ];
};
# -- qBittorrent speed textfile collector --
# Oneshot run of qbittorrentCollector, ordered after the network and the
# qBittorrent service.
systemd.services.qbittorrent-collector = {
  description = "Collect qBittorrent transfer metrics for Prometheus";
  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = lib.getExe qbittorrentCollector;
  after = [
    "network.target"
    "qbittorrent.service"
  ];
};
# Trigger the collector every 15 seconds (:00, :15, :30, :45), with up to 3s of
# jitter.
systemd.timers.qbittorrent-collector = {
  timerConfig.OnCalendar = "*:*:0/15";
  timerConfig.RandomizedDelaySec = "3s";
  wantedBy = [ "timers.target" ];
};
# -- Disk/pool usage textfile collector --
# Oneshot run of diskUsageCollector; TEXTFILE names the .prom file inside the
# node exporter's textfile directory (presumably the script's output path).
systemd.services.disk-usage-collector = {
  description = "Collect ZFS pool and partition usage metrics for Prometheus";
  serviceConfig = {
    Type = "oneshot";
    ExecStart = lib.getExe diskUsageCollector;
  };
  environment.TEXTFILE = "${textfileDir}/disk-usage.prom";
};
systemd.timers.disk-usage-collector = {
  wantedBy = [ "timers.target" ];
  timerConfig = {
    # Once per minute, at second :00. The previous "*:*:0/60" used a step of 60
    # on the seconds field, which is outside its 0-59 range: at best it only
    # ever matched :00, and systemd's calendar-spec validation may reject it
    # outright. "minutely" is the documented shorthand for the intended schedule.
    OnCalendar = "minutely";
    RandomizedDelaySec = "10s";
  };
};
# Ensure the node exporter's textfile directory exists (root-owned, mode 0755)
# so the collector units above have somewhere to write their .prom files.
systemd.tmpfiles.rules = [
"d ${textfileDir} 0755 root root -"
];
}

View File

@@ -1,10 +1,5 @@
{ config, service_configs, ... }:
{
config,
lib,
service_configs,
...
}:
lib.mkIf config.services.ntfy-sh.enable {
services.ntfyAlerts = {
enable = true;
serverUrl = "https://${service_configs.ntfy.domain}";

View File

@@ -12,10 +12,6 @@
(lib.serviceFilePerms "ntfy-sh" [
"Z /var/lib/private/ntfy-sh 0700 ${config.services.ntfy-sh.user} ${config.services.ntfy-sh.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.ntfy.domain;
port = service_configs.ports.private.ntfy.port;
})
];
services.ntfy-sh = {
@@ -31,4 +27,8 @@
};
};
services.caddy.virtualHosts."${service_configs.ntfy.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.ntfy.port}
'';
}

View File

@@ -1,6 +0,0 @@
{
imports = [
./ntfy.nix
./ntfy-alerts.nix
];
}

View File

@@ -33,6 +33,12 @@
wants = [ "monero.service" ];
};
# Stop p2pool on UPS battery to conserve power, and start it again from the
# offbattery hook. The hooks are only defined when apcupsd is enabled.
services.apcupsd.hooks = lib.mkIf config.services.apcupsd.enable {
onbattery = "systemctl stop p2pool";
offbattery = "systemctl start p2pool";
};
networking.firewall.allowedTCPPorts = [
service_configs.ports.public.p2pool_p2p.port
];

View File

@@ -23,18 +23,10 @@ in
(lib.serviceFilePerms "qbittorrent" [
# 0770: group (media) needs write to delete files during upgrades —
# Radarr/Sonarr must unlink the old file before placing the new one.
# Non-recursive (z not Z): UMask=0007 ensures new files get correct perms.
# A recursive Z rule would walk millions of files on the HDD pool at every boot.
"z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}"
"Z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}"
"z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.TempPath} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
"Z ${config.services.qbittorrent.profileDir} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "torrent";
port = service_configs.ports.private.torrent.port;
auth = true;
vpn = true;
})
];
services.qbittorrent = {
@@ -164,34 +156,10 @@ in
_: path: "d ${path} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group} -"
) service_configs.torrent.categories;
# Periodically checkpoint qBittorrent's SQLite WAL (Write-Ahead Log).
# qBittorrent holds a read transaction open for its entire lifetime,
# preventing SQLite's auto-checkpoint from running. The WAL grows
# unbounded (observed: 405 MB) and must be replayed on next startup,
# causing 10+ minute "internal preparations" hangs.
# A second sqlite3 connection can checkpoint concurrently and safely.
# See: https://github.com/qbittorrent/qBittorrent/issues/20433
systemd.services.qbittorrent-wal-checkpoint = {
description = "Checkpoint qBittorrent SQLite WAL";
after = [ "qbittorrent.service" ];
requires = [ "qbittorrent.service" ];
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.sqlite}/bin/sqlite3 ${config.services.qbittorrent.profileDir}/qBittorrent/data/torrents.db 'PRAGMA wal_checkpoint(TRUNCATE);'";
User = config.services.qbittorrent.user;
Group = config.services.qbittorrent.group;
};
};
systemd.timers.qbittorrent-wal-checkpoint = {
description = "Periodically checkpoint qBittorrent SQLite WAL";
wantedBy = [ "timers.target" ];
timerConfig = {
OnUnitActiveSec = "4h";
OnBootSec = "30min";
RandomizedDelaySec = "10min";
};
};
services.caddy.virtualHosts."torrent.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString config.services.qbittorrent.webuiPort}
'';
users.users.${config.services.qbittorrent.user}.extraGroups = [
service_configs.media_group

View File

@@ -19,10 +19,6 @@
"Z ${service_configs.slskd.downloads} 0750 ${config.services.slskd.user} music"
"Z ${service_configs.slskd.incomplete} 0750 ${config.services.slskd.user} music"
])
(lib.mkCaddyReverseProxy {
subdomain = "soulseek";
port = service_configs.ports.private.soulseek_web.port;
})
];
users.groups."music" = { };
@@ -62,6 +58,11 @@
users.users.${config.services.jellyfin.user}.extraGroups = [ "music" ];
users.users.${username}.extraGroups = [ "music" ];
# NOTE(review): unlike the other vhosts, this one does not import caddy_auth.
# The original note only says slskd "doesn't work with auth"; the root cause is
# unknown, so the web UI is proxied without Caddy auth here — confirm that is
# still intended.
services.caddy.virtualHosts."soulseek.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.slskd.settings.web.port}
'';
networking.firewall.allowedTCPPorts = [
service_configs.ports.public.soulseek_listen.port
];

View File

@@ -17,11 +17,6 @@
"Z ${service_configs.syncthing.signalBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
"Z ${service_configs.syncthing.grayjayBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "syncthing";
port = service_configs.ports.private.syncthing_gui.port;
auth = true;
})
];
services.syncthing = {
@@ -54,4 +49,9 @@
];
};
services.caddy.virtualHosts."syncthing.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${toString service_configs.ports.private.syncthing_gui.port}
'';
}

View File

@@ -10,11 +10,6 @@
(lib.serviceMountWithZpool "trilium-server" service_configs.zpool_ssds [
(service_configs.services_dir + "/trilium")
])
(lib.mkCaddyReverseProxy {
subdomain = "notes";
port = service_configs.ports.private.trilium.port;
auth = true;
})
];
services.trilium-server = {
@@ -24,4 +19,8 @@
dataDir = service_configs.trilium.dataDir;
};
services.caddy.virtualHosts."notes.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${toString service_configs.ports.private.trilium.port}
'';
}

View File

@@ -11,7 +11,7 @@ in
{
services.xmrig = {
enable = true;
package = lib.optimizePackage pkgs.xmrig;
package = pkgs.xmrig;
settings = {
autosave = true;

View File

@@ -21,7 +21,7 @@ let
text = builtins.readFile ./zfs-scrub-annotations.sh;
};
in
lib.mkIf (config.services.grafana.enable && config.services.zfs.autoScrub.enable) {
{
systemd.services.zfs-scrub = {
environment = {
GRAFANA_URL = grafanaUrl;

View File

@@ -30,7 +30,7 @@ let
{ config, pkgs, ... }:
{
imports = [
(import ../services/jellyfin/jellyfin.nix {
(import ../services/jellyfin.nix {
inherit config pkgs;
lib = testLib;
service_configs = testServiceConfigs;
@@ -107,7 +107,7 @@ pkgs.testers.runNixOSTest {
server.wait_for_unit("jellyfin.service")
server.wait_for_unit("fail2ban.service")
server.wait_for_open_port(8096)
server.wait_until_succeeds("curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120)
server.wait_until_succeeds("curl -sf http://localhost:8096/health | grep -q Healthy", timeout=60)
time.sleep(2)
# Wait for Jellyfin to create real log files and reload fail2ban

View File

@@ -6,7 +6,7 @@
let
jfLib = import ./jellyfin-test-lib.nix { inherit pkgs lib; };
mockGrafana = ./mock-grafana-server.py;
script = ../services/grafana/jellyfin-annotations.py;
script = ../services/jellyfin-annotations.py;
python = pkgs.python3;
in
pkgs.testers.runNixOSTest {

View File

@@ -6,21 +6,6 @@
}:
let
jfLib = import ./jellyfin-test-lib.nix { inherit pkgs lib; };
webhookPlugin = import ../services/jellyfin/jellyfin-webhook-plugin.nix { inherit pkgs lib; };
configureWebhook = webhookPlugin.mkConfigureScript {
jellyfinUrl = "http://localhost:8096";
webhooks = [
{
name = "qBittorrent Monitor";
uri = "http://127.0.0.1:9898/";
notificationTypes = [
"PlaybackStart"
"PlaybackProgress"
"PlaybackStop"
];
}
];
};
in
pkgs.testers.runNixOSTest {
name = "jellyfin-qbittorrent-monitor";
@@ -84,30 +69,11 @@ pkgs.testers.runNixOSTest {
}
];
# Create directories for qBittorrent.
# Create directories for qBittorrent
systemd.tmpfiles.rules = [
"d /var/lib/qbittorrent/downloads 0755 qbittorrent qbittorrent"
"d /var/lib/qbittorrent/incomplete 0755 qbittorrent qbittorrent"
];
# Install the Jellyfin Webhook plugin before Jellyfin starts, mirroring
# the production module. Jellyfin rewrites meta.json at runtime so a
# read-only nix-store symlink would fail — we materialise a writable copy.
systemd.services."jellyfin-webhook-install" = {
description = "Install Jellyfin Webhook plugin files";
before = [ "jellyfin.service" ];
wantedBy = [ "jellyfin.service" ];
serviceConfig = {
Type = "oneshot";
RemainAfterExit = true;
User = "jellyfin";
Group = "jellyfin";
UMask = "0077";
ExecStart = webhookPlugin.mkInstallScript {
pluginsDir = "/var/lib/jellyfin/plugins";
};
};
};
};
# Public test IP (RFC 5737 TEST-NET-3) so Jellyfin sees it as external
@@ -171,7 +137,7 @@ pkgs.testers.runNixOSTest {
with subtest("Start monitor service"):
python = "${pkgs.python3.withPackages (ps: [ ps.requests ])}/bin/python"
monitor = "${../services/jellyfin/jellyfin-qbittorrent-monitor.py}"
monitor = "${../services/jellyfin-qbittorrent-monitor.py}"
server.succeed(f"""
systemd-run --unit=monitor-test \
--setenv=JELLYFIN_URL=http://localhost:8096 \
@@ -428,97 +394,6 @@ pkgs.testers.runNixOSTest {
local_playback["PositionTicks"] = 50000000
server.succeed(f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing/Stopped' -d '{json.dumps(local_playback)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{local_auth}, Token={local_token}'")
# === WEBHOOK TESTS ===
#
# Configure the Jellyfin Webhook plugin to target the monitor, then verify
# the real Jellyfin plugin monitor path reacts faster than any possible
# poll. CHECK_INTERVAL=30 rules out polling as the cause.
WEBHOOK_PORT = 9898
WEBHOOK_CREDS = "/tmp/webhook-creds"
# Start a webhook-enabled monitor with long poll interval.
server.succeed("systemctl stop monitor-test || true")
time.sleep(1)
server.succeed(f"""
systemd-run --unit=monitor-webhook \
--setenv=JELLYFIN_URL=http://localhost:8096 \
--setenv=JELLYFIN_API_KEY={token} \
--setenv=QBITTORRENT_URL=http://localhost:8080 \
--setenv=CHECK_INTERVAL=30 \
--setenv=STREAMING_START_DELAY=1 \
--setenv=STREAMING_STOP_DELAY=1 \
--setenv=TOTAL_BANDWIDTH_BUDGET=50000000 \
--setenv=SERVICE_BUFFER=2000000 \
--setenv=DEFAULT_STREAM_BITRATE=10000000 \
--setenv=MIN_TORRENT_SPEED=100 \
--setenv=WEBHOOK_PORT={WEBHOOK_PORT} \
--setenv=WEBHOOK_BIND=127.0.0.1 \
{python} {monitor}
""")
server.wait_until_succeeds(f"ss -ltn | grep -q ':{WEBHOOK_PORT}'", timeout=15)
time.sleep(2)
assert not is_throttled(), "Should start unthrottled"
# Drop the admin token where the configure script expects it (production uses agenix).
server.succeed(f"mkdir -p {WEBHOOK_CREDS} && echo '{token}' > {WEBHOOK_CREDS}/jellyfin-api-key")
server.succeed(
f"systemd-run --wait --unit=webhook-configure-test "
f"--setenv=CREDENTIALS_DIRECTORY={WEBHOOK_CREDS} "
f"${configureWebhook}"
)
with subtest("Real PlaybackStart event throttles via the plugin"):
playback_start = {
"ItemId": movie_id,
"MediaSourceId": media_source_id,
"PlaySessionId": "test-plugin-start",
"CanSeek": True,
"IsPaused": False,
}
start_cmd = f"curl -sf -X POST 'http://{server_ip}:8096/Sessions/Playing' -d '{json.dumps(playback_start)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{client_auth}, Token={client_token}'"
client.succeed(start_cmd)
server.wait_until_succeeds(
"curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^1$'",
timeout=5,
)
# Let STREAMING_STOP_DELAY (1s) elapse so the upcoming stop is not swallowed by hysteresis.
time.sleep(2)
with subtest("Real PlaybackStop event unthrottles via the plugin"):
playback_stop = {
"ItemId": movie_id,
"MediaSourceId": media_source_id,
"PlaySessionId": "test-plugin-start",
"PositionTicks": 50000000,
}
stop_cmd = f"curl -sf -X POST 'http://{server_ip}:8096/Sessions/Playing/Stopped' -d '{json.dumps(playback_stop)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{client_auth}, Token={client_token}'"
client.succeed(stop_cmd)
server.wait_until_succeeds(
"curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^0$'",
timeout=10,
)
# Restore fast-polling monitor for the service-restart tests below.
server.succeed("systemctl stop monitor-webhook || true")
time.sleep(1)
server.succeed(f"""
systemd-run --unit=monitor-test \
--setenv=JELLYFIN_URL=http://localhost:8096 \
--setenv=JELLYFIN_API_KEY={token} \
--setenv=QBITTORRENT_URL=http://localhost:8080 \
--setenv=CHECK_INTERVAL=1 \
--setenv=STREAMING_START_DELAY=1 \
--setenv=STREAMING_STOP_DELAY=1 \
--setenv=TOTAL_BANDWIDTH_BUDGET=50000000 \
--setenv=SERVICE_BUFFER=2000000 \
--setenv=DEFAULT_STREAM_BITRATE=10000000 \
--setenv=MIN_TORRENT_SPEED=100 \
{python} {monitor}
""")
time.sleep(2)
# === SERVICE RESTART TESTS ===
with subtest("qBittorrent restart during throttled state re-applies throttling"):

View File

@@ -18,7 +18,7 @@ def setup_jellyfin(machine, retry, auth_header, auth_payload, empty_payload):
machine.wait_for_unit("jellyfin.service")
machine.wait_for_open_port(8096)
machine.wait_until_succeeds(
"curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120
"curl -sf http://localhost:8096/health | grep -q Healthy", timeout=60
)
machine.wait_until_succeeds(

View File

@@ -0,0 +1,179 @@
{
lib,
pkgs,
...
}:
let
mockGrafana = ./mock-grafana-server.py;
script = ../services/llama-cpp-annotations.py;
python = pkgs.python3;
# Minimal stand-in for the llama.cpp HTTP server, used only by this test.
# Usage: python mock-llama-cpp-server.py <port> <state-file>
#   GET  /slots           -> returns the JSON currently stored in <state-file>
#   POST /test/set-slots  -> overwrites <state-file> with the request body
# Any other path returns 404. If <state-file> does not exist at startup it is
# seeded with a single idle slot. The server binds to 127.0.0.1 and suppresses
# request logging.
mockLlamaCpp = pkgs.writeText "mock-llama-cpp-server.py" ''
import http.server, json, sys, os
PORT = int(sys.argv[1])
STATE_FILE = sys.argv[2]
if not os.path.exists(STATE_FILE):
with open(STATE_FILE, "w") as f:
json.dump([{"id": 0, "is_processing": False, "next_token": {"n_decoded": 0}}], f)
class Handler(http.server.BaseHTTPRequestHandler):
def log_message(self, fmt, *args):
pass
def _json(self, code, body):
data = json.dumps(body).encode()
self.send_response(code)
self.send_header("Content-Type", "application/json")
self.end_headers()
self.wfile.write(data)
def do_GET(self):
if self.path == "/slots":
with open(STATE_FILE) as f:
slots = json.load(f)
self._json(200, slots)
else:
self.send_response(404)
self.end_headers()
def do_POST(self):
if self.path == "/test/set-slots":
length = int(self.headers.get("Content-Length", 0))
body = json.loads(self.rfile.read(length)) if length else []
with open(STATE_FILE, "w") as f:
json.dump(body, f)
self._json(200, {"ok": True})
else:
self.send_response(404)
self.end_headers()
http.server.HTTPServer(("127.0.0.1", PORT), Handler).serve_forever()
'';
in
pkgs.testers.runNixOSTest {
name = "llama-cpp-annotations";
nodes.machine =
{ pkgs, ... }:
{
environment.systemPackages = [
pkgs.python3
pkgs.curl
];
};
testScript = ''
import json
import time
GRAFANA_PORT = 13000
LLAMA_PORT = 16688
ANNOTS_FILE = "/tmp/annotations.json"
SLOTS_FILE = "/tmp/llama-slots.json"
STATE_FILE = "/tmp/llama-annot-state.json"
PYTHON = "${python}/bin/python3"
MOCK_GRAFANA = "${mockGrafana}"
MOCK_LLAMA = "${mockLlamaCpp}"
SCRIPT = "${script}"
def read_annotations():
    """Return the annotations recorded by the mock Grafana server as a list.

    Falls back to an empty list when the annotations file cannot be read.
    """
    raw = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'").strip()
    return json.loads(raw)
def set_slots(slots):
    """Replace the mock llama.cpp server's slot state with `slots`."""
    payload = json.dumps(slots)
    endpoint = f"http://127.0.0.1:{LLAMA_PORT}/test/set-slots"
    command = (
        f"curl -sf -X POST {endpoint} "
        "-H 'Content-Type: application/json' "
        f"-d '{payload}'"
    )
    machine.succeed(command)
start_all()
machine.wait_for_unit("multi-user.target")
with subtest("Start mock services"):
machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
machine.succeed(
f"systemd-run --unit=mock-grafana {PYTHON} {MOCK_GRAFANA} {GRAFANA_PORT} {ANNOTS_FILE}"
)
machine.succeed(
f"echo '[{{\"id\": 0, \"is_processing\": false, \"next_token\": {{\"n_decoded\": 0}}}}]' > {SLOTS_FILE}"
)
machine.succeed(
f"systemd-run --unit=mock-llama {PYTHON} {MOCK_LLAMA} {LLAMA_PORT} {SLOTS_FILE}"
)
machine.wait_until_succeeds(
f"curl -sf http://127.0.0.1:{GRAFANA_PORT}/api/annotations -X POST "
f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
timeout=10,
)
machine.wait_until_succeeds(
f"curl -sf http://127.0.0.1:{LLAMA_PORT}/slots | grep -q is_processing",
timeout=10,
)
machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
with subtest("Start annotation service"):
machine.succeed(
f"systemd-run --unit=llama-annot "
f"--setenv=LLAMA_CPP_URL=http://127.0.0.1:{LLAMA_PORT} "
f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
f"--setenv=STATE_FILE={STATE_FILE} "
f"--setenv=POLL_INTERVAL=2 "
f"{PYTHON} {SCRIPT}"
)
time.sleep(3)
with subtest("No annotations when slots are idle"):
annots = read_annotations()
assert annots == [], f"Expected no annotations, got: {annots}"
with subtest("Annotation created when slot starts processing"):
set_slots([{"id": 0, "is_processing": True, "next_token": {"n_decoded": 0}}])
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a else 1)\"",
timeout=15,
)
annots = read_annotations()
assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
assert "llama-cpp" in annots[0].get("tags", []), f"Missing tag: {annots[0]}"
assert "slot 0" in annots[0]["text"], f"Missing slot info: {annots[0]['text']}"
assert "timeEnd" not in annots[0], f"timeEnd should not be set: {annots[0]}"
with subtest("Annotation closed when slot stops processing"):
set_slots([{"id": 0, "is_processing": False, "next_token": {"n_decoded": 42}}])
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a and 'timeEnd' in a[0] else 1)\"",
timeout=15,
)
annots = read_annotations()
assert len(annots) == 1, f"Expected 1, got: {annots}"
assert "timeEnd" in annots[0], f"timeEnd missing: {annots[0]}"
assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
assert "42 tokens" in annots[0].get("text", ""), f"Token count missing: {annots[0]}"
with subtest("State survives restart"):
set_slots([{"id": 0, "is_processing": True, "next_token": {"n_decoded": 0}}])
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if len(a)==2 else 1)\"",
timeout=15,
)
machine.succeed("systemctl stop llama-annot || true")
time.sleep(1)
machine.succeed(
f"systemd-run --unit=llama-annot-2 "
f"--setenv=LLAMA_CPP_URL=http://127.0.0.1:{LLAMA_PORT} "
f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
f"--setenv=STATE_FILE={STATE_FILE} "
f"--setenv=POLL_INTERVAL=2 "
f"{PYTHON} {SCRIPT}"
)
time.sleep(4)
annots = read_annotations()
assert len(annots) == 2, f"Restart should not duplicate, got: {annots}"
'';
}

View File

@@ -28,8 +28,9 @@ in
# zfs scrub annotations test
zfsScrubAnnotationsTest = handleTest ./zfs-scrub-annotations.nix;
# xmrig auto-pause test
xmrigAutoPauseTest = handleTest ./xmrig-auto-pause.nix;
# llama-cpp annotation service test
llamaCppAnnotationsTest = handleTest ./llama-cpp-annotations.nix;
# ntfy alerts test
ntfyAlertsTest = handleTest ./ntfy-alerts.nix;

View File

@@ -1,206 +0,0 @@
{
pkgs,
...
}:
let
script = ../services/monero/xmrig-auto-pause.py;
python = pkgs.python3;
in
pkgs.testers.runNixOSTest {
name = "xmrig-auto-pause";
nodes.machine =
{ pkgs, ... }:
{
environment.systemPackages = [
pkgs.python3
pkgs.procps
];
# Mock xmrig as a nice'd sleep process that can be stopped/started.
systemd.services.xmrig = {
description = "Mock xmrig miner";
serviceConfig = {
ExecStart = "${pkgs.coreutils}/bin/sleep infinity";
Type = "simple";
Nice = 19;
};
wantedBy = [ "multi-user.target" ];
};
};
testScript = ''
import time
PYTHON = "${python}/bin/python3"
SCRIPT = "${script}"
# Tuned for test VMs (1-2 cores).
# POLL_INTERVAL=1 keeps detection latency low.
# GRACE_PERIOD=5 is long enough to verify "stays stopped" but short
# enough that the full test completes in reasonable time.
# CPU_STOP_THRESHOLD=20 catches a busy-loop on a 1-2 core VM (50-100%)
# without triggering from normal VM noise.
# CPU_RESUME_THRESHOLD=10 is the idle cutoff for a 1-2 core VM.
POLL_INTERVAL = "1"
GRACE_PERIOD = "5"
CPU_STOP_THRESHOLD = "20"
CPU_RESUME_THRESHOLD = "10"
STARTUP_COOLDOWN = "4"
STATE_DIR = "/tmp/xap-state"
def start_cpu_load(name):
"""Start a non-nice CPU burn as a transient systemd unit."""
machine.succeed(
f"systemd-run --unit={name} --property=Type=exec "
f"bash -c 'while true; do :; done'"
)
def stop_cpu_load(name):
machine.succeed(f"systemctl stop {name}")
def start_monitor(unit_name):
    """Start the auto-pause monitor as a transient unit.

    Launches the monitor script under systemd-run with the tuning values
    defined at the top of this test script, then waits for the monitor to
    warm up before returning.
    """
    machine.succeed(
        f"systemd-run --unit={unit_name} "
        f"--setenv=POLL_INTERVAL={POLL_INTERVAL} "
        f"--setenv=GRACE_PERIOD={GRACE_PERIOD} "
        f"--setenv=CPU_STOP_THRESHOLD={CPU_STOP_THRESHOLD} "
        f"--setenv=CPU_RESUME_THRESHOLD={CPU_RESUME_THRESHOLD} "
        f"--setenv=STARTUP_COOLDOWN={STARTUP_COOLDOWN} "
        f"--setenv=STATE_DIR={STATE_DIR} "
        f"{PYTHON} {SCRIPT}"
    )
    # Monitor needs two consecutive polls to compute a CPU delta.
    # (The comment and sleep were previously duplicated, doubling the wait to
    # 6s on every monitor start without any stated reason.)
    time.sleep(3)
start_all()
machine.wait_for_unit("multi-user.target")
machine.wait_for_unit("xmrig.service")
machine.succeed(f"mkdir -p {STATE_DIR}")
with subtest("Start auto-pause monitor"):
start_monitor("xmrig-auto-pause")
with subtest("xmrig stays running while system is idle"):
machine.succeed("systemctl is-active xmrig")
with subtest("xmrig stopped when CPU load appears"):
start_cpu_load("cpu-load")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
with subtest("xmrig remains stopped during grace period after load ends"):
stop_cpu_load("cpu-load")
# Load just stopped. Grace period is 5s. Check at 2s well within.
time.sleep(2)
machine.fail("systemctl is-active xmrig")
with subtest("xmrig resumes after grace period expires"):
# Already idle since previous subtest. Grace period (5s) plus
# detection delay (~2 polls) plus startup cooldown (4s) means
# xmrig should restart within ~12s.
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("Intermittent load does not cause flapping"):
# First load stop xmrig
start_cpu_load("cpu-load-1")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
stop_cpu_load("cpu-load-1")
# Brief idle gap shorter than grace period
time.sleep(2)
# Second load arrives before grace period expires
start_cpu_load("cpu-load-2")
time.sleep(3)
# xmrig must still be stopped
machine.fail("systemctl is-active xmrig")
stop_cpu_load("cpu-load-2")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("Sustained load keeps xmrig stopped"):
start_cpu_load("cpu-load-3")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# Stay busy longer than the grace period to prove continuous
# activity keeps xmrig stopped indefinitely.
time.sleep(8)
machine.fail("systemctl is-active xmrig")
stop_cpu_load("cpu-load-3")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
with subtest("External restart detected and re-stopped under load"):
# Put system under load so auto-pause stops xmrig.
start_cpu_load("cpu-load-4")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# Something external starts xmrig while load is active.
# The script should detect this and re-stop it.
machine.succeed("systemctl start xmrig")
machine.succeed("systemctl is-active xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
stop_cpu_load("cpu-load-4")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=20)
# --- State persistence and crash recovery ---
machine.succeed("systemctl stop xmrig-auto-pause")
with subtest("xmrig recovers after crash during startup cooldown"):
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
start_monitor("xmrig-auto-pause-crash")
# Load -> xmrig stops
start_cpu_load("cpu-crash")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# End load -> xmrig restarts after grace period
stop_cpu_load("cpu-crash")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
# Kill xmrig immediately simulates crash during startup cooldown.
# The script should detect the failure when cooldown expires and
# re-enter the retry cycle.
machine.succeed("systemctl kill --signal=KILL xmrig")
machine.wait_until_fails("systemctl is-active xmrig", timeout=5)
# After cooldown + grace period + restart, xmrig should be back.
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
machine.succeed("systemctl stop xmrig-auto-pause-crash")
machine.succeed("systemctl reset-failed xmrig.service || true")
machine.succeed("systemctl start xmrig")
machine.wait_for_unit("xmrig.service")
with subtest("Script restart preserves pause state"):
machine.succeed(f"rm -rf {STATE_DIR} && mkdir -p {STATE_DIR}")
start_monitor("xmrig-auto-pause-persist")
# Load -> xmrig stops
start_cpu_load("cpu-persist")
machine.wait_until_fails("systemctl is-active xmrig", timeout=20)
# Kill the monitor while xmrig is paused (simulates script crash)
machine.succeed("systemctl stop xmrig-auto-pause-persist")
# State file must exist the monitor persisted the pause flag
machine.succeed(f"test -f {STATE_DIR}/paused")
# Start a fresh monitor instance (reads state file on startup)
start_monitor("xmrig-auto-pause-persist2")
# End load the new monitor should pick up the paused state
# and restart xmrig after the grace period
stop_cpu_load("cpu-persist")
machine.wait_until_succeeds("systemctl is-active xmrig", timeout=30)
# State file should be cleaned up after successful restart
machine.fail(f"test -f {STATE_DIR}/paused")
machine.succeed("systemctl stop xmrig-auto-pause-persist2")
'';
}

View File

@@ -23,7 +23,7 @@ let
esac
'';
script = ../services/grafana/zfs-scrub-annotations.sh;
script = ../services/zfs-scrub-annotations.sh;
python = pkgs.python3;
in
pkgs.testers.runNixOSTest {