Compare commits

..

4 Commits

Author SHA1 Message Date
ce1c335230 caddy: wildcard TLS via DNS-01 challenge + ddns-updater for Njalla
Some checks failed
Build and Deploy / deploy (push) Failing after 31m3s
Build Caddy with the caddy-dns/njalla plugin to enable DNS-01 ACME
challenges. This issues a single wildcard certificate for
*.sigkill.computer instead of per-subdomain certificates, reducing
Let's Encrypt API calls and certificate management overhead.

Add ddns-updater service (nixpkgs services.ddns-updater) configured
with Njalla provider to automatically update DNS records when the
server's public IP changes.
2026-04-09 19:54:57 -04:00
e9ce1ce0a2 grafana: replace llama-cpp-annotations daemon with prometheus query 2026-04-09 19:54:57 -04:00
a3a6700106 grafana: replace disk-usage-collector with prometheus-zfs-exporter
The custom disk-usage-collector shell script + minutely timer is replaced
by prometheus-zfs-exporter (pdf/zfs_exporter, packaged in nixpkgs as
services.prometheus.exporters.zfs). The exporter provides pool capacity
metrics (allocated/free/size) natively.

Partition metrics (/boot, /persistent, /nix) now use node_exporter's
built-in filesystem collector (node_filesystem_*_bytes) which already
runs and collects these metrics.

Also fixes a latent race condition in serviceMountWithZpool: the -mounts
service now orders after zfs-mount.service (which runs 'zfs mount -a'),
not just after pool import. Without this, the mount check could run
before datasets are actually mounted.
2026-04-09 19:54:57 -04:00
75319256f3 lib: add mkCaddyReverseProxy, mkFail2banJail, mkGrafanaAnnotationService, extractArrApiKey 2026-04-09 19:54:57 -04:00
39 changed files with 290 additions and 623 deletions

View File

@@ -71,6 +71,8 @@
./services/mollysocket.nix
./services/harmonia.nix
./services/ddns-updater.nix
];
# Hosts entries for CI/CD deploy targets

View File

@@ -46,6 +46,20 @@
group = "caddy";
};
# Njalla API token (NJALLA_API_TOKEN=...) for Caddy DNS-01 challenge
njalla-api-token-env = {
file = ../secrets/njalla-api-token-env.age;
mode = "0400";
owner = "caddy";
group = "caddy";
};
# ddns-updater config.json with Njalla provider credentials
ddns-updater-config = {
file = ../secrets/ddns-updater-config.age;
mode = "0400";
};
jellyfin-api-key = {
file = ../secrets/jellyfin-api-key.age;
mode = "0400";

View File

@@ -59,8 +59,12 @@ inputs.nixpkgs.lib.extend (
{ pkgs, config, ... }:
{
systemd.services."${serviceName}-mounts" = {
wants = [ "zfs.target" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
after = lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
wants = [
"zfs.target"
"zfs-mount.service"
]
++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
after = [ "zfs-mount.service" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
before = [ "${serviceName}.service" ];
serviceConfig = {
@@ -176,5 +180,108 @@ inputs.nixpkgs.lib.extend (
after = [ "${serviceName}-file-perms.service" ];
};
};
# Builds a NixOS module adding one Caddy virtualHost that reverse-proxies to a
# local port, or — when vpn = true — to the same port inside the wg VPN
# namespace. Pass either `subdomain` (becomes "<sub>.<https domain>") or a full
# `domain`; exactly one of the two must be set.
mkCaddyReverseProxy =
  {
    subdomain ? null,
    domain ? null,
    port,
    auth ? false,
    vpn ? false,
  }:
  # XOR guard: exactly one of subdomain/domain.
  assert (subdomain != null) != (domain != null);
  { config, ... }:
  let
    host =
      if subdomain != null then "${subdomain}.${service_configs.https.domain}" else domain;
    portStr = builtins.toString port;
    # Plain ":port" binds to the local host; the VPN form targets the
    # namespace's address so traffic reaches the confined service.
    target =
      if vpn then "${config.vpnNamespaces.wg.namespaceAddress}:${portStr}" else ":${portStr}";
    # Optional basic-auth import line (trailing newline keeps directives separate).
    authLine = lib.optionalString auth "import ${config.age.secrets.caddy_auth.path}\n";
  in
  {
    services.caddy.virtualHosts.${host}.extraConfig = authLine + "reverse_proxy ${target}";
  };
# Module factory for a fail2ban jail that watches the systemd journal of a
# single unit. Encodes the common setup: journal backend, http/https ports,
# and fail2ban's default thresholds (maxretry=5, findtime=10m, bantime=10m).
mkFail2banJail =
  {
    name,
    # Journal unit to match; defaults to "<name>.service".
    unitName ? "${name}.service",
    failregex,
  }:
  { ... }:
  {
    services.fail2ban.jails.${name} = {
      enabled = true;
      filter.Definition = {
        journalmatch = "_SYSTEMD_UNIT=${unitName}";
        inherit failregex;
        ignoreregex = "";
      };
      settings = {
        backend = "systemd";
        port = "http,https";
      };
    };
  };
# Factory for a hardened Grafana annotation daemon.
# Wraps a Python script in a sandboxed systemd unit named "<name>-annotations"
# with DynamicUser, a private state directory, and GRAFANA_URL/STATE_FILE set
# automatically. Caller-supplied `environment` entries win on key collisions.
mkGrafanaAnnotationService =
  {
    name,
    description,
    script,
    after ? [ ],
    environment ? { },
    # Optional systemd LoadCredential= value (e.g. "key:/path/to/secret").
    loadCredential ? null,
  }:
  let
    unit = "${name}-annotations";
    credential = lib.optionalAttrs (loadCredential != null) {
      LoadCredential = loadCredential;
    };
  in
  {
    systemd.services.${unit} = {
      inherit description;
      wantedBy = [ "multi-user.target" ];
      after = [
        "network.target"
        "grafana.service"
      ]
      ++ after;
      environment = {
        GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
        STATE_FILE = "/var/lib/${unit}/state.json";
      }
      // environment;
      serviceConfig = {
        ExecStart = "${pkgs.python3}/bin/python3 ${script}";
        Restart = "always";
        RestartSec = "10s";
        # Sandboxing: throwaway user, read-only system, private /tmp,
        # network-only sockets, no writable+executable memory.
        DynamicUser = true;
        StateDirectory = unit;
        NoNewPrivileges = true;
        ProtectSystem = "strict";
        ProtectHome = true;
        PrivateTmp = true;
        RestrictAddressFamilies = [
          "AF_INET"
          "AF_INET6"
        ];
        MemoryDenyWriteExecute = true;
      }
      // credential;
    };
  };
# Shell snippet — intended for $(...) command substitution — that pulls the
# <ApiKey> value out of an *arr application's config.xml.
extractArrApiKey =
  configXmlPath:
  let
    grep = lib.getExe pkgs.gnugrep;
  in
  # PCRE lookbehind matches the text between <ApiKey> and the next '<'.
  "${grep} -oP '(?<=<ApiKey>)[^<]+' ${configXmlPath}";
}
)

Binary file not shown.

Binary file not shown.

View File

@@ -189,6 +189,10 @@ rec {
port = 9563;
proto = "tcp";
};
prometheus_zfs = {
port = 9134;
proto = "tcp";
};
harmonia = {
port = 5500;
proto = "tcp";

View File

@@ -1,5 +1,6 @@
{
pkgs,
lib,
service_configs,
...
}:
@@ -12,7 +13,6 @@ let
curl = "${pkgs.curl}/bin/curl";
jq = "${pkgs.jq}/bin/jq";
grep = "${pkgs.gnugrep}/bin/grep";
# Max items to search per cycle per category (missing + cutoff) per app
maxPerCycle = 5;
@@ -20,8 +20,8 @@ let
searchScript = pkgs.writeShellScript "arr-search" ''
set -euo pipefail
RADARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
SONARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})
RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
search_radarr() {
local endpoint="$1"

View File

@@ -16,6 +16,11 @@
(lib.serviceFilePerms "bazarr" [
"Z ${service_configs.bazarr.dataDir} 0700 ${config.services.bazarr.user} ${config.services.bazarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "bazarr";
port = service_configs.ports.private.bazarr.port;
auth = true;
})
];
services.bazarr = {
@@ -23,11 +28,6 @@
listenPort = service_configs.ports.private.bazarr.port;
};
services.caddy.virtualHosts."bazarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.bazarr.port}
'';
users.users.${config.services.bazarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -13,6 +13,10 @@
(lib.serviceFilePerms "jellyseerr" [
"Z ${service_configs.jellyseerr.configDir} 0700 jellyseerr jellyseerr"
])
(lib.mkCaddyReverseProxy {
subdomain = "jellyseerr";
port = service_configs.ports.private.jellyseerr.port;
})
];
services.jellyseerr = {
@@ -36,8 +40,4 @@
users.groups.jellyseerr = { };
services.caddy.virtualHosts."jellyseerr.${service_configs.https.domain}".extraConfig = ''
# import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.jellyseerr.port}
'';
}

View File

@@ -14,6 +14,12 @@
(lib.serviceFilePerms "prowlarr" [
"Z ${service_configs.prowlarr.dataDir} 0700 prowlarr prowlarr"
])
(lib.mkCaddyReverseProxy {
subdomain = "prowlarr";
port = service_configs.ports.private.prowlarr.port;
auth = true;
vpn = true;
})
];
services.prowlarr = {
@@ -51,8 +57,4 @@
ExecStart = lib.mkForce "${lib.getExe pkgs.prowlarr} -nobrowser -data=${service_configs.prowlarr.dataDir}";
};
services.caddy.virtualHosts."prowlarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}
'';
}

View File

@@ -16,6 +16,11 @@
(lib.serviceFilePerms "radarr" [
"Z ${service_configs.radarr.dataDir} 0700 ${config.services.radarr.user} ${config.services.radarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "radarr";
port = service_configs.ports.private.radarr.port;
auth = true;
})
];
services.radarr = {
@@ -25,11 +30,6 @@
settings.update.mechanism = "external";
};
services.caddy.virtualHosts."radarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.radarr.port}
'';
users.users.${config.services.radarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -13,8 +13,8 @@ let
# Runs as root (via + prefix) after the NixOS module writes config.json.
# Extracts API keys from radarr/sonarr config.xml and injects them via jq.
injectApiKeys = pkgs.writeShellScript "recyclarr-inject-api-keys" ''
RADARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
SONARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})
RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
${pkgs.jq}/bin/jq \
--arg rk "$RADARR_KEY" \
--arg sk "$SONARR_KEY" \

View File

@@ -16,6 +16,11 @@
(lib.serviceFilePerms "sonarr" [
"Z ${service_configs.sonarr.dataDir} 0700 ${config.services.sonarr.user} ${config.services.sonarr.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "sonarr";
port = service_configs.ports.private.sonarr.port;
auth = true;
})
];
systemd.tmpfiles.rules = [
@@ -31,11 +36,6 @@
settings.update.mechanism = "external";
};
services.caddy.virtualHosts."sonarr.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${builtins.toString service_configs.ports.private.sonarr.port}
'';
users.users.${config.services.sonarr.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -8,6 +8,12 @@
{
imports = [
(lib.vpnNamespaceOpenPort service_configs.ports.private.bitmagnet.port "bitmagnet")
(lib.mkCaddyReverseProxy {
subdomain = "bitmagnet";
port = service_configs.ports.private.bitmagnet.port;
auth = true;
vpn = true;
})
];
services.bitmagnet = {
@@ -24,8 +30,4 @@
};
};
services.caddy.virtualHosts."bitmagnet.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.bitmagnet.port}
'';
}

View File

@@ -13,6 +13,10 @@
(lib.serviceFilePerms "vaultwarden" [
"Z ${service_configs.vaultwarden.path} 0700 vaultwarden vaultwarden"
])
(lib.mkFail2banJail {
name = "vaultwarden";
failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
})
];
services.vaultwarden = {
@@ -38,18 +42,4 @@
}
'';
# Protect Vaultwarden login from brute force attacks
services.fail2ban.jails.vaultwarden = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=vaultwarden.service";
};
};
}

View File

@@ -56,9 +56,19 @@ in
enable = true;
email = "titaniumtown@proton.me";
# Enable on-demand TLS for old domain redirects
# Certs are issued dynamically when subdomains are accessed
# Build with Njalla DNS provider for DNS-01 ACME challenges (wildcard certs)
package = pkgs.caddy.withPlugins {
plugins = [ "github.com/caddy-dns/njalla@v0.0.0-20250823094507-f709141f1fe6" ];
hash = "sha256-rrOAR6noTDpV/I/hZXxhz0OXVJKu0mFQRq87RUrpmzw=";
};
globalConfig = ''
# Wildcard cert for *.${newDomain} via DNS-01 challenge
acme_dns njalla {
api_token {env.NJALLA_API_TOKEN}
}
# On-demand TLS for old domain redirects
on_demand_tls {
ask http://localhost:9123/check
}
@@ -106,6 +116,9 @@ in
};
};
# Inject Njalla API token for DNS-01 challenge
systemd.services.caddy.serviceConfig.EnvironmentFile = config.age.secrets.njalla-api-token-env.path;
systemd.tmpfiles.rules = [
"d ${config.services.caddy.dataDir} 700 ${config.services.caddy.user} ${config.services.caddy.group}"
];

14
services/ddns-updater.nix Normal file
View File

@@ -0,0 +1,14 @@
# ddns-updater: keeps DNS records pointed at this host's current public IP.
{ config, ... }:
{
  services.ddns-updater = {
    enable = true;
    environment = {
      # Agenix-managed config.json (provider credentials), read at runtime.
      CONFIG_FILEPATH = config.age.secrets.ddns-updater-config.path;
      # Re-check the public IP every five minutes.
      PERIOD = "5m";
    };
  };
}

View File

@@ -6,6 +6,13 @@
...
}:
{
imports = [
(lib.mkCaddyReverseProxy {
domain = service_configs.firefox_syncserver.domain;
port = service_configs.ports.private.firefox_syncserver.port;
})
];
services.firefox-syncserver = {
enable = true;
database = {
@@ -33,7 +40,4 @@
];
};
services.caddy.virtualHosts."${service_configs.firefox_syncserver.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.firefox_syncserver.port}
'';
}

View File

@@ -11,6 +11,14 @@
(lib.serviceFilePerms "gitea" [
"Z ${config.services.gitea.stateDir} 0700 ${config.services.gitea.user} ${config.services.gitea.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.gitea.domain;
port = service_configs.ports.private.gitea.port;
})
(lib.mkFail2banJail {
name = "gitea";
failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
})
];
services.gitea = {
@@ -41,10 +49,6 @@
};
};
services.caddy.virtualHosts."${service_configs.gitea.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.gitea.settings.server.HTTP_PORT}
'';
services.postgresql = {
ensureDatabases = [ config.services.gitea.user ];
ensureUsers = [
@@ -58,18 +62,4 @@
services.openssh.settings.AllowUsers = [ config.services.gitea.user ];
# Protect Gitea login from brute force attacks
services.fail2ban.jails.gitea = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=gitea.service";
};
};
}

View File

@@ -50,15 +50,12 @@ let
}
{
name = "LLM Requests";
datasource = {
type = "grafana";
uid = "-- Grafana --";
};
datasource = promDs;
enable = true;
iconColor = "purple";
showIn = 0;
type = "tags";
tags = [ "llama-cpp" ];
expr = "llamacpp:requests_processing > 0";
step = "10s";
titleFormat = "LLM inference";
}
];
@@ -613,13 +610,13 @@ let
targets = [
{
datasource = promDs;
expr = "zpool_used_bytes{pool=\"tank\"} / zpool_size_bytes{pool=\"tank\"} * 100";
expr = "zfs_pool_allocated_bytes{pool=\"tank\"} / zfs_pool_size_bytes{pool=\"tank\"} * 100";
legendFormat = "tank";
refId = "A";
}
{
datasource = promDs;
expr = "zpool_used_bytes{pool=\"hdds\"} / zpool_size_bytes{pool=\"hdds\"} * 100";
expr = "zfs_pool_allocated_bytes{pool=\"hdds\"} / zfs_pool_size_bytes{pool=\"hdds\"} * 100";
legendFormat = "hdds";
refId = "B";
}
@@ -653,19 +650,19 @@ let
targets = [
{
datasource = promDs;
expr = "partition_used_bytes{mount=\"/boot\"} / partition_size_bytes{mount=\"/boot\"} * 100";
expr = "(node_filesystem_size_bytes{mountpoint=\"/boot\"} - node_filesystem_avail_bytes{mountpoint=\"/boot\"}) / node_filesystem_size_bytes{mountpoint=\"/boot\"} * 100";
legendFormat = "/boot";
refId = "A";
}
{
datasource = promDs;
expr = "partition_used_bytes{mount=\"/persistent\"} / partition_size_bytes{mount=\"/persistent\"} * 100";
expr = "(node_filesystem_size_bytes{mountpoint=\"/persistent\"} - node_filesystem_avail_bytes{mountpoint=\"/persistent\"}) / node_filesystem_size_bytes{mountpoint=\"/persistent\"} * 100";
legendFormat = "/persistent";
refId = "B";
}
{
datasource = promDs;
expr = "partition_used_bytes{mount=\"/nix\"} / partition_size_bytes{mount=\"/nix\"} * 100";
expr = "(node_filesystem_size_bytes{mountpoint=\"/nix\"} - node_filesystem_avail_bytes{mountpoint=\"/nix\"}) / node_filesystem_size_bytes{mountpoint=\"/nix\"} * 100";
legendFormat = "/nix";
refId = "C";
}

View File

@@ -5,8 +5,6 @@
./dashboard.nix
./exporters.nix
./jellyfin-annotations.nix
./disk-usage-collector.nix
./llama-cpp-annotations.nix
./zfs-scrub-annotations.nix
];
}

View File

@@ -1,38 +0,0 @@
{
  config,
  pkgs,
  lib,
  ...
}:
let
  # Drop directory consumed by node_exporter's textfile collector.
  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
  collector = pkgs.writeShellApplication {
    name = "disk-usage-collector";
    text = builtins.readFile ./disk-usage-collector.sh;
    runtimeInputs = with pkgs; [
      coreutils
      gawk
      config.boot.zfs.package
      util-linux # for mountpoint
    ];
  };
in
# Only collect when this host actually runs the monitoring stack.
lib.mkIf config.services.grafana.enable {
  # Oneshot unit driven by the minutely timer below; the script writes its
  # metrics to $TEXTFILE.
  systemd.services.disk-usage-collector = {
    description = "Collect ZFS pool and partition usage metrics for Prometheus";
    environment.TEXTFILE = "${textfileDir}/disk-usage.prom";
    serviceConfig = {
      Type = "oneshot";
      ExecStart = lib.getExe collector;
    };
  };
  systemd.timers.disk-usage-collector = {
    wantedBy = [ "timers.target" ];
    timerConfig = {
      OnCalendar = "minutely";
      # Jitter to avoid thundering-herd alignment with other minutely timers.
      RandomizedDelaySec = "10s";
    };
  };
}

View File

@@ -1,44 +0,0 @@
#!/usr/bin/env bash
# Collects ZFS pool utilization and boot partition usage for Prometheus textfile collector
# Metrics are written to "$TMP" first and then mv'd into place, so the
# node_exporter textfile collector never scrapes a half-written file.
set -euo pipefail
TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
TMP="${TEXTFILE}.$$"
{
  echo '# HELP zpool_size_bytes Total size of ZFS pool in bytes'
  echo '# TYPE zpool_size_bytes gauge'
  echo '# HELP zpool_used_bytes Used space in ZFS pool in bytes'
  echo '# TYPE zpool_used_bytes gauge'
  echo '# HELP zpool_free_bytes Free space in ZFS pool in bytes'
  echo '# TYPE zpool_free_bytes gauge'
  # -Hp: scripting mode, parseable, bytes
  zpool list -Hp -o name,size,alloc,free | while IFS=$'\t' read -r name size alloc free; do
    echo "zpool_size_bytes{pool=\"${name}\"} ${size}"
    echo "zpool_used_bytes{pool=\"${name}\"} ${alloc}"
    echo "zpool_free_bytes{pool=\"${name}\"} ${free}"
  done
  echo '# HELP partition_size_bytes Total size of partition in bytes'
  echo '# TYPE partition_size_bytes gauge'
  echo '# HELP partition_used_bytes Used space on partition in bytes'
  echo '# TYPE partition_used_bytes gauge'
  echo '# HELP partition_free_bytes Free space on partition in bytes'
  echo '# TYPE partition_free_bytes gauge'
  # Boot drive partitions: /boot (ESP), /persistent, /nix
  # Use df with 1K blocks and convert to bytes
  for mount in /boot /persistent /nix; do
    # Skip paths that are not real mountpoints on this host.
    if mountpoint -q "$mount" 2>/dev/null; then
      read -r size used avail _ <<< "$(df -k --output=size,used,avail "$mount" | tail -1)"
      size_b=$((size * 1024))
      used_b=$((used * 1024))
      avail_b=$((avail * 1024))
      echo "partition_size_bytes{mount=\"${mount}\"} ${size_b}"
      echo "partition_used_bytes{mount=\"${mount}\"} ${used_b}"
      echo "partition_free_bytes{mount=\"${mount}\"} ${avail_b}"
    fi
  done
} > "$TMP"
mv "$TMP" "$TEXTFILE"

View File

@@ -12,6 +12,11 @@
(lib.serviceFilePerms "grafana" [
"Z ${service_configs.grafana.dir} 0700 grafana grafana"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.grafana.domain;
port = service_configs.ports.private.grafana.port;
auth = true;
})
];
services.grafana = {
@@ -85,11 +90,6 @@
};
};
services.caddy.virtualHosts."${service_configs.grafana.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${toString service_configs.ports.private.grafana.port}
'';
services.postgresql = {
ensureDatabases = [ "grafana" ];
ensureUsers = [

View File

@@ -1,40 +1,18 @@
{
config,
pkgs,
service_configs,
lib,
...
}:
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
systemd.services.jellyfin-annotations = {
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) (
lib.mkGrafanaAnnotationService {
name = "jellyfin";
description = "Jellyfin stream annotation service for Grafana";
after = [
"network.target"
"grafana.service"
];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
ExecStart = "${pkgs.python3}/bin/python3 ${./jellyfin-annotations.py}";
Restart = "always";
RestartSec = "10s";
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
DynamicUser = true;
StateDirectory = "jellyfin-annotations";
NoNewPrivileges = true;
ProtectSystem = "strict";
ProtectHome = true;
PrivateTmp = true;
RestrictAddressFamilies = [
"AF_INET"
"AF_INET6"
];
MemoryDenyWriteExecute = true;
};
script = ./jellyfin-annotations.py;
environment = {
JELLYFIN_URL = "http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}";
GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
STATE_FILE = "/var/lib/jellyfin-annotations/state.json";
POLL_INTERVAL = "30";
};
};
}
loadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
}
)

View File

@@ -1,39 +0,0 @@
{
  config,
  pkgs,
  service_configs,
  lib,
  ...
}:
# Daemon that mirrors llama-cpp inference activity into Grafana annotations.
# Only active when both Grafana and llama-cpp run on this host.
lib.mkIf (config.services.grafana.enable && config.services.llama-cpp.enable) {
  systemd.services.llama-cpp-annotations = {
    description = "LLM request annotation service for Grafana";
    wantedBy = [ "multi-user.target" ];
    after = [
      "grafana.service"
      "llama-cpp.service"
    ];
    environment = {
      GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
      STATE_FILE = "/var/lib/llama-cpp-annotations/state.json";
      POLL_INTERVAL = "5";
      CPU_THRESHOLD = "50";
    };
    serviceConfig = {
      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-annotations.py}";
      Restart = "always";
      RestartSec = "10s";
      # Sandboxing: throwaway user, private state dir, read-only system,
      # network-only sockets, no writable+executable memory.
      DynamicUser = true;
      StateDirectory = "llama-cpp-annotations";
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      RestrictAddressFamilies = [
        "AF_INET"
        "AF_INET6"
      ];
      MemoryDenyWriteExecute = true;
    };
  };
}

View File

@@ -1,155 +0,0 @@
#!/usr/bin/env python3
"""
Grafana annotation service for llama-cpp inference requests.
Monitors llama-server CPU usage via /proc. Creates a Grafana annotation
when inference starts (CPU spikes), closes it when inference ends.
"""
import glob
import json
import os
import sys
import time
import urllib.request
GRAFANA_URL = os.environ.get("GRAFANA_URL", "http://127.0.0.1:3000")
STATE_FILE = os.environ.get("STATE_FILE", "/var/lib/llama-cpp-annotations/state.json")
POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "5"))
CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50"))
def find_llama_pid():
    """Return the PID of a running llama-server process, or None if absent.

    Scans /proc/<pid>/comm entries; processes that vanish mid-scan are
    skipped rather than treated as errors.
    """
    for comm_path in glob.glob("/proc/[0-9]*/comm"):
        try:
            with open(comm_path) as fh:
                name = fh.read().strip()
            if name == "llama-server":
                return int(comm_path.split("/")[2])
        except (OSError, ValueError):
            # Process exited between glob and read, or malformed path piece.
            continue
    return None
def get_cpu_times(pid):
    """Return utime+stime (clock ticks) for *pid*, or None if unreadable."""
    try:
        with open(f"/proc/{pid}/stat") as fh:
            stat_line = fh.read()
        # The comm field (2) may contain spaces or parens; everything after
        # the final ')' is whitespace-delimited. Indices 11 and 12 of that
        # tail are utime and stime (stat fields 14 and 15).
        tail = stat_line.rsplit(")", 1)[-1].split()
        return int(tail[11]) + int(tail[12])
    except (OSError, IndexError, ValueError):
        return None
def http_json(method, url, body=None):
    """Send an HTTP request with an optional JSON body; return decoded JSON.

    Raises urllib/OSError exceptions on failure; 5-second timeout.
    """
    payload = None if body is None else json.dumps(body).encode()
    request = urllib.request.Request(
        url,
        data=payload,
        headers={"Content-Type": "application/json", "Accept": "application/json"},
        method=method,
    )
    with urllib.request.urlopen(request, timeout=5) as response:
        return json.loads(response.read())
def load_state():
    """Load persisted annotation state from STATE_FILE.

    Returns an empty dict when the file is missing or holds invalid JSON.
    """
    try:
        with open(STATE_FILE) as fh:
            return json.load(fh)
    except FileNotFoundError:
        return {}
    except json.JSONDecodeError:
        # Corrupt state file: start fresh rather than crash the daemon.
        return {}
def save_state(state):
    """Atomically persist *state* as JSON at STATE_FILE.

    Writes to a sibling ".tmp" file first, then os.replace()s it into place
    so readers never observe a partially written file.
    """
    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
    tmp_path = STATE_FILE + ".tmp"
    with open(tmp_path, "w") as fh:
        json.dump(state, fh)
    os.replace(tmp_path, STATE_FILE)
def grafana_post(text, start_ms):
    """Create a Grafana annotation tagged "llama-cpp".

    Returns the new annotation's id, or None on any failure (best-effort
    daemon: errors are logged to stderr, never raised).
    """
    payload = {"time": start_ms, "text": text, "tags": ["llama-cpp"]}
    try:
        created = http_json("POST", f"{GRAFANA_URL}/api/annotations", payload)
        return created.get("id")
    except Exception as e:
        print(f"Error posting annotation: {e}", file=sys.stderr)
        return None
def grafana_close(grafana_id, end_ms, text=None):
    """Set the end time (and optionally final text) of an existing annotation.

    Best-effort: failures are logged to stderr and swallowed.
    """
    payload = {"timeEnd": end_ms}
    if text is not None:
        payload["text"] = text
    try:
        http_json("PATCH", f"{GRAFANA_URL}/api/annotations/{grafana_id}", payload)
    except Exception as e:
        print(f"Error closing annotation {grafana_id}: {e}", file=sys.stderr)
def main():
    """Poll llama-server CPU usage; mirror busy periods as Grafana annotations.

    Rising edge (CPU% above CPU_THRESHOLD) opens an annotation; falling edge
    closes it with the measured duration. State survives restarts via
    load_state/save_state.
    """
    state = load_state()
    prev_ticks = None  # CPU tick count from the previous sample
    prev_time = None   # monotonic timestamp of the previous sample
    hz = os.sysconf("SC_CLK_TCK")  # clock ticks per second, for CPU% math
    while True:
        now_ms = int(time.time() * 1000)
        pid = find_llama_pid()
        if pid is None:
            # Process gone: drop the baseline so a later sample from a new
            # PID doesn't produce a bogus delta.
            prev_ticks = None
            prev_time = None
            time.sleep(POLL_INTERVAL)
            continue
        ticks = get_cpu_times(pid)
        now = time.monotonic()
        if ticks is None or prev_ticks is None or prev_time is None:
            # First successful sample for this process: just set the baseline.
            prev_ticks = ticks
            prev_time = now
            time.sleep(POLL_INTERVAL)
            continue
        dt = now - prev_time
        if dt <= 0:
            # Degenerate interval; resample rather than divide by zero.
            prev_ticks = ticks
            prev_time = now
            time.sleep(POLL_INTERVAL)
            continue
        # Percent of one CPU consumed over the sampling window.
        cpu_pct = ((ticks - prev_ticks) / hz) / dt * 100
        prev_ticks = ticks
        prev_time = now
        busy = cpu_pct > CPU_THRESHOLD
        if busy and "active" not in state:
            # Rising edge: open an annotation at the current wall-clock time.
            grafana_id = grafana_post("LLM request", now_ms)
            if grafana_id is not None:
                state["active"] = {
                    "grafana_id": grafana_id,
                    "start_ms": now_ms,
                }
                save_state(state)
        elif not busy and "active" in state:
            # Falling edge: close the open annotation with its duration.
            info = state.pop("active")
            duration_s = (now_ms - info["start_ms"]) / 1000
            text = f"LLM request ({duration_s:.1f}s)"
            grafana_close(info["grafana_id"], now_ms, text)
            save_state(state)
        time.sleep(POLL_INTERVAL)
if __name__ == "__main__":
    main()

View File

@@ -44,6 +44,12 @@ in
listenAddress = "127.0.0.1";
apcupsdAddress = "127.0.0.1:3551";
};
zfs = {
enable = true;
port = service_configs.ports.private.prometheus_zfs.port;
listenAddress = "127.0.0.1";
};
};
scrapeConfigs = [
@@ -89,6 +95,12 @@ in
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
];
}
{
job_name = "zfs";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_zfs.port}" ]; }
];
}
];
};

View File

@@ -16,6 +16,15 @@
(lib.serviceFilePerms "immich-server" [
"Z ${config.services.immich.mediaLocation} 0770 ${config.services.immich.user} ${config.services.immich.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "immich";
port = service_configs.ports.private.immich.port;
})
(lib.mkFail2banJail {
name = "immich";
unitName = "immich-server.service";
failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
})
];
services.immich = {
@@ -29,10 +38,6 @@
};
};
services.caddy.virtualHosts."immich.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.immich.port}
'';
environment.systemPackages = with pkgs; [
immich-go
];
@@ -42,18 +47,4 @@
"render"
];
# Protect Immich login from brute force attacks
services.fail2ban.jails.immich = {
enabled = true;
settings = {
backend = "systemd";
port = "http,https";
# defaults: maxretry=5, findtime=10m, bantime=10m
};
filter.Definition = {
failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
ignoreregex = "";
journalmatch = "_SYSTEMD_UNIT=immich-server.service";
};
};
}

View File

@@ -13,6 +13,13 @@ let
modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
in
{
imports = [
(lib.mkCaddyReverseProxy {
subdomain = "llm";
port = service_configs.ports.private.llama_cpp.port;
})
];
services.llama-cpp = {
enable = true;
model = toString (
@@ -94,10 +101,4 @@ in
+ " ${utils.escapeSystemdExecArgs cfg.extraFlags}"
);
# Auth handled by llama-cpp --api-key-file (Bearer token).
# No caddy_auth — the API key is the auth layer, and caddy_auth's basic
# auth would block Bearer-only clients like oh-my-pi.
services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${toString config.services.llama-cpp.port}
'';
}

View File

@@ -12,6 +12,10 @@
(lib.serviceFilePerms "continuwuity" [
"Z /var/lib/private/continuwuity 0770 ${config.services.matrix-continuwuity.user} ${config.services.matrix-continuwuity.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.matrix.domain;
port = service_configs.ports.private.matrix.port;
})
];
services.matrix-continuwuity = {
@@ -53,10 +57,6 @@
respond /.well-known/matrix/client `{"m.server":{"base_url":"https://${service_configs.matrix.domain}"},"m.homeserver":{"base_url":"https://${service_configs.matrix.domain}"},"org.matrix.msc3575.proxy":{"base_url":"https://${config.services.matrix-continuwuity.settings.global.server_name}"},"org.matrix.msc4143.rtc_foci":[{"type":"livekit","livekit_service_url":"https://${service_configs.livekit.domain}"}]}`
'';
services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.matrix.port}
'';
# Exact duplicate for federation port
services.caddy.virtualHosts."${service_configs.matrix.domain}:${builtins.toString service_configs.ports.public.matrix_federation.port}".extraConfig =
config.services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig;

View File

@@ -12,6 +12,10 @@
(lib.serviceFilePerms "ntfy-sh" [
"Z /var/lib/private/ntfy-sh 0700 ${config.services.ntfy-sh.user} ${config.services.ntfy-sh.group}"
])
(lib.mkCaddyReverseProxy {
domain = service_configs.ntfy.domain;
port = service_configs.ports.private.ntfy.port;
})
];
services.ntfy-sh = {
@@ -27,8 +31,4 @@
};
};
services.caddy.virtualHosts."${service_configs.ntfy.domain}".extraConfig = ''
reverse_proxy :${builtins.toString service_configs.ports.private.ntfy.port}
'';
}

View File

@@ -27,6 +27,12 @@ in
"z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.TempPath} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
"Z ${config.services.qbittorrent.profileDir} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "torrent";
port = service_configs.ports.private.torrent.port;
auth = true;
vpn = true;
})
];
services.qbittorrent = {
@@ -156,11 +162,6 @@ in
_: path: "d ${path} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group} -"
) service_configs.torrent.categories;
services.caddy.virtualHosts."torrent.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString config.services.qbittorrent.webuiPort}
'';
users.users.${config.services.qbittorrent.user}.extraGroups = [
service_configs.media_group
];

View File

@@ -19,6 +19,10 @@
"Z ${service_configs.slskd.downloads} 0750 ${config.services.slskd.user} music"
"Z ${service_configs.slskd.incomplete} 0750 ${config.services.slskd.user} music"
])
(lib.mkCaddyReverseProxy {
subdomain = "soulseek";
port = service_configs.ports.private.soulseek_web.port;
})
];
users.groups."music" = { };
@@ -58,11 +62,6 @@
users.users.${config.services.jellyfin.user}.extraGroups = [ "music" ];
users.users.${username}.extraGroups = [ "music" ];
# doesn't work with auth????
services.caddy.virtualHosts."soulseek.${service_configs.https.domain}".extraConfig = ''
reverse_proxy :${builtins.toString config.services.slskd.settings.web.port}
'';
networking.firewall.allowedTCPPorts = [
service_configs.ports.public.soulseek_listen.port
];

View File

@@ -17,6 +17,11 @@
"Z ${service_configs.syncthing.signalBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
"Z ${service_configs.syncthing.grayjayBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
])
(lib.mkCaddyReverseProxy {
subdomain = "syncthing";
port = service_configs.ports.private.syncthing_gui.port;
auth = true;
})
];
services.syncthing = {
@@ -49,9 +54,4 @@
];
};
services.caddy.virtualHosts."syncthing.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${toString service_configs.ports.private.syncthing_gui.port}
'';
}

View File

@@ -10,6 +10,11 @@
(lib.serviceMountWithZpool "trilium-server" service_configs.zpool_ssds [
(service_configs.services_dir + "/trilium")
])
(lib.mkCaddyReverseProxy {
subdomain = "notes";
port = service_configs.ports.private.trilium.port;
auth = true;
})
];
services.trilium-server = {
@@ -19,8 +24,4 @@
dataDir = service_configs.trilium.dataDir;
};
services.caddy.virtualHosts."notes.${service_configs.https.domain}".extraConfig = ''
import ${config.age.secrets.caddy_auth.path}
reverse_proxy :${toString service_configs.ports.private.trilium.port}
'';
}

View File

@@ -1,132 +0,0 @@
{
pkgs,
...
}:
# NixOS VM test for the llama-cpp Grafana annotation daemon.
# It starts a mock Grafana API and a mock "llama-server" process inside one
# VM, runs the annotation script against them, and asserts that annotations
# are opened when the mock server goes busy and closed when it goes idle.
let
# Mock Grafana HTTP server; records POSTed annotations into a JSON file.
mockGrafana = ./mock-grafana-server.py;
# The annotation daemon under test.
script = ../services/grafana/llama-cpp-annotations.py;
python = pkgs.python3;
# Mock process that renames itself to "llama-server" (via prctl) and burns
# CPU or sleeps depending on a state file — see mock-llama-server-proc.py.
mockLlamaProcess = ./mock-llama-server-proc.py;
in
pkgs.testers.runNixOSTest {
name = "llama-cpp-annotations";
# Single VM: python3 runs the mocks and the daemon, curl probes the mock
# Grafana API, procps provides pgrep for process discovery.
nodes.machine =
{ pkgs, ... }:
{
environment.systemPackages = [
pkgs.python3
pkgs.curl
pkgs.procps
];
};
# Python test driver (a Nix string literal — interpolations like
# ${python} are substituted by Nix before the driver runs).
testScript = ''
import json
import time
GRAFANA_PORT = 13000
ANNOTS_FILE = "/tmp/annotations.json"
LLAMA_STATE = "/tmp/llama-state.txt"
STATE_FILE = "/tmp/llama-annot-state.json"
PYTHON = "${python}/bin/python3"
MOCK_GRAFANA = "${mockGrafana}"
MOCK_LLAMA = "${mockLlamaProcess}"
SCRIPT = "${script}"
def read_annotations():
out = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'")
return json.loads(out.strip())
def set_busy():
machine.succeed(f"echo busy > {LLAMA_STATE}")
def set_idle():
machine.succeed(f"echo idle > {LLAMA_STATE}")
start_all()
machine.wait_for_unit("multi-user.target")
with subtest("Start mock services"):
machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
machine.succeed(
f"systemd-run --unit=mock-grafana {PYTHON} {MOCK_GRAFANA} {GRAFANA_PORT} {ANNOTS_FILE}"
)
machine.succeed(
f"systemd-run --unit=mock-llama {PYTHON} {MOCK_LLAMA} {LLAMA_STATE}"
)
machine.wait_until_succeeds(
f"curl -sf http://127.0.0.1:{GRAFANA_PORT}/api/annotations -X POST "
f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
timeout=10,
)
machine.wait_until_succeeds(
"pgrep -x llama-server",
timeout=10,
)
machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
with subtest("Start annotation service"):
machine.succeed(
f"systemd-run --unit=llama-annot "
f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
f"--setenv=STATE_FILE={STATE_FILE} "
f"--setenv=POLL_INTERVAL=2 "
f"--setenv=CPU_THRESHOLD=10 "
f"{PYTHON} {SCRIPT}"
)
time.sleep(5)
with subtest("No annotations when idle"):
annots = read_annotations()
assert annots == [], f"Expected no annotations, got: {annots}"
with subtest("Annotation created when llama-server becomes busy"):
set_busy()
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a else 1)\"",
timeout=20,
)
annots = read_annotations()
assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
assert "llama-cpp" in annots[0].get("tags", []), f"Missing tag: {annots[0]}"
assert "LLM request" in annots[0]["text"], f"Missing text: {annots[0]['text']}"
assert "timeEnd" not in annots[0], f"timeEnd should not be set: {annots[0]}"
with subtest("Annotation closed when llama-server becomes idle"):
set_idle()
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a and 'timeEnd' in a[0] else 1)\"",
timeout=20,
)
annots = read_annotations()
assert len(annots) == 1, f"Expected 1, got: {annots}"
assert "timeEnd" in annots[0], f"timeEnd missing: {annots[0]}"
assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
assert "s)" in annots[0].get("text", ""), f"Duration missing: {annots[0]}"
with subtest("State survives restart"):
set_busy()
machine.wait_until_succeeds(
f"cat {ANNOTS_FILE} | {PYTHON} -c "
f"\"import sys,json; a=json.load(sys.stdin); exit(0 if len(a)==2 else 1)\"",
timeout=20,
)
machine.succeed("systemctl stop llama-annot || true")
time.sleep(1)
machine.succeed(
f"systemd-run --unit=llama-annot-2 "
f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
f"--setenv=STATE_FILE={STATE_FILE} "
f"--setenv=POLL_INTERVAL=2 "
f"--setenv=CPU_THRESHOLD=10 "
f"{PYTHON} {SCRIPT}"
)
time.sleep(6)
annots = read_annotations()
assert len(annots) == 2, f"Restart should not duplicate, got: {annots}"
'';
}

View File

@@ -1,42 +0,0 @@
#!/usr/bin/env python3
"""
Mock llama-server process for NixOS VM tests.

Sets /proc/self/comm to "llama-server" via prctl so that monitoring scripts
(llama-cpp-annotations, llama-cpp-xmrig-pause) can discover this process
the same way they discover the real one.

Usage: python3 mock-llama-server-proc.py <state-file>

The state file controls behavior:
    "busy" -> burn CPU in a tight loop (simulates prompt processing / inference)
    "idle" -> sleep (simulates waiting for requests)
"""
import ctypes
import ctypes.util
import sys
import time

# prctl(2) option that sets the calling thread's comm name (/proc/self/comm).
PR_SET_NAME = 15


def set_process_name(name: bytes) -> None:
    """Rename this process via prctl(PR_SET_NAME) so pgrep can find it."""
    libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
    libc.prctl(PR_SET_NAME, name, 0, 0, 0)


def read_state(path: str) -> str:
    """Return the trimmed contents of the state file, or "idle" if unreadable.

    Falling back to "idle" is deliberate best-effort: the test driver may not
    have (re)created the file yet.
    """
    try:
        with open(path) as f:
            return f.read().strip()
    except OSError:
        return "idle"


def burn_cpu(seconds: float) -> None:
    """Spin doing throwaway arithmetic for roughly *seconds* wall time."""
    end = time.monotonic() + seconds
    while time.monotonic() < end:
        _ = sum(range(10000))


def main() -> None:
    """Rename the process, initialize the state file, then loop forever."""
    state_file = sys.argv[1]
    set_process_name(b"llama-server")
    # Start idle; the test driver flips the file to "busy" when it wants load.
    with open(state_file, "w") as f:
        f.write("idle")
    while True:
        if read_state(state_file) == "busy":
            # Short busy bursts so state changes are picked up promptly.
            burn_cpu(0.1)
        else:
            time.sleep(0.5)


if __name__ == "__main__":
    main()

View File

@@ -28,9 +28,6 @@ in
# zfs scrub annotations test
zfsScrubAnnotationsTest = handleTest ./zfs-scrub-annotations.nix;
# llama-cpp tests
llamaCppAnnotationsTest = handleTest ./llama-cpp-annotations.nix;
# xmrig auto-pause test
xmrigAutoPauseTest = handleTest ./xmrig-auto-pause.nix;
# ntfy alerts test