llama-cpp: add gemma 4 graph fix

2026-04-07 22:59:59 -04:00
63 changed files with 862 additions and 1669 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -112,7 +112,6 @@ Each service file in `services/` follows this structure:
 - **Hugepages**: Services needing large pages declare their budget in `service-configs.nix` under `hugepages_2m.services`. The kernel sysctl is set automatically from the total.
 - **Domain**: Primary domain is `sigkill.computer`. Old domain `gardling.com` redirects automatically.
 - **Hardened kernel**: Uses `_hardened` kernel. Security-sensitive defaults apply.
- **PostgreSQL as central database**: All services that support PostgreSQL MUST use it instead of embedded databases (H2, SQLite, etc.). Connect via Unix socket with peer auth when possible (JDBC services can use junixsocket). The PostgreSQL instance is declared in `services/postgresql.nix` with ZFS-backed storage. Use `ensureDatabases`/`ensureUsers` to auto-create databases and roles.

 ### Test Pattern
 Tests use `pkgs.testers.runNixOSTest` (NixOS VM tests):
--- a/README.md
+++ b/README.md
@@ -1,15 +0,0 @@
-# server-config (archived)
-
-This repository has been unified with its sibling `dotfiles` into
-[**titaniumtown/nixos**](https://git.sigkill.computer/titaniumtown/nixos).
-
-The final pre-unification commit is tagged `final-before-unify`.
-
-See the new repo's `README.md` and `AGENTS.md` for:
-
- current flake layout (hosts: mreow, yarn, muffin)
- deploy workflow
- git-crypt / agenix setup
-
-Do **not** push new commits here — CI has been disabled, and muffin's harmonia
-binary-cache no longer serves paths from `/var/lib/dotfiles-deploy/`.
--- a/configuration.nix
+++ b/configuration.nix
@@ -46,7 +46,7 @@

    ./services/soulseek.nix

-    # ./services/llama-cpp.nix
+    ./services/llama-cpp.nix
    ./services/trilium.nix

    ./services/ups.nix
@@ -71,8 +71,6 @@
    ./services/mollysocket.nix

    ./services/harmonia.nix
-
-    ./services/ddns-updater.nix
  ];

  # Hosts entries for CI/CD deploy targets
@@ -133,10 +131,8 @@
  boot.kernel.sysctl."vm.nr_hugepages" = service_configs.hugepages_2m.total_pages;

  boot = {
-    # 6.12 LTS until 2027-03. Kernel 6.18 causes a reproducible ZFS deadlock
-    # in dbuf_evict due to page allocator changes (__free_frozen_pages).
-    # https://github.com/openzfs/zfs/issues/18426
-    kernelPackages = pkgs.linuxPackages_6_12;
+    # 6.12 LTS until 2026
+    kernelPackages = pkgs.linuxPackages_6_12_hardened;

    loader = {
      # Use the systemd-boot EFI boot loader.
--- a/flake.lock
+++ b/flake.lock
@@ -27,17 +27,16 @@
    },
    "arr-init": {
      "inputs": {
-        "flake-utils": "flake-utils",
        "nixpkgs": [
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1776401121,
-        "narHash": "sha256-BELV1YMBuLL0aQNQ3SLvSLq8YN5h2o1jcrwz1+Zt32Q=",
+        "lastModified": 1774681523,
+        "narHash": "sha256-K49RohIwbgzVeOdStfVDO83qy5K5ZLKWk4EsHJKj/k4=",
        "ref": "refs/heads/main",
-        "rev": "6dde2a3e0d087208b8084b61113707c5533c4c2d",
-        "revCount": 19,
+        "rev": "f8475f6cb4d4d4df99002d07cf9583fb33b87876",
+        "revCount": 11,
        "type": "git",
        "url": "ssh://gitea@git.gardling.com/titaniumtown/arr-init"
      },
@@ -194,25 +193,7 @@
    },
    "flake-utils": {
      "inputs": {
-        "systems": "systems_2"
-      },
-      "locked": {
-        "lastModified": 1731533236,
-        "narHash": "sha256-l0KFg5HjrsfsO/JpG+r7fRrqm12kzFHyUHqHCVpMMbI=",
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "rev": "11707dc2f618dd54ca8739b309ec4fc024de578b",
-        "type": "github"
-      },
-      "original": {
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "type": "github"
-      }
-    },
-    "flake-utils_2": {
-      "inputs": {
-        "systems": "systems_6"
+        "systems": "systems_5"
      },
      "locked": {
        "lastModified": 1731533236,
@@ -323,11 +304,11 @@
        "rust-overlay": "rust-overlay"
      },
      "locked": {
-        "lastModified": 1776248416,
-        "narHash": "sha256-TC6yzbCAex1pDfqUZv9u8fVm8e17ft5fNrcZ0JRDOIQ=",
+        "lastModified": 1775510693,
+        "narHash": "sha256-gZfJ07j/oOciDi8mF/V8QTm7YCeDcusNSMZzBFi8OUM=",
        "owner": "nix-community",
        "repo": "lanzaboote",
-        "rev": "18e9e64bae15b828c092658335599122a6db939b",
+        "rev": "3fe0ae8cb285e0ad101a9675f4190d455fb05e85",
        "type": "github"
      },
      "original": {
@@ -344,11 +325,11 @@
        ]
      },
      "locked": {
-        "lastModified": 1776301820,
-        "narHash": "sha256-Yr3JRZ05PNmX4sR2Ak7e0jT+oCQgTAAML7FUoyTmitk=",
+        "lastModified": 1775614184,
+        "narHash": "sha256-OYwr36LLVIeEqccN1mJ2k6vCsFocboCQJnbtne415Ig=",
        "owner": "TheTom",
        "repo": "llama-cpp-turboquant",
-        "rev": "1073622985bb68075472474b4b0fdfcdabcfc9d0",
+        "rev": "eea498c42716519e58baf2d9600d2e2b41839255",
        "type": "github"
      },
      "original": {
@@ -384,14 +365,14 @@
        "nixpkgs": [
          "nixpkgs"
        ],
-        "systems": "systems_4"
+        "systems": "systems_3"
      },
      "locked": {
-        "lastModified": 1776310483,
-        "narHash": "sha256-xMFl+umxGmo5VEgcZcXT5Dk9sXU5WyTRz1Olpywr/60=",
+        "lastModified": 1775531897,
+        "narHash": "sha256-3NIpnV1HxBCwi00iMvj9KcqXkM0VNA72KABj8g0cFFs=",
        "owner": "Infinidoge",
        "repo": "nix-minecraft",
-        "rev": "74abd91054e2655d6c392428a27e5d27edd5e6bf",
+        "rev": "8c7693880cb861e60adeab5480f02dc3e7a390f6",
        "type": "github"
      },
      "original": {
@@ -418,11 +399,11 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1776221942,
-        "narHash": "sha256-FbQAeVNi7G4v3QCSThrSAAvzQTmrmyDLiHNPvTF2qFM=",
+        "lastModified": 1775305101,
+        "narHash": "sha256-/74n1oQPtKG52Yw41cbToxspxHbYz6O3vi+XEw16Qe8=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "1766437c5509f444c1b15331e82b8b6a9b967000",
+        "rev": "36a601196c4ebf49e035270e10b2d103fe39076b",
        "type": "github"
      },
      "original": {
@@ -522,7 +503,7 @@
        "nixpkgs": [
          "nixpkgs"
        ],
-        "systems": "systems_5"
+        "systems": "systems_4"
      },
      "locked": {
        "lastModified": 1771989937,
@@ -643,11 +624,11 @@
        ]
      },
      "locked": {
-        "lastModified": 1776306894,
-        "narHash": "sha256-l4N3O1cfXiQCHJGspAkg6WlZyOFBTbLXhi8Anf8jB0g=",
+        "lastModified": 1775444042,
+        "narHash": "sha256-cg19ipIlZaLYgs/5ZPFcDDuOcZlGzfprB5xS4x7bVM4=",
        "owner": "nix-community",
        "repo": "srvos",
-        "rev": "01d98209264c78cb323b636d7ab3fe8e7a8b60c7",
+        "rev": "64c9cc6a274dac7d08c4d53494ffa4acf906e287",
        "type": "github"
      },
      "original": {
@@ -731,29 +712,14 @@
        "type": "github"
      }
    },
-    "systems_6": {
-      "locked": {
-        "lastModified": 1681028828,
-        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
-        "owner": "nix-systems",
-        "repo": "default",
-        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-systems",
-        "repo": "default",
-        "type": "github"
-      }
-    },
    "trackerlist": {
      "flake": false,
      "locked": {
-        "lastModified": 1776290985,
-        "narHash": "sha256-eNWDOLBA0vk1TiKqse71siIAgLycjvBFDw35eAtnUPs=",
+        "lastModified": 1775599784,
+        "narHash": "sha256-ZapxbiFEYjJV2nhdowHQ/8+c8Jd5fpBIEKDiPEmyNgI=",
        "owner": "ngosang",
        "repo": "trackerslist",
-        "rev": "9bb380b3c2a641a3289f92dedef97016f2e47f36",
+        "rev": "6cc71b5b65349081bb713719f5142c200438a327",
        "type": "github"
      },
      "original": {
@@ -764,7 +730,7 @@
    },
    "utils": {
      "inputs": {
-        "systems": "systems_3"
+        "systems": "systems_2"
      },
      "locked": {
        "lastModified": 1731533236,
@@ -813,7 +779,7 @@
    },
    "ytbn-graphing-software": {
      "inputs": {
-        "flake-utils": "flake-utils_2",
+        "flake-utils": "flake-utils",
        "nixpkgs": "nixpkgs_3",
        "rust-overlay": "rust-overlay_2"
      },
--- a/modules/age-secrets.nix
+++ b/modules/age-secrets.nix
@@ -46,22 +46,6 @@
      group = "caddy";
    };

-    # Njalla API token (NJALLA_API_TOKEN=...) for Caddy DNS-01 challenge
-    njalla-api-token-env = {
-      file = ../secrets/njalla-api-token-env.age;
-      mode = "0400";
-      owner = "caddy";
-      group = "caddy";
-    };
-
-    # ddns-updater config.json with Njalla provider credentials
-    ddns-updater-config = {
-      file = ../secrets/ddns-updater-config.age;
-      mode = "0400";
-      owner = "ddns-updater";
-      group = "ddns-updater";
-    };
-
    jellyfin-api-key = {
      file = ../secrets/jellyfin-api-key.age;
      mode = "0400";
@@ -168,15 +152,6 @@
      group = "gitea-runner";
    };

-    # Git-crypt symmetric key for the new unified nixos repo (Phase 5 of the unify migration).
-    # Added additively here so muffin can decrypt nixos's secrets once Phase 6 cuts CI over.
-    git-crypt-key-nixos = {
-      file = ../secrets/git-crypt-key-nixos.age;
-      mode = "0400";
-      owner = "gitea-runner";
-      group = "gitea-runner";
-    };
-
    # Gitea Actions runner registration token
    gitea-runner-token = {
      file = ../secrets/gitea-runner-token.age;
--- a/modules/hardware.nix
+++ b/modules/hardware.nix
@@ -12,7 +12,7 @@ let
    parent=''${1%%[0-9]*}
    dev="/sys/block/$parent"
    [ -d "$dev/queue/iosched" ] || exit 0
-    echo 500 > "$dev/queue/iosched/read_expire"
+    echo 15000 > "$dev/queue/iosched/read_expire"
    echo 15000 > "$dev/queue/iosched/write_expire"
    echo 128 > "$dev/queue/iosched/fifo_batch"
    echo 16 > "$dev/queue/iosched/writes_starved"
@@ -36,17 +36,11 @@ in
  hardware.cpu.amd.updateMicrocode = true;
  hardware.enableRedistributableFirmware = true;

-  # HDD I/O tuning for torrent seeding workload (high-concurrency random reads)
-  # sharing the pool with latency-sensitive sequential reads (Jellyfin playback).
+  # HDD I/O tuning for torrent seeding workload (high-concurrency random reads).
  #
  # mq-deadline sorts requests into elevator sweeps, reducing seek distance.
-  # read_expire=500ms keeps reads bounded so a Jellyfin segment can't queue for
-  # seconds behind a torrent burst; write_expire=15s lets the scheduler batch
-  # writes for coalescence (torrent writes are async and tolerate delay).
-  # The bulk of read coalescence already happens above the scheduler via ZFS
-  # aggregation (zfs_vdev_aggregation_limit=4M, read_gap_limit=128K,
-  # async_read_max=32), so the scheduler deadline only needs to be large enough
-  # to keep the elevator sweep coherent -- 500ms is plenty on rotational disks.
+  # Aggressive deadlines (15s) let the scheduler accumulate more ops before dispatching,
+  # maximizing coalescence — latency is irrelevant since torrent peers tolerate 30-60s.
  # fifo_batch=128 keeps sweeps long; writes_starved=16 heavily favors reads.
  # 4 MiB readahead matches libtorrent piece extent affinity for sequential prefetch.
  #
--- a/modules/lib.nix
+++ b/modules/lib.nix
@@ -59,12 +59,8 @@ inputs.nixpkgs.lib.extend (
      { pkgs, config, ... }:
      {
        systemd.services."${serviceName}-mounts" = {
-          wants = [
-            "zfs.target"
-            "zfs-mount.service"
-          ]
-          ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
-          after = [ "zfs-mount.service" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
+          wants = [ "zfs.target" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
+          after = lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
          before = [ "${serviceName}.service" ];

          serviceConfig = {
@@ -180,108 +176,5 @@ inputs.nixpkgs.lib.extend (
          after = [ "${serviceName}-file-perms.service" ];
        };
      };
-    # Creates a Caddy virtualHost with reverse_proxy to a local or VPN-namespaced port.
-    # Use `subdomain` for "<name>.${domain}" or `domain` for a full custom domain.
-    # Exactly one of `subdomain` or `domain` must be provided.
-    mkCaddyReverseProxy =
-      {
-        subdomain ? null,
-        domain ? null,
-        port,
-        auth ? false,
-        vpn ? false,
-      }:
-      assert (subdomain != null) != (domain != null);
-      { config, ... }:
-      let
-        vhostDomain = if domain != null then domain else "${subdomain}.${service_configs.https.domain}";
-        upstream =
-          if vpn then
-            "${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString port}"
-          else
-            ":${builtins.toString port}";
-      in
-      {
-        services.caddy.virtualHosts."${vhostDomain}".extraConfig = lib.concatStringsSep "\n" (
-          lib.optional auth "import ${config.age.secrets.caddy_auth.path}" ++ [ "reverse_proxy ${upstream}" ]
-        );
-      };
-
-    # Creates a fail2ban jail with systemd journal backend.
-    # Covers the common pattern: journal-based detection, http/https ports, default thresholds.
-    mkFail2banJail =
-      {
-        name,
-        unitName ? "${name}.service",
-        failregex,
-      }:
-      { ... }:
-      {
-        services.fail2ban.jails.${name} = {
-          enabled = true;
-          settings = {
-            backend = "systemd";
-            port = "http,https";
-            # defaults: maxretry=5, findtime=10m, bantime=10m
-          };
-          filter.Definition = {
-            inherit failregex;
-            ignoreregex = "";
-            journalmatch = "_SYSTEMD_UNIT=${unitName}";
-          };
-        };
-      };
-
-    # Creates a hardened Grafana annotation daemon service.
-    # Provides DynamicUser, sandboxing, state directory, and GRAFANA_URL/STATE_FILE automatically.
-    mkGrafanaAnnotationService =
-      {
-        name,
-        description,
-        script,
-        after ? [ ],
-        environment ? { },
-        loadCredential ? null,
-      }:
-      {
-        systemd.services."${name}-annotations" = {
-          inherit description;
-          after = [
-            "network.target"
-            "grafana.service"
-          ]
-          ++ after;
-          wantedBy = [ "multi-user.target" ];
-          serviceConfig = {
-            ExecStart = "${pkgs.python3}/bin/python3 ${script}";
-            Restart = "always";
-            RestartSec = "10s";
-            DynamicUser = true;
-            StateDirectory = "${name}-annotations";
-            NoNewPrivileges = true;
-            ProtectSystem = "strict";
-            ProtectHome = true;
-            PrivateTmp = true;
-            RestrictAddressFamilies = [
-              "AF_INET"
-              "AF_INET6"
-            ];
-            MemoryDenyWriteExecute = true;
-          }
-          // lib.optionalAttrs (loadCredential != null) {
-            LoadCredential = loadCredential;
-          };
-          environment = {
-            GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
-            STATE_FILE = "/var/lib/${name}-annotations/state.json";
-          }
-          // environment;
-        };
-      };
-
-    # Shell command to extract an API key from an *arr config.xml file.
-    # Returns a string suitable for $() command substitution in shell scripts.
-    extractArrApiKey =
-      configXmlPath: "${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${configXmlPath}";
  }
 )
--- a/modules/security.nix
+++ b/modules/security.nix
@@ -13,89 +13,12 @@
  # disable coredumps
  systemd.coredump.enable = false;

-  # Needed for Nix sandbox UID/GID mapping inside derivation builds.
-  # See https://github.com/NixOS/nixpkgs/issues/287194
+  # The hardened kernel defaults kernel.unprivileged_userns_clone to 0, which
+  # prevents the Nix sandbox from mapping UIDs/GIDs. Without this, any derivation
+  # that calls `id` in its build phase (e.g. logrotate checkPhase) fails when not
+  # served from the binary cache. See https://github.com/NixOS/nixpkgs/issues/287194
  security.unprivilegedUsernsClone = true;

-  # Disable kexec to prevent replacing the running kernel at runtime.
-  security.protectKernelImage = true;
-
-  # Kernel hardening boot parameters. These recover most of the runtime-
-  # configurable protections that the linux-hardened patchset provided.
-  boot.kernelParams = [
-    # Zero all page allocator pages on free / alloc. Prevents info leaks
-    # and use-after-free from seeing stale data. Modest CPU overhead.
-    "init_on_alloc=1"
-    "init_on_free=1"
-
-    # Prevent SLUB allocator from merging caches with similar size/flags.
-    # Keeps different kernel object types in separate slabs, making heap
-    # exploitation (type confusion, spray, use-after-free) significantly harder.
-    "slab_nomerge"
-
-    # Randomize order of pages returned by the buddy allocator.
-    "page_alloc.shuffle=1"
-
-    # Disable debugfs entirely (exposes kernel internals).
-    "debugfs=off"
-
-    # Disable legacy vsyscall emulation (unused by any modern glibc).
-    "vsyscall=none"
-
-    # Strict IOMMU TLB invalidation (no batching). Prevents DMA-capable
-    # devices from accessing stale mappings after unmap.
-    "iommu.strict=1"
-  ];
-
-  boot.kernel.sysctl = {
-    # Immediately reboot on kernel oops (don't leave a compromised
-    # kernel running). Negative value = reboot without delay.
-    "kernel.panic" = -1;
-
-    # Hide kernel pointers from all processes, including CAP_SYSLOG.
-    # Prevents info leaks used to defeat KASLR.
-    "kernel.kptr_restrict" = 2;
-
-    # Disable bpf() JIT compiler (eliminates JIT spray attack vector).
-    "net.core.bpf_jit_enable" = false;
-
-    # Disable ftrace (kernel function tracer) at runtime.
-    "kernel.ftrace_enabled" = false;
-
-    # Strict reverse-path filtering: drop packets arriving on an interface
-    # where the source address isn't routable back via that interface.
-    "net.ipv4.conf.all.rp_filter" = 1;
-    "net.ipv4.conf.default.rp_filter" = 1;
-    "net.ipv4.conf.all.log_martians" = true;
-    "net.ipv4.conf.default.log_martians" = true;
-
-    # Ignore ICMP redirects (prevents route table poisoning).
-    "net.ipv4.conf.all.accept_redirects" = false;
-    "net.ipv4.conf.all.secure_redirects" = false;
-    "net.ipv4.conf.default.accept_redirects" = false;
-    "net.ipv4.conf.default.secure_redirects" = false;
-    "net.ipv6.conf.all.accept_redirects" = false;
-    "net.ipv6.conf.default.accept_redirects" = false;
-
-    # Don't send ICMP redirects (we are not a router).
-    "net.ipv4.conf.all.send_redirects" = false;
-    "net.ipv4.conf.default.send_redirects" = false;
-
-    # Ignore broadcast ICMP (SMURF amplification mitigation).
-    "net.ipv4.icmp_echo_ignore_broadcasts" = true;
-
-    # Filesystem hardening: prevent hardlink/symlink-based attacks.
-    # protected_hardlinks/symlinks: block unprivileged creation of hard/symlinks
-    # to files the user doesn't own (prevents TOCTOU privilege escalation).
-    # protected_fifos/regular (level 2): restrict opening FIFOs and regular files
-    # in world-writable sticky directories to owner/group match only.
-    # Also required for systemd-tmpfiles to chmod hardlinked files.
-    "fs.protected_hardlinks" = true;
-    "fs.protected_symlinks" = true;
-    "fs.protected_fifos" = 2;
-    "fs.protected_regular" = 2;
-  };
-
  services = {
    dbus.implementation = "broker";
    /*
--- a/modules/zfs.nix
+++ b/modules/zfs.nix
@@ -1,39 +1,15 @@
 {
  config,
-  lib,
  service_configs,
  pkgs,
  ...
 }:
-let
-  # Total RAM in bytes (from /proc/meminfo: 65775836 KiB).
-  totalRamBytes = 65775836 * 1024;
-
-  # Hugepage reservations that the kernel carves out before ZFS can use them.
-  hugepages2mBytes = service_configs.hugepages_2m.total_pages * 2 * 1024 * 1024;
-  hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3x 1G pages for RandomX (xmrig.nix)
-  totalHugepageBytes = hugepages2mBytes + hugepages1gBytes;
-
-  # ARC max: 60% of RAM remaining after hugepages. Leaves headroom for
-  # application RSS (PostgreSQL, qBittorrent, Jellyfin, Grafana, etc.),
-  # kernel slabs, and page cache.
-  arcMaxBytes = (totalRamBytes - totalHugepageBytes) * 60 / 100;
-in
 {
-  boot.zfs.package = pkgs.zfs_2_4;
+  boot.zfs.package = pkgs.zfs;
  boot.initrd.kernelModules = [ "zfs" ];

  boot.kernelParams = [
-    # 120s TXG timeout: batch more dirty data per transaction group so the
-    # HDD pool (hdds) writes larger, sequential I/Os instead of many small syncs.
-    # This is a global setting (no per-pool control); the SSD pool (tank) syncs
-    # infrequently but handles it fine since SSDs don't suffer from seek overhead.
-    "zfs.zfs_txg_timeout=120"
-
-    # Cap ARC to prevent it from claiming memory reserved for hugepages.
-    # Without this, ZFS auto-sizes c_max to ~62 GiB on a 64 GiB system,
-    # ignoring the 11.5 GiB of hugepage reservations.
-    "zfs.zfs_arc_max=${toString arcMaxBytes}"
+    "zfs.zfs_txg_timeout=120" # longer TXG open time = larger sequential writes

    # vdev I/O scheduler: feed more concurrent reads to the block scheduler so
    # mq-deadline has a larger pool of requests to sort and merge into elevator sweeps.
--- a/patches/llamacpp/0003-gemma4-tokenizer-fix.patch
+++ b/patches/llamacpp/0003-gemma4-tokenizer-fix.patch
@@ -0,0 +1,88 @@
+From 320c29c2dbe3c8df56374a9ec19a7fe5c124d4f8 Mon Sep 17 00:00:00 2001
+From: Piotr Wilkin <piotr.wilkin@syndatis.com>
+Date: Tue, 7 Apr 2026 00:54:00 +0200
+Subject: [PATCH 1/2] YATF (Yet Another Tokenizer Fix) for Gemma 4. With tests!
+
+---
+ convert_hf_to_gguf_update.py       |   1 +
+ models/ggml-vocab-gemma-4.gguf     | Bin 0 -> 15776467 bytes
+ models/ggml-vocab-gemma-4.gguf.inp | 111 +++++++++++++++++++++++++++++
+ models/ggml-vocab-gemma-4.gguf.out |  46 ++++++++++++
+ src/llama-vocab.cpp                |  13 +++-
+ tests/CMakeLists.txt               |   1 +
+ 6 files changed, 170 insertions(+), 2 deletions(-)
+ create mode 100644 models/ggml-vocab-gemma-4.gguf
+ create mode 100644 models/ggml-vocab-gemma-4.gguf.inp
+ create mode 100644 models/ggml-vocab-gemma-4.gguf.out
+
+diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
+index 086f1c22863..f1d70d62e73 100755
+--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
+@@ -114,6 +114,7 @@ class TOKENIZER_TYPE(IntEnum):
+     {"name": "viking",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+     {"name": "gemma",            "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+     {"name": "gemma-2",          "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
++    {"name": "gemma-4",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
+     {"name": "jais",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+     {"name": "jais-2",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
+     {"name": "t5",               "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
+diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
+index de9a9466bc7..e9e276ab999 100644
+--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
+@@ -658,9 +658,18 @@ struct llm_tokenizer_bpe_session {
+                 const auto token = vocab.text_to_token(str);
+ 
+                 if (token == LLAMA_TOKEN_NULL) {
++                   static const char * hex = "0123456789ABCDEF";
+                     for (auto j = str.begin(); j != str.end(); ++j) {
+-                        std::string byte_str(1, *j);
+-                        auto token_multibyte = vocab.text_to_token(byte_str);
++                        llama_token token_multibyte = LLAMA_TOKEN_NULL;
++                        if (tokenizer.byte_encode) {
++                            std::string byte_str(1, *j);
++                            token_multibyte = vocab.text_to_token(byte_str);
++                        } else {
++                            // For non-byte-encoded BPE (e.g. gemma-4), byte tokens use <0xXX> format
++                            const uint8_t ch = (uint8_t)*j;
++                            const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
++                            token_multibyte = vocab.text_to_token(buf);
++                        }
+                         if (token_multibyte != LLAMA_TOKEN_NULL) {
+                             output.push_back(token_multibyte);
+                         }
+diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
+index 5e87c8b34e1..cd4bc5ef1d3 100644
+--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
+@@ -124,6 +124,7 @@ llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r         ARGS ${PROJE
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder    ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm      ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon            ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
++llama_test(test-tokenizer-0 NAME test-tokenizer-0-gemma-4           ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gemma-4.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2             ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
+ llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm         ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
+
+From 0e98596dec124c6968132ef042c21ccdb20d1304 Mon Sep 17 00:00:00 2001
+From: Piotr Wilkin <piotr.wilkin@syndatis.com>
+Date: Tue, 7 Apr 2026 00:58:08 +0200
+Subject: [PATCH 2/2] Remove unnecessary hash  from update script.
+
+---
+ convert_hf_to_gguf_update.py | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py
+index f1d70d62e73..086f1c22863 100755
+--- a/convert_hf_to_gguf_update.py
+++ b/convert_hf_to_gguf_update.py
+@@ -114,7 +114,6 @@ class TOKENIZER_TYPE(IntEnum):
+     {"name": "viking",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+     {"name": "gemma",            "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+     {"name": "gemma-2",          "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+-    {"name": "gemma-4",          "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/google/gemma-4-E2B-it", },
+     {"name": "jais",             "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
+     {"name": "jais-2",           "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inceptionai/Jais-2-8B-Chat", },
+     {"name": "t5",               "tokt": TOKENIZER_TYPE.UGM, "repo": "https://huggingface.co/google-t5/t5-small", },
--- a/patches/llamacpp/0004-gemma4-graph-fix.patch
+++ b/patches/llamacpp/0004-gemma4-graph-fix.patch
@@ -0,0 +1,24 @@
+From b934a8ca49f9e764fa21d45ff2ce1168a3a7c914 Mon Sep 17 00:00:00 2001
+From: Georgi Gerganov <ggerganov@gmail.com>
+Date: Mon, 6 Apr 2026 11:50:22 +0300
+Subject: [PATCH] models : set gemma 4 FFN MoE prec to F32
+
+---
+ src/llama-graph.cpp | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
+index 0e7d96ca10d..aa8a35721fa 100644
+--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
+@@ -1185,8 +1185,8 @@ ggml_tensor * llm_graph_context::build_ffn(
+ 
+     if (down) {
+         cur = build_lora_mm(down, cur);
+-        if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2) {
+-            // GLM4, GLM4_MOE, and JAIS2 seem to have numerical issues with half-precision accumulators
++        if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2 || arch == LLM_ARCH_GEMMA4) {
++            // certain models seem to have numerical issues with half-precision accumulators
+             ggml_mul_mat_set_prec(cur, GGML_PREC_F32);
+         }
+     }
--- a/patches/nixpkgs/0002-jellyfin-add-declarative-network-xml-options.patch
+++ b/patches/nixpkgs/0002-jellyfin-add-declarative-network-xml-options.patch
@@ -1,443 +0,0 @@
-From f0582558f0a8b0ef543b3251c4a07afab89fde63 Mon Sep 17 00:00:00 2001
-From: Simon Gardling <titaniumtown@proton.me>
-Date: Fri, 17 Apr 2026 19:37:11 -0400
-Subject: [PATCH] nixos/jellyfin: add declarative network.xml options
-
-Adds services.jellyfin.network.* (baseUrl, ports, IPv4/6, LAN subnets,
-known proxies, remote IP filter, etc.) and services.jellyfin.forceNetworkConfig,
-mirroring the existing hardwareAcceleration / forceEncodingConfig pattern.
-
-Motivation: running Jellyfin behind a reverse proxy requires configuring
-KnownProxies (so the real client IP is extracted from X-Forwarded-For)
-and LocalNetworkSubnets (so LAN clients are correctly classified and not
-subject to RemoteClientBitrateLimit). These settings previously had no
-declarative option -- they could only be set via the web dashboard or
-by hand-editing network.xml, with no guarantee they would survive a
-reinstall or be consistent across deployments.
-
-Implementation:
- Adds a networkXmlText template alongside the existing encodingXmlText.
- Factors the force-vs-soft install logic out of preStart into a
-  small 'manage_config_xml' shell helper; encoding.xml and network.xml
-  now share the same install/backup semantics.
- Extends the VM test with a machineWithNetworkConfig node and a
-  subtest that verifies the declared values land in network.xml,
-  Jellyfin parses them at startup, and the backup-on-overwrite path
-  works (same shape as the existing 'Force encoding config' subtest).
---
- nixos/modules/services/misc/jellyfin.nix | 303 ++++++++++++++++++++---
- nixos/tests/jellyfin.nix                 |  50 ++++
- 2 files changed, 317 insertions(+), 36 deletions(-)
-
-diff --git a/nixos/modules/services/misc/jellyfin.nix b/nixos/modules/services/misc/jellyfin.nix
-index 5c08fc478e45..387da907c652 100644
--- a/nixos/modules/services/misc/jellyfin.nix
-+++ b/nixos/modules/services/misc/jellyfin.nix
-@@ -26,8 +26,10 @@ let
-     bool
-     enum
-     ints
-+    listOf
-     nullOr
-     path
-+    port
-     str
-     submodule
-     ;
-@@ -68,6 +70,41 @@ let
-     </EncodingOptions>
-   '';
-   encodingXmlFile = pkgs.writeText "encoding.xml" encodingXmlText;
-+  stringListToXml =
-+    tag: items:
-+    if items == [ ] then
-+      "<${tag} />"
-+    else
-+      "<${tag}>\n    ${
-+        concatMapStringsSep "\n    " (item: "<string>${escapeXML item}</string>") items
-+      }\n  </${tag}>";
-+  networkXmlText = ''
-+    <?xml version="1.0" encoding="utf-8"?>
-+    <NetworkConfiguration xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
-+      <BaseUrl>${escapeXML cfg.network.baseUrl}</BaseUrl>
-+      <EnableHttps>${boolToString cfg.network.enableHttps}</EnableHttps>
-+      <RequireHttps>${boolToString cfg.network.requireHttps}</RequireHttps>
-+      <InternalHttpPort>${toString cfg.network.internalHttpPort}</InternalHttpPort>
-+      <InternalHttpsPort>${toString cfg.network.internalHttpsPort}</InternalHttpsPort>
-+      <PublicHttpPort>${toString cfg.network.publicHttpPort}</PublicHttpPort>
-+      <PublicHttpsPort>${toString cfg.network.publicHttpsPort}</PublicHttpsPort>
-+      <AutoDiscovery>${boolToString cfg.network.autoDiscovery}</AutoDiscovery>
-+      <EnableUPnP>${boolToString cfg.network.enableUPnP}</EnableUPnP>
-+      <EnableIPv4>${boolToString cfg.network.enableIPv4}</EnableIPv4>
-+      <EnableIPv6>${boolToString cfg.network.enableIPv6}</EnableIPv6>
-+      <EnableRemoteAccess>${boolToString cfg.network.enableRemoteAccess}</EnableRemoteAccess>
-+      ${stringListToXml "LocalNetworkSubnets" cfg.network.localNetworkSubnets}
-+      ${stringListToXml "LocalNetworkAddresses" cfg.network.localNetworkAddresses}
-+      ${stringListToXml "KnownProxies" cfg.network.knownProxies}
-+      <IgnoreVirtualInterfaces>${boolToString cfg.network.ignoreVirtualInterfaces}</IgnoreVirtualInterfaces>
-+      ${stringListToXml "VirtualInterfaceNames" cfg.network.virtualInterfaceNames}
-+      <EnablePublishedServerUriByRequest>${boolToString cfg.network.enablePublishedServerUriByRequest}</EnablePublishedServerUriByRequest>
-+      ${stringListToXml "PublishedServerUriBySubnet" cfg.network.publishedServerUriBySubnet}
-+      ${stringListToXml "RemoteIPFilter" cfg.network.remoteIPFilter}
-+      <IsRemoteIPFilterBlacklist>${boolToString cfg.network.isRemoteIPFilterBlacklist}</IsRemoteIPFilterBlacklist>
-+    </NetworkConfiguration>
-+  '';
-+  networkXmlFile = pkgs.writeText "network.xml" networkXmlText;
-   codecListToType =
-     desc: list:
-     submodule {
-@@ -205,6 +242,196 @@ in
-         '';
-       };
- 
-+      network = {
-+        baseUrl = mkOption {
-+          type = str;
-+          default = "";
-+          example = "/jellyfin";
-+          description = ''
-+            Prefix added to Jellyfin's internal URLs when it sits behind a reverse proxy at a sub-path.
-+            Leave empty when Jellyfin is served at the root of its host.
-+          '';
-+        };
-+
-+        enableHttps = mkOption {
-+          type = bool;
-+          default = false;
-+          description = ''
-+            Serve HTTPS directly from Jellyfin. Usually unnecessary when terminating TLS in a reverse proxy.
-+          '';
-+        };
-+
-+        requireHttps = mkOption {
-+          type = bool;
-+          default = false;
-+          description = ''
-+            Redirect plaintext HTTP requests to HTTPS. Only meaningful when {option}`enableHttps` is true.
-+          '';
-+        };
-+
-+        internalHttpPort = mkOption {
-+          type = port;
-+          default = 8096;
-+          description = "TCP port Jellyfin binds for HTTP.";
-+        };
-+
-+        internalHttpsPort = mkOption {
-+          type = port;
-+          default = 8920;
-+          description = "TCP port Jellyfin binds for HTTPS. Only used when {option}`enableHttps` is true.";
-+        };
-+
-+        publicHttpPort = mkOption {
-+          type = port;
-+          default = 8096;
-+          description = "HTTP port Jellyfin advertises in server discovery responses and published URIs.";
-+        };
-+
-+        publicHttpsPort = mkOption {
-+          type = port;
-+          default = 8920;
-+          description = "HTTPS port Jellyfin advertises in server discovery responses and published URIs.";
-+        };
-+
-+        autoDiscovery = mkOption {
-+          type = bool;
-+          default = true;
-+          description = "Respond to LAN client auto-discovery broadcasts (UDP 7359).";
-+        };
-+
-+        enableUPnP = mkOption {
-+          type = bool;
-+          default = false;
-+          description = "Attempt to open the public ports on the router via UPnP.";
-+        };
-+
-+        enableIPv4 = mkOption {
-+          type = bool;
-+          default = true;
-+          description = "Listen on IPv4.";
-+        };
-+
-+        enableIPv6 = mkOption {
-+          type = bool;
-+          default = true;
-+          description = "Listen on IPv6.";
-+        };
-+
-+        enableRemoteAccess = mkOption {
-+          type = bool;
-+          default = true;
-+          description = ''
-+            Allow connections from clients outside the subnets listed in {option}`localNetworkSubnets`.
-+            When false, Jellyfin rejects non-local requests regardless of reverse proxy configuration.
-+          '';
-+        };
-+
-+        localNetworkSubnets = mkOption {
-+          type = listOf str;
-+          default = [ ];
-+          example = [
-+            "192.168.1.0/24"
-+            "10.0.0.0/8"
-+          ];
-+          description = ''
-+            CIDR ranges (or bare IPs) that Jellyfin classifies as the local network.
-+            Clients originating from these ranges -- as seen after {option}`knownProxies` X-Forwarded-For
-+            unwrapping -- are not subject to {option}`services.jellyfin` remote-client bitrate limits.
-+          '';
-+        };
-+
-+        localNetworkAddresses = mkOption {
-+          type = listOf str;
-+          default = [ ];
-+          example = [ "192.168.1.50" ];
-+          description = ''
-+            Specific interface addresses Jellyfin binds to. Leave empty to bind all interfaces.
-+          '';
-+        };
-+
-+        knownProxies = mkOption {
-+          type = listOf str;
-+          default = [ ];
-+          example = [ "127.0.0.1" ];
-+          description = ''
-+            Addresses of reverse proxies trusted to forward the real client IP via `X-Forwarded-For`.
-+            Without this, Jellyfin sees the proxy's address for every request and cannot apply
-+            {option}`localNetworkSubnets` classification to the true client.
-+          '';
-+        };
-+
-+        ignoreVirtualInterfaces = mkOption {
-+          type = bool;
-+          default = true;
-+          description = "Skip virtual network interfaces (matching {option}`virtualInterfaceNames`) during auto-bind.";
-+        };
-+
-+        virtualInterfaceNames = mkOption {
-+          type = listOf str;
-+          default = [ "veth" ];
-+          description = "Interface name prefixes treated as virtual when {option}`ignoreVirtualInterfaces` is true.";
-+        };
-+
-+        enablePublishedServerUriByRequest = mkOption {
-+          type = bool;
-+          default = false;
-+          description = ''
-+            Derive the server's public URI from the incoming request's Host header instead of any
-+            configured {option}`publishedServerUriBySubnet` entry.
-+          '';
-+        };
-+
-+        publishedServerUriBySubnet = mkOption {
-+          type = listOf str;
-+          default = [ ];
-+          example = [ "192.168.1.0/24=http://jellyfin.lan:8096" ];
-+          description = ''
-+            Per-subnet overrides for the URI Jellyfin advertises to clients, in `subnet=uri` form.
-+          '';
-+        };
-+
-+        remoteIPFilter = mkOption {
-+          type = listOf str;
-+          default = [ ];
-+          example = [ "203.0.113.0/24" ];
-+          description = ''
-+            IPs or CIDRs used as the allow- or denylist for remote access.
-+            Behaviour is controlled by {option}`isRemoteIPFilterBlacklist`.
-+          '';
-+        };
-+
-+        isRemoteIPFilterBlacklist = mkOption {
-+          type = bool;
-+          default = false;
-+          description = ''
-+            When true, {option}`remoteIPFilter` is a denylist; when false, it is an allowlist
-+            (and an empty list allows all remote addresses).
-+          '';
-+        };
-+      };
-+
-+      forceNetworkConfig = mkOption {
-+        type = bool;
-+        default = false;
-+        description = ''
-+          Whether to overwrite Jellyfin's `network.xml` configuration file on each service start.
-+
-+          When enabled, the network configuration specified in {option}`services.jellyfin.network`
-+          is applied on every service restart. A backup of the existing `network.xml` will be
-+          created at `network.xml.backup-$timestamp`.
-+
-+          ::: {.warning}
-+          Enabling this option means that any changes made to networking settings through
-+          Jellyfin's web dashboard will be lost on the next service restart. The NixOS configuration
-+          becomes the single source of truth for network settings.
-+          :::
-+
-+          When disabled (the default), the network configuration is only written if no `network.xml`
-+          exists yet. This allows settings to be changed through Jellyfin's web dashboard and persist
-+          across restarts, but means the NixOS configuration options will be ignored after the initial setup.
-+        '';
-+      };
-+
-       transcoding = {
-         maxConcurrentStreams = mkOption {
-           type = nullOr ints.positive;
-@@ -384,46 +611,50 @@ in
-         wants = [ "network-online.target" ];
-         wantedBy = [ "multi-user.target" ];
- 
-        preStart = mkIf cfg.hardwareAcceleration.enable (
-          ''
-            configDir=${escapeShellArg cfg.configDir}
-            encodingXml="$configDir/encoding.xml"
-          ''
-          + (
-            if cfg.forceEncodingConfig then
-              ''
-                if [[ -e $encodingXml ]]; then
-+        preStart =
-+          let
-+            # manage_config_xml <source> <destination> <force> <description>
-+            #
-+            # Installs a NixOS-declared XML config at <destination>, preserving
-+            # any existing file as a timestamped backup when <force> is true.
-+            # With <force>=false, leaves existing files untouched and warns if
-+            # the on-disk content differs from the declared content.
-+            helper = ''
-+              manage_config_xml() {
-+                local src="$1" dest="$2" force="$3" desc="$4"
-+                if [[ -e "$dest" ]]; then
-                   # this intentionally removes trailing newlines
-                  currentText="$(<"$encodingXml")"
-                  configuredText="$(<${encodingXmlFile})"
-                  if [[ $currentText == "$configuredText" ]]; then
-                    # don't need to do anything
-                    exit 0
-                  else
-                    encodingXmlBackup="$configDir/encoding.xml.backup-$(date -u +"%FT%H_%M_%SZ")"
-                    mv --update=none-fail -T "$encodingXml" "$encodingXmlBackup"
-+                  local currentText configuredText
-+                  currentText="$(<"$dest")"
-+                  configuredText="$(<"$src")"
-+                  if [[ "$currentText" == "$configuredText" ]]; then
-+                    return 0
-                   fi
-                fi
-                cp --update=none-fail -T ${encodingXmlFile} "$encodingXml"
-                chmod u+w "$encodingXml"
-              ''
-            else
-              ''
-                if [[ -e $encodingXml ]]; then
-                  # this intentionally removes trailing newlines
-                  currentText="$(<"$encodingXml")"
-                  configuredText="$(<${encodingXmlFile})"
-                  if [[ $currentText != "$configuredText" ]]; then
-                    echo "WARN: $encodingXml already exists and is different from the configured settings. transcoding options NOT applied." >&2
-                    echo "WARN: Set config.services.jellyfin.forceEncodingConfig = true to override." >&2
-+                  if [[ "$force" == true ]]; then
-+                    local backup
-+                    backup="$dest.backup-$(date -u +"%FT%H_%M_%SZ")"
-+                    mv --update=none-fail -T "$dest" "$backup"
-+                  else
-+                    echo "WARN: $dest already exists and is different from the configured settings. $desc options NOT applied." >&2
-+                    echo "WARN: Set the corresponding force*Config option to override." >&2
-+                    return 0
-                   fi
-                else
-                  cp --update=none-fail -T ${encodingXmlFile} "$encodingXml"
-                  chmod u+w "$encodingXml"
-                 fi
-              ''
-          )
-        );
-+                cp --update=none-fail -T "$src" "$dest"
-+                chmod u+w "$dest"
-+              }
-+              configDir=${escapeShellArg cfg.configDir}
-+            '';
-+          in
-+          (
-+            helper
-+            + optionalString cfg.hardwareAcceleration.enable ''
-+              manage_config_xml ${encodingXmlFile} "$configDir/encoding.xml" ${boolToString cfg.forceEncodingConfig} transcoding
-+            ''
-+            + ''
-+              manage_config_xml ${networkXmlFile} "$configDir/network.xml" ${boolToString cfg.forceNetworkConfig} network
-+            ''
-+          );
- 
-         # This is mostly follows: https://github.com/jellyfin/jellyfin/blob/master/fedora/jellyfin.service
-         # Upstream also disable some hardenings when running in LXC, we do the same with the isContainer option
-diff --git a/nixos/tests/jellyfin.nix b/nixos/tests/jellyfin.nix
-index 4896c13d4eca..0c9191960f78 100644
--- a/nixos/tests/jellyfin.nix
-+++ b/nixos/tests/jellyfin.nix
-@@ -63,6 +63,26 @@
-       environment.systemPackages = with pkgs; [ ffmpeg ];
-       virtualisation.diskSize = 3 * 1024;
-     };
-+
-+    machineWithNetworkConfig = {
-+      services.jellyfin = {
-+        enable = true;
-+        forceNetworkConfig = true;
-+        network = {
-+          localNetworkSubnets = [
-+            "192.168.1.0/24"
-+            "10.0.0.0/8"
-+          ];
-+          knownProxies = [ "127.0.0.1" ];
-+          enableUPnP = false;
-+          enableIPv6 = false;
-+          remoteIPFilter = [ "203.0.113.5" ];
-+          isRemoteIPFilterBlacklist = true;
-+        };
-+      };
-+      environment.systemPackages = with pkgs; [ ffmpeg ];
-+      virtualisation.diskSize = 3 * 1024;
-+    };
-   };
- 
-   # Documentation of the Jellyfin API: https://api.jellyfin.org/
-@@ -122,6 +142,36 @@
-           # Verify the new encoding.xml does not have the marker (was overwritten)
-           machineWithForceConfig.fail("grep -q 'MARKER' /var/lib/jellyfin/config/encoding.xml")
- 
-+      # Test forceNetworkConfig and network.xml generation
-+      with subtest("Force network config writes declared values and backs up on overwrite"):
-+          wait_for_jellyfin(machineWithNetworkConfig)
-+
-+          # Verify network.xml exists and contains the declared values
-+          machineWithNetworkConfig.succeed("test -f /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<string>192.168.1.0/24</string>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<string>10.0.0.0/8</string>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<string>127.0.0.1</string>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<string>203.0.113.5</string>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<IsRemoteIPFilterBlacklist>true</IsRemoteIPFilterBlacklist>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<EnableIPv6>false</EnableIPv6>' /var/lib/jellyfin/config/network.xml")
-+          machineWithNetworkConfig.succeed("grep -F '<EnableUPnP>false</EnableUPnP>' /var/lib/jellyfin/config/network.xml")
-+
-+          # Stop service before modifying config
-+          machineWithNetworkConfig.succeed("systemctl stop jellyfin.service")
-+
-+          # Plant a marker so we can prove the backup-and-overwrite path runs
-+          machineWithNetworkConfig.succeed("echo '<!-- NETMARKER -->' > /var/lib/jellyfin/config/network.xml")
-+
-+          # Restart the service to trigger the backup
-+          machineWithNetworkConfig.succeed("systemctl restart jellyfin.service")
-+          wait_for_jellyfin(machineWithNetworkConfig)
-+
-+          # Verify the marked content was preserved as a timestamped backup
-+          machineWithNetworkConfig.succeed("grep -q 'NETMARKER' /var/lib/jellyfin/config/network.xml.backup-*")
-+
-+          # Verify the new network.xml does not have the marker (was overwritten)
-+          machineWithNetworkConfig.fail("grep -q 'NETMARKER' /var/lib/jellyfin/config/network.xml")
-+
-       auth_header = 'MediaBrowser Client="NixOS Integration Tests", DeviceId="1337", Device="Apple II", Version="20.09"'
- 
- 
-- 
-2.53.0
-
--- a/secrets/ddns-updater-config.age
+++ b/secrets/ddns-updater-config.age
--- a/secrets/git-crypt-key-nixos.age
+++ b/secrets/git-crypt-key-nixos.age
--- a/secrets/njalla-api-token-env.age
+++ b/secrets/njalla-api-token-env.age
--- a/service-configs.nix
+++ b/service-configs.nix
@@ -81,12 +81,6 @@ rec {
        port = 6011;
        proto = "tcp";
      };
-      # Webhook receiver for the Jellyfin-qBittorrent monitor — Jellyfin pushes
-      # playback events here so throttling reacts without waiting for the poll.
-      jellyfin_qbittorrent_monitor_webhook = {
-        port = 9898;
-        proto = "tcp";
-      };
      bitmagnet = {
        port = 3333;
        proto = "tcp";
@@ -195,10 +189,6 @@ rec {
        port = 9563;
        proto = "tcp";
      };
-      prometheus_zfs = {
-        port = 9134;
-        proto = "tcp";
-      };
      harmonia = {
        port = 5500;
        proto = "tcp";
--- a/services/arr/arr-search.nix
+++ b/services/arr/arr-search.nix
@@ -1,6 +1,5 @@
 {
  pkgs,
-  lib,
  service_configs,
  ...
 }:
@@ -13,6 +12,7 @@ let

  curl = "${pkgs.curl}/bin/curl";
  jq = "${pkgs.jq}/bin/jq";
+  grep = "${pkgs.gnugrep}/bin/grep";

  # Max items to search per cycle per category (missing + cutoff) per app
  maxPerCycle = 5;
@@ -20,8 +20,8 @@ let
  searchScript = pkgs.writeShellScript "arr-search" ''
    set -euo pipefail

-    RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
-    SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
+    RADARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
+    SONARR_KEY=$(${grep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})

    search_radarr() {
      local endpoint="$1"
--- a/services/arr/bazarr.nix
+++ b/services/arr/bazarr.nix
@@ -16,11 +16,6 @@
    (lib.serviceFilePerms "bazarr" [
      "Z ${service_configs.bazarr.dataDir} 0700 ${config.services.bazarr.user} ${config.services.bazarr.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "bazarr";
-      port = service_configs.ports.private.bazarr.port;
-      auth = true;
-    })
  ];

  services.bazarr = {
@@ -28,6 +23,11 @@
    listenPort = service_configs.ports.private.bazarr.port;
  };

+  services.caddy.virtualHosts."bazarr.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${builtins.toString service_configs.ports.private.bazarr.port}
+  '';
+
  users.users.${config.services.bazarr.user}.extraGroups = [
    service_configs.media_group
  ];
--- a/services/arr/init.nix
+++ b/services/arr/init.nix
@@ -8,26 +8,13 @@
      dataDir = service_configs.prowlarr.dataDir;
      apiVersion = "v1";
      networkNamespacePath = "/run/netns/wg";
-      networkNamespaceService = "wg";
-      # Guarantee critical config.xml elements before startup. Prowlarr has a
-      # history of losing <Port> from config.xml, causing the service to run
-      # without binding any socket. See arr-init's configXml for details.
-      configXml = {
-        Port = service_configs.ports.private.prowlarr.port;
-        BindAddress = "*";
-        EnableSsl = false;
-      };
-      # Prowlarr runs in the wg netns; Sonarr/Radarr in the host netns.
-      # From host netns, Prowlarr is reachable at the wg namespace address,
-      # not at localhost (which resolves to the host's own netns).
-      # Health checks can now run — the reverse-connect is reachable.
      healthChecks = true;
      syncedApps = [
        {
          name = "Sonarr";
          implementation = "Sonarr";
          configContract = "SonarrSettings";
-          prowlarrUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}";
+          prowlarrUrl = "http://localhost:${builtins.toString service_configs.ports.private.prowlarr.port}";
          baseUrl = "http://${config.vpnNamespaces.wg.bridgeAddress}:${builtins.toString service_configs.ports.private.sonarr.port}";
          apiKeyFrom = "${service_configs.sonarr.dataDir}/config.xml";
          serviceName = "sonarr";
@@ -36,7 +23,7 @@
          name = "Radarr";
          implementation = "Radarr";
          configContract = "RadarrSettings";
-          prowlarrUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}";
+          prowlarrUrl = "http://localhost:${builtins.toString service_configs.ports.private.prowlarr.port}";
          baseUrl = "http://${config.vpnNamespaces.wg.bridgeAddress}:${builtins.toString service_configs.ports.private.radarr.port}";
          apiKeyFrom = "${service_configs.radarr.dataDir}/config.xml";
          serviceName = "radarr";
@@ -50,11 +37,6 @@
      port = service_configs.ports.private.sonarr.port;
      dataDir = service_configs.sonarr.dataDir;
      healthChecks = true;
-      configXml = {
-        Port = service_configs.ports.private.sonarr.port;
-        BindAddress = "*";
-        EnableSsl = false;
-      };
      rootFolders = [ service_configs.media.tvDir ];
      naming = {
        renameEpisodes = true;
@@ -87,11 +69,6 @@
      port = service_configs.ports.private.radarr.port;
      dataDir = service_configs.radarr.dataDir;
      healthChecks = true;
-      configXml = {
-        Port = service_configs.ports.private.radarr.port;
-        BindAddress = "*";
-        EnableSsl = false;
-      };
      rootFolders = [ service_configs.media.moviesDir ];
      naming = {
        renameMovies = true;
@@ -133,21 +110,4 @@
      serviceName = "radarr";
    };
  };
-
-  services.jellyseerrInit = {
-    enable = true;
-    configDir = service_configs.jellyseerr.configDir;
-    radarr = {
-      profileName = "Remux + WEB 2160p";
-      dataDir = service_configs.radarr.dataDir;
-      port = service_configs.ports.private.radarr.port;
-      serviceName = "radarr";
-    };
-    sonarr = {
-      profileName = "WEB-2160p";
-      dataDir = service_configs.sonarr.dataDir;
-      port = service_configs.ports.private.sonarr.port;
-      serviceName = "sonarr";
-    };
-  };
 }
--- a/services/arr/jellyseerr.nix
+++ b/services/arr/jellyseerr.nix
@@ -13,10 +13,6 @@
    (lib.serviceFilePerms "jellyseerr" [
      "Z ${service_configs.jellyseerr.configDir} 0700 jellyseerr jellyseerr"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "jellyseerr";
-      port = service_configs.ports.private.jellyseerr.port;
-    })
  ];

  services.jellyseerr = {
@@ -40,4 +36,8 @@

  users.groups.jellyseerr = { };

+  services.caddy.virtualHosts."jellyseerr.${service_configs.https.domain}".extraConfig = ''
+    # import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${builtins.toString service_configs.ports.private.jellyseerr.port}
+  '';
 }
--- a/services/arr/prowlarr.nix
+++ b/services/arr/prowlarr.nix
@@ -14,12 +14,6 @@
    (lib.serviceFilePerms "prowlarr" [
      "Z ${service_configs.prowlarr.dataDir} 0700 prowlarr prowlarr"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "prowlarr";
-      port = service_configs.ports.private.prowlarr.port;
-      auth = true;
-      vpn = true;
-    })
  ];

  services.prowlarr = {
@@ -57,4 +51,8 @@
    ExecStart = lib.mkForce "${lib.getExe pkgs.prowlarr} -nobrowser -data=${service_configs.prowlarr.dataDir}";
  };

+  services.caddy.virtualHosts."prowlarr.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.prowlarr.port}
+  '';
 }
--- a/services/arr/radarr.nix
+++ b/services/arr/radarr.nix
@@ -16,11 +16,6 @@
    (lib.serviceFilePerms "radarr" [
      "Z ${service_configs.radarr.dataDir} 0700 ${config.services.radarr.user} ${config.services.radarr.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "radarr";
-      port = service_configs.ports.private.radarr.port;
-      auth = true;
-    })
  ];

  services.radarr = {
@@ -30,6 +25,11 @@
    settings.update.mechanism = "external";
  };

+  services.caddy.virtualHosts."radarr.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${builtins.toString service_configs.ports.private.radarr.port}
+  '';
+
  users.users.${config.services.radarr.user}.extraGroups = [
    service_configs.media_group
  ];
--- a/services/arr/recyclarr.nix
+++ b/services/arr/recyclarr.nix
@@ -13,8 +13,8 @@ let
  # Runs as root (via + prefix) after the NixOS module writes config.json.
  # Extracts API keys from radarr/sonarr config.xml and injects them via jq.
  injectApiKeys = pkgs.writeShellScript "recyclarr-inject-api-keys" ''
-    RADARR_KEY=$(${lib.extractArrApiKey radarrConfig})
-    SONARR_KEY=$(${lib.extractArrApiKey sonarrConfig})
+    RADARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${radarrConfig})
+    SONARR_KEY=$(${lib.getExe pkgs.gnugrep} -oP '(?<=<ApiKey>)[^<]+' ${sonarrConfig})
    ${pkgs.jq}/bin/jq \
      --arg rk "$RADARR_KEY" \
      --arg sk "$SONARR_KEY" \
@@ -46,42 +46,30 @@ in
      radarr.movies = {
        base_url = "http://localhost:${builtins.toString service_configs.ports.private.radarr.port}";

-        # Recyclarr is the sole authority for custom formats and scores.
-        # Overwrite any manually-created CFs and delete stale ones.
-        replace_existing_custom_formats = true;
-        delete_old_custom_formats = true;
-
        include = [
          { template = "radarr-quality-definition-movie"; }
          { template = "radarr-quality-profile-remux-web-2160p"; }
          { template = "radarr-custom-formats-remux-web-2160p"; }
        ];

-        # Group WEB 2160p with 1080p in the same quality tier so custom
-        # format scores -- not quality ranking -- decide the winner.
-        # Native 4K with HDR/DV from good release groups scores high and
-        # wins; AI upscales get -10000 from the Upscaled CF and are
-        # blocked by min_format_score. Untagged upscales from unknown
-        # groups (score ~0) lose to well-scored 1080p (Tier 01 = +1750).
+        # Extend the template's quality profile with lower-resolution fallbacks
        quality_profiles = [
          {
            name = "Remux + WEB 2160p";
-            min_format_score = 0;
-            reset_unmatched_scores.enabled = true;
-            upgrade = {
-              allowed = true;
-              until_quality = "Remux-2160p";
-              until_score = 10000;
-            };
            qualities = [
              { name = "Remux-2160p"; }
              {
-                name = "WEB/Bluray";
+                name = "WEB 2160p";
                qualities = [
                  "WEBDL-2160p"
                  "WEBRip-2160p"
-                  "Remux-1080p"
-                  "Bluray-1080p"
+                ];
+              }
+              { name = "Remux-1080p"; }
+              { name = "Bluray-1080p"; }
+              {
+                name = "WEB 1080p";
+                qualities = [
                  "WEBDL-1080p"
                  "WEBRip-1080p"
                ];
@@ -108,57 +96,35 @@ in
              { name = "Remux + WEB 2160p"; }
            ];
          }
-          # Upscaled - block AI upscales and other upscaled-to-2160p releases
-          {
-            trash_ids = [ "bfd8eb01832d646a0a89c4deb46f8564" ];
-            assign_scores_to = [
-              {
-                name = "Remux + WEB 2160p";
-                score = -10000;
-              }
-            ];
-          }
        ];
      };

      sonarr.series = {
        base_url = "http://localhost:${builtins.toString service_configs.ports.private.sonarr.port}";

-        # Recyclarr is the sole authority for custom formats and scores.
-        # Overwrite any manually-created CFs and delete stale ones.
-        replace_existing_custom_formats = true;
-        delete_old_custom_formats = true;
-
        include = [
          { template = "sonarr-quality-definition-series"; }
          { template = "sonarr-v4-quality-profile-web-2160p"; }
          { template = "sonarr-v4-custom-formats-web-2160p"; }
        ];

-        # Group WEB 2160p with 1080p in the same quality tier so custom
-        # format scores -- not quality ranking -- decide the winner.
-        # Native 4K with HDR/DV from good release groups scores high and
-        # wins; AI upscales get -10000 from the Upscaled CF and are
-        # blocked by min_format_score. Untagged upscales from unknown
-        # groups (score ~0) lose to well-scored 1080p (Tier 01 = +1750).
+        # Extend the template's quality profile with lower-resolution fallbacks
        quality_profiles = [
          {
            name = "WEB-2160p";
-            min_format_score = 0;
-            reset_unmatched_scores.enabled = true;
-            upgrade = {
-              allowed = true;
-              until_quality = "WEB/Bluray";
-              until_score = 10000;
-            };
            qualities = [
              {
-                name = "WEB/Bluray";
+                name = "WEB 2160p";
                qualities = [
                  "WEBDL-2160p"
                  "WEBRip-2160p"
-                  "Bluray-1080p Remux"
-                  "Bluray-1080p"
+                ];
+              }
+              { name = "Bluray-1080p Remux"; }
+              { name = "Bluray-1080p"; }
+              {
+                name = "WEB 1080p";
+                qualities = [
                  "WEBDL-1080p"
                  "WEBRip-1080p"
                ];
@@ -185,34 +151,14 @@ in
              { name = "WEB-2160p"; }
            ];
          }
-          # Upscaled - block AI upscales and other upscaled-to-2160p releases
-          {
-            trash_ids = [ "23297a736ca77c0fc8e70f8edd7ee56c" ];
-            assign_scores_to = [
-              {
-                name = "WEB-2160p";
-                score = -10000;
-              }
-            ];
-          }
        ];
      };
    };
  };

-  # Trigger immediate sync on deploy when recyclarr config changes.
-  # restartTriggers on the oneshot service are unreliable (systemd may
-  # no-op a restart of an inactive oneshot). Instead, embed a config
-  # hash in the timer unit -- NixOS restarts changed timers reliably,
-  # and OnActiveSec fires the sync within seconds.
-  systemd.timers.recyclarr = {
-    timerConfig.OnActiveSec = "5s";
-    unitConfig.X-ConfigHash = builtins.hashString "sha256" (
-      builtins.toJSON config.services.recyclarr.configuration
-    );
-  };
-
+  # Re-sync immediately on deploy when the recyclarr config changes
  systemd.services.recyclarr = {
+    restartTriggers = [ (builtins.toJSON config.services.recyclarr.configuration) ];
    after = [
      "network-online.target"
      "radarr.service"
--- a/services/arr/sonarr.nix
+++ b/services/arr/sonarr.nix
@@ -16,11 +16,6 @@
    (lib.serviceFilePerms "sonarr" [
      "Z ${service_configs.sonarr.dataDir} 0700 ${config.services.sonarr.user} ${config.services.sonarr.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "sonarr";
-      port = service_configs.ports.private.sonarr.port;
-      auth = true;
-    })
  ];

  systemd.tmpfiles.rules = [
@@ -36,6 +31,11 @@
    settings.update.mechanism = "external";
  };

+  services.caddy.virtualHosts."sonarr.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${builtins.toString service_configs.ports.private.sonarr.port}
+  '';
+
  users.users.${config.services.sonarr.user}.extraGroups = [
    service_configs.media_group
  ];
--- a/services/bitmagnet.nix
+++ b/services/bitmagnet.nix
@@ -5,66 +5,9 @@
  lib,
  ...
 }:
-let
-  prowlarrPort = toString service_configs.ports.private.prowlarr.port;
-  sonarrPort = toString service_configs.ports.private.sonarr.port;
-  radarrPort = toString service_configs.ports.private.radarr.port;
-  bitmagnetPort = toString service_configs.ports.private.bitmagnet.port;
-  bridgeAddr = config.vpnNamespaces.wg.bridgeAddress;
-
-  prowlarrConfigXml = "${service_configs.prowlarr.dataDir}/config.xml";
-  sonarrConfigXml = "${service_configs.sonarr.dataDir}/config.xml";
-  radarrConfigXml = "${service_configs.radarr.dataDir}/config.xml";
-
-  curl = "${pkgs.curl}/bin/curl";
-  jq = "${pkgs.jq}/bin/jq";
-
-  # Clears the escalating failure backoff for the Bitmagnet indexer across
-  # Prowlarr, Sonarr, and Radarr so searches resume immediately after
-  # Bitmagnet restarts instead of waiting hours for disable timers to expire.
-  recoveryScript = pkgs.writeShellScript "prowlarr-bitmagnet-recovery" ''
-    set -euo pipefail
-
-    wait_for() {
-      for _ in $(seq 1 "$2"); do
-        ${curl} -sf --max-time 5 "$1" > /dev/null && return 0
-        sleep 5
-      done
-      echo "$1 not reachable, aborting" >&2; exit 1
-    }
-
-    # Test a Bitmagnet-named indexer to clear its failure status.
-    # A successful test triggers RecordSuccess() which resets the backoff.
-    clear_status() {
-      local key indexer
-      key=$(${lib.extractArrApiKey ''"$3"''}) || return 0
-      indexer=$(${curl} -sf --max-time 10 \
-        -H "X-Api-Key: $key" "$2/api/$1/indexer" | \
-        ${jq} 'first(.[] | select(.name | test("Bitmagnet"; "i")))') || return 0
-      [ -n "$indexer" ] && [ "$indexer" != "null" ] || return 0
-      ${curl} -sf --max-time 30 \
-        -H "X-Api-Key: $key" -H "Content-Type: application/json" \
-        -X POST "$2/api/$1/indexer/test" -d "$indexer" > /dev/null
-    }
-
-    wait_for "http://localhost:${bitmagnetPort}" 12
-    wait_for "http://localhost:${prowlarrPort}/ping" 6
-
-    # Prowlarr first — downstream apps route searches through it.
-    clear_status v1 "http://localhost:${prowlarrPort}" "${prowlarrConfigXml}" || true
-    clear_status v3 "http://${bridgeAddr}:${sonarrPort}" "${sonarrConfigXml}" || true
-    clear_status v3 "http://${bridgeAddr}:${radarrPort}" "${radarrConfigXml}" || true
-  '';
-in
 {
  imports = [
    (lib.vpnNamespaceOpenPort service_configs.ports.private.bitmagnet.port "bitmagnet")
-    (lib.mkCaddyReverseProxy {
-      subdomain = "bitmagnet";
-      port = service_configs.ports.private.bitmagnet.port;
-      auth = true;
-      vpn = true;
-    })
  ];

  services.bitmagnet = {
@@ -76,38 +19,13 @@ in
      };
      http_server = {
        # TODO! make issue about this being a string and not a `port` type
-        port = ":" + (toString service_configs.ports.private.bitmagnet.port);
+        port = ":" + (builtins.toString service_configs.ports.private.bitmagnet.port);
      };
    };
  };

-  # The upstream default (Restart=on-failure) leaves Bitmagnet dead after
-  # clean exits (e.g. systemd stop during deploy). Always restart it.
-  systemd.services.bitmagnet.serviceConfig = {
-    Restart = lib.mkForce "always";
-    RestartSec = 10;
-  };
-
-  # After Bitmagnet restarts, clear the escalating failure backoff across
-  # Prowlarr, Sonarr, and Radarr so searches resume immediately instead of
-  # waiting hours for the disable timers to expire.
-  systemd.services.prowlarr-bitmagnet-recovery = {
-    description = "Clear Prowlarr/Sonarr/Radarr failure status for Bitmagnet indexer";
-    after = [
-      "bitmagnet.service"
-      "prowlarr.service"
-      "sonarr.service"
-      "radarr.service"
-    ];
-    bindsTo = [ "bitmagnet.service" ];
-    wantedBy = [ "bitmagnet.service" ];
-
-    serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
-      ExecStart = recoveryScript;
-      # Same VPN namespace as Bitmagnet and Prowlarr.
-      NetworkNamespacePath = "/run/netns/wg";
-    };
-  };
+  services.caddy.virtualHosts."bitmagnet.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.bitmagnet.port}
+  '';
 }
--- a/services/bitwarden.nix
+++ b/services/bitwarden.nix
@@ -13,10 +13,6 @@
    (lib.serviceFilePerms "vaultwarden" [
      "Z ${service_configs.vaultwarden.path} 0700 vaultwarden vaultwarden"
    ])
-    (lib.mkFail2banJail {
-      name = "vaultwarden";
-      failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
-    })
  ];

  services.vaultwarden = {
@@ -42,4 +38,18 @@
    }
  '';

+  # Protect Vaultwarden login from brute force attacks
+  services.fail2ban.jails.vaultwarden = {
+    enabled = true;
+    settings = {
+      backend = "systemd";
+      port = "http,https";
+      # defaults: maxretry=5, findtime=10m, bantime=10m
+    };
+    filter.Definition = {
+      failregex = ''^.*Username or password is incorrect\. Try again\. IP: <HOST>\..*$'';
+      ignoreregex = "";
+      journalmatch = "_SYSTEMD_UNIT=vaultwarden.service";
+    };
+  };
 }
--- a/services/caddy/caddy.nix
+++ b/services/caddy/caddy.nix
@@ -56,19 +56,9 @@ in
    enable = true;
    email = "titaniumtown@proton.me";

-    # Build with Njalla DNS provider for DNS-01 ACME challenges (wildcard certs)
-    package = pkgs.caddy.withPlugins {
-      plugins = [ "github.com/caddy-dns/njalla@v0.0.0-20250823094507-f709141f1fe6" ];
-      hash = "sha256-rrOAR6noTDpV/I/hZXxhz0OXVJKu0mFQRq87RUrpmzw=";
-    };
-
+    # Enable on-demand TLS for old domain redirects
+    # Certs are issued dynamically when subdomains are accessed
    globalConfig = ''
-      # Wildcard cert for *.${newDomain} via DNS-01 challenge
-      acme_dns njalla {
-        api_token {env.NJALLA_API_TOKEN}
-      }
-
-      # On-demand TLS for old domain redirects
      on_demand_tls {
        ask http://localhost:9123/check
      }
@@ -116,9 +106,6 @@ in
    };
  };

-  # Inject Njalla API token for DNS-01 challenge
-  systemd.services.caddy.serviceConfig.EnvironmentFile = config.age.secrets.njalla-api-token-env.path;
-
  systemd.tmpfiles.rules = [
    "d ${config.services.caddy.dataDir} 700 ${config.services.caddy.user} ${config.services.caddy.group}"
  ];
--- a/services/ddns-updater.nix
+++ b/services/ddns-updater.nix
@@ -1,27 +0,0 @@
-{
-  config,
-  lib,
-  ...
-}:
-{
-  services.ddns-updater = {
-    enable = true;
-    environment = {
-      PERIOD = "5m";
-      # ddns-updater reads config from this path at runtime
-      CONFIG_FILEPATH = config.age.secrets.ddns-updater-config.path;
-    };
-  };
-
-  users.users.ddns-updater = {
-    isSystemUser = true;
-    group = "ddns-updater";
-  };
-  users.groups.ddns-updater = { };
-
-  systemd.services.ddns-updater.serviceConfig = {
-    DynamicUser = lib.mkForce false;
-    User = "ddns-updater";
-    Group = "ddns-updater";
-  };
-}
--- a/services/firefox-syncserver.nix
+++ b/services/firefox-syncserver.nix
@@ -6,13 +6,6 @@
  ...
 }:
 {
-  imports = [
-    (lib.mkCaddyReverseProxy {
-      domain = service_configs.firefox_syncserver.domain;
-      port = service_configs.ports.private.firefox_syncserver.port;
-    })
-  ];
-
  services.firefox-syncserver = {
    enable = true;
    database = {
@@ -40,4 +33,7 @@
    ];
  };

+  services.caddy.virtualHosts."${service_configs.firefox_syncserver.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString service_configs.ports.private.firefox_syncserver.port}
+  '';
 }
--- a/services/gitea-actions-runner.nix
+++ b/services/gitea-actions-runner.nix
@@ -29,17 +29,13 @@
    settings = {
      runner = {
        capacity = 1;
-        timeout = "6h";
+        timeout = "3h";
      };
    };
  };

-  # Override DynamicUser to use our static gitea-runner user, and ensure
-  # the runner doesn't start before the co-located gitea instance is ready
-  # (upstream can't assume locality, so this dependency is ours to add).
+  # Override DynamicUser to use our static gitea-runner user
  systemd.services."gitea-runner-muffin" = {
-    requires = [ "gitea.service" ];
-    after = [ "gitea.service" ];
    serviceConfig = {
      DynamicUser = lib.mkForce false;
      User = "gitea-runner";
--- a/services/gitea.nix
+++ b/services/gitea.nix
@@ -11,14 +11,6 @@
    (lib.serviceFilePerms "gitea" [
      "Z ${config.services.gitea.stateDir} 0700 ${config.services.gitea.user} ${config.services.gitea.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      domain = service_configs.gitea.domain;
-      port = service_configs.ports.private.gitea.port;
-    })
-    (lib.mkFail2banJail {
-      name = "gitea";
-      failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
-    })
  ];

  services.gitea = {
@@ -49,6 +41,10 @@
    };
  };

+  services.caddy.virtualHosts."${service_configs.gitea.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString config.services.gitea.settings.server.HTTP_PORT}
+  '';
+
  services.postgresql = {
    ensureDatabases = [ config.services.gitea.user ];
    ensureUsers = [
@@ -62,4 +58,18 @@

  services.openssh.settings.AllowUsers = [ config.services.gitea.user ];

+  # Protect Gitea login from brute force attacks
+  services.fail2ban.jails.gitea = {
+    enabled = true;
+    settings = {
+      backend = "systemd";
+      port = "http,https";
+      # defaults: maxretry=5, findtime=10m, bantime=10m
+    };
+    filter.Definition = {
+      failregex = "^.*Failed authentication attempt for .* from <HOST>:.*$";
+      ignoreregex = "";
+      journalmatch = "_SYSTEMD_UNIT=gitea.service";
+    };
+  };
 }
--- a/services/grafana/dashboard.nix
+++ b/services/grafana/dashboard.nix
@@ -50,17 +50,15 @@ let
      }
      {
        name = "LLM Requests";
-        datasource = promDs;
+        datasource = {
+          type = "grafana";
+          uid = "-- Grafana --";
+        };
        enable = true;
        iconColor = "purple";
-        target = {
-          datasource = promDs;
-          expr = "llamacpp:requests_processing > 0";
-          instant = false;
-          range = true;
-          refId = "A";
-        };
-        titleFormat = "LLM inference";
+        showIn = 0;
+        type = "tags";
+        tags = [ "llama-cpp" ];
      }
    ];

@@ -615,13 +613,13 @@ let
        targets = [
          {
            datasource = promDs;
-            expr = "zfs_pool_allocated_bytes{pool=\"tank\"} / zfs_pool_size_bytes{pool=\"tank\"} * 100";
+            expr = "zpool_used_bytes{pool=\"tank\"} / zpool_size_bytes{pool=\"tank\"} * 100";
            legendFormat = "tank";
            refId = "A";
          }
          {
            datasource = promDs;
-            expr = "zfs_pool_allocated_bytes{pool=\"hdds\"} / zfs_pool_size_bytes{pool=\"hdds\"} * 100";
+            expr = "zpool_used_bytes{pool=\"hdds\"} / zpool_size_bytes{pool=\"hdds\"} * 100";
            legendFormat = "hdds";
            refId = "B";
          }
@@ -655,19 +653,19 @@ let
        targets = [
          {
            datasource = promDs;
-            expr = "(node_filesystem_size_bytes{mountpoint=\"/boot\"} - node_filesystem_avail_bytes{mountpoint=\"/boot\"}) / node_filesystem_size_bytes{mountpoint=\"/boot\"} * 100";
+            expr = "partition_used_bytes{mount=\"/boot\"} / partition_size_bytes{mount=\"/boot\"} * 100";
            legendFormat = "/boot";
            refId = "A";
          }
          {
            datasource = promDs;
-            expr = "(node_filesystem_size_bytes{mountpoint=\"/persistent\"} - node_filesystem_avail_bytes{mountpoint=\"/persistent\"}) / node_filesystem_size_bytes{mountpoint=\"/persistent\"} * 100";
+            expr = "partition_used_bytes{mount=\"/persistent\"} / partition_size_bytes{mount=\"/persistent\"} * 100";
            legendFormat = "/persistent";
            refId = "B";
          }
          {
            datasource = promDs;
-            expr = "(node_filesystem_size_bytes{mountpoint=\"/nix\"} - node_filesystem_avail_bytes{mountpoint=\"/nix\"}) / node_filesystem_size_bytes{mountpoint=\"/nix\"} * 100";
+            expr = "partition_used_bytes{mount=\"/nix\"} / partition_size_bytes{mount=\"/nix\"} * 100";
            legendFormat = "/nix";
            refId = "C";
          }
--- a/services/grafana/default.nix
+++ b/services/grafana/default.nix
@@ -5,6 +5,8 @@
    ./dashboard.nix
    ./exporters.nix
    ./jellyfin-annotations.nix
+    ./disk-usage-collector.nix
+    ./llama-cpp-annotations.nix
    ./zfs-scrub-annotations.nix
  ];
 }
--- a/services/grafana/disk-usage-collector.nix
+++ b/services/grafana/disk-usage-collector.nix
@@ -0,0 +1,43 @@
+{
+  config,
+  pkgs,
+  lib,
+  ...
+}:
+let
+  # Directory the generated .prom file is written to.
+  # NOTE(review): presumably the node exporter's textfile collector directory
+  # configured elsewhere -- confirm the two paths match.
+  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
+
+  # Wraps disk-usage-collector.sh with the tools it calls on PATH.
+  diskUsageCollector = pkgs.writeShellApplication {
+    name = "disk-usage-collector";
+    runtimeInputs = with pkgs; [
+      coreutils # df, tail, mv
+      config.boot.zfs.package # zpool
+      util-linux # for mountpoint
+    ];
+    text = builtins.readFile ./disk-usage-collector.sh;
+  };
+in
+lib.mkIf config.services.grafana.enable {
+  systemd.services.disk-usage-collector = {
+    description = "Collect ZFS pool and partition usage metrics for Prometheus";
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = lib.getExe diskUsageCollector;
+    };
+    # Output path read by the script; it aborts when this is unset.
+    environment.TEXTFILE = "${textfileDir}/disk-usage.prom";
+  };
+
+  # Refresh the metrics once a minute.
+  systemd.timers.disk-usage-collector = {
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      OnCalendar = "minutely";
+      RandomizedDelaySec = "10s";
+    };
+  };
+}
--- a/services/grafana/disk-usage-collector.sh
+++ b/services/grafana/disk-usage-collector.sh
@@ -0,0 +1,49 @@
+#!/usr/bin/env bash
+# Collects ZFS pool utilization and boot partition usage for Prometheus textfile collector
+set -euo pipefail
+
+TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
+# Build the output in a sibling temp file and rename it into place at the end,
+# so the final file is replaced in one step rather than written incrementally.
+TMP="${TEXTFILE}.$$"
+# Do not leave the temp file behind when a command below fails; after the
+# final mv it no longer exists and rm -f is a no-op.
+trap 'rm -f "$TMP"' EXIT
+
+{
+  echo '# HELP zpool_size_bytes Total size of ZFS pool in bytes'
+  echo '# TYPE zpool_size_bytes gauge'
+  echo '# HELP zpool_used_bytes Used space in ZFS pool in bytes'
+  echo '# TYPE zpool_used_bytes gauge'
+  echo '# HELP zpool_free_bytes Free space in ZFS pool in bytes'
+  echo '# TYPE zpool_free_bytes gauge'
+
+  # -Hp: scripting mode, parseable, bytes
+  zpool list -Hp -o name,size,alloc,free | while IFS=$'\t' read -r name size alloc free; do
+    echo "zpool_size_bytes{pool=\"${name}\"} ${size}"
+    echo "zpool_used_bytes{pool=\"${name}\"} ${alloc}"
+    echo "zpool_free_bytes{pool=\"${name}\"} ${free}"
+  done
+
+  echo '# HELP partition_size_bytes Total size of partition in bytes'
+  echo '# TYPE partition_size_bytes gauge'
+  echo '# HELP partition_used_bytes Used space on partition in bytes'
+  echo '# TYPE partition_used_bytes gauge'
+  echo '# HELP partition_free_bytes Free space on partition in bytes'
+  echo '# TYPE partition_free_bytes gauge'
+
+  # Boot drive partitions: /boot (ESP), /persistent, /nix
+  # Use df with 1K blocks and convert to bytes
+  for mount in /boot /persistent /nix; do
+    if mountpoint -q "$mount" 2>/dev/null; then
+      read -r size used avail _ <<< "$(df -k --output=size,used,avail "$mount" | tail -1)"
+      size_b=$((size * 1024))
+      used_b=$((used * 1024))
+      avail_b=$((avail * 1024))
+      echo "partition_size_bytes{mount=\"${mount}\"} ${size_b}"
+      echo "partition_used_bytes{mount=\"${mount}\"} ${used_b}"
+      echo "partition_free_bytes{mount=\"${mount}\"} ${avail_b}"
+    fi
+  done
+} > "$TMP"
+mv "$TMP" "$TEXTFILE"
--- a/services/grafana/grafana.nix
+++ b/services/grafana/grafana.nix
@@ -12,11 +12,6 @@
    (lib.serviceFilePerms "grafana" [
      "Z ${service_configs.grafana.dir} 0700 grafana grafana"
    ])
-    (lib.mkCaddyReverseProxy {
-      domain = service_configs.grafana.domain;
-      port = service_configs.ports.private.grafana.port;
-      auth = true;
-    })
  ];

  services.grafana = {
@@ -90,6 +85,11 @@
    };
  };

+  services.caddy.virtualHosts."${service_configs.grafana.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${toString service_configs.ports.private.grafana.port}
+  '';
+
  services.postgresql = {
    ensureDatabases = [ "grafana" ];
    ensureUsers = [
--- a/services/grafana/jellyfin-annotations.nix
+++ b/services/grafana/jellyfin-annotations.nix
@@ -1,18 +1,40 @@
 {
  config,
+  pkgs,
  service_configs,
  lib,
  ...
 }:
-lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) (
-  lib.mkGrafanaAnnotationService {
-    name = "jellyfin";
+lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
+  systemd.services.jellyfin-annotations = {
    description = "Jellyfin stream annotation service for Grafana";
-    script = ./jellyfin-annotations.py;
+    after = [
+      "network.target"
+      "grafana.service"
+    ];
+    wantedBy = [ "multi-user.target" ];
+    serviceConfig = {
+      ExecStart = "${pkgs.python3}/bin/python3 ${./jellyfin-annotations.py}";
+      Restart = "always";
+      RestartSec = "10s";
+      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
+      DynamicUser = true;
+      StateDirectory = "jellyfin-annotations";
+      NoNewPrivileges = true;
+      ProtectSystem = "strict";
+      ProtectHome = true;
+      PrivateTmp = true;
+      RestrictAddressFamilies = [
+        "AF_INET"
+        "AF_INET6"
+      ];
+      MemoryDenyWriteExecute = true;
+    };
    environment = {
      JELLYFIN_URL = "http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}";
+      GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
+      STATE_FILE = "/var/lib/jellyfin-annotations/state.json";
      POLL_INTERVAL = "30";
    };
-    loadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
+  };
 }
-)
--- a/services/grafana/llama-cpp-annotations.nix
+++ b/services/grafana/llama-cpp-annotations.nix
@@ -0,0 +1,39 @@
+{
+  config,
+  pkgs,
+  service_configs,
+  lib,
+  ...
+}:
+lib.mkIf (config.services.grafana.enable && config.services.llama-cpp.enable) { # only when both services are enabled
+  systemd.services.llama-cpp-annotations = {
+    description = "LLM request annotation service for Grafana";
+    after = [
+      "grafana.service"
+      "llama-cpp.service"
+    ];
+    wantedBy = [ "multi-user.target" ];
+    serviceConfig = {
+      ExecStart = "${pkgs.python3}/bin/python3 ${./llama-cpp-annotations.py}";
+      Restart = "always"; # the script is a long-running poll loop; bring it back if it exits
+      RestartSec = "10s";
+      DynamicUser = true;
+      StateDirectory = "llama-cpp-annotations"; # backs the STATE_FILE path set below
+      NoNewPrivileges = true;
+      ProtectSystem = "strict";
+      ProtectHome = true;
+      PrivateTmp = true;
+      RestrictAddressFamilies = [ # the script only makes HTTP requests to Grafana
+        "AF_INET"
+        "AF_INET6"
+      ];
+      MemoryDenyWriteExecute = true;
+    };
+    environment = {
+      GRAFANA_URL = "http://127.0.0.1:${toString service_configs.ports.private.grafana.port}";
+      STATE_FILE = "/var/lib/llama-cpp-annotations/state.json"; # remembers the currently open annotation
+      POLL_INTERVAL = "5"; # seconds between CPU samples
+      CPU_THRESHOLD = "50"; # CPU percent above which llama-server counts as busy
+    };
+  };
+}
--- a/services/grafana/llama-cpp-annotations.py
+++ b/services/grafana/llama-cpp-annotations.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python3
+"""
+Grafana annotation service for llama-cpp inference requests.
+
+Monitors llama-server CPU usage via /proc. Creates a Grafana annotation
+when inference starts (CPU spikes), closes it when inference ends or the
+llama-server process goes away.
+
+Configuration (environment): GRAFANA_URL, STATE_FILE, POLL_INTERVAL
+(seconds between samples), CPU_THRESHOLD (percent of one core).
+"""
+
+import glob
+import json
+import os
+import sys
+import time
+import urllib.request
+
+GRAFANA_URL = os.environ.get("GRAFANA_URL", "http://127.0.0.1:3000")
+STATE_FILE = os.environ.get("STATE_FILE", "/var/lib/llama-cpp-annotations/state.json")
+POLL_INTERVAL = int(os.environ.get("POLL_INTERVAL", "5"))
+CPU_THRESHOLD = float(os.environ.get("CPU_THRESHOLD", "50"))
+
+
+def find_llama_pid():
+    """Return the PID of the first process named llama-server, or None."""
+    for path in glob.glob("/proc/[0-9]*/comm"):
+        try:
+            with open(path) as f:
+                if f.read().strip() == "llama-server":
+                    return int(path.split("/")[2])
+        except (OSError, ValueError):
+            # A process may exit between the glob and the read.
+            continue
+    return None
+
+
+def get_cpu_times(pid):
+    """Return utime + stime of `pid` in clock ticks, or None if unreadable."""
+    try:
+        with open(f"/proc/{pid}/stat") as f:
+            # Parse only what follows the last ")" so a comm containing
+            # spaces or parentheses cannot shift the columns. The remainder
+            # starts at field 3 (state), so utime/stime (fields 14/15 in
+            # proc(5)) sit at indexes 11/12.
+            fields = f.read().rsplit(")", 1)[-1].split()
+        return int(fields[11]) + int(fields[12])
+    except (OSError, IndexError, ValueError):
+        return None
+
+
+def http_json(method, url, body=None):
+    """Send a JSON request and return the decoded JSON response.
+
+    Raises whatever urllib/json raise on network, HTTP or decoding errors.
+    """
+    data = json.dumps(body).encode() if body is not None else None
+    req = urllib.request.Request(
+        url,
+        data=data,
+        headers={"Content-Type": "application/json", "Accept": "application/json"},
+        method=method,
+    )
+    with urllib.request.urlopen(req, timeout=5) as resp:
+        return json.loads(resp.read())
+
+
+def load_state():
+    """Load the persisted state, or {} if it is missing or not usable."""
+    try:
+        with open(STATE_FILE) as f:
+            state = json.load(f)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+    # main() treats the state as a dict; discard any other JSON value.
+    return state if isinstance(state, dict) else {}
+
+
+def save_state(state):
+    """Persist `state` atomically (write a temp file, then rename)."""
+    os.makedirs(os.path.dirname(STATE_FILE), exist_ok=True)
+    tmp = STATE_FILE + ".tmp"
+    with open(tmp, "w") as f:
+        json.dump(state, f)
+    os.replace(tmp, STATE_FILE)
+
+
+def grafana_post(text, start_ms):
+    """Create an annotation starting at `start_ms`; return its id or None."""
+    try:
+        result = http_json(
+            "POST",
+            f"{GRAFANA_URL}/api/annotations",
+            {"time": start_ms, "text": text, "tags": ["llama-cpp"]},
+        )
+        return result.get("id")
+    except Exception as e:
+        print(f"Error posting annotation: {e}", file=sys.stderr)
+        return None
+
+
+def grafana_close(grafana_id, end_ms, text=None):
+    """Set the end time (and optionally the text) of an annotation.
+
+    Best effort: failures are logged to stderr and otherwise ignored.
+    """
+    try:
+        body = {"timeEnd": end_ms}
+        if text is not None:
+            body["text"] = text
+        http_json(
+            "PATCH",
+            f"{GRAFANA_URL}/api/annotations/{grafana_id}",
+            body,
+        )
+    except Exception as e:
+        print(f"Error closing annotation {grafana_id}: {e}", file=sys.stderr)
+
+
+def _cpu_percent(prev_ticks, prev_time, ticks, now, hz):
+    """Return CPU usage between two samples as a percentage of one core.
+
+    Returns None when there is no previous sample or no time has elapsed.
+    """
+    if prev_ticks is None or prev_time is None:
+        return None
+    elapsed = now - prev_time
+    if elapsed <= 0:
+        return None
+    return ((ticks - prev_ticks) / hz) / elapsed * 100
+
+
+def main():
+    """Poll llama-server CPU usage forever, opening and closing annotations."""
+    state = load_state()
+    prev_ticks = None
+    prev_time = None
+    hz = os.sysconf("SC_CLK_TCK")
+
+    while True:
+        now_ms = int(time.time() * 1000)
+        pid = find_llama_pid()
+        ticks = get_cpu_times(pid) if pid is not None else None
+        now = time.monotonic()
+
+        if ticks is None:
+            # llama-server is not running (or exited mid-read), so any
+            # request it was serving is over. Report "not busy" so an open
+            # annotation is closed now instead of dangling until the
+            # server comes back and goes idle.
+            busy = False
+            prev_ticks = None
+            prev_time = None
+        else:
+            cpu_pct = _cpu_percent(prev_ticks, prev_time, ticks, now, hz)
+            prev_ticks = ticks
+            prev_time = now
+            # None = no baseline yet: neither open nor close anything.
+            busy = None if cpu_pct is None else cpu_pct > CPU_THRESHOLD
+
+        if busy and "active" not in state:
+            grafana_id = grafana_post("LLM request", now_ms)
+            if grafana_id is not None:
+                state["active"] = {
+                    "grafana_id": grafana_id,
+                    "start_ms": now_ms,
+                }
+                save_state(state)
+
+        elif busy is False and "active" in state:
+            info = state.pop("active")
+            duration_s = (now_ms - info["start_ms"]) / 1000
+            text = f"LLM request ({duration_s:.1f}s)"
+            grafana_close(info["grafana_id"], now_ms, text)
+            save_state(state)
+
+        time.sleep(POLL_INTERVAL)
+
+
+if __name__ == "__main__":
+    main()
--- a/services/grafana/prometheus.nix
+++ b/services/grafana/prometheus.nix
@@ -44,12 +44,6 @@ in
        listenAddress = "127.0.0.1";
        apcupsdAddress = "127.0.0.1:3551";
      };
-
-      zfs = {
-        enable = true;
-        port = service_configs.ports.private.prometheus_zfs.port;
-        listenAddress = "127.0.0.1";
-      };
    };

    scrapeConfigs = [
@@ -95,12 +89,6 @@ in
          { targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
        ];
      }
-      {
-        job_name = "zfs";
-        static_configs = [
-          { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_zfs.port}" ]; }
-        ];
-      }
    ];
  };

--- a/services/harmonia.nix
+++ b/services/harmonia.nix
@@ -17,22 +17,8 @@
    settings.bind = "127.0.0.1:${toString service_configs.ports.private.harmonia.port}";
  };

-  # serve latest deploy store paths (unauthenticated — just a path string)
-  # CI writes to /var/lib/dotfiles-deploy/<hostname> after building
  services.caddy.virtualHosts."nix-cache.${service_configs.https.domain}".extraConfig = ''
-    handle_path /deploy/* {
-        root * /var/lib/dotfiles-deploy
-        file_server
-    }
-
-    handle {
    import ${config.age.secrets.nix-cache-auth.path}
    reverse_proxy :${toString service_configs.ports.private.harmonia.port}
-    }
  '';
-
-  # directory for CI to record latest deploy store paths
-  systemd.tmpfiles.rules = [
-    "d /var/lib/dotfiles-deploy 0755 gitea-runner gitea-runner"
-  ];
 }
--- a/services/immich.nix
+++ b/services/immich.nix
@@ -16,15 +16,6 @@
    (lib.serviceFilePerms "immich-server" [
      "Z ${config.services.immich.mediaLocation} 0770 ${config.services.immich.user} ${config.services.immich.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "immich";
-      port = service_configs.ports.private.immich.port;
-    })
-    (lib.mkFail2banJail {
-      name = "immich";
-      unitName = "immich-server.service";
-      failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
-    })
  ];

  services.immich = {
@@ -38,6 +29,10 @@
    };
  };

+  services.caddy.virtualHosts."immich.${service_configs.https.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString config.services.immich.port}
+  '';
+
  environment.systemPackages = with pkgs; [
    immich-go
  ];
@@ -47,4 +42,18 @@
    "render"
  ];

+  # Protect Immich login from brute force attacks
+  services.fail2ban.jails.immich = {
+    enabled = true;
+    settings = {
+      backend = "systemd";
+      port = "http,https";
+      # defaults: maxretry=5, findtime=10m, bantime=10m
+    };
+    filter.Definition = {
+      failregex = "^.*Failed login attempt for user .* from ip address <HOST>.*$";
+      ignoreregex = "";
+      journalmatch = "_SYSTEMD_UNIT=immich-server.service";
+    };
+  };
 }
--- a/services/jellyfin/jellyfin-qbittorrent-monitor.nix
+++ b/services/jellyfin/jellyfin-qbittorrent-monitor.nix
@@ -5,80 +5,14 @@
  lib,
  ...
 }:
-let
-  webhookPlugin = import ./jellyfin-webhook-plugin.nix { inherit pkgs lib; };
-  jellyfinPort = service_configs.ports.private.jellyfin.port;
-  webhookPort = service_configs.ports.private.jellyfin_qbittorrent_monitor_webhook.port;
-in
 lib.mkIf config.services.jellyfin.enable {
-  # Materialise the Jellyfin Webhook plugin into Jellyfin's plugins dir before
-  # Jellyfin starts. Jellyfin rewrites meta.json at runtime, so a read-only
-  # nix-store symlink would EACCES -- we copy instead.
-  #
-  # `wantedBy = [ "jellyfin.service" ]` alone is insufficient on initial rollout:
-  # if jellyfin is already running at activation time, systemd won't start the
-  # oneshot until the next jellyfin restart. `restartTriggers` on jellyfin pinned
-  # to the plugin package + install script forces that restart whenever either
-  # changes, which invokes this unit via the `before`/`wantedBy` chain.
-  systemd.services.jellyfin-webhook-install = {
-    before = [ "jellyfin.service" ];
-    wantedBy = [ "jellyfin.service" ];
-    serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
-      User = config.services.jellyfin.user;
-      Group = config.services.jellyfin.group;
-      ExecStart = webhookPlugin.mkInstallScript {
-        pluginsDir = "${config.services.jellyfin.dataDir}/plugins";
-      };
-    };
-  };
-
-  systemd.services.jellyfin.restartTriggers = [
-    webhookPlugin.package
-    (webhookPlugin.mkInstallScript {
-      pluginsDir = "${config.services.jellyfin.dataDir}/plugins";
-    })
-  ];
-
-  # After Jellyfin starts, POST the plugin configuration so the webhook
-  # targets the monitor's receiver. Idempotent; runs on every boot.
-  systemd.services.jellyfin-webhook-configure = {
-    after = [ "jellyfin.service" ];
-    wants = [ "jellyfin.service" ];
-    before = [ "jellyfin-qbittorrent-monitor.service" ];
-    wantedBy = [ "multi-user.target" ];
-    serviceConfig = {
-      Type = "oneshot";
-      RemainAfterExit = true;
-      DynamicUser = true;
-      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
-      ExecStart = webhookPlugin.mkConfigureScript {
-        jellyfinUrl = "http://127.0.0.1:${toString jellyfinPort}";
-        webhooks = [
-          {
-            name = "qBittorrent Monitor";
-            uri = "http://127.0.0.1:${toString webhookPort}/";
-            notificationTypes = [
-              "PlaybackStart"
-              "PlaybackProgress"
-              "PlaybackStop"
-            ];
-          }
-        ];
-      };
-    };
-  };
-
  systemd.services."jellyfin-qbittorrent-monitor" = {
    description = "Monitor Jellyfin streaming and control qBittorrent rate limits";
    after = [
      "network.target"
      "jellyfin.service"
      "qbittorrent.service"
-      "jellyfin-webhook-configure.service"
    ];
-    wants = [ "jellyfin-webhook-configure.service" ];
    wantedBy = [ "multi-user.target" ];

    serviceConfig = {
@@ -110,7 +44,7 @@ lib.mkIf config.services.jellyfin.enable {
    };

    environment = {
-      JELLYFIN_URL = "http://localhost:${builtins.toString jellyfinPort}";
+      JELLYFIN_URL = "http://localhost:${builtins.toString service_configs.ports.private.jellyfin.port}";
      QBITTORRENT_URL = "http://${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString service_configs.ports.private.torrent.port}";
      CHECK_INTERVAL = "30";
      # Bandwidth budget configuration
@@ -119,9 +53,6 @@ lib.mkIf config.services.jellyfin.enable {
      DEFAULT_STREAM_BITRATE = "10000000"; # 10 Mbps fallback when bitrate unknown (bps)
      MIN_TORRENT_SPEED = "100"; # KB/s - below this, pause torrents instead
      STREAM_BITRATE_HEADROOM = "1.1"; # multiplier per stream for bitrate fluctuations
-      # Webhook receiver: Jellyfin Webhook plugin POSTs events here to throttle immediately.
-      WEBHOOK_BIND = "127.0.0.1";
-      WEBHOOK_PORT = toString webhookPort;
    };
  };
 }
--- a/services/jellyfin/jellyfin-qbittorrent-monitor.py
+++ b/services/jellyfin/jellyfin-qbittorrent-monitor.py
@@ -7,8 +7,6 @@ import sys
 import signal
 import json
 import ipaddress
-import threading
-from http.server import HTTPServer, BaseHTTPRequestHandler

 logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -36,8 +34,6 @@ class JellyfinQBittorrentMonitor:
        default_stream_bitrate=10000000,
        min_torrent_speed=100,
        stream_bitrate_headroom=1.1,
-        webhook_port=0,
-        webhook_bind="127.0.0.1",
    ):
        self.jellyfin_url = jellyfin_url
        self.qbittorrent_url = qbittorrent_url
@@ -61,12 +57,6 @@ class JellyfinQBittorrentMonitor:
        self.streaming_stop_delay = streaming_stop_delay
        self.last_state_change = 0

-        # Webhook receiver: allows Jellyfin to push events instead of waiting for the poll
-        self.webhook_port = webhook_port
-        self.webhook_bind = webhook_bind
-        self.wake_event = threading.Event()
-        self.webhook_server = None
-
        # Local network ranges (RFC 1918 private networks + localhost)
        self.local_networks = [
            ipaddress.ip_network("10.0.0.0/8"),
@@ -89,56 +79,9 @@ class JellyfinQBittorrentMonitor:
    def signal_handler(self, signum, frame):
        logger.info("Received shutdown signal, cleaning up...")
        self.running = False
-        if self.webhook_server is not None:
-            # shutdown() blocks until serve_forever returns; run from a thread so we don't deadlock
-            threading.Thread(target=self.webhook_server.shutdown, daemon=True).start()
        self.restore_normal_limits()
        sys.exit(0)

-    def wake(self) -> None:
-        """Signal the main loop to re-evaluate state immediately."""
-        self.wake_event.set()
-
-    def sleep_or_wake(self, seconds: float) -> None:
-        """Wait up to `seconds`, returning early if a webhook wakes the loop."""
-        self.wake_event.wait(seconds)
-        self.wake_event.clear()
-
-    def start_webhook_server(self) -> None:
-        """Start a background HTTP server that wakes the monitor on any POST."""
-        if not self.webhook_port:
-            return
-
-        monitor = self
-
-        class WebhookHandler(BaseHTTPRequestHandler):
-            def do_POST(self):  # noqa: N802
-                length = int(self.headers.get("Content-Length", "0") or "0")
-                body = self.rfile.read(min(length, 65536)) if length else b""
-                event = "unknown"
-                try:
-                    if body:
-                        event = json.loads(body).get("NotificationType", "unknown")
-                except (json.JSONDecodeError, ValueError):
-                    pass
-                logger.info(f"Webhook received: {event}")
-                self.send_response(204)
-                self.end_headers()
-                monitor.wake()
-
-            def log_message(self, format, *args):
-                return  # suppress default access log
-
-        self.webhook_server = HTTPServer(
-            (self.webhook_bind, self.webhook_port), WebhookHandler
-        )
-        threading.Thread(
-            target=self.webhook_server.serve_forever, daemon=True, name="webhook-server"
-        ).start()
-        logger.info(
-            f"Webhook receiver listening on http://{self.webhook_bind}:{self.webhook_port}"
-        )
-
    def check_jellyfin_sessions(self) -> list[dict]:
        headers = (
            {"X-Emby-Token": self.jellyfin_api_key} if self.jellyfin_api_key else {}
@@ -354,14 +297,10 @@ class JellyfinQBittorrentMonitor:
        logger.info(f"Default stream bitrate: {self.default_stream_bitrate} bps")
        logger.info(f"Minimum torrent speed: {self.min_torrent_speed} KB/s")
        logger.info(f"Stream bitrate headroom: {self.stream_bitrate_headroom}x")
-        if self.webhook_port:
-            logger.info(f"Webhook receiver: {self.webhook_bind}:{self.webhook_port}")

        signal.signal(signal.SIGINT, self.signal_handler)
        signal.signal(signal.SIGTERM, self.signal_handler)

-        self.start_webhook_server()
-
        while self.running:
            try:
                self.sync_qbittorrent_state()
@@ -370,7 +309,7 @@ class JellyfinQBittorrentMonitor:
                    active_streams = self.check_jellyfin_sessions()
                except ServiceUnavailable:
                    logger.warning("Jellyfin unavailable, maintaining current state")
-                    self.sleep_or_wake(self.check_interval)
+                    time.sleep(self.check_interval)
                    continue

                streaming_active = len(active_streams) > 0
@@ -455,13 +394,13 @@ class JellyfinQBittorrentMonitor:

                self.current_state = desired_state
                self.last_active_streams = active_streams
-                self.sleep_or_wake(self.check_interval)
+                time.sleep(self.check_interval)

            except KeyboardInterrupt:
                break
            except Exception as e:
                logger.error(f"Unexpected error in monitoring loop: {e}")
-                self.sleep_or_wake(self.check_interval)
+                time.sleep(self.check_interval)

        self.restore_normal_limits()
        logger.info("Monitor stopped")
@@ -482,8 +421,6 @@ if __name__ == "__main__":
    default_stream_bitrate = int(os.getenv("DEFAULT_STREAM_BITRATE", "10000000"))
    min_torrent_speed = int(os.getenv("MIN_TORRENT_SPEED", "100"))
    stream_bitrate_headroom = float(os.getenv("STREAM_BITRATE_HEADROOM", "1.1"))
-    webhook_port = int(os.getenv("WEBHOOK_PORT", "0"))
-    webhook_bind = os.getenv("WEBHOOK_BIND", "127.0.0.1")

    monitor = JellyfinQBittorrentMonitor(
        jellyfin_url=jellyfin_url,
@@ -497,8 +434,6 @@ if __name__ == "__main__":
        default_stream_bitrate=default_stream_bitrate,
        min_torrent_speed=min_torrent_speed,
        stream_bitrate_headroom=stream_bitrate_headroom,
-        webhook_port=webhook_port,
-        webhook_bind=webhook_bind,
    )

    monitor.run()
--- a/services/jellyfin/jellyfin-webhook-plugin.nix
+++ b/services/jellyfin/jellyfin-webhook-plugin.nix
@@ -1,105 +0,0 @@
-{ pkgs, lib }:
-let
-  pluginVersion = "18.0.0.0";
-  # GUID from the plugin's meta.json; addresses it on /Plugins/<guid>/Configuration.
-  pluginGuid = "71552a5a-5c5c-4350-a2ae-ebe451a30173";
-
-  package = pkgs.stdenvNoCC.mkDerivation {
-    pname = "jellyfin-plugin-webhook";
-    version = pluginVersion;
-    src = pkgs.fetchurl {
-      url = "https://repo.jellyfin.org/files/plugin/webhook/webhook_${pluginVersion}.zip";
-      hash = "sha256-LFFojiPnBGl9KJ0xVyPBnCmatcaeVbllRwRkz5Z3dqI=";
-    };
-    nativeBuildInputs = [ pkgs.unzip ];
-    unpackPhase = ''unzip "$src"'';
-    installPhase = ''
-      mkdir -p "$out"
-      cp *.dll meta.json "$out/"
-    '';
-    dontFixup = true; # managed .NET assemblies must not be patched
-  };
-
-  # Minimal Handlebars template, base64 encoded. The monitor only needs the POST;
-  # NotificationType is parsed for the debug log line.
-  # Decoded: {"NotificationType":"{{NotificationType}}"}
-  templateB64 = "eyJOb3RpZmljYXRpb25UeXBlIjoie3tOb3RpZmljYXRpb25UeXBlfX0ifQ==";
-
-  # Build a PluginConfiguration payload accepted by Jellyfin's JSON deserializer.
-  # Each webhook is `{ name, uri, notificationTypes }`.
-  mkConfigJson =
-    webhooks:
-    builtins.toJSON {
-      ServerUrl = "";
-      GenericOptions = map (w: {
-        NotificationTypes = w.notificationTypes;
-        WebhookName = w.name;
-        WebhookUri = w.uri;
-        EnableMovies = true;
-        EnableEpisodes = true;
-        EnableVideos = true;
-        EnableWebhook = true;
-        Template = templateB64;
-        Headers = [
-          {
-            Key = "Content-Type";
-            Value = "application/json";
-          }
-        ];
-      }) webhooks;
-    };
-
-  # Oneshot that POSTs the plugin configuration. Retries past the window
-  # between Jellyfin API health and plugin registration.
-  mkConfigureScript =
-    { jellyfinUrl, webhooks }:
-    pkgs.writeShellScript "jellyfin-webhook-configure" ''
-      set -euo pipefail
-      export PATH=${
-        lib.makeBinPath [
-          pkgs.coreutils
-          pkgs.curl
-        ]
-      }
-
-      URL=${lib.escapeShellArg jellyfinUrl}
-      AUTH="Authorization: MediaBrowser Token=\"$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")\""
-      CONFIG=${lib.escapeShellArg (mkConfigJson webhooks)}
-
-      for _ in $(seq 1 120); do curl -sf -o /dev/null "$URL/health" && break; sleep 1; done
-      curl -sf -o /dev/null "$URL/health"
-
-      for _ in $(seq 1 60); do
-        if printf '%s' "$CONFIG" | curl -sf -X POST \
-          -H "$AUTH" -H "Content-Type: application/json" --data-binary @- \
-          "$URL/Plugins/${pluginGuid}/Configuration"; then
-          echo "Jellyfin webhook plugin configured"; exit 0
-        fi
-        sleep 1
-      done
-      echo "Failed to configure webhook plugin" >&2; exit 1
-    '';
-
-  # Materialise a writable copy of the plugin. Jellyfin rewrites meta.json at
-  # runtime, so a read-only nix-store symlink would EACCES.
-  mkInstallScript =
-    { pluginsDir }:
-    pkgs.writeShellScript "jellyfin-webhook-install" ''
-      set -euo pipefail
-      export PATH=${lib.makeBinPath [ pkgs.coreutils ]}
-      dst=${lib.escapeShellArg "${pluginsDir}/Webhook_${pluginVersion}"}
-      mkdir -p ${lib.escapeShellArg pluginsDir}
-      rm -rf "$dst" && mkdir -p "$dst"
-      cp ${package}/*.dll ${package}/meta.json "$dst/"
-      chmod u+rw "$dst"/*
-    '';
-in
-{
-  inherit
-    package
-    pluginVersion
-    pluginGuid
-    mkConfigureScript
-    mkInstallScript
-    ;
-}
--- a/services/jellyfin/jellyfin.nix
+++ b/services/jellyfin/jellyfin.nix
@@ -26,14 +26,6 @@

  services.caddy.virtualHosts."jellyfin.${service_configs.https.domain}".extraConfig = ''
    reverse_proxy :${builtins.toString service_configs.ports.private.jellyfin.port} {
-      # Disable response buffering for streaming. Caddy's default partial
-      # buffering delays fMP4-HLS segments and direct-play responses where
-      # Content-Length is known (so auto-flush doesn't trigger).
-      flush_interval -1
-      transport http {
-        # Localhost: compression wastes CPU re-encoding already-compressed media.
-        compression off
-      }
      header_up X-Real-IP {remote_host}
      header_up X-Forwarded-For {remote_host}
      header_up X-Forwarded-Proto {scheme}
--- a/services/llama-cpp.nix
+++ b/services/llama-cpp.nix
@@ -9,23 +9,16 @@
 }:
 let
  cfg = config.services.llama-cpp;
-  modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-IQ2_M.gguf";
+  modelUrl = "https://huggingface.co/bartowski/google_gemma-4-E2B-it-GGUF/resolve/main/google_gemma-4-E2B-it-Q4_K_M.gguf";
  modelAlias = lib.removeSuffix ".gguf" (baseNameOf modelUrl);
 in
 {
-  imports = [
-    (lib.mkCaddyReverseProxy {
-      subdomain = "llm";
-      port = service_configs.ports.private.llama_cpp.port;
-    })
-  ];
-
  services.llama-cpp = {
    enable = true;
    model = toString (
      pkgs.fetchurl {
        url = modelUrl;
-        sha256 = "17e869ac54d0e59faa884d5319fc55ad84cd866f50f0b3073fbb25accc875a23";
+        sha256 = "5efe645db4e1909c7a1f4a9608df18e6c14383f5e86777fc49f769f9ba7d5fdf";
      }
    );
    port = service_configs.ports.private.llama_cpp.port;
@@ -33,6 +26,8 @@ in
    package = lib.optimizePackage (
      inputs.llamacpp.packages.${pkgs.system}.vulkan.overrideAttrs (old: {
        patches = (old.patches or [ ]) ++ [
+          ../patches/llamacpp/0003-gemma4-tokenizer-fix.patch
+          ../patches/llamacpp/0004-gemma4-graph-fix.patch
        ];
      })
    );
@@ -56,40 +51,17 @@ in
      "4096"
      "-ub"
      "4096"
-      "--parallel"
-      "2"
    ];
  };

  # have to do this in order to get vulkan to work
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;

-  # ANV driver's turbo3 shader compilation exceeds the default 8 MB thread stack.
-  systemd.services.llama-cpp.serviceConfig.LimitSTACK = lib.mkForce "67108864"; # 64 MB soft+hard
-
  # llama-server tries to create ~/.cache; ProtectSystem=strict + impermanent
  # root make /root read-only. Give it a writable cache dir and point HOME there.
  systemd.services.llama-cpp.serviceConfig.CacheDirectory = "llama-cpp";
  systemd.services.llama-cpp.environment.HOME = "/var/cache/llama-cpp";

-  # turbo3 KV cache quantization runs a 14-barrier WHT butterfly per 128-element
-  # workgroup in SET_ROWS. With 4 concurrent slots and batch=4096, the combined
-  # GPU dispatch can exceed the default i915 CCS engine preempt timeout (7.5s),
-  # causing GPU HANG -> ErrorDeviceLost. Increase compute engine timeouts.
-  # Note: batch<4096 is not viable -- GDN chunked mode needs a larger compute
-  # buffer at smaller batch sizes, exceeding the A380's 6 GB VRAM.
-  # '+' prefix runs as root regardless of service User=.
-  systemd.services.llama-cpp.serviceConfig.ExecStartPre = [
-    "+${pkgs.writeShellScript "set-gpu-compute-timeout" ''
-      for f in /sys/class/drm/card*/engine/ccs*/preempt_timeout_ms; do
-        [ -w "$f" ] && echo 30000 > "$f"
-      done
-      for f in /sys/class/drm/card*/engine/ccs*/heartbeat_interval_ms; do
-        [ -w "$f" ] && echo 10000 > "$f"
-      done
-    ''}"
-  ];
-
  # upstream module hardcodes --log-disable; override ExecStart to keep logs
  # so we can see prompt processing progress via journalctl
  systemd.services.llama-cpp.serviceConfig.ExecStart = lib.mkForce (
@@ -100,4 +72,10 @@ in
    + " ${utils.escapeSystemdExecArgs cfg.extraFlags}"
  );

+  # Auth handled by llama-cpp --api-key-file (Bearer token).
+  # No caddy_auth — the API key is the auth layer, and caddy_auth's basic
+  # auth would block Bearer-only clients like oh-my-pi.
+  services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
+    reverse_proxy :${toString config.services.llama-cpp.port}
+  '';
 }
--- a/services/matrix/matrix.nix
+++ b/services/matrix/matrix.nix
@@ -12,10 +12,6 @@
    (lib.serviceFilePerms "continuwuity" [
      "Z /var/lib/private/continuwuity 0770 ${config.services.matrix-continuwuity.user} ${config.services.matrix-continuwuity.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      domain = service_configs.matrix.domain;
-      port = service_configs.ports.private.matrix.port;
-    })
  ];

  services.matrix-continuwuity = {
@@ -57,6 +53,10 @@
    respond /.well-known/matrix/client `{"m.server":{"base_url":"https://${service_configs.matrix.domain}"},"m.homeserver":{"base_url":"https://${service_configs.matrix.domain}"},"org.matrix.msc3575.proxy":{"base_url":"https://${config.services.matrix-continuwuity.settings.global.server_name}"},"org.matrix.msc4143.rtc_foci":[{"type":"livekit","livekit_service_url":"https://${service_configs.livekit.domain}"}]}`
  '';

+  services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString service_configs.ports.private.matrix.port}
+  '';
+
  # Exact duplicate for federation port
  services.caddy.virtualHosts."${service_configs.matrix.domain}:${builtins.toString service_configs.ports.public.matrix_federation.port}".extraConfig =
    config.services.caddy.virtualHosts."${service_configs.matrix.domain}".extraConfig;
--- a/services/minecraft.nix
+++ b/services/minecraft.nix
@@ -37,21 +37,15 @@

    servers.${service_configs.minecraft.server_name} = {
      enable = true;
-      package = pkgs.fabricServers.fabric-26_1_2.override { jre_headless = pkgs.openjdk25_headless; };
+      package = pkgs.fabricServers.fabric-1_21_11;

      jvmOpts = lib.concatStringsSep " " [
        # Memory
        "-Xmx${builtins.toString service_configs.minecraft.memory.heap_size_m}M"
        "-Xms${builtins.toString service_configs.minecraft.memory.heap_size_m}M"
-
        # GC
        "-XX:+UseZGC"
        "-XX:+ZGenerational"
-
-        # added in new minecraft version
-        "-XX:+UseCompactObjectHeaders"
-        "-XX:+UseStringDeduplication"
-
        # Base JVM optimizations (brucethemoose/Minecraft-Performance-Flags-Benchmarks)
        "-XX:+UnlockExperimentalVMOptions"
        "-XX:+UnlockDiagnosticVMOptions"
@@ -73,7 +67,6 @@
        "-XX:NonProfiledCodeHeapSize=194M"
        "-XX:NmethodSweepActivity=1"
        "-XX:+UseVectorCmov"
-
        # Large pages (requires vm.nr_hugepages sysctl)
        "-XX:+UseLargePages"
        "-XX:LargePageSizeInBytes=${builtins.toString service_configs.minecraft.memory.large_page_size_m}M"
@@ -99,68 +92,71 @@
          with pkgs;
          builtins.attrValues {
            FabricApi = fetchurl {
-              url = "https://cdn.modrinth.com/data/P7dR8mSH/versions/fm7UYECV/fabric-api-0.145.4%2B26.1.2.jar";
-              sha512 = "ffd5ef62a745f76cd2e5481252cb7bc67006c809b4f436827d05ea22c01d19279e94a3b24df3d57e127af1cd08440b5de6a92a4ea8f39b2dcbbe1681275564c3";
+              url = "https://cdn.modrinth.com/data/P7dR8mSH/versions/i5tSkVBH/fabric-api-0.141.3%2B1.21.11.jar";
+              sha512 = "c20c017e23d6d2774690d0dd774cec84c16bfac5461da2d9345a1cd95eee495b1954333c421e3d1c66186284d24a433f6b0cced8021f62e0bfa617d2384d0471";
            };

-            # No 26.1.2 version available
-            # FerriteCore = fetchurl {
-            #   url = "https://cdn.modrinth.com/data/uXXizFIs/versions/d5ddUdiB/ferritecore-9.0.0-fabric.jar";
-            #   sha512 = "d81fa97e11784c19d42f89c2f433831d007603dd7193cee45fa177e4a6a9c52b384b198586e04a0f7f63cd996fed713322578bde9a8db57e1188854ae5cbe584";
-            # };
+            FerriteCore = fetchurl {
+              url = "https://cdn.modrinth.com/data/uXXizFIs/versions/Ii0gP3D8/ferritecore-8.2.0-fabric.jar";
+              sha512 = "3210926a82eb32efd9bcebabe2f6c053daf5c4337eebc6d5bacba96d283510afbde646e7e195751de795ec70a2ea44fef77cb54bf22c8e57bb832d6217418869";
+            };

            Lithium = fetchurl {
-              url = "https://cdn.modrinth.com/data/gvQqBUqZ/versions/v2xoRvRP/lithium-fabric-0.24.1%2Bmc26.1.2.jar";
-              sha512 = "8711bc8c6f39be4c8511becb7a68e573ced56777bd691639f2fc62299b35bb4ccd2efe4a39bd9c308084b523be86a5f5c4bf921ab85f7a22bf075d8ea2359621";
+              url = "https://cdn.modrinth.com/data/gvQqBUqZ/versions/Ow7wA0kG/lithium-fabric-0.21.4%2Bmc1.21.11.jar";
+              sha512 = "f14a5c3d2fad786347ca25083f902139694f618b7c103947f2fd067a7c5ee88a63e1ef8926f7d693ea79ed7d00f57317bae77ef9c2d630bf5ed01ac97a752b94";
            };

            NoChatReports = fetchurl {
-              url = "https://cdn.modrinth.com/data/qQyHxfxd/versions/2yrLNE3S/NoChatReports-FABRIC-26.1-v2.19.0.jar";
-              sha512 = "94d58a1a4cde4e3b1750bdf724e65c5f4ff3436c2532f36a465d497d26bf59f5ac996cddbff8ecdfed770c319aa2f2dcc9c7b2d19a35651c2a7735c5b2124dad";
+              url = "https://cdn.modrinth.com/data/qQyHxfxd/versions/rhykGstm/NoChatReports-FABRIC-1.21.11-v2.18.0.jar";
+              sha512 = "d2c35cc8d624616f441665aff67c0e366e4101dba243bad25ed3518170942c1a3c1a477b28805cd1a36c44513693b1c55e76bea627d3fced13927a3d67022ccc";
            };

            squaremap = fetchurl {
-              url = "https://cdn.modrinth.com/data/PFb7ZqK6/versions/UBN6MFvH/squaremap-fabric-mc26.1.2-1.3.13.jar";
-              sha512 = "97bc130184b5d0ddc4ff98a15acef6203459d982e0e2afbd49a2976d546c55a86ef22b841378b51dd782be9b2cfbe4cfa197717f2b7f6800fd8b4ff4df6e564f";
+              url = "https://cdn.modrinth.com/data/PFb7ZqK6/versions/BW8lMXBi/squaremap-fabric-mc1.21.11-1.3.12.jar";
+              sha512 = "f62eb791a3f5812eb174565d318f2e6925353f846ef8ac56b4e595f481494e0c281f26b9e9fcfdefa855093c96b735b12f67ee17c07c2477aa7a3439238670d9";
            };

            scalablelux = fetchurl {
-              url = "https://cdn.modrinth.com/data/Ps1zyz6x/versions/gYbHVCz8/ScalableLux-0.2.0%2Bfabric.2b63825-all.jar";
-              sha512 = "48565a4d8a1cbd623f0044086d971f2c0cf1c40e1d0b6636a61d41512f4c1c1ddff35879d9dba24b088a670ee254e2d5842d13a30b6d76df23706fa94ea4a58b";
+              url = "https://cdn.modrinth.com/data/Ps1zyz6x/versions/PV9KcrYQ/ScalableLux-0.1.6%2Bfabric.c25518a-all.jar";
+              sha512 = "729515c1e75cf8d9cd704f12b3487ddb9664cf9928e7b85b12289c8fbbc7ed82d0211e1851375cbd5b385820b4fedbc3f617038fff5e30b302047b0937042ae7";
            };

            c2me = fetchurl {
-              url = "https://cdn.modrinth.com/data/VSNURh3q/versions/yrNQQ1AQ/c2me-fabric-mc26.1.2-0.3.7%2Balpha.0.65.jar";
-              sha512 = "6666ebaa3bfa403e386776590fc845b7c306107d37ebc7b1be3b057893fbf9f933abb2314c171d7fe19c177cf8823cb47fdc32040d34a9704f5ab656dd5d93f8";
+              url = "https://cdn.modrinth.com/data/VSNURh3q/versions/QdLiMUjx/c2me-fabric-mc1.21.11-0.3.7%2Balpha.0.7.jar";
+              sha512 = "f9543febe2d649a82acd6d5b66189b6a3d820cf24aa503ba493fdb3bbd4e52e30912c4c763fe50006f9a46947ae8cd737d420838c61b93429542573ed67f958e";
            };

-            # No 26.1 version available
-            # krypton = fetchurl {
-            #   url = "https://cdn.modrinth.com/data/fQEb0iXm/versions/O9LmWYR7/krypton-0.2.10.jar";
-            #   sha512 = "4dcd7228d1890ddfc78c99ff284b45f9cf40aae77ef6359308e26d06fa0d938365255696af4cc12d524c46c4886cdcd19268c165a2bf0a2835202fe857da5cab";
-            # };
+            krypton = fetchurl {
+              url = "https://cdn.modrinth.com/data/fQEb0iXm/versions/O9LmWYR7/krypton-0.2.10.jar";
+              sha512 = "4dcd7228d1890ddfc78c99ff284b45f9cf40aae77ef6359308e26d06fa0d938365255696af4cc12d524c46c4886cdcd19268c165a2bf0a2835202fe857da5cab";
+            };

-            # No 26.1.2 version available
-            # disconnect-packet-fix = fetchurl {
-            #   url = "https://cdn.modrinth.com/data/rd9rKuJT/versions/x9gVeaTU/disconnect-packet-fix-fabric-2.1.0.jar";
-            #   sha512 = "bf84d02bdcd737706df123e452dd31ef535580fa4ced6af1e4ceea022fef94e4764775253e970b8caa1292e2fa00eb470557f70b290fafdb444479fa801b07a1";
-            # };
+            better-fabric-console = fetchurl {
+              url = "https://cdn.modrinth.com/data/Y8o1j1Sf/versions/6aIKl5wy/better-fabric-console-mc1.21.11-1.2.9.jar";
+              sha512 = "427247dafd99df202ee10b4bf60ffcbbecbabfadb01c167097ffb5b85670edb811f4d061c2551be816295cbbc6b8ec5ec464c14a6ff41912ef1f6c57b038d320";
+            };
+
+            disconnect-packet-fix = fetchurl {
+              url = "https://cdn.modrinth.com/data/rd9rKuJT/versions/Gv74xveQ/disconnect-packet-fix-fabric-2.0.0.jar";
+              sha512 = "1fd6f09a41ce36284e1a8e9def53f3f6834d7201e69e54e24933be56445ba569fbc26278f28300d36926ba92db6f4f9c0ae245d23576aaa790530345587316db";
+            };

            packet-fixer = fetchurl {
-              url = "https://cdn.modrinth.com/data/c7m1mi73/versions/M8PqPQr4/packetfixer-fabric-3.3.4-26.1.2.jar";
-              sha512 = "698020edba2a1fd80bb282bfd4832a00d6447b08eaafbc2e16a8f3bf89e187fc9a622c92dfe94ae140dd485fc0220a86890f12158ec08054e473fef8337829bc";
+              url = "https://cdn.modrinth.com/data/c7m1mi73/versions/CUh1DWeO/packetfixer-fabric-3.3.4-1.21.11.jar";
+              sha512 = "33331b16cb40c5e6fbaade3cacc26f3a0e8fa5805a7186f94d7366a0e14dbeee9de2d2e8c76fa71f5e9dd24eb1c261667c35447e32570ea965ca0f154fdfba0a";
            };

-            # mVUS fork: upstream ModernFix no longer ships Fabric builds
+            # Fork of ModernFix for 1.21.11 (upstream will support 26.1)
            modernfix = fetchurl {
-              url = "https://cdn.modrinth.com/data/TjSm1wrD/versions/dqQ7mabN/modernfix-5.26.2-build.1.jar";
-              sha512 = "fbef93c2dabf7bcd0ccd670226dfc4958f7ebe5d8c2b1158e88a65e6954a40f595efd58401d2a3dbb224660dca5952199cf64df29100e7bd39b1b1941290b57b";
+              url = "https://cdn.modrinth.com/data/TjSm1wrD/versions/JwSO8JCN/modernfix-5.25.2-build.4.jar";
+              sha512 = "0d65c05ac0475408c58ef54215714e6301113101bf98bfe4bb2ba949fbfddd98225ac4e2093a5f9206a9e01ba80a931424b237bdfa3b6e178c741ca6f7f8c6a3";
            };

            debugify = fetchurl {
-              url = "https://cdn.modrinth.com/data/QwxR6Gcd/versions/mfTTfiKn/debugify-26.1.2%2B1.0.jar";
-              sha512 = "63db82f2163b9f7fc27ebea999ffcd7a961054435b3ed7d8bf32d905b5f60ce81715916b7fd4e9509dd23703d5492059f3ce7e5f176402f8ed4f985a415553f4";
+              url = "https://cdn.modrinth.com/data/QwxR6Gcd/versions/8Q49lnaU/debugify-1.21.11%2B1.0.jar";
+              sha512 = "04d82dd33f44ced37045f1f9a54ad4eacd70861ff74a8800f2d2df358579e6cb0ea86a34b0086b3e87026b1a0691dd6594b4fdc49f89106466eea840518beb03";
            };
+
          }
        );
      };
--- a/services/monero/p2pool.nix
+++ b/services/monero/p2pool.nix
@@ -33,6 +33,12 @@
    wants = [ "monero.service" ];
  };

+  # Stop p2pool on UPS battery to conserve power
+  services.apcupsd.hooks = lib.mkIf config.services.apcupsd.enable {
+    onbattery = "systemctl stop p2pool";
+    offbattery = "systemctl start p2pool";
+  };
+
  networking.firewall.allowedTCPPorts = [
    service_configs.ports.public.p2pool_p2p.port
  ];
--- a/services/monero/xmrig-auto-pause.nix
+++ b/services/monero/xmrig-auto-pause.nix
@@ -26,12 +26,11 @@ lib.mkIf config.services.xmrig.enable {
    environment = {
      POLL_INTERVAL = "3";
      GRACE_PERIOD = "15";
-      # Background services (qbittorrent, bitmagnet, postgresql, etc.) produce
-      # 15-25% non-nice CPU during normal operation. The stop threshold must
-      # sit above transient spikes; the resume threshold must be below the
-      # steady-state floor to avoid restarting xmrig while services are active.
+      # This server's background services (qbittorrent, monero, bazarr, etc.)
+      # produce 5-14% non-nice CPU during normal operation. Thresholds must
+      # sit above that noise floor.
      CPU_STOP_THRESHOLD = "40";
-      CPU_RESUME_THRESHOLD = "10";
+      CPU_RESUME_THRESHOLD = "30";
      STARTUP_COOLDOWN = "10";
      STATE_DIR = "/var/lib/xmrig-auto-pause";
    };
--- a/services/monero/xmrig.nix
+++ b/services/monero/xmrig.nix
@@ -11,7 +11,7 @@ in
 {
  services.xmrig = {
    enable = true;
-    package = lib.optimizePackage pkgs.xmrig;
+    package = pkgs.xmrig;

    settings = {
      autosave = true;
--- a/services/ntfy/ntfy.nix
+++ b/services/ntfy/ntfy.nix
@@ -12,10 +12,6 @@
    (lib.serviceFilePerms "ntfy-sh" [
      "Z /var/lib/private/ntfy-sh 0700 ${config.services.ntfy-sh.user} ${config.services.ntfy-sh.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      domain = service_configs.ntfy.domain;
-      port = service_configs.ports.private.ntfy.port;
-    })
  ];

  services.ntfy-sh = {
@@ -31,4 +27,8 @@
    };
  };

+  services.caddy.virtualHosts."${service_configs.ntfy.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString service_configs.ports.private.ntfy.port}
+  '';
+
 }
--- a/services/qbittorrent.nix
+++ b/services/qbittorrent.nix
@@ -23,18 +23,10 @@ in
    (lib.serviceFilePerms "qbittorrent" [
      # 0770: group (media) needs write to delete files during upgrades —
      # Radarr/Sonarr must unlink the old file before placing the new one.
-      # Non-recursive (z not Z): UMask=0007 ensures new files get correct perms.
-      # A recursive Z rule would walk millions of files on the HDD pool at every boot.
-      "z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}"
+      "Z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}"
      "z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.TempPath} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
      "Z ${config.services.qbittorrent.profileDir} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "torrent";
-      port = service_configs.ports.private.torrent.port;
-      auth = true;
-      vpn = true;
-    })
  ];

  services.qbittorrent = {
@@ -164,34 +156,10 @@ in
    _: path: "d ${path} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group} -"
  ) service_configs.torrent.categories;

-  # Periodically checkpoint qBittorrent's SQLite WAL (Write-Ahead Log).
-  # qBittorrent holds a read transaction open for its entire lifetime,
-  # preventing SQLite's auto-checkpoint from running. The WAL grows
-  # unbounded (observed: 405 MB) and must be replayed on next startup,
-  # causing 10+ minute "internal preparations" hangs.
-  # A second sqlite3 connection can checkpoint concurrently and safely.
-  # See: https://github.com/qbittorrent/qBittorrent/issues/20433
-  systemd.services.qbittorrent-wal-checkpoint = {
-    description = "Checkpoint qBittorrent SQLite WAL";
-    after = [ "qbittorrent.service" ];
-    requires = [ "qbittorrent.service" ];
-    serviceConfig = {
-      Type = "oneshot";
-      ExecStart = "${pkgs.sqlite}/bin/sqlite3 ${config.services.qbittorrent.profileDir}/qBittorrent/data/torrents.db 'PRAGMA wal_checkpoint(TRUNCATE);'";
-      User = config.services.qbittorrent.user;
-      Group = config.services.qbittorrent.group;
-    };
-  };
-
-  systemd.timers.qbittorrent-wal-checkpoint = {
-    description = "Periodically checkpoint qBittorrent SQLite WAL";
-    wantedBy = [ "timers.target" ];
-    timerConfig = {
-      OnUnitActiveSec = "4h";
-      OnBootSec = "30min";
-      RandomizedDelaySec = "10min";
-    };
-  };
+  services.caddy.virtualHosts."torrent.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy ${config.vpnNamespaces.wg.namespaceAddress}:${builtins.toString config.services.qbittorrent.webuiPort}
+  '';

  users.users.${config.services.qbittorrent.user}.extraGroups = [
    service_configs.media_group
--- a/services/soulseek.nix
+++ b/services/soulseek.nix
@@ -19,10 +19,6 @@
      "Z ${service_configs.slskd.downloads} 0750 ${config.services.slskd.user} music"
      "Z ${service_configs.slskd.incomplete} 0750 ${config.services.slskd.user} music"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "soulseek";
-      port = service_configs.ports.private.soulseek_web.port;
-    })
  ];

  users.groups."music" = { };
@@ -62,6 +58,11 @@
  users.users.${config.services.jellyfin.user}.extraGroups = [ "music" ];
  users.users.${username}.extraGroups = [ "music" ];

+  # No caddy_auth import here: slskd doesn't work behind it (reason unknown).
+  services.caddy.virtualHosts."soulseek.${service_configs.https.domain}".extraConfig = ''
+    reverse_proxy :${builtins.toString config.services.slskd.settings.web.port}
+  '';
+
  networking.firewall.allowedTCPPorts = [
    service_configs.ports.public.soulseek_listen.port
  ];
--- a/services/syncthing.nix
+++ b/services/syncthing.nix
@@ -17,11 +17,6 @@
      "Z ${service_configs.syncthing.signalBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
      "Z ${service_configs.syncthing.grayjayBackupDir} 0750 ${config.services.syncthing.user} ${config.services.syncthing.group}"
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "syncthing";
-      port = service_configs.ports.private.syncthing_gui.port;
-      auth = true;
-    })
  ];

  services.syncthing = {
@@ -54,4 +49,9 @@
    ];
  };

+  services.caddy.virtualHosts."syncthing.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${toString service_configs.ports.private.syncthing_gui.port}
+  '';
+
 }
--- a/services/trilium.nix
+++ b/services/trilium.nix
@@ -10,11 +10,6 @@
    (lib.serviceMountWithZpool "trilium-server" service_configs.zpool_ssds [
      (service_configs.services_dir + "/trilium")
    ])
-    (lib.mkCaddyReverseProxy {
-      subdomain = "notes";
-      port = service_configs.ports.private.trilium.port;
-      auth = true;
-    })
  ];

  services.trilium-server = {
@@ -24,4 +19,8 @@
    dataDir = service_configs.trilium.dataDir;
  };

+  services.caddy.virtualHosts."notes.${service_configs.https.domain}".extraConfig = ''
+    import ${config.age.secrets.caddy_auth.path}
+    reverse_proxy :${toString service_configs.ports.private.trilium.port}
+  '';
 }
--- a/tests/fail2ban-jellyfin.nix
+++ b/tests/fail2ban-jellyfin.nix
@@ -107,7 +107,7 @@ pkgs.testers.runNixOSTest {
    server.wait_for_unit("jellyfin.service")
    server.wait_for_unit("fail2ban.service")
    server.wait_for_open_port(8096)
-    server.wait_until_succeeds("curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120)
+    server.wait_until_succeeds("curl -sf http://localhost:8096/health | grep -q Healthy", timeout=60)
    time.sleep(2)

    # Wait for Jellyfin to create real log files and reload fail2ban
--- a/tests/jellyfin-qbittorrent-monitor.nix
+++ b/tests/jellyfin-qbittorrent-monitor.nix
@@ -6,21 +6,6 @@
 }:
 let
  jfLib = import ./jellyfin-test-lib.nix { inherit pkgs lib; };
-  webhookPlugin = import ../services/jellyfin/jellyfin-webhook-plugin.nix { inherit pkgs lib; };
-  configureWebhook = webhookPlugin.mkConfigureScript {
-    jellyfinUrl = "http://localhost:8096";
-    webhooks = [
-      {
-        name = "qBittorrent Monitor";
-        uri = "http://127.0.0.1:9898/";
-        notificationTypes = [
-          "PlaybackStart"
-          "PlaybackProgress"
-          "PlaybackStop"
-        ];
-      }
-    ];
-  };
 in
 pkgs.testers.runNixOSTest {
  name = "jellyfin-qbittorrent-monitor";
@@ -84,30 +69,11 @@ pkgs.testers.runNixOSTest {
          }
        ];

-        # Create directories for qBittorrent.
+        # Create directories for qBittorrent
        systemd.tmpfiles.rules = [
          "d /var/lib/qbittorrent/downloads 0755 qbittorrent qbittorrent"
          "d /var/lib/qbittorrent/incomplete 0755 qbittorrent qbittorrent"
        ];
-
-        # Install the Jellyfin Webhook plugin before Jellyfin starts, mirroring
-        # the production module. Jellyfin rewrites meta.json at runtime so a
-        # read-only nix-store symlink would fail — we materialise a writable copy.
-        systemd.services."jellyfin-webhook-install" = {
-          description = "Install Jellyfin Webhook plugin files";
-          before = [ "jellyfin.service" ];
-          wantedBy = [ "jellyfin.service" ];
-          serviceConfig = {
-            Type = "oneshot";
-            RemainAfterExit = true;
-            User = "jellyfin";
-            Group = "jellyfin";
-            UMask = "0077";
-            ExecStart = webhookPlugin.mkInstallScript {
-              pluginsDir = "/var/lib/jellyfin/plugins";
-            };
-          };
-        };
      };

    # Public test IP (RFC 5737 TEST-NET-3) so Jellyfin sees it as external
@@ -428,97 +394,6 @@ pkgs.testers.runNixOSTest {
        local_playback["PositionTicks"] = 50000000
        server.succeed(f"curl -sf -X POST 'http://localhost:8096/Sessions/Playing/Stopped' -d '{json.dumps(local_playback)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{local_auth}, Token={local_token}'")

-    # === WEBHOOK TESTS ===
-    #
-    # Configure the Jellyfin Webhook plugin to target the monitor, then verify
-    # the real Jellyfin → plugin → monitor path reacts faster than any possible
-    # poll. CHECK_INTERVAL=30 rules out polling as the cause.
-
-    WEBHOOK_PORT = 9898
-    WEBHOOK_CREDS = "/tmp/webhook-creds"
-
-    # Start a webhook-enabled monitor with long poll interval.
-    server.succeed("systemctl stop monitor-test || true")
-    time.sleep(1)
-    server.succeed(f"""
-      systemd-run --unit=monitor-webhook \
-        --setenv=JELLYFIN_URL=http://localhost:8096 \
-        --setenv=JELLYFIN_API_KEY={token} \
-        --setenv=QBITTORRENT_URL=http://localhost:8080 \
-        --setenv=CHECK_INTERVAL=30 \
-        --setenv=STREAMING_START_DELAY=1 \
-        --setenv=STREAMING_STOP_DELAY=1 \
-        --setenv=TOTAL_BANDWIDTH_BUDGET=50000000 \
-        --setenv=SERVICE_BUFFER=2000000 \
-        --setenv=DEFAULT_STREAM_BITRATE=10000000 \
-        --setenv=MIN_TORRENT_SPEED=100 \
-        --setenv=WEBHOOK_PORT={WEBHOOK_PORT} \
-        --setenv=WEBHOOK_BIND=127.0.0.1 \
-        {python} {monitor}
-    """)
-    server.wait_until_succeeds(f"ss -ltn | grep -q ':{WEBHOOK_PORT}'", timeout=15)
-    time.sleep(2)
-    assert not is_throttled(), "Should start unthrottled"
-
-    # Drop the admin token where the configure script expects it (production uses agenix).
-    server.succeed(f"mkdir -p {WEBHOOK_CREDS} && echo '{token}' > {WEBHOOK_CREDS}/jellyfin-api-key")
-    server.succeed(
-        f"systemd-run --wait --unit=webhook-configure-test "
-        f"--setenv=CREDENTIALS_DIRECTORY={WEBHOOK_CREDS} "
-        f"${configureWebhook}"
-    )
-
-    with subtest("Real PlaybackStart event throttles via the plugin"):
-        playback_start = {
-            "ItemId": movie_id,
-            "MediaSourceId": media_source_id,
-            "PlaySessionId": "test-plugin-start",
-            "CanSeek": True,
-            "IsPaused": False,
-        }
-        start_cmd = f"curl -sf -X POST 'http://{server_ip}:8096/Sessions/Playing' -d '{json.dumps(playback_start)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{client_auth}, Token={client_token}'"
-        client.succeed(start_cmd)
-        server.wait_until_succeeds(
-            "curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^1$'",
-            timeout=5,
-        )
-        # Let STREAMING_STOP_DELAY (1s) elapse so the upcoming stop is not swallowed by hysteresis.
-        time.sleep(2)
-
-    with subtest("Real PlaybackStop event unthrottles via the plugin"):
-        playback_stop = {
-            "ItemId": movie_id,
-            "MediaSourceId": media_source_id,
-            "PlaySessionId": "test-plugin-start",
-            "PositionTicks": 50000000,
-        }
-        stop_cmd = f"curl -sf -X POST 'http://{server_ip}:8096/Sessions/Playing/Stopped' -d '{json.dumps(playback_stop)}' -H 'Content-Type:application/json' -H 'X-Emby-Authorization:{client_auth}, Token={client_token}'"
-        client.succeed(stop_cmd)
-        server.wait_until_succeeds(
-            "curl -sf http://localhost:8080/api/v2/transfer/speedLimitsMode | grep -q '^0$'",
-            timeout=10,
-        )
-
-    # Restore fast-polling monitor for the service-restart tests below.
-    server.succeed("systemctl stop monitor-webhook || true")
-    time.sleep(1)
-    server.succeed(f"""
-      systemd-run --unit=monitor-test \
-        --setenv=JELLYFIN_URL=http://localhost:8096 \
-        --setenv=JELLYFIN_API_KEY={token} \
-        --setenv=QBITTORRENT_URL=http://localhost:8080 \
-        --setenv=CHECK_INTERVAL=1 \
-        --setenv=STREAMING_START_DELAY=1 \
-        --setenv=STREAMING_STOP_DELAY=1 \
-        --setenv=TOTAL_BANDWIDTH_BUDGET=50000000 \
-        --setenv=SERVICE_BUFFER=2000000 \
-        --setenv=DEFAULT_STREAM_BITRATE=10000000 \
-        --setenv=MIN_TORRENT_SPEED=100 \
-        {python} {monitor}
-    """)
-    time.sleep(2)
-
-
    # === SERVICE RESTART TESTS ===

    with subtest("qBittorrent restart during throttled state re-applies throttling"):
--- a/tests/jellyfin-test-lib.py
+++ b/tests/jellyfin-test-lib.py
@@ -18,7 +18,7 @@ def setup_jellyfin(machine, retry, auth_header, auth_payload, empty_payload):
    machine.wait_for_unit("jellyfin.service")
    machine.wait_for_open_port(8096)
    machine.wait_until_succeeds(
-        "curl -sf http://localhost:8096/health | grep -q Healthy", timeout=120
+        "curl -sf http://localhost:8096/health | grep -q Healthy", timeout=60
    )

    machine.wait_until_succeeds(
--- a/tests/llama-cpp-annotations.nix
+++ b/tests/llama-cpp-annotations.nix
@@ -0,0 +1,132 @@
+{
+  pkgs,
+  ...
+}:
+let
+  mockGrafana = ./mock-grafana-server.py;
+  script = ../services/grafana/llama-cpp-annotations.py;
+  python = pkgs.python3;
+
+  mockLlamaProcess = ./mock-llama-server-proc.py;
+in
+pkgs.testers.runNixOSTest {
+  name = "llama-cpp-annotations";
+
+  nodes.machine =
+    { pkgs, ... }:
+    {
+      environment.systemPackages = [
+        pkgs.python3
+        pkgs.curl
+        pkgs.procps
+      ];
+    };
+
+  testScript = ''
+    import json
+    import time
+
+    GRAFANA_PORT = 13000
+    ANNOTS_FILE = "/tmp/annotations.json"
+    LLAMA_STATE = "/tmp/llama-state.txt"
+    STATE_FILE = "/tmp/llama-annot-state.json"
+    PYTHON = "${python}/bin/python3"
+    MOCK_GRAFANA = "${mockGrafana}"
+    MOCK_LLAMA = "${mockLlamaProcess}"
+    SCRIPT = "${script}"
+
+    def read_annotations():
+        out = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'")
+        return json.loads(out.strip())
+
+    def set_busy():
+        machine.succeed(f"echo busy > {LLAMA_STATE}")
+
+    def set_idle():
+        machine.succeed(f"echo idle > {LLAMA_STATE}")
+
+    start_all()
+    machine.wait_for_unit("multi-user.target")
+
+    with subtest("Start mock services"):
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+        machine.succeed(
+            f"systemd-run --unit=mock-grafana {PYTHON} {MOCK_GRAFANA} {GRAFANA_PORT} {ANNOTS_FILE}"
+        )
+        machine.succeed(
+            f"systemd-run --unit=mock-llama {PYTHON} {MOCK_LLAMA} {LLAMA_STATE}"
+        )
+        machine.wait_until_succeeds(
+            f"curl -sf http://127.0.0.1:{GRAFANA_PORT}/api/annotations -X POST "
+            f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
+            timeout=10,
+        )
+        machine.wait_until_succeeds(
+            "pgrep -x llama-server",
+            timeout=10,
+        )
+        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
+
+    with subtest("Start annotation service"):
+        machine.succeed(
+            f"systemd-run --unit=llama-annot "
+            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
+            f"--setenv=STATE_FILE={STATE_FILE} "
+            f"--setenv=POLL_INTERVAL=2 "
+            f"--setenv=CPU_THRESHOLD=10 "
+            f"{PYTHON} {SCRIPT}"
+        )
+        time.sleep(5)
+
+    with subtest("No annotations when idle"):
+        annots = read_annotations()
+        assert annots == [], f"Expected no annotations, got: {annots}"
+
+    with subtest("Annotation created when llama-server becomes busy"):
+        set_busy()
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | {PYTHON} -c "
+            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a else 1)\"",
+            timeout=20,
+        )
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
+        assert "llama-cpp" in annots[0].get("tags", []), f"Missing tag: {annots[0]}"
+        assert "LLM request" in annots[0]["text"], f"Missing text: {annots[0]['text']}"
+        assert "timeEnd" not in annots[0], f"timeEnd should not be set: {annots[0]}"
+
+    with subtest("Annotation closed when llama-server becomes idle"):
+        set_idle()
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | {PYTHON} -c "
+            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a and 'timeEnd' in a[0] else 1)\"",
+            timeout=20,
+        )
+        annots = read_annotations()
+        assert len(annots) == 1, f"Expected 1, got: {annots}"
+        assert "timeEnd" in annots[0], f"timeEnd missing: {annots[0]}"
+        assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
+        assert "s)" in annots[0].get("text", ""), f"Duration missing: {annots[0]}"
+
+    with subtest("State survives restart"):
+        set_busy()
+        machine.wait_until_succeeds(
+            f"cat {ANNOTS_FILE} | {PYTHON} -c "
+            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if len(a)==2 else 1)\"",
+            timeout=20,
+        )
+        machine.succeed("systemctl stop llama-annot || true")
+        time.sleep(1)
+        machine.succeed(
+            f"systemd-run --unit=llama-annot-2 "
+            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
+            f"--setenv=STATE_FILE={STATE_FILE} "
+            f"--setenv=POLL_INTERVAL=2 "
+            f"--setenv=CPU_THRESHOLD=10 "
+            f"{PYTHON} {SCRIPT}"
+        )
+        time.sleep(6)
+        annots = read_annotations()
+        assert len(annots) == 2, f"Restart should not duplicate, got: {annots}"
+  '';
+}
--- a/tests/mock-llama-server-proc.py
+++ b/tests/mock-llama-server-proc.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+"""
+Mock llama-server process for NixOS VM tests.
+
+Sets /proc/self/comm to "llama-server" via prctl so that monitoring scripts
+(llama-cpp-annotations, llama-cpp-xmrig-pause) can discover this process
+the same way they discover the real one.
+
+Usage: python3 mock-llama-server-proc.py <state-file>
+
+The state file controls behavior:
+  "busy"  -> burn CPU in a tight loop (simulates prompt processing / inference)
+  "idle"  -> sleep (simulates waiting for requests)
+"""
+
+import ctypes
+import ctypes.util
+import sys
+import time
+
+STATE_FILE = sys.argv[1]  # path of the control file; required first CLI argument
+
+# prctl(PR_SET_NAME = 15, name) renames this process so /proc/self/comm reads "llama-server"
+libc = ctypes.CDLL(ctypes.util.find_library("c"), use_errno=True)
+libc.prctl(15, b"llama-server", 0, 0, 0)  # return value is not checked
+
+with open(STATE_FILE, "w") as f:  # (re)create the state file, always starting out idle
+    f.write("idle")
+
+while True:  # no exit condition: runs until the process is killed
+    try:
+        with open(STATE_FILE) as f:
+            state = f.read().strip()
+    except Exception:  # a missing or unreadable state file is treated as idle
+        state = "idle"
+
+    if state == "busy":
+        end = time.monotonic() + 0.1  # spin for ~0.1 s, then re-read the state file
+        while time.monotonic() < end:
+            _ = sum(range(10000))
+    else:  # anything other than "busy" (including an empty file) counts as idle
+        time.sleep(0.5)  # idle poll interval before the state file is read again
--- a/tests/tests.nix
+++ b/tests/tests.nix
@@ -28,6 +28,9 @@ in
  # zfs scrub annotations test
  zfsScrubAnnotationsTest = handleTest ./zfs-scrub-annotations.nix;

+  # llama-cpp tests
+  llamaCppAnnotationsTest = handleTest ./llama-cpp-annotations.nix;
+
  # xmrig auto-pause test
  xmrigAutoPauseTest = handleTest ./xmrig-auto-pause.nix;
  # ntfy alerts test