zfs tuning
@@ -1,15 +1,39 @@
 {
   config,
   lib,
   service_configs,
   pkgs,
   ...
 }:
+let
+  # Total RAM in bytes (from /proc/meminfo: 65775836 KiB).
+  totalRamBytes = 65775836 * 1024;
+
+  # Hugepage reservations that the kernel carves out before ZFS can use them.
+  hugepages2mBytes = service_configs.hugepages_2m.total_pages * 2 * 1024 * 1024;
+  hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3x 1G pages for RandomX (xmrig.nix)
+  totalHugepageBytes = hugepages2mBytes + hugepages1gBytes;
+
+  # ARC max: 60% of RAM remaining after hugepages. Leaves headroom for
+  # application RSS (PostgreSQL, qBittorrent, Jellyfin, Grafana, etc.),
+  # kernel slabs, and page cache.
+  arcMaxBytes = (totalRamBytes - totalHugepageBytes) * 60 / 100;
+in
 {
-  boot.zfs.package = pkgs.zfs;
+  boot.zfs.package = pkgs.zfs_2_4;
   boot.initrd.kernelModules = [ "zfs" ];
 
   boot.kernelParams = [
-    "zfs.zfs_txg_timeout=120" # longer TXG open time = larger sequential writes
+    # 120s TXG timeout: batch more dirty data per transaction group so the
+    # HDD pool (hdds) writes larger, sequential I/Os instead of many small syncs.
+    # This is a global setting (no per-pool control); the SSD pool (tank) syncs
+    # infrequently but handles it fine since SSDs don't suffer from seek overhead.
+    "zfs.zfs_txg_timeout=120"
+
+    # Cap ARC to prevent it from claiming memory reserved for hugepages.
+    # Without this, ZFS auto-sizes c_max to ~62 GiB on a 64 GiB system,
+    # ignoring the 11.5 GiB of hugepage reservations.
+    "zfs.zfs_arc_max=${toString arcMaxBytes}"
+
+    # vdev I/O scheduler: feed more concurrent reads to the block scheduler so
+    # mq-deadline has a larger pool of requests to sort and merge into elevator sweeps.
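
The 11.5 GiB figure in the ARC comment is the sum of the two reservations in the let block: 3 GiB of 1G pages for RandomX plus what works out to 8.5 GiB of 2M pages. The 2M count comes from service_configs.hugepages_2m, which this diff doesn't show; a minimal sketch of the reservation side, assuming total_pages = 4352 (8.5 GiB / 2 MiB) and the standard hugepagesz=/hugepages= kernel parameters:

{
  boot.kernelParams = [
    # Each hugepages= count applies to the hugepagesz= that precedes it.
    "hugepagesz=2M"
    "hugepages=4352" # assumed value of service_configs.hugepages_2m.total_pages
    "hugepagesz=1G"
    "hugepages=3"    # RandomX dataset, per the xmrig.nix comment
  ];
}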
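
Plugging that assumed page count into the arcMaxBytes formula shows where the cap lands (Nix's / on integers truncates):

let
  totalRamBytes    = 65775836 * 1024;        # 67354456064 B, ~62.7 GiB
  hugepages2mBytes = 4352 * 2 * 1024 * 1024; # 8.5 GiB, assumed page count
  hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3.0 GiB
in
(totalRamBytes - (hugepages2mBytes + hugepages1gBytes)) * 60 / 100
# => 33003855052 B, ~30.7 GiB

So the cap works out to roughly 30.7 GiB of ARC instead of the ~62 GiB auto-sized c_max, leaving about 20 GiB for application RSS, kernel slabs, and page cache. Both tunables are also writable at runtime through /sys/module/zfs/parameters/zfs_arc_max and .../zfs_txg_timeout, which makes it easy to experiment before baking values into boot.kernelParams.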
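
The hunk is cut off before the vdev parameter that last comment documents, so the exact knob isn't visible here. The mq-deadline half has to be arranged outside ZFS in any case: OpenZFS 2.0 dropped the zfs_vdev_scheduler parameter, so the module no longer selects a block scheduler itself. A sketch of how mq-deadline could be pinned on the rotational hdds members, assuming they appear as whole disks named sd*:

{
  # Hypothetical rule, not part of this commit: force mq-deadline on
  # spinning disks so the elevator can sort and merge ZFS's concurrent reads.
  services.udev.extraRules = ''
    ACTION=="add|change", KERNEL=="sd[a-z]", ATTR{queue/rotational}=="1", ATTR{queue/scheduler}="mq-deadline"
  '';
}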