zfs tuning

2026-04-15 13:25:38 -04:00
parent 48efd7fcf7
commit aecd9002b0
1 changed files with 26 additions and 2 deletions
--- a/modules/zfs.nix
+++ b/modules/zfs.nix
@@ -1,15 +1,39 @@
 {
  config,
  lib,
  service_configs,
  pkgs,
  ...
 }:
 let
  # Total RAM in bytes (from /proc/meminfo: 65775836 KiB).
  totalRamBytes = 65775836 * 1024;
  # Hugepage reservations: memory the kernel carves out before ZFS can use it.
  hugepages2mBytes = service_configs.hugepages_2m.total_pages * 2 * 1024 * 1024;
  hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3x 1G pages for RandomX (xmrig.nix)
  totalHugepageBytes = hugepages2mBytes + hugepages1gBytes;
  # ARC max: 60% of RAM remaining after hugepages. Leaves headroom for
  # application RSS (PostgreSQL, qBittorrent, Jellyfin, Grafana, etc.),
  # kernel slabs, and page cache.
  arcMaxBytes = (totalRamBytes - totalHugepageBytes) * 60 / 100;
 in
 {
-  boot.zfs.package = pkgs.zfs;
+  boot.zfs.package = pkgs.zfs_2_4;
  boot.initrd.kernelModules = [ "zfs" ];
  boot.kernelParams = [
-    "zfs.zfs_txg_timeout=120" # longer TXG open time = larger sequential writes
+    # 120s TXG timeout: batch more dirty data per transaction group so the
    # HDD pool (hdds) writes larger, sequential I/Os instead of many small syncs.
    # This is a global setting (no per-pool control), so the SSD pool (tank) also
    # syncs less often; that is fine since SSDs don't suffer from seek overhead.
    "zfs.zfs_txg_timeout=120"
    # Cap ARC to prevent it from claiming memory reserved for hugepages.
    # Without this, ZFS auto-sizes c_max to ~62 GiB on a 64 GiB system,
    # ignoring the 11.5 GiB of hugepage reservations.
    "zfs.zfs_arc_max=${toString arcMaxBytes}"
    # vdev I/O scheduler: feed more concurrent reads to the block scheduler so
    # mq-deadline has a larger pool of requests to sort and merge into elevator sweeps.