fix mq-deadline for hdds: 3

This commit is contained in:
2026-03-30 21:44:05 -04:00
parent 7de24b8870
commit eaeeed7f45

View File

@@ -5,6 +5,20 @@
service_configs,
...
}:
let
# Shell helper intended for a udev RUN+= hook: given a partition kernel name,
# it writes the mq-deadline tunables on that partition's parent disk.
hddTuneIosched = pkgs.writeShellScript "hdd-tune-iosched" ''
  # udev hands us the partition kernel name as $1 (e.g. sdb1). Cutting the
  # name at its first digit leaves the parent disk name (sdb).
  disk=''${1%%[0-9]*}
  queue="/sys/block/$disk/queue"

  # Leave quietly when the disk exposes no iosched directory.
  if [ ! -d "$queue/iosched" ]; then
    exit 0
  fi

  # Scheduler tunables as knob=value pairs, written in this order.
  for setting in read_expire=15000 write_expire=15000 fifo_batch=128 writes_starved=16; do
    echo "''${setting#*=}" > "$queue/iosched/''${setting%%=*}"
  done

  # Best effort only: a rejected write is silenced and never fails the hook.
  echo 4096 > "$queue/max_sectors_kb" 2>/dev/null || true
'';
in
{
boot.initrd.availableKernelModules = [
"xhci_pci"
@@ -30,48 +44,13 @@
# fifo_batch=128 keeps sweeps long; writes_starved=16 heavily favors reads.
# 4 MiB readahead matches libtorrent piece extent affinity for sequential prefetch.
#
# This runs as a systemd oneshot rather than udev rules because the NixOS ZFS module
# hardcodes a udev rule that forces scheduler="none" on all ZFS member partitions'
# parent disks, overriding any scheduler set via udev on the disk event.
# One-shot unit that switches every disk backing the `hdds` ZFS pool to
# mq-deadline and writes the queue/iosched tunables under /sys/block/<disk>/queue.
systemd.services.hdd-io-tuning = {
description = "HDD I/O scheduler and queue tuning";
# Ordering only: run after pool import and after udev has settled.
# No Wants=/Requires= is declared here, so these units are not pulled in by this one.
after = [
"zfs-import.target"
"systemd-udev-settle.service"
];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
# Run the script once; RemainAfterExit keeps the unit reported as active afterwards.
Type = "oneshot";
RemainAfterExit = true;
};
# Commands the script invokes: basename/readlink (coreutils), awk (gawk), zpool (zfs).
path = with pkgs; [
coreutils
gawk
zfs
];
# For each tab-indented `zpool status hdds` line whose first field starts with
# ata-, nvme- or scsi-, resolve /dev/disk/by-id/<name> to its kernel device and
# write the tunables. Names that are not symlinks, or that do not resolve to a
# /sys/block/<name> directory, are skipped without any message.
# NOTE(review): a by-id name that resolves to a partition (e.g. sdb1) has no
# /sys/block entry and would be skipped silently — confirm the pool lists
# whole-disk names.
script = ''
# Only tune disks in the hdds pool not all rotational disks.
# zpool status gives by-id device names; we resolve to /sys/block/<name>.
zpool status hdds | awk '/^\t/ && $1 ~ /^(ata-|nvme-|scsi-)/ {print $1}' | while read -r id; do
link="/dev/disk/by-id/$id"
[ -L "$link" ] || continue
name=$(basename "$(readlink -f "$link")")
dev="/sys/block/$name"
[ -d "$dev" ] || continue
echo mq-deadline > "$dev/queue/scheduler"
echo 4096 > "$dev/queue/read_ahead_kb"
echo 512 > "$dev/queue/nr_requests"
echo 15000 > "$dev/queue/iosched/read_expire"
echo 15000 > "$dev/queue/iosched/write_expire"
echo 128 > "$dev/queue/iosched/fifo_batch"
echo 16 > "$dev/queue/iosched/writes_starved"
echo 4096 > "$dev/queue/max_sectors_kb" 2>/dev/null || true
echo "Tuned $id -> $name: mq-deadline, 4M readahead, 15s deadlines"
done
'';
};
# The NixOS ZFS module hardcodes a udev rule that forces scheduler="none" on all
# ZFS member partitions' parent disks (on both add AND change events). We counter
# it with lib.mkAfter so our rule appears after theirs in 99-local.rules — our
# rule matches the same partition events and sets mq-deadline back, then a RUN
# script applies the iosched params. Only targets rotational, non-removable disks
# (i.e. HDDs, not SSDs or USB).
# Rule breakdown (`==` clauses match, `=` clauses assign, `+=` appends):
#   match : add/change events for kernel names matching sd[a-z]*[0-9]* whose
#           ID_FS_TYPE is zfs_member, and whose parent device (../) reports
#           queue/rotational == 1 and removable == 0
#   assign: on the parent's queue: scheduler=mq-deadline, read_ahead_kb=4096,
#           nr_requests=512
#   run   : hddTuneIosched with %k (the matched device's kernel name) as its
#           only argument; that script writes the iosched parameters
services.udev.extraRules = lib.mkAfter ''
ACTION=="add|change", KERNEL=="sd[a-z]*[0-9]*", ENV{ID_FS_TYPE}=="zfs_member", ATTR{../queue/rotational}=="1", ATTR{../removable}=="0", ATTR{../queue/scheduler}="mq-deadline", ATTR{../queue/read_ahead_kb}="4096", ATTR{../queue/nr_requests}="512", RUN+="${hddTuneIosched} %k"
'';
}