Compare commits

...

5 Commits

Author SHA1 Message Date
20ca945436 qbt: create timer to flush WAL
All checks were successful
Build and Deploy / deploy (push) Successful in 2m45s
2026-04-15 18:46:26 -04:00
aecd9002b0 zfs tuning 2026-04-15 18:25:56 -04:00
48efd7fcf7 qbittorrent: fix (?) perms 2026-04-15 18:25:56 -04:00
0289ce0856 xmrig-auto-pause: tweak resume_threshold 2026-04-15 18:25:56 -04:00
5b98e6197e kernel: rollback to 6.12
Major ZFS issue causing deadlocks on my system:
https://github.com/openzfs/zfs/issues/18426
2026-04-15 18:25:55 -04:00
4 changed files with 67 additions and 9 deletions

View File

@@ -133,8 +133,10 @@
boot.kernel.sysctl."vm.nr_hugepages" = service_configs.hugepages_2m.total_pages; boot.kernel.sysctl."vm.nr_hugepages" = service_configs.hugepages_2m.total_pages;
boot = { boot = {
# 6.18 LTS until 2027 # 6.12 LTS until 2027-03. Kernel 6.18 causes a reproducible ZFS deadlock
kernelPackages = pkgs.linuxPackages_6_18; # in dbuf_evict due to page allocator changes (__free_frozen_pages).
# https://github.com/openzfs/zfs/issues/18426
kernelPackages = pkgs.linuxPackages_6_12;
loader = { loader = {
# Use the systemd-boot EFI boot loader. # Use the systemd-boot EFI boot loader.

View File

@@ -1,15 +1,39 @@
{ {
config, config,
lib,
service_configs, service_configs,
pkgs, pkgs,
... ...
}: }:
let
# Total RAM in bytes (from /proc/meminfo: 65775836 KiB).
totalRamBytes = 65775836 * 1024;
# Hugepage reservations that the kernel carves out before ZFS can use them.
hugepages2mBytes = service_configs.hugepages_2m.total_pages * 2 * 1024 * 1024;
hugepages1gBytes = 3 * 1024 * 1024 * 1024; # 3x 1G pages for RandomX (xmrig.nix)
totalHugepageBytes = hugepages2mBytes + hugepages1gBytes;
# ARC max: 60% of RAM remaining after hugepages. Leaves headroom for
# application RSS (PostgreSQL, qBittorrent, Jellyfin, Grafana, etc.),
# kernel slabs, and page cache.
arcMaxBytes = (totalRamBytes - totalHugepageBytes) * 60 / 100;
in
{ {
boot.zfs.package = pkgs.zfs; boot.zfs.package = pkgs.zfs_2_4;
boot.initrd.kernelModules = [ "zfs" ]; boot.initrd.kernelModules = [ "zfs" ];
boot.kernelParams = [ boot.kernelParams = [
"zfs.zfs_txg_timeout=120" # longer TXG open time = larger sequential writes # 120s TXG timeout: batch more dirty data per transaction group so the
# HDD pool (hdds) writes larger, sequential I/Os instead of many small syncs.
# This is a global setting (no per-pool control); the SSD pool (tank) syncs
# infrequently but handles it fine since SSDs don't suffer from seek overhead.
"zfs.zfs_txg_timeout=120"
# Cap ARC to prevent it from claiming memory reserved for hugepages.
# Without this, ZFS auto-sizes c_max to ~62 GiB on a 64 GiB system,
# ignoring the 11.5 GiB of hugepage reservations.
"zfs.zfs_arc_max=${toString arcMaxBytes}"
# vdev I/O scheduler: feed more concurrent reads to the block scheduler so # vdev I/O scheduler: feed more concurrent reads to the block scheduler so
# mq-deadline has a larger pool of requests to sort and merge into elevator sweeps. # mq-deadline has a larger pool of requests to sort and merge into elevator sweeps.

View File

@@ -26,11 +26,12 @@ lib.mkIf config.services.xmrig.enable {
environment = { environment = {
POLL_INTERVAL = "3"; POLL_INTERVAL = "3";
GRACE_PERIOD = "15"; GRACE_PERIOD = "15";
# This server's background services (qbittorrent, monero, bazarr, etc.) # Background services (qbittorrent, bitmagnet, postgresql, etc.) produce
# produce 5-14% non-nice CPU during normal operation. Thresholds must # 15-25% non-nice CPU during normal operation. The stop threshold must
# sit above that noise floor. # sit above transient spikes; the resume threshold must be below the
# steady-state floor to avoid restarting xmrig while services are active.
CPU_STOP_THRESHOLD = "40"; CPU_STOP_THRESHOLD = "40";
CPU_RESUME_THRESHOLD = "30"; CPU_RESUME_THRESHOLD = "10";
STARTUP_COOLDOWN = "10"; STARTUP_COOLDOWN = "10";
STATE_DIR = "/var/lib/xmrig-auto-pause"; STATE_DIR = "/var/lib/xmrig-auto-pause";
}; };

View File

@@ -23,7 +23,9 @@ in
(lib.serviceFilePerms "qbittorrent" [ (lib.serviceFilePerms "qbittorrent" [
# 0770: group (media) needs write to delete files during upgrades — # 0770: group (media) needs write to delete files during upgrades —
# Radarr/Sonarr must unlink the old file before placing the new one. # Radarr/Sonarr must unlink the old file before placing the new one.
"Z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}" # Non-recursive (z not Z): UMask=0007 ensures new files get correct perms.
# A recursive Z rule would walk millions of files on the HDD pool at every boot.
"z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.SavePath} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group}"
"z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.TempPath} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}" "z ${config.services.qbittorrent.serverConfig.Preferences.Downloads.TempPath} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
"Z ${config.services.qbittorrent.profileDir} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}" "Z ${config.services.qbittorrent.profileDir} 0700 ${config.services.qbittorrent.user} ${config.services.qbittorrent.group}"
]) ])
@@ -162,6 +164,35 @@ in
_: path: "d ${path} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group} -" _: path: "d ${path} 0770 ${config.services.qbittorrent.user} ${service_configs.media_group} -"
) service_configs.torrent.categories; ) service_configs.torrent.categories;
# Periodically checkpoint qBittorrent's SQLite WAL (Write-Ahead Log).
# qBittorrent holds a read transaction open for its entire lifetime,
# preventing SQLite's auto-checkpoint from running. The WAL grows
# unbounded (observed: 405 MB) and must be replayed on next startup,
# causing 10+ minute "internal preparations" hangs.
# A second sqlite3 connection can checkpoint concurrently and safely.
# See: https://github.com/qbittorrent/qBittorrent/issues/20433
systemd.services.qbittorrent-wal-checkpoint = {
description = "Checkpoint qBittorrent SQLite WAL";
after = [ "qbittorrent.service" ];
requires = [ "qbittorrent.service" ];
serviceConfig = {
Type = "oneshot";
ExecStart = "${pkgs.sqlite}/bin/sqlite3 ${config.services.qbittorrent.profileDir}/qBittorrent/data/torrents.db 'PRAGMA wal_checkpoint(TRUNCATE);'";
User = config.services.qbittorrent.user;
Group = config.services.qbittorrent.group;
};
};
systemd.timers.qbittorrent-wal-checkpoint = {
description = "Periodically checkpoint qBittorrent SQLite WAL";
wantedBy = [ "timers.target" ];
timerConfig = {
OnUnitActiveSec = "4h";
OnBootSec = "30min";
RandomizedDelaySec = "10min";
};
};
users.users.${config.services.qbittorrent.user}.extraGroups = [ users.users.${config.services.qbittorrent.user}.extraGroups = [
service_configs.media_group service_configs.media_group
]; ];