jellyfin: fix Arc A380 VAAPI transcode (ASPM power gating)
Some checks failed
Build and Deploy / deploy (push) Failing after 1m16s

Arc A380 GPU (07:00.0) becomes unreachable (MMIO returns 0xFFFFFFFF)
when PCIe ASPM powersupersave puts it into L1.1/L1.2 substates.
Both i915 and xe drivers hit the same hardware failure.

Fix: disable runtime PM for the GPU in the power-tune service, and order
that service after powertop.service so the override sticks. Use the i915
driver (xe has iHD buffer mapping failures on this GPU/kernel 6.12
combination).
This commit is contained in:
2026-04-07 18:17:52 -04:00
parent b4f62523d9
commit da62ed557c
2 changed files with 19 additions and 11 deletions

View File

@@ -120,15 +120,12 @@
};
};
# Arc A380 (56a5) is not in xe's default probe list on kernel 6.12.
# Without force_probe, xe refuses to bind and i915 claims the device
# instead -- producing broken MMIO (forcewake 0xFFFFFFFF) and crashing
# every VAAPI transcode.
hardware.intelgpu.driver = "xe";
boot.kernelParams = [
"xe.force_probe=56a5"
"i915.force_probe=!56a5"
];
# Intel Arc A380 (DG2, 56a5) uses the i915 driver on kernel 6.12.
# The xe driver's iHD media driver integration has buffer mapping
# failures on this GPU/kernel combination. i915 works correctly for
# VAAPI transcode as long as the GPU is kept out of runtime suspend
# and deep ASPM link states (see modules/power.nix).
hardware.intelgpu.driver = "i915";
# Per-service 2MB hugepage budget calculated in service-configs.nix.
boot.kernel.sysctl."vm.nr_hugepages" = service_configs.hugepages_2m.total_pages;

View File

@@ -65,10 +65,14 @@
# ASPM powersupersave: deepest PCIe link power states (L1.1/L1.2). The
# pcie_aspm=force boot param enables ASPM, but the runtime policy defaults
# to "default" which only uses L0s. powersupersave adds L1 substates for
# all downstream devices (NVMe, AHCI, NIC).
# all downstream devices (NVMe, AHCI, NIC). The Intel Arc A380 GPU is
# handled separately: the script forces its power/control to "on",
# because it hangs on L1 substate re-entry (MMIO returns 0xFFFFFFFF).
systemd.services.power-tune = {
description = "Apply power-saving sysfs knobs (EPP, ASPM policy)";
after = [ "multi-user.target" ];
after = [
"multi-user.target"
"powertop.service"
];
wantedBy = [ "multi-user.target" ];
serviceConfig = {
Type = "oneshot";
@@ -85,6 +89,13 @@
# PCIe ASPM policy
aspm=/sys/module/pcie_aspm/parameters/policy
[ -f "$aspm" ] && echo powersupersave > "$aspm"
# Intel Arc A380 (07:00.0) hangs when PCIe link enters L1
# substates or runtime PM suspends it. Force the device to stay
# active. This runs after powertop --auto-tune (which sets
# power/control=auto on every device) so the override sticks.
gpu=/sys/bus/pci/devices/0000:07:00.0/power/control
[ -f "$gpu" ] && echo on > "$gpu"
'';
}
);