Compare commits

...

3 Commits

Author SHA1 Message Date
f342521d46 llama-cpp: re-add w/ turboquant
All checks were successful
Build and Deploy / deploy (push) Successful in 28m52s
2026-04-02 13:42:39 -04:00
7e779ca0f7 power optimizations 2026-04-02 13:13:38 -04:00
06b2016bd6 recyclarr: things 2026-04-01 20:37:18 -04:00
7 changed files with 246 additions and 17 deletions

View File

@@ -20,6 +20,7 @@
./modules/no-rgb.nix
./modules/security.nix
./modules/ntfy-alerts.nix
./modules/power.nix
./services/postgresql.nix
./services/jellyfin.nix
@@ -46,6 +47,8 @@
./services/soulseek.nix
./services/llama-cpp.nix
./services/ups.nix
./services/monitoring.nix
./services/jellyfin-annotations.nix
@@ -91,13 +94,6 @@
services.kmscon.enable = true;
systemd.targets = {
sleep.enable = false;
suspend.enable = false;
hibernate.enable = false;
hybrid-sleep.enable = false;
};
# Disable serial getty on ttyS0 to prevent dmesg warnings
systemd.services."serial-getty@ttyS0".enable = false;
@@ -109,12 +105,6 @@
enable = false;
};
powerManagement = {
powertop.enable = true;
enable = true;
cpuFreqGovernor = "powersave";
};
# https://github.com/NixOS/nixpkgs/issues/101459#issuecomment-758306434
security.pam.loginLimits = [
{

53
flake.lock generated
View File

@@ -150,6 +150,24 @@
"type": "github"
}
},
"flake-parts": {
"inputs": {
"nixpkgs-lib": "nixpkgs-lib"
},
"locked": {
"lastModified": 1730504689,
"narHash": "sha256-hgmguH29K2fvs9szpq2r3pz2/8cJd2LPS+b4tfNFCwE=",
"owner": "hercules-ci",
"repo": "flake-parts",
"rev": "506278e768c2a08bec68eb62932193e341f55c90",
"type": "github"
},
"original": {
"owner": "hercules-ci",
"repo": "flake-parts",
"type": "github"
}
},
"flake-utils": {
"inputs": {
"systems": "systems_4"
@@ -276,6 +294,28 @@
"type": "github"
}
},
"llamacpp": {
"inputs": {
"flake-parts": "flake-parts",
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1775101360,
"narHash": "sha256-X1cyWED8lmsGKFc7Pb6nGJ8EVzpPqi5iKcyL8NVVIe8=",
"owner": "TheTom",
"repo": "llama-cpp-turboquant",
"rev": "04eeabb0d344b54ca12d4140b8af8c236ffe7beb",
"type": "github"
},
"original": {
"owner": "TheTom",
"ref": "feature/turboquant-kv-cache",
"repo": "llama-cpp-turboquant",
"type": "github"
}
},
"nix-minecraft": {
"inputs": {
"flake-compat": "flake-compat_3",
@@ -330,6 +370,18 @@
"type": "github"
}
},
"nixpkgs-lib": {
"locked": {
"lastModified": 1730504152,
"narHash": "sha256-lXvH/vOfb4aGYyvFmZK/HlsNsr/0CVWlwYvo2rxJk3s=",
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
},
"original": {
"type": "tarball",
"url": "https://github.com/NixOS/nixpkgs/archive/cc2f28000298e1269cea6612cd06ec9979dd5d7f.tar.gz"
}
},
"nixpkgs-p2pool-module": {
"flake": false,
"locked": {
@@ -395,6 +447,7 @@
"home-manager": "home-manager",
"impermanence": "impermanence",
"lanzaboote": "lanzaboote",
"llamacpp": "llamacpp",
"nix-minecraft": "nix-minecraft",
"nixos-hardware": "nixos-hardware",
"nixpkgs": "nixpkgs",

View File

@@ -28,6 +28,11 @@
inputs.nixpkgs.follows = "nixpkgs";
};
llamacpp = {
url = "github:TheTom/llama-cpp-turboquant/feature/turboquant-kv-cache";
inputs.nixpkgs.follows = "nixpkgs";
};
srvos = {
url = "github:nix-community/srvos";
inputs.nixpkgs.follows = "nixpkgs";

93
modules/power.nix Normal file
View File

@@ -0,0 +1,93 @@
# Power-saving configuration for an always-on server.
#
# Sets the CPU governor and powertop auto-tune, disables all sleep targets,
# adds power-related kernel parameters, and installs a oneshot unit that
# writes the sysfs knobs powertop cannot reach.
{
  lib,
  pkgs,
  ...
}:
{
  powerManagement = {
    enable = true;
    powertop.enable = true;
    cpuFreqGovernor = "powersave";
  };

  # Always-on server: disable all sleep targets.
  systemd.targets = {
    sleep.enable = false;
    suspend.enable = false;
    hibernate.enable = false;
    hybrid-sleep.enable = false;
  };

  boot.kernelParams = [
    # Disable NMI watchdog at boot. Eliminates periodic perf-counter interrupts
    # across all cores (~1 W). Safe: apcupsd provides hardware hang detection
    # via UPS, and softlockup watchdog remains active.
    "nmi_watchdog=0"

    # Route kernel work items to already-busy CPUs rather than waking idle ones.
    # Reduces C-state exit frequency at the cost of slightly higher latency on
    # work items -- irrelevant for a server whose latency-sensitive paths are
    # all in userspace (caddy, jellyfin).
    "workqueue.power_efficient=1"

    # Force PCIe ASPM on even if the BIOS doesn't advertise support. ASRock
    # B550M Pro4 BIOS defaults are conservative; the Zen 3 root complex and
    # all downstream devices (NVMe, AHCI, Intel NIC) support L1 substates.
    # powertop auto-tune sets the policy to powersupersave at runtime, but
    # without `force` the kernel may refuse to enable ASPM at all if the BIOS
    # opted out, making the policy write a no-op.
    "pcie_aspm=force"
  ];

  boot.kernel.sysctl = {
    # Belt-and-suspenders: also set via boot param, but sysctl ensures it
    # stays off if anything re-enables it at runtime.
    "kernel.nmi_watchdog" = 0;
  };

  # Server has no audio consumers. Power-gate the HDA codec at module load
  # rather than waiting for powertop auto-tune to do it after boot.
  boot.extraModprobeConfig = ''
    options snd_hda_intel power_save=1 power_save_controller=Y
  '';

  # Apply sysfs power knobs that powertop --auto-tune cannot reach (hardened
  # kernel blocks debugfs mount, so powertop silently skips ASPM policy and
  # may only lower EPP to balance_power instead of power).
  #
  # AMD pstate EPP "power": deepest P-states, fastest core parking. Safe because:
  #   - xmrig runs at Nice=19 / CPUSchedulingPolicy=idle and tolerates latency
  #   - web services (caddy, jellyfin) are I/O-bound; the ~50 us extra C-state
  #     exit latency is invisible behind network RTT
  #   - Minecraft server benefits from single-thread boost, which pstate still
  #     provides on demand even in "power" mode (just with slightly slower ramp)
  #
  # ASPM powersupersave: deepest PCIe link power states (L1.1/L1.2). The
  # pcie_aspm=force boot param enables ASPM, but the runtime policy defaults
  # to "default" which only uses L0s. powersupersave adds L1 substates for
  # all downstream devices (NVMe, AHCI, NIC).
  systemd.services.power-tune = {
    description = "Apply power-saving sysfs knobs (EPP, ASPM policy)";
    # Order after powertop's auto-tune as well: both units are otherwise only
    # ordered after multi-user.target, so powertop could run last and replace
    # the EPP value written here.
    after = [
      "multi-user.target"
      "powertop.service"
    ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      Type = "oneshot";
      RemainAfterExit = true;
      ExecStart = lib.getExe (
        pkgs.writeShellApplication {
          name = "power-tune";
          # writeShellApplication enables errexit/nounset/pipefail. A trailing
          # `[ -f x ] && cmd` would make the script exit 1 whenever the knob
          # is absent, and one failed write would skip every later knob. So:
          # absent knobs are skipped, failed writes are reported and counted,
          # and the unit fails only after every knob has been attempted.
          text = ''
            rc=0

            # AMD pstate energy performance preference
            for epp in /sys/devices/system/cpu/cpu*/cpufreq/energy_performance_preference; do
              # Also covers the unmatched-glob case, where the pattern is literal.
              [ -f "$epp" ] || continue
              if ! echo power > "$epp"; then
                echo "power-tune: failed to write EPP to $epp" >&2
                rc=1
              fi
            done

            # PCIe ASPM policy
            aspm=/sys/module/pcie_aspm/parameters/policy
            if [ -f "$aspm" ]; then
              if ! echo powersupersave > "$aspm"; then
                echo "power-tune: failed to write ASPM policy to $aspm" >&2
                rc=1
              fi
            fi

            exit "$rc"
          '';
        }
      );
    };
  };
}

View File

@@ -169,6 +169,10 @@ rec {
port = 9162;
proto = "tcp";
};
llama_cpp = {
port = 6688;
proto = "tcp";
};
};
};

View File

@@ -99,7 +99,7 @@ in
}
];
}
# x265 (HD) - override template -10000 penalty
# x265 (HD) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "dc98083864ea246d05a42df0d05f81cc" ];
assign_scores_to = [
@@ -109,7 +109,7 @@ in
}
];
}
# x265 (no HDR/DV) - override template -10000 penalty
# x265 (no HDR/DV) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "839bea857ed2c0a8e084f3cbdbd65ecb" ];
assign_scores_to = [
@@ -119,6 +119,28 @@ in
}
];
}
# Codec ranking: AV1 (20) > HEVC (10) > AVC (0)
#
# Positive scores only -- nothing drops below min_format_score.
# AVC stays at 0 implicitly (no custom format adds or removes score).
{
trash_ids = [ "cae4ca30163749b891686f95532519bd" ]; # AV1
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 20;
}
];
}
{
trash_ids = [ "9170d55c319f4fe40da8711ba9d8050d" ]; # x265
assign_scores_to = [
{
name = "Remux + WEB 2160p";
score = 10;
}
];
}
];
};
@@ -177,7 +199,7 @@ in
}
];
}
# x265 (HD) - override template -10000 penalty
# x265 (HD) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "47435ece6b99a0b477caf360e79ba0bb" ];
assign_scores_to = [
@@ -187,7 +209,7 @@ in
}
];
}
# x265 (no HDR/DV) - override template -10000 penalty
# x265 (no HDR/DV) - override template -10000 penalty for non-2160p HEVC
{
trash_ids = [ "9b64dff695c2115facf1b6ea59c9bd07" ];
assign_scores_to = [
@@ -197,6 +219,28 @@ in
}
];
}
# Codec ranking: AV1 (20) > HEVC (10) > AVC (0)
#
# Positive scores only -- nothing drops below min_format_score.
# AVC stays at 0 implicitly (no custom format adds or removes score).
{
trash_ids = [ "15a05bc7c1a36e2b57fd628f8977e2fc" ]; # AV1
assign_scores_to = [
{
name = "WEB-2160p";
score = 20;
}
];
}
{
trash_ids = [ "c9eafd50846d299b862ca9bb6ea91950" ]; # x265
assign_scores_to = [
{
name = "WEB-2160p";
score = 10;
}
];
}
];
};
};

40
services/llama-cpp.nix Normal file
View File

@@ -0,0 +1,40 @@
# llama.cpp inference server, built from the `llamacpp` flake input (Vulkan
# variant), serving a GGUF model fetched at build time and published through
# Caddy at llm.<domain>.
{
  pkgs,
  service_configs,
  config,
  inputs,
  lib,
  ...
}:
{
  services.llama-cpp = {
    enable = true;

    # The model is fetched into the Nix store; toString yields its store path.
    model = toString (
      pkgs.fetchurl {
        url = "https://huggingface.co/Jackrong/Qwen3.5-9B-Claude-4.6-Opus-Reasoning-Distilled-v2-GGUF/resolve/main/Qwen3.5-9B.Q4_K_M.gguf";
        sha256 = "8fbbc7b04a7d4b052d14b7aa97c8bf2014d39ceca8c2baaa043711712ba71ccc";
      }
    );

    port = service_configs.ports.private.llama_cpp.port;

    # NOTE(review): this binds every interface, while the Caddy vhost below
    # only needs loopback. Unless the firewall blocks this port or a LAN
    # client needs direct access, "127.0.0.1" would keep the API reachable
    # only through the authenticated proxy -- confirm before changing.
    host = "0.0.0.0";

    # `pkgs.system` is a deprecated alias in recent nixpkgs; use the host
    # platform's system string directly.
    package = (lib.optimizePackage inputs.llamacpp.packages.${pkgs.stdenv.hostPlatform.system}.vulkan);

    extraFlags = [
      # Number of model layers to offload to the GPU.
      "-ngl"
      "12"
      # Context size in tokens.
      "-c"
      "16384"
      # KV cache types: K cache as q8_0, V cache as turbo3 (presumably a type
      # added by the turboquant fork -- verify against its documentation).
      "-ctk"
      "q8_0"
      "-ctv"
      "turbo3"
    ];
  };

  # have to do this in order to get vulkan to work
  # NOTE(review): with DynamicUser forced off and no User set in this file,
  # the unit presumably runs as root -- confirm, and consider a dedicated
  # user with GPU device access instead.
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;

  # Public entry point: Caddy applies the shared auth snippet, then proxies
  # to the local llama-cpp port.
  services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
    import ${config.age.secrets.caddy_auth.path}
    reverse_proxy :${toString config.services.llama-cpp.port}
  '';
}