grafana: replace custom metric collectors with community exporters

Replace three custom Prometheus textfile collector scripts with
dedicated community-maintained exporters:

- jellyfin-collector.nix (25 LoC shell) -> rebelcore/jellyfin_exporter
  Metric: jellyfin_active_streams -> count(jellyfin_now_playing_state)
  Bonus: per-session labels (user, title, device, codec info)

- qbittorrent-collector.nix (40 LoC shell) -> anriha/qbittorrent-metrics-exporter
  Metric: qbittorrent_{download,upload}_bytes_per_second -> qbit_{dl,up}speed
  Bonus: per-torrent metrics with category/tag aggregation

- intel-gpu-collector.nix + .py (130 LoC Python) -> mike1808/igpu-exporter
  Metric: intel_gpu_engine_busy_percent -> igpu_engines_busy_percent
  Bonus: persistent daemon vs oneshot timer, no streaming JSON parser

All three run as persistent daemons scraped by Prometheus, replacing
the textfile-collector pattern of systemd timers writing .prom files.
Dashboard PromQL queries updated to match new metric names.
This commit is contained in:
2026-04-03 15:23:47 -04:00
parent 479ec43b8f
commit 3f62b9c88e
12 changed files with 302 additions and 270 deletions

118
flake.lock generated
View File

@@ -102,6 +102,29 @@
"type": "github"
}
},
"fenix": {
"inputs": {
"nixpkgs": [
"qbittorrent-metrics-exporter",
"naersk",
"nixpkgs"
],
"rust-analyzer-src": "rust-analyzer-src"
},
"locked": {
"lastModified": 1752475459,
"narHash": "sha256-z6QEu4ZFuHiqdOPbYss4/Q8B0BFhacR8ts6jO/F/aOU=",
"owner": "nix-community",
"repo": "fenix",
"rev": "bf0d6f70f4c9a9cf8845f992105652173f4b617f",
"type": "github"
},
"original": {
"owner": "nix-community",
"repo": "fenix",
"type": "github"
}
},
"flake-compat": {
"flake": false,
"locked": {
@@ -170,7 +193,7 @@
},
"flake-utils": {
"inputs": {
"systems": "systems_4"
"systems": "systems_5"
},
"locked": {
"lastModified": 1731533236,
@@ -316,6 +339,26 @@
"type": "github"
}
},
"naersk": {
"inputs": {
"fenix": "fenix",
"nixpkgs": "nixpkgs_2"
},
"locked": {
"lastModified": 1763384566,
"narHash": "sha256-r+wgI+WvNaSdxQmqaM58lVNvJYJ16zoq+tKN20cLst4=",
"owner": "nix-community",
"repo": "naersk",
"rev": "d4155d6ebb70fbe2314959842f744aa7cabbbf6a",
"type": "github"
},
"original": {
"owner": "nix-community",
"ref": "master",
"repo": "naersk",
"type": "github"
}
},
"nix-minecraft": {
"inputs": {
"flake-compat": "flake-compat_3",
@@ -400,6 +443,22 @@
}
},
"nixpkgs_2": {
"locked": {
"lastModified": 1752077645,
"narHash": "sha256-HM791ZQtXV93xtCY+ZxG1REzhQenSQO020cu6rHtAPk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "be9e214982e20b8310878ac2baa063a961c1bdf6",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixpkgs-unstable",
"repo": "nixpkgs",
"type": "github"
}
},
"nixpkgs_3": {
"locked": {
"lastModified": 1764517877,
"narHash": "sha256-pp3uT4hHijIC8JUK5MEqeAWmParJrgBVzHLNfJDZxg4=",
@@ -438,6 +497,28 @@
"type": "github"
}
},
"qbittorrent-metrics-exporter": {
"inputs": {
"naersk": "naersk",
"nixpkgs": [
"nixpkgs"
],
"systems": "systems_4"
},
"locked": {
"lastModified": 1771989937,
"narHash": "sha256-bPUV4gVvSbF4VMkbLKYrfwVwzTeS+Sr41wucDj1///g=",
"ref": "refs/heads/main",
"rev": "cb94f866b7a2738532b1cae31d0b9f89adecbd54",
"revCount": 112,
"type": "git",
"url": "https://codeberg.org/anriha/qbittorrent-metrics-exporter"
},
"original": {
"type": "git",
"url": "https://codeberg.org/anriha/qbittorrent-metrics-exporter"
}
},
"root": {
"inputs": {
"agenix": "agenix",
@@ -452,6 +533,7 @@
"nixos-hardware": "nixos-hardware",
"nixpkgs": "nixpkgs",
"nixpkgs-p2pool-module": "nixpkgs-p2pool-module",
"qbittorrent-metrics-exporter": "qbittorrent-metrics-exporter",
"senior_project-website": "senior_project-website",
"srvos": "srvos",
"trackerlist": "trackerlist",
@@ -460,6 +542,23 @@
"ytbn-graphing-software": "ytbn-graphing-software"
}
},
"rust-analyzer-src": {
"flake": false,
"locked": {
"lastModified": 1752428706,
"narHash": "sha256-EJcdxw3aXfP8Ex1Nm3s0awyH9egQvB2Gu+QEnJn2Sfg=",
"owner": "rust-lang",
"repo": "rust-analyzer",
"rev": "591e3b7624be97e4443ea7b5542c191311aa141d",
"type": "github"
},
"original": {
"owner": "rust-lang",
"ref": "nightly",
"repo": "rust-analyzer",
"type": "github"
}
},
"rust-overlay": {
"inputs": {
"nixpkgs": [
@@ -598,6 +697,21 @@
"type": "github"
}
},
"systems_5": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"trackerlist": {
"flake": false,
"locked": {
@@ -666,7 +780,7 @@
"ytbn-graphing-software": {
"inputs": {
"flake-utils": "flake-utils",
"nixpkgs": "nixpkgs_2",
"nixpkgs": "nixpkgs_3",
"rust-overlay": "rust-overlay_2"
},
"locked": {

View File

@@ -83,6 +83,11 @@
url = "github:JacoMalan1/nixpkgs/create-p2pool-service";
flake = false;
};
# Flake input providing the qBittorrent metrics exporter, fetched over git from Codeberg.
qbittorrent-metrics-exporter = {
url = "git+https://codeberg.org/anriha/qbittorrent-metrics-exporter";
# Make the exporter's nixpkgs input follow this flake's nixpkgs instead of its own pin.
# NOTE(review): its transitive naersk input still locks a separate nixpkgs — confirm whether that is intended.
inputs.nixpkgs.follows = "nixpkgs";
};
};
outputs =

View File

@@ -43,4 +43,36 @@ final: prev: {
}
);
};
# Prometheus exporter for Jellyfin, built from a pinned commit of
# rebelcore/jellyfin_exporter. No `rec` is needed: nothing in the attribute
# set refers to a sibling attribute.
jellyfin-exporter = prev.buildGoModule {
  pname = "jellyfin-exporter";
  version = "unstable-2025-03-27";

  src = prev.fetchFromGitHub {
    owner = "rebelcore";
    repo = "jellyfin_exporter";
    rev = "8e3970cb1bdf3cb21fac099c13072bb7c1b20cf9";
    hash = "sha256-wDnhepYj1MyLRZlwKfmwf4xiEEL3mgQY6V+7TnBd0MY=";
  };
  vendorHash = "sha256-e08u10e/wNapNZSsD/fGVN9ybMHe3sW0yDIOqI8ZcYs=";

  # upstream tests require a running Jellyfin instance
  doCheck = false;

  # The executable is named with an underscore, unlike pname, so tell
  # lib.getExe which binary to pick.
  meta.mainProgram = "jellyfin_exporter";
};
# Prometheus exporter for Intel integrated GPUs, built from a pinned commit of
# mike1808/igpu-exporter. No `rec` is needed: nothing in the attribute set
# refers to a sibling attribute.
igpu-exporter = prev.buildGoModule {
  pname = "igpu-exporter";
  version = "unstable-2025-03-27";

  src = prev.fetchFromGitHub {
    owner = "mike1808";
    repo = "igpu-exporter";
    rev = "db2dace1a895c2b950f6d3ba1a2e46729251d124";
    hash = "sha256-xWTiu26UzTZIK/6jeda+x6VePUgoWTS0AekejFdgFWs=";
  };
  vendorHash = "sha256-oeCSKwDKVwvYQ1fjXXTwQSXNl/upDE3WAAk680vqh3U=";

  # Only the ./cmd package is built; its binary is installed as `cmd`, so
  # rename it to something meaningful.
  subPackages = [ "cmd" ];
  postInstall = ''
    mv "$out/bin/cmd" "$out/bin/igpu-exporter"
  '';

  meta.mainProgram = "igpu-exporter";
};
}

View File

@@ -177,6 +177,18 @@ rec {
port = 8787;
proto = "tcp";
};
# Listen port for the Jellyfin Prometheus exporter.
jellyfin_exporter = {
port = 9594;
proto = "tcp";
};
# Listen port for the qBittorrent Prometheus exporter.
qbittorrent_exporter = {
port = 9561;
proto = "tcp";
};
# Listen port for the Intel iGPU Prometheus exporter.
igpu_exporter = {
port = 9563;
proto = "tcp";
};
};
};

View File

@@ -387,7 +387,7 @@ let
targets = [
{
datasource = promDs;
expr = "jellyfin_active_streams";
expr = "count(jellyfin_now_playing_state) or vector(0)";
refId = "A";
}
];
@@ -439,25 +439,25 @@ let
targets = [
{
datasource = promDs;
expr = "qbittorrent_download_bytes_per_second";
expr = "sum(qbit_dlspeed) or vector(0)";
legendFormat = "Download";
refId = "A";
}
{
datasource = promDs;
expr = "qbittorrent_upload_bytes_per_second";
expr = "sum(qbit_upspeed) or vector(0)";
legendFormat = "Upload";
refId = "B";
}
{
datasource = promDs;
expr = "avg_over_time(qbittorrent_download_bytes_per_second[10m:])";
expr = "avg_over_time((sum(qbit_dlspeed) or vector(0))[10m:])";
legendFormat = "Download (10m avg)";
refId = "C";
}
{
datasource = promDs;
expr = "avg_over_time(qbittorrent_upload_bytes_per_second[10m:])";
expr = "avg_over_time((sum(qbit_upspeed) or vector(0))[10m:])";
legendFormat = "Upload (10m avg)";
refId = "D";
}
@@ -577,7 +577,7 @@ let
targets = [
{
datasource = promDs;
expr = "intel_gpu_engine_busy_percent";
expr = "igpu_engines_busy_percent";
legendFormat = "{{engine}}";
refId = "A";
}

View File

@@ -3,10 +3,8 @@
./grafana.nix
./prometheus.nix
./dashboard.nix
./jellyfin-collector.nix
./exporters.nix
./jellyfin-annotations.nix
./qbittorrent-collector.nix
./intel-gpu-collector.nix
./disk-usage-collector.nix
./llama-cpp-annotations.nix
./zfs-scrub-annotations.nix

View File

@@ -0,0 +1,112 @@
{
  config,
  inputs,
  lib,
  pkgs,
  service_configs,
  ...
}:
let
  # Private (non-public) port registry; each exporter has its own entry.
  privatePorts = service_configs.ports.private;

  # Listen ports of the three exporters defined in this module.
  jellyfinExporterPort = privatePorts.jellyfin_exporter.port;
  qbitExporterPort = privatePorts.qbittorrent_exporter.port;
  igpuExporterPort = privatePorts.igpu_exporter.port;
in
{
# -- Jellyfin Prometheus Exporter --
# Long-running replacement for the custom jellyfin-collector.nix textfile timer.
# Exposes per-session metrics (jellyfin_now_playing_state) and library stats.
systemd.services.jellyfin-exporter =
  let
    jellyfinPort = service_configs.ports.private.jellyfin.port;

    # Wrapper script: reads the API key from the systemd credential directory
    # at start-up and passes it to the exporter on its command line.
    launcher = pkgs.writeShellApplication {
      name = "jellyfin-exporter-wrapper";
      runtimeInputs = [ pkgs.jellyfin-exporter ];
      text = ''
        exec jellyfin_exporter \
        --jellyfin.address=http://127.0.0.1:${toString jellyfinPort} \
        --jellyfin.token="$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")" \
        --web.listen-address=127.0.0.1:${toString jellyfinExporterPort}
      '';
    };
  in
  lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
    description = "Prometheus exporter for Jellyfin";
    wantedBy = [ "multi-user.target" ];
    after = [
      "network.target"
      "jellyfin.service"
    ];
    serviceConfig = {
      ExecStart = lib.getExe launcher;
      # The agenix secret is exposed to the unit as a credential named
      # jellyfin-api-key, which the wrapper reads.
      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";

      Restart = "on-failure";
      RestartSec = "10s";

      # Sandboxing: throwaway user, read-only system, no home or shared /tmp.
      DynamicUser = true;
      NoNewPrivileges = true;
      MemoryDenyWriteExecute = true;
      PrivateTmp = true;
      ProtectHome = true;
      ProtectSystem = "strict";
    };
  };
# -- qBittorrent Prometheus Exporter --
# Replaces custom qbittorrent-collector.nix textfile timer.
# Exposes per-torrent metrics (qbit_dlspeed, qbit_upspeed) and aggregate stats.
# qBittorrent runs in a VPN namespace; the exporter reaches it via namespace address.
systemd.services.qbittorrent-exporter =
  let
    # qBittorrent WebUI base URL, addressed through the VPN namespace.
    qbitWebUi = "http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}";
  in
  lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
    description = "Prometheus exporter for qBittorrent";
    after = [
      "network.target"
      "qbittorrent.service"
    ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      # Use stdenv.hostPlatform.system rather than the deprecated pkgs.system
      # alias to select the flake's package for this host.
      ExecStart =
        lib.getExe' inputs.qbittorrent-metrics-exporter.packages.${pkgs.stdenv.hostPlatform.system}.default
          "qbittorrent-metrics-exporter";
      Restart = "on-failure";
      RestartSec = "10s";
      DynamicUser = true;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
    };
    environment = {
      HOST = "127.0.0.1";
      PORT = toString qbitExporterPort;
      SCRAPE_INTERVAL = "15";
      BACKEND = "in-memory";
      # qBittorrent has AuthSubnetWhitelist=0.0.0.0/0, so no real password needed.
      # The exporter still expects the env var to be set.
      QBITTORRENT_PASSWORD = "unused";
      QBITTORRENT_USERNAME = "admin";
      # The same URL is given on both sides of the `|`.
      # NOTE(review): presumably the two fields are distinct URLs in the
      # exporter's TORRENT_HOSTS format — confirm against upstream docs.
      TORRENT_HOSTS = "qbit:main=${qbitWebUi}|${qbitWebUi}";
      RUST_LOG = "warn";
    };
  };
# -- Intel GPU Prometheus Exporter --
# Replaces custom intel-gpu-collector.nix + intel-gpu-collector.py textfile timer.
# Exposes engine busy%, frequency, and RC6 metrics via /metrics.
# Requires privileged access to GPU debug interfaces (intel_gpu_top).
systemd.services.igpu-exporter = lib.mkIf config.services.grafana.enable {
  description = "Prometheus exporter for Intel integrated GPU";
  wantedBy = [ "multi-user.target" ];
  # Puts intel_gpu_top (from intel-gpu-tools) on the unit's PATH.
  path = [ pkgs.intel-gpu-tools ];
  serviceConfig = {
    ExecStart = lib.getExe pkgs.igpu-exporter;
    Restart = "on-failure";
    RestartSec = "10s";
    # intel_gpu_top requires root-level access to GPU debug interfaces, so
    # this unit cannot use DynamicUser like the other exporters. It still gets
    # the same remaining sandboxing: no privilege gain through exec, a
    # read-only file system (/dev, /proc and /sys stay accessible), no home
    # directories and a private /tmp.
    NoNewPrivileges = true;
    ProtectSystem = "strict";
    ProtectHome = true;
    PrivateTmp = true;
  };
  environment = {
    # NOTE(review): only a port is configured here; confirm which address the
    # exporter binds to, since the other exporters are pinned to 127.0.0.1.
    PORT = toString igpuExporterPort;
    REFRESH_PERIOD_MS = "30000";
  };
};
}

View File

@@ -1,38 +0,0 @@
{
  config,
  lib,
  pkgs,
  ...
}:
let
  # Directory the collector drops its .prom file into.
  promDir = "/var/lib/prometheus-node-exporter-textfiles";

  # Launcher that puts python3 and intel-gpu-tools on PATH and then hands over
  # to the Python collector script.
  collectorScript = pkgs.writeShellApplication {
    name = "intel-gpu-collector";
    runtimeInputs = [
      pkgs.python3
      pkgs.intel-gpu-tools
    ];
    text = ''
      exec python3 ${./intel-gpu-collector.py}
    '';
  };
in
lib.mkIf config.services.grafana.enable {
  systemd = {
    # One-shot run; the output path is passed via the TEXTFILE variable.
    services.intel-gpu-collector = {
      description = "Collect Intel GPU metrics for Prometheus";
      environment.TEXTFILE = "${promDir}/intel-gpu.prom";
      serviceConfig = {
        ExecStart = lib.getExe collectorScript;
        Type = "oneshot";
      };
    };

    # Triggers the service at seconds 0 and 30 of every minute, with up to
    # 10s of random delay.
    timers.intel-gpu-collector = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        RandomizedDelaySec = "10s";
        OnCalendar = "*:*:0/30";
      };
    };
  };
}

View File

@@ -1,107 +0,0 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
import time
# Path of the metrics file to write; the TEXTFILE environment variable
# overrides the built-in default.
TEXTFILE = os.environ.get(
"TEXTFILE",
"/var/lib/prometheus-node-exporter-textfiles/intel-gpu.prom",
)
def read_one_sample():
    """Run ``intel_gpu_top -J`` and return the first complete JSON object it prints.

    The tool's stdout is read byte by byte while tracking brace depth; reading
    stops as soon as the first top-level ``{...}`` object closes. The child
    process is always terminated and reaped afterwards.

    Returns:
        The decoded sample, or ``None`` when the tool cannot be started,
        produces no object within 8 seconds, or emits malformed JSON.
    """
    # Local import: only this function needs it.
    import threading

    try:
        proc = subprocess.Popen(
            ["intel_gpu_top", "-J", "-s", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        # read(1) blocks, so checking the deadline in the loop condition alone
        # cannot stop a stalled intel_gpu_top. Killing the child at the
        # deadline makes the pending read return EOF.
        watchdog = threading.Timer(8.0, proc.kill)
        watchdog.daemon = True
        watchdog.start()

        # bytearray keeps the byte-at-a-time append linear.
        buf = bytearray()
        depth = 0
        in_obj = False
        deadline = time.monotonic() + 8.0
        try:
            while time.monotonic() < deadline:
                byte = proc.stdout.read(1)
                if not byte:
                    break
                if byte == b"{":
                    in_obj = True
                    depth += 1
                if in_obj:
                    buf += byte
                if in_obj and byte == b"}":
                    depth -= 1
                    if depth == 0:
                        break
        finally:
            watchdog.cancel()
            proc.terminate()
            proc.wait()
            proc.stdout.close()

        if not buf:
            return None
        try:
            return json.loads(buf)
        except json.JSONDecodeError:
            print("Malformed JSON from intel_gpu_top", file=sys.stderr)
            return None
    except Exception as e:
        print(f"intel_gpu_top unavailable: {e}", file=sys.stderr)
        return None
def write_empty_metrics():
    """Write zero-valued metrics so Prometheus doesn't see stale data.

    Emits the HELP/TYPE headers for all three metric families, with the
    frequency and RC6 gauges set to 0 and no per-engine samples. The file is
    written to ``TEXTFILE + ".tmp"`` first and then moved over ``TEXTFILE``
    with ``os.replace`` so readers never see a partial file.
    """
    lines = [
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
        "# TYPE intel_gpu_engine_busy_percent gauge",
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
        "# TYPE intel_gpu_frequency_mhz gauge",
        "intel_gpu_frequency_mhz 0",
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
        "# TYPE intel_gpu_rc6_percent gauge",
        "intel_gpu_rc6_percent 0",
    ]
    tmp = TEXTFILE + ".tmp"
    # Explicit encoding so the output does not depend on the process locale.
    with open(tmp, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
    os.replace(tmp, TEXTFILE)
def write_metrics(sample):
    """Render one intel_gpu_top sample in Prometheus text format and publish it.

    Args:
        sample: Decoded JSON object. The keys read are ``engines`` (mapping of
            engine name to a dict with ``busy``), ``frequency`` (dict with
            ``actual``) and ``rc6`` (dict with ``value``). Any missing key is
            reported as 0, and a missing ``engines`` yields no engine samples.

    The file is written to ``TEXTFILE + ".tmp"`` first and then moved over
    ``TEXTFILE`` with ``os.replace`` so readers never see a partial file.
    """

    def _escape_label(value):
        # Prometheus text format: backslash, double quote and newline must be
        # escaped inside label values.
        return (
            str(value)
            .replace("\\", "\\\\")
            .replace('"', '\\"')
            .replace("\n", "\\n")
        )

    lines = [
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
        "# TYPE intel_gpu_engine_busy_percent gauge",
    ]
    for engine, data in sample.get("engines", {}).items():
        lines.append(
            f'intel_gpu_engine_busy_percent{{engine="{_escape_label(engine)}"}} {data.get("busy", 0)}'
        )
    freq = sample.get("frequency", {})
    lines += [
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
        "# TYPE intel_gpu_frequency_mhz gauge",
        f'intel_gpu_frequency_mhz {freq.get("actual", 0)}',
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
        "# TYPE intel_gpu_rc6_percent gauge",
        f'intel_gpu_rc6_percent {sample.get("rc6", {}).get("value", 0)}',
    ]
    tmp = TEXTFILE + ".tmp"
    # Explicit encoding so the output does not depend on the process locale.
    with open(tmp, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")
    os.replace(tmp, TEXTFILE)
def main():
    """Collect one GPU sample and publish it.

    When no sample can be read, a message is printed to stderr, zero-valued
    metrics are written instead, and the process exits with status 0.
    """
    sample = read_one_sample()
    if sample is not None:
        write_metrics(sample)
        return

    print("Failed to read intel_gpu_top sample", file=sys.stderr)
    write_empty_metrics()
    sys.exit(0)


if __name__ == "__main__":
    main()

View File

@@ -1,54 +0,0 @@
# Textfile collector for Jellyfin: a timer-driven oneshot unit asks Jellyfin's
# /Sessions endpoint how many sessions are playing something and writes the
# result as jellyfin_active_streams into a .prom file under textfileDir.
{
config,
pkgs,
service_configs,
lib,
...
}:
let
# Directory the generated jellyfin.prom file is written to.
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
# Collector script. It reads the API key from the systemd credential
# directory, counts sessions whose NowPlayingItem is set, and reports 0 when
# the request fails. Output goes to a PID-suffixed temp file that is then
# moved into place.
# NOTE(review): the API key is passed in the URL query string on curl's
# command line.
jellyfinCollector = pkgs.writeShellApplication {
name = "jellyfin-metrics-collector";
runtimeInputs = with pkgs; [
curl
jq
];
text = ''
API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
JELLYFIN="http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}"
if response=$(curl -sf --max-time 5 "''${JELLYFIN}/Sessions?api_key=''${API_KEY}"); then
active_streams=$(echo "$response" | jq '[.[] | select(.NowPlayingItem != null)] | length')
else
active_streams=0
fi
{
echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
echo '# TYPE jellyfin_active_streams gauge'
echo "jellyfin_active_streams $active_streams"
} > "${textfileDir}/jellyfin.prom.$$.tmp"
mv "${textfileDir}/jellyfin.prom.$$.tmp" "${textfileDir}/jellyfin.prom"
'';
};
in
# Only active when both Grafana and Jellyfin are enabled.
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
systemd.services.jellyfin-metrics-collector = {
description = "Collect Jellyfin metrics for Prometheus";
after = [ "network.target" ];
serviceConfig = {
Type = "oneshot";
ExecStart = lib.getExe jellyfinCollector;
# Exposes the agenix secret to the unit as the jellyfin-api-key credential.
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
};
};
systemd.timers.jellyfin-metrics-collector = {
wantedBy = [ "timers.target" ];
timerConfig = {
# Seconds 0 and 30 of every minute, with up to 5s of random delay.
OnCalendar = "*:*:0/30";
RandomizedDelaySec = "5s";
};
};
}

View File

@@ -71,6 +71,24 @@ in
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; }
];
}
# Scrape the Jellyfin exporter on loopback.
{
job_name = "jellyfin";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.jellyfin_exporter.port}" ]; }
];
}
# Scrape the qBittorrent exporter on loopback.
{
job_name = "qbittorrent";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.qbittorrent_exporter.port}" ]; }
];
}
# Scrape the Intel iGPU exporter on loopback.
{
job_name = "igpu";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
];
}
];
};

View File

@@ -1,60 +0,0 @@
# Textfile collector for qBittorrent: a timer-driven oneshot unit reads the
# current transfer speeds from the WebUI API and writes them as two gauges
# into a .prom file under textfileDir.
{
config,
pkgs,
lib,
...
}:
let
# Directory the generated qbittorrent.prom file is written to.
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
# Collector script. It queries /api/v2/transfer/info at the VPN namespace
# address and extracts dl_info_speed and up_info_speed, reporting 0 for both
# when the request fails. Output goes to a temp file that is then moved into
# place.
qbittorrentCollector = pkgs.writeShellApplication {
name = "qbittorrent-collector";
runtimeInputs = with pkgs; [
curl
jq
];
text = ''
QBIT="http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}"
OUT="${textfileDir}/qbittorrent.prom"
if info=$(curl -sf --max-time 5 "''${QBIT}/api/v2/transfer/info"); then
dl=$(echo "$info" | jq '.dl_info_speed')
ul=$(echo "$info" | jq '.up_info_speed')
else
dl=0
ul=0
fi
{
echo '# HELP qbittorrent_download_bytes_per_second Current download speed in bytes/s'
echo '# TYPE qbittorrent_download_bytes_per_second gauge'
echo "qbittorrent_download_bytes_per_second $dl"
echo '# HELP qbittorrent_upload_bytes_per_second Current upload speed in bytes/s'
echo '# TYPE qbittorrent_upload_bytes_per_second gauge'
echo "qbittorrent_upload_bytes_per_second $ul"
} > "''${OUT}.tmp"
mv "''${OUT}.tmp" "$OUT"
'';
};
in
# Only active when both Grafana and qBittorrent are enabled.
lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
systemd.services.qbittorrent-collector = {
description = "Collect qBittorrent transfer metrics for Prometheus";
after = [
"network.target"
"qbittorrent.service"
];
serviceConfig = {
Type = "oneshot";
ExecStart = lib.getExe qbittorrentCollector;
};
};
systemd.timers.qbittorrent-collector = {
wantedBy = [ "timers.target" ];
timerConfig = {
# Every 15 seconds (seconds 0, 15, 30, 45), with up to 3s of random delay.
OnCalendar = "*:*:0/15";
RandomizedDelaySec = "3s";
};
};
}