grafana: replace custom metric collectors with community exporters

Replace three custom Prometheus textfile collector scripts with
dedicated community-maintained exporters:

- jellyfin-collector.nix (25 LoC shell) -> rebelcore/jellyfin_exporter
  Metric: jellyfin_active_streams -> count(jellyfin_now_playing_state)
  Bonus: per-session labels (user, title, device, codec info)

- qbittorrent-collector.nix (40 LoC shell) -> anriha/qbittorrent-metrics-exporter
  Metric: qbittorrent_{download,upload}_bytes_per_second -> qbit_{dl,up}speed
  Bonus: per-torrent metrics with category/tag aggregation

- intel-gpu-collector.nix + .py (130 LoC Python) -> mike1808/igpu-exporter
  Metric: intel_gpu_engine_busy_percent -> igpu_engines_busy_percent
  Bonus: persistent daemon vs oneshot timer, no streaming JSON parser

All three run as persistent daemons scraped by Prometheus, replacing
the textfile-collector pattern of systemd timers writing .prom files.
Dashboard PromQL queries updated to match new metric names.
This commit is contained in:
2026-04-03 15:23:47 -04:00
parent 479ec43b8f
commit 3f62b9c88e
12 changed files with 302 additions and 270 deletions

View File

@@ -387,7 +387,7 @@ let
targets = [
{
datasource = promDs;
expr = "jellyfin_active_streams";
expr = "count(jellyfin_now_playing_state) or vector(0)";
refId = "A";
}
];
@@ -439,25 +439,25 @@ let
targets = [
{
datasource = promDs;
expr = "qbittorrent_download_bytes_per_second";
expr = "sum(qbit_dlspeed) or vector(0)";
legendFormat = "Download";
refId = "A";
}
{
datasource = promDs;
expr = "qbittorrent_upload_bytes_per_second";
expr = "sum(qbit_upspeed) or vector(0)";
legendFormat = "Upload";
refId = "B";
}
{
datasource = promDs;
expr = "avg_over_time(qbittorrent_download_bytes_per_second[10m:])";
expr = "avg_over_time((sum(qbit_dlspeed) or vector(0))[10m:])";
legendFormat = "Download (10m avg)";
refId = "C";
}
{
datasource = promDs;
expr = "avg_over_time(qbittorrent_upload_bytes_per_second[10m:])";
expr = "avg_over_time((sum(qbit_upspeed) or vector(0))[10m:])";
legendFormat = "Upload (10m avg)";
refId = "D";
}
@@ -577,7 +577,7 @@ let
targets = [
{
datasource = promDs;
expr = "intel_gpu_engine_busy_percent";
expr = "igpu_engines_busy_percent";
legendFormat = "{{engine}}";
refId = "A";
}

View File

@@ -3,10 +3,8 @@
./grafana.nix
./prometheus.nix
./dashboard.nix
./jellyfin-collector.nix
./exporters.nix
./jellyfin-annotations.nix
./qbittorrent-collector.nix
./intel-gpu-collector.nix
./disk-usage-collector.nix
./llama-cpp-annotations.nix
./zfs-scrub-annotations.nix

View File

@@ -0,0 +1,112 @@
{
config,
pkgs,
inputs,
service_configs,
lib,
...
}:
let
jellyfinExporterPort = service_configs.ports.private.jellyfin_exporter.port;
qbitExporterPort = service_configs.ports.private.qbittorrent_exporter.port;
igpuExporterPort = service_configs.ports.private.igpu_exporter.port;
in
{
# -- Jellyfin Prometheus Exporter --
# Long-running exporter that takes over from the jellyfin-collector.nix
# textfile timer. Only defined when both Grafana and Jellyfin are enabled.
# Per-session metrics (jellyfin_now_playing_state) feed the dashboard.
systemd.services.jellyfin-exporter =
  let
    # Jellyfin's HTTP endpoint on loopback.
    jellyfinUrl = "http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}";

    # Loopback address the exporter serves /metrics on.
    listenAddress = "127.0.0.1:${toString jellyfinExporterPort}";

    # Wrapper script: the API key is only readable at runtime from the
    # systemd credentials directory, so it is read here and handed to the
    # exporter as a flag.
    # NOTE(review): the token therefore appears in the exporter's argv --
    # confirm whether the exporter offers an env/file alternative.
    launcher = pkgs.writeShellApplication {
      name = "jellyfin-exporter-wrapper";
      runtimeInputs = [ pkgs.jellyfin-exporter ];
      text = ''
        exec jellyfin_exporter \
          --jellyfin.address=${jellyfinUrl} \
          --jellyfin.token="$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")" \
          --web.listen-address=${listenAddress}
      '';
    };
  in
  lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
    description = "Prometheus exporter for Jellyfin";
    wantedBy = [ "multi-user.target" ];
    after = [
      "network.target"
      "jellyfin.service"
    ];
    serviceConfig = {
      ExecStart = lib.getExe launcher;

      # Secret is injected as a systemd credential rather than read from
      # the agenix path directly, so the DynamicUser can access it.
      LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";

      # Restart policy.
      Restart = "on-failure";
      RestartSec = "10s";

      # Sandboxing.
      DynamicUser = true;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
      MemoryDenyWriteExecute = true;
    };
  };
# -- qBittorrent Prometheus Exporter --
# Replaces the custom qbittorrent-collector.nix textfile timer.
# Provides the per-torrent qbit_dlspeed / qbit_upspeed series that the
# dashboard sums into aggregate download/upload rates.
# qBittorrent runs in a VPN namespace; the exporter reaches it via the
# namespace address rather than loopback.
systemd.services.qbittorrent-exporter =
  let
    # WebUI endpoint inside the VPN namespace.
    webuiUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}";

    # Exporter package from the flake input. `pkgs.system` is a deprecated
    # nixpkgs alias (it warns on recent releases and is unavailable when
    # aliases are disabled), so use the canonical host-platform attribute.
    exporterPkg =
      inputs.qbittorrent-metrics-exporter.packages.${pkgs.stdenv.hostPlatform.system}.default;
  in
  lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
    description = "Prometheus exporter for qBittorrent";
    after = [
      "network.target"
      "qbittorrent.service"
    ];
    wantedBy = [ "multi-user.target" ];
    serviceConfig = {
      ExecStart = lib.getExe' exporterPkg "qbittorrent-metrics-exporter";
      Restart = "on-failure";
      RestartSec = "10s";
      DynamicUser = true;
      NoNewPrivileges = true;
      ProtectSystem = "strict";
      ProtectHome = true;
      PrivateTmp = true;
    };
    environment = {
      # Listen on loopback only; Prometheus scrapes it locally.
      HOST = "127.0.0.1";
      PORT = toString qbitExporterPort;
      SCRAPE_INTERVAL = "15";
      BACKEND = "in-memory";
      # qBittorrent has AuthSubnetWhitelist=0.0.0.0/0, so no real password needed.
      # The exporter still expects the env var to be set.
      QBITTORRENT_PASSWORD = "unused";
      QBITTORRENT_USERNAME = "admin";
      # NOTE(review): the same URL is given on both sides of the '|'
      # separator -- confirm the expected format against the exporter docs.
      TORRENT_HOSTS = "qbit:main=${webuiUrl}|${webuiUrl}";
      RUST_LOG = "warn";
    };
  };
# -- Intel GPU Prometheus Exporter --
# Persistent replacement for the intel-gpu-collector.nix / .py textfile timer.
# Supplies the igpu_engines_busy_percent series used by the dashboard.
# Needs privileged access to the GPU debug interfaces (intel_gpu_top), so
# unlike the other exporters it does not set DynamicUser.
systemd.services.igpu-exporter = lib.mkIf config.services.grafana.enable {
  description = "Prometheus exporter for Intel integrated GPU";
  wantedBy = [ "multi-user.target" ];

  # Puts the intel-gpu-tools binaries on the unit's PATH.
  path = with pkgs; [ intel-gpu-tools ];

  environment.PORT = toString igpuExporterPort;
  # The removed textfile timer fired every 30 s (OnCalendar "*:*:0/30").
  # NOTE(review): presumably milliseconds, going by the variable name.
  environment.REFRESH_PERIOD_MS = "30000";

  serviceConfig = {
    ExecStart = lib.getExe pkgs.igpu-exporter;

    Restart = "on-failure";
    RestartSec = "10s";

    # Only light sandboxing: intel_gpu_top requires root-level access to
    # GPU debug interfaces.
    PrivateTmp = true;
    ProtectHome = true;
  };
};
}

View File

@@ -1,38 +0,0 @@
# Oneshot service + timer that runs intel-gpu-collector.py and writes
# intel-gpu.prom into the textfile directory. Only active when Grafana is
# enabled.
{
  config,
  pkgs,
  lib,
  ...
}:
let
  # Directory the .prom file is written into.
  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";

  # Runs the Python collector with python3 and intel_gpu_top on PATH.
  collectorScript = pkgs.writeShellApplication {
    name = "intel-gpu-collector";
    runtimeInputs = [
      pkgs.python3
      pkgs.intel-gpu-tools
    ];
    text = ''
      exec python3 ${./intel-gpu-collector.py}
    '';
  };
in
lib.mkIf config.services.grafana.enable {
  systemd = {
    services.intel-gpu-collector = {
      description = "Collect Intel GPU metrics for Prometheus";
      # The script reads its output path from $TEXTFILE.
      environment.TEXTFILE = "${textfileDir}/intel-gpu.prom";
      serviceConfig = {
        Type = "oneshot";
        ExecStart = lib.getExe collectorScript;
      };
    };

    # Fires at seconds 0 and 30 of every minute, with up to 10 s of jitter.
    timers.intel-gpu-collector = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnCalendar = "*:*:0/30";
        RandomizedDelaySec = "10s";
      };
    };
  };
}

View File

@@ -1,107 +0,0 @@
#!/usr/bin/env python3
import json
import os
import subprocess
import sys
import time
# Path of the .prom file this script writes. Taken from the TEXTFILE
# environment variable when set, otherwise the hard-coded default below.
TEXTFILE = os.environ.get(
"TEXTFILE",
"/var/lib/prometheus-node-exporter-textfiles/intel-gpu.prom",
)
def read_one_sample():
    """Run ``intel_gpu_top -J -s 1000`` and return its first JSON object.

    Output is read until the first complete top-level ``{...}`` object has
    arrived, then the process is terminated.

    Returns:
        The decoded object, or ``None`` when the tool cannot be started,
        produces no object, or produces only an incomplete/malformed one
        before EOF or the 8-second deadline.

    Never raises: any exception is logged to stderr and reported as ``None``.
    """
    # Local import so this block does not depend on the file-level imports.
    import select

    try:
        proc = subprocess.Popen(
            ["intel_gpu_top", "-J", "-s", "1000"],
            stdout=subprocess.PIPE,
            stderr=subprocess.DEVNULL,
        )
        decoder = json.JSONDecoder()
        fd = proc.stdout.fileno()
        buf = bytearray()
        sample = None
        deadline = time.monotonic() + 8.0
        try:
            while sample is None:
                remaining = deadline - time.monotonic()
                if remaining <= 0:
                    break
                # Wait for data with a timeout so a stalled intel_gpu_top
                # cannot block past the deadline (a plain read() would).
                ready, _, _ = select.select([fd], [], [], remaining)
                if not ready:
                    break
                # Read from the fd directly; mixing select() with the
                # buffered reader could leave data hidden in its buffer.
                chunk = os.read(fd, 65536)
                if not chunk:
                    break  # EOF
                buf += chunk
                start = buf.find(b"{")
                if start < 0:
                    continue  # still only the leading "[" / whitespace
                try:
                    # raw_decode parses one value and ignores what follows,
                    # and it understands braces inside JSON strings.
                    sample, _ = decoder.raw_decode(
                        buf[start:].decode("utf-8", errors="replace")
                    )
                except json.JSONDecodeError:
                    # Object not complete yet; keep reading.
                    pass
        finally:
            proc.terminate()
            proc.wait()
            proc.stdout.close()
        if sample is not None:
            return sample
        if b"{" in buf:
            # An object started but never became valid JSON.
            print("Malformed JSON from intel_gpu_top", file=sys.stderr)
        return None
    except Exception as e:
        print(f"intel_gpu_top unavailable: {e}", file=sys.stderr)
        return None
def write_empty_metrics():
    """Write a placeholder metrics file so Prometheus doesn't see stale data.

    Frequency and RC6 are emitted as 0; the engine-busy family gets only its
    HELP/TYPE header with no samples. The file is written to a ``.tmp``
    sibling and then moved over TEXTFILE with ``os.replace``.
    """
    payload = (
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage\n"
        "# TYPE intel_gpu_engine_busy_percent gauge\n"
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz\n"
        "# TYPE intel_gpu_frequency_mhz gauge\n"
        "intel_gpu_frequency_mhz 0\n"
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage\n"
        "# TYPE intel_gpu_rc6_percent gauge\n"
        "intel_gpu_rc6_percent 0\n"
    )
    staging_path = TEXTFILE + ".tmp"
    with open(staging_path, "w") as out:
        out.write(payload)
    os.replace(staging_path, TEXTFILE)
def write_metrics(sample):
    """Render one decoded intel_gpu_top sample into TEXTFILE.

    Args:
        sample: mapping read with ``.get``; the keys used are ``engines``
            (name -> mapping with ``busy``), ``frequency`` (``actual``) and
            ``rc6`` (``value``). Missing keys fall back to 0 / no engines.

    The file is written to a ``.tmp`` sibling and then moved over TEXTFILE
    with ``os.replace``.
    """
    engine_rows = [
        f'intel_gpu_engine_busy_percent{{engine="{name}"}} {stats.get("busy", 0)}'
        for name, stats in sample.get("engines", {}).items()
    ]
    actual_mhz = sample.get("frequency", {}).get("actual", 0)
    rc6_pct = sample.get("rc6", {}).get("value", 0)

    rows = [
        "# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
        "# TYPE intel_gpu_engine_busy_percent gauge",
        *engine_rows,
        "# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
        "# TYPE intel_gpu_frequency_mhz gauge",
        f"intel_gpu_frequency_mhz {actual_mhz}",
        "# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
        "# TYPE intel_gpu_rc6_percent gauge",
        f"intel_gpu_rc6_percent {rc6_pct}",
    ]

    staging_path = TEXTFILE + ".tmp"
    with open(staging_path, "w") as out:
        out.write("\n".join(rows) + "\n")
    os.replace(staging_path, TEXTFILE)
def main():
    """Collect one sample and publish it, or publish placeholders on failure.

    A failed read is logged to stderr, placeholder metrics are written, and
    the process exits with status 0.
    """
    reading = read_one_sample()
    if reading is not None:
        write_metrics(reading)
        return

    print("Failed to read intel_gpu_top sample", file=sys.stderr)
    write_empty_metrics()
    sys.exit(0)


if __name__ == "__main__":
    main()

View File

@@ -1,54 +0,0 @@
# Oneshot service + timer that asks Jellyfin for its sessions and writes the
# number of sessions with a NowPlayingItem as jellyfin_active_streams into
# the textfile directory. Only active when Grafana and Jellyfin are enabled.
{
  config,
  pkgs,
  service_configs,
  lib,
  ...
}:
let
  # Directory the .prom file is written into.
  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";

  # Final metrics file; the script writes a PID-suffixed temp file first.
  promFile = "${textfileDir}/jellyfin.prom";

  # Jellyfin's HTTP endpoint on loopback.
  jellyfinUrl = "http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}";

  collectorScript = pkgs.writeShellApplication {
    name = "jellyfin-metrics-collector";
    runtimeInputs = [
      pkgs.curl
      pkgs.jq
    ];
    # A failed request (curl -f, 5 s cap) is reported as 0 active streams.
    text = ''
      API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
      JELLYFIN="${jellyfinUrl}"
      if response=$(curl -sf --max-time 5 "''${JELLYFIN}/Sessions?api_key=''${API_KEY}"); then
        active_streams=$(echo "$response" | jq '[.[] | select(.NowPlayingItem != null)] | length')
      else
        active_streams=0
      fi
      {
        echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
        echo '# TYPE jellyfin_active_streams gauge'
        echo "jellyfin_active_streams $active_streams"
      } > "${promFile}.$$.tmp"
      mv "${promFile}.$$.tmp" "${promFile}"
    '';
  };
in
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
  systemd = {
    services.jellyfin-metrics-collector = {
      description = "Collect Jellyfin metrics for Prometheus";
      after = [ "network.target" ];
      serviceConfig = {
        Type = "oneshot";
        ExecStart = lib.getExe collectorScript;
        # Makes the API key available under $CREDENTIALS_DIRECTORY.
        LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
      };
    };

    # Fires at seconds 0 and 30 of every minute, with up to 5 s of jitter.
    timers.jellyfin-metrics-collector = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnCalendar = "*:*:0/30";
        RandomizedDelaySec = "5s";
      };
    };
  };
}

View File

@@ -71,6 +71,24 @@ in
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; }
];
}
{
job_name = "jellyfin";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.jellyfin_exporter.port}" ]; }
];
}
{
job_name = "qbittorrent";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.qbittorrent_exporter.port}" ]; }
];
}
{
job_name = "igpu";
static_configs = [
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
];
}
];
};

View File

@@ -1,60 +0,0 @@
# Oneshot service + timer that polls qBittorrent's /api/v2/transfer/info and
# writes the current download/upload speeds into the textfile directory.
# Only active when Grafana and qBittorrent are enabled.
{
  config,
  pkgs,
  lib,
  ...
}:
let
  # Directory the .prom file is written into.
  textfileDir = "/var/lib/prometheus-node-exporter-textfiles";

  # qBittorrent WebUI, addressed via the VPN namespace address.
  webuiUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}";

  collectorScript = pkgs.writeShellApplication {
    name = "qbittorrent-collector";
    runtimeInputs = [
      pkgs.curl
      pkgs.jq
    ];
    # A failed request (curl -f, 5 s cap) is reported as 0 for both speeds.
    # Output goes to "$OUT.tmp" first and is then renamed into place.
    text = ''
      QBIT="${webuiUrl}"
      OUT="${textfileDir}/qbittorrent.prom"
      if info=$(curl -sf --max-time 5 "''${QBIT}/api/v2/transfer/info"); then
        dl=$(echo "$info" | jq '.dl_info_speed')
        ul=$(echo "$info" | jq '.up_info_speed')
      else
        dl=0
        ul=0
      fi
      {
        echo '# HELP qbittorrent_download_bytes_per_second Current download speed in bytes/s'
        echo '# TYPE qbittorrent_download_bytes_per_second gauge'
        echo "qbittorrent_download_bytes_per_second $dl"
        echo '# HELP qbittorrent_upload_bytes_per_second Current upload speed in bytes/s'
        echo '# TYPE qbittorrent_upload_bytes_per_second gauge'
        echo "qbittorrent_upload_bytes_per_second $ul"
      } > "''${OUT}.tmp"
      mv "''${OUT}.tmp" "$OUT"
    '';
  };
in
lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
  systemd = {
    services.qbittorrent-collector = {
      description = "Collect qBittorrent transfer metrics for Prometheus";
      after = [
        "network.target"
        "qbittorrent.service"
      ];
      serviceConfig = {
        Type = "oneshot";
        ExecStart = lib.getExe collectorScript;
      };
    };

    # Fires every 15 seconds, with up to 3 s of jitter.
    timers.qbittorrent-collector = {
      wantedBy = [ "timers.target" ];
      timerConfig = {
        OnCalendar = "*:*:0/15";
        RandomizedDelaySec = "3s";
      };
    };
  };
}