grafana: replace custom metric collectors with community exporters
Replace three custom Prometheus textfile collector scripts with
dedicated community-maintained exporters:
- jellyfin-collector.nix (25 LoC shell) -> rebelcore/jellyfin_exporter
  Metric: jellyfin_active_streams -> count(jellyfin_now_playing_state) or vector(0)
  Bonus: per-session labels (user, title, device, codec info)
- qbittorrent-collector.nix (40 LoC shell) -> anriha/qbittorrent-metrics-exporter
  Metric: qbittorrent_{download,upload}_bytes_per_second -> sum(qbit_{dl,up}speed) or vector(0)
  Bonus: per-torrent metrics with category/tag aggregation
- intel-gpu-collector.nix + .py (130 LoC Python) -> mike1808/igpu-exporter
  Metric: intel_gpu_engine_busy_percent -> igpu_engines_busy_percent
  Bonus: persistent daemon vs oneshot timer, no streaming JSON parser
All three run as persistent daemons scraped by Prometheus, replacing
the textfile-collector pattern of systemd timers writing .prom files.
Dashboard PromQL queries updated to match new metric names.
This commit is contained in:
@@ -387,7 +387,7 @@ let
|
||||
targets = [
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "jellyfin_active_streams";
|
||||
expr = "count(jellyfin_now_playing_state) or vector(0)";
|
||||
refId = "A";
|
||||
}
|
||||
];
|
||||
@@ -439,25 +439,25 @@ let
|
||||
targets = [
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "qbittorrent_download_bytes_per_second";
|
||||
expr = "sum(qbit_dlspeed) or vector(0)";
|
||||
legendFormat = "Download";
|
||||
refId = "A";
|
||||
}
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "qbittorrent_upload_bytes_per_second";
|
||||
expr = "sum(qbit_upspeed) or vector(0)";
|
||||
legendFormat = "Upload";
|
||||
refId = "B";
|
||||
}
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "avg_over_time(qbittorrent_download_bytes_per_second[10m:])";
|
||||
expr = "avg_over_time((sum(qbit_dlspeed) or vector(0))[10m:])";
|
||||
legendFormat = "Download (10m avg)";
|
||||
refId = "C";
|
||||
}
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "avg_over_time(qbittorrent_upload_bytes_per_second[10m:])";
|
||||
expr = "avg_over_time((sum(qbit_upspeed) or vector(0))[10m:])";
|
||||
legendFormat = "Upload (10m avg)";
|
||||
refId = "D";
|
||||
}
|
||||
@@ -577,7 +577,7 @@ let
|
||||
targets = [
|
||||
{
|
||||
datasource = promDs;
|
||||
expr = "intel_gpu_engine_busy_percent";
|
||||
expr = "igpu_engines_busy_percent";
|
||||
legendFormat = "{{engine}}";
|
||||
refId = "A";
|
||||
}
|
||||
|
||||
@@ -3,10 +3,8 @@
|
||||
./grafana.nix
|
||||
./prometheus.nix
|
||||
./dashboard.nix
|
||||
./jellyfin-collector.nix
|
||||
./exporters.nix
|
||||
./jellyfin-annotations.nix
|
||||
./qbittorrent-collector.nix
|
||||
./intel-gpu-collector.nix
|
||||
./disk-usage-collector.nix
|
||||
./llama-cpp-annotations.nix
|
||||
./zfs-scrub-annotations.nix
|
||||
|
||||
112
services/grafana/exporters.nix
Normal file
112
services/grafana/exporters.nix
Normal file
@@ -0,0 +1,112 @@
|
||||
# Prometheus exporter services for Jellyfin, qBittorrent and the Intel iGPU.
#
# Each exporter is a long-running systemd service whose port comes from
# `service_configs.ports.private.*`. All three units are gated on
# `services.grafana.enable`; the Jellyfin and qBittorrent units are additionally
# gated on their respective service being enabled.
{
  config,
  pkgs,
  inputs,
  service_configs,
  lib,
  ...
}:
let
  jellyfinExporterPort = service_configs.ports.private.jellyfin_exporter.port;
  qbitExporterPort = service_configs.ports.private.qbittorrent_exporter.port;
  igpuExporterPort = service_configs.ports.private.igpu_exporter.port;

  # qBittorrent WebUI URL, addressed via the VPN namespace address.
  # Only evaluated when the qBittorrent exporter unit is actually enabled.
  qbitWebUiUrl = "http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}";
in
{
  # -- Jellyfin Prometheus Exporter --
  # Replaces custom jellyfin-collector.nix textfile timer.
  # Exposes per-session metrics (jellyfin_now_playing_state) and library stats.
  systemd.services.jellyfin-exporter =
    lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable)
      {
        description = "Prometheus exporter for Jellyfin";
        after = [
          "network.target"
          "jellyfin.service"
        ];
        wantedBy = [ "multi-user.target" ];
        serviceConfig = {
          # Wrapper script: the API key is read at start-up from the systemd
          # credential directory (populated by LoadCredential below) and passed
          # to the exporter as a command-line flag.
          ExecStart = lib.getExe (
            pkgs.writeShellApplication {
              name = "jellyfin-exporter-wrapper";
              runtimeInputs = [ pkgs.jellyfin-exporter ];
              text = ''
                exec jellyfin_exporter \
                  --jellyfin.address=http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port} \
                  --jellyfin.token="$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")" \
                  --web.listen-address=127.0.0.1:${toString jellyfinExporterPort}
              '';
            }
          );
          Restart = "on-failure";
          RestartSec = "10s";
          DynamicUser = true;
          NoNewPrivileges = true;
          ProtectSystem = "strict";
          ProtectHome = true;
          PrivateTmp = true;
          MemoryDenyWriteExecute = true;
          LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
        };
      };

  # -- qBittorrent Prometheus Exporter --
  # Replaces custom qbittorrent-collector.nix textfile timer.
  # Exposes per-torrent metrics (qbit_dlspeed, qbit_upspeed) and aggregate stats.
  # qBittorrent runs in a VPN namespace; the exporter reaches it via namespace address.
  systemd.services.qbittorrent-exporter =
    lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable)
      {
        description = "Prometheus exporter for qBittorrent";
        after = [
          "network.target"
          "qbittorrent.service"
        ];
        wantedBy = [ "multi-user.target" ];
        serviceConfig = {
          # `pkgs.stdenv.hostPlatform.system` is used instead of the deprecated
          # `pkgs.system` alias to select the flake package for this host.
          ExecStart =
            lib.getExe' inputs.qbittorrent-metrics-exporter.packages.${pkgs.stdenv.hostPlatform.system}.default
              "qbittorrent-metrics-exporter";
          Restart = "on-failure";
          RestartSec = "10s";
          DynamicUser = true;
          NoNewPrivileges = true;
          ProtectSystem = "strict";
          ProtectHome = true;
          PrivateTmp = true;
        };
        environment = {
          HOST = "127.0.0.1";
          PORT = toString qbitExporterPort;
          SCRAPE_INTERVAL = "15";
          BACKEND = "in-memory";
          # qBittorrent has AuthSubnetWhitelist=0.0.0.0/0, so no real password needed.
          # The exporter still expects the env var to be set.
          QBITTORRENT_PASSWORD = "unused";
          QBITTORRENT_USERNAME = "admin";
          # The same WebUI URL is supplied on both sides of the `|` separator.
          # NOTE(review): presumably the exporter's "<internal>|<public>" URL
          # pair format - confirm against the exporter's documentation.
          TORRENT_HOSTS = "qbit:main=${qbitWebUiUrl}|${qbitWebUiUrl}";
          RUST_LOG = "warn";
        };
      };

  # -- Intel GPU Prometheus Exporter --
  # Replaces custom intel-gpu-collector.nix + intel-gpu-collector.py textfile timer.
  # Exposes engine busy%, frequency, and RC6 metrics via /metrics.
  # Requires privileged access to GPU debug interfaces (intel_gpu_top).
  systemd.services.igpu-exporter = lib.mkIf config.services.grafana.enable {
    description = "Prometheus exporter for Intel integrated GPU";
    wantedBy = [ "multi-user.target" ];
    # Puts the intel-gpu-tools binaries (intel_gpu_top) on the unit's PATH.
    path = [ pkgs.intel-gpu-tools ];
    serviceConfig = {
      ExecStart = lib.getExe pkgs.igpu-exporter;
      Restart = "on-failure";
      RestartSec = "10s";
      # No User=/DynamicUser= is set on purpose, so the unit runs as root:
      # intel_gpu_top requires root-level access to GPU debug interfaces.
      ProtectHome = true;
      PrivateTmp = true;
    };
    environment = {
      PORT = toString igpuExporterPort;
      REFRESH_PERIOD_MS = "30000";
    };
  };
}
|
||||
@@ -1,38 +0,0 @@
|
||||
{
|
||||
config,
|
||||
pkgs,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
let
|
||||
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
|
||||
|
||||
intelGpuCollector = pkgs.writeShellApplication {
|
||||
name = "intel-gpu-collector";
|
||||
runtimeInputs = with pkgs; [
|
||||
python3
|
||||
intel-gpu-tools
|
||||
];
|
||||
text = ''
|
||||
exec python3 ${./intel-gpu-collector.py}
|
||||
'';
|
||||
};
|
||||
in
|
||||
lib.mkIf config.services.grafana.enable {
|
||||
systemd.services.intel-gpu-collector = {
|
||||
description = "Collect Intel GPU metrics for Prometheus";
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = lib.getExe intelGpuCollector;
|
||||
};
|
||||
environment.TEXTFILE = "${textfileDir}/intel-gpu.prom";
|
||||
};
|
||||
|
||||
systemd.timers.intel-gpu-collector = {
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "*:*:0/30";
|
||||
RandomizedDelaySec = "10s";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import json
|
||||
import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
TEXTFILE = os.environ.get(
|
||||
"TEXTFILE",
|
||||
"/var/lib/prometheus-node-exporter-textfiles/intel-gpu.prom",
|
||||
)
|
||||
|
||||
|
||||
def read_one_sample():
|
||||
try:
|
||||
proc = subprocess.Popen(
|
||||
["intel_gpu_top", "-J", "-s", "1000"],
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
buf = b""
|
||||
depth = 0
|
||||
in_obj = False
|
||||
deadline = time.monotonic() + 8.0
|
||||
try:
|
||||
while time.monotonic() < deadline:
|
||||
byte = proc.stdout.read(1)
|
||||
if not byte:
|
||||
break
|
||||
if byte == b"{":
|
||||
in_obj = True
|
||||
depth += 1
|
||||
if in_obj:
|
||||
buf += byte
|
||||
if in_obj and byte == b"}":
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
break
|
||||
finally:
|
||||
proc.terminate()
|
||||
proc.wait()
|
||||
if not buf:
|
||||
return None
|
||||
try:
|
||||
return json.loads(buf)
|
||||
except json.JSONDecodeError:
|
||||
print("Malformed JSON from intel_gpu_top", file=sys.stderr)
|
||||
return None
|
||||
except Exception as e:
|
||||
print(f"intel_gpu_top unavailable: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
def write_empty_metrics():
|
||||
"""Write zero-valued metrics so Prometheus doesn't see stale data."""
|
||||
lines = [
|
||||
"# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
|
||||
"# TYPE intel_gpu_engine_busy_percent gauge",
|
||||
"# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
|
||||
"# TYPE intel_gpu_frequency_mhz gauge",
|
||||
"intel_gpu_frequency_mhz 0",
|
||||
"# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
|
||||
"# TYPE intel_gpu_rc6_percent gauge",
|
||||
"intel_gpu_rc6_percent 0",
|
||||
]
|
||||
tmp = TEXTFILE + ".tmp"
|
||||
with open(tmp, "w") as f:
|
||||
f.write("\n".join(lines) + "\n")
|
||||
os.replace(tmp, TEXTFILE)
|
||||
|
||||
|
||||
def write_metrics(sample):
|
||||
lines = [
|
||||
"# HELP intel_gpu_engine_busy_percent Intel GPU engine busy percentage",
|
||||
"# TYPE intel_gpu_engine_busy_percent gauge",
|
||||
]
|
||||
for engine, data in sample.get("engines", {}).items():
|
||||
lines.append(
|
||||
f'intel_gpu_engine_busy_percent{{engine="{engine}"}} {data.get("busy", 0)}'
|
||||
)
|
||||
freq = sample.get("frequency", {})
|
||||
lines += [
|
||||
"# HELP intel_gpu_frequency_mhz Intel GPU actual frequency in MHz",
|
||||
"# TYPE intel_gpu_frequency_mhz gauge",
|
||||
f'intel_gpu_frequency_mhz {freq.get("actual", 0)}',
|
||||
"# HELP intel_gpu_rc6_percent Intel GPU RC6 power-saving state percentage",
|
||||
"# TYPE intel_gpu_rc6_percent gauge",
|
||||
f'intel_gpu_rc6_percent {sample.get("rc6", {}).get("value", 0)}',
|
||||
]
|
||||
|
||||
tmp = TEXTFILE + ".tmp"
|
||||
with open(tmp, "w") as f:
|
||||
f.write("\n".join(lines) + "\n")
|
||||
os.replace(tmp, TEXTFILE)
|
||||
|
||||
|
||||
def main():
|
||||
sample = read_one_sample()
|
||||
if sample is None:
|
||||
print("Failed to read intel_gpu_top sample", file=sys.stderr)
|
||||
write_empty_metrics()
|
||||
sys.exit(0)
|
||||
write_metrics(sample)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -1,54 +0,0 @@
|
||||
{
|
||||
config,
|
||||
pkgs,
|
||||
service_configs,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
let
|
||||
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
|
||||
|
||||
jellyfinCollector = pkgs.writeShellApplication {
|
||||
name = "jellyfin-metrics-collector";
|
||||
runtimeInputs = with pkgs; [
|
||||
curl
|
||||
jq
|
||||
];
|
||||
text = ''
|
||||
API_KEY=$(cat "$CREDENTIALS_DIRECTORY/jellyfin-api-key")
|
||||
JELLYFIN="http://127.0.0.1:${toString service_configs.ports.private.jellyfin.port}"
|
||||
|
||||
if response=$(curl -sf --max-time 5 "''${JELLYFIN}/Sessions?api_key=''${API_KEY}"); then
|
||||
active_streams=$(echo "$response" | jq '[.[] | select(.NowPlayingItem != null)] | length')
|
||||
else
|
||||
active_streams=0
|
||||
fi
|
||||
|
||||
{
|
||||
echo '# HELP jellyfin_active_streams Number of currently active Jellyfin streams'
|
||||
echo '# TYPE jellyfin_active_streams gauge'
|
||||
echo "jellyfin_active_streams $active_streams"
|
||||
} > "${textfileDir}/jellyfin.prom.$$.tmp"
|
||||
mv "${textfileDir}/jellyfin.prom.$$.tmp" "${textfileDir}/jellyfin.prom"
|
||||
'';
|
||||
};
|
||||
in
|
||||
lib.mkIf (config.services.grafana.enable && config.services.jellyfin.enable) {
|
||||
systemd.services.jellyfin-metrics-collector = {
|
||||
description = "Collect Jellyfin metrics for Prometheus";
|
||||
after = [ "network.target" ];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = lib.getExe jellyfinCollector;
|
||||
LoadCredential = "jellyfin-api-key:${config.age.secrets.jellyfin-api-key.path}";
|
||||
};
|
||||
};
|
||||
|
||||
systemd.timers.jellyfin-metrics-collector = {
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "*:*:0/30";
|
||||
RandomizedDelaySec = "5s";
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -71,6 +71,24 @@ in
|
||||
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; }
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "jellyfin";
|
||||
static_configs = [
|
||||
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.jellyfin_exporter.port}" ]; }
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "qbittorrent";
|
||||
static_configs = [
|
||||
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.qbittorrent_exporter.port}" ]; }
|
||||
];
|
||||
}
|
||||
{
|
||||
job_name = "igpu";
|
||||
static_configs = [
|
||||
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
|
||||
];
|
||||
}
|
||||
];
|
||||
};
|
||||
|
||||
|
||||
@@ -1,60 +0,0 @@
|
||||
{
|
||||
config,
|
||||
pkgs,
|
||||
lib,
|
||||
...
|
||||
}:
|
||||
let
|
||||
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
|
||||
|
||||
qbittorrentCollector = pkgs.writeShellApplication {
|
||||
name = "qbittorrent-collector";
|
||||
runtimeInputs = with pkgs; [
|
||||
curl
|
||||
jq
|
||||
];
|
||||
text = ''
|
||||
QBIT="http://${config.vpnNamespaces.wg.namespaceAddress}:${toString config.services.qbittorrent.webuiPort}"
|
||||
OUT="${textfileDir}/qbittorrent.prom"
|
||||
|
||||
if info=$(curl -sf --max-time 5 "''${QBIT}/api/v2/transfer/info"); then
|
||||
dl=$(echo "$info" | jq '.dl_info_speed')
|
||||
ul=$(echo "$info" | jq '.up_info_speed')
|
||||
else
|
||||
dl=0
|
||||
ul=0
|
||||
fi
|
||||
|
||||
{
|
||||
echo '# HELP qbittorrent_download_bytes_per_second Current download speed in bytes/s'
|
||||
echo '# TYPE qbittorrent_download_bytes_per_second gauge'
|
||||
echo "qbittorrent_download_bytes_per_second $dl"
|
||||
echo '# HELP qbittorrent_upload_bytes_per_second Current upload speed in bytes/s'
|
||||
echo '# TYPE qbittorrent_upload_bytes_per_second gauge'
|
||||
echo "qbittorrent_upload_bytes_per_second $ul"
|
||||
} > "''${OUT}.tmp"
|
||||
mv "''${OUT}.tmp" "$OUT"
|
||||
'';
|
||||
};
|
||||
in
|
||||
lib.mkIf (config.services.grafana.enable && config.services.qbittorrent.enable) {
|
||||
systemd.services.qbittorrent-collector = {
|
||||
description = "Collect qBittorrent transfer metrics for Prometheus";
|
||||
after = [
|
||||
"network.target"
|
||||
"qbittorrent.service"
|
||||
];
|
||||
serviceConfig = {
|
||||
Type = "oneshot";
|
||||
ExecStart = lib.getExe qbittorrentCollector;
|
||||
};
|
||||
};
|
||||
|
||||
systemd.timers.qbittorrent-collector = {
|
||||
wantedBy = [ "timers.target" ];
|
||||
timerConfig = {
|
||||
OnCalendar = "*:*:0/15";
|
||||
RandomizedDelaySec = "3s";
|
||||
};
|
||||
};
|
||||
}
|
||||
Reference in New Issue
Block a user