grafana: replace disk-usage-collector with prometheus-zfs-exporter
The custom disk-usage-collector shell script + minutely timer is replaced by prometheus-zfs-exporter (pdf/zfs_exporter; available in NixOS via the services.prometheus.exporters.zfs module). The exporter provides pool capacity metrics (allocated/free/size) natively. Partition metrics (/boot, /persistent, /nix) now use node_exporter's built-in filesystem collector (node_filesystem_*_bytes), which already runs and collects these metrics. Also fixes a latent race condition in serviceMountWithZpool: the -mounts service now orders after zfs-mount.service (which runs 'zfs mount -a'), not just after pool import. Without this, the mount check could run before datasets were actually mounted.
This commit is contained in:
@@ -59,8 +59,12 @@ inputs.nixpkgs.lib.extend (
|
|||||||
{ pkgs, config, ... }:
|
{ pkgs, config, ... }:
|
||||||
{
|
{
|
||||||
systemd.services."${serviceName}-mounts" = {
|
systemd.services."${serviceName}-mounts" = {
|
||||||
wants = [ "zfs.target" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
|
wants = [
|
||||||
after = lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
|
"zfs.target"
|
||||||
|
"zfs-mount.service"
|
||||||
|
]
|
||||||
|
++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
|
||||||
|
after = [ "zfs-mount.service" ] ++ lib.optionals (zpool != "") [ "zfs-import-${zpool}.service" ];
|
||||||
before = [ "${serviceName}.service" ];
|
before = [ "${serviceName}.service" ];
|
||||||
|
|
||||||
serviceConfig = {
|
serviceConfig = {
|
||||||
|
|||||||
@@ -189,6 +189,10 @@ rec {
|
|||||||
port = 9563;
|
port = 9563;
|
||||||
proto = "tcp";
|
proto = "tcp";
|
||||||
};
|
};
|
||||||
|
prometheus_zfs = {
|
||||||
|
port = 9134;
|
||||||
|
proto = "tcp";
|
||||||
|
};
|
||||||
harmonia = {
|
harmonia = {
|
||||||
port = 5500;
|
port = 5500;
|
||||||
proto = "tcp";
|
proto = "tcp";
|
||||||
|
|||||||
@@ -613,13 +613,13 @@ let
|
|||||||
targets = [
|
targets = [
|
||||||
{
|
{
|
||||||
datasource = promDs;
|
datasource = promDs;
|
||||||
expr = "zpool_used_bytes{pool=\"tank\"} / zpool_size_bytes{pool=\"tank\"} * 100";
|
expr = "zfs_pool_allocated_bytes{pool=\"tank\"} / zfs_pool_size_bytes{pool=\"tank\"} * 100";
|
||||||
legendFormat = "tank";
|
legendFormat = "tank";
|
||||||
refId = "A";
|
refId = "A";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
datasource = promDs;
|
datasource = promDs;
|
||||||
expr = "zpool_used_bytes{pool=\"hdds\"} / zpool_size_bytes{pool=\"hdds\"} * 100";
|
expr = "zfs_pool_allocated_bytes{pool=\"hdds\"} / zfs_pool_size_bytes{pool=\"hdds\"} * 100";
|
||||||
legendFormat = "hdds";
|
legendFormat = "hdds";
|
||||||
refId = "B";
|
refId = "B";
|
||||||
}
|
}
|
||||||
@@ -653,19 +653,19 @@ let
|
|||||||
targets = [
|
targets = [
|
||||||
{
|
{
|
||||||
datasource = promDs;
|
datasource = promDs;
|
||||||
expr = "partition_used_bytes{mount=\"/boot\"} / partition_size_bytes{mount=\"/boot\"} * 100";
|
expr = "(node_filesystem_size_bytes{mountpoint=\"/boot\"} - node_filesystem_avail_bytes{mountpoint=\"/boot\"}) / node_filesystem_size_bytes{mountpoint=\"/boot\"} * 100";
|
||||||
legendFormat = "/boot";
|
legendFormat = "/boot";
|
||||||
refId = "A";
|
refId = "A";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
datasource = promDs;
|
datasource = promDs;
|
||||||
expr = "partition_used_bytes{mount=\"/persistent\"} / partition_size_bytes{mount=\"/persistent\"} * 100";
|
expr = "(node_filesystem_size_bytes{mountpoint=\"/persistent\"} - node_filesystem_avail_bytes{mountpoint=\"/persistent\"}) / node_filesystem_size_bytes{mountpoint=\"/persistent\"} * 100";
|
||||||
legendFormat = "/persistent";
|
legendFormat = "/persistent";
|
||||||
refId = "B";
|
refId = "B";
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
datasource = promDs;
|
datasource = promDs;
|
||||||
expr = "partition_used_bytes{mount=\"/nix\"} / partition_size_bytes{mount=\"/nix\"} * 100";
|
expr = "(node_filesystem_size_bytes{mountpoint=\"/nix\"} - node_filesystem_avail_bytes{mountpoint=\"/nix\"}) / node_filesystem_size_bytes{mountpoint=\"/nix\"} * 100";
|
||||||
legendFormat = "/nix";
|
legendFormat = "/nix";
|
||||||
refId = "C";
|
refId = "C";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -5,7 +5,6 @@
|
|||||||
./dashboard.nix
|
./dashboard.nix
|
||||||
./exporters.nix
|
./exporters.nix
|
||||||
./jellyfin-annotations.nix
|
./jellyfin-annotations.nix
|
||||||
./disk-usage-collector.nix
|
|
||||||
./llama-cpp-annotations.nix
|
./llama-cpp-annotations.nix
|
||||||
./zfs-scrub-annotations.nix
|
./zfs-scrub-annotations.nix
|
||||||
];
|
];
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
{
|
|
||||||
config,
|
|
||||||
pkgs,
|
|
||||||
lib,
|
|
||||||
...
|
|
||||||
}:
|
|
||||||
let
|
|
||||||
textfileDir = "/var/lib/prometheus-node-exporter-textfiles";
|
|
||||||
|
|
||||||
diskUsageCollector = pkgs.writeShellApplication {
|
|
||||||
name = "disk-usage-collector";
|
|
||||||
runtimeInputs = with pkgs; [
|
|
||||||
coreutils
|
|
||||||
gawk
|
|
||||||
config.boot.zfs.package
|
|
||||||
util-linux # for mountpoint
|
|
||||||
];
|
|
||||||
text = builtins.readFile ./disk-usage-collector.sh;
|
|
||||||
};
|
|
||||||
in
|
|
||||||
lib.mkIf config.services.grafana.enable {
|
|
||||||
systemd.services.disk-usage-collector = {
|
|
||||||
description = "Collect ZFS pool and partition usage metrics for Prometheus";
|
|
||||||
serviceConfig = {
|
|
||||||
Type = "oneshot";
|
|
||||||
ExecStart = lib.getExe diskUsageCollector;
|
|
||||||
};
|
|
||||||
environment.TEXTFILE = "${textfileDir}/disk-usage.prom";
|
|
||||||
};
|
|
||||||
|
|
||||||
systemd.timers.disk-usage-collector = {
|
|
||||||
wantedBy = [ "timers.target" ];
|
|
||||||
timerConfig = {
|
|
||||||
OnCalendar = "minutely";
|
|
||||||
RandomizedDelaySec = "10s";
|
|
||||||
};
|
|
||||||
};
|
|
||||||
}
|
|
||||||
@@ -1,44 +0,0 @@
|
|||||||
#!/usr/bin/env bash
|
|
||||||
# Collects ZFS pool utilization and boot partition usage for Prometheus textfile collector
|
|
||||||
set -euo pipefail
|
|
||||||
|
|
||||||
TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
|
|
||||||
TMP="${TEXTFILE}.$$"
|
|
||||||
|
|
||||||
{
|
|
||||||
echo '# HELP zpool_size_bytes Total size of ZFS pool in bytes'
|
|
||||||
echo '# TYPE zpool_size_bytes gauge'
|
|
||||||
echo '# HELP zpool_used_bytes Used space in ZFS pool in bytes'
|
|
||||||
echo '# TYPE zpool_used_bytes gauge'
|
|
||||||
echo '# HELP zpool_free_bytes Free space in ZFS pool in bytes'
|
|
||||||
echo '# TYPE zpool_free_bytes gauge'
|
|
||||||
|
|
||||||
# -Hp: scripting mode, parseable, bytes
|
|
||||||
zpool list -Hp -o name,size,alloc,free | while IFS=$'\t' read -r name size alloc free; do
|
|
||||||
echo "zpool_size_bytes{pool=\"${name}\"} ${size}"
|
|
||||||
echo "zpool_used_bytes{pool=\"${name}\"} ${alloc}"
|
|
||||||
echo "zpool_free_bytes{pool=\"${name}\"} ${free}"
|
|
||||||
done
|
|
||||||
|
|
||||||
echo '# HELP partition_size_bytes Total size of partition in bytes'
|
|
||||||
echo '# TYPE partition_size_bytes gauge'
|
|
||||||
echo '# HELP partition_used_bytes Used space on partition in bytes'
|
|
||||||
echo '# TYPE partition_used_bytes gauge'
|
|
||||||
echo '# HELP partition_free_bytes Free space on partition in bytes'
|
|
||||||
echo '# TYPE partition_free_bytes gauge'
|
|
||||||
|
|
||||||
# Boot drive partitions: /boot (ESP), /persistent, /nix
|
|
||||||
# Use df with 1K blocks and convert to bytes
|
|
||||||
for mount in /boot /persistent /nix; do
|
|
||||||
if mountpoint -q "$mount" 2>/dev/null; then
|
|
||||||
read -r size used avail _ <<< "$(df -k --output=size,used,avail "$mount" | tail -1)"
|
|
||||||
size_b=$((size * 1024))
|
|
||||||
used_b=$((used * 1024))
|
|
||||||
avail_b=$((avail * 1024))
|
|
||||||
echo "partition_size_bytes{mount=\"${mount}\"} ${size_b}"
|
|
||||||
echo "partition_used_bytes{mount=\"${mount}\"} ${used_b}"
|
|
||||||
echo "partition_free_bytes{mount=\"${mount}\"} ${avail_b}"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
} > "$TMP"
|
|
||||||
mv "$TMP" "$TEXTFILE"
|
|
||||||
@@ -44,6 +44,12 @@ in
|
|||||||
listenAddress = "127.0.0.1";
|
listenAddress = "127.0.0.1";
|
||||||
apcupsdAddress = "127.0.0.1:3551";
|
apcupsdAddress = "127.0.0.1:3551";
|
||||||
};
|
};
|
||||||
|
|
||||||
|
zfs = {
|
||||||
|
enable = true;
|
||||||
|
port = service_configs.ports.private.prometheus_zfs.port;
|
||||||
|
listenAddress = "127.0.0.1";
|
||||||
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
scrapeConfigs = [
|
scrapeConfigs = [
|
||||||
@@ -89,6 +95,12 @@ in
|
|||||||
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
|
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.igpu_exporter.port}" ]; }
|
||||||
];
|
];
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
job_name = "zfs";
|
||||||
|
static_configs = [
|
||||||
|
{ targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_zfs.port}" ]; }
|
||||||
|
];
|
||||||
|
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user