monitoring: add zpool and boot partition usage metrics
Add a textfile collector for ZFS pool utilization (tank, hdds) and boot drive partitions (/boot, /persistent, /nix); a systemd timer runs it every 60s. Add two Grafana dashboard panels, ZFS Pool Utilization and Boot Drive Partitions, as Row 5.
This commit is contained in:
44
services/disk-usage-collector.sh
Normal file
44
services/disk-usage-collector.sh
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
#!/usr/bin/env bash
# Collects ZFS pool utilization and boot partition usage for Prometheus textfile collector
#
# Environment:
#   TEXTFILE  (required) path of the .prom file to publish.
#
# The output is staged in "${TEXTFILE}.<pid>" (same directory, so the final
# rename stays on one filesystem) and moved over TEXTFILE only after every
# metric has been written; a reader therefore never sees a partial file.
set -euo pipefail

TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
TMP="${TEXTFILE}.$$"

# Remove the staging file on every exit path. Without this, each run that
# aborts under `set -e` leaves a "${TEXTFILE}.<pid>" file behind. After a
# successful mv the file no longer exists and `rm -f` is a no-op.
trap 'rm -f -- "$TMP"' EXIT

#######################################
# Prints one complete gauge metric family: HELP, TYPE, then its samples.
# Keeping all lines of a family together is required by the Prometheus
# text exposition format.
# Arguments: $1 - metric name
#            $2 - help text
#            $3 - newline-terminated sample lines (may be empty)
# Outputs:   exposition-format text on stdout
#######################################
emit_gauge() {
  local metric=$1 help=$2 samples=$3
  printf '# HELP %s %s\n' "$metric" "$help"
  printf '# TYPE %s gauge\n' "$metric"
  printf '%s' "$samples"
}

# --- ZFS pools ---------------------------------------------------------------
# -Hp: scripting mode, parseable, bytes
# Captured into a variable first so that a zpool failure aborts the script
# (set -e) before anything is published.
zpool_rows="$(zpool list -Hp -o name,size,alloc,free)"

pool_size=''
pool_used=''
pool_free=''
while IFS=$'\t' read -r name size alloc free; do
  # The here-string yields a single empty line when no pool exists.
  [[ -n "$name" ]] || continue
  pool_size+="zpool_size_bytes{pool=\"${name}\"} ${size}"$'\n'
  pool_used+="zpool_used_bytes{pool=\"${name}\"} ${alloc}"$'\n'
  pool_free+="zpool_free_bytes{pool=\"${name}\"} ${free}"$'\n'
done <<< "$zpool_rows"

# --- Boot drive partitions: /boot (ESP), /persistent, /nix --------------------
part_size=''
part_used=''
part_free=''
for mount in /boot /persistent /nix; do
  # Paths that are not mount points on this host are skipped silently.
  mountpoint -q "$mount" 2>/dev/null || continue

  # -B1 reports exact byte counts, so no 1K-block conversion is needed.
  # Assigning the output first makes a df failure visible instead of
  # silently turning into zero-valued samples.
  if ! df_row="$(df -B1 --output=size,used,avail -- "$mount" | tail -n 1)"; then
    printf 'disk-usage-collector: df failed for %s, skipping\n' "$mount" >&2
    continue
  fi

  read -r size used avail _ <<< "$df_row"
  if [[ -z "$avail" ]]; then
    printf 'disk-usage-collector: unexpected df output for %s, skipping\n' "$mount" >&2
    continue
  fi

  part_size+="partition_size_bytes{mount=\"${mount}\"} ${size}"$'\n'
  part_used+="partition_used_bytes{mount=\"${mount}\"} ${used}"$'\n'
  part_free+="partition_free_bytes{mount=\"${mount}\"} ${avail}"$'\n'
done

# --- Publish -----------------------------------------------------------------
{
  emit_gauge zpool_size_bytes 'Total size of ZFS pool in bytes' "$pool_size"
  emit_gauge zpool_used_bytes 'Used space in ZFS pool in bytes' "$pool_used"
  emit_gauge zpool_free_bytes 'Free space in ZFS pool in bytes' "$pool_free"

  emit_gauge partition_size_bytes 'Total size of partition in bytes' "$part_size"
  emit_gauge partition_used_bytes 'Used space on partition in bytes' "$part_used"
  emit_gauge partition_free_bytes 'Free space on partition in bytes' "$part_free"
} > "$TMP"

mv -- "$TMP" "$TEXTFILE"
|
||||||
@@ -79,6 +79,17 @@ let
|
|||||||
'';
|
'';
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# Packages ./disk-usage-collector.sh as an executable with the tools it
# invokes on PATH. gawk was dropped from the inputs: the script never calls
# awk, so it only added an unused dependency.
diskUsageCollector = pkgs.writeShellApplication {
  name = "disk-usage-collector";
  runtimeInputs = [
    pkgs.coreutils # df, tail, mv
    config.boot.zfs.package # zpool
    pkgs.util-linux # for mountpoint
  ];
  text = builtins.readFile ./disk-usage-collector.sh;
};
|
||||||
|
|
||||||
dashboard = {
|
dashboard = {
|
||||||
editable = true;
|
editable = true;
|
||||||
graphTooltip = 1;
|
graphTooltip = 1;
|
||||||
@@ -669,6 +680,94 @@ let
|
|||||||
overrides = [ ];
|
overrides = [ ];
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# -- Row 5: Storage --
|
||||||
|
# Panel 12: used/size ratio (in percent) of each listed ZFS pool, left half
# of the row at y = 32.
{
  id = 12;
  type = "timeseries";
  title = "ZFS Pool Utilization";
  datasource = promDs;
  gridPos = { x = 0; y = 32; w = 12; h = 8; };
  # One query per pool; the pool name doubles as the legend entry.
  targets =
    map
      (t: {
        datasource = promDs;
        expr = "zpool_used_bytes{pool=\"${t.pool}\"} / zpool_size_bytes{pool=\"${t.pool}\"} * 100";
        legendFormat = t.pool;
        refId = t.refId;
      })
      [
        { pool = "tank"; refId = "A"; }
        { pool = "hdds"; refId = "B"; }
      ];
  fieldConfig = {
    overrides = [ ];
    defaults = {
      # Values are percentages, so the axis is pinned to 0..100.
      unit = "percent";
      min = 0;
      max = 100;
      color = { mode = "palette-classic"; };
      custom = {
        spanNulls = true;
        fillOpacity = 20;
        lineWidth = 2;
      };
    };
  };
}
|
||||||
|
# Panel 13: used/size ratio (in percent) of each listed mount point, right
# half of the row at y = 32.
{
  id = 13;
  type = "timeseries";
  title = "Boot Drive Partitions";
  datasource = promDs;
  gridPos = { x = 12; y = 32; w = 12; h = 8; };
  # One query per mount point; the mount path doubles as the legend entry.
  targets =
    map
      (t: {
        datasource = promDs;
        expr = "partition_used_bytes{mount=\"${t.mount}\"} / partition_size_bytes{mount=\"${t.mount}\"} * 100";
        legendFormat = t.mount;
        refId = t.refId;
      })
      [
        { mount = "/boot"; refId = "A"; }
        { mount = "/persistent"; refId = "B"; }
        { mount = "/nix"; refId = "C"; }
      ];
  fieldConfig = {
    overrides = [ ];
    defaults = {
      # Values are percentages, so the axis is pinned to 0..100.
      unit = "percent";
      min = 0;
      max = 100;
      color = { mode = "palette-classic"; };
      custom = {
        spanNulls = true;
        fillOpacity = 20;
        lineWidth = 2;
      };
    };
  };
}
|
||||||
];
|
];
|
||||||
};
|
};
|
||||||
in
|
in
|
||||||
@@ -875,6 +974,24 @@ in
|
|||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
# -- Disk/pool usage textfile collector --
|
||||||
|
# Oneshot unit that runs the collector once per activation; the output path
# is handed to the script through the TEXTFILE environment variable.
systemd.services.disk-usage-collector = {
  description = "Collect ZFS pool and partition usage metrics for Prometheus";
  environment = {
    TEXTFILE = "${textfileDir}/disk-usage.prom";
  };
  serviceConfig.Type = "oneshot";
  serviceConfig.ExecStart = lib.getExe diskUsageCollector;
};
|
||||||
|
|
||||||
|
# Timer that activates the same-named collector service on a calendar
# schedule, with a randomized delay of up to 10s configured on top.
systemd.timers.disk-usage-collector = {
  timerConfig.OnCalendar = "*:*:0/60"; # every 60 seconds
  timerConfig.RandomizedDelaySec = "10s";
  wantedBy = [ "timers.target" ];
};
|
||||||
|
|
||||||
systemd.tmpfiles.rules = [
|
systemd.tmpfiles.rules = [
|
||||||
"d ${textfileDir} 0755 root root -"
|
"d ${textfileDir} 0755 root root -"
|
||||||
];
|
];
|
||||||
|
|||||||
Reference in New Issue
Block a user