diff --git a/services/disk-usage-collector.sh b/services/disk-usage-collector.sh
new file mode 100644
index 0000000..3874b53
--- /dev/null
+++ b/services/disk-usage-collector.sh
@@ -0,0 +1,69 @@
+#!/usr/bin/env bash
+# Collects ZFS pool utilization and boot partition usage for the Prometheus
+# textfile collector.
+#
+# Environment:
+#   TEXTFILE  (required) path of the .prom file to publish.
+#
+# Metrics are gathered first and written to a temp file next to TEXTFILE, which
+# is then renamed into place. Any failing command aborts the run and leaves the
+# previously published file untouched.
+set -euo pipefail
+
+TEXTFILE="${TEXTFILE:?TEXTFILE env required}"
+# Sits next to TEXTFILE so the final mv is a rename within one directory.
+TMP="${TEXTFILE}.$$"
+
+# Remove a half-written temp file if anything below fails; after a successful
+# mv the path no longer exists and rm -f does nothing.
+trap 'rm -f -- "$TMP"' EXIT
+
+# emit_gauge NAME HELP LABEL [LABEL_VALUE SAMPLE]...
+# Prints HELP, TYPE and all samples of one gauge as a single contiguous group,
+# as the Prometheus text exposition format expects.
+emit_gauge() {
+  local metric="$1" help_text="$2" label="$3"
+  shift 3
+  echo "# HELP ${metric} ${help_text}"
+  echo "# TYPE ${metric} gauge"
+  while (($# >= 2)); do
+    echo "${metric}{${label}=\"$1\"} $2"
+    shift 2
+  done
+}
+
+# Flat lists of alternating label value / sample value, one list per metric.
+pool_size=() pool_used=() pool_free=()
+part_size=() part_used=() part_free=()
+
+# -Hp: scripting mode, parseable, bytes.
+# Captured through an assignment so a zpool failure still aborts under set -e.
+pool_rows="$(zpool list -Hp -o name,size,alloc,free)"
+while IFS=$'\t' read -r name size alloc free; do
+  # With no pools the here-string still feeds one empty line; skip it.
+  [[ -n "$name" ]] || continue
+  pool_size+=("$name" "$size")
+  pool_used+=("$name" "$alloc")
+  pool_free+=("$name" "$free")
+done <<< "$pool_rows"
+
+# Boot drive partitions: /boot (ESP), /persistent, /nix
+# Use df with 1K blocks and convert to bytes
+for mount in /boot /persistent /nix; do
+  mountpoint -q "$mount" 2>/dev/null || continue
+  usage="$(df -k --output=size,used,avail "$mount" | tail -n 1)"
+  read -r size used avail _ <<< "$usage"
+  part_size+=("$mount" "$((size * 1024))")
+  part_used+=("$mount" "$((used * 1024))")
+  part_free+=("$mount" "$((avail * 1024))")
+done
+
+{
+  emit_gauge zpool_size_bytes 'Total size of ZFS pool in bytes' pool "${pool_size[@]}"
+  emit_gauge zpool_used_bytes 'Used space in ZFS pool in bytes' pool "${pool_used[@]}"
+  emit_gauge zpool_free_bytes 'Free space in ZFS pool in bytes' pool "${pool_free[@]}"
+  emit_gauge partition_size_bytes 'Total size of partition in bytes' mount "${part_size[@]}"
+  emit_gauge partition_used_bytes 'Used space on partition in bytes' mount "${part_used[@]}"
+  emit_gauge partition_free_bytes 'Free space on partition in bytes' mount "${part_free[@]}"
+} > "$TMP"
+mv "$TMP" "$TEXTFILE"
diff --git a/services/monitoring.nix 
b/services/monitoring.nix
index 5c4c6ab..c274257 100644
--- a/services/monitoring.nix
+++ b/services/monitoring.nix
@@ -79,6 +79,18 @@ let
     '';
   };
 
+  # Collector script wrapped with the tools it calls (zpool, df, mountpoint) on PATH.
+  diskUsageCollector = pkgs.writeShellApplication {
+    name = "disk-usage-collector";
+    runtimeInputs = with pkgs; [
+      coreutils
+      gawk
+      config.boot.zfs.package
+      util-linux # for mountpoint
+    ];
+    text = builtins.readFile ./disk-usage-collector.sh;
+  };
+
   dashboard = {
     editable = true;
     graphTooltip = 1;
@@ -669,6 +681,96 @@ let
           overrides = [ ];
         };
       }
+
+      # -- Row 5: Storage --
+      # Allocated share of each ZFS pool, as a percentage of pool size.
+      {
+        id = 12;
+        type = "timeseries";
+        title = "ZFS Pool Utilization";
+        gridPos = {
+          h = 8;
+          w = 12;
+          x = 0;
+          y = 32;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "zpool_used_bytes{pool=\"tank\"} / zpool_size_bytes{pool=\"tank\"} * 100";
+            legendFormat = "tank";
+            refId = "A";
+          }
+          {
+            datasource = promDs;
+            expr = "zpool_used_bytes{pool=\"hdds\"} / zpool_size_bytes{pool=\"hdds\"} * 100";
+            legendFormat = "hdds";
+            refId = "B";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "percent";
+            min = 0;
+            max = 100;
+            color.mode = "palette-classic";
+            custom = {
+              lineWidth = 2;
+              fillOpacity = 20;
+              spanNulls = true;
+            };
+          };
+          overrides = [ ];
+        };
+      }
+      # Used share of each boot-drive mount, as a percentage of partition size.
+      {
+        id = 13;
+        type = "timeseries";
+        title = "Boot Drive Partitions";
+        gridPos = {
+          h = 8;
+          w = 12;
+          x = 12;
+          y = 32;
+        };
+        datasource = promDs;
+        targets = [
+          {
+            datasource = promDs;
+            expr = "partition_used_bytes{mount=\"/boot\"} / partition_size_bytes{mount=\"/boot\"} * 100";
+            legendFormat = "/boot";
+            refId = "A";
+          }
+          {
+            datasource = promDs;
+            expr = "partition_used_bytes{mount=\"/persistent\"} / partition_size_bytes{mount=\"/persistent\"} * 100";
+            legendFormat = "/persistent";
+            refId = "B";
+          }
+          {
+            datasource = promDs;
+            expr = "partition_used_bytes{mount=\"/nix\"} / partition_size_bytes{mount=\"/nix\"} * 100";
+            legendFormat = "/nix";
+            refId = "C";
+          }
+        ];
+        fieldConfig = {
+          defaults = {
+            unit = "percent";
+            min = 0;
+            max = 100;
+            color.mode = "palette-classic";
+            custom = {
+              lineWidth = 2;
+              fillOpacity = 20;
+              spanNulls = true;
+            };
+          };
+          overrides = [ ];
+        };
+      }
     ];
   };
 in
@@ -875,6 +977,26 @@ in
     };
   };
 
+  # -- Disk/pool usage textfile collector --
+  systemd.services.disk-usage-collector = {
+    description = "Collect ZFS pool and partition usage metrics for Prometheus";
+    serviceConfig = {
+      Type = "oneshot";
+      ExecStart = lib.getExe diskUsageCollector;
+    };
+    environment.TEXTFILE = "${textfileDir}/disk-usage.prom";
+  };
+
+  systemd.timers.disk-usage-collector = {
+    wantedBy = [ "timers.target" ];
+    timerConfig = {
+      # Run once per minute, at second 0. Written as the "minutely" shorthand:
+      # seconds only span 0-59, so a step of 60 is not a usable repetition.
+      OnCalendar = "minutely";
+      RandomizedDelaySec = "10s";
+    };
+  };
+
   systemd.tmpfiles.rules = [
     "d ${textfileDir} 0755 root root -"