monitoring: add grafana annotations for zfs scrub events
This commit is contained in:
@@ -25,6 +25,9 @@ in
|
||||
# jellyfin annotation service test
|
||||
jellyfinAnnotationsTest = handleTest ./jellyfin-annotations.nix;
|
||||
|
||||
# zfs scrub annotations test
|
||||
zfsScrubAnnotationsTest = handleTest ./zfs-scrub-annotations.nix;
|
||||
|
||||
# ntfy alerts test
|
||||
ntfyAlertsTest = handleTest ./ntfy-alerts.nix;
|
||||
|
||||
|
||||
123
tests/zfs-scrub-annotations.nix
Normal file
123
tests/zfs-scrub-annotations.nix
Normal file
@@ -0,0 +1,123 @@
|
||||
{
|
||||
lib,
|
||||
pkgs,
|
||||
...
|
||||
}:
|
||||
let
  # Mock Grafana HTTP server; the test script runs it with the Python
  # interpreter below, passing a port and an annotations file path.
  mockServer = ./mock-grafana-server.py;

  # Stand-in for the real zpool binary. The test copies it to a directory
  # that is prepended to PATH, so the script under test picks it up instead
  # of a real ZFS installation.
  #   zpool list ...           -> prints the two fake pool names
  #   zpool status <pool> ...  -> prints a canned scrub result line for <pool>
  # Any other subcommand or pool name prints nothing and exits successfully.
  mockZpool = pkgs.writeShellScript "zpool" ''
    case "$1" in
      list)
        echo "tank"
        echo "hdds"
        ;;
      status)
        pool="$2"
        if [ "$pool" = "tank" ]; then
          echo " scan: scrub repaired 0B in 00:24:39 with 0 errors on Mon Jan 1 02:24:39 2024"
        elif [ "$pool" = "hdds" ]; then
          echo " scan: scrub repaired 0B in 04:12:33 with 0 errors on Mon Jan 1 06:12:33 2024"
        fi
        ;;
    esac
  '';

  # Script under test; the test invokes it with the actions "start" and "stop".
  script = ../services/zfs-scrub-annotations.sh;

  # Interpreter used to launch the mock Grafana server.
  python = pkgs.python3;
in
|
||||
pkgs.testers.runNixOSTest {
|
||||
name = "zfs-scrub-annotations";
|
||||
|
||||
nodes.machine =
|
||||
{ pkgs, ... }:
|
||||
{
|
||||
environment.systemPackages = with pkgs; [
|
||||
python3
|
||||
curl
|
||||
jq
|
||||
];
|
||||
};
|
||||
|
||||
testScript = ''
|
||||
import json
|
||||
|
||||
# Port the mock Grafana server is started on and the script is pointed at.
GRAFANA_PORT = 13000
# File handed to the mock server; read_annotations() parses it as JSON.
ANNOTS_FILE = "/tmp/annotations.json"
# Directory exported to the script as STATE_DIR; the test expects the
# annotation-id state file to appear and disappear inside it.
STATE_DIR = "/tmp/scrub-state"
# Store paths substituted by Nix when the test script is generated.
PYTHON = "${python}/bin/python3"
MOCK = "${mockServer}"
SCRIPT = "${script}"
MOCK_ZPOOL = "${mockZpool}"

# Directory that receives the mock zpool executable.
MOCK_BIN = "/tmp/mock-bin"
# Shell environment assignments placed in front of every script invocation.
# MOCK_BIN comes first on PATH so the mock zpool shadows any real one; $PATH
# is left for the shell on the VM to expand. The trailing space is part of
# the value.
ENV_PREFIX = (
    f"GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
    f"STATE_DIR={STATE_DIR} "
    f"PATH={MOCK_BIN}:$PATH "
)
|
||||
|
||||
def read_annotations():
    """Return the parsed JSON content of ANNOTS_FILE on the test machine.

    If the file cannot be read, the shell fallback prints an empty JSON
    array, so the result is then an empty list instead of an error.
    """
    out = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'")
    return json.loads(out.strip())
|
||||
|
||||
start_all()
# Block until the VM has reached multi-user.target before running commands.
machine.wait_for_unit("multi-user.target")

with subtest("Setup state directory and mock zpool"):
    machine.succeed(f"mkdir -p {STATE_DIR}")
    # Install the mock under the name zpool in the directory that ENV_PREFIX
    # puts first on PATH.
    machine.succeed(f"mkdir -p {MOCK_BIN} && cp {MOCK_ZPOOL} {MOCK_BIN}/zpool && chmod +x {MOCK_BIN}/zpool")
|
||||
|
||||
with subtest("Start mock Grafana server"):
    # Start from an empty annotation list.
    machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
    # Run the mock as a transient systemd unit so it can be stopped by name later.
    machine.succeed(
        f"systemd-run --unit=mock-grafana {PYTHON} {MOCK} {GRAFANA_PORT} {ANNOTS_FILE}"
    )
    # Readiness probe: retry a POST until the response mentions an id.
    machine.wait_until_succeeds(
        f"curl -sf -X POST http://127.0.0.1:{GRAFANA_PORT}/api/annotations "
        f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
        timeout=10,
    )
    # Reset the file again, presumably to discard the annotation written by
    # the probe so later subtests only see what the script creates.
    machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
|
||||
|
||||
with subtest("Start action creates annotation with pool names and zfs-scrub tag"):
    machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} start")
    annots = read_annotations()
    assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
    # The open annotation must carry the tag, both mock pool names and a
    # start time, but no end time yet.
    assert "zfs-scrub" in annots[0].get("tags", []), f"Missing zfs-scrub tag: {annots[0]}"
    assert "tank" in annots[0]["text"], f"Missing tank in text: {annots[0]['text']}"
    assert "hdds" in annots[0]["text"], f"Missing hdds in text: {annots[0]['text']}"
    assert "time" in annots[0], f"Missing time field: {annots[0]}"
    assert "timeEnd" not in annots[0], f"timeEnd should not be set yet: {annots[0]}"
|
||||
|
||||
with subtest("State file contains annotation ID"):
    # The start action is expected to have written the annotation ID into
    # the state directory; the test expects that ID to be 1.
    ann_id = machine.succeed(f"cat {STATE_DIR}/annotation-id").strip()
    assert ann_id == "1", f"Expected annotation ID 1, got: {ann_id}"
|
||||
|
||||
with subtest("Stop action closes annotation with per-pool scrub results"):
    machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} stop")
    annots = read_annotations()
    # Stop must update the existing annotation rather than add a second one.
    assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
    assert "timeEnd" in annots[0], f"timeEnd should be set: {annots[0]}"
    assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
    text = annots[0]["text"]
    assert "ZFS scrub completed" in text, f"Missing completed text: {text}"
    assert "tank:" in text, f"Missing tank results: {text}"
    assert "hdds:" in text, f"Missing hdds results: {text}"
    # The durations come from the canned scan lines printed by the mock zpool.
    assert "00:24:39" in text, f"Missing tank scrub duration: {text}"
    assert "04:12:33" in text, f"Missing hdds scrub duration: {text}"
|
||||
|
||||
with subtest("State file cleaned up after stop"):
    # machine.fail passes only if the command exits non-zero, i.e. the state
    # file no longer exists.
    machine.fail(f"test -f {STATE_DIR}/annotation-id")
|
||||
|
||||
with subtest("Stop action handles missing state file gracefully"):
    # With no state file left, stop must still exit successfully and must not
    # create another annotation.
    machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} stop")
    annots = read_annotations()
    assert len(annots) == 1, f"Expected no new annotations, got: {annots}"
|
||||
|
||||
with subtest("Start action handles Grafana being down gracefully"):
    machine.succeed("systemctl stop mock-grafana")
    # With the mock server stopped, start must still exit successfully and
    # must not leave a state file behind.
    machine.succeed(f"{ENV_PREFIX} bash {SCRIPT} start")
    machine.fail(f"test -f {STATE_DIR}/annotation-id")
|
||||
'';
|
||||
}
|
||||
Reference in New Issue
Block a user