# NixOS VM test for the llama.cpp Grafana annotation service.
#
# The test machine runs three transient systemd units:
#   * mock-grafana  — a fake Grafana HTTP API that appends posted annotations
#                     to a JSON file (ANNOTS_FILE),
#   * mock-llama    — a fake llama-server process whose busy/idle state is
#                     driven by a state file (LLAMA_STATE),
#   * llama-annot   — the annotation script under test.
# The script under test is expected to open a Grafana annotation when the
# llama-server process becomes busy, close it (set timeEnd and a duration
# suffix) when it goes idle, and persist open-annotation state to STATE_FILE
# so a restart does not create duplicates.
{ pkgs, ... }:
let
  mockGrafana = ./mock-grafana-server.py;
  script = ../services/llama-cpp-annotations.py;
  python = pkgs.python3;
  mockLlamaProcess = ./mock-llama-server-proc.py;
in
pkgs.testers.runNixOSTest {
  name = "llama-cpp-annotations";

  nodes.machine = { pkgs, ... }: {
    environment.systemPackages = [ pkgs.python3 pkgs.curl pkgs.procps ];
  };

  testScript = ''
    import json
    import time

    GRAFANA_PORT = 13000
    ANNOTS_FILE = "/tmp/annotations.json"       # written by mock-grafana
    LLAMA_STATE = "/tmp/llama-state.txt"        # read by mock-llama
    STATE_FILE = "/tmp/llama-annot-state.json"  # persisted by the service under test
    PYTHON = "${python}/bin/python3"
    MOCK_GRAFANA = "${mockGrafana}"
    MOCK_LLAMA = "${mockLlamaProcess}"
    SCRIPT = "${script}"

    def read_annotations():
        """Return the annotation list recorded by the mock Grafana ([] if absent)."""
        out = machine.succeed(f"cat {ANNOTS_FILE} 2>/dev/null || echo '[]'")
        return json.loads(out.strip())

    def set_busy():
        """Flip the mock llama-server into the busy state."""
        machine.succeed(f"echo busy > {LLAMA_STATE}")

    def set_idle():
        """Flip the mock llama-server into the idle state."""
        machine.succeed(f"echo idle > {LLAMA_STATE}")

    start_all()
    machine.wait_for_unit("multi-user.target")

    with subtest("Start mock services"):
        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")
        machine.succeed(
            f"systemd-run --unit=mock-grafana {PYTHON} {MOCK_GRAFANA} {GRAFANA_PORT} {ANNOTS_FILE}"
        )
        machine.succeed(
            f"systemd-run --unit=mock-llama {PYTHON} {MOCK_LLAMA} {LLAMA_STATE}"
        )
        # mock-grafana is ready once it accepts an annotation POST and echoes an id.
        machine.wait_until_succeeds(
            f"curl -sf http://127.0.0.1:{GRAFANA_PORT}/api/annotations -X POST "
            f"-H 'Content-Type: application/json' -d '{{\"text\":\"ping\",\"tags\":[]}}' | grep -q id",
            timeout=10,
        )
        machine.wait_until_succeeds(
            "pgrep -x llama-server",
            timeout=10,
        )
        # Discard the readiness-probe annotation before the real assertions.
        machine.succeed(f"echo '[]' > {ANNOTS_FILE}")

    with subtest("Start annotation service"):
        machine.succeed(
            f"systemd-run --unit=llama-annot "
            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
            f"--setenv=STATE_FILE={STATE_FILE} "
            f"--setenv=POLL_INTERVAL=2 "
            f"--setenv=CPU_THRESHOLD=10 "
            f"{PYTHON} {SCRIPT}"
        )
        # Allow a couple of 2-second poll cycles before checking behavior.
        time.sleep(5)

    with subtest("No annotations when idle"):
        annots = read_annotations()
        assert annots == [], f"Expected no annotations, got: {annots}"

    with subtest("Annotation created when llama-server becomes busy"):
        set_busy()
        machine.wait_until_succeeds(
            f"cat {ANNOTS_FILE} | {PYTHON} -c "
            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a else 1)\"",
            timeout=20,
        )
        annots = read_annotations()
        assert len(annots) == 1, f"Expected 1 annotation, got: {annots}"
        assert "llama-cpp" in annots[0].get("tags", []), f"Missing tag: {annots[0]}"
        assert "LLM request" in annots[0]["text"], f"Missing text: {annots[0]['text']}"
        # An open (in-progress) annotation must not carry timeEnd yet.
        assert "timeEnd" not in annots[0], f"timeEnd should not be set: {annots[0]}"

    with subtest("Annotation closed when llama-server becomes idle"):
        set_idle()
        machine.wait_until_succeeds(
            f"cat {ANNOTS_FILE} | {PYTHON} -c "
            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if a and 'timeEnd' in a[0] else 1)\"",
            timeout=20,
        )
        annots = read_annotations()
        assert len(annots) == 1, f"Expected 1, got: {annots}"
        assert "timeEnd" in annots[0], f"timeEnd missing: {annots[0]}"
        assert annots[0]["timeEnd"] > annots[0]["time"], "timeEnd should be after time"
        # Closing is expected to append a "(N.Ns)" duration suffix to the text.
        assert "s)" in annots[0].get("text", ""), f"Duration missing: {annots[0]}"

    with subtest("State survives restart"):
        set_busy()
        # Wait for the second (open) annotation, then restart mid-request.
        machine.wait_until_succeeds(
            f"cat {ANNOTS_FILE} | {PYTHON} -c "
            f"\"import sys,json; a=json.load(sys.stdin); exit(0 if len(a)==2 else 1)\"",
            timeout=20,
        )
        machine.succeed("systemctl stop llama-annot || true")
        time.sleep(1)
        machine.succeed(
            f"systemd-run --unit=llama-annot-2 "
            f"--setenv=GRAFANA_URL=http://127.0.0.1:{GRAFANA_PORT} "
            f"--setenv=STATE_FILE={STATE_FILE} "
            f"--setenv=POLL_INTERVAL=2 "
            f"--setenv=CPU_THRESHOLD=10 "
            f"{PYTHON} {SCRIPT}"
        )
        time.sleep(6)
        annots = read_annotations()
        # The restarted service must reuse STATE_FILE, not open a third annotation.
        assert len(annots) == 2, f"Restart should not duplicate, got: {annots}"
  '';
}