llama-cpp: add grafana annotations for inference requests

Poll /slots endpoint, create annotations when slots start processing,
close with token count when complete. Includes NixOS VM test with
mock llama-cpp and grafana servers. Dashboard annotation entry added.
This commit is contained in:
2026-04-02 17:43:49 -04:00
parent 0235617627
commit 9baeaa5c23
6 changed files with 362 additions and 0 deletions

View File

@@ -120,6 +120,18 @@ let
type = "tags";
tags = [ "zfs-scrub" ];
}
# Dashboard annotation source for llama-cpp inference requests.
# Mirrors the zfs-scrub entry directly above: annotations are matched by
# tag and stored in Grafana's built-in annotation store (the special
# "-- Grafana --" datasource). The poller described in the commit message
# creates/closes annotations tagged "llama-cpp", which this entry surfaces.
{
name = "LLM Requests";
datasource = {
type = "grafana";
uid = "-- Grafana --";
};
enable = true;
iconColor = "purple";
# NOTE(review): showIn = 0 appears to mean "show on all panels"; it is a
# legacy annotation field — confirm the deployed Grafana version honors it.
showIn = 0;
type = "tags";
tags = [ "llama-cpp" ];
}
];
panels = [