llama-cpp: add grafana annotations for inference requests
Poll /slots endpoint, create annotations when slots start processing, close with token count when complete. Includes NixOS VM test with mock llama-cpp and grafana servers. Dashboard annotation entry added.
This commit is contained in:
@@ -120,6 +120,18 @@ let
          type = "tags";
          tags = [ "zfs-scrub" ];
        }
        {
          name = "LLM Requests";
          datasource = {
            type = "grafana";
            uid = "-- Grafana --";
          };
          enable = true;
          iconColor = "purple";
          showIn = 0;
          type = "tags";
          tags = [ "llama-cpp" ];
        }
      ];

      panels = [
Reference in New Issue
Block a user