From 479ec43b8fe7a2d2f73d14a241ffc098d67d9528 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Fri, 3 Apr 2026 15:19:11 -0400 Subject: [PATCH] llama-cpp: integrate native prometheus /metrics endpoint llama.cpp server has a built-in /metrics endpoint exposing prompt_tokens_seconds, predicted_tokens_seconds, tokens_predicted_total, n_decode_total, and n_busy_slots_per_decode. Enable it with --metrics and add a Prometheus scrape target, removing the need for external metric collection for LLM inference monitoring. --- services/grafana/prometheus.nix | 6 ++++++ services/llama-cpp/llama-cpp.nix | 1 + 2 files changed, 7 insertions(+) diff --git a/services/grafana/prometheus.nix b/services/grafana/prometheus.nix index e9835d5..2939ec9 100644 --- a/services/grafana/prometheus.nix +++ b/services/grafana/prometheus.nix @@ -65,6 +65,12 @@ in { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_apcupsd.port}" ]; } ]; } + { + job_name = "llama-cpp"; + static_configs = [ + { targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; } + ]; + } ]; }; diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix index b126198..4f3800b 100644 --- a/services/llama-cpp/llama-cpp.nix +++ b/services/llama-cpp/llama-cpp.nix @@ -35,6 +35,7 @@ in "on" "--api-key-file" config.age.secrets.llama-cpp-api-key.path + "--metrics" ]; };