From 479ec43b8fe7a2d2f73d14a241ffc098d67d9528 Mon Sep 17 00:00:00 2001 From: Simon Gardling Date: Fri, 3 Apr 2026 15:19:11 -0400 Subject: [PATCH] llama-cpp: integrate native prometheus /metrics endpoint llama.cpp server has a built-in /metrics endpoint exposing prompt_tokens_seconds, predicted_tokens_seconds, tokens_predicted_total, n_decode_total, and n_busy_slots_per_decode. Enable it with --metrics and add a Prometheus scrape target, removing the need for external metric collection for LLM inference monitoring. --- services/grafana/prometheus.nix | 6 ++++++ services/llama-cpp/llama-cpp.nix | 1 + 2 files changed, 7 insertions(+) diff --git a/services/grafana/prometheus.nix b/services/grafana/prometheus.nix index e9835d5..2939ec9 100644 --- a/services/grafana/prometheus.nix +++ b/services/grafana/prometheus.nix @@ -65,6 +65,12 @@ in { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_apcupsd.port}" ]; } ]; } + { + job_name = "llama-cpp"; + static_configs = [ + { targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; } + ]; + } ]; }; diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix index b126198..4f3800b 100644 --- a/services/llama-cpp/llama-cpp.nix +++ b/services/llama-cpp/llama-cpp.nix @@ -35,6 +35,7 @@ in "on" "--api-key-file" config.age.secrets.llama-cpp-api-key.path + "--metrics" ]; };