llama-cpp: integrate native prometheus /metrics endpoint
llama.cpp server has a built-in /metrics endpoint exposing prompt_tokens_seconds, predicted_tokens_seconds, tokens_predicted_total, n_decode_total, and n_busy_slots_per_decode. Enable it with --metrics and add a Prometheus scrape target, replacing the need for any external metric collection for LLM inference monitoring.
This commit is contained in:
@@ -65,6 +65,12 @@ in
       { targets = [ "127.0.0.1:${toString service_configs.ports.private.prometheus_apcupsd.port}" ]; }
     ];
   }
+  {
+    job_name = "llama-cpp";
+    static_configs = [
+      { targets = [ "127.0.0.1:${toString service_configs.ports.private.llama_cpp.port}" ]; }
+    ];
+  }
 ];
};
@@ -35,6 +35,7 @@ in
   "on"
   "--api-key-file"
   config.age.secrets.llama-cpp-api-key.path
+  "--metrics"
 ];
};
|||||||
Reference in New Issue
Block a user