llama-cpp: do logging
@@ -4,8 +4,12 @@
  config,
  inputs,
  lib,
  utils,
  ...
}:
let
  cfg = config.services.llama-cpp;
in
{
  services.llama-cpp = {
    enable = true;
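The ExecStart override in the next hunk reads cfg.host, cfg.port, cfg.model and cfg.extraFlags back out of this options set. A minimal sketch of how those options might be filled in (the host, port, model path, context size and key file below are illustrative assumptions, not values from this commit):

  services.llama-cpp = {
    enable = true;
    host = "127.0.0.1";                        # example bind address
    port = 8080;                               # example port
    model = "/var/lib/llama-cpp/model.gguf";   # hypothetical model path
    extraFlags = [
      "--ctx-size" "8192"                            # hypothetical context size
      "--api-key-file" "/run/secrets/llama-api-key"  # hypothetical key file (see auth note below)
    ];
  };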
@@ -37,6 +41,16 @@
  # have to do this in order to get vulkan to work
  systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;

  # upstream module hardcodes --log-disable; override ExecStart to keep logs
  # so we can see prompt processing progress via journalctl
  systemd.services.llama-cpp.serviceConfig.ExecStart = lib.mkForce (
    "${cfg.package}/bin/llama-server"
    + " --host ${cfg.host}"
    + " --port ${toString cfg.port}"
    + " -m ${cfg.model}"
    + " ${utils.escapeSystemdExecArgs cfg.extraFlags}"
  );

  # Auth handled by llama-cpp --api-key-file (Bearer token).
  # No caddy_auth — the API key is the auth layer, and caddy_auth's basic
  # auth would block Bearer-only clients like oh-my-pi.
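utils.escapeSystemdExecArgs quotes each element of cfg.extraFlags so systemd splits the command line back into the original arguments, keeping flags with spaces or specifier characters intact. With the illustrative option values sketched above (store path shown as a placeholder), the rendered unit line would look roughly like:

  ExecStart=/nix/store/<hash>-llama-cpp/bin/llama-server --host 127.0.0.1 --port 8080 -m /var/lib/llama-cpp/model.gguf --ctx-size 8192 --api-key-file /run/secrets/llama-api-key

Since --log-disable is no longer passed, prompt-processing output from llama-server ends up in the journal and can be followed with journalctl -u llama-cpp -f.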