diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix
index 7cd002b..b126198 100644
--- a/services/llama-cpp/llama-cpp.nix
+++ b/services/llama-cpp/llama-cpp.nix
@@ -4,8 +4,12 @@
   config,
   inputs,
   lib,
+  utils,
   ...
 }:
+let
+  cfg = config.services.llama-cpp;
+in
 {
   services.llama-cpp = {
     enable = true;
@@ -37,6 +41,17 @@
   # have to do this in order to get vulkan to work
   systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
 
+  # upstream module hardcodes --log-disable; override ExecStart to keep logs
+  # so we can see prompt processing progress via journalctl.
+  # All args go through escapeSystemdExecArgs (not just extraFlags) so a
+  # model path or host containing systemd-special chars can't break the unit.
+  systemd.services.llama-cpp.serviceConfig.ExecStart = lib.mkForce (
+    "${cfg.package}/bin/llama-server "
+    + utils.escapeSystemdExecArgs ([
+      "--host" cfg.host
+      "--port" (toString cfg.port)
+      "-m" cfg.model
+    ] ++ cfg.extraFlags)
+  );
+
   # Auth handled by llama-cpp --api-key-file (Bearer token).
   # No caddy_auth — the API key is the auth layer, and caddy_auth's basic
   # auth would block Bearer-only clients like oh-my-pi.