diff --git a/services/llama-cpp/llama-cpp.nix b/services/llama-cpp/llama-cpp.nix
index 7cd002b..b126198 100644
--- a/services/llama-cpp/llama-cpp.nix
+++ b/services/llama-cpp/llama-cpp.nix
@@ -4,8 +4,12 @@
   config,
   inputs,
   lib,
+  utils,
   ...
 }:
+let
+  cfg = config.services.llama-cpp;
+in
 {
   services.llama-cpp = {
     enable = true;
@@ -37,6 +41,17 @@
   # have to do this in order to get vulkan to work
   systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
 
+  # upstream module hardcodes --log-disable; override ExecStart to keep logs
+  # so we can see prompt processing progress via journalctl.
+  # All args go through escapeSystemdExecArgs (not just extraFlags) so a
+  # model path or host containing systemd-special chars can't break the unit.
+  systemd.services.llama-cpp.serviceConfig.ExecStart = lib.mkForce (
+    "${cfg.package}/bin/llama-server "
+    + utils.escapeSystemdExecArgs ([
+      "--host" cfg.host
+      "--port" (toString cfg.port)
+      "-m" cfg.model
+    ] ++ cfg.extraFlags)
+  );
+
   # Auth handled by llama-cpp --api-key-file (Bearer token).
   # No caddy_auth — the API key is the auth layer, and caddy_auth's basic
   # auth would block Bearer-only clients like oh-my-pi.