llama-cpp: add API key auth via --api-key-file
Some checks failed
Build and Deploy / deploy (push) Failing after 2m49s
Generate and encrypt a Bearer token for llama-cpp's built-in auth. Remove caddy_auth from the vhost since basic auth blocks Bearer-only clients. Internal sidecars (xmrig-pause, annotations) connect directly to localhost and are unaffected (/slots is public).
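A minimal sketch of the workflow this commit assumes. The key-generation and encryption steps are not part of the diff, so the `openssl` command and the `llm.example.org` domain below are illustrative placeholders; what the commit does establish is that llama-cpp validates the key as a Bearer token and that `/slots` stays public:

```shell
# Hypothetical generation of the token that gets encrypted into the
# llama-cpp-api-key age secret (this commit only wires the secret in;
# it does not show how the key was produced).
API_KEY=$(openssl rand -hex 32)

# With --api-key-file set, llama-cpp's server expects the key in an
# Authorization header; a client call would look like:
#   curl -H "Authorization: Bearer $API_KEY" https://llm.example.org/v1/models
# /slots remains reachable without a key, per the commit message.

echo "${#API_KEY}"   # prints 64
```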
@@ -29,14 +29,18 @@
       "turbo4"
       "-fa"
       "on"
+      "--api-key-file"
+      config.age.secrets.llama-cpp-api-key.path
     ];
   };
 
   # have to do this in order to get vulkan to work
   systemd.services.llama-cpp.serviceConfig.DynamicUser = lib.mkForce false;
 
+  # Auth handled by llama-cpp --api-key-file (Bearer token).
+  # No caddy_auth — the API key is the auth layer, and caddy_auth's basic
+  # auth would block Bearer-only clients like oh-my-pi.
   services.caddy.virtualHosts."llm.${service_configs.https.domain}".extraConfig = ''
-    import ${config.age.secrets.caddy_auth.path}
     reverse_proxy :${toString config.services.llama-cpp.port}
   '';
 }
||||